OP#189 update readme
This commit is contained in:
3
.vscode/settings.json
vendored
3
.vscode/settings.json
vendored
@@ -1,9 +1,11 @@
|
|||||||
{
|
{
|
||||||
"cSpell.words": [
|
"cSpell.words": [
|
||||||
|
"blockdir",
|
||||||
"changeme",
|
"changeme",
|
||||||
"GITEA",
|
"GITEA",
|
||||||
"GOMAXPROCS",
|
"GOMAXPROCS",
|
||||||
"Iseconds",
|
"Iseconds",
|
||||||
|
"nullglob",
|
||||||
"PGDATABASE",
|
"PGDATABASE",
|
||||||
"PGHOST",
|
"PGHOST",
|
||||||
"PGPASSWORD",
|
"PGPASSWORD",
|
||||||
@@ -14,6 +16,7 @@
|
|||||||
"readaccess",
|
"readaccess",
|
||||||
"reqpackage",
|
"reqpackage",
|
||||||
"rpms",
|
"rpms",
|
||||||
|
"shopt",
|
||||||
"sslcacert",
|
"sslcacert",
|
||||||
"sslverify"
|
"sslverify"
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -94,6 +94,7 @@ All files and directories are configured with correct selinux context. If selinu
|
|||||||
## Contact Us
|
## Contact Us
|
||||||
|
|
||||||
[contact Us](https://confdroid.com/contact/)
|
[contact Us](https://confdroid.com/contact/)
|
||||||
|
[feedback collection](https://feedback.confdroid.com)
|
||||||
|
|
||||||
## Disclaimer
|
## Disclaimer
|
||||||
|
|
||||||
|
|||||||
@@ -1,114 +1,48 @@
|
|||||||
#!/usr/bin/env bash
|
#!/bin/bash
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
IFS=$'\n\t'
|
|
||||||
|
|
||||||
# Configuration - edit or override via environment in the systemd unit
|
# Load environment
|
||||||
DATA_DIR="${DATA_DIR:-/var/lib/prometheus/data}" # path to Prometheus TSDB dir (blocks)
|
source <%= @ps_env_file %>
|
||||||
PSQL_CONN="${PSQL_CONN:-}" # psql connection string, e.g. "postgresql://user:pass@host:5432/db"
|
|
||||||
CHECK_SQL_TEMPLATE="${CHECK_SQL_TEMPLATE:-}" # SQL template that must return at least one row when block is present.
|
|
||||||
# Use placeholders {min} {max} (epoch ms). Example:
|
|
||||||
# "SELECT 1 FROM metrics WHERE time >= to_timestamp({min}/1000.0) AND time <= to_timestamp({max}/1000.0) LIMIT 1;"
|
|
||||||
MIN_AGE_HOURS="${MIN_AGE_HOURS:-3}" # minimum block age before being eligible for deletion
|
|
||||||
GRACE_HOURS="${GRACE_HOURS:-1}" # extra grace period before deletion (safety)
|
|
||||||
DRY_RUN="${DRY_RUN:-false}" # set to "true" to test without deleting
|
|
||||||
|
|
||||||
log() { echo "$(date -Iseconds) $*"; }
|
LOG_FILE="/var/log/prometheus-prune.log"
|
||||||
|
echo "$(date '+%Y-%m-%d %H:%M:%S') Starting prune run" >> "$LOG_FILE"
|
||||||
|
|
||||||
# helper: convert Prometheus block times from meta.json (ms) to integers
|
# Find all TSDB blocks
|
||||||
get_times_from_meta() {
|
TSDB_DIR="/var/lib/prometheus"
|
||||||
local meta="$1"
|
for block in "$TSDB_DIR"/*/; do
|
||||||
# meta.json contains "minTime" and "maxTime" in milliseconds since epoch
|
[[ -d "$block" ]] || continue
|
||||||
jq -r '.minTime, .maxTime' "$meta"
|
meta="$block/meta.json"
|
||||||
}
|
[[ -f "$meta" ]] || continue
|
||||||
|
|
||||||
# helper: run SQL check (returns 0 if found)
|
# Get block min and max timestamps
|
||||||
sql_check_block() {
|
min_time=$(jq -r '.minTime' "$meta")
|
||||||
local min_ms="$1" max_ms="$2"
|
max_time=$(jq -r '.maxTime' "$meta")
|
||||||
if [[ -z "$PSQL_CONN" || -z "$CHECK_SQL_TEMPLATE" ]]; then
|
|
||||||
return 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
# fill placeholders
|
# Skip very recent blocks
|
||||||
local sql
|
block_age_hours=$(( ($(date +%s) - min_time/1000) / 3600 ))
|
||||||
sql="${CHECK_SQL_TEMPLATE//\{min\}/$min_ms}"
|
if (( block_age_hours < MIN_AGE_HOURS + GRACE_HOURS )); then
|
||||||
sql="${sql//\{max\}/$max_ms}"
|
echo "Skipping block $block (age ${block_age_hours}h)" >> "$LOG_FILE"
|
||||||
|
|
||||||
# run psql quietly - return success if any rows
|
|
||||||
if psql "$PSQL_CONN" -t -c "$sql" 2>/dev/null | grep -q '.'; then
|
|
||||||
return 0
|
|
||||||
fi
|
|
||||||
return 1
|
|
||||||
}
|
|
||||||
|
|
||||||
now_s="$(date +%s)"
|
|
||||||
now_ms="$(( now_s * 1000 ))"
|
|
||||||
|
|
||||||
# Iterate blocks. Each block is a directory named <ULID>
|
|
||||||
shopt -s nullglob
|
|
||||||
for blockdir in "$DATA_DIR"/*; do
|
|
||||||
# find meta.json
|
|
||||||
meta="$blockdir/meta.json"
|
|
||||||
if [[ ! -f "$meta" ]]; then
|
|
||||||
# skip non-blocks (like WAL, lock files)
|
|
||||||
continue
|
continue
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# ensure it's a directory and readable
|
# Verify block is in PostgreSQL
|
||||||
if [[ ! -r "$meta" ]]; then
|
check_sql="${CHECK_SQL_TEMPLATE//\{min\}/$min_time}"
|
||||||
log "Skipping unreadable meta: $meta"
|
check_sql="${check_sql//\{max\}/$max_time}"
|
||||||
|
|
||||||
|
exists=$(psql -h "$PGHOST" -p "$PGPORT" -U "$PGUSER" -d "$PGDATABASE" -t -c "$check_sql" | xargs)
|
||||||
|
|
||||||
|
if [[ "$exists" != "1" ]]; then
|
||||||
|
echo "Block $block not found in PostgreSQL, skipping deletion" >> "$LOG_FILE"
|
||||||
continue
|
continue
|
||||||
fi
|
fi
|
||||||
|
|
||||||
read -r min_ms max_ms < <(get_times_from_meta "$meta")
|
# Dry-run or delete
|
||||||
# if jq didn't return numbers, skip
|
if [[ "$DRY_RUN" == "true" ]]; then
|
||||||
if ! [[ "$min_ms" =~ ^[0-9]+$ && "$max_ms" =~ ^[0-9]+$ ]]; then
|
echo "[DRY-RUN] Would delete block $block" >> "$LOG_FILE"
|
||||||
log "Skipping malformed meta: $meta"
|
|
||||||
continue
|
|
||||||
fi
|
|
||||||
|
|
||||||
# compute block age in seconds (use max_ms as block end)
|
|
||||||
block_end_s=$(( max_ms / 1000 ))
|
|
||||||
age_s=$(( now_s - block_end_s ))
|
|
||||||
min_age_s=$(( MIN_AGE_HOURS * 3600 ))
|
|
||||||
grace_s=$(( GRACE_HOURS * 3600 ))
|
|
||||||
|
|
||||||
block_name="$(basename "$blockdir")"
|
|
||||||
log "Inspecting block $block_name (min=$min_ms max=$max_ms) age=${age_s}s"
|
|
||||||
|
|
||||||
deleted=false
|
|
||||||
|
|
||||||
# First: try SQL verification if configured
|
|
||||||
if [[ -n "$PSQL_CONN" && -n "$CHECK_SQL_TEMPLATE" ]]; then
|
|
||||||
if sql_check_block "$min_ms" "$max_ms"; then
|
|
||||||
log "SQL verification OK for block $block_name → deleting"
|
|
||||||
if [[ "$DRY_RUN" == "false" ]]; then
|
|
||||||
rm -rf "$blockdir"
|
|
||||||
else
|
else
|
||||||
log "DRY_RUN=true, not deleting $blockdir"
|
echo "Deleting block $block" >> "$LOG_FILE"
|
||||||
|
rm -rf "$block"
|
||||||
fi
|
fi
|
||||||
deleted=true
|
|
||||||
else
|
|
||||||
log "SQL verification did NOT find block data for $block_name"
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Second: if SQL verification did not run or failed, apply age+grace policy
|
|
||||||
if [[ "$deleted" == "false" ]]; then
|
|
||||||
if (( age_s >= (min_age_s + grace_s) )); then
|
|
||||||
log "Block $block_name is older than MIN_AGE+GRACE → deleting (no SQL verification)"
|
|
||||||
if [[ "$DRY_RUN" == "false" ]]; then
|
|
||||||
rm -rf "$blockdir"
|
|
||||||
else
|
|
||||||
log "DRY_RUN=true, not deleting $blockdir"
|
|
||||||
fi
|
|
||||||
deleted=true
|
|
||||||
else
|
|
||||||
log "Block $block_name is too new (age ${age_s}s). Skipping."
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
# finished block
|
|
||||||
done
|
done
|
||||||
|
|
||||||
log "prune run completed"
|
echo "$(date '+%Y-%m-%d %H:%M:%S') Prune run finished" >> "$LOG_FILE"
|
||||||
exit 0
|
|
||||||
|
|||||||
Reference in New Issue
Block a user