#!/usr/bin/env bash
#
# Prune local Prometheus TSDB blocks.
#
# For every block directory under DATA_DIR (a directory containing a
# meta.json) the block is deleted when either:
#   1. SQL verification is configured (PSQL_CONN and CHECK_SQL_TEMPLATE are
#      both set) and the check query returns at least one row for the block's
#      [minTime, maxTime] range, or
#   2. the block's maxTime is at least MIN_AGE_HOURS + GRACE_HOURS in the past.
#
# Requires: bash 4+ (mapfile), jq, and psql when SQL verification is used.
set -euo pipefail
IFS=$'\n\t'

# Configuration - edit or override via environment in the systemd unit
DATA_DIR="${DATA_DIR:-/var/lib/prometheus/data}"  # path to Prometheus TSDB dir (blocks)
PSQL_CONN="${PSQL_CONN:-}"  # psql connection string, e.g. "postgresql://user:pass@host:5432/db"
# SQL template that must return at least one row when block is present.
# Use placeholders {min} {max} (epoch ms). Example:
#   "SELECT 1 FROM metrics WHERE time >= to_timestamp({min}/1000.0) AND time <= to_timestamp({max}/1000.0) LIMIT 1;"
CHECK_SQL_TEMPLATE="${CHECK_SQL_TEMPLATE:-}"
MIN_AGE_HOURS="${MIN_AGE_HOURS:-3}"  # minimum block age before being eligible for deletion
GRACE_HOURS="${GRACE_HOURS:-1}"      # extra grace period before deletion (safety)
DRY_RUN="${DRY_RUN:-false}"          # set to "true" to test without deleting

#######################################
# Print a timestamped log line.
# Arguments: message words
# Outputs:   "<ISO-8601 timestamp> <message>" on stdout
#######################################
log() {
  echo "$(date -Iseconds) $*"
}

#######################################
# Extract the block time range from a meta.json file.
# Arguments: $1 - path to meta.json
# Outputs:   minTime and maxTime (milliseconds since epoch), one per line
# Returns:   jq's exit status
#######################################
get_times_from_meta() {
  local meta="$1"
  jq -r '.minTime, .maxTime' "$meta"
}

#######################################
# Run the configured SQL check for a block's time range.
# Globals:   PSQL_CONN, CHECK_SQL_TEMPLATE (read)
# Arguments: $1 - block minTime (epoch ms), $2 - block maxTime (epoch ms)
# Returns:   0 if the query produced at least one non-empty output line;
#            1 if the check is not configured, psql failed, or nothing
#            was returned
#######################################
sql_check_block() {
  local min_ms="$1" max_ms="$2"
  if [[ -z "$PSQL_CONN" || -z "$CHECK_SQL_TEMPLATE" ]]; then
    return 1
  fi

  # fill placeholders
  local sql rows rc=0
  sql="${CHECK_SQL_TEMPLATE//\{min\}/$min_ms}"
  sql="${sql//\{max\}/$max_ms}"

  # Capture the output instead of piping into `grep -q`: under pipefail an
  # early-exiting grep can make the pipeline fail via SIGPIPE even though
  # rows exist. -X skips ~/.psqlrc so its output cannot be mistaken for a
  # row. stderr is discarded on purpose; a failure is reported via the
  # exit status below.
  rows="$(psql -X "$PSQL_CONN" -t -c "$sql" 2>/dev/null)" || rc=$?
  if (( rc != 0 )); then
    log "psql check failed (exit status $rc)"
    return 1
  fi

  # success if any output line is non-empty
  [[ -n "${rows//$'\n'/}" ]]
}

#######################################
# Delete a block directory, honouring DRY_RUN.
# Globals:   DRY_RUN (read)
# Arguments: $1 - block directory
#######################################
delete_block() {
  local dir="$1"
  if [[ "$DRY_RUN" == "false" ]]; then
    # ${dir:?} aborts instead of running rm on an empty path
    rm -rf -- "${dir:?}"
  else
    log "DRY_RUN=true, not deleting $dir"
  fi
}

#######################################
# Inspect every block under DATA_DIR and delete the eligible ones.
# Globals:   DATA_DIR, PSQL_CONN, CHECK_SQL_TEMPLATE, MIN_AGE_HOURS,
#            GRACE_HOURS, DRY_RUN (read)
# Returns:   0 on completion; exits 1 on invalid MIN_AGE_HOURS/GRACE_HOURS
#######################################
main() {
  # Validate up front: a non-integer would otherwise blow up inside $(( ))
  # in the middle of a run.
  local hours
  for hours in "$MIN_AGE_HOURS" "$GRACE_HOURS"; do
    if ! [[ "$hours" =~ ^[0-9]+$ ]]; then
      echo "MIN_AGE_HOURS and GRACE_HOURS must be non-negative integers (got '$hours')" >&2
      exit 1
    fi
  done

  # 10# forces base 10 so values such as "08" are not parsed as octal
  local now_s threshold_s
  now_s="$(date +%s)"
  threshold_s=$(( (10#$MIN_AGE_HOURS + 10#$GRACE_HOURS) * 3600 ))

  if [[ ! -d "$DATA_DIR" ]]; then
    log "DATA_DIR $DATA_DIR is not a directory; nothing to prune"
  fi

  # Iterate blocks. Each block is a directory (named by its ULID) that
  # contains a meta.json; nullglob makes an empty DATA_DIR yield no iterations.
  shopt -s nullglob
  local blockdir meta block_name min_ms max_ms age_s deleted
  local -a times
  for blockdir in "$DATA_DIR"/*; do
    meta="$blockdir/meta.json"
    if [[ ! -f "$meta" ]]; then
      # skip non-blocks (like WAL, lock files)
      continue
    fi
    if [[ ! -r "$meta" ]]; then
      log "Skipping unreadable meta: $meta"
      continue
    fi

    # jq prints minTime and maxTime on separate lines, so read them as lines.
    # mapfile succeeds on empty input, which keeps one broken meta.json from
    # aborting the whole run under `set -e`.
    times=()
    mapfile -t times < <(get_times_from_meta "$meta")
    min_ms="${times[0]:-}"
    max_ms="${times[1]:-}"

    # if jq didn't return numbers, skip
    if ! [[ "$min_ms" =~ ^[0-9]+$ && "$max_ms" =~ ^[0-9]+$ ]]; then
      log "Skipping malformed meta: $meta"
      continue
    fi

    # compute block age in seconds (use max_ms as block end)
    age_s=$(( now_s - 10#$max_ms / 1000 ))
    block_name="${blockdir##*/}"
    log "Inspecting block $block_name (min=$min_ms max=$max_ms) age=${age_s}s"

    deleted=false

    # First: try SQL verification if configured
    if [[ -n "$PSQL_CONN" && -n "$CHECK_SQL_TEMPLATE" ]]; then
      if sql_check_block "$min_ms" "$max_ms"; then
        log "SQL verification OK for block $block_name → deleting"
        delete_block "$blockdir"
        deleted=true
      else
        log "SQL verification did NOT find block data for $block_name"
      fi
    fi

    # Second: if SQL verification did not run or failed, apply age+grace policy
    if [[ "$deleted" == "false" ]]; then
      if (( age_s >= threshold_s )); then
        log "Block $block_name is older than MIN_AGE+GRACE → deleting (no SQL verification)"
        delete_block "$blockdir"
      else
        log "Block $block_name is too new (age ${age_s}s). Skipping."
      fi
    fi
  done

  log "prune run completed"
}

main "$@"