From eb9473f2d496700c353a3a7cbf24d5369ee5521a Mon Sep 17 00:00:00 2001
From: 12ww1160
Date: Thu, 4 Dec 2025 13:35:53 +0100
Subject: [PATCH] OP#189 update readme

---
Notes for reviewers (everything between the "---" marker and the first diff is
ignored when the patch is applied):

* templates/prune_blocks.erb: MIN_AGE_HOURS, GRACE_HOURS and DRY_RUN get their
  former defaults back and CHECK_SQL_TEMPLATE is asserted, because the script
  runs under "set -u" and no longer defines these itself.
* meta.json timestamps are validated as integers before they are used in
  arithmetic; malformed blocks are logged and skipped.
* Block age is measured from maxTime (end of block), as the replaced script did.
* A failing psql check is written to the log before the run aborts.
* rm is guarded with "--" and "${block:?}".
* Line breaks and indentation were restored from a whitespace-collapsed copy of
  this patch, and the post-image blob id on the prune_blocks.erb "index" line
  was not recomputed; check both against the repository before applying.

 .vscode/settings.json      |   3 +
 README.md                  |   1 +
 templates/prune_blocks.erb | 155 ++++++++++++-----------------------
 3 files changed, 57 insertions(+), 102 deletions(-)

diff --git a/.vscode/settings.json b/.vscode/settings.json
index 58088c5..47c9c38 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,9 +1,11 @@
 {
     "cSpell.words": [
+        "blockdir",
         "changeme",
         "GITEA",
         "GOMAXPROCS",
         "Iseconds",
+        "nullglob",
         "PGDATABASE",
         "PGHOST",
         "PGPASSWORD",
@@ -14,6 +16,7 @@
         "readaccess",
         "reqpackage",
         "rpms",
+        "shopt",
         "sslcacert",
         "sslverify"
     ]
diff --git a/README.md b/README.md
index 80140c4..c9437c2 100644
--- a/README.md
+++ b/README.md
@@ -94,6 +94,7 @@ All files and directories are configured with correct selinux context. If selinu
 ## Contact Us
 
 [contact Us](https://confdroid.com/contact/)
+[feedback collection](https://feedback.confdroid.com)
 
 ## Disclaimer
 
diff --git a/templates/prune_blocks.erb b/templates/prune_blocks.erb
index 52a028d..b7615df 100644
--- a/templates/prune_blocks.erb
+++ b/templates/prune_blocks.erb
@@ -1,114 +1,65 @@
-#!/usr/bin/env bash
+#!/bin/bash
 set -euo pipefail
-IFS=$'\n\t'
 
-# Configuration - edit or override via environment in the systemd unit
-DATA_DIR="${DATA_DIR:-/var/lib/prometheus/data}" # path to Prometheus TSDB dir (blocks)
-PSQL_CONN="${PSQL_CONN:-}" # psql connection string, e.g. "postgresql://user:pass@host:5432/db"
-CHECK_SQL_TEMPLATE="${CHECK_SQL_TEMPLATE:-}" # SQL template that must return at least one row when block is present.
-    # Use placeholders {min} {max} (epoch ms). Example:
-    # "SELECT 1 FROM metrics WHERE time >= to_timestamp({min}/1000.0) AND time <= to_timestamp({max}/1000.0) LIMIT 1;"
-MIN_AGE_HOURS="${MIN_AGE_HOURS:-3}" # minimum block age before being eligible for deletion
-GRACE_HOURS="${GRACE_HOURS:-1}" # extra grace period before deletion (safety)
-DRY_RUN="${DRY_RUN:-false}" # set to "true" to test without deleting
+# Load environment
+source "<%= @ps_env_file %>"
 
-log() { echo "$(date -Iseconds) $*"; }
+# Fall back to the previous defaults so that a variable missing from the
+# environment file does not abort the run under "set -u".
+MIN_AGE_HOURS="${MIN_AGE_HOURS:-3}"
+GRACE_HOURS="${GRACE_HOURS:-1}"
+DRY_RUN="${DRY_RUN:-false}"
+# No safe default exists for the verification query: fail with a clear message.
+: "${CHECK_SQL_TEMPLATE:?CHECK_SQL_TEMPLATE must be set in the environment file}"
+
+LOG_FILE="/var/log/prometheus-prune.log"
+echo "$(date '+%Y-%m-%d %H:%M:%S') Starting prune run" >> "$LOG_FILE"
 
-# helper: convert Prometheus block times from meta.json (ms) to integers
-get_times_from_meta() {
-  local meta="$1"
-  # meta.json contains "minTime" and "maxTime" in milliseconds since epoch
-  jq -r '.minTime, .maxTime' "$meta"
-}
+# Find all TSDB blocks
+TSDB_DIR="/var/lib/prometheus"
+for block in "$TSDB_DIR"/*/; do
+    [[ -d "$block" ]] || continue
+    meta="$block/meta.json"
+    [[ -f "$meta" ]] || continue
 
-# helper: run SQL check (returns 0 if found)
-sql_check_block() {
-  local min_ms="$1" max_ms="$2"
-  if [[ -z "$PSQL_CONN" || -z "$CHECK_SQL_TEMPLATE" ]]; then
-    return 1
-  fi
+    # Get block min and max timestamps
+    min_time=$(jq -r '.minTime' "$meta") || min_time=""
+    max_time=$(jq -r '.maxTime' "$meta") || max_time=""
+    # jq prints "null" for a missing field; only plain integers are safe below.
+    if ! [[ "$min_time" =~ ^[0-9]+$ && "$max_time" =~ ^[0-9]+$ ]]; then
+        echo "Skipping block $block (malformed meta.json)" >> "$LOG_FILE"
+        continue
+    fi
 
-  # fill placeholders
-  local sql
-  sql="${CHECK_SQL_TEMPLATE//\{min\}/$min_ms}"
-  sql="${sql//\{max\}/$max_ms}"
-
-  # run psql quietly - return success if any rows
-  if psql "$PSQL_CONN" -t -c "$sql" 2>/dev/null | grep -q '.'; then
-    return 0
-  fi
-  return 1
-}
-
-now_s="$(date +%s)"
-now_ms="$(( now_s * 1000 ))"
-
-# Iterate blocks. Each block is a directory named
-shopt -s nullglob
-for blockdir in "$DATA_DIR"/*; do
-  # find meta.json
-  meta="$blockdir/meta.json"
-  if [[ ! -f "$meta" ]]; then
-    # skip non-blocks (like WAL, lock files)
-    continue
-  fi
-
-  # ensure it's a directory and readable
-  if [[ ! -r "$meta" ]]; then
-    log "Skipping unreadable meta: $meta"
-    continue
-  fi
-
-  read -r min_ms max_ms < <(get_times_from_meta "$meta")
-  # if jq didn't return numbers, skip
-  if ! [[ "$min_ms" =~ ^[0-9]+$ && "$max_ms" =~ ^[0-9]+$ ]]; then
-    log "Skipping malformed meta: $meta"
-    continue
-  fi
-
-  # compute block age in seconds (use max_ms as block end)
-  block_end_s=$(( max_ms / 1000 ))
-  age_s=$(( now_s - block_end_s ))
-  min_age_s=$(( MIN_AGE_HOURS * 3600 ))
-  grace_s=$(( GRACE_HOURS * 3600 ))
-
-  block_name="$(basename "$blockdir")"
-  log "Inspecting block $block_name (min=$min_ms max=$max_ms) age=${age_s}s"
-
-  deleted=false
-
-  # First: try SQL verification if configured
-  if [[ -n "$PSQL_CONN" && -n "$CHECK_SQL_TEMPLATE" ]]; then
-    if sql_check_block "$min_ms" "$max_ms"; then
-      log "SQL verification OK for block $block_name → deleting"
-      if [[ "$DRY_RUN" == "false" ]]; then
-        rm -rf "$blockdir"
-      else
-        log "DRY_RUN=true, not deleting $blockdir"
-      fi
-      deleted=true
-    else
-      log "SQL verification did NOT find block data for $block_name"
+    # Skip very recent blocks; age is measured from the end of the block (maxTime)
+    block_age_hours=$(( ($(date +%s) - max_time/1000) / 3600 ))
+    if (( block_age_hours < MIN_AGE_HOURS + GRACE_HOURS )); then
+        echo "Skipping block $block (age ${block_age_hours}h)" >> "$LOG_FILE"
+        continue
     fi
-  fi
 
-  # Second: if SQL verification did not run or failed, apply age+grace policy
-  if [[ "$deleted" == "false" ]]; then
-    if (( age_s >= (min_age_s + grace_s) )); then
-      log "Block $block_name is older than MIN_AGE+GRACE → deleting (no SQL verification)"
-      if [[ "$DRY_RUN" == "false" ]]; then
-        rm -rf "$blockdir"
-      else
-        log "DRY_RUN=true, not deleting $blockdir"
-      fi
-      deleted=true
-    else
-      log "Block $block_name is too new (age ${age_s}s). Skipping."
+    # Verify block is in PostgreSQL
+    check_sql="${CHECK_SQL_TEMPLATE//\{min\}/$min_time}"
+    check_sql="${check_sql//\{max\}/$max_time}"
+
+    # Record a failed query in the log before aborting the run.
+    if ! exists=$(psql -h "$PGHOST" -p "$PGPORT" -U "$PGUSER" -d "$PGDATABASE" -t -c "$check_sql" | xargs); then
+        echo "PostgreSQL check failed for block $block, aborting" >> "$LOG_FILE"
+        exit 1
+    fi
+
+    if [[ "$exists" != "1" ]]; then
+        echo "Block $block not found in PostgreSQL, skipping deletion" >> "$LOG_FILE"
+        continue
     fi
-  fi
 
-  # finished block
+    # Dry-run or delete
+    if [[ "$DRY_RUN" == "true" ]]; then
+        echo "[DRY-RUN] Would delete block $block" >> "$LOG_FILE"
+    else
+        echo "Deleting block $block" >> "$LOG_FILE"
+        rm -rf -- "${block:?}"
+    fi
 done
 
-log "prune run completed"
-exit 0
+echo "$(date '+%Y-%m-%d %H:%M:%S') Prune run finished" >> "$LOG_FILE"