From f0f5de8ffeeed323be6aacf424981681750b6544 Mon Sep 17 00:00:00 2001
From: 12ww1160
Date: Wed, 3 Dec 2025 16:39:02 +0100
Subject: [PATCH] OP#188 comment out the service options as they do not seem to work, Prometheus as service does not respond then

---
Review notes (not part of the commit message):
- templates/prune_blocks.erb: both meta.json timestamps are now read (the
  single "read" only saw minTime, so every block was skipped as malformed),
  MIN_AGE_HOURS also guards SQL-verified deletions, the hour settings are
  validated, and the psql result is captured instead of piped to "grep -q".
- NOTE(review): leading/trailing whitespace in the context and removed lines
  of the three modified files was reconstructed and the blob id of
  prune_blocks.erb was not recomputed - regenerate with "git format-patch"
  or apply with "git apply --ignore-whitespace" if the patch is rejected.

 .vscode/settings.json       |   2 +
 manifests/main/files.pp     |   2 +-
 templates/override.conf.erb |  11 ++--
 templates/prune_blocks.erb  | 134 ++++++++++++++++++++++++++++++++++++
 4 files changed, 142 insertions(+), 7 deletions(-)
 create mode 100644 templates/prune_blocks.erb

diff --git a/.vscode/settings.json b/.vscode/settings.json
index 52e6580..cf89a25 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -3,6 +3,8 @@
         "changeme",
         "GITEA",
         "GOMAXPROCS",
+        "Iseconds",
+        "pipefail",
         "procs",
         "reqpackage",
         "rpms",
diff --git a/manifests/main/files.pp b/manifests/main/files.pp
index 0d8c38f..ce695f3 100644
--- a/manifests/main/files.pp
+++ b/manifests/main/files.pp
@@ -5,7 +5,7 @@
 ##############################################################################
 class prometheus_cd::main::files (
 ) inherits prometheus_cd::params {
-  if ($ps_prom_host == $fqdn) and ($manage_prometheus == true) { 
+  if ($ps_prom_host == $fqdn) and ($manage_prometheus == true) {
     require prometheus_cd::main::dirs
     file { $ps_main_file:
       ensure => file,
diff --git a/templates/override.conf.erb b/templates/override.conf.erb
index f69f867..036739d 100644
--- a/templates/override.conf.erb
+++ b/templates/override.conf.erb
@@ -2,11 +2,11 @@
 ##### File created by Puppet - manual changes will be overwritten #####
 ###############################################################################
 [Service]
-MemoryMax=<%= @ps_max_mem %>
-MemoryHigh=<%= @ps_high_mem %>
-MemorySwapMax=<%= @ps_swap_mem %>
-OOMScoreAdjust=<%= @ps_oom_score %>
-Environment="GOMAXPROCS=<%= @ps_max_procs %>"
+#MemoryMax=<%= @ps_max_mem %>
+#MemoryHigh=<%= @ps_high_mem %>
+#MemorySwapMax=<%= @ps_swap_mem %>
+#OOMScoreAdjust=<%= @ps_oom_score %>
+#Environment="GOMAXPROCS=<%= @ps_max_procs %>"

 ExecStart=
 ExecStart=/usr/bin/prometheus \
@@ -21,4 +21,3 @@ ExecStart=/usr/bin/prometheus \
   --storage.tsdb.wal-compression \
   --storage.tsdb.min-block-duration=<%= @ps_min_block_dur %> \
   --storage.tsdb.max-block-duration=<%= @ps_max_block_dur %>
- 
\ No newline at end of file
diff --git a/templates/prune_blocks.erb b/templates/prune_blocks.erb
new file mode 100644
index 0000000..52a028d
--- /dev/null
+++ b/templates/prune_blocks.erb
@@ -0,0 +1,134 @@
+#!/usr/bin/env bash
+# Prune Prometheus TSDB block directories below DATA_DIR.
+#
+# For every "$DATA_DIR"/*/meta.json the block's minTime/maxTime (epoch ms) are
+# read with jq. Blocks younger than MIN_AGE_HOURS are never touched. Older
+# blocks are removed when the optional SQL check finds rows for their time
+# range, or - without a successful SQL check - once they are at least
+# MIN_AGE_HOURS + GRACE_HOURS old. Any DRY_RUN value other than "false" only
+# logs what would be removed.
+set -euo pipefail
+IFS=$'\n\t'
+
+# Configuration - edit or override via environment in the systemd unit
+DATA_DIR="${DATA_DIR:-/var/lib/prometheus/data}"  # path to Prometheus TSDB dir (blocks)
+PSQL_CONN="${PSQL_CONN:-}"                        # psql connection string, e.g. "postgresql://user:pass@host:5432/db"
+CHECK_SQL_TEMPLATE="${CHECK_SQL_TEMPLATE:-}"      # SQL template that must return at least one row when block is present.
+                                                  # Use placeholders {min} {max} (epoch ms). Example:
+                                                  # "SELECT 1 FROM metrics WHERE time >= to_timestamp({min}/1000.0) AND time <= to_timestamp({max}/1000.0) LIMIT 1;"
+MIN_AGE_HOURS="${MIN_AGE_HOURS:-3}"               # minimum block age before being eligible for deletion
+GRACE_HOURS="${GRACE_HOURS:-1}"                   # extra grace period before deletion (safety)
+DRY_RUN="${DRY_RUN:-false}"                       # set to "true" to test without deleting
+
+log() { echo "$(date -Iseconds) $*"; }
+
+# helper: print minTime and maxTime (ms since epoch) from a block's meta.json,
+# one value per line
+get_times_from_meta() {
+  local meta="$1"
+  jq -r '.minTime, .maxTime' "$meta"
+}
+
+# helper: run SQL check (returns 0 if the query yields output, 1 if it yields
+# nothing, psql fails or the check is not configured)
+sql_check_block() {
+  local min_ms="$1" max_ms="$2"
+  if [[ -z "$PSQL_CONN" || -z "$CHECK_SQL_TEMPLATE" ]]; then
+    return 1
+  fi
+
+  # fill placeholders
+  local sql
+  sql="${CHECK_SQL_TEMPLATE//\{min\}/$min_ms}"
+  sql="${sql//\{max\}/$max_ms}"
+
+  # Capture the output instead of piping into "grep -q": with pipefail an early
+  # grep exit can end psql with SIGPIPE and report a hit as a failure.
+  local rows
+  rows="$(psql "$PSQL_CONN" -t -c "$sql" 2>/dev/null)" || return 1
+  [[ -n "${rows//$'\n'/}" ]]
+}
+
+# helper: remove a block directory unless DRY_RUN is active
+delete_block() {
+  local dir="$1"
+  if [[ "$DRY_RUN" == "false" ]]; then
+    rm -rf -- "$dir"
+  else
+    log "DRY_RUN=true, not deleting $dir"
+  fi
+}
+
+# The hour settings are used in arithmetic expansion below, so accept plain
+# non-negative integers only.
+for setting in MIN_AGE_HOURS GRACE_HOURS; do
+  if ! [[ "${!setting}" =~ ^[0-9]+$ ]]; then
+    log "Invalid $setting='${!setting}' (expected a non-negative integer)"
+    exit 1
+  fi
+done
+
+now_s="$(date +%s)"
+min_age_s=$(( 10#$MIN_AGE_HOURS * 3600 ))  # 10# keeps "08" from being read as octal
+grace_s=$(( 10#$GRACE_HOURS * 3600 ))
+
+# Iterate blocks. Each block is a directory in DATA_DIR that holds a meta.json
+shopt -s nullglob
+for blockdir in "$DATA_DIR"/*; do
+  meta="$blockdir/meta.json"
+  if [[ ! -f "$meta" ]]; then
+    # skip non-blocks (like WAL, lock files)
+    continue
+  fi
+
+  if [[ ! -r "$meta" ]]; then
+    log "Skipping unreadable meta: $meta"
+    continue
+  fi
+
+  # jq prints minTime and maxTime on separate lines. "read" stops after the
+  # first line (leaving max_ms empty) and fails under "set -e" on empty input,
+  # so collect both lines with mapfile instead.
+  mapfile -t times < <(get_times_from_meta "$meta")
+  min_ms="${times[0]:-}"
+  max_ms="${times[1]:-}"
+  # if jq didn't return numbers, skip
+  if ! [[ "$min_ms" =~ ^[0-9]+$ && "$max_ms" =~ ^[0-9]+$ ]]; then
+    log "Skipping malformed meta: $meta"
+    continue
+  fi
+
+  # compute block age in seconds (use max_ms as block end)
+  block_end_s=$(( max_ms / 1000 ))
+  age_s=$(( now_s - block_end_s ))
+
+  block_name="$(basename "$blockdir")"
+  log "Inspecting block $block_name (min=$min_ms max=$max_ms) age=${age_s}s"
+
+  # MIN_AGE_HOURS is the minimum age for any deletion, SQL-verified or not
+  if (( age_s < min_age_s )); then
+    log "Block $block_name is too new (age ${age_s}s). Skipping."
+    continue
+  fi
+
+  # First: try SQL verification if configured
+  if [[ -n "$PSQL_CONN" && -n "$CHECK_SQL_TEMPLATE" ]]; then
+    if sql_check_block "$min_ms" "$max_ms"; then
+      log "SQL verification OK for block $block_name → deleting"
+      delete_block "$blockdir"
+      continue
+    fi
+    log "SQL verification did NOT find block data for $block_name"
+  fi
+
+  # Second: if SQL verification did not run or failed, apply age+grace policy
+  if (( age_s >= min_age_s + grace_s )); then
+    log "Block $block_name is older than MIN_AGE+GRACE → deleting (no SQL verification)"
+    delete_block "$blockdir"
+  else
+    log "Block $block_name is too new (age ${age_s}s). Skipping."
+  fi
+done
+
+log "prune run completed"
+exit 0