OP#188: comment out the service options, as they do not seem to work; with them enabled, Prometheus does not respond when run as a service
This commit is contained in:
2
.vscode/settings.json
vendored
2
.vscode/settings.json
vendored
@@ -3,6 +3,8 @@
|
||||
"changeme",
|
||||
"GITEA",
|
||||
"GOMAXPROCS",
|
||||
"Iseconds",
|
||||
"pipefail",
|
||||
"procs",
|
||||
"reqpackage",
|
||||
"rpms",
|
||||
|
||||
@@ -2,11 +2,11 @@
|
||||
##### File created by Puppet - manual changes will be overwritten #####
|
||||
###############################################################################
|
||||
[Service]
|
||||
MemoryMax=<%= @ps_max_mem %>
|
||||
MemoryHigh=<%= @ps_high_mem %>
|
||||
MemorySwapMax=<%= @ps_swap_mem %>
|
||||
OOMScoreAdjust=<%= @ps_oom_score %>
|
||||
Environment="GOMAXPROCS=<%= @ps_max_procs %>"
|
||||
#MemoryMax=<%= @ps_max_mem %>
|
||||
#MemoryHigh=<%= @ps_high_mem %>
|
||||
#MemorySwapMax=<%= @ps_swap_mem %>
|
||||
#OOMScoreAdjust=<%= @ps_oom_score %>
|
||||
#Environment="GOMAXPROCS=<%= @ps_max_procs %>"
|
||||
|
||||
ExecStart=
|
||||
ExecStart=/usr/bin/prometheus \
|
||||
@@ -21,4 +21,3 @@ ExecStart=/usr/bin/prometheus \
|
||||
--storage.tsdb.wal-compression \
|
||||
--storage.tsdb.min-block-duration=<%= @ps_min_block_dur %> \
|
||||
--storage.tsdb.max-block-duration=<%= @ps_max_block_dur %>
|
||||
|
||||
114
templates/prune_blocks.erb
Normal file
114
templates/prune_blocks.erb
Normal file
@@ -0,0 +1,114 @@
|
||||
#!/usr/bin/env bash
set -euo pipefail
IFS=$'\n\t'

# Configuration - edit or override via environment in the systemd unit
DATA_DIR="${DATA_DIR:-/var/lib/prometheus/data}"   # path to Prometheus TSDB dir (blocks)
PSQL_CONN="${PSQL_CONN:-}"                         # psql connection string, e.g. "postgresql://user:pass@host:5432/db"
CHECK_SQL_TEMPLATE="${CHECK_SQL_TEMPLATE:-}"       # SQL template that must return at least one row when block is present.
                                                   # Use placeholders {min} {max} (epoch ms). Example:
                                                   # "SELECT 1 FROM metrics WHERE time >= to_timestamp({min}/1000.0) AND time <= to_timestamp({max}/1000.0) LIMIT 1;"
MIN_AGE_HOURS="${MIN_AGE_HOURS:-3}"                # minimum block age before being eligible for deletion
GRACE_HOURS="${GRACE_HOURS:-1}"                    # extra grace period before deletion (safety)
DRY_RUN="${DRY_RUN:-false}"                        # set to "true" to test without deleting

#######################################
# Validate that the named variable holds a non-negative decimal integer and
# rewrite it in canonical base-10 form (so "08" becomes "8").
# Arguments: $1 - NAME of the variable to check (not its value)
# Outputs:   an error message on stderr when the value is invalid
# Returns:   0 when valid, 1 otherwise
#######################################
require_uint() {
  local _ru_name="$1"
  local _ru_value="${!_ru_name-}"
  if ! [[ "$_ru_value" =~ ^[0-9]+$ ]]; then
    printf '%s must be a non-negative integer, got: %s\n' "$_ru_name" "$_ru_value" >&2
    return 1
  fi
  # 10# forces decimal; a bare leading zero would be parsed as octal.
  printf -v "$_ru_name" '%d' "$(( 10#$_ru_value ))"
}

# The hour settings are later used in arithmetic that decides what gets
# deleted, so reject anything that is not a plain number up front.
require_uint MIN_AGE_HOURS || exit 2
require_uint GRACE_HOURS || exit 2
|
||||
|
||||
# Print a message to stdout prefixed with an ISO-8601 timestamp.
# Arguments: the message words; they are joined with single spaces.
log() {
  # "$*" joins on the first character of IFS, which this script sets to a
  # newline; pin it to a space so multi-argument calls stay on one line.
  local IFS=' '
  printf '%s %s\n' "$(date -Iseconds)" "$*"
}
|
||||
|
||||
#######################################
# helper: extract the time range of a Prometheus block from its meta.json.
# Arguments: $1 - path to the block's meta.json
# Outputs:   two lines on stdout: minTime, then maxTime
#            (meta.json stores both in milliseconds since epoch)
# Returns:   the exit status of jq
#######################################
get_times_from_meta() {
  local meta="$1"
  jq -r '.minTime, .maxTime' "$meta"
}
|
||||
|
||||
#######################################
# helper: run the SQL check for one block's time range.
# Globals:   PSQL_CONN (read), CHECK_SQL_TEMPLATE (read)
# Arguments: $1 - block minTime (epoch ms), $2 - block maxTime (epoch ms)
# Returns:   0 if the query returned at least one non-blank row;
#            1 if the check is not configured, the arguments are not plain
#            integers, psql failed, or no rows came back.
# Note:      psql's stderr is discarded, so a connection or SQL error is
#            indistinguishable from "no rows" for the caller.
#######################################
sql_check_block() {
  local min_ms="$1" max_ms="$2"

  if [[ -z "${PSQL_CONN:-}" || -z "${CHECK_SQL_TEMPLATE:-}" ]]; then
    return 1
  fi

  # The values are spliced into SQL text, so accept digits only.
  if ! [[ "$min_ms" =~ ^[0-9]+$ && "$max_ms" =~ ^[0-9]+$ ]]; then
    return 1
  fi

  # fill placeholders
  local sql
  sql="${CHECK_SQL_TEMPLATE//\{min\}/$min_ms}"
  sql="${sql//\{max\}/$max_ms}"

  # Capture the output instead of piping into `grep -q`: under pipefail an
  # early-exiting grep can make the pipeline fail even though rows were found.
  # -X skips ~/.psqlrc (its output could be mistaken for a row); -A -t print
  # bare tuples only, so zero rows means empty output.
  local rows
  rows="$(psql "$PSQL_CONN" -X -A -t -c "$sql" 2>/dev/null)" || return 1

  if [[ "$rows" == *[![:space:]]* ]]; then
    return 0
  fi
  return 1
}
|
||||
|
||||
# Main run: walk DATA_DIR and delete blocks that are either confirmed by the
# SQL check or older than MIN_AGE_HOURS + GRACE_HOURS.

now_s="$(date +%s)"

# The thresholds do not change between blocks, so compute them once.
min_age_s=$(( MIN_AGE_HOURS * 3600 ))
grace_s=$(( GRACE_HOURS * 3600 ))

# Delete one block directory, or only report it when DRY_RUN is not "false".
# Arguments: $1 - block directory
remove_block() {
  local dir="$1"
  if [[ "$DRY_RUN" == "false" ]]; then
    # `--` stops option parsing; `:?` aborts rather than rm -rf an empty path.
    rm -rf -- "${dir:?}"
  else
    log "DRY_RUN=true, not deleting $dir"
  fi
}

# Iterate blocks. Each block is a directory named <ULID>
shopt -s nullglob
for blockdir in "${DATA_DIR:?DATA_DIR must not be empty}"/*; do
  meta="$blockdir/meta.json"

  # skip non-blocks (like WAL, lock files)
  if [[ ! -f "$meta" ]]; then
    continue
  fi

  if [[ ! -r "$meta" ]]; then
    log "Skipping unreadable meta: $meta"
    continue
  fi

  # get_times_from_meta prints minTime and maxTime on separate lines. A single
  # `read` would consume only the first line and leave max_ms empty, so load
  # both lines into an array instead.
  times=()
  mapfile -t times < <(get_times_from_meta "$meta")
  min_ms="${times[0]:-}"
  max_ms="${times[1]:-}"

  # if jq didn't return numbers, skip
  if ! [[ "$min_ms" =~ ^[0-9]+$ && "$max_ms" =~ ^[0-9]+$ ]]; then
    log "Skipping malformed meta: $meta"
    continue
  fi

  # compute block age in seconds (use max_ms as block end)
  block_end_s=$(( max_ms / 1000 ))
  age_s=$(( now_s - block_end_s ))

  block_name="${blockdir##*/}"
  log "Inspecting block $block_name (min=$min_ms max=$max_ms) age=${age_s}s"

  # First: try SQL verification if configured.
  # NOTE(review): a block confirmed by SQL is deleted regardless of its age;
  # MIN_AGE_HOURS is only enforced on the fallback path below - confirm that
  # this is intended.
  if [[ -n "$PSQL_CONN" && -n "$CHECK_SQL_TEMPLATE" ]]; then
    if sql_check_block "$min_ms" "$max_ms"; then
      log "SQL verification OK for block $block_name → deleting"
      remove_block "$blockdir"
      continue
    fi
    log "SQL verification did NOT find block data for $block_name"
  fi

  # Second: if SQL verification did not run or failed, apply age+grace policy
  if (( age_s >= min_age_s + grace_s )); then
    log "Block $block_name is older than MIN_AGE+GRACE → deleting (no SQL verification)"
    remove_block "$blockdir"
  else
    log "Block $block_name is too new (age ${age_s}s). Skipping."
  fi
done

log "prune run completed"
exit 0
|
||||
Reference in New Issue
Block a user