168 lines
5.4 KiB
Bash
168 lines
5.4 KiB
Bash
#!/usr/bin/env sh
#
# Disk guard for a Docker test host.
#
# Measures how full the filesystem holding MOUNT_PATH is, runs a safe cleanup
# when usage is high, writes a JSON status file, and optionally posts a
# webhook notification.
#
# Environment (all optional):
#   WARN_PERCENT         used% at or above which the level becomes "warning"
#   CLEANUP_PERCENT      used% at or above which the safe cleanup runs
#   CRITICAL_PERCENT     used% at or above which the level becomes "critical"
#   MIN_TMP_AGE_MINUTES  minimum age (minutes) of tmp artifacts to delete
#   MOUNT_PATH           path handed to df
#   STATUS_DIR           directory that receives the status JSON
#   LOG_DIR              directory that receives the log file
#   WEBHOOK_URL          if non-empty, non-ok levels are POSTed here via curl

set -eu

WARN_PERCENT="${WARN_PERCENT:-85}"
CLEANUP_PERCENT="${CLEANUP_PERCENT:-85}"
CRITICAL_PERCENT="${CRITICAL_PERCENT:-95}"
MIN_TMP_AGE_MINUTES="${MIN_TMP_AGE_MINUTES:-360}"
MOUNT_PATH="${MOUNT_PATH:-/}"
STATUS_DIR="${STATUS_DIR:-/tmp/rap-web-admin/html/downloads/ops}"
LOG_DIR="${LOG_DIR:-/tmp/rap-ops}"
WEBHOOK_URL="${WEBHOOK_URL:-}"

# Abort (exit 64) unless $2 is a non-negative decimal integer.
# $1 is the setting name, used only in the error message.
# These settings are later used in `[ ... -ge ... ]`, `find -mmin` and as bare
# JSON numbers, so a non-numeric value must be rejected up front.
require_uint() {
  case "$2" in
    '' | *[!0-9]*)
      printf '%s must be a non-negative integer, got: %s\n' "$1" "$2" >&2
      exit 64
      ;;
  esac
}

require_uint WARN_PERCENT "$WARN_PERCENT"
require_uint CLEANUP_PERCENT "$CLEANUP_PERCENT"
require_uint CRITICAL_PERCENT "$CRITICAL_PERCENT"
require_uint MIN_TMP_AGE_MINUTES "$MIN_TMP_AGE_MINUTES"

mkdir -p "$STATUS_DIR" "$LOG_DIR"

# Start time of this run (UTC, ISO-8601); reported in the status file.
started_at="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
log_file="$LOG_DIR/test-docker-disk-guard.log"
status_file="$STATUS_DIR/test-docker-disk-guard-status.json"
|
|
|
|
# Append one timestamped line to $log_file.
# Arguments: all arguments are joined with single spaces to form the message.
# Output:    "<UTC timestamp> <message>" appended to $log_file.
log() {
  printf '%s %s\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$*" >>"$log_file"
}
|
|
|
|
# Escape a string so it can be placed between double quotes in JSON.
# Arguments: $1 - raw text
# Output:    escaped text on stdout (no surrounding quotes)
#
# Backslash and double quote are escaped; tab, carriage return and newline
# become \t, \r and \n. Any other control character below 0x20 is dropped,
# because JSON does not allow it raw inside a string.
json_escape() {
  printf '%s' "$1" |
    tr -d '\001-\010\013\014\016-\037' |
    sed -e ':a' -e '$!{' -e 'N' -e 'ba' -e '}' \
      -e 's/\\/\\\\/g' \
      -e 's/"/\\"/g' \
      -e "s/$(printf '\t')/\\\\t/g" \
      -e "s/$(printf '\r')/\\\\r/g" \
      -e 's/\n/\\n/g'
  # The leading sed loop joins all input lines into one pattern space so that
  # embedded newlines can be rewritten. Backslashes are escaped first so the
  # backslashes added by the later rules are not doubled.
}
|
|
|
|
# Print the used-capacity percentage (without the "%" sign) of the filesystem
# that holds $MOUNT_PATH, taken from the fifth column of `df -P`.
# Prints nothing if df produces no data line.
disk_used_percent() {
  df -P "$MOUNT_PATH" | awk 'NR == 2 { gsub("%", "", $5); print $5 }'
}
|
|
|
|
# Print the available space of the filesystem that holds $MOUNT_PATH in
# human-readable units (fourth column of `df -hP`).
disk_avail_human() {
  df -hP "$MOUNT_PATH" | awk 'NR == 2 { print $4 }'
}
|
|
|
|
# Print the used space of the filesystem that holds $MOUNT_PATH in
# human-readable units (third column of `df -hP`).
disk_used_human() {
  df -hP "$MOUNT_PATH" | awk 'NR == 2 { print $3 }'
}
|
|
|
|
# Print the total size of the filesystem that holds $MOUNT_PATH in
# human-readable units (second column of `df -hP`).
disk_size_human() {
  df -hP "$MOUNT_PATH" | awk 'NR == 2 { print $2 }'
}
|
|
|
|
# Print a one-line summary of `docker system df` (stdout and stderr), with
# every newline replaced by ";". Prints "docker cli not found" when no docker
# command is available.
#
# The text is returned raw. The caller passes it through json_escape before
# embedding it in JSON, so escaping quotes here would escape them twice.
docker_df_summary() {
  if command -v docker >/dev/null 2>&1; then
    docker system df 2>&1 | tr '\n' ';'
  else
    printf 'docker cli not found'
  fi
}
|
|
|
|
# Free disk space using only low-risk actions:
#   1. prune the Docker build cache and dangling images (if docker exists);
#   2. delete known RAP build artifacts directly under the tmp root that were
#      last modified more than $MIN_TMP_AGE_MINUTES minutes ago.
# Every step is best-effort: failures are logged to $log_file and ignored so
# that the guard can still report status afterwards.
#
# Globals: log_file, MIN_TMP_AGE_MINUTES (read);
#          CLEANUP_TMP_ROOT (read, optional; directory to scan, default /tmp)
cleanup_safe() {
  log "cleanup started: docker build cache and old RAP tmp artifacts"

  if command -v docker >/dev/null 2>&1; then
    docker builder prune -af >>"$log_file" 2>&1 || true
    docker image prune -f >>"$log_file" 2>&1 || true
  fi

  # -mindepth 1 keeps the scan root itself out of the candidate set, and
  # `--` stops rm from treating any path as an option.
  find "${CLEANUP_TMP_ROOT:-/tmp}" -mindepth 1 -maxdepth 1 \( \
    -name 'rap-android-build-*' -o \
    -name 'rap-agent-build*' -o \
    -name 'rap-backend-build*' -o \
    -name 'rap-c*-build*' -o \
    -name 'rap-node-agent-*' -o \
    -name 'rap-*vpnfarm*' -o \
    -name 'rap-build-*' \
    \) -mmin +"$MIN_TMP_AGE_MINUTES" -exec rm -rf -- {} + >>"$log_file" 2>&1 || true

  sync || true
  log "cleanup finished"
}
|
|
|
|
# Print a one-line, human-readable suggestion for how the filesystem holding
# $MOUNT_PATH could be enlarged.
#
# Sources of information (each optional, each best-effort):
#   df    - device backing the filesystem and its mount point
#   vgs   - free space per volume group, in GiB
#   lsblk - size of the LV mounted there, and the size of the last partition
#           listed before the first LVM entry (a heuristic for "the partition
#           backing the LV"; NOTE(review): not verified to be the real parent)
#
# Output: exactly one of four messages on stdout, no trailing newline.
expansion_hint() {
  # Second line of `df -P`: first field is the device, last is the mount point.
  df_line="$(df -P "$MOUNT_PATH" | awk 'NR == 2')"
  root_source="${df_line%% *}"
  mount_point="${df_line##* }"
  vg_free=""
  root_lv_bytes=""
  parent_part_bytes=""

  if command -v vgs >/dev/null 2>&1; then
    vg_free="$(vgs --noheadings --units g -o vg_free 2>/dev/null |
      awk '{ print $1 }' | tr '\n' ' ' | sed 's/^ *//;s/ *$//')"
  fi

  if command -v lsblk >/dev/null 2>&1; then
    # Match the LV by the mount point df reported rather than a fixed "/",
    # so a non-default MOUNT_PATH is compared against its own volume.
    root_lv_bytes="$(lsblk -b -n -o TYPE,SIZE,MOUNTPOINTS 2>/dev/null |
      awk -v mp="$mount_point" 'mp != "" && $1 == "lvm" && $3 == mp { print $2; exit }')"
    parent_part_bytes="$(lsblk -b -n -o TYPE,SIZE 2>/dev/null |
      awk '$1 == "part" { last = $2 } $1 == "lvm" { print last; exit }')"
  fi

  # Discard anything that is not a plain integer, so the arithmetic below
  # cannot abort the script on unexpected lsblk output.
  case "$root_lv_bytes" in '' | *[!0-9]*) root_lv_bytes="" ;; esac
  case "$parent_part_bytes" in '' | *[!0-9]*) parent_part_bytes="" ;; esac

  case "$root_source" in
    /dev/mapper/*)
      # 1073741824 = 1 GiB of slack before the partition counts as "larger".
      if [ -n "$root_lv_bytes" ] && [ -n "$parent_part_bytes" ] &&
        [ "$parent_part_bytes" -gt "$((root_lv_bytes + 1073741824))" ]; then
        printf 'LVM root detected on %s. Backing partition is larger than root LV, so expansion is likely available. Run with sudo: sudo lvextend -r -l +100%%FREE %s' "$root_source" "$root_source"
        return
      fi

      # A zero entry may be printed as "0", "0g" or "0.00g"; any of them
      # means that volume group has no free space.
      if [ -n "$vg_free" ] &&
        ! printf '%s' "$vg_free" | grep -Eq '(^| )0(\.0+)?g?( |$)'; then
        printf 'LVM root detected on %s. If approved, extend inside existing VG: sudo lvextend -r -l +100%%FREE %s' "$root_source" "$root_source"
      else
        printf 'LVM root detected on %s. No obvious free VG space. Expand VM disk, then run pvresize on the PV and lvextend -r for root LV.' "$root_source"
      fi
      ;;
    *)
      printf 'Root filesystem is %s. Expand underlying disk/volume, then grow filesystem according to host partition layout.' "$root_source"
      ;;
  esac
}
|
|
|
|
# POST a JSON notification to $WEBHOOK_URL.
# Arguments: $1 - level, $2 - message
# Does nothing when WEBHOOK_URL is empty or curl is not installed.
# Delivery is best-effort: curl output goes to $log_file and a failed request
# never fails the caller.
#
# The function's own variables carry a notify_ prefix because POSIX sh has no
# function-local variables and the script's top level uses `level` and
# `message` itself.
notify() {
  notify_level="$1"
  notify_message="$2"

  if [ -z "$WEBHOOK_URL" ] || ! command -v curl >/dev/null 2>&1; then
    return 0
  fi

  notify_payload="$(printf '{"level":"%s","message":"%s","host":"%s","observed_at":"%s"}' \
    "$(json_escape "$notify_level")" \
    "$(json_escape "$notify_message")" \
    "$(json_escape "$(hostname)")" \
    "$(date -u +%Y-%m-%dT%H:%M:%SZ)")"

  # -m 5 caps the request at five seconds so a dead endpoint cannot stall the guard.
  curl -fsS -m 5 -H 'Content-Type: application/json' -d "$notify_payload" "$WEBHOOK_URL" >>"$log_file" 2>&1 || true
}
|
|
|
|
# ---------------------------------------------------------------------------
# Main flow: measure, clean up if needed, re-measure, classify, write the
# status file, log, notify.
#
# Exit status: 0 for level ok/warning, 2 for level critical,
#              3 when disk usage could not be read.
# ---------------------------------------------------------------------------

# Abort (exit 3) unless $2 is a plain non-negative integer. $1 names the
# measurement for the error text. An empty or non-numeric reading (for
# example when df fails) would otherwise break the comparisons below and
# put invalid JSON into the status file.
require_percent() {
  case "$2" in
    '' | *[!0-9]*)
      log "could not read $1 disk usage for $MOUNT_PATH (got: '$2')"
      printf 'could not read %s disk usage for %s\n' "$1" "$MOUNT_PATH" >&2
      exit 3
      ;;
  esac
}

before_percent="$(disk_used_percent)"
require_percent before "$before_percent"
action="none"
level="ok"

if [ "$before_percent" -ge "$CLEANUP_PERCENT" ]; then
  action="cleanup_safe"
  cleanup_safe
fi

# Classify on the usage that remains after any cleanup.
after_percent="$(disk_used_percent)"
require_percent after "$after_percent"
if [ "$after_percent" -ge "$CRITICAL_PERCENT" ]; then
  level="critical"
elif [ "$after_percent" -ge "$WARN_PERCENT" ]; then
  level="warning"
fi

hint="$(expansion_hint)"
summary="$(docker_df_summary)"
finished_at="$(date -u +%Y-%m-%dT%H:%M:%SZ)"

# Write to a sibling temp file and rename it into place, so a reader never
# sees a partially written status file.
cat >"$status_file.tmp" <<EOF_STATUS
{
"schema_version": "rap.ops.test_docker_disk_guard.v1",
"host": "$(json_escape "$(hostname)")",
"mount_path": "$(json_escape "$MOUNT_PATH")",
"started_at": "$(json_escape "$started_at")",
"finished_at": "$(json_escape "$finished_at")",
"level": "$(json_escape "$level")",
"action": "$(json_escape "$action")",
"thresholds": {
"warn_percent": $WARN_PERCENT,
"cleanup_percent": $CLEANUP_PERCENT,
"critical_percent": $CRITICAL_PERCENT
},
"disk": {
"before_used_percent": $before_percent,
"after_used_percent": $after_percent,
"size": "$(json_escape "$(disk_size_human)")",
"used": "$(json_escape "$(disk_used_human)")",
"available": "$(json_escape "$(disk_avail_human)")"
},
"docker_system_df": "$(json_escape "$summary")",
"expansion_hint": "$(json_escape "$hint")",
"log_file": "$(json_escape "$log_file")"
}
EOF_STATUS
mv "$status_file.tmp" "$status_file"

message="test-docker disk ${level}: ${after_percent}% used after action=${action}. ${hint}"
log "$message"
if [ "$level" != "ok" ]; then
  notify "$level" "$message"
fi

if [ "$level" = "critical" ]; then
  exit 2
fi
exit 0
|