Files
rdp-proxy/scripts/ops/test-docker-cluster-guard.sh
2026-05-18 21:33:39 +03:00

153 lines
5.0 KiB
Bash

#!/usr/bin/env bash
# Guard script for the test Docker cluster: frees disk space, keeps the
# rap_* containers running, probes HTTP endpoints and publishes a JSON status
# file. This section sets strict mode, resolves configuration and takes the
# single-instance lock.
set -euo pipefail

# Configuration. Every value can be overridden from the environment; the
# right-hand side is used only when the variable is unset or empty.
: "${STATUS_FILE:=/tmp/rap-web-admin/html/downloads/ops/test-docker-cluster-guard-status.json}"
: "${LOCK_FILE:=/tmp/rap-cluster-guard.lock}"
# Root filesystem usage thresholds, in percent.
: "${DISK_WARN:=85}"
: "${DISK_CRITICAL:=92}"
: "${DOWNLOADS_DIR:=/tmp/rap-web-admin/html/downloads}"
: "${KEEP_DIR:=/tmp/rap-web-admin-downloads.keep}"
: "${BACKEND_URL:=http://127.0.0.1:18080}"
# NOTE(review): PUBLIC_URL and CLUSTER_ID are not referenced in the visible
# part of this script - confirm whether they are still needed.
: "${PUBLIC_URL:=http://195.123.240.88:19131}"
: "${CLUSTER_ID:=cfc0743d-d960-49fb-9de8-96e063d5e4aa}"

# The status file's directory must exist before the report is written.
mkdir -p "$(dirname "$STATUS_FILE")"

# Single-instance guard: hold an exclusive, non-blocking lock on fd 9 for the
# lifetime of the process. If the lock cannot be taken, exit quietly with 0.
exec 9>"$LOCK_FILE"
flock -n 9 || exit 0

# Accumulators filled by record_action / record_error and reported at the end.
declare -a actions=()
declare -a errors=()
#######################################
# Escape a string so it can be placed between the double quotes of a JSON
# string literal.
# Handles backslash, double quote, newline, carriage return and tab; other
# control characters are passed through unchanged.
# Arguments: $1 - raw text
# Outputs:   the escaped text followed by a newline, on stdout
#######################################
json_escape() {
  local text="$1"
  # Backslashes must be doubled first so the escapes added below survive.
  text=${text//\\/\\\\}
  text=${text//\"/\\\"}
  # Raw control characters are not allowed inside JSON strings.
  text=${text//$'\n'/\\n}
  text=${text//$'\r'/\\r}
  text=${text//$'\t'/\\t}
  printf '%s\n' "$text"
}
# Append one entry to the global `actions` list.
# Arguments: $1 - label of the action that was performed
record_action() {
  local entry="$1"
  actions+=("$entry")
}
# Append one entry to the global `errors` list.
# Arguments: $1 - label of the problem that was detected
record_error() {
  local entry="$1"
  errors+=("$entry")
}
# Print the usage percentage of the root filesystem without the `%` sign.
# Reads the fifth column of the data row of `df -P /`.
# Outputs: the percentage on stdout
disk_used_percent() {
  df -P / \
    | awk 'NR == 2 { pct = $5; gsub(/%/, "", pct); print pct }'
}
#######################################
# Delete old agent artifacts and stale build leftovers. Every removal is
# best-effort: find errors are silenced and never abort the script.
# Globals:   DOWNLOADS_DIR, KEEP_DIR (read)
# Arguments: none
# Returns:   always records the "cleanup_rap_artifacts" action
#######################################
cleanup_rap_artifacts() {
  # Top-level 0.2.x agent files in the downloads directory, except the
  # 0.2.260/0.2.261 vpnfarm builds and anything with "latest" in its name.
  local -a stale_downloads=(
    -maxdepth 1 -type f
    '(' -name 'rap-node-agent-0.2.*' -o -name 'rap-host-agent-0.2.*' ')'
    '!' -name '*0.2.260-vpnfarm*'
    '!' -name '*0.2.261-vpnfarm*'
    '!' -name '*latest*'
    -delete
  )
  # Top-level agent files in the keep directory, except 0.2.260 and 0.2.261.
  local -a stale_kept=(
    -maxdepth 1 -type f
    '(' -name 'rap-node-agent-*.tar' -o -name 'rap-host-agent-*' ')'
    '!' -name '*0.2.260*'
    '!' -name '*0.2.261*'
    -delete
  )

  if [[ -d "$DOWNLOADS_DIR" ]]; then
    find "$DOWNLOADS_DIR" "${stale_downloads[@]}" 2>/dev/null || true
  fi
  if [[ -d "$KEEP_DIR" ]]; then
    find "$KEEP_DIR" "${stale_kept[@]}" 2>/dev/null || true
  fi

  # Build leftovers directly under /tmp that `-mtime +1` considers old.
  find /tmp -maxdepth 1 -type f -name 'rap-web-admin-dist-*.tar' -mtime +1 -delete 2>/dev/null || true
  local stale_dir_pattern
  for stale_dir_pattern in 'rap-web-admin-dist-*' 'rap-repo-build-*'; do
    find /tmp -maxdepth 1 -type d -name "$stale_dir_pattern" -mtime +1 -exec rm -rf {} + 2>/dev/null || true
  done

  record_action "cleanup_rap_artifacts"
}
#######################################
# Make sure one container is running and not reported unhealthy.
# A container that is not running (or cannot be inspected) is started; a
# running container whose health status is "unhealthy" is restarted. The
# outcome is recorded through record_action / record_error.
# Arguments: $1 - container name
# Returns:   0 in every case
#######################################
ensure_container() {
  local target="$1"
  local run_state health_state

  # A failed inspect yields "missing", which is handled like "not running".
  run_state="$(docker inspect -f '{{.State.Running}}' "$target" 2>/dev/null || echo missing)"

  if [[ "$run_state" == "true" ]]; then
    # Containers without a health check report "none" and are left alone.
    health_state="$(docker inspect -f '{{if .State.Health}}{{.State.Health.Status}}{{else}}none{{end}}' "$target" 2>/dev/null || echo missing)"
    [[ "$health_state" == "unhealthy" ]] || return 0

    if docker restart "$target" >/dev/null 2>&1; then
      record_action "restart_unhealthy:$target"
    else
      record_error "restart_unhealthy_failed:$target"
    fi
    return 0
  fi

  if docker start "$target" >/dev/null 2>&1; then
    record_action "start_container:$target"
  else
    record_error "start_container_failed:$target"
  fi
  return 0
}
#######################################
# Check the rap_test_redis container with a PING.
# When Redis answers: set stop-writes-on-bgsave-error to "no" and trigger a
# BGSAVE (both best-effort), then record "redis_guard".
# When it does not: record "redis_ping_failed" and try to restart the
# container, recording the restart only if it succeeded.
# Arguments: none
# Returns:   0 in every case
#######################################
redis_guard() {
  local -a redis_cli=(docker exec rap_test_redis redis-cli)

  if ! "${redis_cli[@]}" PING >/dev/null 2>&1; then
    record_error "redis_ping_failed"
    if docker restart rap_test_redis >/dev/null 2>&1; then
      record_action "restart_container:rap_test_redis"
    fi
    return 0
  fi

  "${redis_cli[@]}" CONFIG SET stop-writes-on-bgsave-error no >/dev/null 2>&1 || true
  "${redis_cli[@]}" BGSAVE >/dev/null 2>&1 || true
  record_action "redis_guard"
}
#######################################
# Probe one URL with curl (8 second limit, HTTP errors count as failure).
# On failure, record "http_probe_failed:<name>" and try to restart both
# rap_web_admin and rap_test_backend, recording the restart if it succeeded.
# Arguments: $1 - probe name used in the recorded labels
#            $2 - URL to fetch
# Returns:   0 in every case
#######################################
probe_http() {
  local label="$1"
  local endpoint="$2"

  if curl -fsS -m 8 "$endpoint" >/dev/null 2>&1; then
    return 0
  fi

  record_error "http_probe_failed:$label"
  if docker restart rap_web_admin rap_test_backend >/dev/null 2>&1; then
    record_action "restart_web_backend_for:$label"
  fi
  return 0
}
# --- Disk pressure -----------------------------------------------------------
# When root filesystem usage is at or above DISK_WARN, remove old artifacts
# and prune Docker data. Each prune is best-effort and is recorded only when
# the docker command succeeded.
used_before="$(disk_used_percent)"
if [ "$used_before" -ge "$DISK_WARN" ]; then
  cleanup_rap_artifacts
  if docker builder prune -af --filter 'until=24h' >/dev/null 2>&1; then
    record_action "docker_builder_prune"
  fi
  if docker image prune -af --filter 'until=168h' >/dev/null 2>&1; then
    record_action "docker_image_prune"
  fi
  if docker container prune -f >/dev/null 2>&1; then
    record_action "docker_container_prune"
  fi
fi

# --- Containers --------------------------------------------------------------
guarded_containers=(rap_test_postgres rap_test_redis rap_test_backend rap_web_admin)
for container in "${guarded_containers[@]}"; do
  ensure_container "$container"
done
redis_guard

# --- HTTP probes -------------------------------------------------------------
# NOTE(review): the healthz probe uses a fixed address instead of a
# configurable variable - confirm this is intentional.
probe_http "downloads" "$BACKEND_URL/downloads/rap-android-vpn-build.json"
probe_http "web_admin_root" "$BACKEND_URL/"
probe_http "backend_healthz" "http://127.0.0.1:18121/healthz"

# --- Overall status ----------------------------------------------------------
# "warning" when usage reached DISK_WARN or any error was recorded;
# "critical" takes precedence once usage reaches DISK_CRITICAL.
used_after="$(disk_used_percent)"
status="ok"
if [ "$used_after" -ge "$DISK_WARN" ] || [ "${#errors[@]}" -gt 0 ]; then
  status="warning"
fi
if [ "$used_after" -ge "$DISK_CRITICAL" ]; then
  status="critical"
fi
# --- Status report -----------------------------------------------------------
# Serialises the run's outcome to $STATUS_FILE as JSON.

# Print the arguments as a comma-separated list of JSON string literals
# (without the surrounding brackets). Empty arguments are skipped.
# Escaping is delegated to json_escape so the arrays and the "status" field
# are encoded the same way; this also avoids awk's gsub() replacement
# backslash rules, which differ between awk implementations.
_json_string_list() {
  local entry joined="" sep=""
  for entry in "$@"; do
    [[ -n "$entry" ]] || continue
    joined+="${sep}\"$(json_escape "$entry")\""
    sep=","
  done
  printf '%s' "$joined"
}

# Print $1 when it is a plain non-negative integer, otherwise `null`, so an
# unexpected df result cannot produce a syntactically invalid document.
_json_uint_or_null() {
  if [[ "$1" =~ ^[0-9]+$ ]]; then
    printf '%s' "$1"
  else
    printf 'null'
  fi
}

# The ${arr[@]+...} form keeps empty arrays safe under `set -u` on older bash.
actions_json="$(_json_string_list ${actions[@]+"${actions[@]}"})"
errors_json="$(_json_string_list ${errors[@]+"${errors[@]}"})"
observed_at="$(date -u +%Y-%m-%dT%H:%M:%SZ)"

# Read total and free bytes from a single df call so both come from the same
# snapshot. Columns 2 and 4 of the data row are total and available bytes.
disk_bytes="$(df -PB1 / | awk 'NR==2 {print $2, $4}')"
total_bytes="${disk_bytes%% *}"
free_bytes="${disk_bytes##* }"

# Write to a temporary file and rename it so readers never see a partial file.
cat >"$STATUS_FILE.tmp" <<JSON
{
  "schema_version": "rap.test_docker_cluster_guard.v1",
  "status": "$(json_escape "$status")",
  "observed_at": "$observed_at",
  "disk": {
    "path": "/",
    "used_percent_before": $(_json_uint_or_null "$used_before"),
    "used_percent": $(_json_uint_or_null "$used_after"),
    "free_bytes": $(_json_uint_or_null "$free_bytes"),
    "total_bytes": $(_json_uint_or_null "$total_bytes")
  },
  "actions": [${actions_json}],
  "errors": [${errors_json}]
}
JSON
mv "$STATUS_FILE.tmp" "$STATUS_FILE"