#!/usr/bin/env bash
#
# Guard script for the RAP test Docker cluster.
#
# It frees disk space when the root filesystem is filling up, makes sure the
# expected containers are running and healthy, probes the HTTP endpoints and
# finally writes a JSON status report to STATUS_FILE.
#
# Every setting below can be overridden through an environment variable of
# the same name.
set -euo pipefail

# Destination of the JSON status report.
STATUS_FILE="${STATUS_FILE:-/tmp/rap-web-admin/html/downloads/ops/test-docker-cluster-guard-status.json}"
# Lock file that keeps two guard runs from overlapping.
LOCK_FILE="${LOCK_FILE:-/tmp/rap-cluster-guard.lock}"
# Root filesystem usage (percent) at which cleanup starts / status is "warning".
DISK_WARN="${DISK_WARN:-85}"
# Root filesystem usage (percent) at which the status becomes "critical".
DISK_CRITICAL="${DISK_CRITICAL:-92}"
# Directories holding agent artifacts that the cleanup step prunes.
DOWNLOADS_DIR="${DOWNLOADS_DIR:-/tmp/rap-web-admin/html/downloads}"
KEEP_DIR="${KEEP_DIR:-/tmp/rap-web-admin-downloads.keep}"
# Base URL used for the HTTP probes.
BACKEND_URL="${BACKEND_URL:-http://127.0.0.1:18080}"
# NOTE(review): PUBLIC_URL and CLUSTER_ID are not referenced anywhere in the
# visible part of this script; they are kept for compatibility.
PUBLIC_URL="${PUBLIC_URL:-http://195.123.240.88:19131}"
CLUSTER_ID="${CLUSTER_ID:-cfc0743d-d960-49fb-9de8-96e063d5e4aa}"

# The thresholds are later compared with `[ ... -ge ... ]`; a non-integer
# value would make those tests error out and silently disable the disk
# checks, so fall back to the defaults and say so on stderr.
if ! [[ "$DISK_WARN" =~ ^[0-9]+$ ]]; then
  printf 'warning: DISK_WARN=%s is not an integer; using 85\n' "$DISK_WARN" >&2
  DISK_WARN=85
fi
if ! [[ "$DISK_CRITICAL" =~ ^[0-9]+$ ]]; then
  printf 'warning: DISK_CRITICAL=%s is not an integer; using 92\n' "$DISK_CRITICAL" >&2
  DISK_CRITICAL=92
fi

mkdir -p "$(dirname "$STATUS_FILE")"

# Hold an exclusive, non-blocking lock on fd 9 for the lifetime of the
# script. If the lock cannot be taken, exit quietly with success.
exec 9>"$LOCK_FILE"
if ! flock -n 9; then
  exit 0
fi

# Names of the remediation steps performed and of the problems found during
# this run; both lists end up in the status report.
actions=()
errors=()
# Escape a string so it can be placed between double quotes in a JSON
# document.
# Arguments: $1 - raw text (a missing argument is treated as empty)
# Outputs:   the escaped text followed by a newline on stdout
json_escape() {
  local text="${1-}"
  local backslash='\'
  local quote='"'
  local code octal char replacement

  # Backslashes first, so the escapes added below are not escaped again.
  text=${text//"$backslash"/"$backslash$backslash"}
  text=${text//"$quote"/"$backslash$quote"}

  # JSON forbids raw control characters (U+0001..U+001F) inside strings.
  for code in {1..31}; do
    printf -v octal '%03o' "$code"
    printf -v char '%b' "\\0${octal}"
    case "$code" in
      8) replacement="${backslash}b" ;;
      9) replacement="${backslash}t" ;;
      10) replacement="${backslash}n" ;;
      12) replacement="${backslash}f" ;;
      13) replacement="${backslash}r" ;;
      *) printf -v replacement '%su%04x' "$backslash" "$code" ;;
    esac
    text=${text//"$char"/"$replacement"}
  done

  printf '%s\n' "$text"
}
# Remember a remediation step that was performed during this run.
# Globals:   actions (appended to)
# Arguments: $1 - short identifier of the action
record_action() {
  actions+=("$1")
}
# Remember a problem that was detected during this run.
# Globals:   errors (appended to)
# Arguments: $1 - short identifier of the error
record_error() {
  errors+=("$1")
}
# Print how full a filesystem is, as a bare number without the "%" sign.
# Arguments: $1 - path on the filesystem to inspect (optional, default "/")
# Outputs:   the "capacity" column of `df -P` for that path on stdout
disk_used_percent() {
  local path="${1:-/}"

  # -P forces the one-line-per-filesystem POSIX layout, so the figures are
  # always on the second line and the capacity is always the fifth column.
  df -P "$path" | awk 'NR == 2 { sub(/%$/, "", $5); print $5 }'
}
# Delete old RAP build artifacts to free disk space.
#
# All deletions are best effort: failures of `find` are deliberately ignored
# so that one unreadable path cannot abort the guard run.
#
# Globals:   DOWNLOADS_DIR, KEEP_DIR (read)
# Records:   the action "cleanup_rap_artifacts" (always, once the sweep ran)
cleanup_rap_artifacts() {
  local pattern

  # Published 0.2.x agent files, except the 0.2.260 / 0.2.261 "vpnfarm"
  # builds and anything with "latest" in its name.
  if [[ -d "$DOWNLOADS_DIR" ]]; then
    find "$DOWNLOADS_DIR" -maxdepth 1 -type f \
      \( -name 'rap-node-agent-0.2.*' -o -name 'rap-host-agent-0.2.*' \) \
      ! -name '*0.2.260-vpnfarm*' \
      ! -name '*0.2.261-vpnfarm*' \
      ! -name '*latest*' \
      -delete 2>/dev/null || true
  fi

  # Agent files in the keep directory, except versions 0.2.260 and 0.2.261.
  if [[ -d "$KEEP_DIR" ]]; then
    find "$KEEP_DIR" -maxdepth 1 -type f \
      \( -name 'rap-node-agent-*.tar' -o -name 'rap-host-agent-*' \) \
      ! -name '*0.2.260*' \
      ! -name '*0.2.261*' \
      -delete 2>/dev/null || true
  fi

  # Build leftovers directly under /tmp that are more than a day old.
  find /tmp -maxdepth 1 -type f -name 'rap-web-admin-dist-*.tar' -mtime +1 \
    -delete 2>/dev/null || true
  for pattern in 'rap-web-admin-dist-*' 'rap-repo-build-*'; do
    find /tmp -maxdepth 1 -type d -name "$pattern" -mtime +1 \
      -exec rm -rf -- {} + 2>/dev/null || true
  done

  record_action "cleanup_rap_artifacts"
}
# Make sure a Docker container is running and not reporting "unhealthy".
#
# A container that is not running (or cannot be inspected at all) is
# started; nothing else is checked for it in this run. A running container
# whose health check reports "unhealthy" is restarted.
#
# Arguments: $1 - container name
# Records:   start_container:<name>     / start_container_failed:<name>
#            restart_unhealthy:<name>   / restart_unhealthy_failed:<name>
# Returns:   always 0; problems are reported through record_error
ensure_container() {
  local name="$1"
  local running
  local health

  # "missing" stands in for any inspect failure, e.g. an unknown container.
  running="$(docker inspect -f '{{.State.Running}}' "$name" 2>/dev/null || echo missing)"
  if [[ "$running" != "true" ]]; then
    if docker start "$name" >/dev/null 2>&1; then
      record_action "start_container:$name"
    else
      record_error "start_container_failed:$name"
    fi
    return 0
  fi

  # Containers without a health check yield "none" and are left alone.
  health="$(docker inspect -f '{{if .State.Health}}{{.State.Health.Status}}{{else}}none{{end}}' "$name" 2>/dev/null || echo missing)"
  if [[ "$health" == "unhealthy" ]]; then
    if docker restart "$name" >/dev/null 2>&1; then
      record_action "restart_unhealthy:$name"
    else
      record_error "restart_unhealthy_failed:$name"
    fi
  fi
  return 0
}
# Check that Redis answers PING and keep it writable.
#
# When Redis responds, `stop-writes-on-bgsave-error` is switched off and a
# background save is requested; both commands are best effort. When it does
# not respond, the container is restarted.
#
# Arguments: $1 - Redis container name (optional, default "rap_test_redis")
# Records:   redis_guard on success; otherwise redis_ping_failed plus
#            restart_container:<name> or restart_container_failed:<name>
# Returns:   always 0; problems are reported through record_error
redis_guard() {
  local container="${1:-rap_test_redis}"

  if docker exec "$container" redis-cli PING >/dev/null 2>&1; then
    docker exec "$container" redis-cli CONFIG SET stop-writes-on-bgsave-error no >/dev/null 2>&1 || true
    docker exec "$container" redis-cli BGSAVE >/dev/null 2>&1 || true
    record_action "redis_guard"
    return 0
  fi

  record_error "redis_ping_failed"
  if docker restart "$container" >/dev/null 2>&1; then
    record_action "restart_container:$container"
  else
    # Report a failed restart instead of dropping it silently.
    record_error "restart_container_failed:$container"
  fi
  return 0
}
# Probe an HTTP endpoint; on failure restart the web admin and backend
# containers.
#
# Every failed probe is recorded, but the containers are restarted at most
# once per script run: a probe issued right after a restart is likely to
# fail as well, and restarting again would only interrupt the start-up.
#
# Globals:   probe_http_restart_attempted (read and written)
# Arguments: $1 - short probe name used in the recorded identifiers
#            $2 - URL to fetch (8 second timeout, HTTP errors count as failure)
# Records:   http_probe_failed:<name>, then restart_web_backend_for:<name>
#            or restart_web_backend_failed:<name> for the first failure
# Returns:   always 0; problems are reported through record_error
probe_http() {
  local name="$1"
  local url="$2"

  if curl -fsS -m 8 "$url" >/dev/null 2>&1; then
    return 0
  fi
  record_error "http_probe_failed:$name"

  if [[ "${probe_http_restart_attempted:-0}" == 1 ]]; then
    return 0
  fi
  probe_http_restart_attempted=1

  if docker restart rap_web_admin rap_test_backend >/dev/null 2>&1; then
    record_action "restart_web_backend_for:$name"
  else
    record_error "restart_web_backend_failed:$name"
  fi
  return 0
}
# ---------------------------------------------------------------------------
# Main flow: free disk space if needed, heal the containers, probe the HTTP
# endpoints, then publish the status report.
# ---------------------------------------------------------------------------

# Render the arguments as a comma-separated list of JSON string literals
# (without the surrounding brackets). Blank arguments are skipped.
_json_string_list() {
  local item
  local out=""
  local sep=""
  for item in "$@"; do
    [[ "$item" =~ [^[:space:]] ]] || continue
    out+="${sep}\"$(json_escape "$item")\""
    sep=","
  done
  printf '%s' "$out"
}

# `[ ... -ge ... ]` is used on purpose for the comparisons below: unlike
# `(( ))` it does not evaluate its operands as arithmetic expressions.
used_before="$(disk_used_percent)"
if [ "$used_before" -ge "$DISK_WARN" ]; then
  cleanup_rap_artifacts
  # The prune steps are best effort; only the ones that succeed are recorded.
  if docker builder prune -af --filter 'until=24h' >/dev/null 2>&1; then
    record_action "docker_builder_prune"
  fi
  if docker image prune -af --filter 'until=168h' >/dev/null 2>&1; then
    record_action "docker_image_prune"
  fi
  if docker container prune -f >/dev/null 2>&1; then
    record_action "docker_container_prune"
  fi
fi

for container in rap_test_postgres rap_test_redis rap_test_backend rap_web_admin; do
  ensure_container "$container"
done

redis_guard

probe_http "downloads" "$BACKEND_URL/downloads/rap-android-vpn-build.json"
probe_http "web_admin_root" "$BACKEND_URL/"
# The health endpoint defaults to the address that used to be hard-coded
# here; BACKEND_HEALTH_URL overrides it.
probe_http "backend_healthz" "${BACKEND_HEALTH_URL:-http://127.0.0.1:18121/healthz}"

# Disk pressure at the critical level wins; otherwise any recorded error or
# usage at the warning level downgrades the status to "warning".
used_after="$(disk_used_percent)"
status="ok"
if [ "$used_after" -ge "$DISK_CRITICAL" ]; then
  status="critical"
elif [ "$used_after" -ge "$DISK_WARN" ] || [ "${#errors[@]}" -gt 0 ]; then
  status="warning"
fi

# The `${arr[@]+...}` form keeps empty arrays from tripping `set -u` on
# older Bash releases.
actions_json="$(_json_string_list ${actions[@]+"${actions[@]}"})"
errors_json="$(_json_string_list ${errors[@]+"${errors[@]}"})"
observed_at="$(date -u +%Y-%m-%dT%H:%M:%SZ)"

# One df call yields both figures: column 2 is the size, column 4 the space
# available, both in bytes because of -B1.
disk_bytes="$(df -PB1 / | awk 'NR == 2 { print $2, $4 }')"
total_bytes="${disk_bytes%% *}"
free_bytes="${disk_bytes##* }"

# Write to a sibling file first and rename it into place, so readers never
# see a half-written report.
cat >"$STATUS_FILE.tmp" <<JSON
{
  "schema_version": "rap.test_docker_cluster_guard.v1",
  "status": "$(json_escape "$status")",
  "observed_at": "$observed_at",
  "disk": {
    "path": "/",
    "used_percent_before": $used_before,
    "used_percent": $used_after,
    "free_bytes": $free_bytes,
    "total_bytes": $total_bytes
  },
  "actions": [${actions_json}],
  "errors": [${errors_json}]
}
JSON
mv -- "$STATUS_FILE.tmp" "$STATUS_FILE"