1
This commit is contained in:
@@ -0,0 +1,152 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
# Runtime configuration. Each setting can be overridden from the environment;
# the value after ':=' applies only when the variable is unset or empty.

# Path of the JSON status report written at the end of the run.
: "${STATUS_FILE:=/tmp/rap-web-admin/html/downloads/ops/test-docker-cluster-guard-status.json}"

# Lock file used to keep two guard runs from overlapping.
: "${LOCK_FILE:=/tmp/rap-cluster-guard.lock}"

# Root filesystem usage (percent) at or above which cleanup is attempted.
: "${DISK_WARN:=85}"

# Root filesystem usage (percent) at or above which status becomes "critical".
: "${DISK_CRITICAL:=92}"

# Directories pruned by cleanup_rap_artifacts.
: "${DOWNLOADS_DIR:=/tmp/rap-web-admin/html/downloads}"
: "${KEEP_DIR:=/tmp/rap-web-admin-downloads.keep}"

# Base URLs used by the HTTP probes.
: "${BACKEND_URL:=http://127.0.0.1:18080}"
: "${PUBLIC_URL:=http://195.123.240.88:19131}"

# Cluster identifier interpolated into the diagnostics probe URL.
: "${CLUSTER_ID:=cfc0743d-d960-49fb-9de8-96e063d5e4aa}"
|
||||
|
||||
# Make sure the directory that will receive the status report exists.
mkdir -p "$(dirname "$STATUS_FILE")"

# Open the lock file on fd 9 and try to take an exclusive lock without
# blocking. If the lock cannot be taken, exit quietly with success.
exec 9>"$LOCK_FILE"
flock -n 9 || exit 0

# Accumulators filled by record_action / record_error and emitted in the
# status report.
declare -a actions=()
declare -a errors=()
|
||||
|
||||
#######################################
# Escape a string so it can be embedded between double quotes in JSON.
# Handles backslash, double quote, newline, carriage return and tab; other
# control characters are passed through unchanged.
# Arguments: $1 - raw string
# Outputs:   escaped string on stdout, followed by a newline
#######################################
json_escape() {
  local s=$1
  # Backslash must be doubled first so the escapes added below are not
  # themselves re-escaped.
  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  s=${s//$'\r'/\\r}
  s=${s//$'\t'/\\t}
  printf '%s\n' "$s"
}
|
||||
|
||||
# Append one entry to the global `actions` list.
# Arguments: $1 - action label
record_action() {
  local label="$1"
  actions+=("$label")
}
|
||||
|
||||
# Append one entry to the global `errors` list.
# Arguments: $1 - error label
record_error() {
  local label="$1"
  errors+=("$label")
}
|
||||
|
||||
# Print the fifth column of the second line of `df -P /` with any '%'
# characters removed (the usage percentage of the root filesystem).
disk_used_percent() {
  df -P / | awk 'NR == 2 { pct = $5; gsub(/%/, "", pct); print pct }'
}
|
||||
|
||||
#######################################
# Best-effort removal of old build artifacts. Every find failure is ignored.
#   - In DOWNLOADS_DIR (top level only): regular files named
#     rap-node-agent-0.2.* or rap-host-agent-0.2.*, except names containing
#     0.2.260-vpnfarm, 0.2.261-vpnfarm or latest.
#   - In KEEP_DIR (top level only): regular files named rap-node-agent-*.tar
#     or rap-host-agent-*, except names containing 0.2.260 or 0.2.261.
#   - In /tmp (top level only): rap-web-admin-dist-*.tar files and
#     rap-web-admin-dist-* / rap-repo-build-* directories matched by -mtime +1.
# Globals: DOWNLOADS_DIR, KEEP_DIR (read); records "cleanup_rap_artifacts".
#######################################
cleanup_rap_artifacts() {
  local stale_dir_pattern
  local -a downloads_filter=(
    '(' -name 'rap-node-agent-0.2.*' -o -name 'rap-host-agent-0.2.*' ')'
    '!' -name '*0.2.260-vpnfarm*'
    '!' -name '*0.2.261-vpnfarm*'
    '!' -name '*latest*'
  )
  local -a keep_filter=(
    '(' -name 'rap-node-agent-*.tar' -o -name 'rap-host-agent-*' ')'
    '!' -name '*0.2.260*'
    '!' -name '*0.2.261*'
  )

  if [ -d "$DOWNLOADS_DIR" ]; then
    find "$DOWNLOADS_DIR" -maxdepth 1 -type f "${downloads_filter[@]}" \
      -delete 2>/dev/null || true
  fi

  if [ -d "$KEEP_DIR" ]; then
    find "$KEEP_DIR" -maxdepth 1 -type f "${keep_filter[@]}" \
      -delete 2>/dev/null || true
  fi

  # Stale tarballs first, then the matching unpacked/build directories.
  find /tmp -maxdepth 1 -type f -name 'rap-web-admin-dist-*.tar' -mtime +1 \
    -delete 2>/dev/null || true
  for stale_dir_pattern in 'rap-web-admin-dist-*' 'rap-repo-build-*'; do
    find /tmp -maxdepth 1 -type d -name "$stale_dir_pattern" -mtime +1 \
      -exec rm -rf {} + 2>/dev/null || true
  done

  record_action "cleanup_rap_artifacts"
}
|
||||
|
||||
#######################################
# Bring a single Docker container back into service.
#   - If `docker inspect` does not report it as running (or inspect fails),
#     try `docker start` and record the outcome; nothing else is checked.
#   - Otherwise, if its health status is "unhealthy", try `docker restart`
#     and record the outcome.
# Arguments: $1 - container name
#######################################
ensure_container() {
  local name="$1"
  local run_state health_state

  run_state="$(docker inspect -f '{{.State.Running}}' "$name" 2>/dev/null || echo missing)"
  case "$run_state" in
    true) ;;
    *)
      if docker start "$name" >/dev/null 2>&1; then
        record_action "start_container:$name"
      else
        record_error "start_container_failed:$name"
      fi
      return
      ;;
  esac

  # Containers without a health check report "none" and are left alone.
  health_state="$(docker inspect -f '{{if .State.Health}}{{.State.Health.Status}}{{else}}none{{end}}' "$name" 2>/dev/null || echo missing)"
  [ "$health_state" = "unhealthy" ] || return 0

  if docker restart "$name" >/dev/null 2>&1; then
    record_action "restart_unhealthy:$name"
  else
    record_error "restart_unhealthy_failed:$name"
  fi
}
|
||||
|
||||
#######################################
# Check the rap_test_redis container with `redis-cli PING`.
#   - On success: set stop-writes-on-bgsave-error to "no" and trigger a
#     BGSAVE (both best-effort), then record the "redis_guard" action.
#   - On failure: record "redis_ping_failed" and restart the container,
#     recording either the restart action or a restart-failure error.
#######################################
redis_guard() {
  if docker exec rap_test_redis redis-cli PING >/dev/null 2>&1; then
    # Best-effort tuning: failures here are deliberately ignored.
    docker exec rap_test_redis redis-cli CONFIG SET stop-writes-on-bgsave-error no >/dev/null 2>&1 || true
    docker exec rap_test_redis redis-cli BGSAVE >/dev/null 2>&1 || true
    record_action "redis_guard"
    return 0
  fi

  record_error "redis_ping_failed"
  # Report a failed restart instead of swallowing it, matching how
  # ensure_container reports failed start/restart attempts.
  if docker restart rap_test_redis >/dev/null 2>&1; then
    record_action "restart_container:rap_test_redis"
  else
    record_error "restart_container_failed:rap_test_redis"
  fi
}
|
||||
|
||||
#######################################
# Probe an HTTP endpoint; on failure restart the web admin and backend
# containers.
# Arguments: $1 - short probe name used in recorded labels
#            $2 - URL to fetch (8 second timeout, HTTP errors count as failure)
# Records:   error  "http_probe_failed:<name>" when the probe fails, then
#            action "restart_web_backend_for:<name>" if the restart succeeds,
#            or error "restart_web_backend_failed_for:<name>" if it does not.
#######################################
probe_http() {
  local name="$1"
  local url="$2"

  if curl -fsS -m 8 "$url" >/dev/null 2>&1; then
    return 0
  fi

  record_error "http_probe_failed:$name"
  # Report a failed restart instead of swallowing it, matching how
  # ensure_container reports failed start/restart attempts.
  if docker restart rap_web_admin rap_test_backend >/dev/null 2>&1; then
    record_action "restart_web_backend_for:$name"
  else
    record_error "restart_web_backend_failed_for:$name"
  fi
}
|
||||
|
||||
# Measure root filesystem usage before doing anything. When it is at or above
# DISK_WARN, prune RAP artifacts and unused Docker data. Each Docker prune is
# best-effort: only a successful prune is recorded, failures are ignored.
used_before="$(disk_used_percent)"
if [ "$used_before" -ge "$DISK_WARN" ]; then
  cleanup_rap_artifacts

  if docker builder prune -af --filter 'until=24h' >/dev/null 2>&1; then
    record_action "docker_builder_prune"
  fi

  if docker image prune -af --filter 'until=168h' >/dev/null 2>&1; then
    record_action "docker_image_prune"
  fi

  if docker container prune -f >/dev/null 2>&1; then
    record_action "docker_container_prune"
  fi
fi
|
||||
|
||||
# Containers that must be running (and not unhealthy), checked in this order.
guarded_containers=(
  rap_test_postgres
  rap_test_redis
  rap_test_backend
  rap_web_admin
)
for container in "${guarded_containers[@]}"; do
  ensure_container "$container"
done

# Redis-specific check, then the HTTP probes against the backend and the
# public endpoint.
redis_guard

probe_http "downloads" "${BACKEND_URL}/downloads/rap-android-rdp-vpn-build.json"
probe_http "web_admin_root" "${BACKEND_URL}/"
probe_http "diagnostics" "${PUBLIC_URL}/api/v1/clusters/${CLUSTER_ID}/vpn/client-diagnostics"
|
||||
|
||||
# Re-measure disk usage after any cleanup and derive the overall status:
#   critical - usage is at or above DISK_CRITICAL
#   warning  - usage is at or above DISK_WARN, or at least one error was recorded
#   ok       - otherwise
used_after="$(disk_used_percent)"
status="ok"
if [ "$used_after" -ge "$DISK_CRITICAL" ]; then
  status="critical"
elif [ "$used_after" -ge "$DISK_WARN" ] || [ "${#errors[@]}" -gt 0 ]; then
  status="warning"
fi

# Render the arguments as the comma-separated body of a JSON string array.
# Empty arguments are skipped, so an empty list expanded as "${arr[@]:-}"
# yields an empty body. Escaping goes through json_escape rather than awk
# gsub, whose handling of "\\\\" in the replacement differs between awk
# implementations and can leave backslashes unescaped.
json_string_list() {
  local item
  local out=""
  local sep=""
  for item in "$@"; do
    [ -n "$item" ] || continue
    out+="${sep}\"$(json_escape "$item")\""
    sep=","
  done
  printf '%s' "$out"
}

actions_json="$(json_string_list "${actions[@]:-}")"
errors_json="$(json_string_list "${errors[@]:-}")"
observed_at="$(date -u +%Y-%m-%dT%H:%M:%SZ)"

# Take total and free bytes from a single df call so both numbers describe
# the same snapshot of the filesystem.
disk_bytes="$(df -PB1 / | awk 'NR==2 {print $2, $4}')"
total_bytes="${disk_bytes%% *}"
free_bytes="${disk_bytes##* }"

# Write to a temporary file first, then rename it into place so readers never
# see a partially written report.
cat >"$STATUS_FILE.tmp" <<JSON
{
"schema_version": "rap.test_docker_cluster_guard.v1",
"status": "$(json_escape "$status")",
"observed_at": "$observed_at",
"disk": {
"path": "/",
"used_percent_before": $used_before,
"used_percent": $used_after,
"free_bytes": $free_bytes,
"total_bytes": $total_bytes
},
"actions": [${actions_json}],
"errors": [${errors_json}]
}
JSON
mv "$STATUS_FILE.tmp" "$STATUS_FILE"
|
||||
Reference in New Issue
Block a user