This commit is contained in:
2026-05-14 23:30:34 +03:00
parent 26cb65e936
commit 04c46042d9
239 changed files with 34102 additions and 438 deletions
@@ -0,0 +1,35 @@
#!/usr/bin/env bash
#
# Grow an LVM-backed root filesystem after the underlying disk was enlarged:
#   1. grow the partition that holds the LVM physical volume (growpart),
#   2. resize the physical volume to the new partition size (pvresize),
#   3. extend the logical volume and its filesystem (lvextend -r).
#
# Environment overrides:
#   DISK              whole-disk device            (default: /dev/sda)
#   PARTITION_NUMBER  number of the PV partition   (default: 3)
#   PARTITION         PV partition device          (default: derived from DISK)
#   LV_PATH           logical volume to extend     (default: /dev/ubuntu-vg/ubuntu-lv)
#
# Must be run as root.
set -euo pipefail

DISK="${DISK:-/dev/sda}"
PARTITION_NUMBER="${PARTITION_NUMBER:-3}"

# Disks whose name ends in a digit (nvme0n1, mmcblk0, loop0, ...) separate the
# partition number with "p": /dev/nvme0n1p3, not /dev/nvme0n13.
case "$DISK" in
  *[0-9]) default_partition="${DISK}p${PARTITION_NUMBER}" ;;
  *) default_partition="${DISK}${PARTITION_NUMBER}" ;;
esac
PARTITION="${PARTITION:-${default_partition}}"
LV_PATH="${LV_PATH:-/dev/ubuntu-vg/ubuntu-lv}"

if [ "$(id -u)" -ne 0 ]; then
  echo "This script must run as root. Use: sudo $0" >&2
  exit 1
fi

# Print block-device, filesystem and LVM state for before/after comparison.
show_storage_state() {
  lsblk -o NAME,SIZE,TYPE,FSTYPE,MOUNTPOINTS "$DISK"
  df -h /
  pvs
  vgs
  lvs
}

echo "Before:"
show_storage_state

echo "Growing partition ${PARTITION} to fill ${DISK}..."
# growpart exits 1 ("NOCHANGE") when the partition already fills the disk.
# That is not an error for this workflow: a re-run after a partial failure
# must still reach pvresize/lvextend, so only exit codes >= 2 abort.
growpart_status=0
growpart "$DISK" "$PARTITION_NUMBER" || growpart_status=$?
case "$growpart_status" in
  0) ;;
  1) echo "Partition ${PARTITION} cannot be grown any further; continuing." ;;
  *)
    echo "growpart failed with exit code ${growpart_status}" >&2
    exit "$growpart_status"
    ;;
esac

echo "Resizing LVM physical volume ${PARTITION}..."
pvresize "$PARTITION"

echo "Extending ${LV_PATH} to all free VG space and resizing filesystem..."
lvextend -l +100%FREE -r "$LV_PATH"

echo "After:"
show_storage_state
+152
View File
@@ -0,0 +1,152 @@
#!/usr/bin/env bash
#
# Test docker cluster guard: configuration, single-instance lock and the
# accumulators used by the rest of the script.
#
# Every setting below can be overridden through the environment.
set -euo pipefail

: "${STATUS_FILE:=/tmp/rap-web-admin/html/downloads/ops/test-docker-cluster-guard-status.json}"
: "${LOCK_FILE:=/tmp/rap-cluster-guard.lock}"
: "${DISK_WARN:=85}"
: "${DISK_CRITICAL:=92}"
: "${DOWNLOADS_DIR:=/tmp/rap-web-admin/html/downloads}"
: "${KEEP_DIR:=/tmp/rap-web-admin-downloads.keep}"
: "${BACKEND_URL:=http://127.0.0.1:18080}"
: "${PUBLIC_URL:=http://195.123.240.88:19131}"
: "${CLUSTER_ID:=cfc0743d-d960-49fb-9de8-96e063d5e4aa}"

# The status file is written at the end of the run; make sure its directory exists.
mkdir -p "$(dirname "$STATUS_FILE")"

# Hold an exclusive, non-blocking lock on fd 9 for the lifetime of the script.
# If another instance already holds it, leave quietly.
exec 9>"$LOCK_FILE"
flock -n 9 || exit 0

# Names of remediation steps taken and of failures seen during this run.
declare -a actions=()
declare -a errors=()
#######################################
# Escape a string so it can be embedded between double quotes in JSON.
# Backslash and double quote are escaped, and newline, tab and carriage
# return are turned into \n, \t and \r so multi-line input cannot break the
# surrounding JSON document.
# Arguments: $1 - raw text
# Outputs:   escaped text followed by a newline on stdout
#######################################
json_escape() {
  local text=$1
  # Backslashes first, otherwise the escapes added below would be doubled.
  text=${text//\\/\\\\}
  text=${text//\"/\\\"}
  text=${text//$'\n'/\\n}
  text=${text//$'\t'/\\t}
  text=${text//$'\r'/\\r}
  printf '%s\n' "$text"
}
# Append one remediation step name to the global "actions" array.
# Arguments: $1 - action label
record_action() {
  local label=$1
  actions+=("$label")
}
# Append one failure name to the global "errors" array.
# Arguments: $1 - error label
record_error() {
  local label=$1
  errors+=("$label")
}
# Print the used-space percentage of the root filesystem without the "%" sign.
# Reads the capacity column (5th field) of the data row of `df -P /`.
disk_used_percent() {
  df -P / \
    | awk 'NR == 2 { pct = $5; gsub(/%/, "", pct); print pct }'
}
#######################################
# Delete old RAP build artifacts to free disk space.
#   - DOWNLOADS_DIR: 0.2.* node/host agent files, except the 0.2.260 and
#     0.2.261 vpnfarm builds and anything named *latest*.
#   - KEEP_DIR: node-agent tarballs and host-agent files, except 0.2.260/0.2.261.
#   - /tmp: web-admin dist tarballs/directories and repo build directories
#     older than one day.
# Failures of individual find runs are ignored (best effort).
# Globals: DOWNLOADS_DIR, KEEP_DIR (read); records "cleanup_rap_artifacts".
#######################################
cleanup_rap_artifacts() {
  local -a downloads_filter keep_filter
  local stale_dir_glob

  downloads_filter=(
    -maxdepth 1 -type f
    '(' -name 'rap-node-agent-0.2.*' -o -name 'rap-host-agent-0.2.*' ')'
    '!' -name '*0.2.260-vpnfarm*'
    '!' -name '*0.2.261-vpnfarm*'
    '!' -name '*latest*'
    -delete
  )
  keep_filter=(
    -maxdepth 1 -type f
    '(' -name 'rap-node-agent-*.tar' -o -name 'rap-host-agent-*' ')'
    '!' -name '*0.2.260*'
    '!' -name '*0.2.261*'
    -delete
  )

  if [ -d "$DOWNLOADS_DIR" ]; then
    find "$DOWNLOADS_DIR" "${downloads_filter[@]}" 2>/dev/null || true
  fi

  if [ -d "$KEEP_DIR" ]; then
    find "$KEEP_DIR" "${keep_filter[@]}" 2>/dev/null || true
  fi

  find /tmp -maxdepth 1 -type f -name 'rap-web-admin-dist-*.tar' -mtime +1 -delete 2>/dev/null || true
  for stale_dir_glob in 'rap-web-admin-dist-*' 'rap-repo-build-*'; do
    find /tmp -maxdepth 1 -type d -name "$stale_dir_glob" -mtime +1 -exec rm -rf {} + 2>/dev/null || true
  done

  record_action "cleanup_rap_artifacts"
}
#######################################
# Make sure a docker container is running and not unhealthy.
# A container that is not running (or cannot be inspected) is started; a
# running container whose health status is "unhealthy" is restarted. The
# outcome is recorded through record_action / record_error.
# Arguments: $1 - container name
#######################################
ensure_container() {
  local name="$1"
  local running health

  running="$(docker inspect -f '{{.State.Running}}' "$name" 2>/dev/null || echo missing)"
  case "$running" in
    true) ;;
    *)
      if docker start "$name" >/dev/null 2>&1; then
        record_action "start_container:$name"
      else
        record_error "start_container_failed:$name"
      fi
      return
      ;;
  esac

  # Containers without a health check report "none" and are left alone.
  health="$(docker inspect -f '{{if .State.Health}}{{.State.Health.Status}}{{else}}none{{end}}' "$name" 2>/dev/null || echo missing)"
  [ "$health" = "unhealthy" ] || return 0

  if docker restart "$name" >/dev/null 2>&1; then
    record_action "restart_unhealthy:$name"
  else
    record_error "restart_unhealthy_failed:$name"
  fi
}
#######################################
# Check the rap_test_redis container with a PING.
# When it answers: set stop-writes-on-bgsave-error to "no", trigger a BGSAVE
# (both best effort) and record "redis_guard".
# When it does not: record "redis_ping_failed" and try to restart the
# container, recording the restart only if it succeeds.
#######################################
redis_guard() {
  local -a redis_cli=(docker exec rap_test_redis redis-cli)

  if ! "${redis_cli[@]}" PING >/dev/null 2>&1; then
    record_error "redis_ping_failed"
    if docker restart rap_test_redis >/dev/null 2>&1; then
      record_action "restart_container:rap_test_redis"
    fi
    return 0
  fi

  "${redis_cli[@]}" CONFIG SET stop-writes-on-bgsave-error no >/dev/null 2>&1 || true
  "${redis_cli[@]}" BGSAVE >/dev/null 2>&1 || true
  record_action "redis_guard"
}
#######################################
# Probe an HTTP endpoint (8 second limit). On failure record
# "http_probe_failed:<name>" and restart the rap_web_admin and
# rap_test_backend containers, recording the restart only if it succeeds.
# Arguments: $1 - probe name used in recorded labels
#            $2 - URL to fetch
#######################################
probe_http() {
  local name="$1" url="$2"

  # Healthy endpoint: nothing to do.
  curl -fsS -m 8 "$url" >/dev/null 2>&1 && return 0

  record_error "http_probe_failed:$name"
  if docker restart rap_web_admin rap_test_backend >/dev/null 2>&1; then
    record_action "restart_web_backend_for:$name"
  fi
}
# ---------------------------------------------------------------------------
# Main guard pass: free disk space when above the warning threshold, make
# sure the test containers are up, probe the HTTP endpoints, then publish a
# JSON status document.
# ---------------------------------------------------------------------------

# Render the arguments as a comma-separated list of JSON string literals.
# Blank arguments are skipped, so "${array[@]:-}" of an empty array renders
# as an empty list. Escaping is done with bash parameter expansion rather
# than awk gsub(): the handling of backslashes in awk replacement strings
# differs between awk implementations, and under the POSIX rules "\\\\"
# collapses to a single backslash, leaving backslashes unescaped.
_json_string_items() {
  local item rendered="" sep=""
  for item in "$@"; do
    [[ "$item" =~ [^[:space:]] ]] || continue
    item=${item//\\/\\\\}
    item=${item//\"/\\\"}
    item=${item//$'\n'/\\n}
    item=${item//$'\t'/\\t}
    item=${item//$'\r'/\\r}
    rendered+="${sep}\"${item}\""
    sep=","
  done
  printf '%s' "$rendered"
}

used_before="$(disk_used_percent)"
if [ "$used_before" -ge "$DISK_WARN" ]; then
  cleanup_rap_artifacts
  # Each prune is best effort; only successful ones are reported as actions.
  if docker builder prune -af --filter 'until=24h' >/dev/null 2>&1; then
    record_action "docker_builder_prune"
  fi
  if docker image prune -af --filter 'until=168h' >/dev/null 2>&1; then
    record_action "docker_image_prune"
  fi
  if docker container prune -f >/dev/null 2>&1; then
    record_action "docker_container_prune"
  fi
fi

for container in rap_test_postgres rap_test_redis rap_test_backend rap_web_admin; do
  ensure_container "$container"
done

redis_guard

probe_http "downloads" "$BACKEND_URL/downloads/rap-android-rdp-vpn-build.json"
probe_http "web_admin_root" "$BACKEND_URL/"
probe_http "diagnostics" "$PUBLIC_URL/api/v1/clusters/$CLUSTER_ID/vpn/client-diagnostics"

# Overall status: disk usage after cleanup decides critical/warning; any
# recorded error also downgrades "ok" to "warning".
used_after="$(disk_used_percent)"
status="ok"
if [ "$used_after" -ge "$DISK_CRITICAL" ]; then
  status="critical"
elif [ "$used_after" -ge "$DISK_WARN" ] || [ "${#errors[@]}" -gt 0 ]; then
  status="warning"
fi

# "${array[@]:-}" keeps `set -u` quiet on bash versions that treat an empty
# array as unset.
actions_json="$(_json_string_items "${actions[@]:-}")"
errors_json="$(_json_string_items "${errors[@]:-}")"
observed_at="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
free_bytes="$(df -PB1 / | awk 'NR==2 {print $4}')"
total_bytes="$(df -PB1 / | awk 'NR==2 {print $2}')"

# Write to a temporary file and rename it so readers never see a partial document.
cat >"$STATUS_FILE.tmp" <<JSON
{
"schema_version": "rap.test_docker_cluster_guard.v1",
"status": "$(json_escape "$status")",
"observed_at": "$observed_at",
"disk": {
"path": "/",
"used_percent_before": $used_before,
"used_percent": $used_after,
"free_bytes": $free_bytes,
"total_bytes": $total_bytes
},
"actions": [${actions_json}],
"errors": [${errors_json}]
}
JSON
mv "$STATUS_FILE.tmp" "$STATUS_FILE"
+110
View File
@@ -0,0 +1,110 @@
<#
.SYNOPSIS
Uploads test-docker-disk-guard.sh to a remote host over SSH, optionally
schedules it every 15 minutes, and optionally runs it once.

.PARAMETER SshAlias
Host argument passed to ssh and scp.

.PARAMETER InstallCron
Install a schedule on the remote host: a crontab entry when crontab is
available there, otherwise a systemd user timer.

.PARAMETER RunOnce
Run the guard once after uploading. The guard is also run once when
-InstallCron is not given.

.PARAMETER WarnPercent
Passed to the guard as WARN_PERCENT.

.PARAMETER CleanupPercent
Passed to the guard as CLEANUP_PERCENT.

.PARAMETER CriticalPercent
Passed to the guard as CRITICAL_PERCENT.

.PARAMETER MinTmpAgeMinutes
Passed to the guard as MIN_TMP_AGE_MINUTES.

.PARAMETER RemoteScriptPath
Path on the remote host where the guard script is installed.

.PARAMETER StatusUrl
URL printed at the end of the run; this script does not fetch it.
#>
param(
[string]$SshAlias = "test-docker",
[switch]$InstallCron,
[switch]$RunOnce,
[int]$WarnPercent = 85,
[int]$CleanupPercent = 85,
[int]$CriticalPercent = 95,
[int]$MinTmpAgeMinutes = 360,
[string]$RemoteScriptPath = "/home/test/bin/rap-test-docker-disk-guard",
[string]$StatusUrl = "http://docker-test.cin.su:18080/downloads/ops/test-docker-disk-guard-status.json"
)
# Turn non-terminating cmdlet errors into terminating ones.
$ErrorActionPreference = "Stop"

# The guard shell script is expected to sit next to this deploy script.
$scriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path
$localScript = Join-Path -Path $scriptDir -ChildPath "test-docker-disk-guard.sh"
if (!(Test-Path -Path $localScript)) {
    throw "Guard script not found: $localScript"
}
# Run a shell command on the remote host through ssh and throw when ssh
# reports a non-zero exit code.
function Invoke-Remote {
    param([string]$Command)

    & ssh $SshAlias $Command
    $sshExit = $LASTEXITCODE
    if ($sshExit -eq 0) {
        return
    }
    throw "ssh command failed with exit code $sshExit"
}
# Return $true when `command -v <name>` succeeds on the remote host.
function Test-RemoteCommand {
    param([string]$CommandName)

    $probe = "command -v $CommandName >/dev/null 2>&1"
    & ssh $SshAlias $probe
    return ($LASTEXITCODE -eq 0)
}
# Upload to a staging path first, then move into place with `install` so the
# final file gets mode 0755.
$remoteUploadPath = "/home/test/.rap-test-docker-disk-guard.upload"
Write-Host ("Uploading disk guard to {0}:{1}" -f $SshAlias, $RemoteScriptPath)

$uploadTarget = "{0}:{1}" -f $SshAlias, $remoteUploadPath
& scp $localScript $uploadTarget
$scpExit = $LASTEXITCODE
if ($scpExit -ne 0) {
    throw "scp failed with exit code $scpExit"
}

# Single-quoted format string: $(dirname ...) must reach the remote shell unexpanded.
$installCommand = 'mkdir -p $(dirname {0}) && install -m 0755 {1} {0}' -f $RemoteScriptPath, $remoteUploadPath
Invoke-Remote $installCommand

# Thresholds are handed to the guard as environment variables in front of the command.
$envPrefix = @(
    "WARN_PERCENT=$WarnPercent"
    "CLEANUP_PERCENT=$CleanupPercent"
    "CRITICAL_PERCENT=$CriticalPercent"
    "MIN_TMP_AGE_MINUTES=$MinTmpAgeMinutes"
) -join " "
$runCommand = "$envPrefix $RemoteScriptPath"
# Install a 15-minute schedule for the guard on the remote host: a crontab
# entry when crontab exists there, otherwise a systemd user timer.
if ($InstallCron) {
    if (Test-RemoteCommand "crontab") {
        # cron's shell opens the log redirection before it starts the guard.
        # If the log directory is missing (first install, or /tmp wiped) the
        # redirection fails and the guard never runs, so create it first.
        $cronLogDir = "/tmp/rap-ops"
        # A trailing shell comment tags the entry so that re-installing
        # replaces it even when -RemoteScriptPath does not contain this name.
        $cronMarker = "rap-test-docker-disk-guard"
        $cronLine = "*/15 * * * * mkdir -p $cronLogDir && $runCommand >$cronLogDir/test-docker-disk-guard.cron.log 2>&1 # $cronMarker"
        # Escape single quotes for embedding in a single-quoted shell word.
        $escapedCronLine = $cronLine.Replace("'", "'\''")
        $installCronCommand = "(crontab -l 2>/dev/null | grep -v '$cronMarker'; printf '%s\n' '$escapedCronLine') | crontab -"
        Write-Host "Installing cron: $cronLine"
        Invoke-Remote $installCronCommand
    } else {
        Write-Host "crontab is not available; installing user systemd timer."
        $serviceContent = @"
[Unit]
Description=RAP test-docker disk guard
[Service]
Type=oneshot
Environment=WARN_PERCENT=$WarnPercent
Environment=CLEANUP_PERCENT=$CleanupPercent
Environment=CRITICAL_PERCENT=$CriticalPercent
Environment=MIN_TMP_AGE_MINUTES=$MinTmpAgeMinutes
ExecStart=$RemoteScriptPath
"@
        $timerContent = @"
[Unit]
Description=Run RAP test-docker disk guard every 15 minutes
[Timer]
OnBootSec=2min
OnUnitActiveSec=15min
AccuracySec=1min
Persistent=true
[Install]
WantedBy=timers.target
"@
        # Stage the unit files in a unique local temp directory and copy them over.
        $tmpBase = Join-Path ([System.IO.Path]::GetTempPath()) ("rap-disk-guard-" + [System.Guid]::NewGuid().ToString("N"))
        New-Item -ItemType Directory -Path $tmpBase | Out-Null
        try {
            $servicePath = Join-Path $tmpBase "rap-test-docker-disk-guard.service"
            $timerPath = Join-Path $tmpBase "rap-test-docker-disk-guard.timer"
            Set-Content -Path $servicePath -Value $serviceContent -Encoding ascii
            Set-Content -Path $timerPath -Value $timerContent -Encoding ascii
            Invoke-Remote "mkdir -p /home/test/.config/systemd/user"
            & scp $servicePath "${SshAlias}:/home/test/.config/systemd/user/rap-test-docker-disk-guard.service"
            if ($LASTEXITCODE -ne 0) { throw "scp service failed with exit code $LASTEXITCODE" }
            & scp $timerPath "${SshAlias}:/home/test/.config/systemd/user/rap-test-docker-disk-guard.timer"
            if ($LASTEXITCODE -ne 0) { throw "scp timer failed with exit code $LASTEXITCODE" }
        }
        finally {
            # Remove the staging directory even when an upload step throws.
            Remove-Item -Recurse -Force -LiteralPath $tmpBase
        }
        Invoke-Remote "systemctl --user daemon-reload && systemctl --user enable --now rap-test-docker-disk-guard.timer && systemctl --user start rap-test-docker-disk-guard.service"
    }
}
# Run the guard immediately when asked to, or when no schedule was requested.
$shouldRunNow = $RunOnce -or (-not $InstallCron)
if ($shouldRunNow) {
    Write-Host "Running disk guard once..."
    & ssh $SshAlias $runCommand
    $exitCode = $LASTEXITCODE
    # Exit code 2 is the guard's "critical" signal; anything else non-zero is a failure.
    switch ($exitCode) {
        0 { }
        2 { Write-Warning "Disk guard reports critical state after cleanup." }
        default { throw "disk guard failed with exit code $exitCode" }
    }
}

Write-Host "Status: $StatusUrl"
+167
View File
@@ -0,0 +1,167 @@
#!/usr/bin/env sh
#
# test-docker disk guard: configuration and output locations.
# Every setting below can be overridden through the environment.
set -eu

: "${WARN_PERCENT:=85}"
: "${CLEANUP_PERCENT:=85}"
: "${CRITICAL_PERCENT:=95}"
: "${MIN_TMP_AGE_MINUTES:=360}"
: "${MOUNT_PATH:=/}"
: "${STATUS_DIR:=/tmp/rap-web-admin/html/downloads/ops}"
: "${LOG_DIR:=/tmp/rap-ops}"
# Empty WEBHOOK_URL disables notifications.
: "${WEBHOOK_URL:=}"

mkdir -p "$STATUS_DIR" "$LOG_DIR"

started_at="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
log_file="${LOG_DIR}/test-docker-disk-guard.log"
status_file="${STATUS_DIR}/test-docker-disk-guard-status.json"
# Append one line to $log_file: a UTC timestamp, a space, then all arguments
# joined by spaces.
log() {
  {
    date -u +%Y-%m-%dT%H:%M:%SZ | tr -d '\n'
    printf ' %s\n' "$*"
  } >>"$log_file"
}
# Escape backslashes and double quotes in $1 for use inside a JSON string.
# Backslashes are handled first so the quotes' escapes are not doubled.
json_escape() {
  printf '%s' "$1" \
    | sed -e 's/\\/\\\\/g' -e 's/"/\\"/g'
}
# Print the used-space percentage of $MOUNT_PATH without the "%" sign
# (capacity column of the data row of `df -P`).
disk_used_percent() {
  df -P "$MOUNT_PATH" \
    | awk 'NR == 2 { pct = $5; gsub(/%/, "", pct); print pct }'
}
# Print the human-readable available space of $MOUNT_PATH (4th df column).
disk_avail_human() {
  df -hP "$MOUNT_PATH" \
    | sed -n '2p' \
    | awk '{ print $4 }'
}
# Print the human-readable used space of $MOUNT_PATH (3rd df column).
disk_used_human() {
  df -hP "$MOUNT_PATH" \
    | sed -n '2p' \
    | awk '{ print $3 }'
}
# Print the human-readable total size of $MOUNT_PATH (2nd df column).
disk_size_human() {
  df -hP "$MOUNT_PATH" \
    | sed -n '2p' \
    | awk '{ print $2 }'
}
# Print the output of `docker system df` (stdout and stderr) on a single
# line, joining the original lines with "; ". Prints "docker cli not found"
# when docker is not installed.
#
# The text is returned raw: the status writer passes it through json_escape,
# so escaping quotes here as well would escape them twice.
docker_df_summary() {
  if command -v docker >/dev/null 2>&1; then
    # tr cannot map one character to the two-character separator "; ", so the
    # join is done in awk, which also avoids a trailing separator.
    docker system df 2>&1 \
      | awk 'NR > 1 { printf "; " } { printf "%s", $0 }'
  else
    printf 'docker cli not found'
  fi
}
# Free disk space: prune the docker build cache and dangling images when
# docker is installed, then remove RAP build artifacts directly under /tmp
# that are older than MIN_TMP_AGE_MINUTES. Every step is best effort; command
# output goes to $log_file.
cleanup_safe() {
  log "cleanup started: docker build cache and old RAP tmp artifacts"

  if command -v docker >/dev/null 2>&1; then
    {
      docker builder prune -af || true
      docker image prune -f || true
    } >>"$log_file" 2>&1
  fi

  # Name patterns of the /tmp entries that may be removed. Held in the
  # function's positional parameters so each pattern stays a separate word.
  set -- \
    -name 'rap-android-build-*' \
    -o -name 'rap-agent-build*' \
    -o -name 'rap-backend-build*' \
    -o -name 'rap-c*-build*' \
    -o -name 'rap-node-agent-*' \
    -o -name 'rap-*vpnfarm*' \
    -o -name 'rap-build-*'
  find /tmp -maxdepth 1 \( "$@" \) -mmin +"$MIN_TMP_AGE_MINUTES" -exec rm -rf {} + >>"$log_file" 2>&1 || true

  sync || true
  log "cleanup finished"
}
# Print a one-line, human-readable hint (no trailing newline) on how the
# filesystem at $MOUNT_PATH could be enlarged. Nothing is changed on the
# system; vgs and lsblk are only queried when they are installed.
# The variables assigned here are not local (plain sh), so root_source,
# vg_free, root_lv_bytes and parent_part_bytes stay set after the call.
expansion_hint() {
# Device backing $MOUNT_PATH: first column of the df data row.
root_source="$(df -P "$MOUNT_PATH" | awk 'NR==2 { print $1 }')"
vg_free=""
root_lv_bytes=""
parent_part_bytes=""
if command -v vgs >/dev/null 2>&1; then
# Free space (in g units) of every volume group reported by vgs, joined by
# single spaces with leading/trailing blanks trimmed.
vg_free="$(vgs --noheadings --units g -o vg_free 2>/dev/null | awk '{print $1}' | tr '\n' ' ' | sed 's/^ *//;s/ *$//')"
fi
if command -v lsblk >/dev/null 2>&1; then
# Size in bytes of the first "lvm" row whose mountpoint column is exactly "/".
root_lv_bytes="$(lsblk -b -n -o TYPE,SIZE,MOUNTPOINTS 2>/dev/null | awk '$1 == "lvm" && $3 == "/" { print $2; exit }')"
# Size in bytes of the last "part" row listed before the first "lvm" row.
# NOTE(review): presumably that partition is the PV backing the root LV — confirm for multi-disk layouts.
parent_part_bytes="$(lsblk -b -n -o TYPE,SIZE 2>/dev/null | awk '$1 == "part" { last=$2 } $1 == "lvm" { print last; exit }')"
fi
# The LVM branch is taken only when the device path starts with /dev/mapper/.
if printf '%s' "$root_source" | grep -q '^/dev/mapper/'; then
# Partition more than 1 GiB (1073741824 bytes) larger than the root LV:
# suggest extending the LV.
if [ -n "$root_lv_bytes" ] && [ -n "$parent_part_bytes" ] && [ "$parent_part_bytes" -gt "$((root_lv_bytes + 1073741824))" ]; then
printf 'LVM root detected on %s. Backing partition is larger than root LV, so expansion is likely available. Run with sudo: sudo lvextend -r -l +100%%FREE %s' "$root_source" "$root_source"
return
fi
# Free VG space is assumed unless some VG reports exactly "0g" or "0.00g".
# NOTE(review): assumes vgs prints zero free space in one of those two forms — confirm.
if [ -n "$vg_free" ] && ! printf '%s' "$vg_free" | grep -Eq '(^| )0(\.00)?g( |$)'; then
printf 'LVM root detected on %s. If approved, extend inside existing VG: sudo lvextend -r -l +100%%FREE %s' "$root_source" "$root_source"
else
printf 'LVM root detected on %s. No obvious free VG space. Expand VM disk, then run pvresize on the PV and lvextend -r for root LV.' "$root_source"
fi
else
printf 'Root filesystem is %s. Expand underlying disk/volume, then grow filesystem according to host partition layout.' "$root_source"
fi
}
# POST a JSON notification to $WEBHOOK_URL. Does nothing when WEBHOOK_URL is
# empty or curl is not installed; a failed request is ignored and curl's
# output goes to $log_file.
# Arguments: $1 - level, $2 - message
# Note: assigns the global variables level, message and payload (plain sh
# has no local variables).
notify() {
  level="$1"
  message="$2"

  [ -n "$WEBHOOK_URL" ] || return 0
  command -v curl >/dev/null 2>&1 || return 0

  # Escaped field values, in the order the payload template consumes them.
  set -- \
    "$(json_escape "$level")" \
    "$(json_escape "$message")" \
    "$(json_escape "$(hostname)")" \
    "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
  payload="$(printf '{"level":"%s","message":"%s","host":"%s","observed_at":"%s"}' "$@")"

  curl -fsS -m 5 -H 'Content-Type: application/json' -d "$payload" "$WEBHOOK_URL" >>"$log_file" 2>&1 || true
}
# ---------------------------------------------------------------------------
# Main pass: clean up when usage reaches CLEANUP_PERCENT, classify the
# resulting usage, publish the status JSON, log, notify, and exit with 2
# when the state is critical.
# ---------------------------------------------------------------------------
before_percent="$(disk_used_percent)"
action="none"
if [ "$before_percent" -ge "$CLEANUP_PERCENT" ]; then
  action="cleanup_safe"
  cleanup_safe
fi

# Classify on the usage measured after any cleanup; critical overrides warning.
after_percent="$(disk_used_percent)"
level="ok"
if [ "$after_percent" -ge "$WARN_PERCENT" ]; then
  level="warning"
fi
if [ "$after_percent" -ge "$CRITICAL_PERCENT" ]; then
  level="critical"
fi

hint="$(expansion_hint)"
summary="$(docker_df_summary)"
finished_at="$(date -u +%Y-%m-%dT%H:%M:%SZ)"

# Write to a temporary file and rename it so readers never see a partial document.
cat >"$status_file.tmp" <<EOF_STATUS
{
"schema_version": "rap.ops.test_docker_disk_guard.v1",
"host": "$(json_escape "$(hostname)")",
"mount_path": "$(json_escape "$MOUNT_PATH")",
"started_at": "$(json_escape "$started_at")",
"finished_at": "$(json_escape "$finished_at")",
"level": "$(json_escape "$level")",
"action": "$(json_escape "$action")",
"thresholds": {
"warn_percent": $WARN_PERCENT,
"cleanup_percent": $CLEANUP_PERCENT,
"critical_percent": $CRITICAL_PERCENT
},
"disk": {
"before_used_percent": $before_percent,
"after_used_percent": $after_percent,
"size": "$(json_escape "$(disk_size_human)")",
"used": "$(json_escape "$(disk_used_human)")",
"available": "$(json_escape "$(disk_avail_human)")"
},
"docker_system_df": "$(json_escape "$summary")",
"expansion_hint": "$(json_escape "$hint")",
"log_file": "$(json_escape "$log_file")"
}
EOF_STATUS
mv "$status_file.tmp" "$status_file"

message="test-docker disk ${level}: ${after_percent}% used after action=${action}. ${hint}"
log "$message"

# Only non-ok states are pushed to the webhook.
case "$level" in
  ok) ;;
  *) notify "$level" "$message" ;;
esac

# Exit code 2 signals "critical" to the caller.
[ "$level" != "critical" ] || exit 2
exit 0