// Source: rdp-proxy/agents/rap-node-agent/internal/mesh/peer_recovery_plan.go
// Timestamp: 2026-04-28 22:29:50 +03:00
// 277 lines, 7.9 KiB, Go

package mesh
import (
"sort"
"strings"
"time"
)
// Values reported in PeerRecoveryPlan.Mode.
const (
// PeerRecoveryModeSteady is used when the ready-peer deficit is zero.
PeerRecoveryModeSteady = "steady"
// PeerRecoveryModeRecovery is used when fewer peers are ready than the target.
PeerRecoveryModeRecovery = "recovery"
)
// Fallbacks PlanPeerRecovery applies when the matching config field is zero or
// negative.
const (
// DefaultStablePeerTarget replaces a non-positive
// PeerRecoveryPlanConfig.TargetReadyPeers.
DefaultStablePeerTarget = 3
// DefaultRecoveryProbeLimit replaces a non-positive
// PeerRecoveryPlanConfig.MaxProbeCandidates.
DefaultRecoveryProbeLimit = 6
)
// PeerRecoveryPlanConfig carries the inputs consumed by PlanPeerRecovery.
type PeerRecoveryPlanConfig struct {
// PeerCache lists the known peers. Only entries with a non-blank NodeID and
// Endpoint count as connectable or are considered as candidates.
PeerCache PeerCacheSnapshot
// Connections holds per-node connection states. Entries with a blank NodeID
// are ignored.
Connections PeerConnectionSnapshot
// TargetReadyPeers is the desired number of ready peers. Values <= 0 fall
// back to DefaultStablePeerTarget; the result is capped at the number of
// connectable peers.
TargetReadyPeers int
// MaxProbeCandidates bounds the number of returned candidates. Values <= 0
// fall back to DefaultRecoveryProbeLimit; the limit is raised to the target
// when it is smaller, and is set to the target in steady mode.
MaxProbeCandidates int
// Now is the planning timestamp. It is passed through normalizedNow (not
// shown in this file section) before being used.
Now time.Time
}
// PeerRecoveryPlan is the result of PlanPeerRecovery: summary counters plus
// the ordered list of peers selected for maintenance or recovery.
type PeerRecoveryPlan struct {
// Mode is PeerRecoveryModeSteady or PeerRecoveryModeRecovery.
Mode string `json:"mode"`
// Healthy is true when Deficit is zero.
Healthy bool `json:"healthy"`
// TargetReadyPeers is the effective target after defaulting and capping at
// ConnectablePeerCount.
TargetReadyPeers int `json:"target_ready_peers"`
// ReadyPeerCount counts connections in the ready or relay-ready state whose
// peer-cache entry has a non-blank endpoint.
ReadyPeerCount int `json:"ready_peer_count"`
// DegradedPeerCount counts degraded connections under the same filter.
DegradedPeerCount int `json:"degraded_peer_count"`
// BackoffPeerCount counts connections in the backoff state under the same
// filter, whether or not their backoff deadline has passed.
BackoffPeerCount int `json:"backoff_peer_count"`
// ConnectablePeerCount is the number of peer-cache entries with a non-blank
// NodeID and Endpoint.
ConnectablePeerCount int `json:"connectable_peer_count"`
// Deficit is TargetReadyPeers minus ReadyPeerCount, floored at zero.
Deficit int `json:"deficit"`
// ProbeCandidateCount is len(Candidates).
ProbeCandidateCount int `json:"probe_candidate_count"`
// RecoverySeedCandidateCount is the number of entries in Candidates flagged
// as recovery seeds.
RecoverySeedCandidateCount int `json:"recovery_seed_candidate_count"`
// GeneratedAt is the normalized planning timestamp.
GeneratedAt time.Time `json:"generated_at"`
// Candidates is ordered by descending Priority, ties broken by ascending
// NodeID, and truncated to the candidate limit.
Candidates []PeerRecoveryCandidate `json:"candidates,omitempty"`
}
// PeerRecoveryCandidate describes one peer selected by PlanPeerRecovery,
// combining fields copied from its peer-cache entry with its connection state.
type PeerRecoveryCandidate struct {
// NodeID is copied from the peer-cache entry.
NodeID string `json:"node_id"`
// Endpoint is the peer-cache endpoint with surrounding whitespace trimmed.
Endpoint string `json:"endpoint,omitempty"`
// Warm, WarmReason, RecoverySeed, BestCandidateID and BestTransport are
// copied unchanged from the peer-cache entry.
Warm bool `json:"warm"`
WarmReason string `json:"warm_reason,omitempty"`
RecoverySeed bool `json:"recovery_seed"`
BestCandidateID string `json:"best_candidate_id,omitempty"`
BestTransport string `json:"best_transport,omitempty"`
// ConnectionState is the connection's state, or PeerConnectionDisconnected
// when no state is recorded for the node.
ConnectionState string `json:"connection_state"`
// ConsecutiveFailures and LastLatencyMs are copied from the connection state.
ConsecutiveFailures int `json:"consecutive_failures,omitempty"`
LastLatencyMs int `json:"last_latency_ms,omitempty"`
// BackoffUntil is copied from the connection state.
// NOTE(review): encoding/json's omitempty does not treat a zero time.Time as
// empty, so a zero value is still serialized — confirm whether that is
// intended.
BackoffUntil time.Time `json:"backoff_until,omitempty"`
// Reason is one of "maintain_ready", "recover_degraded", "recover_warm",
// "recover_seed" or "recover_peer".
Reason string `json:"reason"`
// Priority is the non-negative ranking score; higher values sort first.
Priority int `json:"priority"`
}
// peerRecoveryCandidateBuild is the intermediate element type PlanPeerRecovery
// sorts and truncates before unwrapping the embedded PeerRecoveryCandidate.
// It adds no fields of its own.
type peerRecoveryCandidateBuild struct {
PeerRecoveryCandidate
}
// PlanPeerRecovery builds a PeerRecoveryPlan from the peer cache and the
// current connection states.
//
// The ready-peer target defaults to DefaultStablePeerTarget and is capped at
// the number of connectable peers. The candidate limit defaults to
// DefaultRecoveryProbeLimit and is never below the target. When the number of
// ready peers meets the target the plan is in steady mode and the limit is set
// to the target; otherwise it is in recovery mode.
//
// Candidates are peer-cache entries with a non-blank NodeID and Endpoint that
// are not inside an unexpired backoff window and for which
// peerRecoveryCandidateReason reports eligibility. They are ordered by
// descending priority, then ascending NodeID, and truncated to the limit.
func PlanPeerRecovery(cfg PeerRecoveryPlanConfig) PeerRecoveryPlan {
	generatedAt := normalizedNow(cfg.Now)

	wantReady := cfg.TargetReadyPeers
	if wantReady <= 0 {
		wantReady = DefaultStablePeerTarget
	}
	maxCandidates := cfg.MaxProbeCandidates
	if maxCandidates <= 0 {
		maxCandidates = DefaultRecoveryProbeLimit
	}

	// The target cannot exceed what is dialable, and the limit must leave room
	// for at least the target number of peers.
	connectable := connectablePeerCount(cfg.PeerCache)
	if wantReady > connectable {
		wantReady = connectable
	}
	if maxCandidates < wantReady {
		maxCandidates = wantReady
	}

	// Index both snapshots by node ID; entries without a usable ID are dropped
	// and later duplicates overwrite earlier ones.
	stateByNode := make(map[string]PeerConnectionState)
	for _, conn := range cfg.Connections.Entries {
		if strings.TrimSpace(conn.NodeID) != "" {
			stateByNode[conn.NodeID] = conn
		}
	}
	cacheByNode := make(map[string]PeerCacheEntry)
	for _, cached := range cfg.PeerCache.Entries {
		if strings.TrimSpace(cached.NodeID) != "" {
			cacheByNode[cached.NodeID] = cached
		}
	}

	// Tally connection states for nodes that have both a connection record and
	// a cached endpoint.
	var readyCount, degradedCount, backoffCount int
	for id, cached := range cacheByNode {
		conn, tracked := stateByNode[id]
		if !tracked || strings.TrimSpace(cached.Endpoint) == "" {
			continue
		}
		if conn.State == PeerConnectionReady || conn.State == PeerConnectionRelayReady {
			readyCount++
		} else if conn.State == PeerConnectionDegraded {
			degradedCount++
		} else if conn.State == PeerConnectionBackoff {
			backoffCount++
		}
	}

	missing := 0
	if readyCount < wantReady {
		missing = wantReady - readyCount
	}
	planMode := PeerRecoveryModeSteady
	if missing > 0 {
		planMode = PeerRecoveryModeRecovery
	} else {
		// Steady mode only maintains up to the target number of peers.
		maxCandidates = wantReady
	}

	pool := make([]peerRecoveryCandidateBuild, 0, len(cfg.PeerCache.Entries))
	for _, cached := range cfg.PeerCache.Entries {
		endpoint := strings.TrimSpace(cached.Endpoint)
		if endpoint == "" || strings.TrimSpace(cached.NodeID) == "" {
			continue
		}

		// A node without a recorded state is treated as disconnected.
		conn := stateByNode[cached.NodeID]
		if conn.State == "" {
			conn.State = PeerConnectionDisconnected
		}

		// Respect a backoff window that has not yet elapsed.
		stillBackingOff := conn.State == PeerConnectionBackoff && conn.BackoffUntil.After(generatedAt)
		if stillBackingOff {
			continue
		}

		why, eligible := peerRecoveryCandidateReason(planMode, cached, conn)
		if !eligible {
			continue
		}

		pool = append(pool, peerRecoveryCandidateBuild{
			PeerRecoveryCandidate: PeerRecoveryCandidate{
				NodeID:              cached.NodeID,
				Endpoint:            endpoint,
				Warm:                cached.Warm,
				WarmReason:          cached.WarmReason,
				RecoverySeed:        cached.RecoverySeed,
				BestCandidateID:     cached.BestCandidateID,
				BestTransport:       cached.BestTransport,
				ConnectionState:     conn.State,
				ConsecutiveFailures: conn.ConsecutiveFailures,
				LastLatencyMs:       conn.LastLatencyMs,
				BackoffUntil:        conn.BackoffUntil,
				Reason:              why,
				Priority:            peerRecoveryCandidatePriority(cached, conn, why),
			},
		})
	}

	// Highest priority first; node ID keeps the order deterministic on ties.
	sort.SliceStable(pool, func(a, b int) bool {
		left, right := &pool[a], &pool[b]
		if left.Priority == right.Priority {
			return left.NodeID < right.NodeID
		}
		return left.Priority > right.Priority
	})
	if len(pool) > maxCandidates {
		pool = pool[:maxCandidates]
	}

	// Unwrap into the exported type and count recovery seeds along the way.
	selected := make([]PeerRecoveryCandidate, len(pool))
	seedCount := 0
	for i := range pool {
		selected[i] = pool[i].PeerRecoveryCandidate
		if selected[i].RecoverySeed {
			seedCount++
		}
	}

	return PeerRecoveryPlan{
		Mode:                       planMode,
		Healthy:                    missing == 0,
		TargetReadyPeers:           wantReady,
		ReadyPeerCount:             readyCount,
		DegradedPeerCount:          degradedCount,
		BackoffPeerCount:           backoffCount,
		ConnectablePeerCount:       connectable,
		Deficit:                    missing,
		ProbeCandidateCount:        len(selected),
		RecoverySeedCandidateCount: seedCount,
		GeneratedAt:                generatedAt,
		Candidates:                 selected,
	}
}
// peerRecoveryCandidateReason classifies why a peer belongs in the plan and
// reports whether it is eligible at all.
//
// Ready and relay-ready peers are always kept as "maintain_ready". In steady
// mode nothing else is eligible. In any other mode the first match wins:
// degraded connection ("recover_degraded"), warm entry ("recover_warm"),
// recovery seed ("recover_seed"), and finally "recover_peer".
func peerRecoveryCandidateReason(mode string, entry PeerCacheEntry, connection PeerConnectionState) (string, bool) {
	switch {
	case connection.State == PeerConnectionReady, connection.State == PeerConnectionRelayReady:
		return "maintain_ready", true
	case mode == PeerRecoveryModeSteady:
		// Steady mode never probes peers that are not already ready.
		return "", false
	case connection.State == PeerConnectionDegraded:
		return "recover_degraded", true
	case entry.Warm:
		return "recover_warm", true
	case entry.RecoverySeed:
		return "recover_seed", true
	default:
		return "recover_peer", true
	}
}
// peerRecoveryCandidatePriority scores a candidate; higher scores sort first
// and the result is never negative.
//
// The score is the sum of three bonuses minus a latency penalty:
//   - cache: +1000 if warm, a warm-reason bonus (route_adjacent 500,
//     recovery_seed 350, endpoint_candidate 200, peer_endpoint 100), +250 for
//     a recovery seed, +150 when a best candidate ID is set, plus
//     BestCandidateScore/10;
//   - connection state: ready/relay-ready 600, degraded 350, connecting 200,
//     disconnected 100;
//   - reason: maintain_ready 500, recover_degraded 300, recover_seed 250,
//     recover_warm 150;
//   - penalty: LastLatencyMs/10 when the latency is positive.
func peerRecoveryCandidatePriority(entry PeerCacheEntry, connection PeerConnectionState, reason string) int {
	// Contribution from the peer-cache entry.
	cacheBonus := entry.BestCandidateScore / 10
	if entry.Warm {
		cacheBonus += 1000
	}
	if entry.RecoverySeed {
		cacheBonus += 250
	}
	if entry.BestCandidateID != "" {
		cacheBonus += 150
	}
	if entry.WarmReason == "route_adjacent" {
		cacheBonus += 500
	} else if entry.WarmReason == "recovery_seed" {
		cacheBonus += 350
	} else if entry.WarmReason == "endpoint_candidate" {
		cacheBonus += 200
	} else if entry.WarmReason == "peer_endpoint" {
		cacheBonus += 100
	}

	// Contribution from the current connection state; unlisted states add
	// nothing.
	stateBonus := 0
	if connection.State == PeerConnectionReady || connection.State == PeerConnectionRelayReady {
		stateBonus = 600
	} else if connection.State == PeerConnectionDegraded {
		stateBonus = 350
	} else if connection.State == PeerConnectionConnecting {
		stateBonus = 200
	} else if connection.State == PeerConnectionDisconnected {
		stateBonus = 100
	}

	// Contribution from the selection reason; "recover_peer" and unknown
	// reasons add nothing.
	reasonBonus := 0
	if reason == "maintain_ready" {
		reasonBonus = 500
	} else if reason == "recover_degraded" {
		reasonBonus = 300
	} else if reason == "recover_seed" {
		reasonBonus = 250
	} else if reason == "recover_warm" {
		reasonBonus = 150
	}

	// Only a positive latency measurement is penalised.
	latencyPenalty := 0
	if connection.LastLatencyMs > 0 {
		latencyPenalty = connection.LastLatencyMs / 10
	}

	total := cacheBonus + stateBonus + reasonBonus - latencyPenalty
	if total < 0 {
		return 0
	}
	return total
}
// connectablePeerCount returns how many entries in the snapshot have both a
// non-blank NodeID and a non-blank Endpoint (whitespace-only values count as
// blank).
func connectablePeerCount(snapshot PeerCacheSnapshot) int {
	total := 0
	for _, peer := range snapshot.Entries {
		hasID := strings.TrimSpace(peer.NodeID) != ""
		hasEndpoint := strings.TrimSpace(peer.Endpoint) != ""
		if hasID && hasEndpoint {
			total++
		}
	}
	return total
}