package mesh

import (
	"sort"
	"strings"
	"time"
)

// Values reported in PeerRecoveryPlan.Mode: "steady" when the ready-peer
// deficit is zero, "recovery" when fewer peers are ready than targeted.
const (
	PeerRecoveryModeSteady   = "steady"
	PeerRecoveryModeRecovery = "recovery"
)

// Fallbacks PlanPeerRecovery applies when TargetReadyPeers or
// MaxProbeCandidates, respectively, is zero or negative.
const (
	DefaultStablePeerTarget   = 3
	DefaultRecoveryProbeLimit = 6
)

// PeerRecoveryPlanConfig is the input to PlanPeerRecovery.
//
// PeerCache supplies the known peers and Connections their connection state;
// the two are joined on NodeID. TargetReadyPeers and MaxProbeCandidates fall
// back to DefaultStablePeerTarget and DefaultRecoveryProbeLimit when they are
// not positive. Now is passed through normalizedNow before use.
type PeerRecoveryPlanConfig struct {
	PeerCache          PeerCacheSnapshot
	Connections        PeerConnectionSnapshot
	TargetReadyPeers   int
	MaxProbeCandidates int
	Now                time.Time
}

// PeerRecoveryPlan is the result of PlanPeerRecovery: the effective target,
// per-state peer counts, the resulting deficit and mode, and the ranked
// candidates selected for probing.
type PeerRecoveryPlan struct {
	Mode                       string                  `json:"mode"`
	Healthy                    bool                    `json:"healthy"`
	TargetReadyPeers           int                     `json:"target_ready_peers"`
	ReadyPeerCount             int                     `json:"ready_peer_count"`
	DegradedPeerCount          int                     `json:"degraded_peer_count"`
	BackoffPeerCount           int                     `json:"backoff_peer_count"`
	ConnectablePeerCount       int                     `json:"connectable_peer_count"`
	Deficit                    int                     `json:"deficit"`
	ProbeCandidateCount        int                     `json:"probe_candidate_count"`
	RecoverySeedCandidateCount int                     `json:"recovery_seed_candidate_count"`
	GeneratedAt                time.Time               `json:"generated_at"`
	Candidates                 []PeerRecoveryCandidate `json:"candidates,omitempty"`
}

// PeerRecoveryCandidate describes one peer selected by PlanPeerRecovery,
// combining fields copied from its cache entry and its connection state with
// the computed Reason and Priority.
//
// NOTE(review): encoding/json's omitempty never omits a struct value, so
// backoff_until is emitted even when BackoffUntil is the zero time.
type PeerRecoveryCandidate struct {
	NodeID              string    `json:"node_id"`
	Endpoint            string    `json:"endpoint,omitempty"`
	Warm                bool      `json:"warm"`
	WarmReason          string    `json:"warm_reason,omitempty"`
	RecoverySeed        bool      `json:"recovery_seed"`
	BestCandidateID     string    `json:"best_candidate_id,omitempty"`
	BestTransport       string    `json:"best_transport,omitempty"`
	ConnectionState     string    `json:"connection_state"`
	ConsecutiveFailures int       `json:"consecutive_failures,omitempty"`
	LastLatencyMs       int       `json:"last_latency_ms,omitempty"`
	BackoffUntil        time.Time `json:"backoff_until,omitempty"`
	Reason              string    `json:"reason"`
	Priority            int       `json:"priority"`
}

// peerRecoveryCandidateBuild wraps a candidate while PlanPeerRecovery ranks
// and truncates the list; only the embedded value reaches the plan.
type peerRecoveryCandidateBuild struct {
	PeerRecoveryCandidate
}

// PlanPeerRecovery builds a recovery plan from a peer cache snapshot and the
// current connection states.
//
// The effective target is cfg.TargetReadyPeers (or DefaultStablePeerTarget),
// capped at the number of connectable peers. The probe limit is
// cfg.MaxProbeCandidates (or DefaultRecoveryProbeLimit), raised to at least
// the target; in steady mode it is set to exactly the target. The plan is in
// recovery mode whenever fewer peers are ready than the target. Candidates are
// ordered by descending Priority, ties broken by ascending NodeID, and cut to
// the probe limit.
func PlanPeerRecovery(cfg PeerRecoveryPlanConfig) PeerRecoveryPlan {
	now := normalizedNow(cfg.Now)

	// Resolve the effective target and probe limit.
	connectable := connectablePeerCount(cfg.PeerCache)
	target := cfg.TargetReadyPeers
	if target <= 0 {
		target = DefaultStablePeerTarget
	}
	if target > connectable {
		target = connectable
	}
	limit := cfg.MaxProbeCandidates
	if limit <= 0 {
		limit = DefaultRecoveryProbeLimit
	}
	if limit < target {
		limit = target
	}

	// Index both snapshots by node ID; later duplicates overwrite earlier ones.
	connectionByNode := make(map[string]PeerConnectionState, len(cfg.Connections.Entries))
	for _, conn := range cfg.Connections.Entries {
		if strings.TrimSpace(conn.NodeID) != "" {
			connectionByNode[conn.NodeID] = conn
		}
	}
	entryByNode := make(map[string]PeerCacheEntry, len(cfg.PeerCache.Entries))
	for _, peer := range cfg.PeerCache.Entries {
		if strings.TrimSpace(peer.NodeID) != "" {
			entryByNode[peer.NodeID] = peer
		}
	}

	// Tally connection states, counting only connections whose node has a
	// cache entry with a non-blank endpoint.
	var ready, degraded, backoff int
	for nodeID, peer := range entryByNode {
		if strings.TrimSpace(peer.Endpoint) == "" {
			continue
		}
		conn, tracked := connectionByNode[nodeID]
		if !tracked {
			continue
		}
		switch conn.State {
		case PeerConnectionReady, PeerConnectionRelayReady:
			ready++
		case PeerConnectionDegraded:
			degraded++
		case PeerConnectionBackoff:
			backoff++
		}
	}

	deficit := 0
	if ready < target {
		deficit = target - ready
	}
	mode := PeerRecoveryModeRecovery
	if deficit == 0 {
		mode = PeerRecoveryModeSteady
		// Steady mode keeps no more candidates than the target.
		limit = target
	}

	ranked := make([]peerRecoveryCandidateBuild, 0, len(cfg.PeerCache.Entries))
	for _, peer := range cfg.PeerCache.Entries {
		endpoint := strings.TrimSpace(peer.Endpoint)
		if endpoint == "" || strings.TrimSpace(peer.NodeID) == "" {
			continue
		}

		// Peers without a recorded connection are treated as disconnected.
		conn := connectionByNode[peer.NodeID]
		if conn.State == "" {
			conn.State = PeerConnectionDisconnected
		}

		// Skip peers whose backoff window has not expired yet.
		stillBackingOff := conn.State == PeerConnectionBackoff && conn.BackoffUntil.After(now)
		if stillBackingOff {
			continue
		}

		reason, eligible := peerRecoveryCandidateReason(mode, peer, conn)
		if !eligible {
			continue
		}

		var built peerRecoveryCandidateBuild
		built.NodeID = peer.NodeID
		built.Endpoint = endpoint
		built.Warm = peer.Warm
		built.WarmReason = peer.WarmReason
		built.RecoverySeed = peer.RecoverySeed
		built.BestCandidateID = peer.BestCandidateID
		built.BestTransport = peer.BestTransport
		built.ConnectionState = conn.State
		built.ConsecutiveFailures = conn.ConsecutiveFailures
		built.LastLatencyMs = conn.LastLatencyMs
		built.BackoffUntil = conn.BackoffUntil
		built.Reason = reason
		built.Priority = peerRecoveryCandidatePriority(peer, conn, reason)
		ranked = append(ranked, built)
	}

	// Highest priority first; equal priorities are ordered by node ID.
	sort.SliceStable(ranked, func(i, j int) bool {
		a, b := ranked[i], ranked[j]
		if a.Priority == b.Priority {
			return a.NodeID < b.NodeID
		}
		return a.Priority > b.Priority
	})
	if limit < len(ranked) {
		ranked = ranked[:limit]
	}

	// Unwrap into an exactly-sized, non-nil slice and count recovery seeds.
	selected := make([]PeerRecoveryCandidate, len(ranked))
	seedCount := 0
	for i := range ranked {
		selected[i] = ranked[i].PeerRecoveryCandidate
		if selected[i].RecoverySeed {
			seedCount++
		}
	}

	return PeerRecoveryPlan{
		Mode:                       mode,
		Healthy:                    deficit == 0,
		TargetReadyPeers:           target,
		ReadyPeerCount:             ready,
		DegradedPeerCount:          degraded,
		BackoffPeerCount:           backoff,
		ConnectablePeerCount:       connectable,
		Deficit:                    deficit,
		ProbeCandidateCount:        len(selected),
		RecoverySeedCandidateCount: seedCount,
		GeneratedAt:                now,
		Candidates:                 selected,
	}
}

// peerRecoveryCandidateReason reports why a peer belongs in the plan, and
// false when it does not. Ready and relay-ready peers always qualify as
// "maintain_ready". Any other peer qualifies only outside steady mode, with
// the reason chosen in this order: degraded connection, warm entry, recovery
// seed, then the generic "recover_peer".
func peerRecoveryCandidateReason(mode string, entry PeerCacheEntry, connection PeerConnectionState) (string, bool) {
	switch connection.State {
	case PeerConnectionReady, PeerConnectionRelayReady:
		return "maintain_ready", true
	}
	if mode == PeerRecoveryModeSteady {
		return "", false
	}

	switch {
	case connection.State == PeerConnectionDegraded:
		return "recover_degraded", true
	case entry.Warm:
		return "recover_warm", true
	case entry.RecoverySeed:
		return "recover_seed", true
	default:
		return "recover_peer", true
	}
}

// peerRecoveryCandidatePriority scores a candidate; PlanPeerRecovery ranks
// higher scores first. The score is a sum of bonuses for the cache entry
// (warmth, warm reason, recovery seed, known best candidate and a tenth of its
// score), the connection state, and the selection reason, minus a tenth of
// the last measured latency. Negative totals are reported as 0.
func peerRecoveryCandidatePriority(entry PeerCacheEntry, connection PeerConnectionState, reason string) int {
	var entryBonus int
	if entry.Warm {
		entryBonus += 1000
	}
	if entry.RecoverySeed {
		entryBonus += 250
	}
	if entry.BestCandidateID != "" {
		entryBonus += 150
	}
	entryBonus += entry.BestCandidateScore / 10

	var warmReasonBonus int
	switch entry.WarmReason {
	case "route_adjacent":
		warmReasonBonus = 500
	case "recovery_seed":
		warmReasonBonus = 350
	case "endpoint_candidate":
		warmReasonBonus = 200
	case "peer_endpoint":
		warmReasonBonus = 100
	}

	var stateBonus int
	switch connection.State {
	case PeerConnectionReady, PeerConnectionRelayReady:
		stateBonus = 600
	case PeerConnectionDegraded:
		stateBonus = 350
	case PeerConnectionConnecting:
		stateBonus = 200
	case PeerConnectionDisconnected:
		stateBonus = 100
	}

	var reasonBonus int
	switch reason {
	case "maintain_ready":
		reasonBonus = 500
	case "recover_degraded":
		reasonBonus = 300
	case "recover_seed":
		reasonBonus = 250
	case "recover_warm":
		reasonBonus = 150
	}

	// Only a positive latency measurement counts against the peer.
	var latencyPenalty int
	if connection.LastLatencyMs > 0 {
		latencyPenalty = connection.LastLatencyMs / 10
	}

	total := entryBonus + warmReasonBonus + stateBonus + reasonBonus - latencyPenalty
	if total < 0 {
		return 0
	}
	return total
}

// connectablePeerCount returns how many entries in snapshot have both a
// non-blank node ID and a non-blank endpoint.
func connectablePeerCount(snapshot PeerCacheSnapshot) int {
	total := 0
	for _, peer := range snapshot.Entries {
		if strings.TrimSpace(peer.NodeID) != "" && strings.TrimSpace(peer.Endpoint) != "" {
			total++
		}
	}
	return total
}