Score capacity pressure softly

This commit is contained in:
2026-05-16 11:55:38 +03:00
parent d67b80a7aa
commit a02f4fa8aa
3 changed files with 44 additions and 2 deletions
@@ -237,9 +237,14 @@ func scoreEndpointCandidateObservation(observation EndpointCandidateHealthObserv
reasons = append(reasons, "history:failure")
}
if strings.TrimSpace(observation.LastFailureReason) != "" {
if strings.TrimSpace(observation.LastFailureReason) == "capacity_limited" {
score -= 4
reasons = append(reasons, "capacity:limited")
} else {
score -= 8
reasons = append(reasons, "failure:recent")
}
}
return score, reasons
}
@@ -353,6 +353,40 @@ func TestRankPeerEndpointCandidatesDoesNotRewardZeroLatencyFailure(t *testing.T)
}
}
func TestRankPeerEndpointCandidatesTreatsCapacityAsSoftPressure(t *testing.T) {
now := time.Date(2026, 5, 16, 12, 0, 0, 0, time.UTC)
ranked := RankPeerEndpointCandidates([]PeerEndpointCandidate{
{
EndpointID: "node-b-quic",
NodeID: "node-b",
Transport: "direct_quic",
Address: "quic://node-b.example.test:19443",
Reachability: "public",
ConnectivityMode: "direct",
Priority: 10,
LastVerifiedAt: &now,
},
}, EndpointCandidateScoreOptions{
Now: now,
MaxVerificationAge: time.Minute,
Observations: map[string]EndpointCandidateHealthObservation{
"node-b-quic": {
EndpointID: "node-b-quic",
LastFailureReason: "capacity_limited",
ReliabilityScore: 95,
ObservedAt: now,
},
},
MaxObservationAge: time.Minute,
})
if len(ranked) != 1 || !containsReason(ranked[0].Reasons, "capacity:limited") {
t.Fatalf("capacity pressure reason missing: %+v", ranked)
}
if containsReason(ranked[0].Reasons, "failure:recent") {
t.Fatalf("capacity pressure treated as recent failure: %+v", ranked[0].Reasons)
}
}
func containsReason(reasons []string, reason string) bool {
for _, item := range reasons {
if item == reason {
@@ -374,6 +374,9 @@ but saturated carrier.
VPN fabric dial telemetry records the last capacity-limited endpoint and
transport, making stream saturation visible without poisoning endpoint health
observations.
Endpoint ranking treats `capacity_limited` observations as a soft pressure
penalty instead of a hard recent failure, enabling load spreading without
marking the carrier unhealthy.
Cached QUIC carrier idle TTL is configurable through
`RAP_VPN_FABRIC_QUIC_IDLE_TTL_SECONDS` / `-vpn-fabric-quic-idle-ttl` and
propagated by host-agent install profiles.