Use capacity pressure in endpoint ranking

This commit is contained in:
2026-05-16 12:06:05 +03:00
parent 90fe4b6872
commit 9a170c83c2
5 changed files with 162 additions and 11 deletions
@@ -748,6 +748,37 @@ func (s *vpnFabricSessionDialStats) capacityCountersSnapshot(maxEntries int) []v
return values[:maxEntries]
}
// capacityPressureForScoring builds a per-endpoint view of the recorded
// capacity counters, keyed by the whitespace-trimmed endpoint ID.
//
// A counter is left out when its trimmed endpoint ID is empty, when its Count
// is not positive, or when maxAge is positive, the counter carries a positive
// LastSeenUnixSec, and that timestamp is more than maxAge before the current
// time. Counters without a positive LastSeenUnixSec are never filtered by age.
//
// The result is nil for a nil receiver and also when no counter survives the
// filters, so callers can treat nil as "no pressure data".
func (s *vpnFabricSessionDialStats) capacityPressureForScoring(maxAge time.Duration) map[string]mesh.EndpointCandidateCapacityPressure {
	if s == nil {
		return nil
	}

	// The reference instant is captured before taking the lock.
	now := time.Now().UTC()

	s.capacityMu.Lock()
	defer s.capacityMu.Unlock()

	pressure := make(map[string]mesh.EndpointCandidateCapacityPressure, len(s.capacityByEndpoint))
	for _, entry := range s.capacityByEndpoint {
		id := strings.TrimSpace(entry.EndpointID)
		switch {
		case id == "", entry.Count <= 0:
			// Nothing usable to report for this counter.
			continue
		case maxAge > 0 && entry.LastSeenUnixSec > 0 &&
			now.Sub(time.Unix(entry.LastSeenUnixSec, 0).UTC()) > maxAge:
			// Last seen too long ago to count as current pressure.
			continue
		}
		pressure[id] = mesh.EndpointCandidateCapacityPressure{
			EndpointID:      id,
			Count:           entry.Count,
			LastSeenUnixSec: entry.LastSeenUnixSec,
		}
	}

	if len(pressure) > 0 {
		return pressure
	}
	return nil
}
func (s *vpnFabricSessionDialStats) ObserveAllCandidatesFailed() {
if s == nil {
return
@@ -5254,12 +5285,18 @@ func vpnFabricSessionTargets(meshState *syntheticMeshState, nextHop string) []me
out := make([]mesh.FabricTransportTarget, 0, len(meshState.PeerEndpointCandidates[nextHop])+1)
seen := map[string]struct{}{}
if candidates := meshState.PeerEndpointCandidates[nextHop]; len(candidates) > 0 {
var capacityPressure map[string]mesh.EndpointCandidateCapacityPressure
if meshState.VPNFabricSessionDialStats != nil {
capacityPressure = meshState.VPNFabricSessionDialStats.capacityPressureForScoring(2 * time.Minute)
}
ranked := mesh.RankPeerEndpointCandidates(candidates, mesh.EndpointCandidateScoreOptions{
ChannelClass: mesh.SyntheticChannelFabricControl,
Now: time.Now().UTC(),
MaxVerificationAge: 5 * time.Minute,
Observations: mergedEndpointCandidateObservations(meshState.PeerEndpointObservations, meshState.VPNFabricEndpointObservations.Snapshot()),
MaxObservationAge: 5 * time.Minute,
ChannelClass: mesh.SyntheticChannelFabricControl,
Now: time.Now().UTC(),
MaxVerificationAge: 5 * time.Minute,
Observations: mergedEndpointCandidateObservations(meshState.PeerEndpointObservations, meshState.VPNFabricEndpointObservations.Snapshot()),
MaxObservationAge: 5 * time.Minute,
CapacityPressure: capacityPressure,
MaxCapacityPressureAge: 2 * time.Minute,
})
for _, item := range ranked {
endpoint := strings.TrimRight(strings.TrimSpace(item.Candidate.Address), "/")
@@ -1086,6 +1086,47 @@ func TestVPNFabricSessionTargetsUseRemoteHealthObservations(t *testing.T) {
}
}
func TestVPNFabricSessionTargetsUseCapacityPressureForLoadSpread(t *testing.T) {
now := time.Now().UTC()
stats := newVPNFabricSessionDialStats()
for i := 0; i < 8; i++ {
stats.ObserveCapacityLimited(mesh.FabricTransportTarget{
EndpointID: "node-b-quic-a",
Endpoint: "quic://node-b-a.example.test:19443",
Transport: "direct_quic",
})
}
targets := vpnFabricSessionTargets(&syntheticMeshState{
VPNFabricSessionDialStats: stats,
PeerEndpointCandidates: map[string][]mesh.PeerEndpointCandidate{
"node-b": {
{
EndpointID: "node-b-quic-a",
NodeID: "node-b",
Transport: "direct_quic",
Address: "quic://node-b-a.example.test:19443",
Reachability: "public",
ConnectivityMode: "direct",
LastVerifiedAt: &now,
},
{
EndpointID: "node-b-quic-b",
NodeID: "node-b",
Transport: "direct_quic",
Address: "quic://node-b-b.example.test:19443",
Reachability: "public",
ConnectivityMode: "direct",
Priority: 5,
LastVerifiedAt: &now,
},
},
},
}, "node-b")
if len(targets) != 2 || targets[0].EndpointID != "node-b-quic-b" {
t.Fatalf("targets did not spread away from pressured endpoint: %+v", targets)
}
}
func TestMergedEndpointCandidateObservationsKeepsNewest(t *testing.T) {
now := time.Now().UTC()
merged := mergedEndpointCandidateObservations(