Use capacity pressure in endpoint ranking
This commit is contained in:
@@ -748,6 +748,37 @@ func (s *vpnFabricSessionDialStats) capacityCountersSnapshot(maxEntries int) []v
|
||||
return values[:maxEntries]
|
||||
}
|
||||
|
||||
func (s *vpnFabricSessionDialStats) capacityPressureForScoring(maxAge time.Duration) map[string]mesh.EndpointCandidateCapacityPressure {
|
||||
if s == nil {
|
||||
return nil
|
||||
}
|
||||
now := time.Now().UTC()
|
||||
s.capacityMu.Lock()
|
||||
defer s.capacityMu.Unlock()
|
||||
out := make(map[string]mesh.EndpointCandidateCapacityPressure, len(s.capacityByEndpoint))
|
||||
for _, counter := range s.capacityByEndpoint {
|
||||
endpointID := strings.TrimSpace(counter.EndpointID)
|
||||
if endpointID == "" || counter.Count <= 0 {
|
||||
continue
|
||||
}
|
||||
if maxAge > 0 && counter.LastSeenUnixSec > 0 {
|
||||
lastSeen := time.Unix(counter.LastSeenUnixSec, 0).UTC()
|
||||
if now.Sub(lastSeen) > maxAge {
|
||||
continue
|
||||
}
|
||||
}
|
||||
out[endpointID] = mesh.EndpointCandidateCapacityPressure{
|
||||
EndpointID: endpointID,
|
||||
Count: counter.Count,
|
||||
LastSeenUnixSec: counter.LastSeenUnixSec,
|
||||
}
|
||||
}
|
||||
if len(out) == 0 {
|
||||
return nil
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func (s *vpnFabricSessionDialStats) ObserveAllCandidatesFailed() {
|
||||
if s == nil {
|
||||
return
|
||||
@@ -5254,12 +5285,18 @@ func vpnFabricSessionTargets(meshState *syntheticMeshState, nextHop string) []me
|
||||
out := make([]mesh.FabricTransportTarget, 0, len(meshState.PeerEndpointCandidates[nextHop])+1)
|
||||
seen := map[string]struct{}{}
|
||||
if candidates := meshState.PeerEndpointCandidates[nextHop]; len(candidates) > 0 {
|
||||
var capacityPressure map[string]mesh.EndpointCandidateCapacityPressure
|
||||
if meshState.VPNFabricSessionDialStats != nil {
|
||||
capacityPressure = meshState.VPNFabricSessionDialStats.capacityPressureForScoring(2 * time.Minute)
|
||||
}
|
||||
ranked := mesh.RankPeerEndpointCandidates(candidates, mesh.EndpointCandidateScoreOptions{
|
||||
ChannelClass: mesh.SyntheticChannelFabricControl,
|
||||
Now: time.Now().UTC(),
|
||||
MaxVerificationAge: 5 * time.Minute,
|
||||
Observations: mergedEndpointCandidateObservations(meshState.PeerEndpointObservations, meshState.VPNFabricEndpointObservations.Snapshot()),
|
||||
MaxObservationAge: 5 * time.Minute,
|
||||
ChannelClass: mesh.SyntheticChannelFabricControl,
|
||||
Now: time.Now().UTC(),
|
||||
MaxVerificationAge: 5 * time.Minute,
|
||||
Observations: mergedEndpointCandidateObservations(meshState.PeerEndpointObservations, meshState.VPNFabricEndpointObservations.Snapshot()),
|
||||
MaxObservationAge: 5 * time.Minute,
|
||||
CapacityPressure: capacityPressure,
|
||||
MaxCapacityPressureAge: 2 * time.Minute,
|
||||
})
|
||||
for _, item := range ranked {
|
||||
endpoint := strings.TrimRight(strings.TrimSpace(item.Candidate.Address), "/")
|
||||
|
||||
@@ -1086,6 +1086,47 @@ func TestVPNFabricSessionTargetsUseRemoteHealthObservations(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestVPNFabricSessionTargetsUseCapacityPressureForLoadSpread(t *testing.T) {
|
||||
now := time.Now().UTC()
|
||||
stats := newVPNFabricSessionDialStats()
|
||||
for i := 0; i < 8; i++ {
|
||||
stats.ObserveCapacityLimited(mesh.FabricTransportTarget{
|
||||
EndpointID: "node-b-quic-a",
|
||||
Endpoint: "quic://node-b-a.example.test:19443",
|
||||
Transport: "direct_quic",
|
||||
})
|
||||
}
|
||||
targets := vpnFabricSessionTargets(&syntheticMeshState{
|
||||
VPNFabricSessionDialStats: stats,
|
||||
PeerEndpointCandidates: map[string][]mesh.PeerEndpointCandidate{
|
||||
"node-b": {
|
||||
{
|
||||
EndpointID: "node-b-quic-a",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_quic",
|
||||
Address: "quic://node-b-a.example.test:19443",
|
||||
Reachability: "public",
|
||||
ConnectivityMode: "direct",
|
||||
LastVerifiedAt: &now,
|
||||
},
|
||||
{
|
||||
EndpointID: "node-b-quic-b",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_quic",
|
||||
Address: "quic://node-b-b.example.test:19443",
|
||||
Reachability: "public",
|
||||
ConnectivityMode: "direct",
|
||||
Priority: 5,
|
||||
LastVerifiedAt: &now,
|
||||
},
|
||||
},
|
||||
},
|
||||
}, "node-b")
|
||||
if len(targets) != 2 || targets[0].EndpointID != "node-b-quic-b" {
|
||||
t.Fatalf("targets did not spread away from pressured endpoint: %+v", targets)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMergedEndpointCandidateObservationsKeepsNewest(t *testing.T) {
|
||||
now := time.Now().UTC()
|
||||
merged := mergedEndpointCandidateObservations(
|
||||
|
||||
Reference in New Issue
Block a user