Apply endpoint health in peer cache
This commit is contained in:
@@ -924,16 +924,17 @@ func startSyntheticMeshEndpoint(ctx context.Context, _ context.CancelFunc, cfg c
|
||||
productionForwardingEnabled := cfg.MeshProductionForwardingEnabled || loadedConfig.ProductionForwarding
|
||||
routeHealthRoutes := routeHealthRoutesFromPathDecisions(routes, loadedConfig.RoutePathDecisions)
|
||||
peerCache := mesh.NewPeerCache(mesh.PeerCacheConfig{
|
||||
Local: local,
|
||||
PeerEndpoints: loadedConfig.PeerEndpoints,
|
||||
PeerEndpointCandidates: loadedConfig.PeerEndpointCandidates,
|
||||
PeerDirectory: loadedConfig.PeerDirectory,
|
||||
RecoverySeeds: loadedConfig.RecoverySeeds,
|
||||
RendezvousLeases: loadedConfig.RendezvousLeases,
|
||||
Routes: loadedConfig.Routes,
|
||||
WarmPeerLimit: mesh.DefaultWarmPeerLimit,
|
||||
PreferredRegion: cfg.MeshRegion,
|
||||
Now: time.Now().UTC(),
|
||||
Local: local,
|
||||
PeerEndpoints: loadedConfig.PeerEndpoints,
|
||||
PeerEndpointCandidates: loadedConfig.PeerEndpointCandidates,
|
||||
PeerEndpointObservations: loadedConfig.PeerEndpointObservations,
|
||||
PeerDirectory: loadedConfig.PeerDirectory,
|
||||
RecoverySeeds: loadedConfig.RecoverySeeds,
|
||||
RendezvousLeases: loadedConfig.RendezvousLeases,
|
||||
Routes: loadedConfig.Routes,
|
||||
WarmPeerLimit: mesh.DefaultWarmPeerLimit,
|
||||
PreferredRegion: cfg.MeshRegion,
|
||||
Now: time.Now().UTC(),
|
||||
})
|
||||
peerCacheSnapshot := peerCache.Snapshot()
|
||||
peerConnections := mesh.NewPeerConnectionTracker(peerCacheSnapshot, time.Now().UTC())
|
||||
@@ -1931,16 +1932,17 @@ func refreshSyntheticMeshConfigForRouteHealthFeedback(ctx context.Context, cfg c
|
||||
func applyRefreshedSyntheticMeshConfig(ctx context.Context, cfg config.Config, identity state.Identity, meshState *syntheticMeshState, loadedConfig loadedSyntheticMeshConfig, local mesh.PeerIdentity, preferredRegion string, observedAt time.Time) {
|
||||
routeHealthRoutes := routeHealthRoutesFromPathDecisions(loadedConfig.Routes, loadedConfig.RoutePathDecisions)
|
||||
peerCache := mesh.NewPeerCache(mesh.PeerCacheConfig{
|
||||
Local: local,
|
||||
PeerEndpoints: loadedConfig.PeerEndpoints,
|
||||
PeerEndpointCandidates: loadedConfig.PeerEndpointCandidates,
|
||||
PeerDirectory: loadedConfig.PeerDirectory,
|
||||
RecoverySeeds: loadedConfig.RecoverySeeds,
|
||||
RendezvousLeases: loadedConfig.RendezvousLeases,
|
||||
Routes: loadedConfig.Routes,
|
||||
WarmPeerLimit: mesh.DefaultWarmPeerLimit,
|
||||
PreferredRegion: preferredRegion,
|
||||
Now: observedAt,
|
||||
Local: local,
|
||||
PeerEndpoints: loadedConfig.PeerEndpoints,
|
||||
PeerEndpointCandidates: loadedConfig.PeerEndpointCandidates,
|
||||
PeerEndpointObservations: loadedConfig.PeerEndpointObservations,
|
||||
PeerDirectory: loadedConfig.PeerDirectory,
|
||||
RecoverySeeds: loadedConfig.RecoverySeeds,
|
||||
RendezvousLeases: loadedConfig.RendezvousLeases,
|
||||
Routes: loadedConfig.Routes,
|
||||
WarmPeerLimit: mesh.DefaultWarmPeerLimit,
|
||||
PreferredRegion: preferredRegion,
|
||||
Now: observedAt,
|
||||
})
|
||||
if meshState.PeerConnections == nil {
|
||||
meshState.PeerConnections = mesh.NewPeerConnectionTracker(peerCache.Snapshot(), observedAt)
|
||||
|
||||
@@ -9,16 +9,17 @@ import (
|
||||
// DefaultWarmPeerLimit is the value callers pass as PeerCacheConfig.WarmPeerLimit
// when they have no more specific limit to apply.
// NOTE(review): the exact meaning of a "warm" peer is defined by NewPeerCache,
// which is not fully visible here — confirm before relying on it.
const DefaultWarmPeerLimit = 8
|
||||
|
||||
type PeerCacheConfig struct {
|
||||
Local PeerIdentity
|
||||
PeerEndpoints map[string]string
|
||||
PeerEndpointCandidates map[string][]PeerEndpointCandidate
|
||||
PeerDirectory []PeerDirectoryEntry
|
||||
RecoverySeeds []PeerRecoverySeed
|
||||
RendezvousLeases []PeerRendezvousLease
|
||||
Routes []SyntheticRoute
|
||||
WarmPeerLimit int
|
||||
PreferredRegion string
|
||||
Now time.Time
|
||||
Local PeerIdentity
|
||||
PeerEndpoints map[string]string
|
||||
PeerEndpointCandidates map[string][]PeerEndpointCandidate
|
||||
PeerEndpointObservations map[string]EndpointCandidateHealthObservation
|
||||
PeerDirectory []PeerDirectoryEntry
|
||||
RecoverySeeds []PeerRecoverySeed
|
||||
RendezvousLeases []PeerRendezvousLease
|
||||
Routes []SyntheticRoute
|
||||
WarmPeerLimit int
|
||||
PreferredRegion string
|
||||
Now time.Time
|
||||
}
|
||||
|
||||
type PeerCache struct {
|
||||
@@ -116,6 +117,8 @@ func NewPeerCache(cfg PeerCacheConfig) *PeerCache {
|
||||
PreferredRegion: cfg.PreferredRegion,
|
||||
Now: now,
|
||||
MaxVerificationAge: time.Hour,
|
||||
Observations: cfg.PeerEndpointObservations,
|
||||
MaxObservationAge: time.Hour,
|
||||
})
|
||||
if len(scored) > 0 {
|
||||
entry.EndpointCandidates = make([]PeerEndpointCandidate, 0, len(scored))
|
||||
|
||||
@@ -100,6 +100,59 @@ func TestPeerCacheUsesBestEndpointCandidate(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestPeerCacheAppliesEndpointHealthObservations(t *testing.T) {
|
||||
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"}
|
||||
now := time.Date(2026, 5, 16, 12, 0, 0, 0, time.UTC)
|
||||
cache := NewPeerCache(PeerCacheConfig{
|
||||
Local: local,
|
||||
PeerEndpointCandidates: map[string][]PeerEndpointCandidate{
|
||||
"node-b": {
|
||||
{
|
||||
EndpointID: "node-b-quic",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_quic",
|
||||
Address: "quic://node-b.example.test:19443",
|
||||
Reachability: "public",
|
||||
NATType: "none",
|
||||
ConnectivityMode: "direct",
|
||||
Priority: 1,
|
||||
LastVerifiedAt: &now,
|
||||
},
|
||||
{
|
||||
EndpointID: "node-b-wss",
|
||||
NodeID: "node-b",
|
||||
Transport: "wss",
|
||||
Address: "https://node-b.example.test:443",
|
||||
Reachability: "public",
|
||||
NATType: "none",
|
||||
ConnectivityMode: "direct",
|
||||
Priority: 1,
|
||||
LastVerifiedAt: &now,
|
||||
},
|
||||
},
|
||||
},
|
||||
PeerEndpointObservations: map[string]EndpointCandidateHealthObservation{
|
||||
"node-b-quic": {
|
||||
EndpointID: "node-b-quic",
|
||||
FailureCount: 2,
|
||||
LastFailureReason: "session_open_failed",
|
||||
ReliabilityScore: 35,
|
||||
ObservedAt: now,
|
||||
},
|
||||
},
|
||||
WarmPeerLimit: 1,
|
||||
Now: now,
|
||||
})
|
||||
|
||||
entry, ok := peerCacheEntryByID(cache.Snapshot(), "node-b")
|
||||
if !ok {
|
||||
t.Fatal("node-b missing from cache")
|
||||
}
|
||||
if entry.BestCandidateID != "node-b-wss" || entry.Endpoint != "https://node-b.example.test:443" {
|
||||
t.Fatalf("peer cache did not apply endpoint observations: %+v", entry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPeerCacheUsesPreferredCorporateEndpointAddress(t *testing.T) {
|
||||
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"}
|
||||
cache := NewPeerCache(PeerCacheConfig{
|
||||
|
||||
Reference in New Issue
Block a user