Apply endpoint health in peer cache
This commit is contained in:
@@ -924,16 +924,17 @@ func startSyntheticMeshEndpoint(ctx context.Context, _ context.CancelFunc, cfg c
|
|||||||
productionForwardingEnabled := cfg.MeshProductionForwardingEnabled || loadedConfig.ProductionForwarding
|
productionForwardingEnabled := cfg.MeshProductionForwardingEnabled || loadedConfig.ProductionForwarding
|
||||||
routeHealthRoutes := routeHealthRoutesFromPathDecisions(routes, loadedConfig.RoutePathDecisions)
|
routeHealthRoutes := routeHealthRoutesFromPathDecisions(routes, loadedConfig.RoutePathDecisions)
|
||||||
peerCache := mesh.NewPeerCache(mesh.PeerCacheConfig{
|
peerCache := mesh.NewPeerCache(mesh.PeerCacheConfig{
|
||||||
Local: local,
|
Local: local,
|
||||||
PeerEndpoints: loadedConfig.PeerEndpoints,
|
PeerEndpoints: loadedConfig.PeerEndpoints,
|
||||||
PeerEndpointCandidates: loadedConfig.PeerEndpointCandidates,
|
PeerEndpointCandidates: loadedConfig.PeerEndpointCandidates,
|
||||||
PeerDirectory: loadedConfig.PeerDirectory,
|
PeerEndpointObservations: loadedConfig.PeerEndpointObservations,
|
||||||
RecoverySeeds: loadedConfig.RecoverySeeds,
|
PeerDirectory: loadedConfig.PeerDirectory,
|
||||||
RendezvousLeases: loadedConfig.RendezvousLeases,
|
RecoverySeeds: loadedConfig.RecoverySeeds,
|
||||||
Routes: loadedConfig.Routes,
|
RendezvousLeases: loadedConfig.RendezvousLeases,
|
||||||
WarmPeerLimit: mesh.DefaultWarmPeerLimit,
|
Routes: loadedConfig.Routes,
|
||||||
PreferredRegion: cfg.MeshRegion,
|
WarmPeerLimit: mesh.DefaultWarmPeerLimit,
|
||||||
Now: time.Now().UTC(),
|
PreferredRegion: cfg.MeshRegion,
|
||||||
|
Now: time.Now().UTC(),
|
||||||
})
|
})
|
||||||
peerCacheSnapshot := peerCache.Snapshot()
|
peerCacheSnapshot := peerCache.Snapshot()
|
||||||
peerConnections := mesh.NewPeerConnectionTracker(peerCacheSnapshot, time.Now().UTC())
|
peerConnections := mesh.NewPeerConnectionTracker(peerCacheSnapshot, time.Now().UTC())
|
||||||
@@ -1931,16 +1932,17 @@ func refreshSyntheticMeshConfigForRouteHealthFeedback(ctx context.Context, cfg c
|
|||||||
func applyRefreshedSyntheticMeshConfig(ctx context.Context, cfg config.Config, identity state.Identity, meshState *syntheticMeshState, loadedConfig loadedSyntheticMeshConfig, local mesh.PeerIdentity, preferredRegion string, observedAt time.Time) {
|
func applyRefreshedSyntheticMeshConfig(ctx context.Context, cfg config.Config, identity state.Identity, meshState *syntheticMeshState, loadedConfig loadedSyntheticMeshConfig, local mesh.PeerIdentity, preferredRegion string, observedAt time.Time) {
|
||||||
routeHealthRoutes := routeHealthRoutesFromPathDecisions(loadedConfig.Routes, loadedConfig.RoutePathDecisions)
|
routeHealthRoutes := routeHealthRoutesFromPathDecisions(loadedConfig.Routes, loadedConfig.RoutePathDecisions)
|
||||||
peerCache := mesh.NewPeerCache(mesh.PeerCacheConfig{
|
peerCache := mesh.NewPeerCache(mesh.PeerCacheConfig{
|
||||||
Local: local,
|
Local: local,
|
||||||
PeerEndpoints: loadedConfig.PeerEndpoints,
|
PeerEndpoints: loadedConfig.PeerEndpoints,
|
||||||
PeerEndpointCandidates: loadedConfig.PeerEndpointCandidates,
|
PeerEndpointCandidates: loadedConfig.PeerEndpointCandidates,
|
||||||
PeerDirectory: loadedConfig.PeerDirectory,
|
PeerEndpointObservations: loadedConfig.PeerEndpointObservations,
|
||||||
RecoverySeeds: loadedConfig.RecoverySeeds,
|
PeerDirectory: loadedConfig.PeerDirectory,
|
||||||
RendezvousLeases: loadedConfig.RendezvousLeases,
|
RecoverySeeds: loadedConfig.RecoverySeeds,
|
||||||
Routes: loadedConfig.Routes,
|
RendezvousLeases: loadedConfig.RendezvousLeases,
|
||||||
WarmPeerLimit: mesh.DefaultWarmPeerLimit,
|
Routes: loadedConfig.Routes,
|
||||||
PreferredRegion: preferredRegion,
|
WarmPeerLimit: mesh.DefaultWarmPeerLimit,
|
||||||
Now: observedAt,
|
PreferredRegion: preferredRegion,
|
||||||
|
Now: observedAt,
|
||||||
})
|
})
|
||||||
if meshState.PeerConnections == nil {
|
if meshState.PeerConnections == nil {
|
||||||
meshState.PeerConnections = mesh.NewPeerConnectionTracker(peerCache.Snapshot(), observedAt)
|
meshState.PeerConnections = mesh.NewPeerConnectionTracker(peerCache.Snapshot(), observedAt)
|
||||||
|
|||||||
@@ -9,16 +9,17 @@ import (
|
|||||||
const DefaultWarmPeerLimit = 8
|
const DefaultWarmPeerLimit = 8
|
||||||
|
|
||||||
type PeerCacheConfig struct {
|
type PeerCacheConfig struct {
|
||||||
Local PeerIdentity
|
Local PeerIdentity
|
||||||
PeerEndpoints map[string]string
|
PeerEndpoints map[string]string
|
||||||
PeerEndpointCandidates map[string][]PeerEndpointCandidate
|
PeerEndpointCandidates map[string][]PeerEndpointCandidate
|
||||||
PeerDirectory []PeerDirectoryEntry
|
PeerEndpointObservations map[string]EndpointCandidateHealthObservation
|
||||||
RecoverySeeds []PeerRecoverySeed
|
PeerDirectory []PeerDirectoryEntry
|
||||||
RendezvousLeases []PeerRendezvousLease
|
RecoverySeeds []PeerRecoverySeed
|
||||||
Routes []SyntheticRoute
|
RendezvousLeases []PeerRendezvousLease
|
||||||
WarmPeerLimit int
|
Routes []SyntheticRoute
|
||||||
PreferredRegion string
|
WarmPeerLimit int
|
||||||
Now time.Time
|
PreferredRegion string
|
||||||
|
Now time.Time
|
||||||
}
|
}
|
||||||
|
|
||||||
type PeerCache struct {
|
type PeerCache struct {
|
||||||
@@ -116,6 +117,8 @@ func NewPeerCache(cfg PeerCacheConfig) *PeerCache {
|
|||||||
PreferredRegion: cfg.PreferredRegion,
|
PreferredRegion: cfg.PreferredRegion,
|
||||||
Now: now,
|
Now: now,
|
||||||
MaxVerificationAge: time.Hour,
|
MaxVerificationAge: time.Hour,
|
||||||
|
Observations: cfg.PeerEndpointObservations,
|
||||||
|
MaxObservationAge: time.Hour,
|
||||||
})
|
})
|
||||||
if len(scored) > 0 {
|
if len(scored) > 0 {
|
||||||
entry.EndpointCandidates = make([]PeerEndpointCandidate, 0, len(scored))
|
entry.EndpointCandidates = make([]PeerEndpointCandidate, 0, len(scored))
|
||||||
|
|||||||
@@ -100,6 +100,59 @@ func TestPeerCacheUsesBestEndpointCandidate(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestPeerCacheAppliesEndpointHealthObservations(t *testing.T) {
|
||||||
|
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"}
|
||||||
|
now := time.Date(2026, 5, 16, 12, 0, 0, 0, time.UTC)
|
||||||
|
cache := NewPeerCache(PeerCacheConfig{
|
||||||
|
Local: local,
|
||||||
|
PeerEndpointCandidates: map[string][]PeerEndpointCandidate{
|
||||||
|
"node-b": {
|
||||||
|
{
|
||||||
|
EndpointID: "node-b-quic",
|
||||||
|
NodeID: "node-b",
|
||||||
|
Transport: "direct_quic",
|
||||||
|
Address: "quic://node-b.example.test:19443",
|
||||||
|
Reachability: "public",
|
||||||
|
NATType: "none",
|
||||||
|
ConnectivityMode: "direct",
|
||||||
|
Priority: 1,
|
||||||
|
LastVerifiedAt: &now,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
EndpointID: "node-b-wss",
|
||||||
|
NodeID: "node-b",
|
||||||
|
Transport: "wss",
|
||||||
|
Address: "https://node-b.example.test:443",
|
||||||
|
Reachability: "public",
|
||||||
|
NATType: "none",
|
||||||
|
ConnectivityMode: "direct",
|
||||||
|
Priority: 1,
|
||||||
|
LastVerifiedAt: &now,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
PeerEndpointObservations: map[string]EndpointCandidateHealthObservation{
|
||||||
|
"node-b-quic": {
|
||||||
|
EndpointID: "node-b-quic",
|
||||||
|
FailureCount: 2,
|
||||||
|
LastFailureReason: "session_open_failed",
|
||||||
|
ReliabilityScore: 35,
|
||||||
|
ObservedAt: now,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
WarmPeerLimit: 1,
|
||||||
|
Now: now,
|
||||||
|
})
|
||||||
|
|
||||||
|
entry, ok := peerCacheEntryByID(cache.Snapshot(), "node-b")
|
||||||
|
if !ok {
|
||||||
|
t.Fatal("node-b missing from cache")
|
||||||
|
}
|
||||||
|
if entry.BestCandidateID != "node-b-wss" || entry.Endpoint != "https://node-b.example.test:443" {
|
||||||
|
t.Fatalf("peer cache did not apply endpoint observations: %+v", entry)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestPeerCacheUsesPreferredCorporateEndpointAddress(t *testing.T) {
|
func TestPeerCacheUsesPreferredCorporateEndpointAddress(t *testing.T) {
|
||||||
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"}
|
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"}
|
||||||
cache := NewPeerCache(PeerCacheConfig{
|
cache := NewPeerCache(PeerCacheConfig{
|
||||||
|
|||||||
@@ -347,6 +347,9 @@ plane can distinguish local dial feedback from aggregated or policy-generated
|
|||||||
health hints.
|
health hints.
|
||||||
The endpoint health heartbeat report also includes the reporter node id at the
|
The endpoint health heartbeat report also includes the reporter node id at the
|
||||||
report level for simpler multi-node ingestion and diagnostics.
|
report level for simpler multi-node ingestion and diagnostics.
|
||||||
|
Peer cache construction now applies endpoint health observations when ranking
|
||||||
|
peer endpoint candidates, so recovery and warm-peer decisions see the same
|
||||||
|
degraded-path feedback as VPN fabric-session dialing.
|
||||||
|
|
||||||
Deliverables:
|
Deliverables:
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user