Apply endpoint health in peer cache
This commit is contained in:
@@ -927,6 +927,7 @@ func startSyntheticMeshEndpoint(ctx context.Context, _ context.CancelFunc, cfg c
|
|||||||
Local: local,
|
Local: local,
|
||||||
PeerEndpoints: loadedConfig.PeerEndpoints,
|
PeerEndpoints: loadedConfig.PeerEndpoints,
|
||||||
PeerEndpointCandidates: loadedConfig.PeerEndpointCandidates,
|
PeerEndpointCandidates: loadedConfig.PeerEndpointCandidates,
|
||||||
|
PeerEndpointObservations: loadedConfig.PeerEndpointObservations,
|
||||||
PeerDirectory: loadedConfig.PeerDirectory,
|
PeerDirectory: loadedConfig.PeerDirectory,
|
||||||
RecoverySeeds: loadedConfig.RecoverySeeds,
|
RecoverySeeds: loadedConfig.RecoverySeeds,
|
||||||
RendezvousLeases: loadedConfig.RendezvousLeases,
|
RendezvousLeases: loadedConfig.RendezvousLeases,
|
||||||
@@ -1934,6 +1935,7 @@ func applyRefreshedSyntheticMeshConfig(ctx context.Context, cfg config.Config, i
|
|||||||
Local: local,
|
Local: local,
|
||||||
PeerEndpoints: loadedConfig.PeerEndpoints,
|
PeerEndpoints: loadedConfig.PeerEndpoints,
|
||||||
PeerEndpointCandidates: loadedConfig.PeerEndpointCandidates,
|
PeerEndpointCandidates: loadedConfig.PeerEndpointCandidates,
|
||||||
|
PeerEndpointObservations: loadedConfig.PeerEndpointObservations,
|
||||||
PeerDirectory: loadedConfig.PeerDirectory,
|
PeerDirectory: loadedConfig.PeerDirectory,
|
||||||
RecoverySeeds: loadedConfig.RecoverySeeds,
|
RecoverySeeds: loadedConfig.RecoverySeeds,
|
||||||
RendezvousLeases: loadedConfig.RendezvousLeases,
|
RendezvousLeases: loadedConfig.RendezvousLeases,
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ type PeerCacheConfig struct {
|
|||||||
Local PeerIdentity
|
Local PeerIdentity
|
||||||
PeerEndpoints map[string]string
|
PeerEndpoints map[string]string
|
||||||
PeerEndpointCandidates map[string][]PeerEndpointCandidate
|
PeerEndpointCandidates map[string][]PeerEndpointCandidate
|
||||||
|
PeerEndpointObservations map[string]EndpointCandidateHealthObservation
|
||||||
PeerDirectory []PeerDirectoryEntry
|
PeerDirectory []PeerDirectoryEntry
|
||||||
RecoverySeeds []PeerRecoverySeed
|
RecoverySeeds []PeerRecoverySeed
|
||||||
RendezvousLeases []PeerRendezvousLease
|
RendezvousLeases []PeerRendezvousLease
|
||||||
@@ -116,6 +117,8 @@ func NewPeerCache(cfg PeerCacheConfig) *PeerCache {
|
|||||||
PreferredRegion: cfg.PreferredRegion,
|
PreferredRegion: cfg.PreferredRegion,
|
||||||
Now: now,
|
Now: now,
|
||||||
MaxVerificationAge: time.Hour,
|
MaxVerificationAge: time.Hour,
|
||||||
|
Observations: cfg.PeerEndpointObservations,
|
||||||
|
MaxObservationAge: time.Hour,
|
||||||
})
|
})
|
||||||
if len(scored) > 0 {
|
if len(scored) > 0 {
|
||||||
entry.EndpointCandidates = make([]PeerEndpointCandidate, 0, len(scored))
|
entry.EndpointCandidates = make([]PeerEndpointCandidate, 0, len(scored))
|
||||||
|
|||||||
@@ -100,6 +100,59 @@ func TestPeerCacheUsesBestEndpointCandidate(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestPeerCacheAppliesEndpointHealthObservations(t *testing.T) {
|
||||||
|
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"}
|
||||||
|
now := time.Date(2026, 5, 16, 12, 0, 0, 0, time.UTC)
|
||||||
|
cache := NewPeerCache(PeerCacheConfig{
|
||||||
|
Local: local,
|
||||||
|
PeerEndpointCandidates: map[string][]PeerEndpointCandidate{
|
||||||
|
"node-b": {
|
||||||
|
{
|
||||||
|
EndpointID: "node-b-quic",
|
||||||
|
NodeID: "node-b",
|
||||||
|
Transport: "direct_quic",
|
||||||
|
Address: "quic://node-b.example.test:19443",
|
||||||
|
Reachability: "public",
|
||||||
|
NATType: "none",
|
||||||
|
ConnectivityMode: "direct",
|
||||||
|
Priority: 1,
|
||||||
|
LastVerifiedAt: &now,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
EndpointID: "node-b-wss",
|
||||||
|
NodeID: "node-b",
|
||||||
|
Transport: "wss",
|
||||||
|
Address: "https://node-b.example.test:443",
|
||||||
|
Reachability: "public",
|
||||||
|
NATType: "none",
|
||||||
|
ConnectivityMode: "direct",
|
||||||
|
Priority: 1,
|
||||||
|
LastVerifiedAt: &now,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
PeerEndpointObservations: map[string]EndpointCandidateHealthObservation{
|
||||||
|
"node-b-quic": {
|
||||||
|
EndpointID: "node-b-quic",
|
||||||
|
FailureCount: 2,
|
||||||
|
LastFailureReason: "session_open_failed",
|
||||||
|
ReliabilityScore: 35,
|
||||||
|
ObservedAt: now,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
WarmPeerLimit: 1,
|
||||||
|
Now: now,
|
||||||
|
})
|
||||||
|
|
||||||
|
entry, ok := peerCacheEntryByID(cache.Snapshot(), "node-b")
|
||||||
|
if !ok {
|
||||||
|
t.Fatal("node-b missing from cache")
|
||||||
|
}
|
||||||
|
if entry.BestCandidateID != "node-b-wss" || entry.Endpoint != "https://node-b.example.test:443" {
|
||||||
|
t.Fatalf("peer cache did not apply endpoint observations: %+v", entry)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestPeerCacheUsesPreferredCorporateEndpointAddress(t *testing.T) {
|
func TestPeerCacheUsesPreferredCorporateEndpointAddress(t *testing.T) {
|
||||||
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"}
|
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"}
|
||||||
cache := NewPeerCache(PeerCacheConfig{
|
cache := NewPeerCache(PeerCacheConfig{
|
||||||
|
|||||||
@@ -347,6 +347,9 @@ plane can distinguish local dial feedback from aggregated or policy-generated
|
|||||||
health hints.
|
health hints.
|
||||||
The endpoint health heartbeat report also includes the reporter node id at the
|
The endpoint health heartbeat report also includes the reporter node id at the
|
||||||
report level for simpler multi-node ingestion and diagnostics.
|
report level for simpler multi-node ingestion and diagnostics.
|
||||||
|
Peer cache construction now applies endpoint health observations when ranking
|
||||||
|
peer endpoint candidates, so recovery and warm-peer decisions see the same
|
||||||
|
degraded-path feedback as VPN fabric-session dialing.
|
||||||
|
|
||||||
Deliverables:
|
Deliverables:
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user