Accept endpoint health from mesh config
This commit is contained in:
@@ -370,6 +370,7 @@ type syntheticMeshState struct {
|
||||
VPNFabricEndpointObservations *vpnFabricEndpointObservationStore
|
||||
PeerEndpoints map[string]string
|
||||
PeerEndpointCandidates map[string][]mesh.PeerEndpointCandidate
|
||||
PeerEndpointObservations map[string]mesh.EndpointCandidateHealthObservation
|
||||
VPNGateway *vpnruntime.Gateway
|
||||
ServiceChannelAccessStats *fabricServiceChannelAccessStats
|
||||
RemoteWorkspaceFrameSink *mesh.RemoteWorkspaceFrameProbeSink
|
||||
@@ -869,6 +870,7 @@ type meshRouteHealthFeedbackRefreshState struct {
|
||||
type loadedSyntheticMeshConfig struct {
|
||||
PeerEndpoints map[string]string
|
||||
PeerEndpointCandidates map[string][]mesh.PeerEndpointCandidate
|
||||
PeerEndpointObservations map[string]mesh.EndpointCandidateHealthObservation
|
||||
PeerDirectory []mesh.PeerDirectoryEntry
|
||||
RecoverySeeds []mesh.PeerRecoverySeed
|
||||
RendezvousLeases []mesh.PeerRendezvousLease
|
||||
@@ -895,13 +897,14 @@ func startSyntheticMeshEndpoint(ctx context.Context, _ context.CancelFunc, cfg c
|
||||
if err != nil {
|
||||
log.Printf("synthetic mesh config load failed; starting diagnostics-only mesh state: %v", err)
|
||||
loadedConfig = loadedSyntheticMeshConfig{
|
||||
PeerEndpoints: map[string]string{},
|
||||
PeerEndpointCandidates: map[string][]mesh.PeerEndpointCandidate{},
|
||||
PeerDirectory: []mesh.PeerDirectoryEntry{},
|
||||
RecoverySeeds: []mesh.PeerRecoverySeed{},
|
||||
RendezvousLeases: []mesh.PeerRendezvousLease{},
|
||||
Routes: []mesh.SyntheticRoute{},
|
||||
Source: "config_load_failed",
|
||||
PeerEndpoints: map[string]string{},
|
||||
PeerEndpointCandidates: map[string][]mesh.PeerEndpointCandidate{},
|
||||
PeerEndpointObservations: map[string]mesh.EndpointCandidateHealthObservation{},
|
||||
PeerDirectory: []mesh.PeerDirectoryEntry{},
|
||||
RecoverySeeds: []mesh.PeerRecoverySeed{},
|
||||
RendezvousLeases: []mesh.PeerRendezvousLease{},
|
||||
Routes: []mesh.SyntheticRoute{},
|
||||
Source: "config_load_failed",
|
||||
}
|
||||
}
|
||||
peerEndpoints := loadedConfig.PeerEndpoints
|
||||
@@ -1082,6 +1085,7 @@ func startSyntheticMeshEndpoint(ctx context.Context, _ context.CancelFunc, cfg c
|
||||
VPNFabricEndpointObservations: newVPNFabricEndpointObservationStore(),
|
||||
PeerEndpoints: copyStringMap(peerEndpoints),
|
||||
PeerEndpointCandidates: copyPeerEndpointCandidatesMap(loadedConfig.PeerEndpointCandidates),
|
||||
PeerEndpointObservations: copyEndpointCandidateObservations(loadedConfig.PeerEndpointObservations),
|
||||
VPNGateway: vpnGateway,
|
||||
ServiceChannelAccessStats: serviceChannelAccessStats,
|
||||
RemoteWorkspaceFrameSink: remoteWorkspaceFrameSink,
|
||||
@@ -1545,18 +1549,19 @@ func loadSyntheticMeshConfig(ctx context.Context, cfg config.Config, identity st
|
||||
return loadedSyntheticMeshConfig{}, err
|
||||
}
|
||||
return loadedSyntheticMeshConfig{
|
||||
PeerEndpoints: scoped.PeerEndpoints,
|
||||
PeerEndpointCandidates: scoped.PeerEndpointCandidates,
|
||||
PeerDirectory: scoped.PeerDirectory,
|
||||
RecoverySeeds: scoped.RecoverySeeds,
|
||||
RendezvousLeases: scoped.RendezvousLeases,
|
||||
RoutePathDecisions: nil,
|
||||
Routes: scoped.Routes,
|
||||
Source: "scoped_config",
|
||||
ConfigVersion: scoped.ConfigVersion,
|
||||
PeerDirectoryVersion: scoped.PeerDirectoryVersion,
|
||||
PolicyVersion: scoped.PolicyVersion,
|
||||
ProductionForwarding: false,
|
||||
PeerEndpoints: scoped.PeerEndpoints,
|
||||
PeerEndpointCandidates: scoped.PeerEndpointCandidates,
|
||||
PeerEndpointObservations: scoped.PeerEndpointObservations,
|
||||
PeerDirectory: scoped.PeerDirectory,
|
||||
RecoverySeeds: scoped.RecoverySeeds,
|
||||
RendezvousLeases: scoped.RendezvousLeases,
|
||||
RoutePathDecisions: nil,
|
||||
Routes: scoped.Routes,
|
||||
Source: "scoped_config",
|
||||
ConfigVersion: scoped.ConfigVersion,
|
||||
PeerDirectoryVersion: scoped.PeerDirectoryVersion,
|
||||
PolicyVersion: scoped.PolicyVersion,
|
||||
ProductionForwarding: false,
|
||||
}, nil
|
||||
}
|
||||
if api != nil {
|
||||
@@ -1570,6 +1575,7 @@ func loadSyntheticMeshConfig(ctx context.Context, cfg config.Config, identity st
|
||||
return loadedSyntheticMeshConfig{
|
||||
PeerEndpoints: remote.PeerEndpoints,
|
||||
PeerEndpointCandidates: peerEndpointCandidatesFromControlPlane(remote.PeerEndpointCandidates),
|
||||
PeerEndpointObservations: endpointCandidateObservationsFromControlPlane(remote.PeerEndpointObservations),
|
||||
PeerDirectory: peerDirectoryFromControlPlane(remote.PeerDirectory),
|
||||
RecoverySeeds: recoverySeedsFromControlPlane(remote.RecoverySeeds),
|
||||
RendezvousLeases: rendezvousLeasesFromControlPlane(remote.RendezvousLeases),
|
||||
@@ -1981,6 +1987,7 @@ func applyRefreshedSyntheticMeshConfig(ctx context.Context, cfg config.Config, i
|
||||
}
|
||||
meshState.PeerEndpoints = copyStringMap(loadedConfig.PeerEndpoints)
|
||||
meshState.PeerEndpointCandidates = copyPeerEndpointCandidatesMap(loadedConfig.PeerEndpointCandidates)
|
||||
meshState.PeerEndpointObservations = copyEndpointCandidateObservations(loadedConfig.PeerEndpointObservations)
|
||||
if productionForwardingEnabled {
|
||||
meshState.ProductionForwardTransport = mesh.NewHTTPProductionForwardTransport(loadedConfig.PeerEndpoints)
|
||||
} else {
|
||||
@@ -2287,6 +2294,26 @@ func peerEndpointCandidatesFromControlPlane(candidates map[string][]client.PeerE
|
||||
return out
|
||||
}
|
||||
|
||||
func endpointCandidateObservationsFromControlPlane(observations map[string]client.EndpointCandidateHealthObservation) map[string]mesh.EndpointCandidateHealthObservation {
|
||||
out := make(map[string]mesh.EndpointCandidateHealthObservation, len(observations))
|
||||
for endpointID, item := range observations {
|
||||
endpointID = strings.TrimSpace(endpointID)
|
||||
if endpointID == "" {
|
||||
continue
|
||||
}
|
||||
out[endpointID] = mesh.EndpointCandidateHealthObservation{
|
||||
EndpointID: firstNonEmpty(strings.TrimSpace(item.EndpointID), endpointID),
|
||||
LastLatencyMs: item.LastLatencyMs,
|
||||
SuccessCount: item.SuccessCount,
|
||||
FailureCount: item.FailureCount,
|
||||
LastFailureReason: item.LastFailureReason,
|
||||
ReliabilityScore: item.ReliabilityScore,
|
||||
ObservedAt: item.ObservedAt,
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func peerDirectoryFromControlPlane(entries []client.PeerDirectoryEntry) []mesh.PeerDirectoryEntry {
|
||||
out := make([]mesh.PeerDirectoryEntry, 0, len(entries))
|
||||
for _, item := range entries {
|
||||
@@ -4508,6 +4535,17 @@ func copyPeerEndpointCandidatesMap(values map[string][]mesh.PeerEndpointCandidat
|
||||
return out
|
||||
}
|
||||
|
||||
func copyEndpointCandidateObservations(values map[string]mesh.EndpointCandidateHealthObservation) map[string]mesh.EndpointCandidateHealthObservation {
|
||||
if len(values) == 0 {
|
||||
return map[string]mesh.EndpointCandidateHealthObservation{}
|
||||
}
|
||||
out := make(map[string]mesh.EndpointCandidateHealthObservation, len(values))
|
||||
for endpointID, observation := range values {
|
||||
out[endpointID] = observation
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func minInt(left, right int) int {
|
||||
if left < right {
|
||||
return left
|
||||
@@ -5005,7 +5043,7 @@ func vpnFabricSessionTargets(meshState *syntheticMeshState, nextHop string) []me
|
||||
ChannelClass: mesh.SyntheticChannelFabricControl,
|
||||
Now: time.Now().UTC(),
|
||||
MaxVerificationAge: 5 * time.Minute,
|
||||
Observations: meshState.VPNFabricEndpointObservations.Snapshot(),
|
||||
Observations: mergedEndpointCandidateObservations(meshState.PeerEndpointObservations, meshState.VPNFabricEndpointObservations.Snapshot()),
|
||||
MaxObservationAge: 5 * time.Minute,
|
||||
})
|
||||
for _, item := range ranked {
|
||||
@@ -5036,6 +5074,23 @@ func vpnFabricSessionTargets(meshState *syntheticMeshState, nextHop string) []me
|
||||
return out
|
||||
}
|
||||
|
||||
func mergedEndpointCandidateObservations(remote map[string]mesh.EndpointCandidateHealthObservation, local map[string]mesh.EndpointCandidateHealthObservation) map[string]mesh.EndpointCandidateHealthObservation {
|
||||
if len(remote) == 0 && len(local) == 0 {
|
||||
return nil
|
||||
}
|
||||
out := make(map[string]mesh.EndpointCandidateHealthObservation, len(remote)+len(local))
|
||||
for endpointID, observation := range remote {
|
||||
out[endpointID] = observation
|
||||
}
|
||||
for endpointID, observation := range local {
|
||||
if existing, ok := out[endpointID]; ok && !observation.ObservedAt.IsZero() && !existing.ObservedAt.IsZero() && existing.ObservedAt.After(observation.ObservedAt) {
|
||||
continue
|
||||
}
|
||||
out[endpointID] = observation
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func endpointCandidateTLSCertSHA256(candidate mesh.PeerEndpointCandidate) string {
|
||||
if len(candidate.Metadata) == 0 {
|
||||
return ""
|
||||
|
||||
@@ -977,6 +977,64 @@ func TestVPNFabricSessionTargetsUseLocalHealthObservations(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestVPNFabricSessionTargetsUseRemoteHealthObservations(t *testing.T) {
|
||||
now := time.Now().UTC()
|
||||
targets := vpnFabricSessionTargets(&syntheticMeshState{
|
||||
PeerEndpointObservations: map[string]mesh.EndpointCandidateHealthObservation{
|
||||
"node-b-quic": {
|
||||
EndpointID: "node-b-quic",
|
||||
FailureCount: 2,
|
||||
LastFailureReason: "control_plane_session_open_failed",
|
||||
ReliabilityScore: 35,
|
||||
ObservedAt: now,
|
||||
},
|
||||
},
|
||||
PeerEndpointCandidates: map[string][]mesh.PeerEndpointCandidate{
|
||||
"node-b": {
|
||||
{
|
||||
EndpointID: "node-b-quic",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_quic",
|
||||
Address: "quic://node-b.example.test:19443",
|
||||
Reachability: "public",
|
||||
ConnectivityMode: "direct",
|
||||
Priority: 10,
|
||||
LastVerifiedAt: &now,
|
||||
},
|
||||
{
|
||||
EndpointID: "node-b-wss",
|
||||
NodeID: "node-b",
|
||||
Transport: "wss",
|
||||
Address: "https://node-b.example.test:443",
|
||||
Reachability: "public",
|
||||
ConnectivityMode: "direct",
|
||||
Priority: 10,
|
||||
LastVerifiedAt: &now,
|
||||
},
|
||||
},
|
||||
},
|
||||
}, "node-b")
|
||||
if len(targets) != 2 || targets[0].EndpointID != "node-b-wss" {
|
||||
t.Fatalf("targets did not apply remote health observations: %+v", targets)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMergedEndpointCandidateObservationsKeepsNewest(t *testing.T) {
|
||||
now := time.Now().UTC()
|
||||
merged := mergedEndpointCandidateObservations(
|
||||
map[string]mesh.EndpointCandidateHealthObservation{
|
||||
"endpoint-a": {EndpointID: "endpoint-a", ReliabilityScore: 90, ObservedAt: now},
|
||||
},
|
||||
map[string]mesh.EndpointCandidateHealthObservation{
|
||||
"endpoint-a": {EndpointID: "endpoint-a", ReliabilityScore: 35, ObservedAt: now.Add(-time.Minute)},
|
||||
"endpoint-b": {EndpointID: "endpoint-b", ReliabilityScore: 80, ObservedAt: now},
|
||||
},
|
||||
)
|
||||
if merged["endpoint-a"].ReliabilityScore != 90 || merged["endpoint-b"].ReliabilityScore != 80 {
|
||||
t.Fatalf("unexpected merged observations: %+v", merged)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHeartbeatPayloadReportsMeshListenerFailureWithoutKillingHeartbeat(t *testing.T) {
|
||||
now := time.Date(2026, 4, 30, 9, 0, 0, 0, time.UTC)
|
||||
payload := heartbeatPayload(config.Config{
|
||||
|
||||
Reference in New Issue
Block a user