1114 lines
44 KiB
Go
1114 lines
44 KiB
Go
package main
|
|
|
|
import (
|
|
"context"
|
|
"crypto/ed25519"
|
|
"encoding/base64"
|
|
"encoding/json"
|
|
"io"
|
|
"log"
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
"testing"
|
|
"time"
|
|
|
|
agentauthority "github.com/example/remote-access-platform/agents/rap-node-agent/internal/authority"
|
|
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/client"
|
|
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/config"
|
|
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/mesh"
|
|
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/state"
|
|
)
|
|
|
|
func TestLoadSyntheticMeshConfigPrefersScopedFile(t *testing.T) {
|
|
route := mesh.SyntheticRoute{
|
|
RouteID: "route-file",
|
|
ClusterID: "cluster-1",
|
|
SourceNodeID: "node-a",
|
|
DestinationNodeID: "node-b",
|
|
Hops: []string{"node-a", "node-b"},
|
|
AllowedChannels: []string{mesh.SyntheticChannelFabricControl},
|
|
ExpiresAt: time.Now().UTC().Add(time.Hour),
|
|
RouteVersion: "route-v1",
|
|
PolicyVersion: "policy-v1",
|
|
PeerDirectoryVersion: "peers-v1",
|
|
}
|
|
payload, err := json.Marshal(mesh.ScopedSyntheticConfig{
|
|
SchemaVersion: "c17f.synthetic.v1",
|
|
ClusterID: "cluster-1",
|
|
LocalNodeID: "node-a",
|
|
PeerEndpoints: map[string]string{"node-b": "http://127.0.0.1:19002"},
|
|
PeerDirectory: []mesh.PeerDirectoryEntry{
|
|
{NodeID: "node-b", RouteIDs: []string{"route-file"}, EndpointCount: 1},
|
|
},
|
|
RecoverySeeds: []mesh.PeerRecoverySeed{
|
|
{NodeID: "node-b", Endpoint: "http://127.0.0.1:19002", Transport: "direct_tcp_tls", Priority: 10},
|
|
},
|
|
Routes: []mesh.SyntheticRoute{route},
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("marshal scoped config: %v", err)
|
|
}
|
|
path := filepath.Join(t.TempDir(), "mesh-scoped.json")
|
|
if err := os.WriteFile(path, payload, 0o600); err != nil {
|
|
t.Fatalf("write scoped config: %v", err)
|
|
}
|
|
|
|
loaded, err := loadSyntheticMeshConfig(context.Background(), config.Config{
|
|
MeshSyntheticConfigPath: path,
|
|
MeshPeerEndpointsJSON: `{"node-b":"http://debug.invalid"}`,
|
|
MeshSyntheticRoutesJSON: `[]`,
|
|
}, state.Identity{ClusterID: "cluster-1", NodeID: "node-a"}, nil)
|
|
if err != nil {
|
|
t.Fatalf("load synthetic config: %v", err)
|
|
}
|
|
if loaded.Source != "scoped_config" {
|
|
t.Fatalf("source = %q, want scoped_config", loaded.Source)
|
|
}
|
|
if loaded.PeerEndpoints["node-b"] != "http://127.0.0.1:19002" {
|
|
t.Fatalf("peer endpoint = %q", loaded.PeerEndpoints["node-b"])
|
|
}
|
|
if len(loaded.Routes) != 1 || loaded.Routes[0].RouteID != "route-file" {
|
|
t.Fatalf("routes = %+v", loaded.Routes)
|
|
}
|
|
if len(loaded.PeerDirectory) != 1 || len(loaded.RecoverySeeds) != 1 {
|
|
t.Fatalf("peer runtime config missing: directory=%+v seeds=%+v", loaded.PeerDirectory, loaded.RecoverySeeds)
|
|
}
|
|
}
|
|
|
|
func TestVerifyEnrollmentBootstrapAcceptsSignedApproval(t *testing.T) {
|
|
publicKey, privateKey, err := ed25519.GenerateKey(nil)
|
|
if err != nil {
|
|
t.Fatalf("generate key: %v", err)
|
|
}
|
|
publicKeyB64 := base64.StdEncoding.EncodeToString(publicKey)
|
|
fingerprint := agentauthority.Fingerprint(publicKey)
|
|
payload := json.RawMessage(`{
|
|
"schema_version":"rap.cluster.node_approval.v1",
|
|
"cluster_id":"cluster-1",
|
|
"join_request_id":"join-request-1",
|
|
"node_id":"node-1",
|
|
"node_fingerprint":"fp-1",
|
|
"identity_status":"active",
|
|
"heartbeat_endpoint":"/api/v1/clusters/cluster-1/nodes/node-1/heartbeats",
|
|
"approved_by_user_id":"admin-1",
|
|
"issued_at":"2026-04-28T12:00:00Z",
|
|
"control_plane_only":true,
|
|
"production_forwarding":false
|
|
}`)
|
|
canonical, err := agentauthority.CanonicalJSON(payload)
|
|
if err != nil {
|
|
t.Fatalf("canonical json: %v", err)
|
|
}
|
|
bootstrap := client.NodeBootstrap{
|
|
NodeID: "node-1",
|
|
ClusterID: "cluster-1",
|
|
IdentityStatus: "active",
|
|
HeartbeatEndpoint: "/api/v1/clusters/cluster-1/nodes/node-1/heartbeats",
|
|
ClusterAuthority: &client.ClusterAuthorityDescriptor{
|
|
SchemaVersion: agentauthority.AuthoritySchemaVersion,
|
|
ClusterID: "cluster-1",
|
|
AuthorityState: "active",
|
|
KeyAlgorithm: agentauthority.AlgorithmEd25519,
|
|
PublicKey: publicKeyB64,
|
|
PublicKeyFingerprint: fingerprint,
|
|
},
|
|
AuthorityPayload: payload,
|
|
AuthoritySignature: &client.ClusterSignature{
|
|
SchemaVersion: agentauthority.SignatureSchemaVersion,
|
|
Algorithm: agentauthority.AlgorithmEd25519,
|
|
KeyFingerprint: fingerprint,
|
|
Signature: base64.StdEncoding.EncodeToString(ed25519.Sign(privateKey, canonical)),
|
|
SignedAt: time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC),
|
|
},
|
|
}
|
|
|
|
err = verifyEnrollmentBootstrap(bootstrap, state.Identity{
|
|
ClusterID: "cluster-1",
|
|
NodeFingerprint: "fp-1",
|
|
}, config.Config{ClusterAuthorityFingerprint: fingerprint})
|
|
if err != nil {
|
|
t.Fatalf("verify enrollment bootstrap: %v", err)
|
|
}
|
|
}
|
|
|
|
func TestVerifyEnrollmentBootstrapRejectsPinnedAuthorityMismatch(t *testing.T) {
|
|
bootstrap := client.NodeBootstrap{
|
|
NodeID: "node-1",
|
|
ClusterID: "cluster-1",
|
|
IdentityStatus: "active",
|
|
ClusterAuthority: &client.ClusterAuthorityDescriptor{
|
|
SchemaVersion: agentauthority.AuthoritySchemaVersion,
|
|
ClusterID: "cluster-1",
|
|
KeyAlgorithm: agentauthority.AlgorithmEd25519,
|
|
PublicKeyFingerprint: "rap-ca-ed25519-other",
|
|
},
|
|
}
|
|
err := verifyEnrollmentBootstrap(bootstrap, state.Identity{
|
|
ClusterID: "cluster-1",
|
|
NodeFingerprint: "fp-1",
|
|
}, config.Config{ClusterAuthorityFingerprint: "rap-ca-ed25519-expected"})
|
|
if err == nil {
|
|
t.Fatal("expected pinned authority mismatch")
|
|
}
|
|
}
|
|
|
|
func TestSyntheticQualityScoreIsBounded(t *testing.T) {
|
|
cases := []struct {
|
|
latency int
|
|
min int
|
|
max int
|
|
}{
|
|
{latency: 0, min: 100, max: 100},
|
|
{latency: 50, min: 90, max: 100},
|
|
{latency: 10000, min: 1, max: 1},
|
|
}
|
|
for _, tc := range cases {
|
|
score := syntheticQualityScore(tc.latency)
|
|
if score < tc.min || score > tc.max {
|
|
t.Fatalf("syntheticQualityScore(%d) = %d, want [%d,%d]", tc.latency, score, tc.min, tc.max)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestProductionEnvelopeObservationSinkFromConfigIsDisabledByDefault(t *testing.T) {
|
|
sink := productionEnvelopeObservationSinkFromConfig(config.Config{})
|
|
if sink != nil {
|
|
t.Fatal("sink is enabled by default")
|
|
}
|
|
}
|
|
|
|
func TestHeartbeatPayloadIncludesMeshEndpointReport(t *testing.T) {
|
|
payload := heartbeatPayload(config.Config{
|
|
MeshAdvertiseEndpoint: "https://node-a.example.test:443",
|
|
MeshAdvertiseTransport: "wss",
|
|
MeshConnectivityMode: "outbound_only",
|
|
MeshNATType: "symmetric",
|
|
MeshRegion: "eu",
|
|
MeshSyntheticRuntimeEnabled: true,
|
|
MeshProductionForwardingEnabled: true,
|
|
}, state.Identity{
|
|
ClusterID: "cluster-1",
|
|
NodeID: "node-a",
|
|
}, nil, time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC))
|
|
|
|
report, ok := payload.Metadata["mesh_endpoint_report"].(map[string]any)
|
|
if !ok {
|
|
t.Fatalf("mesh endpoint report missing: %+v", payload.Metadata)
|
|
}
|
|
if report["peer_endpoint"] != "https://node-a.example.test:443" ||
|
|
report["connectivity_mode"] != "outbound_only" ||
|
|
report["nat_type"] != "symmetric" ||
|
|
report["region"] != "eu" {
|
|
t.Fatalf("unexpected endpoint report: %+v", report)
|
|
}
|
|
if payload.Capabilities["mesh_dynamic_endpoint_reporting"] != true {
|
|
t.Fatalf("dynamic endpoint capability missing: %+v", payload.Capabilities)
|
|
}
|
|
}
|
|
|
|
func TestHeartbeatPayloadReportsMultipleMeshEndpoints(t *testing.T) {
|
|
payload := heartbeatPayload(config.Config{
|
|
MeshAdvertiseEndpointsJSON: `[{
|
|
"endpoint_id": "node-a-lan",
|
|
"address": "http://10.24.10.10:19001",
|
|
"transport": "direct_tcp_tls",
|
|
"reachability": "private",
|
|
"connectivity_mode": "direct",
|
|
"nat_type": "none",
|
|
"region": "corp-eu",
|
|
"priority": 1,
|
|
"policy_tags": ["corp-lan", "same-site"]
|
|
},{
|
|
"endpoint_id": "node-a-public",
|
|
"address": "https://node-a.example.test:443",
|
|
"transport": "direct_tcp_tls",
|
|
"reachability": "public",
|
|
"connectivity_mode": "direct",
|
|
"nat_type": "none",
|
|
"priority": 10
|
|
}]`,
|
|
MeshRegion: "corp-eu",
|
|
}, state.Identity{
|
|
ClusterID: "cluster-1",
|
|
NodeID: "node-a",
|
|
}, nil, time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC))
|
|
|
|
report, ok := payload.Metadata["mesh_endpoint_report"].(map[string]any)
|
|
if !ok {
|
|
t.Fatalf("mesh endpoint report missing: %+v", payload.Metadata)
|
|
}
|
|
candidates, ok := report["endpoint_candidates"].([]mesh.PeerEndpointCandidate)
|
|
if !ok || len(candidates) != 2 {
|
|
t.Fatalf("unexpected endpoint candidates: %#v", report["endpoint_candidates"])
|
|
}
|
|
if candidates[0].EndpointID != "node-a-lan" || candidates[0].Reachability != "private" {
|
|
t.Fatalf("internal endpoint candidate not preserved: %+v", candidates[0])
|
|
}
|
|
if report["peer_endpoint"] != "http://10.24.10.10:19001" {
|
|
t.Fatalf("default peer endpoint = %v", report["peer_endpoint"])
|
|
}
|
|
}
|
|
|
|
func TestHeartbeatPayloadIncludesPeerRecoveryReportWithoutAdvertisedEndpoint(t *testing.T) {
|
|
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
|
local := mesh.PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"}
|
|
peerCache := mesh.NewPeerCache(mesh.PeerCacheConfig{
|
|
Local: local,
|
|
PeerEndpoints: map[string]string{
|
|
"node-b": "http://node-b:19001",
|
|
"node-c": "http://node-c:19001",
|
|
"node-d": "http://node-d:19001",
|
|
},
|
|
WarmPeerLimit: 3,
|
|
Now: now,
|
|
})
|
|
peerConnections := mesh.NewPeerConnectionTracker(peerCache.Snapshot(), now)
|
|
peerConnections.RecordSuccess("node-b", 20, now)
|
|
meshState := &syntheticMeshState{
|
|
PeerCache: peerCache,
|
|
PeerConnections: peerConnections,
|
|
}
|
|
|
|
payload := heartbeatPayload(config.Config{}, state.Identity{
|
|
ClusterID: "cluster-1",
|
|
NodeID: "node-a",
|
|
}, meshState, now)
|
|
|
|
report, ok := payload.Metadata["mesh_peer_recovery_report"].(map[string]any)
|
|
if !ok {
|
|
t.Fatalf("mesh peer recovery report missing: %+v", payload.Metadata)
|
|
}
|
|
if report["schema_version"] != "c17z9.mesh_peer_recovery_report.v1" ||
|
|
report["mode"] != mesh.PeerRecoveryModeRecovery ||
|
|
report["ready_peer_count"] != 1 ||
|
|
report["target_ready_peers"] != mesh.DefaultStablePeerTarget ||
|
|
report["deficit"] != 2 {
|
|
t.Fatalf("unexpected recovery report: %+v", report)
|
|
}
|
|
if payload.Capabilities["mesh_peer_recovery_planning"] != true {
|
|
t.Fatalf("peer recovery capability missing: %+v", payload.Capabilities)
|
|
}
|
|
intentReport, ok := payload.Metadata["mesh_peer_connection_intent_report"].(map[string]any)
|
|
if !ok {
|
|
t.Fatalf("mesh peer connection intent report missing: %+v", payload.Metadata)
|
|
}
|
|
if intentReport["schema_version"] != "c17z12.mesh_peer_connection_intent_report.v1" ||
|
|
intentReport["intent_count"] != 3 ||
|
|
intentReport["recover_count"] != 2 {
|
|
t.Fatalf("unexpected connection intent report: %+v", intentReport)
|
|
}
|
|
if payload.Capabilities["mesh_peer_connection_intent_planning"] != true {
|
|
t.Fatalf("connection intent capability missing: %+v", payload.Capabilities)
|
|
}
|
|
if _, ok := payload.Metadata["mesh_endpoint_report"]; ok {
|
|
t.Fatalf("endpoint report should not be emitted without advertised endpoint: %+v", payload.Metadata)
|
|
}
|
|
}
|
|
|
|
func TestHeartbeatPayloadIncludesRendezvousLeaseAdmissionReport(t *testing.T) {
|
|
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
|
identity := state.Identity{ClusterID: "cluster-1", NodeID: "node-a"}
|
|
leases := []mesh.PeerRendezvousLease{
|
|
{
|
|
LeaseID: "lease-node-b-via-node-a",
|
|
PeerNodeID: "node-b",
|
|
RelayNodeID: "node-a",
|
|
RelayEndpoint: "http://node-a:19001",
|
|
Transport: "relay_control",
|
|
ConnectivityMode: "relay_required",
|
|
RouteIDs: []string{"route-ab"},
|
|
AllowedChannels: []string{mesh.SyntheticChannelFabricControl},
|
|
Priority: 10,
|
|
ControlPlaneOnly: true,
|
|
IssuedAt: now.Add(-time.Minute),
|
|
ExpiresAt: now.Add(5 * time.Minute),
|
|
},
|
|
{
|
|
LeaseID: "lease-node-a-via-node-r",
|
|
PeerNodeID: "node-a",
|
|
RelayNodeID: "node-r",
|
|
RelayEndpoint: "http://node-r:19001",
|
|
Transport: "relay_control",
|
|
ConnectivityMode: "relay_required",
|
|
RouteIDs: []string{"route-ra"},
|
|
Priority: 20,
|
|
ControlPlaneOnly: true,
|
|
IssuedAt: now.Add(-2 * time.Minute),
|
|
ExpiresAt: now.Add(30 * time.Second),
|
|
},
|
|
{
|
|
LeaseID: "lease-node-c-via-node-r-expired",
|
|
PeerNodeID: "node-c",
|
|
RelayNodeID: "node-r",
|
|
RelayEndpoint: "http://node-r:19001",
|
|
Transport: "relay_control",
|
|
ConnectivityMode: "relay_required",
|
|
RouteIDs: []string{"route-cr"},
|
|
Priority: 30,
|
|
ControlPlaneOnly: true,
|
|
IssuedAt: now.Add(-10 * time.Minute),
|
|
ExpiresAt: now.Add(-time.Second),
|
|
},
|
|
}
|
|
cache := mesh.NewPeerCache(mesh.PeerCacheConfig{
|
|
Local: mesh.PeerIdentity{ClusterID: identity.ClusterID, NodeID: identity.NodeID},
|
|
RendezvousLeases: leases,
|
|
WarmPeerLimit: 3,
|
|
Now: now,
|
|
})
|
|
tracker := mesh.NewPeerConnectionTracker(cache.Snapshot(), now)
|
|
tracker.RecordRelayReady(mesh.PeerCacheEntry{
|
|
NodeID: "node-b",
|
|
Endpoint: "http://node-a:19001",
|
|
Warm: true,
|
|
RendezvousLeaseID: "lease-node-b-via-node-a",
|
|
RelayNodeID: "node-a",
|
|
RelayEndpoint: "http://node-a:19001",
|
|
RelayControl: true,
|
|
BestTransport: "relay_control",
|
|
BestReachability: "relay",
|
|
BestConnectivity: "relay_required",
|
|
BestCandidateScore: 500,
|
|
}, 12, now.Add(time.Second))
|
|
meshState := &syntheticMeshState{
|
|
PeerCache: cache,
|
|
RendezvousLeases: leases,
|
|
PeerConnections: tracker,
|
|
}
|
|
|
|
payload := heartbeatPayload(config.Config{}, identity, meshState, now)
|
|
|
|
report, ok := payload.Metadata["mesh_rendezvous_lease_report"].(map[string]any)
|
|
if !ok {
|
|
t.Fatalf("rendezvous lease report missing: %+v", payload.Metadata)
|
|
}
|
|
if report["schema_version"] != meshRendezvousLeaseReportSchema ||
|
|
report["lease_count"] != 3 ||
|
|
report["active_count"] != 2 ||
|
|
report["expired_count"] != 1 ||
|
|
report["admitted_as_relay_count"] != 1 ||
|
|
report["admitted_as_peer_count"] != 1 ||
|
|
report["renewal_needed_count"] != 1 ||
|
|
report["relay_control_ready_count"] != 1 {
|
|
t.Fatalf("unexpected lease report: %+v", report)
|
|
}
|
|
if report["control_plane_only"] != true ||
|
|
report["relay_payload_forwarding"] != false ||
|
|
report["production_payload_forwarding"] != false {
|
|
t.Fatalf("payload boundary flags not preserved: %+v", report)
|
|
}
|
|
leaseDetails, ok := report["leases"].([]map[string]any)
|
|
if !ok || len(leaseDetails) != 3 {
|
|
t.Fatalf("unexpected lease details: %#v", report["leases"])
|
|
}
|
|
if leaseDetails[0]["role"] != "relay" ||
|
|
leaseDetails[0]["status"] != "admitted" ||
|
|
leaseDetails[0]["admitted"] != true ||
|
|
leaseDetails[0]["relay_ready"] != true {
|
|
t.Fatalf("relay admission detail missing: %+v", leaseDetails[0])
|
|
}
|
|
if leaseDetails[1]["role"] != "peer" ||
|
|
leaseDetails[1]["status"] != "renewal_needed" ||
|
|
leaseDetails[1]["renewal_needed"] != true {
|
|
t.Fatalf("peer renewal detail missing: %+v", leaseDetails[1])
|
|
}
|
|
if payload.Capabilities[meshRendezvousLeaseTelemetryCapability] != true {
|
|
t.Fatalf("lease telemetry capability missing: %+v", payload.Capabilities)
|
|
}
|
|
}
|
|
|
|
func TestHeartbeatPayloadReportsStaleRelayWithdrawalTelemetry(t *testing.T) {
|
|
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
|
identity := state.Identity{ClusterID: "cluster-1", NodeID: "node-r"}
|
|
lease := mesh.PeerRendezvousLease{
|
|
LeaseID: "lease-node-b-via-node-r",
|
|
PeerNodeID: "node-b",
|
|
RelayNodeID: "node-r",
|
|
RelayEndpoint: "http://node-r:19001",
|
|
Transport: "relay_control",
|
|
ConnectivityMode: "relay_required",
|
|
RouteIDs: []string{"route-rb"},
|
|
Priority: 10,
|
|
ControlPlaneOnly: true,
|
|
IssuedAt: now.Add(-time.Minute),
|
|
ExpiresAt: now.Add(10 * time.Minute),
|
|
}
|
|
altLease := lease
|
|
altLease.LeaseID = "lease-node-b-via-node-r2"
|
|
altLease.RelayNodeID = "node-r2"
|
|
altLease.RelayEndpoint = "http://node-r2:19001"
|
|
altLease.Priority = 20
|
|
cache := mesh.NewPeerCache(mesh.PeerCacheConfig{
|
|
Local: mesh.PeerIdentity{ClusterID: identity.ClusterID, NodeID: identity.NodeID},
|
|
RendezvousLeases: []mesh.PeerRendezvousLease{lease, altLease},
|
|
WarmPeerLimit: 1,
|
|
Now: now,
|
|
})
|
|
tracker := mesh.NewPeerConnectionTracker(cache.Snapshot(), now)
|
|
peer := mesh.PeerCacheEntry{
|
|
NodeID: "node-b",
|
|
Endpoint: "http://node-r:19001",
|
|
Warm: true,
|
|
RendezvousLeaseID: "lease-node-b-via-node-r",
|
|
RelayNodeID: "node-r",
|
|
RelayEndpoint: "http://node-r:19001",
|
|
RelayControl: true,
|
|
}
|
|
tracker.RecordRelayReady(peer, 10, now.Add(time.Second))
|
|
tracker.RecordFailure("node-b", "relay health failed", now.Add(2*time.Second))
|
|
tracker.RecordFailure("node-b", "relay health failed", now.Add(3*time.Second))
|
|
tracker.RecordFailure("node-b", "relay health failed", now.Add(4*time.Second))
|
|
meshState := &syntheticMeshState{
|
|
PeerCache: cache,
|
|
RendezvousLeases: []mesh.PeerRendezvousLease{lease, altLease},
|
|
PeerConnections: tracker,
|
|
Source: "control_plane",
|
|
}
|
|
|
|
payload := heartbeatPayload(config.Config{}, identity, meshState, now.Add(5*time.Second))
|
|
|
|
report := payload.Metadata["mesh_rendezvous_lease_report"].(map[string]any)
|
|
if report["stale_relay_count"] != 1 ||
|
|
report["withdrawal_needed_count"] != 1 ||
|
|
report["reselection_needed_count"] != 0 ||
|
|
report["refresh_needed_count"] != 1 {
|
|
t.Fatalf("unexpected stale relay report: %+v", report)
|
|
}
|
|
leaseDetails := report["leases"].([]map[string]any)
|
|
if leaseDetails[0]["stale_relay"] != true ||
|
|
leaseDetails[0]["withdrawal_needed"] != true ||
|
|
leaseDetails[0]["connection_state"] != mesh.PeerConnectionBackoff {
|
|
t.Fatalf("stale relay detail missing: %+v", leaseDetails[0])
|
|
}
|
|
if leaseDetails[1]["stale_relay"] != false ||
|
|
leaseDetails[1]["withdrawal_needed"] != false {
|
|
t.Fatalf("alternate relay lease should not inherit stale state: %+v", leaseDetails[1])
|
|
}
|
|
}
|
|
|
|
func TestRefreshRendezvousLeasesIfNeededReloadsControlPlaneConfig(t *testing.T) {
|
|
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
|
identity := state.Identity{ClusterID: "cluster-1", NodeID: "node-a"}
|
|
oldLease := mesh.PeerRendezvousLease{
|
|
LeaseID: "lease-node-b-via-node-r-old",
|
|
PeerNodeID: "node-b",
|
|
RelayNodeID: "node-r-old",
|
|
RelayEndpoint: "http://node-r-old:19001",
|
|
Transport: "relay_control",
|
|
ConnectivityMode: "relay_required",
|
|
RouteIDs: []string{"route-ab"},
|
|
Priority: 10,
|
|
ControlPlaneOnly: true,
|
|
IssuedAt: now.Add(-2 * time.Minute),
|
|
ExpiresAt: now.Add(30 * time.Second),
|
|
}
|
|
local := mesh.PeerIdentity{ClusterID: identity.ClusterID, NodeID: identity.NodeID}
|
|
oldCache := mesh.NewPeerCache(mesh.PeerCacheConfig{
|
|
Local: local,
|
|
RendezvousLeases: []mesh.PeerRendezvousLease{oldLease},
|
|
WarmPeerLimit: 1,
|
|
Now: now,
|
|
})
|
|
tracker := mesh.NewPeerConnectionTracker(oldCache.Snapshot(), now)
|
|
oldPathDecisions := &client.RoutePathDecisionReport{
|
|
SchemaVersion: "c17z18.route_path_decisions.v1",
|
|
DecisionMode: "control_plane_effective_path_from_relay_policy",
|
|
Generation: "old-config",
|
|
DecisionCount: 1,
|
|
ReplacementDecisionCount: 0,
|
|
ControlPlaneOnly: true,
|
|
ProductionForwarding: false,
|
|
Decisions: []client.RoutePathDecision{
|
|
{
|
|
DecisionID: "route-ab-path-node-a-via-node-r-old",
|
|
RouteID: "route-ab",
|
|
ClusterID: "cluster-1",
|
|
LocalNodeID: "node-a",
|
|
SourceNodeID: "node-a",
|
|
DestinationNodeID: "node-b",
|
|
OriginalHops: []string{"node-a", "node-r-old", "node-b"},
|
|
EffectiveHops: []string{"node-a", "node-r-old", "node-b"},
|
|
NextHopID: "node-r-old",
|
|
LocalRole: "entry",
|
|
DecisionSource: "route_intent",
|
|
Generation: "old-config",
|
|
PathScore: 1000,
|
|
ControlPlaneOnly: true,
|
|
ProductionForwarding: false,
|
|
ExpiresAt: now.Add(10 * time.Minute),
|
|
},
|
|
},
|
|
}
|
|
meshState := &syntheticMeshState{
|
|
PeerCache: oldCache,
|
|
RendezvousLeases: []mesh.PeerRendezvousLease{oldLease},
|
|
RoutePathDecisions: oldPathDecisions,
|
|
RouteGenerationTracker: newMeshRouteGenerationTracker(
|
|
oldPathDecisions,
|
|
now.Add(-time.Minute),
|
|
),
|
|
PeerConnections: tracker,
|
|
PeerConnectionManager: mesh.NewPeerConnectionManager(mesh.PeerConnectionManagerConfig{Local: local, PeerCache: oldCache, Tracker: tracker, RendezvousLeases: []mesh.PeerRendezvousLease{oldLease}}),
|
|
Source: "control_plane",
|
|
ConfigVersion: "old-config",
|
|
}
|
|
|
|
requests := 0
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
if r.URL.Path != "/clusters/cluster-1/nodes/node-a/mesh/synthetic-config" {
|
|
t.Fatalf("unexpected path: %s", r.URL.Path)
|
|
}
|
|
requests++
|
|
response := map[string]any{
|
|
"synthetic_mesh_config": map[string]any{
|
|
"enabled": true,
|
|
"schema_version": "c17z18.synthetic.v1",
|
|
"cluster_id": "cluster-1",
|
|
"local_node_id": "node-a",
|
|
"config_version": "new-config",
|
|
"peer_directory_version": "new-config",
|
|
"policy_version": "new-config",
|
|
"peer_endpoints": map[string]string{"node-r-new": "http://node-r-new:19001"},
|
|
"peer_endpoint_candidates": map[string]any{
|
|
"node-b": []map[string]any{
|
|
{
|
|
"endpoint_id": "node-b-outbound-only",
|
|
"node_id": "node-b",
|
|
"transport": "outbound_reverse",
|
|
"address": "http://node-b:19002",
|
|
"address_family": "ipv4",
|
|
"reachability": "outbound_only",
|
|
"connectivity_mode": "outbound_only",
|
|
"nat_type": "symmetric",
|
|
"region": "test",
|
|
"priority": 5,
|
|
},
|
|
},
|
|
},
|
|
"peer_directory": []map[string]any{
|
|
{"node_id": "node-b", "route_ids": []string{"route-ab"}, "endpoint_count": 0, "candidate_count": 1, "connectivity_modes": []string{"relay_required"}, "recovery_seed": false},
|
|
},
|
|
"rendezvous_leases": []map[string]any{
|
|
{
|
|
"lease_id": "lease-node-b-via-node-r-new",
|
|
"peer_node_id": "node-b",
|
|
"relay_node_id": "node-r-new",
|
|
"relay_endpoint": "http://node-r-new:19001",
|
|
"transport": "relay_control",
|
|
"connectivity_mode": "relay_required",
|
|
"route_ids": []string{"route-ab"},
|
|
"allowed_channels": []string{mesh.SyntheticChannelFabricControl},
|
|
"priority": 5,
|
|
"control_plane_only": true,
|
|
"issued_at": now,
|
|
"expires_at": now.Add(10 * time.Minute),
|
|
"reason": "refresh_test",
|
|
},
|
|
},
|
|
"route_path_decisions": map[string]any{
|
|
"schema_version": "c17z18.route_path_decisions.v1",
|
|
"decision_mode": "control_plane_effective_path_from_relay_policy",
|
|
"generation": "new-config",
|
|
"decision_count": 1,
|
|
"replacement_decision_count": 1,
|
|
"control_plane_only": true,
|
|
"production_forwarding": false,
|
|
"decisions": []map[string]any{
|
|
{
|
|
"decision_id": "route-ab-path-node-a-via-node-r-new",
|
|
"route_id": "route-ab",
|
|
"cluster_id": "cluster-1",
|
|
"local_node_id": "node-a",
|
|
"source_node_id": "node-a",
|
|
"destination_node_id": "node-b",
|
|
"original_hops": []string{"node-a", "node-r-old", "node-r-new", "node-b"},
|
|
"effective_hops": []string{"node-a", "node-r-new", "node-b"},
|
|
"next_hop_id": "node-r-new",
|
|
"local_role": "entry",
|
|
"selected_relay_id": "node-r-new",
|
|
"selected_relay_endpoint": "http://node-r-new:19001",
|
|
"stale_relay_node_id": "node-r-old",
|
|
"rendezvous_lease_id": "lease-node-b-via-node-r-new",
|
|
"rendezvous_lease_reason": "stale_relay_replacement",
|
|
"decision_source": "stale_relay_replacement",
|
|
"generation": "new-config",
|
|
"path_score": 900,
|
|
"score_reasons": []string{"relay_replacement_policy"},
|
|
"control_plane_only": true,
|
|
"production_forwarding": false,
|
|
"expires_at": now.Add(10 * time.Minute),
|
|
},
|
|
},
|
|
},
|
|
"routes": []map[string]any{
|
|
{
|
|
"route_id": "route-ab",
|
|
"cluster_id": "cluster-1",
|
|
"source_node_id": "node-a",
|
|
"destination_node_id": "node-b",
|
|
"hops": []string{"node-a", "node-r-old", "node-r-new", "node-b"},
|
|
"allowed_channels": []string{mesh.SyntheticChannelFabricControl},
|
|
"expires_at": now.Add(10 * time.Minute),
|
|
"max_ttl": 6,
|
|
"max_hops": 6,
|
|
},
|
|
},
|
|
"production_forwarding": false,
|
|
},
|
|
}
|
|
if err := json.NewEncoder(w).Encode(response); err != nil {
|
|
t.Fatalf("write response: %v", err)
|
|
}
|
|
}))
|
|
defer server.Close()
|
|
|
|
err := refreshRendezvousLeasesIfNeeded(context.Background(), config.Config{}, identity, client.New(server.URL), meshState, now)
|
|
if err != nil {
|
|
t.Fatalf("refresh leases: %v", err)
|
|
}
|
|
if requests != 1 {
|
|
t.Fatalf("requests = %d, want 1", requests)
|
|
}
|
|
if meshState.ConfigVersion != "new-config" ||
|
|
len(meshState.RendezvousLeases) != 1 ||
|
|
meshState.RendezvousLeases[0].RelayNodeID != "node-r-new" {
|
|
t.Fatalf("mesh state was not refreshed: version=%s leases=%+v", meshState.ConfigVersion, meshState.RendezvousLeases)
|
|
}
|
|
if meshState.RoutePathDecisions == nil ||
|
|
meshState.RoutePathDecisions.SchemaVersion != "c17z18.route_path_decisions.v1" ||
|
|
meshState.RoutePathDecisions.ReplacementDecisionCount != 1 ||
|
|
len(meshState.RoutePathDecisions.Decisions) != 1 ||
|
|
meshState.RoutePathDecisions.Decisions[0].NextHopID != "node-r-new" {
|
|
t.Fatalf("route path decisions were not refreshed: %+v", meshState.RoutePathDecisions)
|
|
}
|
|
if len(meshState.Routes) != 1 ||
|
|
!sameStringSlice(meshState.Routes[0].Hops, []string{"node-a", "node-r-old", "node-r-new", "node-b"}) {
|
|
t.Fatalf("base routes should remain original for non-route-health runtime: %+v", meshState.Routes)
|
|
}
|
|
if len(meshState.RouteHealthRoutes) != 1 ||
|
|
!sameStringSlice(meshState.RouteHealthRoutes[0].Hops, []string{"node-a", "node-r-new", "node-b"}) ||
|
|
meshState.RouteHealthRoutes[0].RouteVersion != "new-config" {
|
|
t.Fatalf("route health routes were not generated from path decisions: %+v", meshState.RouteHealthRoutes)
|
|
}
|
|
if meshState.LeaseRefreshAttempts != 1 || meshState.LeaseRefreshSuccesses != 1 || meshState.LeaseRefreshFailures != 0 {
|
|
t.Fatalf("unexpected refresh counters: attempts=%d successes=%d failures=%d", meshState.LeaseRefreshAttempts, meshState.LeaseRefreshSuccesses, meshState.LeaseRefreshFailures)
|
|
}
|
|
if meshState.LastLeaseRefresh == nil ||
|
|
meshState.LastLeaseRefresh.Status != "succeeded" ||
|
|
meshState.LastLeaseRefresh.Reason != "renewal_needed" {
|
|
t.Fatalf("unexpected refresh state: %+v", meshState.LastLeaseRefresh)
|
|
}
|
|
if meshState.LastConfigRefreshAt.IsZero() {
|
|
t.Fatalf("last config refresh time was not updated")
|
|
}
|
|
recoveryPlan := peerRecoveryPlan(meshState, now.Add(time.Second))
|
|
intentPlan := peerConnectionIntentPlan(meshState, recoveryPlan, now.Add(time.Second))
|
|
var peerIntent mesh.PeerConnectionIntent
|
|
for _, intent := range intentPlan.Intents {
|
|
if intent.NodeID == "node-b" {
|
|
peerIntent = intent
|
|
break
|
|
}
|
|
}
|
|
if intentPlan.RendezvousResolvedCount != 1 ||
|
|
peerIntent.RendezvousLeaseID != "lease-node-b-via-node-r-new" ||
|
|
peerIntent.RelayNodeID != "node-r-new" {
|
|
t.Fatalf("refreshed lease was not selected: %+v", intentPlan)
|
|
}
|
|
pathReport := meshRoutePathDecisionReport(meshState, identity, now.Add(time.Second))
|
|
if pathReport["schema_version"] != meshRoutePathDecisionReportSchema ||
|
|
pathReport["replacement_decision_count"] != 1 ||
|
|
pathReport["next_hop_available_count"] != 1 ||
|
|
pathReport["production_payload_forwarding"] != false {
|
|
t.Fatalf("unexpected route path decision report: %+v", pathReport)
|
|
}
|
|
generationReport := meshRouteGenerationReport(meshState, identity, now.Add(time.Second))
|
|
if generationReport["schema_version"] != meshRouteGenerationReportSchema ||
|
|
generationReport["active_decision_count"] != 1 ||
|
|
generationReport["applied_decision_count"] != 1 ||
|
|
generationReport["withdrawn_decision_count"] != 1 ||
|
|
generationReport["generation_changed"] != true ||
|
|
generationReport["production_payload_forwarding"] != false {
|
|
t.Fatalf("unexpected route generation report: %+v", generationReport)
|
|
}
|
|
routeHealthConfigReport := meshRouteHealthConfigReport(meshState, identity, now.Add(time.Second))
|
|
if routeHealthConfigReport["schema_version"] != meshRouteHealthConfigReportSchema ||
|
|
routeHealthConfigReport["route_path_decision_applied_count"] != 1 ||
|
|
routeHealthConfigReport["replacement_route_health_route_count"] != 1 ||
|
|
routeHealthConfigReport["synthetic_route_health_route_path_runtime"] != true ||
|
|
routeHealthConfigReport["test_service_route_config_changed"] != false ||
|
|
routeHealthConfigReport["config_refresh_interval_ms"] != int64(meshSyntheticConfigRefreshInterval/time.Millisecond) ||
|
|
routeHealthConfigReport["production_payload_forwarding"] != false {
|
|
t.Fatalf("unexpected route health config report: %+v", routeHealthConfigReport)
|
|
}
|
|
}
|
|
|
|
func TestRouteHealthFeedbackRefreshAppliesReplacementConfig(t *testing.T) {
|
|
now := time.Date(2026, 4, 28, 22, 0, 0, 0, time.UTC)
|
|
identity := state.Identity{ClusterID: "cluster-1", NodeID: "node-a"}
|
|
local := mesh.PeerIdentity{ClusterID: identity.ClusterID, NodeID: identity.NodeID}
|
|
oldRoute := mesh.SyntheticRoute{
|
|
RouteID: "route-ab",
|
|
ClusterID: "cluster-1",
|
|
SourceNodeID: "node-a",
|
|
DestinationNodeID: "node-b",
|
|
Hops: []string{"node-a", "node-r-old", "node-b"},
|
|
AllowedChannels: []string{mesh.SyntheticChannelFabricControl},
|
|
ExpiresAt: now.Add(10 * time.Minute),
|
|
RouteVersion: "old-config",
|
|
}
|
|
oldCache := mesh.NewPeerCache(mesh.PeerCacheConfig{
|
|
Local: local,
|
|
PeerEndpoints: map[string]string{"node-r-old": "http://node-r-old:19001"},
|
|
Routes: []mesh.SyntheticRoute{oldRoute},
|
|
WarmPeerLimit: 1,
|
|
Now: now,
|
|
})
|
|
meshState := &syntheticMeshState{
|
|
PeerCache: oldCache,
|
|
Routes: []mesh.SyntheticRoute{oldRoute},
|
|
RouteHealthRoutes: []mesh.SyntheticRoute{oldRoute},
|
|
Source: "control_plane",
|
|
ConfigVersion: "old-config",
|
|
PeerConnections: mesh.NewPeerConnectionTracker(oldCache.Snapshot(), now),
|
|
}
|
|
|
|
requests := 0
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
if r.URL.Path != "/clusters/cluster-1/nodes/node-a/mesh/synthetic-config" {
|
|
t.Fatalf("unexpected path: %s", r.URL.Path)
|
|
}
|
|
requests++
|
|
response := map[string]any{
|
|
"synthetic_mesh_config": map[string]any{
|
|
"enabled": true,
|
|
"schema_version": "c17z20.synthetic.v1",
|
|
"cluster_id": "cluster-1",
|
|
"local_node_id": "node-a",
|
|
"config_version": "new-config",
|
|
"peer_directory_version": "new-config",
|
|
"policy_version": "new-config",
|
|
"peer_endpoints": map[string]string{"node-r-new": "http://node-r-new:19001"},
|
|
"rendezvous_leases": []map[string]any{
|
|
{
|
|
"lease_id": "lease-node-b-via-node-r-new",
|
|
"peer_node_id": "node-b",
|
|
"relay_node_id": "node-r-new",
|
|
"relay_endpoint": "http://node-r-new:19001",
|
|
"transport": "relay_control",
|
|
"connectivity_mode": "relay_required",
|
|
"route_ids": []string{"route-ab"},
|
|
"allowed_channels": []string{mesh.SyntheticChannelFabricControl},
|
|
"priority": 5,
|
|
"control_plane_only": true,
|
|
"issued_at": now,
|
|
"expires_at": now.Add(10 * time.Minute),
|
|
"reason": "stale_relay_replacement",
|
|
},
|
|
},
|
|
"route_path_decisions": map[string]any{
|
|
"schema_version": "c17z20.route_path_decisions.v1",
|
|
"decision_mode": "control_plane_effective_path_from_relay_policy",
|
|
"generation": "new-config",
|
|
"decision_count": 1,
|
|
"replacement_decision_count": 1,
|
|
"control_plane_only": true,
|
|
"production_forwarding": false,
|
|
"decisions": []map[string]any{
|
|
{
|
|
"decision_id": "route-ab-path-node-a-via-node-r-new",
|
|
"route_id": "route-ab",
|
|
"cluster_id": "cluster-1",
|
|
"local_node_id": "node-a",
|
|
"source_node_id": "node-a",
|
|
"destination_node_id": "node-b",
|
|
"original_hops": []string{"node-a", "node-r-old", "node-r-new", "node-b"},
|
|
"effective_hops": []string{"node-a", "node-r-new", "node-b"},
|
|
"next_hop_id": "node-r-new",
|
|
"local_role": "entry",
|
|
"selected_relay_id": "node-r-new",
|
|
"selected_relay_endpoint": "http://node-r-new:19001",
|
|
"stale_relay_node_id": "node-r-old",
|
|
"rendezvous_peer_node_id": "node-b",
|
|
"rendezvous_lease_id": "lease-node-b-via-node-r-new",
|
|
"rendezvous_lease_reason": "stale_relay_replacement",
|
|
"decision_source": "stale_relay_replacement",
|
|
"generation": "new-config",
|
|
"path_score": 900,
|
|
"score_reasons": []string{"route_health_feedback"},
|
|
"control_plane_only": true,
|
|
"production_forwarding": false,
|
|
"expires_at": now.Add(10 * time.Minute),
|
|
},
|
|
},
|
|
},
|
|
"routes": []map[string]any{
|
|
{
|
|
"route_id": "route-ab",
|
|
"cluster_id": "cluster-1",
|
|
"source_node_id": "node-a",
|
|
"destination_node_id": "node-b",
|
|
"hops": []string{"node-a", "node-r-old", "node-r-new", "node-b"},
|
|
"allowed_channels": []string{mesh.SyntheticChannelFabricControl},
|
|
"expires_at": now.Add(10 * time.Minute),
|
|
"max_ttl": 6,
|
|
"max_hops": 6,
|
|
},
|
|
},
|
|
"production_forwarding": false,
|
|
},
|
|
}
|
|
if err := json.NewEncoder(w).Encode(response); err != nil {
|
|
t.Fatalf("write response: %v", err)
|
|
}
|
|
}))
|
|
defer server.Close()
|
|
|
|
trigger := meshRouteHealthFeedbackTrigger{
|
|
Reason: "synthetic_route_health_drift",
|
|
RouteID: "route-ab",
|
|
PeerNodeID: "node-b",
|
|
SelectedRelayID: "node-r-old",
|
|
LinkStatus: "reachable",
|
|
DriftDetected: true,
|
|
ObservedAt: now,
|
|
}
|
|
err := refreshSyntheticMeshConfigForRouteHealthFeedback(context.Background(), config.Config{}, identity, client.New(server.URL), meshState, trigger, now)
|
|
if err != nil {
|
|
t.Fatalf("refresh route health feedback: %v", err)
|
|
}
|
|
if requests != 1 {
|
|
t.Fatalf("requests = %d, want 1", requests)
|
|
}
|
|
if meshState.ConfigVersion != "new-config" ||
|
|
len(meshState.RouteHealthRoutes) != 1 ||
|
|
!sameStringSlice(meshState.RouteHealthRoutes[0].Hops, []string{"node-a", "node-r-new", "node-b"}) {
|
|
t.Fatalf("replacement config was not applied: version=%s route_health_routes=%+v", meshState.ConfigVersion, meshState.RouteHealthRoutes)
|
|
}
|
|
if meshState.RouteHealthRefreshAttempts != 1 || meshState.RouteHealthRefreshSuccesses != 1 || meshState.RouteHealthRefreshFailures != 0 {
|
|
t.Fatalf("unexpected refresh counters: attempts=%d successes=%d failures=%d", meshState.RouteHealthRefreshAttempts, meshState.RouteHealthRefreshSuccesses, meshState.RouteHealthRefreshFailures)
|
|
}
|
|
if meshState.LastRouteHealthRefresh == nil ||
|
|
meshState.LastRouteHealthRefresh.Status != "succeeded" ||
|
|
meshState.LastRouteHealthRefresh.Reason != "synthetic_route_health_drift" ||
|
|
meshState.LastRouteHealthRefresh.RefreshedConfigVersion != "new-config" {
|
|
t.Fatalf("unexpected refresh state: %+v", meshState.LastRouteHealthRefresh)
|
|
}
|
|
report := meshRouteHealthFeedbackRefreshReport(meshState, identity, now.Add(time.Second))
|
|
if report["schema_version"] != meshRouteHealthFeedbackRefreshSchema ||
|
|
report["feedback_refresh_attempt_count"] != 1 ||
|
|
report["feedback_refresh_success_count"] != 1 ||
|
|
report["last_feedback_refresh_status"] != "succeeded" ||
|
|
report["last_feedback_refresh_reason"] != "synthetic_route_health_drift" ||
|
|
report["production_payload_forwarding"] != false {
|
|
t.Fatalf("unexpected feedback refresh report: %+v", report)
|
|
}
|
|
routeHealthConfigReport := meshRouteHealthConfigReport(meshState, identity, now.Add(time.Second))
|
|
if routeHealthConfigReport["schema_version"] != meshRouteHealthConfigReportSchema ||
|
|
routeHealthConfigReport["feedback_refresh_backoff_ms"] != int64(meshRouteHealthFeedbackRefreshBackoff/time.Millisecond) ||
|
|
routeHealthConfigReport["feedback_refresh_attempt_count"] != 1 ||
|
|
routeHealthConfigReport["feedback_refresh_success_count"] != 1 ||
|
|
routeHealthConfigReport["production_payload_forwarding"] != false {
|
|
t.Fatalf("unexpected route health config report: %+v", routeHealthConfigReport)
|
|
}
|
|
}
|
|
|
|
func TestRouteHealthFeedbackRefreshBackoffSuppressesRepeatedTrigger(t *testing.T) {
|
|
now := time.Date(2026, 4, 28, 22, 5, 0, 0, time.UTC)
|
|
local := mesh.PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"}
|
|
cache := mesh.NewPeerCache(mesh.PeerCacheConfig{
|
|
Local: local,
|
|
PeerEndpoints: map[string]string{"node-b": "http://node-b:19002"},
|
|
WarmPeerLimit: 1,
|
|
Now: now,
|
|
})
|
|
meshState := &syntheticMeshState{
|
|
PeerCache: cache,
|
|
Source: "control_plane",
|
|
LastRouteHealthRefresh: &meshRouteHealthFeedbackRefreshState{
|
|
Status: "succeeded",
|
|
Reason: "synthetic_route_health_drift",
|
|
AttemptedAt: now.Add(-time.Second),
|
|
RouteID: "route-ab",
|
|
},
|
|
}
|
|
trigger := meshRouteHealthFeedbackTrigger{
|
|
Reason: "synthetic_route_health_failure",
|
|
RouteID: "route-ab",
|
|
PeerNodeID: "node-b",
|
|
LinkStatus: "unreachable",
|
|
FailureReason: "probe failed",
|
|
ObservedAt: now,
|
|
}
|
|
err := refreshSyntheticMeshConfigForRouteHealthFeedback(context.Background(), config.Config{}, state.Identity{ClusterID: "cluster-1", NodeID: "node-a"}, client.New("http://127.0.0.1:1"), meshState, trigger, now)
|
|
if err != nil {
|
|
t.Fatalf("refresh should have been suppressed without backend call: %v", err)
|
|
}
|
|
if meshState.RouteHealthRefreshAttempts != 0 || meshState.RouteHealthRefreshSuppressed != 1 {
|
|
t.Fatalf("unexpected counters: attempts=%d suppressed=%d", meshState.RouteHealthRefreshAttempts, meshState.RouteHealthRefreshSuppressed)
|
|
}
|
|
if meshState.LastRouteHealthRefresh.Reason != "synthetic_route_health_drift" {
|
|
t.Fatalf("suppressed refresh should not replace last state: %+v", meshState.LastRouteHealthRefresh)
|
|
}
|
|
}
|
|
|
|
func TestRouteHealthFeedbackTriggerFromObservation(t *testing.T) {
|
|
now := time.Date(2026, 4, 28, 22, 10, 0, 0, time.UTC)
|
|
route := mesh.SyntheticRoute{
|
|
RouteID: "route-ab",
|
|
SourceNodeID: "node-a",
|
|
DestinationNodeID: "node-b",
|
|
Hops: []string{"node-a", "node-r-old", "node-b"},
|
|
}
|
|
decision := client.RoutePathDecision{
|
|
RouteID: "route-ab",
|
|
RendezvousPeerNodeID: "node-b",
|
|
SelectedRelayID: "node-r-old",
|
|
RendezvousLeaseID: "lease-old",
|
|
RendezvousLeaseReason: "auto_outbound_only",
|
|
}
|
|
trigger, ok := routeHealthFeedbackTriggerFromObservation(route, decision, true, "reachable", map[string]any{
|
|
"route_path_drift_detected": true,
|
|
}, now)
|
|
if !ok ||
|
|
trigger.Reason != "synthetic_route_health_drift" ||
|
|
trigger.RouteID != "route-ab" ||
|
|
trigger.PeerNodeID != "node-b" ||
|
|
trigger.SelectedRelayID != "node-r-old" ||
|
|
!trigger.DriftDetected {
|
|
t.Fatalf("unexpected drift trigger: %+v ok=%t", trigger, ok)
|
|
}
|
|
if _, ok := routeHealthFeedbackTriggerFromObservation(route, decision, true, "reachable", map[string]any{
|
|
"route_path_drift_detected": false,
|
|
}, now); ok {
|
|
t.Fatal("healthy route-health observation should not trigger refresh")
|
|
}
|
|
}
|
|
|
|
func TestMeshRouteGenerationTrackerReportsReplacementWithdrawOnFirstApply(t *testing.T) {
|
|
now := time.Date(2026, 4, 28, 13, 50, 0, 0, time.UTC)
|
|
report := &client.RoutePathDecisionReport{
|
|
SchemaVersion: "c17z18.route_path_decisions.v1",
|
|
Generation: "config-replacement",
|
|
ReplacementDecisionCount: 1,
|
|
Decisions: []client.RoutePathDecision{
|
|
{
|
|
DecisionID: "route-1-path-node-a-via-node-r-new",
|
|
RouteID: "route-1",
|
|
LocalNodeID: "node-a",
|
|
OriginalHops: []string{"node-a", "node-r-old", "node-r-new", "node-c"},
|
|
EffectiveHops: []string{"node-a", "node-r-new", "node-c"},
|
|
DecisionSource: "stale_relay_replacement",
|
|
Generation: "config-replacement",
|
|
LocalRole: "entry",
|
|
NextHopID: "node-r-new",
|
|
SelectedRelayID: "node-r-new",
|
|
StaleRelayNodeID: "node-r-old",
|
|
RendezvousLeaseID: "lease-node-c-via-node-r-new",
|
|
PathScore: 760,
|
|
ControlPlaneOnly: true,
|
|
ProductionForwarding: false,
|
|
RendezvousLeaseReason: "stale_relay_replacement",
|
|
SelectedRelayEndpoint: "http://node-r-new:19124",
|
|
},
|
|
},
|
|
}
|
|
meshState := &syntheticMeshState{
|
|
Source: "control_plane",
|
|
ConfigVersion: "config-replacement",
|
|
RoutePathDecisions: report,
|
|
RouteGenerationTracker: newMeshRouteGenerationTracker(report, now),
|
|
}
|
|
generationReport := meshRouteGenerationReport(meshState, state.Identity{
|
|
ClusterID: "cluster-1",
|
|
NodeID: "node-a",
|
|
}, now.Add(time.Second))
|
|
if generationReport["active_decision_count"] != 1 ||
|
|
generationReport["applied_decision_count"] != 1 ||
|
|
generationReport["withdrawn_decision_count"] != 1 ||
|
|
generationReport["total_withdrawn_decision_count"] != 1 ||
|
|
generationReport["generation_changed"] != true {
|
|
t.Fatalf("unexpected first-apply route generation report: %+v", generationReport)
|
|
}
|
|
}
|
|
|
|
func TestProductionEnvelopeObservationSinkFromConfigCreatesBoundedSink(t *testing.T) {
|
|
sink := productionEnvelopeObservationSinkFromConfig(config.Config{
|
|
MeshProductionObservationSinkCapacity: 2,
|
|
})
|
|
if sink == nil {
|
|
t.Fatal("sink is nil")
|
|
}
|
|
if sink.Capacity() != 2 {
|
|
t.Fatalf("sink capacity = %d, want 2", sink.Capacity())
|
|
}
|
|
}
|
|
|
|
func TestProductionForwardingLogStateDistinguishesGateFromRuntime(t *testing.T) {
|
|
gateEnabled, runtimeEnabled := productionForwardingLogState(config.Config{
|
|
MeshProductionForwardingEnabled: true,
|
|
})
|
|
if !gateEnabled {
|
|
t.Fatal("gateEnabled = false, want true")
|
|
}
|
|
if !runtimeEnabled {
|
|
t.Fatal("runtimeEnabled = false, want true")
|
|
}
|
|
gateEnabled, runtimeEnabled = productionForwardingLogState(config.Config{})
|
|
if gateEnabled || runtimeEnabled {
|
|
t.Fatalf("default log state = gate:%t runtime:%t, want false/false", gateEnabled, runtimeEnabled)
|
|
}
|
|
}
|
|
|
|
func TestLogProductionObservationSinkMetricsToleratesNilState(t *testing.T) {
|
|
logProductionObservationSinkMetrics(nil)
|
|
logProductionObservationSinkMetrics(&syntheticMeshState{})
|
|
}
|
|
|
|
func TestLogProductionObservationSinkMetricsOnlyWhenChanged(t *testing.T) {
|
|
sink := mesh.NewProductionEnvelopeObservationSink(2)
|
|
meshState := &syntheticMeshState{ProductionObservationSink: sink}
|
|
var logs strings.Builder
|
|
previousOutput := log.Writer()
|
|
log.SetOutput(&logs)
|
|
defer log.SetOutput(previousOutput)
|
|
defer log.SetOutput(io.Discard)
|
|
|
|
logProductionObservationSinkMetrics(meshState)
|
|
firstLen := logs.Len()
|
|
if firstLen == 0 {
|
|
t.Fatal("first metrics log was not written")
|
|
}
|
|
logProductionObservationSinkMetrics(meshState)
|
|
if logs.Len() != firstLen {
|
|
t.Fatal("metrics log was written again without metric changes")
|
|
}
|
|
if err := sink.Observe(context.Background(), mesh.ProductionEnvelopeObservation{MessageID: "message-1"}); err != nil {
|
|
t.Fatalf("observe: %v", err)
|
|
}
|
|
logProductionObservationSinkMetrics(meshState)
|
|
if logs.Len() == firstLen {
|
|
t.Fatal("metrics log was not written after metric changes")
|
|
}
|
|
}
|
|
|
|
func TestProductionObservationSinkMetricsEqual(t *testing.T) {
|
|
a := mesh.ProductionEnvelopeObservationSinkMetrics{
|
|
Capacity: 2,
|
|
CurrentDepth: 1,
|
|
AcceptedTotal: 1,
|
|
DroppedOldest: 0,
|
|
}
|
|
if !productionObservationSinkMetricsEqual(a, a) {
|
|
t.Fatal("identical metrics were not equal")
|
|
}
|
|
b := a
|
|
b.DroppedOldest = 1
|
|
if productionObservationSinkMetricsEqual(a, b) {
|
|
t.Fatal("different metrics were equal")
|
|
}
|
|
}
|