1828 lines
71 KiB
Go
1828 lines
71 KiB
Go
package main
|
|
|
|
import (
	"context"
	"crypto/ed25519"
	"encoding/base64"
	"encoding/json"
	"io"
	"log"
	"net/http"
	"net/http/httptest"
	"os"
	"path/filepath"
	"strings"
	"sync/atomic"
	"testing"
	"time"

	agentauthority "github.com/example/remote-access-platform/agents/rap-node-agent/internal/authority"
	"github.com/example/remote-access-platform/agents/rap-node-agent/internal/client"
	"github.com/example/remote-access-platform/agents/rap-node-agent/internal/config"
	"github.com/example/remote-access-platform/agents/rap-node-agent/internal/mesh"
	"github.com/example/remote-access-platform/agents/rap-node-agent/internal/state"
	"github.com/example/remote-access-platform/agents/rap-node-agent/internal/vpnruntime"
)
|
|
|
|
func TestLoadSyntheticMeshConfigPrefersScopedFile(t *testing.T) {
|
|
route := mesh.SyntheticRoute{
|
|
RouteID: "route-file",
|
|
ClusterID: "cluster-1",
|
|
SourceNodeID: "node-a",
|
|
DestinationNodeID: "node-b",
|
|
Hops: []string{"node-a", "node-b"},
|
|
AllowedChannels: []string{mesh.SyntheticChannelFabricControl},
|
|
ExpiresAt: time.Now().UTC().Add(time.Hour),
|
|
RouteVersion: "route-v1",
|
|
PolicyVersion: "policy-v1",
|
|
PeerDirectoryVersion: "peers-v1",
|
|
}
|
|
payload, err := json.Marshal(mesh.ScopedSyntheticConfig{
|
|
SchemaVersion: "c17f.synthetic.v1",
|
|
ClusterID: "cluster-1",
|
|
LocalNodeID: "node-a",
|
|
PeerEndpoints: map[string]string{"node-b": "http://127.0.0.1:19002"},
|
|
PeerDirectory: []mesh.PeerDirectoryEntry{
|
|
{NodeID: "node-b", RouteIDs: []string{"route-file"}, EndpointCount: 1},
|
|
},
|
|
RecoverySeeds: []mesh.PeerRecoverySeed{
|
|
{NodeID: "node-b", Endpoint: "http://127.0.0.1:19002", Transport: "direct_tcp_tls", Priority: 10},
|
|
},
|
|
Routes: []mesh.SyntheticRoute{route},
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("marshal scoped config: %v", err)
|
|
}
|
|
path := filepath.Join(t.TempDir(), "mesh-scoped.json")
|
|
if err := os.WriteFile(path, payload, 0o600); err != nil {
|
|
t.Fatalf("write scoped config: %v", err)
|
|
}
|
|
|
|
loaded, err := loadSyntheticMeshConfig(context.Background(), config.Config{
|
|
MeshSyntheticConfigPath: path,
|
|
MeshPeerEndpointsJSON: `{"node-b":"http://debug.invalid"}`,
|
|
MeshSyntheticRoutesJSON: `[]`,
|
|
}, state.Identity{ClusterID: "cluster-1", NodeID: "node-a"}, nil)
|
|
if err != nil {
|
|
t.Fatalf("load synthetic config: %v", err)
|
|
}
|
|
if loaded.Source != "scoped_config" {
|
|
t.Fatalf("source = %q, want scoped_config", loaded.Source)
|
|
}
|
|
if loaded.PeerEndpoints["node-b"] != "http://127.0.0.1:19002" {
|
|
t.Fatalf("peer endpoint = %q", loaded.PeerEndpoints["node-b"])
|
|
}
|
|
if len(loaded.Routes) != 1 || loaded.Routes[0].RouteID != "route-file" {
|
|
t.Fatalf("routes = %+v", loaded.Routes)
|
|
}
|
|
if len(loaded.PeerDirectory) != 1 || len(loaded.RecoverySeeds) != 1 {
|
|
t.Fatalf("peer runtime config missing: directory=%+v seeds=%+v", loaded.PeerDirectory, loaded.RecoverySeeds)
|
|
}
|
|
}
|
|
|
|
func TestSyntheticMeshConfigAuthorityHashUsesRawConfigPayload(t *testing.T) {
|
|
raw := json.RawMessage(`{
|
|
"enabled": true,
|
|
"schema_version": "c18z-test.synthetic.v1",
|
|
"cluster_id": "cluster-1",
|
|
"local_node_id": "node-a",
|
|
"authority_required": true,
|
|
"cluster_authority": {"schema_version":"rap.cluster_authority.v1"},
|
|
"authority_payload": {"ignored": true},
|
|
"authority_signature": {"ignored": true},
|
|
"config_version": "config-1",
|
|
"peer_endpoints": {},
|
|
"routes": [],
|
|
"production_forwarding": true,
|
|
"future_backend_field": {"must_remain_hash_visible": true}
|
|
}`)
|
|
var remote client.SyntheticMeshConfig
|
|
if err := json.Unmarshal(raw, &remote); err != nil {
|
|
t.Fatalf("unmarshal synthetic config: %v", err)
|
|
}
|
|
var unsigned map[string]json.RawMessage
|
|
if err := json.Unmarshal(raw, &unsigned); err != nil {
|
|
t.Fatalf("unmarshal unsigned map: %v", err)
|
|
}
|
|
delete(unsigned, "authority_payload")
|
|
delete(unsigned, "authority_signature")
|
|
unsignedRaw, err := json.Marshal(unsigned)
|
|
if err != nil {
|
|
t.Fatalf("marshal unsigned map: %v", err)
|
|
}
|
|
want, err := agentauthority.HashRaw(unsignedRaw)
|
|
if err != nil {
|
|
t.Fatalf("hash unsigned map: %v", err)
|
|
}
|
|
got, err := syntheticMeshConfigAuthorityHash(remote)
|
|
if err != nil {
|
|
t.Fatalf("hash synthetic config: %v", err)
|
|
}
|
|
if got != want {
|
|
t.Fatalf("hash = %s, want raw-preserving hash %s", got, want)
|
|
}
|
|
}
|
|
|
|
func TestRouteManagerDecisionsFromControlPlaneConsumesRemediationCommand(t *testing.T) {
|
|
now := time.Now().UTC()
|
|
decisions := routeManagerDecisionsFromControlPlane(nil, []client.FabricServiceChannelRemediationCommand{{
|
|
SchemaVersion: "rap.fabric_service_channel_access_remediation_command.v1",
|
|
CommandID: "cmd-1",
|
|
Action: "prefer_alternate_route",
|
|
ClusterID: "cluster-1",
|
|
ChannelID: "channel-1",
|
|
ServiceClass: "vpn_packets",
|
|
PrimaryRouteID: "route-primary",
|
|
ReplacementRouteID: "route-alternate",
|
|
Reason: "authorized_alternate_route_available",
|
|
IssuedAt: now,
|
|
ExpiresAt: now.Add(time.Minute),
|
|
}})
|
|
if len(decisions) != 1 {
|
|
t.Fatalf("decisions = %+v, want one remediation decision", decisions)
|
|
}
|
|
decision := decisions[0]
|
|
if decision.RouteID != "route-primary" ||
|
|
decision.ReplacementRouteID != "route-alternate" ||
|
|
decision.RebuildStatus != "applied" ||
|
|
decision.DecisionSource != "service_channel_remediation_command" ||
|
|
decision.RebuildRequestID != "cmd-1" {
|
|
t.Fatalf("unexpected remediation decision: %+v", decision)
|
|
}
|
|
}
|
|
|
|
func TestRouteManagerDecisionsFromControlPlaneConsumesRebuildRouteCommand(t *testing.T) {
|
|
now := time.Now().UTC()
|
|
decisions := routeManagerDecisionsFromControlPlane(nil, []client.FabricServiceChannelRemediationCommand{{
|
|
SchemaVersion: "rap.fabric_service_channel_access_remediation_command.v1",
|
|
CommandID: "cmd-rebuild",
|
|
Action: "rebuild_route",
|
|
ClusterID: "cluster-1",
|
|
ChannelID: "channel-1",
|
|
ServiceClass: "vpn_packets",
|
|
PrimaryRouteID: "route-primary",
|
|
Reason: "route_feedback_recommends_rebuild",
|
|
GuardStatus: "allowed",
|
|
IssuedAt: now,
|
|
ExpiresAt: now.Add(time.Minute),
|
|
}})
|
|
if len(decisions) != 1 {
|
|
t.Fatalf("decisions = %+v, want one rebuild remediation decision", decisions)
|
|
}
|
|
decision := decisions[0]
|
|
if decision.RouteID != "route-primary" ||
|
|
decision.RebuildStatus != "pending_degraded_fallback" ||
|
|
decision.DecisionSource != "service_channel_remediation_command" ||
|
|
decision.RebuildRequestID != "cmd-rebuild" {
|
|
t.Fatalf("unexpected rebuild remediation decision: %+v", decision)
|
|
}
|
|
}
|
|
|
|
func TestRouteManagerDecisionsFromControlPlaneRejectsGuardedRemediationCommand(t *testing.T) {
|
|
now := time.Now().UTC()
|
|
decisions := routeManagerDecisionsFromControlPlane(nil, []client.FabricServiceChannelRemediationCommand{{
|
|
SchemaVersion: "rap.fabric_service_channel_access_remediation_command.v1",
|
|
CommandID: "cmd-guarded",
|
|
Action: "prefer_alternate_route",
|
|
ClusterID: "cluster-1",
|
|
ChannelID: "channel-1",
|
|
ServiceClass: "vpn_packets",
|
|
PrimaryRouteID: "route-primary",
|
|
ReplacementRouteID: "route-outside-policy",
|
|
GuardStatus: "rejected",
|
|
GuardReason: "replacement_exit_outside_signed_pool_policy",
|
|
IssuedAt: now,
|
|
ExpiresAt: now.Add(time.Minute),
|
|
}})
|
|
if len(decisions) != 0 {
|
|
t.Fatalf("guarded remediation command must not reach route-manager: %+v", decisions)
|
|
}
|
|
}
|
|
|
|
func TestGatewayTransportForAssignmentUsesFabricWithoutBackendFallback(t *testing.T) {
|
|
inbox := vpnruntime.NewFabricPacketInbox(4)
|
|
transport := fabricGatewayTransportForAssignment(
|
|
context.Background(),
|
|
config.Config{},
|
|
state.Identity{ClusterID: "cluster-1", NodeID: "exit-1"},
|
|
client.NodeVPNAssignment{VPNConnectionID: "vpn-1"},
|
|
&syntheticMeshState{
|
|
ProductionForwardTransport: noopProductionForwardTransport{},
|
|
VPNFabricInbox: inbox,
|
|
Routes: []mesh.SyntheticRoute{{
|
|
RouteID: "route-exit-entry",
|
|
ClusterID: "cluster-1",
|
|
SourceNodeID: "exit-1",
|
|
DestinationNodeID: "entry-1",
|
|
Hops: []string{"exit-1", "entry-1"},
|
|
AllowedChannels: []string{mesh.ProductionChannelVPNPacket},
|
|
ExpiresAt: time.Now().UTC().Add(time.Minute),
|
|
}},
|
|
},
|
|
nil,
|
|
)
|
|
if _, ok := transport.(*vpnruntime.FabricPacketTransport); !ok {
|
|
t.Fatalf("transport = %T, want fabric packet transport without backend fallback", transport)
|
|
}
|
|
}
|
|
|
|
func TestGatewayTransportForAssignmentUsesFabricSessionWhenEnabled(t *testing.T) {
|
|
server := httptest.NewServer(mesh.Server{
|
|
Local: mesh.PeerIdentity{ClusterID: "cluster-1", NodeID: "entry-1"},
|
|
FabricSessionEnabled: true,
|
|
}.Handler())
|
|
defer server.Close()
|
|
|
|
inbox := vpnruntime.NewFabricPacketInbox(4)
|
|
transport := fabricGatewayTransportForAssignment(
|
|
context.Background(),
|
|
config.Config{VPNFabricSessionTransportEnabled: true},
|
|
state.Identity{ClusterID: "cluster-1", NodeID: "exit-1"},
|
|
client.NodeVPNAssignment{VPNConnectionID: "vpn-1"},
|
|
&syntheticMeshState{
|
|
ProductionForwardTransport: noopProductionForwardTransport{},
|
|
VPNFabricInbox: inbox,
|
|
VPNFabricSessionPeers: mesh.NewFabricSessionPeerManager(),
|
|
PeerEndpoints: map[string]string{"entry-1": server.URL},
|
|
Routes: []mesh.SyntheticRoute{{
|
|
RouteID: "route-exit-entry",
|
|
ClusterID: "cluster-1",
|
|
SourceNodeID: "exit-1",
|
|
DestinationNodeID: "entry-1",
|
|
Hops: []string{"exit-1", "entry-1"},
|
|
AllowedChannels: []string{mesh.ProductionChannelVPNPacket},
|
|
ExpiresAt: time.Now().UTC().Add(time.Minute),
|
|
}},
|
|
},
|
|
nil,
|
|
)
|
|
sessionTransport, ok := transport.(*vpnruntime.FabricSessionPacketTransport)
|
|
if !ok {
|
|
t.Fatalf("transport = %T, want fabric session packet transport", transport)
|
|
}
|
|
if err := sessionTransport.SendGatewayPacketBatch(context.Background(), [][]byte{[]byte("packet")}); err != nil {
|
|
t.Fatalf("send fabric session packet: %v", err)
|
|
}
|
|
}
|
|
|
|
func TestGatewayTransportForAssignmentFallsBackWhenFabricSessionUnavailable(t *testing.T) {
|
|
inbox := vpnruntime.NewFabricPacketInbox(4)
|
|
transport := fabricGatewayTransportForAssignment(
|
|
context.Background(),
|
|
config.Config{VPNFabricSessionTransportEnabled: true},
|
|
state.Identity{ClusterID: "cluster-1", NodeID: "exit-1"},
|
|
client.NodeVPNAssignment{VPNConnectionID: "vpn-1"},
|
|
&syntheticMeshState{
|
|
ProductionForwardTransport: noopProductionForwardTransport{},
|
|
VPNFabricInbox: inbox,
|
|
VPNFabricSessionPeers: mesh.NewFabricSessionPeerManager(),
|
|
PeerEndpoints: map[string]string{},
|
|
Routes: []mesh.SyntheticRoute{{
|
|
RouteID: "route-exit-entry",
|
|
ClusterID: "cluster-1",
|
|
SourceNodeID: "exit-1",
|
|
DestinationNodeID: "entry-1",
|
|
Hops: []string{"exit-1", "entry-1"},
|
|
AllowedChannels: []string{mesh.ProductionChannelVPNPacket},
|
|
ExpiresAt: time.Now().UTC().Add(time.Minute),
|
|
}},
|
|
},
|
|
nil,
|
|
)
|
|
if _, ok := transport.(*vpnruntime.FabricPacketTransport); !ok {
|
|
t.Fatalf("transport = %T, want fallback fabric packet transport", transport)
|
|
}
|
|
}
|
|
|
|
func TestLocalGatewayTransportForAssignmentUsesLocalInboxWithoutBackendFallback(t *testing.T) {
|
|
transport := localGatewayTransportForAssignment(
|
|
state.Identity{ClusterID: "cluster-1", NodeID: "exit-1"},
|
|
client.NodeVPNAssignment{VPNConnectionID: "vpn-1"},
|
|
&syntheticMeshState{VPNFabricInbox: vpnruntime.NewFabricPacketInbox(4)},
|
|
nil,
|
|
)
|
|
if _, ok := transport.(*vpnruntime.LocalPacketTransport); !ok {
|
|
t.Fatalf("transport = %T, want local packet transport without backend fallback", transport)
|
|
}
|
|
}
|
|
|
|
func TestVPNAssignmentLeaseAutoAcquireAllowedRequiresSelectedExit(t *testing.T) {
|
|
assignment := client.NodeVPNAssignment{
|
|
VPNConnectionID: "vpn-1",
|
|
PlacementPolicy: json.RawMessage(`{
|
|
"entry_node_ids":["entry-1"],
|
|
"exit_node_id":"exit-1"
|
|
}`),
|
|
}
|
|
if vpnAssignmentLeaseAutoAcquireAllowed("entry-1", assignment) {
|
|
t.Fatal("entry node must not auto-acquire the gateway lease")
|
|
}
|
|
if !vpnAssignmentLeaseAutoAcquireAllowed("exit-1", assignment) {
|
|
t.Fatal("selected exit node should auto-acquire the gateway lease")
|
|
}
|
|
}
|
|
|
|
func TestVPNAssignmentLeaseAutoAcquireAllowedSupportsExitPool(t *testing.T) {
|
|
assignment := client.NodeVPNAssignment{
|
|
VPNConnectionID: "vpn-1",
|
|
PlacementPolicy: json.RawMessage(`{"exit_node_ids":["exit-1","exit-2"]}`),
|
|
}
|
|
if !vpnAssignmentLeaseAutoAcquireAllowed("exit-2", assignment) {
|
|
t.Fatal("node from exit pool should auto-acquire the gateway lease")
|
|
}
|
|
if vpnAssignmentLeaseAutoAcquireAllowed("entry-1", assignment) {
|
|
t.Fatal("node outside exit pool must not auto-acquire the gateway lease")
|
|
}
|
|
}
|
|
|
|
// noopProductionForwardTransport is a stateless test double; its
// SendProduction method returns a zero-value result and a nil error.
type noopProductionForwardTransport struct{}
|
|
|
|
func (noopProductionForwardTransport) SendProduction(context.Context, string, mesh.ProductionEnvelope) (mesh.ProductionForwardResult, error) {
|
|
return mesh.ProductionForwardResult{}, nil
|
|
}
|
|
|
|
func TestRouteManagerDecisionsFromControlPlaneKeepsExplicitRemediationCommand(t *testing.T) {
|
|
now := time.Now().UTC()
|
|
report := &client.RoutePathDecisionReport{Decisions: []client.RoutePathDecision{{
|
|
RouteID: "route-primary",
|
|
ReplacementRouteID: "route-alternate",
|
|
RebuildRequestID: "feedback-rebuild",
|
|
RebuildStatus: "applied",
|
|
RebuildReason: "service_channel_feedback_rebuild_applied_to_alternate",
|
|
DecisionSource: "service_channel_feedback_replacement",
|
|
Generation: "gen-1",
|
|
}}}
|
|
decisions := routeManagerDecisionsFromControlPlane(report, []client.FabricServiceChannelRemediationCommand{{
|
|
CommandID: "cmd-1",
|
|
Action: "prefer_alternate_route",
|
|
PrimaryRouteID: "route-primary",
|
|
ReplacementRouteID: "route-alternate",
|
|
Reason: "authorized_alternate_route_available",
|
|
IssuedAt: now,
|
|
ExpiresAt: now.Add(time.Minute),
|
|
}})
|
|
if len(decisions) != 2 {
|
|
t.Fatalf("decisions = %+v, want feedback and explicit remediation command", decisions)
|
|
}
|
|
if decisions[1].DecisionSource != "service_channel_remediation_command" || decisions[1].RebuildRequestID != "cmd-1" {
|
|
t.Fatalf("remediation command was not kept as explicit route-manager input: %+v", decisions)
|
|
}
|
|
}
|
|
|
|
func TestRouteManagerDecisionsFromControlPlaneSkipsCommandAlreadyResolvedByPlanner(t *testing.T) {
|
|
now := time.Now().UTC()
|
|
report := &client.RoutePathDecisionReport{Decisions: []client.RoutePathDecision{{
|
|
RouteID: "route-primary",
|
|
ReplacementRouteID: "route-planner",
|
|
RebuildRequestID: "cmd-rebuild",
|
|
RebuildStatus: "applied",
|
|
RebuildReason: "remediation_rebuild_applied_to_alternate",
|
|
DecisionSource: "service_channel_remediation_command",
|
|
Generation: "config-c18z77",
|
|
}}}
|
|
decisions := routeManagerDecisionsFromControlPlane(report, []client.FabricServiceChannelRemediationCommand{{
|
|
CommandID: "cmd-rebuild",
|
|
Action: "rebuild_route",
|
|
PrimaryRouteID: "route-primary",
|
|
Reason: "route_feedback_recommends_rebuild",
|
|
GuardStatus: "allowed",
|
|
IssuedAt: now,
|
|
ExpiresAt: now.Add(time.Minute),
|
|
}})
|
|
if len(decisions) != 1 {
|
|
t.Fatalf("decisions = %+v, want only planner-resolved decision", decisions)
|
|
}
|
|
if decisions[0].RebuildStatus != "applied" || decisions[0].ReplacementRouteID != "route-planner" {
|
|
t.Fatalf("unexpected planner decision: %+v", decisions[0])
|
|
}
|
|
}
|
|
|
|
func TestFabricServiceChannelAccessStatsReportsDataPlaneViolations(t *testing.T) {
|
|
stats := newFabricServiceChannelAccessStats()
|
|
stats.Observe(mesh.FabricServiceChannelAccessLogEntry{
|
|
Event: "fabric_service_channel_data_plane_violation",
|
|
ClusterID: "cluster-1",
|
|
ChannelID: "channel-1",
|
|
ResourceID: "vpn-1",
|
|
BackendRelayPolicy: "disabled",
|
|
ViolationStatus: "fabric_route_send_failed_backend_fallback_blocked",
|
|
ViolationReason: "mesh synthetic route not found",
|
|
OccurredAt: time.Unix(10, 0).UTC(),
|
|
})
|
|
report := stats.Report(time.Unix(20, 0).UTC())
|
|
if report["backend_fallback_blocked"] != int64(1) ||
|
|
report["fabric_route_send_failure"] != int64(1) ||
|
|
report["last_data_plane_violation_status"] != "fabric_route_send_failed_backend_fallback_blocked" ||
|
|
report["last_data_plane_violation_reason"] != "mesh synthetic route not found" {
|
|
t.Fatalf("unexpected violation report: %+v", report)
|
|
}
|
|
}
|
|
|
|
func TestVerifyEnrollmentBootstrapAcceptsSignedApproval(t *testing.T) {
|
|
publicKey, privateKey, err := ed25519.GenerateKey(nil)
|
|
if err != nil {
|
|
t.Fatalf("generate key: %v", err)
|
|
}
|
|
publicKeyB64 := base64.StdEncoding.EncodeToString(publicKey)
|
|
fingerprint := agentauthority.Fingerprint(publicKey)
|
|
payload := json.RawMessage(`{
|
|
"schema_version":"rap.cluster.node_approval.v1",
|
|
"cluster_id":"cluster-1",
|
|
"join_request_id":"join-request-1",
|
|
"node_id":"node-1",
|
|
"node_fingerprint":"fp-1",
|
|
"identity_status":"active",
|
|
"heartbeat_endpoint":"/api/v1/clusters/cluster-1/nodes/node-1/heartbeats",
|
|
"approved_by_user_id":"admin-1",
|
|
"issued_at":"2026-04-28T12:00:00Z",
|
|
"control_plane_only":true,
|
|
"production_forwarding":false
|
|
}`)
|
|
canonical, err := agentauthority.CanonicalJSON(payload)
|
|
if err != nil {
|
|
t.Fatalf("canonical json: %v", err)
|
|
}
|
|
bootstrap := client.NodeBootstrap{
|
|
NodeID: "node-1",
|
|
ClusterID: "cluster-1",
|
|
IdentityStatus: "active",
|
|
HeartbeatEndpoint: "/api/v1/clusters/cluster-1/nodes/node-1/heartbeats",
|
|
ClusterAuthority: &client.ClusterAuthorityDescriptor{
|
|
SchemaVersion: agentauthority.AuthoritySchemaVersion,
|
|
ClusterID: "cluster-1",
|
|
AuthorityState: "active",
|
|
KeyAlgorithm: agentauthority.AlgorithmEd25519,
|
|
PublicKey: publicKeyB64,
|
|
PublicKeyFingerprint: fingerprint,
|
|
},
|
|
AuthorityPayload: payload,
|
|
AuthoritySignature: &client.ClusterSignature{
|
|
SchemaVersion: agentauthority.SignatureSchemaVersion,
|
|
Algorithm: agentauthority.AlgorithmEd25519,
|
|
KeyFingerprint: fingerprint,
|
|
Signature: base64.StdEncoding.EncodeToString(ed25519.Sign(privateKey, canonical)),
|
|
SignedAt: time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC),
|
|
},
|
|
}
|
|
|
|
err = verifyEnrollmentBootstrap(bootstrap, state.Identity{
|
|
ClusterID: "cluster-1",
|
|
NodeFingerprint: "fp-1",
|
|
}, config.Config{ClusterAuthorityFingerprint: fingerprint})
|
|
if err != nil {
|
|
t.Fatalf("verify enrollment bootstrap: %v", err)
|
|
}
|
|
}
|
|
|
|
func TestVerifyControlPlaneSyntheticMeshConfigAcceptsSignedServiceChannelFeedback(t *testing.T) {
|
|
publicKey, privateKey, err := ed25519.GenerateKey(nil)
|
|
if err != nil {
|
|
t.Fatalf("generate key: %v", err)
|
|
}
|
|
publicKeyB64 := base64.StdEncoding.EncodeToString(publicKey)
|
|
fingerprint := agentauthority.Fingerprint(publicKey)
|
|
now := time.Now().UTC()
|
|
remote := client.SyntheticMeshConfig{
|
|
Enabled: true,
|
|
SchemaVersion: "c17z18.synthetic.v1",
|
|
ClusterID: "cluster-1",
|
|
LocalNodeID: "node-a",
|
|
AuthorityRequired: true,
|
|
ClusterAuthority: &client.ClusterAuthorityDescriptor{
|
|
SchemaVersion: agentauthority.AuthoritySchemaVersion,
|
|
ClusterID: "cluster-1",
|
|
AuthorityState: "authoritative",
|
|
KeyAlgorithm: agentauthority.AlgorithmEd25519,
|
|
PublicKey: publicKeyB64,
|
|
PublicKeyFingerprint: fingerprint,
|
|
},
|
|
ConfigVersion: "config-v1",
|
|
PeerDirectoryVersion: "config-v1",
|
|
PolicyVersion: "config-v1",
|
|
PeerEndpoints: map[string]string{},
|
|
PeerEndpointCandidates: map[string][]client.PeerEndpointCandidate{},
|
|
PeerDirectory: []client.PeerDirectoryEntry{},
|
|
RecoverySeeds: []client.PeerRecoverySeed{},
|
|
RendezvousLeases: []client.PeerRendezvousLease{},
|
|
RoutePathDecisions: &client.RoutePathDecisionReport{
|
|
SchemaVersion: "c17z18.route_path_decisions.v1",
|
|
DecisionMode: "control_plane_effective_path_from_relay_policy_and_service_channel_feedback",
|
|
Generation: "config-v1",
|
|
DecisionCount: 1,
|
|
ReplacementDecisionCount: 1,
|
|
RebuildRequestCount: 1,
|
|
RebuildAppliedCount: 1,
|
|
ControlPlaneOnly: true,
|
|
Decisions: []client.RoutePathDecision{{
|
|
DecisionID: "route-ab-path-node-a-service-channel-feedback",
|
|
RouteID: "route-ab",
|
|
ReplacementRouteID: "route-ac",
|
|
RebuildRequestID: "route-ab-node-a-config-v1-rebuild",
|
|
RebuildStatus: "applied",
|
|
RebuildReason: "service_channel_feedback_rebuild_applied_to_alternate",
|
|
RebuildAttempt: 2,
|
|
ClusterID: "cluster-1",
|
|
LocalNodeID: "node-a",
|
|
SourceNodeID: "node-a",
|
|
DestinationNodeID: "node-b",
|
|
OriginalHops: []string{"node-a", "node-b"},
|
|
EffectiveHops: []string{"node-a", "node-c", "node-b"},
|
|
LocalRole: "source",
|
|
DecisionSource: "service_channel_feedback_replacement",
|
|
Generation: "config-v1",
|
|
PathScore: 1000,
|
|
ScoreReasons: []string{"service_channel_rebuild_applied"},
|
|
ControlPlaneOnly: true,
|
|
ExpiresAt: now.Add(30 * time.Second),
|
|
}},
|
|
},
|
|
ServiceChannelFeedback: &client.FabricServiceChannelFeedbackReport{
|
|
SchemaVersion: "c18n.fabric_service_channel_route_feedback_report.v1",
|
|
GeneratedAt: now,
|
|
FeedbackMaxAgeSeconds: 30,
|
|
ObservationCount: 1,
|
|
FencedRouteCount: 1,
|
|
Observations: []client.FabricServiceChannelFeedbackObservation{{
|
|
ClusterID: "cluster-1",
|
|
ReporterNodeID: "node-a",
|
|
RouteID: "route-ab",
|
|
ServiceClass: "vpn_packets",
|
|
FeedbackStatus: "fenced",
|
|
ScoreAdjustment: -1000,
|
|
Reasons: []string{"route_rebuild_recommended"},
|
|
ConsecutiveFailures: 2,
|
|
Payload: json.RawMessage(`{"route_rebuild_recommended":true}`),
|
|
ObservedAt: now,
|
|
ExpiresAt: now.Add(30 * time.Second),
|
|
}},
|
|
},
|
|
MeshListener: nil,
|
|
Routes: []client.SyntheticMeshRouteConfig{},
|
|
ProductionForwarding: false,
|
|
}
|
|
configHash, err := syntheticMeshConfigAuthorityHash(remote)
|
|
if err != nil {
|
|
t.Fatalf("config hash: %v", err)
|
|
}
|
|
payload, err := json.Marshal(controlPlaneMeshConfigAuthorityPayload{
|
|
SchemaVersion: "rap.cluster.mesh_config_snapshot.v1",
|
|
ClusterID: "cluster-1",
|
|
LocalNodeID: "node-a",
|
|
ConfigVersion: "config-v1",
|
|
ConfigSHA256: configHash,
|
|
IssuedAt: now,
|
|
ExpiresAt: now.Add(time.Hour),
|
|
ControlPlaneOnly: true,
|
|
ProductionForwarding: false,
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("marshal payload: %v", err)
|
|
}
|
|
canonical, err := agentauthority.CanonicalJSON(payload)
|
|
if err != nil {
|
|
t.Fatalf("canonical json: %v", err)
|
|
}
|
|
remote.AuthorityPayload = payload
|
|
remote.AuthoritySignature = &client.ClusterSignature{
|
|
SchemaVersion: agentauthority.SignatureSchemaVersion,
|
|
Algorithm: agentauthority.AlgorithmEd25519,
|
|
KeyFingerprint: fingerprint,
|
|
Signature: base64.StdEncoding.EncodeToString(ed25519.Sign(privateKey, canonical)),
|
|
SignedAt: now,
|
|
}
|
|
|
|
err = verifyControlPlaneSyntheticMeshConfig(remote, state.Identity{
|
|
ClusterID: "cluster-1",
|
|
NodeID: "node-a",
|
|
ClusterAuthorityPublicKey: publicKeyB64,
|
|
ClusterAuthorityFingerprint: fingerprint,
|
|
}, config.Config{})
|
|
if err != nil {
|
|
t.Fatalf("verify control-plane synthetic mesh config: %v", err)
|
|
}
|
|
}
|
|
|
|
func TestVerifyEnrollmentBootstrapRejectsPinnedAuthorityMismatch(t *testing.T) {
|
|
bootstrap := client.NodeBootstrap{
|
|
NodeID: "node-1",
|
|
ClusterID: "cluster-1",
|
|
IdentityStatus: "active",
|
|
ClusterAuthority: &client.ClusterAuthorityDescriptor{
|
|
SchemaVersion: agentauthority.AuthoritySchemaVersion,
|
|
ClusterID: "cluster-1",
|
|
KeyAlgorithm: agentauthority.AlgorithmEd25519,
|
|
PublicKeyFingerprint: "rap-ca-ed25519-other",
|
|
},
|
|
}
|
|
err := verifyEnrollmentBootstrap(bootstrap, state.Identity{
|
|
ClusterID: "cluster-1",
|
|
NodeFingerprint: "fp-1",
|
|
}, config.Config{ClusterAuthorityFingerprint: "rap-ca-ed25519-expected"})
|
|
if err == nil {
|
|
t.Fatal("expected pinned authority mismatch")
|
|
}
|
|
}
|
|
|
|
func TestEnsureApprovedIdentityKeepsPollingWhenTimeoutDisabled(t *testing.T) {
|
|
var bootstrapPolls int
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
defer cancel()
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
switch {
|
|
case r.URL.Path == "/node-agents/enroll":
|
|
_ = json.NewEncoder(w).Encode(map[string]any{
|
|
"status": "pending",
|
|
"join_request": map[string]any{"id": "join-request-1"},
|
|
})
|
|
case r.URL.Path == "/node-agents/enrollments/join-request-1/bootstrap":
|
|
bootstrapPolls++
|
|
if bootstrapPolls >= 2 {
|
|
cancel()
|
|
}
|
|
_ = json.NewEncoder(w).Encode(map[string]any{
|
|
"status": "pending",
|
|
"join_request": map[string]any{"id": "join-request-1"},
|
|
})
|
|
default:
|
|
http.NotFound(w, r)
|
|
}
|
|
}))
|
|
defer server.Close()
|
|
|
|
dir := t.TempDir()
|
|
identity, err := state.LoadOrCreate(dir, "cluster-1", "node-a")
|
|
if err != nil {
|
|
t.Fatalf("load identity: %v", err)
|
|
}
|
|
_, err = ensureApprovedIdentity(ctx, config.Config{
|
|
BackendURL: server.URL,
|
|
ClusterID: "cluster-1",
|
|
JoinToken: "join-token",
|
|
NodeName: "node-a",
|
|
StateDir: dir,
|
|
EnrollmentPollInterval: time.Millisecond,
|
|
EnrollmentPollTimeout: 0,
|
|
}, identity, client.New(server.URL))
|
|
if err == nil || !strings.Contains(err.Error(), "context canceled") {
|
|
t.Fatalf("ensureApprovedIdentity err = %v, want context canceled", err)
|
|
}
|
|
if bootstrapPolls < 2 {
|
|
t.Fatalf("bootstrap polls = %d, want at least 2", bootstrapPolls)
|
|
}
|
|
}
|
|
|
|
func TestSyntheticQualityScoreIsBounded(t *testing.T) {
|
|
cases := []struct {
|
|
latency int
|
|
min int
|
|
max int
|
|
}{
|
|
{latency: 0, min: 100, max: 100},
|
|
{latency: 50, min: 90, max: 100},
|
|
{latency: 10000, min: 1, max: 1},
|
|
}
|
|
for _, tc := range cases {
|
|
score := syntheticQualityScore(tc.latency)
|
|
if score < tc.min || score > tc.max {
|
|
t.Fatalf("syntheticQualityScore(%d) = %d, want [%d,%d]", tc.latency, score, tc.min, tc.max)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestProductionEnvelopeObservationSinkFromConfigIsDisabledByDefault(t *testing.T) {
|
|
sink := productionEnvelopeObservationSinkFromConfig(config.Config{})
|
|
if sink != nil {
|
|
t.Fatal("sink is enabled by default")
|
|
}
|
|
}
|
|
|
|
func TestHeartbeatPayloadIncludesMeshEndpointReport(t *testing.T) {
|
|
payload := heartbeatPayload(config.Config{
|
|
MeshAdvertiseEndpoint: "https://node-a.example.test:443",
|
|
MeshAdvertiseTransport: "wss",
|
|
MeshConnectivityMode: "outbound_only",
|
|
MeshNATType: "symmetric",
|
|
MeshRegion: "eu",
|
|
MeshSyntheticRuntimeEnabled: true,
|
|
MeshProductionForwardingEnabled: true,
|
|
MeshFabricSessionEnabled: true,
|
|
VPNFabricSessionTransportEnabled: true,
|
|
}, state.Identity{
|
|
ClusterID: "cluster-1",
|
|
NodeID: "node-a",
|
|
}, &syntheticMeshState{
|
|
VPNFabricSessionPeers: mesh.NewFabricSessionPeerManager(),
|
|
}, time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC))
|
|
|
|
report, ok := payload.Metadata["mesh_endpoint_report"].(map[string]any)
|
|
if !ok {
|
|
t.Fatalf("mesh endpoint report missing: %+v", payload.Metadata)
|
|
}
|
|
if report["peer_endpoint"] != "https://node-a.example.test:443" ||
|
|
report["connectivity_mode"] != "outbound_only" ||
|
|
report["nat_type"] != "symmetric" ||
|
|
report["region"] != "eu" {
|
|
t.Fatalf("unexpected endpoint report: %+v", report)
|
|
}
|
|
if payload.Capabilities["mesh_dynamic_endpoint_reporting"] != true {
|
|
t.Fatalf("dynamic endpoint capability missing: %+v", payload.Capabilities)
|
|
}
|
|
if payload.Capabilities["fabric_session_websocket_endpoint"] != true || payload.Capabilities["fabric_data_session_v1"] != true {
|
|
t.Fatalf("fabric session capabilities missing: %+v", payload.Capabilities)
|
|
}
|
|
if report, ok := payload.Metadata["fabric_session_endpoint_report"].(map[string]any); !ok || report["path"] != "/mesh/v1/fabric/session/ws" {
|
|
t.Fatalf("fabric session endpoint report missing: %+v", payload.Metadata)
|
|
}
|
|
if payload.Capabilities["vpn_fabric_session_transport"] != true || payload.Capabilities["vpn_packet_batch_binary_frames"] != true {
|
|
t.Fatalf("vpn fabric session capabilities missing: %+v", payload.Capabilities)
|
|
}
|
|
if report, ok := payload.Metadata["vpn_fabric_session_transport_report"].(map[string]any); !ok ||
|
|
report["packet_payload"] != "rap.vpn_packet_batch.fabric.v1" ||
|
|
report["peer_sessions"] == nil {
|
|
t.Fatalf("vpn fabric session report missing: %+v", payload.Metadata)
|
|
}
|
|
}
|
|
|
|
func TestHeartbeatPayloadReportsMeshListenerFailureWithoutKillingHeartbeat(t *testing.T) {
|
|
now := time.Date(2026, 4, 30, 9, 0, 0, 0, time.UTC)
|
|
payload := heartbeatPayload(config.Config{
|
|
MeshConnectivityMode: "private_lan",
|
|
}, state.Identity{
|
|
ClusterID: "cluster-1",
|
|
NodeID: "node-a",
|
|
}, &syntheticMeshState{
|
|
ListenerReport: meshListenerReport{
|
|
SchemaVersion: "c17z21.mesh_listener_report.v1",
|
|
ConfiguredListenAddr: ":19131",
|
|
ListenPortMode: "manual",
|
|
Status: "listen_failed",
|
|
InboundReachability: "unavailable",
|
|
ControlPlaneReachable: true,
|
|
OneWayConnectivity: true,
|
|
FailureReason: "bind_failed",
|
|
FailureError: "listen tcp :19131: bind: address already in use",
|
|
PortConflict: true,
|
|
},
|
|
}, now)
|
|
|
|
report, ok := payload.Metadata["mesh_listener_report"].(meshListenerReport)
|
|
if !ok {
|
|
t.Fatalf("mesh listener report missing: %+v", payload.Metadata)
|
|
}
|
|
if payload.HealthStatus != "warning" || report.Status != "listen_failed" || !report.PortConflict {
|
|
t.Fatalf("unexpected listener health report: status=%s report=%+v", payload.HealthStatus, report)
|
|
}
|
|
if payload.Capabilities["mesh_listener_diagnostics"] != true || payload.Capabilities["mesh_one_way_connectivity"] != true {
|
|
t.Fatalf("listener capabilities missing: %+v", payload.Capabilities)
|
|
}
|
|
}
|
|
|
|
func TestAdvertisedEndpointCandidatesPreferManualEndpoints(t *testing.T) {
|
|
now := time.Date(2026, 4, 30, 9, 0, 0, 0, time.UTC)
|
|
candidates, err := advertisedEndpointCandidates(config.Config{
|
|
MeshAdvertiseEndpointsJSON: `[{"endpoint_id":"node-a-json","node_id":"node-a","transport":"direct_http","address":"http://10.10.10.10:19131","priority":12,"connectivity_mode":"private_lan","reachability":"private"}]`,
|
|
MeshAdvertiseEndpoint: "http://203.0.113.10:19131",
|
|
MeshAdvertiseTransport: "direct_http",
|
|
MeshConnectivityMode: "direct",
|
|
MeshNATType: "port_restricted",
|
|
MeshRegion: "edge",
|
|
}, state.Identity{
|
|
ClusterID: "cluster-1",
|
|
NodeID: "node-a",
|
|
}, nil, now)
|
|
if err != nil {
|
|
t.Fatalf("advertised endpoint candidates failed: %v", err)
|
|
}
|
|
if len(candidates) != 2 {
|
|
t.Fatalf("expected two manual candidates, got %d: %+v", len(candidates), candidates)
|
|
}
|
|
if candidates[0].Address != "http://203.0.113.10:19131" || candidates[0].Priority != 10 {
|
|
t.Fatalf("explicit advertise endpoint must win: %+v", candidates)
|
|
}
|
|
if candidates[1].Address != "http://10.10.10.10:19131" || candidates[1].Priority != 12 {
|
|
t.Fatalf("json candidate order mismatch: %+v", candidates)
|
|
}
|
|
}
|
|
|
|
func TestNetworkInterfaceClassificationSkipsContainerNoise(t *testing.T) {
|
|
tests := map[string]string{
|
|
"ens160": "physical",
|
|
"wg0": "vpn",
|
|
"tailscale0": "vpn",
|
|
"docker0": "container",
|
|
"br-a1b2c3d4": "container",
|
|
"vethabc123": "container",
|
|
}
|
|
for name, want := range tests {
|
|
if got := classifyNetworkInterface(name); got != want {
|
|
t.Fatalf("classifyNetworkInterface(%q)=%q, want %q", name, got, want)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestHeartbeatPayloadTreatsOutboundOnlyListenerFailureAsOneWayConnectivity(t *testing.T) {
|
|
payload := heartbeatPayload(config.Config{
|
|
MeshSyntheticRuntimeEnabled: true,
|
|
MeshConnectivityMode: "outbound_only",
|
|
}, state.Identity{
|
|
ClusterID: "cluster-1",
|
|
NodeID: "node-a",
|
|
}, &syntheticMeshState{
|
|
ListenerReport: meshListenerReport{
|
|
SchemaVersion: "c17z21.mesh_listener_report.v1",
|
|
ConfiguredListenAddr: ":19131",
|
|
ListenPortMode: "manual",
|
|
Status: "listen_failed",
|
|
InboundReachability: "unavailable",
|
|
ControlPlaneReachable: true,
|
|
OneWayConnectivity: true,
|
|
FailureReason: "bind_failed",
|
|
},
|
|
}, time.Date(2026, 4, 30, 9, 0, 0, 0, time.UTC))
|
|
|
|
if payload.HealthStatus != "healthy" {
|
|
t.Fatalf("HealthStatus = %q, want healthy for outbound-only listener failure", payload.HealthStatus)
|
|
}
|
|
report, ok := payload.Metadata["mesh_outbound_session_report"].(meshOutboundSessionReport)
|
|
if !ok {
|
|
t.Fatalf("mesh outbound session report missing: %+v", payload.Metadata)
|
|
}
|
|
if report.Status != "ready" || !report.UsableForInboundControl || report.ListenerStatus != "listen_failed" {
|
|
t.Fatalf("unexpected outbound session report: %+v", report)
|
|
}
|
|
if payload.Capabilities["mesh_outbound_control_session"] != true ||
|
|
payload.Capabilities["mesh_reverse_control_channel_contract"] != true {
|
|
t.Fatalf("outbound session capabilities missing: %+v", payload.Capabilities)
|
|
}
|
|
}
|
|
|
|
func TestHeartbeatPayloadReportsMeshConfigLoadFailureWithoutDroppingPresence(t *testing.T) {
|
|
payload := heartbeatPayload(config.Config{
|
|
MeshSyntheticRuntimeEnabled: true,
|
|
MeshConnectivityMode: "private_lan",
|
|
}, state.Identity{
|
|
ClusterID: "cluster-1",
|
|
NodeID: "node-a",
|
|
}, &syntheticMeshState{
|
|
ConfigLoadError: "control-plane synthetic mesh config unavailable",
|
|
ListenerReport: meshListenerReport{
|
|
SchemaVersion: "c17z21.mesh_listener_report.v1",
|
|
ConfiguredListenAddr: ":19131",
|
|
ListenPortMode: "manual",
|
|
Status: "listening",
|
|
InboundReachability: "private",
|
|
ControlPlaneReachable: true,
|
|
},
|
|
}, time.Date(2026, 4, 30, 9, 0, 0, 0, time.UTC))
|
|
|
|
report, ok := payload.Metadata["mesh_outbound_session_report"].(meshOutboundSessionReport)
|
|
if !ok {
|
|
t.Fatalf("mesh outbound session report missing: %+v", payload.Metadata)
|
|
}
|
|
if payload.HealthStatus != "warning" || report.Status != "degraded" || report.ConfigLoadError == "" {
|
|
t.Fatalf("unexpected config-load diagnostic heartbeat: health=%s report=%+v", payload.HealthStatus, report)
|
|
}
|
|
}
|
|
|
|
func TestOutboundSessionReportTreatsListeningPrivateLANAsUsable(t *testing.T) {
|
|
report := meshOutboundSessionReportFromState(config.Config{
|
|
BackendURL: "http://control/api/v1",
|
|
MeshConnectivityMode: "private_lan",
|
|
MeshSyntheticRuntimeEnabled: true,
|
|
}, &syntheticMeshState{
|
|
ListenerReport: meshListenerReport{
|
|
SchemaVersion: "c17z21.mesh_listener_report.v1",
|
|
Status: "listening",
|
|
InboundReachability: reachabilityFromConnectivityMode("private_lan"),
|
|
},
|
|
}, time.Date(2026, 4, 30, 9, 0, 0, 0, time.UTC))
|
|
|
|
if !report.UsableForInboundControl {
|
|
t.Fatalf("listening private LAN listener must be usable: %+v", report)
|
|
}
|
|
if reachabilityFromConnectivityMode("private_lan") != "private" {
|
|
t.Fatalf("private_lan reachability mismatch")
|
|
}
|
|
}
|
|
|
|
func TestHeartbeatPayloadReportsMultipleMeshEndpoints(t *testing.T) {
|
|
payload := heartbeatPayload(config.Config{
|
|
MeshAdvertiseEndpointsJSON: `[{
|
|
"endpoint_id": "node-a-lan",
|
|
"address": "http://10.24.10.10:19001",
|
|
"transport": "direct_tcp_tls",
|
|
"reachability": "private",
|
|
"connectivity_mode": "direct",
|
|
"nat_type": "none",
|
|
"region": "corp-eu",
|
|
"priority": 1,
|
|
"policy_tags": ["corp-lan", "same-site"]
|
|
},{
|
|
"endpoint_id": "node-a-public",
|
|
"address": "https://node-a.example.test:443",
|
|
"transport": "direct_tcp_tls",
|
|
"reachability": "public",
|
|
"connectivity_mode": "direct",
|
|
"nat_type": "none",
|
|
"priority": 10
|
|
}]`,
|
|
MeshRegion: "corp-eu",
|
|
}, state.Identity{
|
|
ClusterID: "cluster-1",
|
|
NodeID: "node-a",
|
|
}, nil, time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC))
|
|
|
|
report, ok := payload.Metadata["mesh_endpoint_report"].(map[string]any)
|
|
if !ok {
|
|
t.Fatalf("mesh endpoint report missing: %+v", payload.Metadata)
|
|
}
|
|
candidates, ok := report["endpoint_candidates"].([]mesh.PeerEndpointCandidate)
|
|
if !ok || len(candidates) != 2 {
|
|
t.Fatalf("unexpected endpoint candidates: %#v", report["endpoint_candidates"])
|
|
}
|
|
if candidates[0].EndpointID != "node-a-lan" || candidates[0].Reachability != "private" {
|
|
t.Fatalf("internal endpoint candidate not preserved: %+v", candidates[0])
|
|
}
|
|
if report["peer_endpoint"] != "http://10.24.10.10:19001" {
|
|
t.Fatalf("default peer endpoint = %v", report["peer_endpoint"])
|
|
}
|
|
}
|
|
|
|
func TestHeartbeatPayloadIncludesPeerRecoveryReportWithoutAdvertisedEndpoint(t *testing.T) {
|
|
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
|
local := mesh.PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"}
|
|
peerCache := mesh.NewPeerCache(mesh.PeerCacheConfig{
|
|
Local: local,
|
|
PeerEndpoints: map[string]string{
|
|
"node-b": "http://node-b:19001",
|
|
"node-c": "http://node-c:19001",
|
|
"node-d": "http://node-d:19001",
|
|
},
|
|
WarmPeerLimit: 3,
|
|
Now: now,
|
|
})
|
|
peerConnections := mesh.NewPeerConnectionTracker(peerCache.Snapshot(), now)
|
|
peerConnections.RecordSuccess("node-b", 20, now)
|
|
meshState := &syntheticMeshState{
|
|
PeerCache: peerCache,
|
|
PeerConnections: peerConnections,
|
|
}
|
|
|
|
payload := heartbeatPayload(config.Config{}, state.Identity{
|
|
ClusterID: "cluster-1",
|
|
NodeID: "node-a",
|
|
}, meshState, now)
|
|
|
|
report, ok := payload.Metadata["mesh_peer_recovery_report"].(map[string]any)
|
|
if !ok {
|
|
t.Fatalf("mesh peer recovery report missing: %+v", payload.Metadata)
|
|
}
|
|
if report["schema_version"] != "c17z9.mesh_peer_recovery_report.v1" ||
|
|
report["mode"] != mesh.PeerRecoveryModeRecovery ||
|
|
report["ready_peer_count"] != 1 ||
|
|
report["target_ready_peers"] != mesh.DefaultStablePeerTarget ||
|
|
report["deficit"] != 2 {
|
|
t.Fatalf("unexpected recovery report: %+v", report)
|
|
}
|
|
if payload.Capabilities["mesh_peer_recovery_planning"] != true {
|
|
t.Fatalf("peer recovery capability missing: %+v", payload.Capabilities)
|
|
}
|
|
intentReport, ok := payload.Metadata["mesh_peer_connection_intent_report"].(map[string]any)
|
|
if !ok {
|
|
t.Fatalf("mesh peer connection intent report missing: %+v", payload.Metadata)
|
|
}
|
|
if intentReport["schema_version"] != "c17z12.mesh_peer_connection_intent_report.v1" ||
|
|
intentReport["intent_count"] != 3 ||
|
|
intentReport["recover_count"] != 2 {
|
|
t.Fatalf("unexpected connection intent report: %+v", intentReport)
|
|
}
|
|
if payload.Capabilities["mesh_peer_connection_intent_planning"] != true {
|
|
t.Fatalf("connection intent capability missing: %+v", payload.Capabilities)
|
|
}
|
|
if _, ok := payload.Metadata["mesh_endpoint_report"]; ok {
|
|
t.Fatalf("endpoint report should not be emitted without advertised endpoint: %+v", payload.Metadata)
|
|
}
|
|
}
|
|
|
|
func TestHeartbeatPayloadIncludesRendezvousLeaseAdmissionReport(t *testing.T) {
|
|
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
|
identity := state.Identity{ClusterID: "cluster-1", NodeID: "node-a"}
|
|
leases := []mesh.PeerRendezvousLease{
|
|
{
|
|
LeaseID: "lease-node-b-via-node-a",
|
|
PeerNodeID: "node-b",
|
|
RelayNodeID: "node-a",
|
|
RelayEndpoint: "http://node-a:19001",
|
|
Transport: "relay_control",
|
|
ConnectivityMode: "relay_required",
|
|
RouteIDs: []string{"route-ab"},
|
|
AllowedChannels: []string{mesh.SyntheticChannelFabricControl},
|
|
Priority: 10,
|
|
ControlPlaneOnly: true,
|
|
IssuedAt: now.Add(-time.Minute),
|
|
ExpiresAt: now.Add(5 * time.Minute),
|
|
},
|
|
{
|
|
LeaseID: "lease-node-a-via-node-r",
|
|
PeerNodeID: "node-a",
|
|
RelayNodeID: "node-r",
|
|
RelayEndpoint: "http://node-r:19001",
|
|
Transport: "relay_control",
|
|
ConnectivityMode: "relay_required",
|
|
RouteIDs: []string{"route-ra"},
|
|
Priority: 20,
|
|
ControlPlaneOnly: true,
|
|
IssuedAt: now.Add(-2 * time.Minute),
|
|
ExpiresAt: now.Add(30 * time.Second),
|
|
},
|
|
{
|
|
LeaseID: "lease-node-c-via-node-r-expired",
|
|
PeerNodeID: "node-c",
|
|
RelayNodeID: "node-r",
|
|
RelayEndpoint: "http://node-r:19001",
|
|
Transport: "relay_control",
|
|
ConnectivityMode: "relay_required",
|
|
RouteIDs: []string{"route-cr"},
|
|
Priority: 30,
|
|
ControlPlaneOnly: true,
|
|
IssuedAt: now.Add(-10 * time.Minute),
|
|
ExpiresAt: now.Add(-time.Second),
|
|
},
|
|
}
|
|
cache := mesh.NewPeerCache(mesh.PeerCacheConfig{
|
|
Local: mesh.PeerIdentity{ClusterID: identity.ClusterID, NodeID: identity.NodeID},
|
|
RendezvousLeases: leases,
|
|
WarmPeerLimit: 3,
|
|
Now: now,
|
|
})
|
|
tracker := mesh.NewPeerConnectionTracker(cache.Snapshot(), now)
|
|
tracker.RecordRelayReady(mesh.PeerCacheEntry{
|
|
NodeID: "node-b",
|
|
Endpoint: "http://node-a:19001",
|
|
Warm: true,
|
|
RendezvousLeaseID: "lease-node-b-via-node-a",
|
|
RelayNodeID: "node-a",
|
|
RelayEndpoint: "http://node-a:19001",
|
|
RelayControl: true,
|
|
BestTransport: "relay_control",
|
|
BestReachability: "relay",
|
|
BestConnectivity: "relay_required",
|
|
BestCandidateScore: 500,
|
|
}, 12, now.Add(time.Second))
|
|
meshState := &syntheticMeshState{
|
|
PeerCache: cache,
|
|
RendezvousLeases: leases,
|
|
PeerConnections: tracker,
|
|
}
|
|
|
|
payload := heartbeatPayload(config.Config{}, identity, meshState, now)
|
|
|
|
report, ok := payload.Metadata["mesh_rendezvous_lease_report"].(map[string]any)
|
|
if !ok {
|
|
t.Fatalf("rendezvous lease report missing: %+v", payload.Metadata)
|
|
}
|
|
if report["schema_version"] != meshRendezvousLeaseReportSchema ||
|
|
report["lease_count"] != 3 ||
|
|
report["active_count"] != 2 ||
|
|
report["expired_count"] != 1 ||
|
|
report["admitted_as_relay_count"] != 1 ||
|
|
report["admitted_as_peer_count"] != 1 ||
|
|
report["renewal_needed_count"] != 1 ||
|
|
report["relay_control_ready_count"] != 1 {
|
|
t.Fatalf("unexpected lease report: %+v", report)
|
|
}
|
|
if report["control_plane_only"] != true ||
|
|
report["relay_payload_forwarding"] != false ||
|
|
report["production_payload_forwarding"] != false {
|
|
t.Fatalf("payload boundary flags not preserved: %+v", report)
|
|
}
|
|
leaseDetails, ok := report["leases"].([]map[string]any)
|
|
if !ok || len(leaseDetails) != 3 {
|
|
t.Fatalf("unexpected lease details: %#v", report["leases"])
|
|
}
|
|
if leaseDetails[0]["role"] != "relay" ||
|
|
leaseDetails[0]["status"] != "admitted" ||
|
|
leaseDetails[0]["admitted"] != true ||
|
|
leaseDetails[0]["relay_ready"] != true {
|
|
t.Fatalf("relay admission detail missing: %+v", leaseDetails[0])
|
|
}
|
|
if leaseDetails[1]["role"] != "peer" ||
|
|
leaseDetails[1]["status"] != "renewal_needed" ||
|
|
leaseDetails[1]["renewal_needed"] != true {
|
|
t.Fatalf("peer renewal detail missing: %+v", leaseDetails[1])
|
|
}
|
|
if payload.Capabilities[meshRendezvousLeaseTelemetryCapability] != true {
|
|
t.Fatalf("lease telemetry capability missing: %+v", payload.Capabilities)
|
|
}
|
|
}
|
|
|
|
func TestHeartbeatPayloadReportsStaleRelayWithdrawalTelemetry(t *testing.T) {
|
|
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
|
identity := state.Identity{ClusterID: "cluster-1", NodeID: "node-r"}
|
|
lease := mesh.PeerRendezvousLease{
|
|
LeaseID: "lease-node-b-via-node-r",
|
|
PeerNodeID: "node-b",
|
|
RelayNodeID: "node-r",
|
|
RelayEndpoint: "http://node-r:19001",
|
|
Transport: "relay_control",
|
|
ConnectivityMode: "relay_required",
|
|
RouteIDs: []string{"route-rb"},
|
|
Priority: 10,
|
|
ControlPlaneOnly: true,
|
|
IssuedAt: now.Add(-time.Minute),
|
|
ExpiresAt: now.Add(10 * time.Minute),
|
|
}
|
|
altLease := lease
|
|
altLease.LeaseID = "lease-node-b-via-node-r2"
|
|
altLease.RelayNodeID = "node-r2"
|
|
altLease.RelayEndpoint = "http://node-r2:19001"
|
|
altLease.Priority = 20
|
|
cache := mesh.NewPeerCache(mesh.PeerCacheConfig{
|
|
Local: mesh.PeerIdentity{ClusterID: identity.ClusterID, NodeID: identity.NodeID},
|
|
RendezvousLeases: []mesh.PeerRendezvousLease{lease, altLease},
|
|
WarmPeerLimit: 1,
|
|
Now: now,
|
|
})
|
|
tracker := mesh.NewPeerConnectionTracker(cache.Snapshot(), now)
|
|
peer := mesh.PeerCacheEntry{
|
|
NodeID: "node-b",
|
|
Endpoint: "http://node-r:19001",
|
|
Warm: true,
|
|
RendezvousLeaseID: "lease-node-b-via-node-r",
|
|
RelayNodeID: "node-r",
|
|
RelayEndpoint: "http://node-r:19001",
|
|
RelayControl: true,
|
|
}
|
|
tracker.RecordRelayReady(peer, 10, now.Add(time.Second))
|
|
tracker.RecordFailure("node-b", "relay health failed", now.Add(2*time.Second))
|
|
tracker.RecordFailure("node-b", "relay health failed", now.Add(3*time.Second))
|
|
tracker.RecordFailure("node-b", "relay health failed", now.Add(4*time.Second))
|
|
meshState := &syntheticMeshState{
|
|
PeerCache: cache,
|
|
RendezvousLeases: []mesh.PeerRendezvousLease{lease, altLease},
|
|
PeerConnections: tracker,
|
|
Source: "control_plane",
|
|
}
|
|
|
|
payload := heartbeatPayload(config.Config{}, identity, meshState, now.Add(5*time.Second))
|
|
|
|
report := payload.Metadata["mesh_rendezvous_lease_report"].(map[string]any)
|
|
if report["stale_relay_count"] != 1 ||
|
|
report["withdrawal_needed_count"] != 1 ||
|
|
report["reselection_needed_count"] != 0 ||
|
|
report["refresh_needed_count"] != 1 {
|
|
t.Fatalf("unexpected stale relay report: %+v", report)
|
|
}
|
|
leaseDetails := report["leases"].([]map[string]any)
|
|
if leaseDetails[0]["stale_relay"] != true ||
|
|
leaseDetails[0]["withdrawal_needed"] != true ||
|
|
leaseDetails[0]["connection_state"] != mesh.PeerConnectionBackoff {
|
|
t.Fatalf("stale relay detail missing: %+v", leaseDetails[0])
|
|
}
|
|
if leaseDetails[1]["stale_relay"] != false ||
|
|
leaseDetails[1]["withdrawal_needed"] != false {
|
|
t.Fatalf("alternate relay lease should not inherit stale state: %+v", leaseDetails[1])
|
|
}
|
|
}
|
|
|
|
func TestRefreshRendezvousLeasesIfNeededReloadsControlPlaneConfig(t *testing.T) {
|
|
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
|
identity := state.Identity{ClusterID: "cluster-1", NodeID: "node-a"}
|
|
oldLease := mesh.PeerRendezvousLease{
|
|
LeaseID: "lease-node-b-via-node-r-old",
|
|
PeerNodeID: "node-b",
|
|
RelayNodeID: "node-r-old",
|
|
RelayEndpoint: "http://node-r-old:19001",
|
|
Transport: "relay_control",
|
|
ConnectivityMode: "relay_required",
|
|
RouteIDs: []string{"route-ab"},
|
|
Priority: 10,
|
|
ControlPlaneOnly: true,
|
|
IssuedAt: now.Add(-2 * time.Minute),
|
|
ExpiresAt: now.Add(30 * time.Second),
|
|
}
|
|
local := mesh.PeerIdentity{ClusterID: identity.ClusterID, NodeID: identity.NodeID}
|
|
oldCache := mesh.NewPeerCache(mesh.PeerCacheConfig{
|
|
Local: local,
|
|
RendezvousLeases: []mesh.PeerRendezvousLease{oldLease},
|
|
WarmPeerLimit: 1,
|
|
Now: now,
|
|
})
|
|
tracker := mesh.NewPeerConnectionTracker(oldCache.Snapshot(), now)
|
|
oldPathDecisions := &client.RoutePathDecisionReport{
|
|
SchemaVersion: "c17z18.route_path_decisions.v1",
|
|
DecisionMode: "control_plane_effective_path_from_relay_policy",
|
|
Generation: "old-config",
|
|
DecisionCount: 1,
|
|
ReplacementDecisionCount: 0,
|
|
ControlPlaneOnly: true,
|
|
ProductionForwarding: false,
|
|
Decisions: []client.RoutePathDecision{
|
|
{
|
|
DecisionID: "route-ab-path-node-a-via-node-r-old",
|
|
RouteID: "route-ab",
|
|
ClusterID: "cluster-1",
|
|
LocalNodeID: "node-a",
|
|
SourceNodeID: "node-a",
|
|
DestinationNodeID: "node-b",
|
|
OriginalHops: []string{"node-a", "node-r-old", "node-b"},
|
|
EffectiveHops: []string{"node-a", "node-r-old", "node-b"},
|
|
NextHopID: "node-r-old",
|
|
LocalRole: "entry",
|
|
DecisionSource: "route_intent",
|
|
Generation: "old-config",
|
|
PathScore: 1000,
|
|
ControlPlaneOnly: true,
|
|
ProductionForwarding: false,
|
|
ExpiresAt: now.Add(10 * time.Minute),
|
|
},
|
|
},
|
|
}
|
|
meshState := &syntheticMeshState{
|
|
PeerCache: oldCache,
|
|
RendezvousLeases: []mesh.PeerRendezvousLease{oldLease},
|
|
RoutePathDecisions: oldPathDecisions,
|
|
RouteGenerationTracker: newMeshRouteGenerationTracker(
|
|
oldPathDecisions,
|
|
now.Add(-time.Minute),
|
|
),
|
|
PeerConnections: tracker,
|
|
PeerConnectionManager: mesh.NewPeerConnectionManager(mesh.PeerConnectionManagerConfig{Local: local, PeerCache: oldCache, Tracker: tracker, RendezvousLeases: []mesh.PeerRendezvousLease{oldLease}}),
|
|
Source: "control_plane",
|
|
ConfigVersion: "old-config",
|
|
}
|
|
|
|
requests := 0
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
if r.URL.Path != "/clusters/cluster-1/nodes/node-a/mesh/synthetic-config" {
|
|
t.Fatalf("unexpected path: %s", r.URL.Path)
|
|
}
|
|
requests++
|
|
response := map[string]any{
|
|
"synthetic_mesh_config": map[string]any{
|
|
"enabled": true,
|
|
"schema_version": "c17z18.synthetic.v1",
|
|
"cluster_id": "cluster-1",
|
|
"local_node_id": "node-a",
|
|
"config_version": "new-config",
|
|
"peer_directory_version": "new-config",
|
|
"policy_version": "new-config",
|
|
"peer_endpoints": map[string]string{"node-r-new": "http://node-r-new:19001"},
|
|
"peer_endpoint_candidates": map[string]any{
|
|
"node-b": []map[string]any{
|
|
{
|
|
"endpoint_id": "node-b-outbound-only",
|
|
"node_id": "node-b",
|
|
"transport": "outbound_reverse",
|
|
"address": "http://node-b:19002",
|
|
"address_family": "ipv4",
|
|
"reachability": "outbound_only",
|
|
"connectivity_mode": "outbound_only",
|
|
"nat_type": "symmetric",
|
|
"region": "test",
|
|
"priority": 5,
|
|
},
|
|
},
|
|
},
|
|
"peer_directory": []map[string]any{
|
|
{"node_id": "node-b", "route_ids": []string{"route-ab"}, "endpoint_count": 0, "candidate_count": 1, "connectivity_modes": []string{"relay_required"}, "recovery_seed": false},
|
|
},
|
|
"rendezvous_leases": []map[string]any{
|
|
{
|
|
"lease_id": "lease-node-b-via-node-r-new",
|
|
"peer_node_id": "node-b",
|
|
"relay_node_id": "node-r-new",
|
|
"relay_endpoint": "http://node-r-new:19001",
|
|
"transport": "relay_control",
|
|
"connectivity_mode": "relay_required",
|
|
"route_ids": []string{"route-ab"},
|
|
"allowed_channels": []string{mesh.SyntheticChannelFabricControl},
|
|
"priority": 5,
|
|
"control_plane_only": true,
|
|
"issued_at": now,
|
|
"expires_at": now.Add(10 * time.Minute),
|
|
"reason": "refresh_test",
|
|
},
|
|
},
|
|
"route_path_decisions": map[string]any{
|
|
"schema_version": "c17z18.route_path_decisions.v1",
|
|
"decision_mode": "control_plane_effective_path_from_relay_policy",
|
|
"generation": "new-config",
|
|
"decision_count": 1,
|
|
"replacement_decision_count": 1,
|
|
"control_plane_only": true,
|
|
"production_forwarding": false,
|
|
"decisions": []map[string]any{
|
|
{
|
|
"decision_id": "route-ab-path-node-a-via-node-r-new",
|
|
"route_id": "route-ab",
|
|
"cluster_id": "cluster-1",
|
|
"local_node_id": "node-a",
|
|
"source_node_id": "node-a",
|
|
"destination_node_id": "node-b",
|
|
"original_hops": []string{"node-a", "node-r-old", "node-r-new", "node-b"},
|
|
"effective_hops": []string{"node-a", "node-r-new", "node-b"},
|
|
"next_hop_id": "node-r-new",
|
|
"local_role": "entry",
|
|
"selected_relay_id": "node-r-new",
|
|
"selected_relay_endpoint": "http://node-r-new:19001",
|
|
"stale_relay_node_id": "node-r-old",
|
|
"rendezvous_lease_id": "lease-node-b-via-node-r-new",
|
|
"rendezvous_lease_reason": "stale_relay_replacement",
|
|
"decision_source": "stale_relay_replacement",
|
|
"generation": "new-config",
|
|
"path_score": 900,
|
|
"score_reasons": []string{"relay_replacement_policy"},
|
|
"control_plane_only": true,
|
|
"production_forwarding": false,
|
|
"expires_at": now.Add(10 * time.Minute),
|
|
},
|
|
},
|
|
},
|
|
"routes": []map[string]any{
|
|
{
|
|
"route_id": "route-ab",
|
|
"cluster_id": "cluster-1",
|
|
"source_node_id": "node-a",
|
|
"destination_node_id": "node-b",
|
|
"hops": []string{"node-a", "node-r-old", "node-r-new", "node-b"},
|
|
"allowed_channels": []string{mesh.SyntheticChannelFabricControl},
|
|
"expires_at": now.Add(10 * time.Minute),
|
|
"max_ttl": 6,
|
|
"max_hops": 6,
|
|
},
|
|
},
|
|
"production_forwarding": false,
|
|
},
|
|
}
|
|
if err := json.NewEncoder(w).Encode(response); err != nil {
|
|
t.Fatalf("write response: %v", err)
|
|
}
|
|
}))
|
|
defer server.Close()
|
|
|
|
err := refreshRendezvousLeasesIfNeeded(context.Background(), config.Config{}, identity, client.New(server.URL), meshState, now)
|
|
if err != nil {
|
|
t.Fatalf("refresh leases: %v", err)
|
|
}
|
|
if requests != 1 {
|
|
t.Fatalf("requests = %d, want 1", requests)
|
|
}
|
|
if meshState.ConfigVersion != "new-config" ||
|
|
len(meshState.RendezvousLeases) != 1 ||
|
|
meshState.RendezvousLeases[0].RelayNodeID != "node-r-new" {
|
|
t.Fatalf("mesh state was not refreshed: version=%s leases=%+v", meshState.ConfigVersion, meshState.RendezvousLeases)
|
|
}
|
|
if meshState.RoutePathDecisions == nil ||
|
|
meshState.RoutePathDecisions.SchemaVersion != "c17z18.route_path_decisions.v1" ||
|
|
meshState.RoutePathDecisions.ReplacementDecisionCount != 1 ||
|
|
len(meshState.RoutePathDecisions.Decisions) != 1 ||
|
|
meshState.RoutePathDecisions.Decisions[0].NextHopID != "node-r-new" {
|
|
t.Fatalf("route path decisions were not refreshed: %+v", meshState.RoutePathDecisions)
|
|
}
|
|
if len(meshState.Routes) != 1 ||
|
|
!sameStringSlice(meshState.Routes[0].Hops, []string{"node-a", "node-r-old", "node-r-new", "node-b"}) {
|
|
t.Fatalf("base routes should remain original for non-route-health runtime: %+v", meshState.Routes)
|
|
}
|
|
if len(meshState.RouteHealthRoutes) != 1 ||
|
|
!sameStringSlice(meshState.RouteHealthRoutes[0].Hops, []string{"node-a", "node-r-new", "node-b"}) ||
|
|
meshState.RouteHealthRoutes[0].RouteVersion != "new-config" {
|
|
t.Fatalf("route health routes were not generated from path decisions: %+v", meshState.RouteHealthRoutes)
|
|
}
|
|
if meshState.LeaseRefreshAttempts != 1 || meshState.LeaseRefreshSuccesses != 1 || meshState.LeaseRefreshFailures != 0 {
|
|
t.Fatalf("unexpected refresh counters: attempts=%d successes=%d failures=%d", meshState.LeaseRefreshAttempts, meshState.LeaseRefreshSuccesses, meshState.LeaseRefreshFailures)
|
|
}
|
|
if meshState.LastLeaseRefresh == nil ||
|
|
meshState.LastLeaseRefresh.Status != "succeeded" ||
|
|
meshState.LastLeaseRefresh.Reason != "renewal_needed" {
|
|
t.Fatalf("unexpected refresh state: %+v", meshState.LastLeaseRefresh)
|
|
}
|
|
if meshState.LastConfigRefreshAt.IsZero() {
|
|
t.Fatalf("last config refresh time was not updated")
|
|
}
|
|
recoveryPlan := peerRecoveryPlan(meshState, now.Add(time.Second))
|
|
intentPlan := peerConnectionIntentPlan(meshState, recoveryPlan, now.Add(time.Second))
|
|
var peerIntent mesh.PeerConnectionIntent
|
|
for _, intent := range intentPlan.Intents {
|
|
if intent.NodeID == "node-b" {
|
|
peerIntent = intent
|
|
break
|
|
}
|
|
}
|
|
if intentPlan.RendezvousResolvedCount != 1 ||
|
|
peerIntent.RendezvousLeaseID != "lease-node-b-via-node-r-new" ||
|
|
peerIntent.RelayNodeID != "node-r-new" {
|
|
t.Fatalf("refreshed lease was not selected: %+v", intentPlan)
|
|
}
|
|
pathReport := meshRoutePathDecisionReport(meshState, identity, now.Add(time.Second))
|
|
if pathReport["schema_version"] != meshRoutePathDecisionReportSchema ||
|
|
pathReport["replacement_decision_count"] != 1 ||
|
|
pathReport["next_hop_available_count"] != 1 ||
|
|
pathReport["production_payload_forwarding"] != false {
|
|
t.Fatalf("unexpected route path decision report: %+v", pathReport)
|
|
}
|
|
generationReport := meshRouteGenerationReport(meshState, identity, now.Add(time.Second))
|
|
if generationReport["schema_version"] != meshRouteGenerationReportSchema ||
|
|
generationReport["active_decision_count"] != 1 ||
|
|
generationReport["applied_decision_count"] != 1 ||
|
|
generationReport["withdrawn_decision_count"] != 1 ||
|
|
generationReport["generation_changed"] != true ||
|
|
generationReport["production_payload_forwarding"] != false {
|
|
t.Fatalf("unexpected route generation report: %+v", generationReport)
|
|
}
|
|
routeHealthConfigReport := meshRouteHealthConfigReport(meshState, identity, now.Add(time.Second))
|
|
if routeHealthConfigReport["schema_version"] != meshRouteHealthConfigReportSchema ||
|
|
routeHealthConfigReport["route_path_decision_applied_count"] != 1 ||
|
|
routeHealthConfigReport["replacement_route_health_route_count"] != 1 ||
|
|
routeHealthConfigReport["synthetic_route_health_route_path_runtime"] != true ||
|
|
routeHealthConfigReport["test_service_route_config_changed"] != false ||
|
|
routeHealthConfigReport["config_refresh_interval_ms"] != int64(meshSyntheticConfigRefreshInterval/time.Millisecond) ||
|
|
routeHealthConfigReport["production_payload_forwarding"] != false {
|
|
t.Fatalf("unexpected route health config report: %+v", routeHealthConfigReport)
|
|
}
|
|
}
|
|
|
|
func TestRouteHealthFeedbackRefreshAppliesReplacementConfig(t *testing.T) {
|
|
now := time.Date(2026, 4, 28, 22, 0, 0, 0, time.UTC)
|
|
identity := state.Identity{ClusterID: "cluster-1", NodeID: "node-a"}
|
|
local := mesh.PeerIdentity{ClusterID: identity.ClusterID, NodeID: identity.NodeID}
|
|
oldRoute := mesh.SyntheticRoute{
|
|
RouteID: "route-ab",
|
|
ClusterID: "cluster-1",
|
|
SourceNodeID: "node-a",
|
|
DestinationNodeID: "node-b",
|
|
Hops: []string{"node-a", "node-r-old", "node-b"},
|
|
AllowedChannels: []string{mesh.SyntheticChannelFabricControl},
|
|
ExpiresAt: now.Add(10 * time.Minute),
|
|
RouteVersion: "old-config",
|
|
}
|
|
oldCache := mesh.NewPeerCache(mesh.PeerCacheConfig{
|
|
Local: local,
|
|
PeerEndpoints: map[string]string{"node-r-old": "http://node-r-old:19001"},
|
|
Routes: []mesh.SyntheticRoute{oldRoute},
|
|
WarmPeerLimit: 1,
|
|
Now: now,
|
|
})
|
|
meshState := &syntheticMeshState{
|
|
PeerCache: oldCache,
|
|
Routes: []mesh.SyntheticRoute{oldRoute},
|
|
RouteHealthRoutes: []mesh.SyntheticRoute{oldRoute},
|
|
Source: "control_plane",
|
|
ConfigVersion: "old-config",
|
|
PeerConnections: mesh.NewPeerConnectionTracker(oldCache.Snapshot(), now),
|
|
}
|
|
|
|
requests := 0
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
if r.URL.Path != "/clusters/cluster-1/nodes/node-a/mesh/synthetic-config" {
|
|
t.Fatalf("unexpected path: %s", r.URL.Path)
|
|
}
|
|
requests++
|
|
response := map[string]any{
|
|
"synthetic_mesh_config": map[string]any{
|
|
"enabled": true,
|
|
"schema_version": "c17z20.synthetic.v1",
|
|
"cluster_id": "cluster-1",
|
|
"local_node_id": "node-a",
|
|
"config_version": "new-config",
|
|
"peer_directory_version": "new-config",
|
|
"policy_version": "new-config",
|
|
"peer_endpoints": map[string]string{"node-r-new": "http://node-r-new:19001"},
|
|
"rendezvous_leases": []map[string]any{
|
|
{
|
|
"lease_id": "lease-node-b-via-node-r-new",
|
|
"peer_node_id": "node-b",
|
|
"relay_node_id": "node-r-new",
|
|
"relay_endpoint": "http://node-r-new:19001",
|
|
"transport": "relay_control",
|
|
"connectivity_mode": "relay_required",
|
|
"route_ids": []string{"route-ab"},
|
|
"allowed_channels": []string{mesh.SyntheticChannelFabricControl},
|
|
"priority": 5,
|
|
"control_plane_only": true,
|
|
"issued_at": now,
|
|
"expires_at": now.Add(10 * time.Minute),
|
|
"reason": "stale_relay_replacement",
|
|
},
|
|
},
|
|
"route_path_decisions": map[string]any{
|
|
"schema_version": "c17z20.route_path_decisions.v1",
|
|
"decision_mode": "control_plane_effective_path_from_relay_policy",
|
|
"generation": "new-config",
|
|
"decision_count": 1,
|
|
"replacement_decision_count": 1,
|
|
"control_plane_only": true,
|
|
"production_forwarding": false,
|
|
"decisions": []map[string]any{
|
|
{
|
|
"decision_id": "route-ab-path-node-a-via-node-r-new",
|
|
"route_id": "route-ab",
|
|
"cluster_id": "cluster-1",
|
|
"local_node_id": "node-a",
|
|
"source_node_id": "node-a",
|
|
"destination_node_id": "node-b",
|
|
"original_hops": []string{"node-a", "node-r-old", "node-r-new", "node-b"},
|
|
"effective_hops": []string{"node-a", "node-r-new", "node-b"},
|
|
"next_hop_id": "node-r-new",
|
|
"local_role": "entry",
|
|
"selected_relay_id": "node-r-new",
|
|
"selected_relay_endpoint": "http://node-r-new:19001",
|
|
"stale_relay_node_id": "node-r-old",
|
|
"rendezvous_peer_node_id": "node-b",
|
|
"rendezvous_lease_id": "lease-node-b-via-node-r-new",
|
|
"rendezvous_lease_reason": "stale_relay_replacement",
|
|
"decision_source": "stale_relay_replacement",
|
|
"generation": "new-config",
|
|
"path_score": 900,
|
|
"score_reasons": []string{"route_health_feedback"},
|
|
"control_plane_only": true,
|
|
"production_forwarding": false,
|
|
"expires_at": now.Add(10 * time.Minute),
|
|
},
|
|
},
|
|
},
|
|
"routes": []map[string]any{
|
|
{
|
|
"route_id": "route-ab",
|
|
"cluster_id": "cluster-1",
|
|
"source_node_id": "node-a",
|
|
"destination_node_id": "node-b",
|
|
"hops": []string{"node-a", "node-r-old", "node-r-new", "node-b"},
|
|
"allowed_channels": []string{mesh.SyntheticChannelFabricControl},
|
|
"expires_at": now.Add(10 * time.Minute),
|
|
"max_ttl": 6,
|
|
"max_hops": 6,
|
|
},
|
|
},
|
|
"production_forwarding": false,
|
|
},
|
|
}
|
|
if err := json.NewEncoder(w).Encode(response); err != nil {
|
|
t.Fatalf("write response: %v", err)
|
|
}
|
|
}))
|
|
defer server.Close()
|
|
|
|
trigger := meshRouteHealthFeedbackTrigger{
|
|
Reason: "synthetic_route_health_drift",
|
|
RouteID: "route-ab",
|
|
PeerNodeID: "node-b",
|
|
SelectedRelayID: "node-r-old",
|
|
LinkStatus: "reachable",
|
|
DriftDetected: true,
|
|
ObservedAt: now,
|
|
}
|
|
err := refreshSyntheticMeshConfigForRouteHealthFeedback(context.Background(), config.Config{}, identity, client.New(server.URL), meshState, trigger, now)
|
|
if err != nil {
|
|
t.Fatalf("refresh route health feedback: %v", err)
|
|
}
|
|
if requests != 1 {
|
|
t.Fatalf("requests = %d, want 1", requests)
|
|
}
|
|
if meshState.ConfigVersion != "new-config" ||
|
|
len(meshState.RouteHealthRoutes) != 1 ||
|
|
!sameStringSlice(meshState.RouteHealthRoutes[0].Hops, []string{"node-a", "node-r-new", "node-b"}) {
|
|
t.Fatalf("replacement config was not applied: version=%s route_health_routes=%+v", meshState.ConfigVersion, meshState.RouteHealthRoutes)
|
|
}
|
|
if meshState.RouteHealthRefreshAttempts != 1 || meshState.RouteHealthRefreshSuccesses != 1 || meshState.RouteHealthRefreshFailures != 0 {
|
|
t.Fatalf("unexpected refresh counters: attempts=%d successes=%d failures=%d", meshState.RouteHealthRefreshAttempts, meshState.RouteHealthRefreshSuccesses, meshState.RouteHealthRefreshFailures)
|
|
}
|
|
if meshState.LastRouteHealthRefresh == nil ||
|
|
meshState.LastRouteHealthRefresh.Status != "succeeded" ||
|
|
meshState.LastRouteHealthRefresh.Reason != "synthetic_route_health_drift" ||
|
|
meshState.LastRouteHealthRefresh.RefreshedConfigVersion != "new-config" {
|
|
t.Fatalf("unexpected refresh state: %+v", meshState.LastRouteHealthRefresh)
|
|
}
|
|
report := meshRouteHealthFeedbackRefreshReport(meshState, identity, now.Add(time.Second))
|
|
if report["schema_version"] != meshRouteHealthFeedbackRefreshSchema ||
|
|
report["feedback_refresh_attempt_count"] != 1 ||
|
|
report["feedback_refresh_success_count"] != 1 ||
|
|
report["last_feedback_refresh_status"] != "succeeded" ||
|
|
report["last_feedback_refresh_reason"] != "synthetic_route_health_drift" ||
|
|
report["production_payload_forwarding"] != false {
|
|
t.Fatalf("unexpected feedback refresh report: %+v", report)
|
|
}
|
|
routeHealthConfigReport := meshRouteHealthConfigReport(meshState, identity, now.Add(time.Second))
|
|
if routeHealthConfigReport["schema_version"] != meshRouteHealthConfigReportSchema ||
|
|
routeHealthConfigReport["feedback_refresh_backoff_ms"] != int64(meshRouteHealthFeedbackRefreshBackoff/time.Millisecond) ||
|
|
routeHealthConfigReport["feedback_refresh_attempt_count"] != 1 ||
|
|
routeHealthConfigReport["feedback_refresh_success_count"] != 1 ||
|
|
routeHealthConfigReport["production_payload_forwarding"] != false {
|
|
t.Fatalf("unexpected route health config report: %+v", routeHealthConfigReport)
|
|
}
|
|
}
|
|
|
|
func TestRouteHealthFeedbackRefreshBackoffSuppressesRepeatedTrigger(t *testing.T) {
|
|
now := time.Date(2026, 4, 28, 22, 5, 0, 0, time.UTC)
|
|
local := mesh.PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"}
|
|
cache := mesh.NewPeerCache(mesh.PeerCacheConfig{
|
|
Local: local,
|
|
PeerEndpoints: map[string]string{"node-b": "http://node-b:19002"},
|
|
WarmPeerLimit: 1,
|
|
Now: now,
|
|
})
|
|
meshState := &syntheticMeshState{
|
|
PeerCache: cache,
|
|
Source: "control_plane",
|
|
LastRouteHealthRefresh: &meshRouteHealthFeedbackRefreshState{
|
|
Status: "succeeded",
|
|
Reason: "synthetic_route_health_drift",
|
|
AttemptedAt: now.Add(-time.Second),
|
|
RouteID: "route-ab",
|
|
},
|
|
}
|
|
trigger := meshRouteHealthFeedbackTrigger{
|
|
Reason: "synthetic_route_health_failure",
|
|
RouteID: "route-ab",
|
|
PeerNodeID: "node-b",
|
|
LinkStatus: "unreachable",
|
|
FailureReason: "probe failed",
|
|
ObservedAt: now,
|
|
}
|
|
err := refreshSyntheticMeshConfigForRouteHealthFeedback(context.Background(), config.Config{}, state.Identity{ClusterID: "cluster-1", NodeID: "node-a"}, client.New("http://127.0.0.1:1"), meshState, trigger, now)
|
|
if err != nil {
|
|
t.Fatalf("refresh should have been suppressed without backend call: %v", err)
|
|
}
|
|
if meshState.RouteHealthRefreshAttempts != 0 || meshState.RouteHealthRefreshSuppressed != 1 {
|
|
t.Fatalf("unexpected counters: attempts=%d suppressed=%d", meshState.RouteHealthRefreshAttempts, meshState.RouteHealthRefreshSuppressed)
|
|
}
|
|
if meshState.LastRouteHealthRefresh.Reason != "synthetic_route_health_drift" {
|
|
t.Fatalf("suppressed refresh should not replace last state: %+v", meshState.LastRouteHealthRefresh)
|
|
}
|
|
}
|
|
|
|
func TestRouteHealthFeedbackTriggerFromObservation(t *testing.T) {
|
|
now := time.Date(2026, 4, 28, 22, 10, 0, 0, time.UTC)
|
|
route := mesh.SyntheticRoute{
|
|
RouteID: "route-ab",
|
|
SourceNodeID: "node-a",
|
|
DestinationNodeID: "node-b",
|
|
Hops: []string{"node-a", "node-r-old", "node-b"},
|
|
}
|
|
decision := client.RoutePathDecision{
|
|
RouteID: "route-ab",
|
|
RendezvousPeerNodeID: "node-b",
|
|
SelectedRelayID: "node-r-old",
|
|
RendezvousLeaseID: "lease-old",
|
|
RendezvousLeaseReason: "auto_outbound_only",
|
|
}
|
|
trigger, ok := routeHealthFeedbackTriggerFromObservation(route, decision, true, "reachable", map[string]any{
|
|
"route_path_drift_detected": true,
|
|
}, now)
|
|
if !ok ||
|
|
trigger.Reason != "synthetic_route_health_drift" ||
|
|
trigger.RouteID != "route-ab" ||
|
|
trigger.PeerNodeID != "node-b" ||
|
|
trigger.SelectedRelayID != "node-r-old" ||
|
|
!trigger.DriftDetected {
|
|
t.Fatalf("unexpected drift trigger: %+v ok=%t", trigger, ok)
|
|
}
|
|
if _, ok := routeHealthFeedbackTriggerFromObservation(route, decision, true, "reachable", map[string]any{
|
|
"route_path_drift_detected": false,
|
|
}, now); ok {
|
|
t.Fatal("healthy route-health observation should not trigger refresh")
|
|
}
|
|
}
|
|
|
|
func TestMeshRouteGenerationTrackerReportsReplacementWithdrawOnFirstApply(t *testing.T) {
|
|
now := time.Date(2026, 4, 28, 13, 50, 0, 0, time.UTC)
|
|
report := &client.RoutePathDecisionReport{
|
|
SchemaVersion: "c17z18.route_path_decisions.v1",
|
|
Generation: "config-replacement",
|
|
ReplacementDecisionCount: 1,
|
|
Decisions: []client.RoutePathDecision{
|
|
{
|
|
DecisionID: "route-1-path-node-a-via-node-r-new",
|
|
RouteID: "route-1",
|
|
LocalNodeID: "node-a",
|
|
OriginalHops: []string{"node-a", "node-r-old", "node-r-new", "node-c"},
|
|
EffectiveHops: []string{"node-a", "node-r-new", "node-c"},
|
|
DecisionSource: "stale_relay_replacement",
|
|
Generation: "config-replacement",
|
|
LocalRole: "entry",
|
|
NextHopID: "node-r-new",
|
|
SelectedRelayID: "node-r-new",
|
|
StaleRelayNodeID: "node-r-old",
|
|
RendezvousLeaseID: "lease-node-c-via-node-r-new",
|
|
PathScore: 760,
|
|
ControlPlaneOnly: true,
|
|
ProductionForwarding: false,
|
|
RendezvousLeaseReason: "stale_relay_replacement",
|
|
SelectedRelayEndpoint: "http://node-r-new:19124",
|
|
},
|
|
},
|
|
}
|
|
meshState := &syntheticMeshState{
|
|
Source: "control_plane",
|
|
ConfigVersion: "config-replacement",
|
|
RoutePathDecisions: report,
|
|
RouteGenerationTracker: newMeshRouteGenerationTracker(report, now),
|
|
}
|
|
generationReport := meshRouteGenerationReport(meshState, state.Identity{
|
|
ClusterID: "cluster-1",
|
|
NodeID: "node-a",
|
|
}, now.Add(time.Second))
|
|
if generationReport["active_decision_count"] != 1 ||
|
|
generationReport["applied_decision_count"] != 1 ||
|
|
generationReport["withdrawn_decision_count"] != 1 ||
|
|
generationReport["total_withdrawn_decision_count"] != 1 ||
|
|
generationReport["generation_changed"] != true {
|
|
t.Fatalf("unexpected first-apply route generation report: %+v", generationReport)
|
|
}
|
|
}
|
|
|
|
func TestProductionEnvelopeObservationSinkFromConfigCreatesBoundedSink(t *testing.T) {
|
|
sink := productionEnvelopeObservationSinkFromConfig(config.Config{
|
|
MeshProductionObservationSinkCapacity: 2,
|
|
})
|
|
if sink == nil {
|
|
t.Fatal("sink is nil")
|
|
}
|
|
if sink.Capacity() != 2 {
|
|
t.Fatalf("sink capacity = %d, want 2", sink.Capacity())
|
|
}
|
|
}
|
|
|
|
func TestProductionForwardingLogStateDistinguishesGateFromRuntime(t *testing.T) {
|
|
gateEnabled, runtimeEnabled := productionForwardingLogState(config.Config{
|
|
MeshProductionForwardingEnabled: true,
|
|
}, false)
|
|
if !gateEnabled {
|
|
t.Fatal("gateEnabled = false, want true")
|
|
}
|
|
if !runtimeEnabled {
|
|
t.Fatal("runtimeEnabled = false, want true")
|
|
}
|
|
gateEnabled, runtimeEnabled = productionForwardingLogState(config.Config{}, false)
|
|
if gateEnabled || runtimeEnabled {
|
|
t.Fatalf("default log state = gate:%t runtime:%t, want false/false", gateEnabled, runtimeEnabled)
|
|
}
|
|
gateEnabled, runtimeEnabled = productionForwardingLogState(config.Config{}, true)
|
|
if !gateEnabled || !runtimeEnabled {
|
|
t.Fatalf("signed control-plane log state = gate:%t runtime:%t, want true/true", gateEnabled, runtimeEnabled)
|
|
}
|
|
}
|
|
|
|
func TestMeshLinkStatusFromPeerProbeMapsDeferredForLatestLinks(t *testing.T) {
|
|
cases := map[string]string{
|
|
mesh.PeerConnectionProbeReachable: "reachable",
|
|
mesh.PeerConnectionProbeUnreachable: "unreachable",
|
|
mesh.PeerConnectionProbeDeferred: "degraded",
|
|
mesh.PeerConnectionProbeSkipped: "unknown",
|
|
"unexpected": "unknown",
|
|
}
|
|
for input, want := range cases {
|
|
if got := meshLinkStatusFromPeerProbe(input); got != want {
|
|
t.Fatalf("meshLinkStatusFromPeerProbe(%q) = %q, want %q", input, got, want)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestLogProductionObservationSinkMetricsToleratesNilState(t *testing.T) {
|
|
logProductionObservationSinkMetrics(nil)
|
|
logProductionObservationSinkMetrics(&syntheticMeshState{})
|
|
}
|
|
|
|
func TestLogProductionObservationSinkMetricsOnlyWhenChanged(t *testing.T) {
|
|
sink := mesh.NewProductionEnvelopeObservationSink(2)
|
|
meshState := &syntheticMeshState{ProductionObservationSink: sink}
|
|
var logs strings.Builder
|
|
previousOutput := log.Writer()
|
|
log.SetOutput(&logs)
|
|
defer log.SetOutput(previousOutput)
|
|
defer log.SetOutput(io.Discard)
|
|
|
|
logProductionObservationSinkMetrics(meshState)
|
|
firstLen := logs.Len()
|
|
if firstLen == 0 {
|
|
t.Fatal("first metrics log was not written")
|
|
}
|
|
logProductionObservationSinkMetrics(meshState)
|
|
if logs.Len() != firstLen {
|
|
t.Fatal("metrics log was written again without metric changes")
|
|
}
|
|
if err := sink.Observe(context.Background(), mesh.ProductionEnvelopeObservation{MessageID: "message-1"}); err != nil {
|
|
t.Fatalf("observe: %v", err)
|
|
}
|
|
logProductionObservationSinkMetrics(meshState)
|
|
if logs.Len() == firstLen {
|
|
t.Fatal("metrics log was not written after metric changes")
|
|
}
|
|
}
|
|
|
|
func TestProductionObservationSinkMetricsEqual(t *testing.T) {
|
|
a := mesh.ProductionEnvelopeObservationSinkMetrics{
|
|
Capacity: 2,
|
|
CurrentDepth: 1,
|
|
AcceptedTotal: 1,
|
|
DroppedOldest: 0,
|
|
}
|
|
if !productionObservationSinkMetricsEqual(a, a) {
|
|
t.Fatal("identical metrics were not equal")
|
|
}
|
|
b := a
|
|
b.DroppedOldest = 1
|
|
if productionObservationSinkMetricsEqual(a, b) {
|
|
t.Fatal("different metrics were equal")
|
|
}
|
|
}
|