Files
rdp-proxy/agents/rap-node-agent/cmd/rap-node-agent/main_test.go
T
m 20d361a886
build / backend (push) Has been cancelled
build / node-agent (push) Has been cancelled
build / worker (push) Has been cancelled
рабочий вариант, но скорость 10 МБит
2026-05-22 21:46:49 +03:00

4247 lines
164 KiB
Go

package main
import (
"context"
"crypto/ed25519"
"crypto/rand"
"crypto/rsa"
"crypto/sha256"
"crypto/tls"
"crypto/x509"
"crypto/x509/pkix"
"encoding/base64"
"encoding/hex"
"encoding/json"
"errors"
"fmt"
"io"
"log"
"math/big"
"net"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"reflect"
"runtime"
"strings"
"testing"
"time"
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/agent"
agentauthority "github.com/example/remote-access-platform/agents/rap-node-agent/internal/authority"
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/client"
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/config"
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/hostagent"
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/mesh"
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/state"
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/vpnruntime"
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/webingress"
)
// TestLoadSyntheticMeshConfigPrefersScopedFile writes a scoped synthetic mesh
// config to a temp file and checks that loadSyntheticMeshConfig reports
// "scoped_config" as the source and returns the file's peer endpoint, route,
// peer directory and recovery seeds, even though inline peer-endpoint and
// route JSON are configured as well.
func TestLoadSyntheticMeshConfigPrefersScopedFile(t *testing.T) {
	const (
		clusterID    = "cluster-1"
		localNode    = "node-a"
		peerNode     = "node-b"
		fileRouteID  = "route-file"
		fileEndpoint = "quic://127.0.0.1:19002"
	)

	scoped := mesh.ScopedSyntheticConfig{
		SchemaVersion: "c17f.synthetic.v1",
		ClusterID:     clusterID,
		LocalNodeID:   localNode,
		PeerEndpoints: map[string]string{peerNode: fileEndpoint},
		PeerDirectory: []mesh.PeerDirectoryEntry{
			{NodeID: peerNode, RouteIDs: []string{fileRouteID}, EndpointCount: 1},
		},
		RecoverySeeds: []mesh.PeerRecoverySeed{
			{NodeID: peerNode, Endpoint: fileEndpoint, Transport: "direct_quic", Priority: 10},
		},
		Routes: []mesh.SyntheticRoute{{
			RouteID:              fileRouteID,
			ClusterID:            clusterID,
			SourceNodeID:         localNode,
			DestinationNodeID:    peerNode,
			Hops:                 []string{localNode, peerNode},
			AllowedChannels:      []string{mesh.SyntheticChannelFabricControl},
			ExpiresAt:            time.Now().UTC().Add(time.Hour),
			RouteVersion:         "route-v1",
			PolicyVersion:        "policy-v1",
			PeerDirectoryVersion: "peers-v1",
		}},
	}
	encoded, err := json.Marshal(scoped)
	if err != nil {
		t.Fatalf("marshal scoped config: %v", err)
	}

	configPath := filepath.Join(t.TempDir(), "mesh-scoped.json")
	if err := os.WriteFile(configPath, encoded, 0o600); err != nil {
		t.Fatalf("write scoped config: %v", err)
	}

	// The inline JSON settings deliberately disagree with the file so the
	// assertions below can tell which source was used.
	cfg := config.Config{
		MeshSyntheticConfigPath: configPath,
		MeshPeerEndpointsJSON:   `{"node-b":"quic://debug.invalid:19443"}`,
		MeshSyntheticRoutesJSON: `[]`,
	}
	identity := state.Identity{ClusterID: clusterID, NodeID: localNode}

	loaded, err := loadSyntheticMeshConfig(context.Background(), cfg, identity, nil)
	if err != nil {
		t.Fatalf("load synthetic config: %v", err)
	}

	if loaded.Source != "scoped_config" {
		t.Fatalf("source = %q, want scoped_config", loaded.Source)
	}
	if endpoint := loaded.PeerEndpoints[peerNode]; endpoint != fileEndpoint {
		t.Fatalf("peer endpoint = %q", endpoint)
	}
	if !(len(loaded.Routes) == 1 && loaded.Routes[0].RouteID == fileRouteID) {
		t.Fatalf("routes = %+v", loaded.Routes)
	}
	if !(len(loaded.PeerDirectory) == 1 && len(loaded.RecoverySeeds) == 1) {
		t.Fatalf("peer runtime config missing: directory=%+v seeds=%+v", loaded.PeerDirectory, loaded.RecoverySeeds)
	}
}
// TestQUICFabricTLSConfigPersistsCertificateAcrossRestarts calls
// quicFabricTLSConfig twice against the same state directory and expects both
// calls to return the same non-empty fingerprint, and the certificate and key
// files to exist in that directory afterwards.
func TestQUICFabricTLSConfigPersistsCertificateAcrossRestarts(t *testing.T) {
	stateDir := t.TempDir()
	cfg := config.Config{StateDir: stateDir}
	identity := state.Identity{NodeID: "node-a"}

	_, initial, err := quicFabricTLSConfig(cfg, identity)
	if err != nil {
		t.Fatalf("create quic fabric tls config: %v", err)
	}
	_, reloaded, err := quicFabricTLSConfig(cfg, identity)
	if err != nil {
		t.Fatalf("reload quic fabric tls config: %v", err)
	}

	stable := initial != "" && reloaded != "" && initial == reloaded
	if !stable {
		t.Fatalf("fingerprints = %q then %q, want stable persisted certificate", initial, reloaded)
	}

	persisted := []struct {
		file  string
		label string
	}{
		{file: "quic-fabric.crt", label: "certificate"},
		{file: "quic-fabric.key", label: "private key"},
	}
	for _, item := range persisted {
		if _, err := os.Stat(filepath.Join(cfg.StateDir, item.file)); err != nil {
			t.Fatalf("persisted %s missing: %v", item.label, err)
		}
	}
}
// TestSyntheticMeshConfigAuthorityHashUsesRawConfigPayload checks that
// syntheticMeshConfigAuthorityHash equals agentauthority.HashRaw of the raw
// config document with only "authority_payload" and "authority_signature"
// removed, so a field such as "future_backend_field" still feeds the hash.
func TestSyntheticMeshConfigAuthorityHashUsesRawConfigPayload(t *testing.T) {
	document := json.RawMessage(`{
"enabled": true,
"schema_version": "c18z-test.synthetic.v1",
"cluster_id": "cluster-1",
"local_node_id": "node-a",
"authority_required": true,
"cluster_authority": {"schema_version":"rap.cluster_authority.v1"},
"authority_payload": {"ignored": true},
"authority_signature": {"ignored": true},
"config_version": "config-1",
"peer_endpoints": {},
"routes": [],
"production_forwarding": true,
"future_backend_field": {"must_remain_hash_visible": true}
}`)

	var remote client.SyntheticMeshConfig
	if err := json.Unmarshal(document, &remote); err != nil {
		t.Fatalf("unmarshal synthetic config: %v", err)
	}

	// Build the expected hash input: every top-level field except the two
	// authority envelope fields.
	fields := map[string]json.RawMessage{}
	if err := json.Unmarshal(document, &fields); err != nil {
		t.Fatalf("unmarshal unsigned map: %v", err)
	}
	for _, envelopeKey := range []string{"authority_payload", "authority_signature"} {
		delete(fields, envelopeKey)
	}
	unsignedDocument, err := json.Marshal(fields)
	if err != nil {
		t.Fatalf("marshal unsigned map: %v", err)
	}
	expected, err := agentauthority.HashRaw(unsignedDocument)
	if err != nil {
		t.Fatalf("hash unsigned map: %v", err)
	}

	actual, err := syntheticMeshConfigAuthorityHash(remote)
	if err != nil {
		t.Fatalf("hash synthetic config: %v", err)
	}
	if actual != expected {
		t.Fatalf("hash = %s, want raw-preserving hash %s", actual, expected)
	}
}
// TestPersistUpdateHintTriggerWakesLocalUpdaterOnNewGeneration passes a signed
// hint for generation "gen-1" to persistUpdateHintTrigger with stubbed wake
// hooks. It expects the wake command to be invoked (with the schtasks
// arguments on Windows), the generation to be readable back from the state
// directory, and the rescue intent file to contain the target version.
func TestPersistUpdateHintTriggerWakesLocalUpdaterOnNewGeneration(t *testing.T) {
	const nodeName = "ifcm-rufms-s-mo1cr"

	stateDir := t.TempDir()
	cfg := config.Config{StateDir: stateDir, NodeName: nodeName}

	pub, priv, err := ed25519.GenerateKey(rand.Reader)
	if err != nil {
		t.Fatalf("GenerateKey: %v", err)
	}
	identity := state.Identity{
		ClusterID:                 "cluster-1",
		NodeID:                    "node-1",
		NodeName:                  nodeName,
		ClusterAuthorityPublicKey: base64.StdEncoding.EncodeToString(pub),
	}
	if err := state.Save(filepath.Join(stateDir, state.FileName), identity); err != nil {
		t.Fatalf("save identity: %v", err)
	}

	hint := &client.NodeUpdateHint{
		CheckNow:       true,
		Generation:     "gen-1",
		Products:       []string{"rap-node-agent"},
		TargetVersions: map[string]string{"rap-node-agent": "0.2.345-updaterdiagnostics"},
	}
	signNodeUpdateHintForTest(t, hint, identity, priv)

	// Swap the package-level hooks for recording stubs and restore them when
	// the test finishes.
	origWake, origLookPath := runLocalUpdateWakeCommand, localUpdateWakeLookPath
	t.Cleanup(func() {
		runLocalUpdateWakeCommand = origWake
		localUpdateWakeLookPath = origLookPath
	})
	var (
		wakeName string
		wakeArgs []string
	)
	runLocalUpdateWakeCommand = func(_ context.Context, name string, args ...string) error {
		wakeName = name
		wakeArgs = make([]string, len(args))
		copy(wakeArgs, args)
		return nil
	}
	localUpdateWakeLookPath = func(name string) (string, error) { return name, nil }

	if err := persistUpdateHintTrigger(context.Background(), cfg, identity, hint); err != nil {
		t.Fatalf("persistUpdateHintTrigger: %v", err)
	}

	if wakeName == "" {
		t.Fatalf("expected local updater wake command to run")
	}
	if runtime.GOOS == "windows" {
		if wakeName != "schtasks" {
			t.Fatalf("wake command = %q, want schtasks", wakeName)
		}
		wantArgs := []string{"/Run", "/TN", "RAP Host Agent Updater ifcm-rufms-s-mo1cr"}
		if !reflect.DeepEqual(wakeArgs, wantArgs) {
			t.Fatalf("wake args = %#v", wakeArgs)
		}
	}

	generation := hostagent.CurrentUpdateTriggerGenerationForNodeAgent(stateDir)
	if generation != "gen-1" {
		t.Fatalf("generation = %q, want gen-1", generation)
	}

	rescueIntent, err := os.ReadFile(hostagent.UpdateRescueIntentPath(stateDir))
	if err != nil {
		t.Fatalf("read rescue intent: %v", err)
	}
	if !strings.Contains(string(rescueIntent), `"0.2.345-updaterdiagnostics"`) {
		t.Fatalf("rescue intent missing target version: %s", rescueIntent)
	}
}
// TestPersistUpdateHintTriggerSkipsWakeForRepeatedGeneration seeds the state
// directory with a stored trigger for generation "gen-1" and then delivers a
// hint with the same generation; the stubbed wake command must not be called.
func TestPersistUpdateHintTriggerSkipsWakeForRepeatedGeneration(t *testing.T) {
	const nodeName = "ifcm-rufms-s-mo1cr"

	stateDir := t.TempDir()
	cfg := config.Config{StateDir: stateDir, NodeName: nodeName}

	pub, priv, err := ed25519.GenerateKey(rand.Reader)
	if err != nil {
		t.Fatalf("GenerateKey: %v", err)
	}
	identity := state.Identity{
		ClusterID:                 "cluster-1",
		NodeID:                    "node-1",
		NodeName:                  nodeName,
		ClusterAuthorityPublicKey: base64.StdEncoding.EncodeToString(pub),
	}
	if err := state.Save(filepath.Join(stateDir, state.FileName), identity); err != nil {
		t.Fatalf("save identity: %v", err)
	}

	hint := &client.NodeUpdateHint{CheckNow: true, Generation: "gen-1"}
	signNodeUpdateHintForTest(t, hint, identity, priv)

	// Pretend this generation was already observed on a previous run.
	seeded := signedStoredUpdateTriggerForTest(hint)
	if err := hostagent.SaveUpdateTrigger(stateDir, seeded); err != nil {
		t.Fatalf("seed trigger: %v", err)
	}

	origWake, origLookPath := runLocalUpdateWakeCommand, localUpdateWakeLookPath
	t.Cleanup(func() {
		runLocalUpdateWakeCommand = origWake
		localUpdateWakeLookPath = origLookPath
	})
	wakeInvoked := false
	runLocalUpdateWakeCommand = func(context.Context, string, ...string) error {
		wakeInvoked = true
		return nil
	}
	localUpdateWakeLookPath = func(name string) (string, error) { return name, nil }

	if err := persistUpdateHintTrigger(context.Background(), cfg, identity, hint); err != nil {
		t.Fatalf("persistUpdateHintTrigger: %v", err)
	}
	if wakeInvoked {
		t.Fatalf("expected no wake call for repeated generation")
	}
}
// TestPersistUpdateHintTriggerTreatsMissingLocalSchedulerAsDelivered makes the
// look-path hook fail with "scheduler unavailable" and expects
// persistUpdateHintTrigger to succeed anyway, without invoking the wake
// command, while still persisting generation "gen-1".
func TestPersistUpdateHintTriggerTreatsMissingLocalSchedulerAsDelivered(t *testing.T) {
	const nodeName = "ifcm-rufms-s-mo1cr"

	stateDir := t.TempDir()
	cfg := config.Config{StateDir: stateDir, NodeName: nodeName}

	pub, priv, err := ed25519.GenerateKey(rand.Reader)
	if err != nil {
		t.Fatalf("GenerateKey: %v", err)
	}
	identity := state.Identity{
		ClusterID:                 "cluster-1",
		NodeID:                    "node-1",
		NodeName:                  nodeName,
		ClusterAuthorityPublicKey: base64.StdEncoding.EncodeToString(pub),
	}
	if err := state.Save(filepath.Join(stateDir, state.FileName), identity); err != nil {
		t.Fatalf("save identity: %v", err)
	}

	hint := &client.NodeUpdateHint{CheckNow: true, Generation: "gen-1"}
	signNodeUpdateHintForTest(t, hint, identity, priv)

	origWake, origLookPath := runLocalUpdateWakeCommand, localUpdateWakeLookPath
	t.Cleanup(func() {
		runLocalUpdateWakeCommand = origWake
		localUpdateWakeLookPath = origLookPath
	})
	wakeInvoked := false
	runLocalUpdateWakeCommand = func(context.Context, string, ...string) error {
		wakeInvoked = true
		return nil
	}
	// Simulate a host on which the scheduler binary cannot be located.
	localUpdateWakeLookPath = func(string) (string, error) {
		return "", errors.New("scheduler unavailable")
	}

	if err := persistUpdateHintTrigger(context.Background(), cfg, identity, hint); err != nil {
		t.Fatalf("persistUpdateHintTrigger: %v", err)
	}
	if wakeInvoked {
		t.Fatalf("expected no wake command when local scheduler is unavailable")
	}

	generation := hostagent.CurrentUpdateTriggerGenerationForNodeAgent(stateDir)
	if generation != "gen-1" {
		t.Fatalf("generation = %q, want gen-1", generation)
	}
}
// TestPersistUpdateHintTriggerFallsBackToDirectHostAgentLaunchWhenWakeFails is
// Windows-only. With the wake command stubbed to fail, it expects
// persistUpdateHintTrigger to invoke the launch hook with the
// rap-host-agent.exe that sits next to the reported node-agent executable,
// passing the "--windows-task-name" argument for this node.
func TestPersistUpdateHintTriggerFallsBackToDirectHostAgentLaunchWhenWakeFails(t *testing.T) {
	if runtime.GOOS != "windows" {
		t.Skip("windows-only fallback")
	}
	const nodeName = "ifcm-rufms-s-mo1cr"

	stateDir := t.TempDir()
	installDir := t.TempDir()
	hostAgentPath := filepath.Join(installDir, "rap-host-agent.exe")
	nodeAgentPath := filepath.Join(installDir, "rap-node-agent.exe")

	// Create placeholder binaries; each file's content is simply its label.
	binaries := []struct {
		path  string
		label string
	}{
		{path: hostAgentPath, label: "host-agent"},
		{path: nodeAgentPath, label: "node-agent"},
	}
	for _, binary := range binaries {
		if err := os.WriteFile(binary.path, []byte(binary.label), 0o755); err != nil {
			t.Fatalf("write %s: %v", binary.label, err)
		}
	}

	cfg := config.Config{
		StateDir:                  stateDir,
		NodeName:                  nodeName,
		FabricRegistryRecordsJSON: `[{"service":"control-api"}]`,
		ClusterAuthorityPublicKey: "test-pub",
		MeshRegion:                "ifcm",
	}

	pub, priv, err := ed25519.GenerateKey(rand.Reader)
	if err != nil {
		t.Fatalf("GenerateKey: %v", err)
	}
	identity := state.Identity{
		NodeID:                    "node-1",
		ClusterID:                 "cluster-1",
		NodeName:                  nodeName,
		ClusterAuthorityPublicKey: base64.StdEncoding.EncodeToString(pub),
	}
	if err := state.Save(filepath.Join(stateDir, state.FileName), identity); err != nil {
		t.Fatalf("save identity: %v", err)
	}

	hint := &client.NodeUpdateHint{CheckNow: true, Generation: "gen-2"}
	signNodeUpdateHintForTest(t, hint, identity, priv)

	// Replace all four package-level hooks and put them back afterwards.
	origWake, origLookPath := runLocalUpdateWakeCommand, localUpdateWakeLookPath
	origLaunch, origExecutable := runLocalUpdateLaunchCommand, localExecutablePath
	t.Cleanup(func() {
		runLocalUpdateWakeCommand = origWake
		localUpdateWakeLookPath = origLookPath
		runLocalUpdateLaunchCommand = origLaunch
		localExecutablePath = origExecutable
	})

	localExecutablePath = func() (string, error) { return nodeAgentPath, nil }
	localUpdateWakeLookPath = func(name string) (string, error) { return name, nil }
	runLocalUpdateWakeCommand = func(context.Context, string, ...string) error {
		return errors.New("wake failed")
	}
	var (
		launchName string
		launchArgs []string
	)
	runLocalUpdateLaunchCommand = func(_ context.Context, name string, args ...string) error {
		launchName = name
		launchArgs = make([]string, len(args))
		copy(launchArgs, args)
		return nil
	}

	if err := persistUpdateHintTrigger(context.Background(), cfg, identity, hint); err != nil {
		t.Fatalf("persistUpdateHintTrigger: %v", err)
	}

	if launchName != hostAgentPath {
		t.Fatalf("launch binary = %q, want %q", launchName, hostAgentPath)
	}
	wantTaskArgs := []string{"--windows-task-name", "RAP Node Agent ifcm-rufms-s-mo1cr"}
	if !containsSequence(launchArgs, wantTaskArgs) {
		t.Fatalf("launch args missing task name: %#v", launchArgs)
	}
}
// TestPersistUpdateHintTriggerRejectsUnsignedHint passes a hint that was never
// signed and expects persistUpdateHintTrigger to return an error mentioning
// "authority signature is required".
func TestPersistUpdateHintTriggerRejectsUnsignedHint(t *testing.T) {
	const nodeName = "ifcm-rufms-s-mo1cr"

	cfg := config.Config{StateDir: t.TempDir(), NodeName: nodeName}

	pub, _, err := ed25519.GenerateKey(rand.Reader)
	if err != nil {
		t.Fatalf("GenerateKey: %v", err)
	}
	identity := state.Identity{
		ClusterID:                 "cluster-1",
		NodeID:                    "node-1",
		NodeName:                  nodeName,
		ClusterAuthorityPublicKey: base64.StdEncoding.EncodeToString(pub),
	}
	unsigned := &client.NodeUpdateHint{CheckNow: true, Generation: "gen-unsigned"}

	err = persistUpdateHintTrigger(context.Background(), cfg, identity, unsigned)
	rejected := err != nil && strings.Contains(err.Error(), "authority signature is required")
	if !rejected {
		t.Fatalf("expected unsigned hint rejection, got %v", err)
	}
}
// signNodeUpdateHintForTest builds a nodeUpdateHintAuthorityPayload from hint
// and identity, signs its canonical JSON form with privateKey, and stores the
// raw payload and signature on hint (AuthorityPayload / AuthoritySignature).
// Any failure aborts the calling test.
func signNodeUpdateHintForTest(t *testing.T, hint *client.NodeUpdateHint, identity state.Identity, privateKey ed25519.PrivateKey) {
	t.Helper()

	verifyKey, ok := privateKey.Public().(ed25519.PublicKey)
	if !ok {
		t.Fatal("private key public part is not ed25519")
	}

	authorityPayload := nodeUpdateHintAuthorityPayload{
		SchemaVersion:           "rap.node_update_hint_authority.v1",
		ClusterID:               identity.ClusterID,
		NodeID:                  identity.NodeID,
		Generation:              hint.Generation,
		CheckNow:                hint.CheckNow,
		Products:                append([]string(nil), hint.Products...),
		TargetVersions:          hint.TargetVersions,
		Reason:                  hint.Reason,
		DeliveryMode:            hint.DeliveryMode,
		SubscriptionStatus:      hint.SubscriptionStatus,
		UpdateService:           hint.UpdateService,
		UpdateServiceCandidates: append([]client.NodeUpdateServiceAssignment(nil), hint.UpdateServiceCandidates...),
		RescuePollSeconds:       hint.RescuePollSeconds,
		ControlPlaneOnly:        true,
		ProductionForwarding:    false,
	}
	encodedPayload, err := json.Marshal(authorityPayload)
	if err != nil {
		t.Fatalf("marshal payload: %v", err)
	}

	// The signature covers the canonical form; the hint carries the raw bytes.
	canonicalPayload, err := agentauthority.CanonicalJSON(encodedPayload)
	if err != nil {
		t.Fatalf("CanonicalJSON: %v", err)
	}
	rawSignature := ed25519.Sign(privateKey, canonicalPayload)

	envelope := agentauthority.Signature{
		SchemaVersion:  agentauthority.SignatureSchemaVersion,
		Algorithm:      agentauthority.AlgorithmEd25519,
		KeyFingerprint: agentauthority.Fingerprint(verifyKey),
		Signature:      base64.StdEncoding.EncodeToString(rawSignature),
	}

	hint.AuthorityPayload = encodedPayload
	hint.AuthoritySignature = &client.ClusterSignature{
		SchemaVersion:  envelope.SchemaVersion,
		Algorithm:      envelope.Algorithm,
		KeyFingerprint: envelope.KeyFingerprint,
		Signature:      envelope.Signature,
	}
}
// signedStoredUpdateTriggerForTest converts hint into a hostagent.UpdateTrigger
// stamped with the current UTC time. String fields are whitespace-trimmed, the
// authority payload and signature are copied over, and update-service
// candidates contribute their non-empty node IDs and endpoints.
func signedStoredUpdateTriggerForTest(hint *client.NodeUpdateHint) hostagent.UpdateTrigger {
	var stored hostagent.UpdateTrigger
	stored.SchemaVersion = "rap.node_update_trigger.v1"
	stored.Generation = strings.TrimSpace(hint.Generation)
	stored.Products = append([]string{}, hint.Products...)
	stored.Reason = strings.TrimSpace(hint.Reason)
	stored.DeliveryMode = strings.TrimSpace(hint.DeliveryMode)
	stored.SubscriptionStatus = strings.TrimSpace(hint.SubscriptionStatus)
	stored.RescuePollSeconds = hint.RescuePollSeconds
	stored.AuthorityPayload = append(json.RawMessage(nil), hint.AuthorityPayload...)
	stored.ObservedAt = time.Now().UTC()

	if sig := hint.AuthoritySignature; sig != nil {
		stored.AuthoritySignature = &agentauthority.Signature{
			SchemaVersion:  sig.SchemaVersion,
			Algorithm:      sig.Algorithm,
			KeyFingerprint: sig.KeyFingerprint,
			Signature:      sig.Signature,
		}
	}

	if service := hint.UpdateService; service != nil {
		stored.UpdateServiceNodeID = strings.TrimSpace(service.NodeID)
		stored.UpdateServiceStatus = strings.TrimSpace(service.Status)
	}

	for i := range hint.UpdateServiceCandidates {
		candidate := hint.UpdateServiceCandidates[i]
		// Node IDs and endpoints are collected independently of each other.
		if id := strings.TrimSpace(candidate.NodeID); id != "" {
			stored.UpdateServiceCandidateNodeIDs = append(stored.UpdateServiceCandidateNodeIDs, id)
		}
		if addr := strings.TrimSpace(candidate.Endpoint); addr != "" {
			stored.UpdateServiceCandidateEndpoints = append(stored.UpdateServiceCandidateEndpoints, addr)
		}
	}
	return stored
}
// signedStoredUpdateRescueIntentForTest converts hint into a
// hostagent.UpdateRescueIntent observed at observedAt. Identity and config
// strings are whitespace-trimmed; the node name comes from
// firstNonEmpty(identity name, config name); the authority payload and
// signature are copied from hint.
func signedStoredUpdateRescueIntentForTest(identity state.Identity, cfg config.Config, hint *client.NodeUpdateHint, observedAt time.Time) hostagent.UpdateRescueIntent {
	var stored hostagent.UpdateRescueIntent
	stored.SchemaVersion = "rap.node_update_rescue_intent.v1"
	stored.Generation = strings.TrimSpace(hint.Generation)
	stored.ClusterID = strings.TrimSpace(identity.ClusterID)
	stored.NodeID = strings.TrimSpace(identity.NodeID)
	stored.NodeName = firstNonEmpty(strings.TrimSpace(identity.NodeName), strings.TrimSpace(cfg.NodeName))
	stored.StateDir = strings.TrimSpace(cfg.StateDir)
	stored.Products = append([]string{}, hint.Products...)
	stored.TargetVersions = copyStringMap(hint.TargetVersions)
	stored.Reason = strings.TrimSpace(hint.Reason)
	stored.AuthorityPayload = append(json.RawMessage(nil), hint.AuthorityPayload...)
	stored.ObservedAt = observedAt

	sig := hint.AuthoritySignature
	if sig == nil {
		return stored
	}
	stored.AuthoritySignature = &agentauthority.Signature{
		SchemaVersion:  sig.SchemaVersion,
		Algorithm:      sig.Algorithm,
		KeyFingerprint: sig.KeyFingerprint,
		Signature:      sig.Signature,
	}
	return stored
}
// signedUpdateStateJSONForTest returns the JSON bytes of a node update state
// document for product "rap-node-agent". The document embeds a fixed update
// intent and rollout lease, an authority payload that carries the HashRaw
// digests of both, and an Ed25519 signature over the canonical JSON of that
// payload made with privateKey. currentVersion and targetVersion are written
// into the payload and the state document. Any failure aborts the calling
// test.
//
// NOTE(review): identity is accepted but not read; the cluster and node IDs
// are hard-coded to "cluster-1" / "node-1". Confirm whether callers rely on
// that before wiring identity in.
func signedUpdateStateJSONForTest(t *testing.T, identity state.Identity, privateKey ed25519.PrivateKey, currentVersion, targetVersion string) []byte {
	t.Helper()
	intent := map[string]any{
		"schema_version":   "rap.node_update_intent.v1",
		"intent_id":        "intent-1",
		"cluster_id":       "cluster-1",
		"node_id":          "node-1",
		"product":          "rap-node-agent",
		"target_version":   targetVersion,
		"strategy":         "rolling",
		"generation":       "intent-gen-1",
		"created_at":       "2026-05-19T04:00:00Z",
		"expires_at":       "2026-05-19T05:00:00Z",
		"rollback_allowed": true,
		"required_lease":   true,
	}
	lease := map[string]any{
		"schema_version":      "rap.node_update_lease.v1",
		"lease_id":            "lease-1",
		"intent_id":           "intent-1",
		"cluster_id":          "cluster-1",
		"node_id":             "node-1",
		"product":             "rap-node-agent",
		"target_version":      targetVersion,
		"strategy":            "rolling",
		"status":              "waiting",
		"reason":              "parallel_limit_reached",
		"max_parallel":        1,
		"active_update_count": 1,
		"acquired_at":         "2026-05-19T04:00:00Z",
		"expires_at":          "2026-05-19T04:05:00Z",
	}

	// Hash the intent and lease exactly as they are embedded below so the
	// signed payload can reference them by digest.
	rawIntent, err := json.Marshal(intent)
	if err != nil {
		t.Fatalf("marshal intent: %v", err)
	}
	intentHash, err := agentauthority.HashRaw(rawIntent)
	if err != nil {
		t.Fatalf("hash intent: %v", err)
	}
	rawLease, err := json.Marshal(lease)
	if err != nil {
		t.Fatalf("marshal lease: %v", err)
	}
	leaseHash, err := agentauthority.HashRaw(rawLease)
	if err != nil {
		t.Fatalf("hash lease: %v", err)
	}

	payload := map[string]any{
		"schema_version":                 "rap.node_update_plan_authority.v1",
		"cluster_id":                     "cluster-1",
		"node_id":                        "node-1",
		"product":                        "rap-node-agent",
		"current_version":                currentVersion,
		"action":                         "none",
		"target_version":                 targetVersion,
		"artifact_sha256":                "",
		"fabric_registry_records_sha256": "",
		"update_intent_sha256":           intentHash,
		"rollout_lease_sha256":           leaseHash,
		"control_plane_only":             true,
		"production_forwarding":          false,
	}
	rawPayload, err := json.Marshal(payload)
	if err != nil {
		t.Fatalf("marshal payload: %v", err)
	}
	canonical, err := agentauthority.CanonicalJSON(rawPayload)
	if err != nil {
		t.Fatalf("canonical payload: %v", err)
	}

	// Checked assertion: fail the test instead of panicking, matching
	// signNodeUpdateHintForTest.
	publicKey, ok := privateKey.Public().(ed25519.PublicKey)
	if !ok {
		t.Fatal("private key public part is not ed25519")
	}
	signature := agentauthority.Signature{
		SchemaVersion:  agentauthority.SignatureSchemaVersion,
		Algorithm:      agentauthority.AlgorithmEd25519,
		KeyFingerprint: agentauthority.Fingerprint(publicKey),
		Signature:      base64.StdEncoding.EncodeToString(ed25519.Sign(privateKey, canonical)),
	}
	rawSignature, err := json.Marshal(signature)
	if err != nil {
		t.Fatalf("marshal signature: %v", err)
	}

	statePayload := map[string]any{
		"product":             "rap-node-agent",
		"current_version":     currentVersion,
		"target_version":      targetVersion,
		"plan_action":         "none",
		"plan_reason":         "rollout_lease_wait",
		"update_intent":       intent,
		"rollout_lease":       lease,
		"authority_payload":   json.RawMessage(rawPayload),
		"authority_signature": json.RawMessage(rawSignature),
		"updated_at":          "2026-05-19T04:01:00Z",
	}
	result, err := json.Marshal(statePayload)
	if err != nil {
		t.Fatalf("marshal update state: %v", err)
	}
	return result
}
// TestApplyUpdateRuntimeMetadataReadsStateAndTrigger populates a state
// directory with a stored update trigger, a rescue intent, a signed node
// update state and a host-agent update state, then checks that
// applyUpdateRuntimeMetadata exposes values from each of them under the
// heartbeat payload's "update_runtime" metadata map.
func TestApplyUpdateRuntimeMetadataReadsStateAndTrigger(t *testing.T) {
	stateDir := t.TempDir()
	cfg := config.Config{StateDir: stateDir, NodeName: "node-a"}

	pub, priv, err := ed25519.GenerateKey(rand.Reader)
	if err != nil {
		t.Fatalf("GenerateKey: %v", err)
	}
	identity := state.Identity{
		ClusterID:                 "cluster-1",
		NodeID:                    "node-1",
		NodeName:                  "node-a",
		ClusterAuthorityPublicKey: base64.StdEncoding.EncodeToString(pub),
	}
	if err := state.Save(filepath.Join(stateDir, state.FileName), identity); err != nil {
		t.Fatalf("save identity: %v", err)
	}

	hint := &client.NodeUpdateHint{
		CheckNow:       true,
		Generation:     "gen-42",
		Products:       []string{"rap-node-agent"},
		TargetVersions: map[string]string{"rap-node-agent": "0.2.345-updaterdiagnostics"},
		Reason:         "enabled_update_policy",
	}
	signNodeUpdateHintForTest(t, hint, identity, priv)

	// Stored trigger with wake and local-launch diagnostics filled in.
	storedTrigger := signedStoredUpdateTriggerForTest(hint)
	storedTrigger.WakeStatus = "started"
	storedTrigger.LocalLaunchStatus = "failed"
	storedTrigger.LocalLaunchError = "host agent not found"
	storedTrigger.ObservedAt = time.Date(2026, 5, 19, 4, 0, 0, 0, time.UTC)
	if err := hostagent.SaveUpdateTrigger(stateDir, storedTrigger); err != nil {
		t.Fatalf("SaveUpdateTrigger: %v", err)
	}

	intentObservedAt := time.Date(2026, 5, 19, 4, 0, 1, 0, time.UTC)
	storedIntent := signedStoredUpdateRescueIntentForTest(identity, cfg, hint, intentObservedAt)
	if err := hostagent.SaveUpdateRescueIntent(stateDir, storedIntent); err != nil {
		t.Fatalf("SaveUpdateRescueIntent: %v", err)
	}

	nodeState := signedUpdateStateJSONForTest(t, identity, priv, "0.2.332-relaycertintentfix", "0.2.333-dockerjoinfix")
	nodeStatePath := filepath.Join(stateDir, hostagent.UpdateStateFileName)
	if err := os.WriteFile(nodeStatePath, nodeState, 0o600); err != nil {
		t.Fatalf("write node update state: %v", err)
	}

	hostState := []byte(`{
"product":"rap-host-agent",
"current_version":"0.2.327-registryjoinrewrite",
"target_version":"0.2.327-registryjoinrewrite",
"updated_at":"2026-05-19T04:01:10Z"
}`)
	hostStatePath := filepath.Join(stateDir, "host-update-state-rap-host-agent.json")
	if err := os.WriteFile(hostStatePath, hostState, 0o600); err != nil {
		t.Fatalf("write host update state: %v", err)
	}

	heartbeat := agent.HeartbeatPayload()
	applyUpdateRuntimeMetadata(&heartbeat, config.Config{StateDir: stateDir})

	meta, ok := heartbeat.Metadata["update_runtime"].(map[string]any)
	if !ok {
		t.Fatalf("update_runtime metadata missing: %+v", heartbeat.Metadata)
	}

	if generation := meta["trigger_generation"]; generation != "gen-42" {
		t.Fatalf("trigger_generation = %#v", generation)
	}
	if !(meta["trigger_wake_status"] == "started" && meta["trigger_local_launch_status"] == "failed") {
		t.Fatalf("trigger wake/local launch metadata missing: %#v", meta)
	}
	if !(meta["rescue_intent_generation"] == "gen-42" && meta["rescue_intent_reason"] == "enabled_update_policy") {
		t.Fatalf("rescue intent metadata missing: %#v", meta)
	}
	if present := meta["node_agent_state_present"]; present != true {
		t.Fatalf("node_agent_state_present = %#v", present)
	}
	if !(meta["node_agent_update_intent_generation"] == "intent-gen-1" && meta["update_intent_generation"] == "intent-gen-1") {
		t.Fatalf("node update intent generation missing: %#v", meta)
	}
	if !(meta["node_agent_rollout_lease_status"] == "waiting" && meta["node_agent_rollout_lease_active_update_count"] == 1) {
		t.Fatalf("node rollout lease metadata missing: %#v", meta)
	}
	if present := meta["host_agent_state_present"]; present != true {
		t.Fatalf("host_agent_state_present = %#v", present)
	}
	if reason := meta["reason"]; reason != "host-agent updater state observed" {
		t.Fatalf("reason = %#v", reason)
	}
}
// containsSequence reports whether want occurs in values as a contiguous run
// of equal elements. An empty want is found in any values, including nil.
func containsSequence(values []string, want []string) bool {
	lastStart := len(values) - len(want)
search:
	for start := 0; start <= lastStart; start++ {
		for offset, expected := range want {
			if values[start+offset] != expected {
				// Mismatch at this alignment; slide the window forward.
				continue search
			}
		}
		return true
	}
	return false
}
// TestLocalUpdateWakeSlugNormalizesNodeName expects localUpdateWakeSlug to
// turn " IFcm RUFMS/S-MO1CR " into "ifcm-rufms-s-mo1cr".
func TestLocalUpdateWakeSlugNormalizesNodeName(t *testing.T) {
	slug := localUpdateWakeSlug(" IFcm RUFMS/S-MO1CR ")
	if slug == "ifcm-rufms-s-mo1cr" {
		return
	}
	t.Fatalf("slug = %q", slug)
}
func TestRouteManagerDecisionsFromControlPlaneConsumesRemediationCommand(t *testing.T) {
now := time.Now().UTC()
decisions := routeManagerDecisionsFromControlPlane(nil, []client.FabricServiceChannelRemediationCommand{{
SchemaVersion: "rap.fabric_service_channel_access_remediation_command.v1",
CommandID: "cmd-1",
Action: "prefer_alternate_route",
ClusterID: "cluster-1",
ChannelID: "channel-1",
ServiceClass: "vpn_packets",
PrimaryRouteID: "route-primary",
ReplacementRouteID: "route-alternate",
Reason: "authorized_alternate_route_available",
IssuedAt: now,
ExpiresAt: now.Add(time.Minute),
}})
if len(decisions) != 1 {
t.Fatalf("decisions = %+v, want one remediation decision", decisions)
}
decision := decisions[0]
if decision.RouteID != "route-primary" ||
decision.ReplacementRouteID != "route-alternate" ||
decision.RebuildStatus != "applied" ||
decision.DecisionSource != "service_channel_remediation_command" ||
decision.RebuildRequestID != "cmd-1" {
t.Fatalf("unexpected remediation decision: %+v", decision)
}
}
func TestRouteManagerDecisionsFromControlPlaneConsumesRebuildRouteCommand(t *testing.T) {
now := time.Now().UTC()
decisions := routeManagerDecisionsFromControlPlane(nil, []client.FabricServiceChannelRemediationCommand{{
SchemaVersion: "rap.fabric_service_channel_access_remediation_command.v1",
CommandID: "cmd-rebuild",
Action: "rebuild_route",
ClusterID: "cluster-1",
ChannelID: "channel-1",
ServiceClass: "vpn_packets",
PrimaryRouteID: "route-primary",
Reason: "route_feedback_recommends_rebuild",
GuardStatus: "allowed",
IssuedAt: now,
ExpiresAt: now.Add(time.Minute),
}})
if len(decisions) != 1 {
t.Fatalf("decisions = %+v, want one rebuild remediation decision", decisions)
}
decision := decisions[0]
if decision.RouteID != "route-primary" ||
decision.RebuildStatus != "pending_degraded_route_state" ||
decision.DecisionSource != "service_channel_remediation_command" ||
decision.RebuildRequestID != "cmd-rebuild" {
t.Fatalf("unexpected rebuild remediation decision: %+v", decision)
}
}
func TestRouteManagerDecisionsFromControlPlaneRejectsGuardedRemediationCommand(t *testing.T) {
now := time.Now().UTC()
decisions := routeManagerDecisionsFromControlPlane(nil, []client.FabricServiceChannelRemediationCommand{{
SchemaVersion: "rap.fabric_service_channel_access_remediation_command.v1",
CommandID: "cmd-guarded",
Action: "prefer_alternate_route",
ClusterID: "cluster-1",
ChannelID: "channel-1",
ServiceClass: "vpn_packets",
PrimaryRouteID: "route-primary",
ReplacementRouteID: "route-outside-policy",
GuardStatus: "rejected",
GuardReason: "replacement_exit_outside_signed_pool_policy",
IssuedAt: now,
ExpiresAt: now.Add(time.Minute),
}})
if len(decisions) != 0 {
t.Fatalf("guarded remediation command must not reach route-manager: %+v", decisions)
}
}
func TestGatewayTransportForAssignmentUsesFabricWithoutDegradedRoute(t *testing.T) {
inbox := vpnruntime.NewFabricPacketInbox(4)
transport := fabricGatewayTransportForAssignment(
context.Background(),
config.Config{},
state.Identity{ClusterID: "cluster-1", NodeID: "exit-1"},
client.NodeVPNAssignment{VPNConnectionID: "vpn-1"},
&syntheticMeshState{
ProductionForwardTransport: noopProductionForwardTransport{},
VPNFabricInbox: inbox,
Routes: []mesh.SyntheticRoute{{
RouteID: "route-exit-entry",
ClusterID: "cluster-1",
SourceNodeID: "exit-1",
DestinationNodeID: "entry-1",
Hops: []string{"exit-1", "entry-1"},
AllowedChannels: []string{mesh.ProductionChannelVPNPacket},
ExpiresAt: time.Now().UTC().Add(time.Minute),
}},
},
nil,
)
if _, ok := transport.(*vpnruntime.FabricPacketTransport); !ok {
t.Fatalf("transport = %T, want fabric packet transport without degraded route", transport)
}
}
// TestGatewayTransportForAssignmentUsesFabricSessionWhenEnabled starts a local
// QUIC fabric server, advertises it as the only endpoint candidate for
// entry-1 (pinned by certificate SHA-256 in the candidate metadata), enables
// VPNFabricSessionTransportEnabled, and expects a
// *vpnruntime.FabricSessionPacketTransport that can send one packet batch.
func TestGatewayTransportForAssignmentUsesFabricSessionWhenEnabled(t *testing.T) {
	serverTLS := testMainQUICTLSConfig(t)
	server, err := mesh.StartQUICFabricServer(context.Background(), mesh.QUICFabricServerConfig{
		ListenAddr: "127.0.0.1:0",
		TLSConfig:  serverTLS,
	})
	if err != nil {
		t.Fatalf("start quic fabric server: %v", err)
	}
	defer server.Close()

	pinMetadata := fmt.Sprintf(`{"tls_cert_sha256":%q}`, testMainQUICCertSHA256(t, serverTLS))
	entryCandidate := mesh.PeerEndpointCandidate{
		EndpointID:       "entry-1-quic",
		NodeID:           "entry-1",
		Transport:        "direct_quic",
		Address:          "quic://" + server.Addr().String(),
		Reachability:     "public",
		ConnectivityMode: "direct",
		Metadata:         json.RawMessage(pinMetadata),
	}
	exitToEntry := mesh.SyntheticRoute{
		RouteID:           "route-exit-entry",
		ClusterID:         "cluster-1",
		SourceNodeID:      "exit-1",
		DestinationNodeID: "entry-1",
		Hops:              []string{"exit-1", "entry-1"},
		AllowedChannels:   []string{mesh.ProductionChannelVPNPacket},
		ExpiresAt:         time.Now().UTC().Add(time.Minute),
	}
	meshState := &syntheticMeshState{
		ProductionForwardTransport: noopProductionForwardTransport{},
		VPNFabricInbox:             vpnruntime.NewFabricPacketInbox(4),
		PeerEndpointCandidates: map[string][]mesh.PeerEndpointCandidate{
			"entry-1": {entryCandidate},
		},
		Routes: []mesh.SyntheticRoute{exitToEntry},
	}
	cfg := config.Config{VPNFabricSessionTransportEnabled: true}
	identity := state.Identity{ClusterID: "cluster-1", NodeID: "exit-1"}
	assignment := client.NodeVPNAssignment{VPNConnectionID: "vpn-1"}

	transport := fabricGatewayTransportForAssignment(context.Background(), cfg, identity, assignment, meshState, nil)

	session, isSession := transport.(*vpnruntime.FabricSessionPacketTransport)
	if !isSession {
		t.Fatalf("transport = %T, want fabric session packet transport", transport)
	}
	batch := [][]byte{[]byte("packet")}
	if err := session.SendGatewayPacketBatch(context.Background(), batch); err != nil {
		t.Fatalf("send fabric session packet: %v", err)
	}
}
func TestGatewayTransportForAssignmentFallsBackWhenFabricSessionUnavailable(t *testing.T) {
inbox := vpnruntime.NewFabricPacketInbox(4)
transport := fabricGatewayTransportForAssignment(
context.Background(),
config.Config{VPNFabricSessionTransportEnabled: true},
state.Identity{ClusterID: "cluster-1", NodeID: "exit-1"},
client.NodeVPNAssignment{VPNConnectionID: "vpn-1"},
&syntheticMeshState{
ProductionForwardTransport: noopProductionForwardTransport{},
VPNFabricInbox: inbox,
PeerEndpoints: map[string]string{},
Routes: []mesh.SyntheticRoute{{
RouteID: "route-exit-entry",
ClusterID: "cluster-1",
SourceNodeID: "exit-1",
DestinationNodeID: "entry-1",
Hops: []string{"exit-1", "entry-1"},
AllowedChannels: []string{mesh.ProductionChannelVPNPacket},
ExpiresAt: time.Now().UTC().Add(time.Minute),
}},
},
nil,
)
if transport != nil {
t.Fatalf("transport = %T, want nil when QUIC fabric session is unavailable", transport)
}
}
func TestLocalGatewayTransportForAssignmentUsesLocalInboxWithoutDegradedRoute(t *testing.T) {
transport := localGatewayTransportForAssignment(
state.Identity{ClusterID: "cluster-1", NodeID: "exit-1"},
client.NodeVPNAssignment{VPNConnectionID: "vpn-1"},
&syntheticMeshState{VPNFabricInbox: vpnruntime.NewFabricPacketInbox(4)},
nil,
)
if _, ok := transport.(*vpnruntime.LocalPacketTransport); !ok {
t.Fatalf("transport = %T, want local packet transport without degraded route", transport)
}
}
// TestVPNAssignmentLeaseAutoAcquireAllowedRequiresSelectedExit verifies that,
// for a placement policy naming a single exit node, only that exit node is
// allowed to auto-acquire the gateway lease; the entry node is not.
func TestVPNAssignmentLeaseAutoAcquireAllowedRequiresSelectedExit(t *testing.T) {
	policy := json.RawMessage(`{
"entry_node_ids":["entry-1"],
"exit_node_id":"exit-1"
}`)
	assignment := client.NodeVPNAssignment{
		VPNConnectionID: "vpn-1",
		PlacementPolicy: policy,
	}

	if allowed := vpnAssignmentLeaseAutoAcquireAllowed("entry-1", assignment); allowed {
		t.Fatal("entry node must not auto-acquire the gateway lease")
	}
	if allowed := vpnAssignmentLeaseAutoAcquireAllowed("exit-1", assignment); !allowed {
		t.Fatal("selected exit node should auto-acquire the gateway lease")
	}
}
// TestVPNAssignmentLeaseAutoAcquireAllowedSupportsExitPool verifies that a
// placement policy listing several exit nodes ("exit_node_ids") lets a member
// of that list auto-acquire the gateway lease and rejects a node outside it.
func TestVPNAssignmentLeaseAutoAcquireAllowedSupportsExitPool(t *testing.T) {
	policy := json.RawMessage(`{"exit_node_ids":["exit-1","exit-2"]}`)
	assignment := client.NodeVPNAssignment{
		VPNConnectionID: "vpn-1",
		PlacementPolicy: policy,
	}

	if allowed := vpnAssignmentLeaseAutoAcquireAllowed("exit-2", assignment); !allowed {
		t.Fatal("node from exit pool should auto-acquire the gateway lease")
	}
	if allowed := vpnAssignmentLeaseAutoAcquireAllowed("entry-1", assignment); allowed {
		t.Fatal("node outside exit pool must not auto-acquire the gateway lease")
	}
}
// noopProductionForwardTransport is a test double whose SendProduction does
// nothing and always succeeds.
type noopProductionForwardTransport struct{}

// SendProduction ignores all of its arguments and returns a zero-value
// mesh.ProductionForwardResult together with a nil error.
func (noopProductionForwardTransport) SendProduction(context.Context, string, mesh.ProductionEnvelope) (mesh.ProductionForwardResult, error) {
	var result mesh.ProductionForwardResult
	return result, nil
}
// testMainQUICTLSConfig builds a TLS configuration for the QUIC fabric tests.
// It generates a fresh 2048-bit RSA key, issues a self-signed server
// certificate (CN "rap-quic-test") valid from one hour ago until one hour from
// now, and advertises the "rap-fabric-data-session-v1" ALPN protocol.
// Any failure aborts the calling test.
func testMainQUICTLSConfig(t *testing.T) *tls.Config {
	t.Helper()

	privateKey, err := rsa.GenerateKey(rand.Reader, 2048)
	if err != nil {
		t.Fatalf("generate rsa key: %v", err)
	}

	now := time.Now()
	template := &x509.Certificate{
		SerialNumber: big.NewInt(1),
		Subject:      pkix.Name{CommonName: "rap-quic-test"},
		NotBefore:    now.Add(-time.Hour),
		NotAfter:     now.Add(time.Hour),
		KeyUsage:     x509.KeyUsageKeyEncipherment | x509.KeyUsageDigitalSignature,
		ExtKeyUsage:  []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth},
	}
	// The template doubles as its own parent, which makes the certificate self-signed.
	certDER, err := x509.CreateCertificate(rand.Reader, template, template, &privateKey.PublicKey, privateKey)
	if err != nil {
		t.Fatalf("create certificate: %v", err)
	}

	leaf := tls.Certificate{
		Certificate: [][]byte{certDER},
		PrivateKey:  privateKey,
	}
	return &tls.Config{
		Certificates: []tls.Certificate{leaf},
		NextProtos:   []string{"rap-fabric-data-session-v1"},
	}
}
// testMainQUICCertSHA256 returns the lowercase hex SHA-256 digest of the first
// DER certificate in the first certificate chain of config. The calling test
// is aborted when config is nil or carries no certificate.
func testMainQUICCertSHA256(t *testing.T, config *tls.Config) string {
	t.Helper()

	if config == nil || len(config.Certificates) == 0 {
		t.Fatal("missing test certificate")
	}
	chain := config.Certificates[0].Certificate
	if len(chain) == 0 {
		t.Fatal("missing test certificate")
	}

	digest := sha256.Sum256(chain[0])
	return hex.EncodeToString(digest[:])
}
// TestFabricControlForwardHandlerUsesRegistryQUICControlAPI starts a local QUIC
// fabric server that answers POST-style control requests for "/auth/login",
// publishes its address through a signed control-API registry record, and
// verifies that the handler built by fabricControlForwardHandlerFromMeshState
// forwards a raw control request to that server and returns its response.
func TestFabricControlForwardHandlerUsesRegistryQUICControlAPI(t *testing.T) {
	tlsConfig := testMainQUICTLSConfig(t)
	server, err := mesh.StartQUICFabricServer(context.Background(), mesh.QUICFabricServerConfig{
		ListenAddr: "127.0.0.1:0",
		TLSConfig:  tlsConfig,
		FabricControlHandler: func(_ context.Context, payload []byte) ([]byte, error) {
			var req client.RawControlRequest
			if err := json.Unmarshal(payload, &req); err != nil {
				return nil, err
			}
			if req.Path != "/auth/login" {
				return nil, fmt.Errorf("unexpected path %s", req.Path)
			}
			return json.Marshal(client.RawControlResponse{StatusCode: 200, Body: json.RawMessage(`{"via":"fabric"}`)})
		},
	})
	if err != nil {
		t.Fatalf("start quic fabric server: %v", err)
	}
	defer server.Close()

	// The shared helper signs and applies the same single-endpoint control-API
	// record (issuer "authority-1", endpoint "control-a", pinned cert digest)
	// that this test previously built inline.
	registry := signedTestControlRegistry(t, "cluster-1", "quic://"+server.Addr().String(), testMainQUICCertSHA256(t, tlsConfig))

	transport := mesh.NewQUICFabricTransport(nil)
	transport.SetLocalPeerID("node-a")
	handler := fabricControlForwardHandlerFromMeshState(nil, state.Identity{ClusterID: "cluster-1", NodeID: "node-a"}, &syntheticMeshState{
		FabricRegistry:         registry,
		VPNFabricQUICTransport: transport,
		ListenerRuntimeConfig:  config.Config{MeshRegion: "test"},
	})

	payload, err := handler(context.Background(), []byte(`{"method":"POST","path":"/auth/login","body":{"user":"a"}}`))
	if err != nil {
		t.Fatalf("fabric control handler: %v", err)
	}
	var response client.RawControlResponse
	if err := json.Unmarshal(payload, &response); err != nil {
		t.Fatalf("decode raw control response: %v", err)
	}
	if response.StatusCode != 200 || string(response.Body) != `{"via":"fabric"}` {
		t.Fatalf("response = %+v", response)
	}
}
// TestHeartbeatViaFabricControlUsesRegistryQUICControlAPI starts a local QUIC
// fabric server that accepts the node heartbeat POST, publishes its address
// through a signed control-API registry record, and verifies that
// heartbeatViaFabricControl reports the call as delivered via fabric and
// decodes the testing flags and update hint from the response body.
func TestHeartbeatViaFabricControlUsesRegistryQUICControlAPI(t *testing.T) {
	tlsConfig := testMainQUICTLSConfig(t)
	server, err := mesh.StartQUICFabricServer(context.Background(), mesh.QUICFabricServerConfig{
		ListenAddr: "127.0.0.1:0",
		TLSConfig:  tlsConfig,
		FabricControlHandler: func(_ context.Context, payload []byte) ([]byte, error) {
			var req client.RawControlRequest
			if err := json.Unmarshal(payload, &req); err != nil {
				return nil, err
			}
			if req.Method != http.MethodPost || req.Path != "/clusters/cluster-1/nodes/node-a/heartbeats" {
				return nil, fmt.Errorf("unexpected request: %+v", req)
			}
			return json.Marshal(client.RawControlResponse{
				StatusCode: 202,
				Body: json.RawMessage(`{
"heartbeat":{"id":"hb-1"},
"testing_flags":{"enabled":true,"synthetic_links_enabled":true,"applied_scopes":["cluster"]},
"update_hint":{"schema_version":"rap.node_update_hint.v1","check_now":true,"generation":"gen-1"}
}`),
			})
		},
	})
	if err != nil {
		t.Fatalf("start quic fabric server: %v", err)
	}
	defer server.Close()

	// The shared helper signs and applies the same single-endpoint control-API
	// record (issuer "authority-1", endpoint "control-a", pinned cert digest)
	// that this test previously built inline.
	registry := signedTestControlRegistry(t, "cluster-1", "quic://"+server.Addr().String(), testMainQUICCertSHA256(t, tlsConfig))

	response, viaFabric, err := heartbeatViaFabricControl(context.Background(), state.Identity{ClusterID: "cluster-1", NodeID: "node-a"}, &syntheticMeshState{
		FabricRegistry:         registry,
		VPNFabricQUICTransport: mesh.NewQUICFabricTransport(nil),
	}, client.HeartbeatRequest{HealthStatus: "healthy"})
	if err != nil {
		t.Fatalf("heartbeat via fabric: %v", err)
	}
	if !viaFabric || !response.TestingFlags.Enabled || response.UpdateHint == nil || response.UpdateHint.Generation != "gen-1" {
		t.Fatalf("unexpected heartbeat response viaFabric=%t response=%+v", viaFabric, response)
	}
}
func TestSyntheticMeshConfigRefreshUsesRegistryQUICControlAPI(t *testing.T) {
tlsConfig := testMainQUICTLSConfig(t)
server, err := mesh.StartQUICFabricServer(context.Background(), mesh.QUICFabricServerConfig{
ListenAddr: "127.0.0.1:0",
TLSConfig: tlsConfig,
FabricControlHandler: func(_ context.Context, payload []byte) ([]byte, error) {
var req client.RawControlRequest
if err := json.Unmarshal(payload, &req); err != nil {
return nil, err
}
if req.Method != http.MethodGet || req.Path != "/clusters/cluster-1/nodes/node-a/mesh/synthetic-config" {
return nil, fmt.Errorf("unexpected request: %+v", req)
}
return json.Marshal(client.RawControlResponse{
StatusCode: 200,
Body: json.RawMessage(`{
"synthetic_mesh_config":{
"enabled":true,
"config_version":"fabric-gen-1",
"peer_directory_version":"pd-1",
"policy_version":"pol-1",
"peer_endpoints":{},
"routes":[]
}
}`),
})
},
})
if err != nil {
t.Fatalf("start quic fabric server: %v", err)
}
defer server.Close()
registry := signedTestControlRegistry(t, "cluster-1", "quic://"+server.Addr().String(), testMainQUICCertSHA256(t, tlsConfig))
loaded, err := loadSyntheticMeshConfigRuntime(context.Background(), config.Config{}, state.Identity{ClusterID: "cluster-1", NodeID: "node-a"}, nil, &syntheticMeshState{
FabricRegistry: registry,
VPNFabricQUICTransport: mesh.NewQUICFabricTransport(nil),
})
if err != nil {
t.Fatalf("load synthetic mesh config via fabric: %v", err)
}
if loaded.Source != "control_plane" || loaded.ConfigVersion != "fabric-gen-1" {
t.Fatalf("loaded = %+v", loaded)
}
}
func TestReportMeshLinkUsesRegistryQUICControlAPI(t *testing.T) {
tlsConfig := testMainQUICTLSConfig(t)
var received client.RawControlRequest
server, err := mesh.StartQUICFabricServer(context.Background(), mesh.QUICFabricServerConfig{
ListenAddr: "127.0.0.1:0",
TLSConfig: tlsConfig,
FabricControlHandler: func(_ context.Context, payload []byte) ([]byte, error) {
if err := json.Unmarshal(payload, &received); err != nil {
return nil, err
}
if received.Method != http.MethodPost || received.Path != "/clusters/cluster-1/mesh/links" {
return nil, fmt.Errorf("unexpected request: %+v", received)
}
return json.Marshal(client.RawControlResponse{StatusCode: 202, Body: json.RawMessage(`{"ok":true}`)})
},
})
if err != nil {
t.Fatalf("start quic fabric server: %v", err)
}
defer server.Close()
registry := signedTestControlRegistry(t, "cluster-1", "quic://"+server.Addr().String(), testMainQUICCertSHA256(t, tlsConfig))
err = reportMeshLink(context.Background(), nil, state.Identity{ClusterID: "cluster-1", NodeID: "node-a"}, &syntheticMeshState{
FabricRegistry: registry,
VPNFabricQUICTransport: mesh.NewQUICFabricTransport(nil),
}, client.MeshLinkObservationRequest{
SourceNodeID: "node-a",
TargetNodeID: "node-b",
LinkStatus: "reachable",
})
if err != nil {
t.Fatalf("report mesh link via fabric: %v", err)
}
if len(received.Body) == 0 || !strings.Contains(string(received.Body), `"target_node_id":"node-b"`) {
t.Fatalf("unexpected received body: %s", string(received.Body))
}
}
func TestReportTelemetryUsesRegistryQUICControlAPI(t *testing.T) {
tlsConfig := testMainQUICTLSConfig(t)
var received client.RawControlRequest
server, err := mesh.StartQUICFabricServer(context.Background(), mesh.QUICFabricServerConfig{
ListenAddr: "127.0.0.1:0",
TLSConfig: tlsConfig,
FabricControlHandler: func(_ context.Context, payload []byte) ([]byte, error) {
if err := json.Unmarshal(payload, &received); err != nil {
return nil, err
}
if received.Method != http.MethodPost || received.Path != "/clusters/cluster-1/nodes/node-a/telemetry" {
return nil, fmt.Errorf("unexpected request: %+v", received)
}
return json.Marshal(client.RawControlResponse{StatusCode: 202, Body: json.RawMessage(`{"ok":true}`)})
},
})
if err != nil {
t.Fatalf("start quic fabric server: %v", err)
}
defer server.Close()
registry := signedTestControlRegistry(t, "cluster-1", "quic://"+server.Addr().String(), testMainQUICCertSHA256(t, tlsConfig))
err = reportTelemetry(context.Background(), nil, state.Identity{ClusterID: "cluster-1", NodeID: "node-a"}, &syntheticMeshState{
FabricRegistry: registry,
VPNFabricQUICTransport: mesh.NewQUICFabricTransport(nil),
}, client.TelemetryRequest{Payload: map[string]any{"fabric": "quic"}})
if err != nil {
t.Fatalf("report telemetry via fabric: %v", err)
}
if len(received.Body) == 0 || !strings.Contains(string(received.Body), `"fabric":"quic"`) {
t.Fatalf("unexpected received body: %s", string(received.Body))
}
}
func TestWorkloadControlUsesRegistryQUICControlAPI(t *testing.T) {
tlsConfig := testMainQUICTLSConfig(t)
var paths []string
server, err := mesh.StartQUICFabricServer(context.Background(), mesh.QUICFabricServerConfig{
ListenAddr: "127.0.0.1:0",
TLSConfig: tlsConfig,
FabricControlHandler: func(_ context.Context, payload []byte) ([]byte, error) {
var req client.RawControlRequest
if err := json.Unmarshal(payload, &req); err != nil {
return nil, err
}
paths = append(paths, req.Method+" "+req.Path)
switch req.Path {
case "/clusters/cluster-1/nodes/node-a/workloads/desired":
return json.Marshal(client.RawControlResponse{
StatusCode: 200,
Body: json.RawMessage(`{"desired_workloads":[{"service_type":"vpn-egress","desired_state":"enabled","runtime_mode":"node"}]}`),
})
case "/clusters/cluster-1/nodes/node-a/workloads/vpn-egress/status":
if len(req.Body) == 0 || !strings.Contains(string(req.Body), `"reported_state":"running"`) {
return nil, fmt.Errorf("unexpected status body: %s", string(req.Body))
}
return json.Marshal(client.RawControlResponse{StatusCode: 204, Body: json.RawMessage(`{}`)})
default:
return nil, fmt.Errorf("unexpected request: %+v", req)
}
},
})
if err != nil {
t.Fatalf("start quic fabric server: %v", err)
}
defer server.Close()
registry := signedTestControlRegistry(t, "cluster-1", "quic://"+server.Addr().String(), testMainQUICCertSHA256(t, tlsConfig))
meshState := &syntheticMeshState{
FabricRegistry: registry,
VPNFabricQUICTransport: mesh.NewQUICFabricTransport(nil),
}
identity := state.Identity{ClusterID: "cluster-1", NodeID: "node-a"}
desired, err := desiredWorkloads(context.Background(), nil, identity, meshState)
if err != nil {
t.Fatalf("desired workloads via fabric: %v", err)
}
if len(desired) != 1 || desired[0].ServiceType != "vpn-egress" {
t.Fatalf("desired = %+v", desired)
}
if err := reportSingleWorkloadStatus(context.Background(), nil, identity, meshState, "vpn-egress", client.WorkloadStatusRequest{ReportedState: "running"}); err != nil {
t.Fatalf("report workload status via fabric: %v", err)
}
want := []string{
"GET /clusters/cluster-1/nodes/node-a/workloads/desired",
"POST /clusters/cluster-1/nodes/node-a/workloads/vpn-egress/status",
}
if !reflect.DeepEqual(paths, want) {
t.Fatalf("paths = %+v, want %+v", paths, want)
}
}
func TestAdminRuntimeProjectionUsesRegistryQUICControlAPI(t *testing.T) {
tlsConfig := testMainQUICTLSConfig(t)
var received client.RawControlRequest
server, err := mesh.StartQUICFabricServer(context.Background(), mesh.QUICFabricServerConfig{
ListenAddr: "127.0.0.1:0",
TLSConfig: tlsConfig,
FabricControlHandler: func(_ context.Context, payload []byte) ([]byte, error) {
if err := json.Unmarshal(payload, &received); err != nil {
return nil, err
}
if received.Method != http.MethodPost || received.Path != "/clusters/cluster-1/nodes/node-a/admin-runtime/projection" {
return nil, fmt.Errorf("unexpected request: %+v", received)
}
return json.Marshal(client.RawControlResponse{
StatusCode: 200,
Body: json.RawMessage(`{"schema_version":"rap.admin_runtime_projection.v1","status":"ok","status_code":200,"body":{"page":"cluster"}}`),
})
},
})
if err != nil {
t.Fatalf("start quic fabric server: %v", err)
}
defer server.Close()
registry := signedTestControlRegistry(t, "cluster-1", "quic://"+server.Addr().String(), testMainQUICCertSHA256(t, tlsConfig))
projection, err := controlAPIProjectionClient{
Identity: state.Identity{ClusterID: "cluster-1", NodeID: "node-a"},
MeshState: &syntheticMeshState{
FabricRegistry: registry,
VPNFabricQUICTransport: mesh.NewQUICFabricTransport(nil),
},
}.Project(context.Background(), webingress.ControlAPIProjectionRequest{
SchemaVersion: "rap.web_ingress_projection.v1",
Method: http.MethodGet,
Path: "/clusters",
Scope: "cluster",
ServiceClass: "admin-ingress",
})
if err != nil {
t.Fatalf("admin projection via fabric: %v", err)
}
if projection.StatusCode != 200 || string(projection.Body) != `{"page":"cluster"}` {
t.Fatalf("projection = %+v", projection)
}
if len(received.Body) == 0 || !strings.Contains(string(received.Body), `"service_class":"admin-ingress"`) {
t.Fatalf("unexpected received body: %s", string(received.Body))
}
}
func TestVPNAssignmentControlUsesRegistryQUICControlAPI(t *testing.T) {
tlsConfig := testMainQUICTLSConfig(t)
var paths []string
server, err := mesh.StartQUICFabricServer(context.Background(), mesh.QUICFabricServerConfig{
ListenAddr: "127.0.0.1:0",
TLSConfig: tlsConfig,
FabricControlHandler: func(_ context.Context, payload []byte) ([]byte, error) {
var req client.RawControlRequest
if err := json.Unmarshal(payload, &req); err != nil {
return nil, err
}
paths = append(paths, req.Method+" "+req.Path)
switch req.Path {
case "/clusters/cluster-1/nodes/node-a/vpn/assignments":
return json.Marshal(client.RawControlResponse{
StatusCode: 200,
Body: json.RawMessage(`{"vpn_assignments":[{"vpn_connection_id":"vpn-1","desired_state":"enabled","assignment_reason":"eligible_candidate"}]}`),
})
case "/clusters/cluster-1/nodes/node-a/vpn/assignments/vpn-1/lease/acquire":
return json.Marshal(client.RawControlResponse{
StatusCode: 201,
Body: json.RawMessage(`{"lease":{"lease_id":"lease-1","owner_node_id":"node-a","lease_generation":1,"status":"active"}}`),
})
case "/clusters/cluster-1/nodes/node-a/vpn/assignments/vpn-1/lease/lease-1/renew":
return json.Marshal(client.RawControlResponse{StatusCode: 204, Body: json.RawMessage(`{}`)})
case "/clusters/cluster-1/nodes/node-a/vpn/assignments/vpn-1/status":
if len(req.Body) == 0 || !strings.Contains(string(req.Body), `"observed_status":"assigned"`) {
return nil, fmt.Errorf("unexpected status body: %s", string(req.Body))
}
return json.Marshal(client.RawControlResponse{StatusCode: 204, Body: json.RawMessage(`{}`)})
default:
return nil, fmt.Errorf("unexpected request: %+v", req)
}
},
})
if err != nil {
t.Fatalf("start quic fabric server: %v", err)
}
defer server.Close()
registry := signedTestControlRegistry(t, "cluster-1", "quic://"+server.Addr().String(), testMainQUICCertSHA256(t, tlsConfig))
meshState := &syntheticMeshState{
FabricRegistry: registry,
VPNFabricQUICTransport: mesh.NewQUICFabricTransport(nil),
}
identity := state.Identity{ClusterID: "cluster-1", NodeID: "node-a"}
assignments, err := nodeVPNAssignments(context.Background(), nil, identity, meshState)
if err != nil {
t.Fatalf("vpn assignments via fabric: %v", err)
}
if len(assignments) != 1 || assignments[0].VPNConnectionID != "vpn-1" {
t.Fatalf("assignments = %+v", assignments)
}
lease, err := acquireNodeVPNAssignmentLease(context.Background(), nil, identity, meshState, "vpn-1", client.NodeVPNAssignmentLeaseAcquireRequest{TTLSeconds: 300})
if err != nil {
t.Fatalf("acquire lease via fabric: %v", err)
}
if lease == nil || lease.LeaseID != "lease-1" {
t.Fatalf("lease = %+v", lease)
}
if err := renewNodeVPNAssignmentLease(context.Background(), nil, identity, meshState, "vpn-1", "lease-1", client.NodeVPNAssignmentLeaseRenewRequest{TTLSeconds: 300}); err != nil {
t.Fatalf("renew lease via fabric: %v", err)
}
if err := reportNodeVPNAssignmentStatus(context.Background(), nil, identity, meshState, "vpn-1", client.NodeVPNAssignmentStatusRequest{ObservedStatus: "assigned"}); err != nil {
t.Fatalf("report status via fabric: %v", err)
}
want := []string{
"GET /clusters/cluster-1/nodes/node-a/vpn/assignments",
"POST /clusters/cluster-1/nodes/node-a/vpn/assignments/vpn-1/lease/acquire",
"POST /clusters/cluster-1/nodes/node-a/vpn/assignments/vpn-1/lease/lease-1/renew",
"POST /clusters/cluster-1/nodes/node-a/vpn/assignments/vpn-1/status",
}
if !reflect.DeepEqual(paths, want) {
t.Fatalf("paths = %+v, want %+v", paths, want)
}
}
// signedTestControlRegistry returns a fabric registry holding one signed
// control-API gossip record for clusterID. The record advertises a single
// "direct_quic" endpoint ("control-a") at the given address, pinned to
// certSHA256, and is signed by a freshly generated Ed25519 key registered as
// trusted issuer "authority-1". Any failure aborts the calling test.
func signedTestControlRegistry(t *testing.T, clusterID string, endpoint string, certSHA256 string) *mesh.FabricRegistry {
	t.Helper()

	issuerPublic, issuerPrivate, err := ed25519.GenerateKey(rand.Reader)
	if err != nil {
		t.Fatalf("generate key: %v", err)
	}
	issuer := mesh.FabricRegistryTrustedIssuer{IssuerID: "authority-1", Role: mesh.FabricRegistryAuthorityControl, PublicKey: issuerPublic}

	now := time.Now().UTC()
	controlEndpoint := mesh.FabricRegistryEndpoint{
		EndpointID:     "control-a",
		Address:        endpoint,
		Transport:      "direct_quic",
		PeerCertSHA256: certSHA256,
	}
	// Issued one minute in the past and valid for an hour relative to now.
	record := mesh.FabricRegistryGossipRecord{
		SchemaVersion: mesh.FabricRegistryGossipRecordSchema,
		ClusterID:     clusterID,
		Service:       mesh.FabricRegistryServiceControlAPI,
		Scope:         mesh.FabricRegistryScopeCluster,
		Epoch:         1,
		IssuedAt:      now.Add(-time.Minute),
		ExpiresAt:     now.Add(time.Hour),
		IssuerNodeID:  "authority-1",
		IssuerRole:    mesh.FabricRegistryAuthorityControl,
		Endpoints:     []mesh.FabricRegistryEndpoint{controlEndpoint},
	}
	signed, err := mesh.SignFabricRegistryGossipRecord(record, issuer, issuerPrivate)
	if err != nil {
		t.Fatalf("sign registry record: %v", err)
	}

	policy := mesh.FabricRegistryVerificationPolicy{
		LocalClusterID:     clusterID,
		TrustedIssuers:     []mesh.FabricRegistryTrustedIssuer{issuer},
		RequiredSignatures: 1,
		Now:                now,
	}
	registry := mesh.NewFabricRegistry()
	if _, _, err := registry.ApplyGossipRecord(signed, policy, true); err != nil {
		t.Fatalf("apply registry record: %v", err)
	}
	return registry
}
// TestRouteManagerDecisionsFromControlPlaneKeepsExplicitRemediationCommand
// verifies that a remediation command whose ID does not match the rebuild
// request of any reported decision is appended as its own decision, tagged with
// source "service_channel_remediation_command" and the command ID.
func TestRouteManagerDecisionsFromControlPlaneKeepsExplicitRemediationCommand(t *testing.T) {
	now := time.Now().UTC()

	feedbackDecision := client.RoutePathDecision{
		RouteID:            "route-primary",
		ReplacementRouteID: "route-alternate",
		RebuildRequestID:   "feedback-rebuild",
		RebuildStatus:      "applied",
		RebuildReason:      "service_channel_feedback_rebuild_applied_to_alternate",
		DecisionSource:     "service_channel_feedback_replacement",
		Generation:         "gen-1",
	}
	report := &client.RoutePathDecisionReport{Decisions: []client.RoutePathDecision{feedbackDecision}}
	commands := []client.FabricServiceChannelRemediationCommand{{
		CommandID:          "cmd-1",
		Action:             "prefer_alternate_route",
		PrimaryRouteID:     "route-primary",
		ReplacementRouteID: "route-alternate",
		Reason:             "authorized_alternate_route_available",
		IssuedAt:           now,
		ExpiresAt:          now.Add(time.Minute),
	}}

	decisions := routeManagerDecisionsFromControlPlane(report, commands)
	if len(decisions) != 2 {
		t.Fatalf("decisions = %+v, want feedback and explicit remediation command", decisions)
	}
	explicit := decisions[1]
	if explicit.DecisionSource != "service_channel_remediation_command" || explicit.RebuildRequestID != "cmd-1" {
		t.Fatalf("remediation command was not kept as explicit route-manager input: %+v", decisions)
	}
}
// TestRouteManagerDecisionsFromControlPlaneSkipsCommandAlreadyResolvedByPlanner
// verifies that a remediation command is not added a second time when the
// report already contains a decision whose rebuild request ID equals the
// command ID; only the reported decision is returned.
func TestRouteManagerDecisionsFromControlPlaneSkipsCommandAlreadyResolvedByPlanner(t *testing.T) {
	now := time.Now().UTC()

	plannerDecision := client.RoutePathDecision{
		RouteID:            "route-primary",
		ReplacementRouteID: "route-planner",
		RebuildRequestID:   "cmd-rebuild",
		RebuildStatus:      "applied",
		RebuildReason:      "remediation_rebuild_applied_to_alternate",
		DecisionSource:     "service_channel_remediation_command",
		Generation:         "config-c18z77",
	}
	report := &client.RoutePathDecisionReport{Decisions: []client.RoutePathDecision{plannerDecision}}
	commands := []client.FabricServiceChannelRemediationCommand{{
		CommandID:      "cmd-rebuild",
		Action:         "rebuild_route",
		PrimaryRouteID: "route-primary",
		Reason:         "route_feedback_recommends_rebuild",
		GuardStatus:    "allowed",
		IssuedAt:       now,
		ExpiresAt:      now.Add(time.Minute),
	}}

	decisions := routeManagerDecisionsFromControlPlane(report, commands)
	if len(decisions) != 1 {
		t.Fatalf("decisions = %+v, want only planner-resolved decision", decisions)
	}
	resolved := decisions[0]
	if resolved.RebuildStatus != "applied" || resolved.ReplacementRouteID != "route-planner" {
		t.Fatalf("unexpected planner decision: %+v", resolved)
	}
}
func TestFabricServiceChannelAccessStatsReportsDataPlaneViolations(t *testing.T) {
stats := newFabricServiceChannelAccessStats()
stats.Observe(mesh.FabricServiceChannelAccessLogEntry{
Event: "fabric_service_channel_data_plane_violation",
ClusterID: "cluster-1",
ChannelID: "channel-1",
ResourceID: "vpn-1",
DegradedRoutePolicy: "disabled",
ViolationStatus: "fabric_route_send_failed_degraded_route_blocked",
ViolationReason: "mesh synthetic route not found",
OccurredAt: time.Unix(10, 0).UTC(),
})
report := stats.Report(time.Unix(20, 0).UTC())
if report["degraded_fabric_route_blocked"] != int64(1) ||
report["degraded_route_count"] != int64(0) ||
report["degraded_route_blocked_count"] != int64(1) ||
report["fabric_route_send_failure"] != int64(1) ||
report["last_data_plane_violation_status"] != "degraded_fabric_route_blocked" ||
report["last_data_plane_violation_status_raw"] != "fabric_route_send_failed_degraded_route_blocked" ||
report["last_data_plane_violation_reason"] != "mesh synthetic route not found" {
t.Fatalf("unexpected violation report: %+v", report)
}
}
// TestVerifyEnrollmentJoinContractAcceptsSignedApproval builds a node approval
// payload, signs its canonical JSON with a freshly generated Ed25519 key, and
// verifies that verifyEnrollmentJoinContract accepts the resulting join
// contract when the config pins that key's fingerprint.
func TestVerifyEnrollmentJoinContractAcceptsSignedApproval(t *testing.T) {
	// A nil reader makes GenerateKey fall back to crypto/rand.Reader.
	publicKey, privateKey, err := ed25519.GenerateKey(nil)
	if err != nil {
		t.Fatalf("generate key: %v", err)
	}
	encodedPublicKey := base64.StdEncoding.EncodeToString(publicKey)
	fingerprint := agentauthority.Fingerprint(publicKey)

	payload := json.RawMessage(`{
"schema_version":"rap.cluster.node_approval.v1",
"cluster_id":"cluster-1",
"join_request_id":"join-request-1",
"node_id":"node-1",
"node_fingerprint":"fp-1",
"identity_status":"active",
"heartbeat_endpoint":"/api/v1/clusters/cluster-1/nodes/node-1/heartbeats",
"approved_by_user_id":"admin-1",
"issued_at":"2026-04-28T12:00:00Z",
"control_plane_only":true,
"production_forwarding":false
}`)
	canonical, err := agentauthority.CanonicalJSON(payload)
	if err != nil {
		t.Fatalf("canonical json: %v", err)
	}

	authority := &client.ClusterAuthorityDescriptor{
		SchemaVersion:        agentauthority.AuthoritySchemaVersion,
		ClusterID:            "cluster-1",
		AuthorityState:       "active",
		KeyAlgorithm:         agentauthority.AlgorithmEd25519,
		PublicKey:            encodedPublicKey,
		PublicKeyFingerprint: fingerprint,
	}
	// The signature covers the canonical form of the payload, not the raw bytes.
	signature := &client.ClusterSignature{
		SchemaVersion:  agentauthority.SignatureSchemaVersion,
		Algorithm:      agentauthority.AlgorithmEd25519,
		KeyFingerprint: fingerprint,
		Signature:      base64.StdEncoding.EncodeToString(ed25519.Sign(privateKey, canonical)),
		SignedAt:       time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC),
	}
	join := client.NodeJoinContract{
		NodeID:             "node-1",
		ClusterID:          "cluster-1",
		IdentityStatus:     "active",
		HeartbeatEndpoint:  "/api/v1/clusters/cluster-1/nodes/node-1/heartbeats",
		ClusterAuthority:   authority,
		AuthorityPayload:   payload,
		AuthoritySignature: signature,
	}
	identity := state.Identity{
		ClusterID:       "cluster-1",
		NodeFingerprint: "fp-1",
	}

	err = verifyEnrollmentJoinContract(join, identity, config.Config{ClusterAuthorityFingerprint: fingerprint})
	if err != nil {
		t.Fatalf("verify enrollment join: %v", err)
	}
}
// TestVerifyEnrollmentJoinContractAcceptsSignedQuorumDescriptor builds a
// one-member authority quorum descriptor, embeds its hash in a signed node
// approval payload, attaches the raw descriptor to the join contract, and
// verifies that verifyEnrollmentJoinContract accepts the contract when the
// config pins the signing key's fingerprint.
func TestVerifyEnrollmentJoinContractAcceptsSignedQuorumDescriptor(t *testing.T) {
	// A nil reader makes GenerateKey fall back to crypto/rand.Reader.
	publicKey, privateKey, err := ed25519.GenerateKey(nil)
	if err != nil {
		t.Fatalf("generate key: %v", err)
	}
	encodedPublicKey := base64.StdEncoding.EncodeToString(publicKey)
	fingerprint := agentauthority.Fingerprint(publicKey)

	member := agentauthority.QuorumMember{
		NodeID:               "authority-1",
		Role:                 "update-authority",
		PublicKey:            encodedPublicKey,
		PublicKeyFingerprint: fingerprint,
		Scopes:               []string{"update-authority"},
	}
	descriptor := agentauthority.QuorumDescriptor{
		SchemaVersion: agentauthority.QuorumSchemaVersion,
		ClusterID:     "cluster-1",
		Epoch:         "epoch-1",
		Threshold:     1,
		Members:       []agentauthority.QuorumMember{member},
	}
	descriptorHash, err := agentauthority.QuorumDescriptorHash(descriptor)
	if err != nil {
		t.Fatalf("hash quorum descriptor: %v", err)
	}
	rawDescriptor, err := json.Marshal(descriptor)
	if err != nil {
		t.Fatalf("marshal quorum descriptor: %v", err)
	}

	// The approval payload references the quorum descriptor by its hash.
	payload, err := json.Marshal(map[string]any{
		"schema_version":                  "rap.cluster.node_approval.v1",
		"cluster_id":                      "cluster-1",
		"join_request_id":                 "join-request-1",
		"node_id":                         "node-1",
		"node_fingerprint":                "fp-1",
		"identity_status":                 "active",
		"heartbeat_endpoint":              "/api/v1/clusters/cluster-1/nodes/node-1/heartbeats",
		"approved_by_user_id":             "admin-1",
		"cluster_authority_quorum_sha256": descriptorHash,
		"issued_at":                       "2026-04-28T12:00:00Z",
		"control_plane_only":              true,
		"production_forwarding":           false,
	})
	if err != nil {
		t.Fatalf("marshal authority payload: %v", err)
	}
	canonical, err := agentauthority.CanonicalJSON(payload)
	if err != nil {
		t.Fatalf("canonical json: %v", err)
	}

	authority := &client.ClusterAuthorityDescriptor{
		SchemaVersion:        agentauthority.AuthoritySchemaVersion,
		ClusterID:            "cluster-1",
		AuthorityState:       "active",
		KeyAlgorithm:         agentauthority.AlgorithmEd25519,
		PublicKey:            encodedPublicKey,
		PublicKeyFingerprint: fingerprint,
	}
	// The signature covers the canonical form of the payload, not the raw bytes.
	signature := &client.ClusterSignature{
		SchemaVersion:  agentauthority.SignatureSchemaVersion,
		Algorithm:      agentauthority.AlgorithmEd25519,
		KeyFingerprint: fingerprint,
		Signature:      base64.StdEncoding.EncodeToString(ed25519.Sign(privateKey, canonical)),
		SignedAt:       time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC),
	}
	join := client.NodeJoinContract{
		NodeID:                 "node-1",
		ClusterID:              "cluster-1",
		IdentityStatus:         "active",
		HeartbeatEndpoint:      "/api/v1/clusters/cluster-1/nodes/node-1/heartbeats",
		ClusterAuthorityQuorum: rawDescriptor,
		ClusterAuthority:       authority,
		AuthorityPayload:       payload,
		AuthoritySignature:     signature,
	}
	identity := state.Identity{
		ClusterID:       "cluster-1",
		NodeFingerprint: "fp-1",
	}

	err = verifyEnrollmentJoinContract(join, identity, config.Config{ClusterAuthorityFingerprint: fingerprint})
	if err != nil {
		t.Fatalf("verify enrollment join: %v", err)
	}
}
// TestVerifyControlPlaneSyntheticMeshConfigAcceptsSignedServiceChannelFeedback
// assembles a synthetic mesh config that carries route path decisions and a
// service-channel feedback report, signs an authority payload containing the
// config hash, and verifies that verifyControlPlaneSyntheticMeshConfig accepts
// it for an identity that holds the matching authority key and fingerprint.
func TestVerifyControlPlaneSyntheticMeshConfigAcceptsSignedServiceChannelFeedback(t *testing.T) {
	// A nil reader makes GenerateKey fall back to crypto/rand.Reader.
	publicKey, privateKey, err := ed25519.GenerateKey(nil)
	if err != nil {
		t.Fatalf("generate key: %v", err)
	}
	encodedPublicKey := base64.StdEncoding.EncodeToString(publicKey)
	fingerprint := agentauthority.Fingerprint(publicKey)
	now := time.Now().UTC()

	authority := &client.ClusterAuthorityDescriptor{
		SchemaVersion:        agentauthority.AuthoritySchemaVersion,
		ClusterID:            "cluster-1",
		AuthorityState:       "authoritative",
		KeyAlgorithm:         agentauthority.AlgorithmEd25519,
		PublicKey:            encodedPublicKey,
		PublicKeyFingerprint: fingerprint,
	}

	// One applied rebuild that moves route-ab onto a path through node-c.
	decision := client.RoutePathDecision{
		DecisionID:         "route-ab-path-node-a-service-channel-feedback",
		RouteID:            "route-ab",
		ReplacementRouteID: "route-ac",
		RebuildRequestID:   "route-ab-node-a-config-v1-rebuild",
		RebuildStatus:      "applied",
		RebuildReason:      "service_channel_feedback_rebuild_applied_to_alternate",
		RebuildAttempt:     2,
		ClusterID:          "cluster-1",
		LocalNodeID:        "node-a",
		SourceNodeID:       "node-a",
		DestinationNodeID:  "node-b",
		OriginalHops:       []string{"node-a", "node-b"},
		EffectiveHops:      []string{"node-a", "node-c", "node-b"},
		LocalRole:          "source",
		DecisionSource:     "service_channel_feedback_replacement",
		Generation:         "config-v1",
		PathScore:          1000,
		ScoreReasons:       []string{"service_channel_rebuild_applied"},
		ControlPlaneOnly:   true,
		ExpiresAt:          now.Add(30 * time.Second),
	}
	pathDecisions := &client.RoutePathDecisionReport{
		SchemaVersion:            "c17z18.route_path_decisions.v1",
		DecisionMode:             "control_plane_effective_path_from_relay_policy_and_service_channel_feedback",
		Generation:               "config-v1",
		DecisionCount:            1,
		ReplacementDecisionCount: 1,
		RebuildRequestCount:      1,
		RebuildAppliedCount:      1,
		ControlPlaneOnly:         true,
		Decisions:                []client.RoutePathDecision{decision},
	}

	// The feedback observation that fenced route-ab.
	observation := client.FabricServiceChannelFeedbackObservation{
		ClusterID:           "cluster-1",
		ReporterNodeID:      "node-a",
		RouteID:             "route-ab",
		ServiceClass:        "vpn_packets",
		FeedbackStatus:      "fenced",
		ScoreAdjustment:     -1000,
		Reasons:             []string{"route_rebuild_recommended"},
		ConsecutiveFailures: 2,
		Payload:             json.RawMessage(`{"route_rebuild_recommended":true}`),
		ObservedAt:          now,
		ExpiresAt:           now.Add(30 * time.Second),
	}
	feedback := &client.FabricServiceChannelFeedbackReport{
		SchemaVersion:         "c18n.fabric_service_channel_route_feedback_report.v1",
		GeneratedAt:           now,
		FeedbackMaxAgeSeconds: 30,
		ObservationCount:      1,
		FencedRouteCount:      1,
		Observations:          []client.FabricServiceChannelFeedbackObservation{observation},
	}

	remote := client.SyntheticMeshConfig{
		Enabled:                true,
		SchemaVersion:          "c17z18.synthetic.v1",
		ClusterID:              "cluster-1",
		LocalNodeID:            "node-a",
		AuthorityRequired:      true,
		ClusterAuthority:       authority,
		ConfigVersion:          "config-v1",
		PeerDirectoryVersion:   "config-v1",
		PolicyVersion:          "config-v1",
		PeerEndpoints:          map[string]string{},
		PeerEndpointCandidates: map[string][]client.PeerEndpointCandidate{},
		PeerDirectory:          []client.PeerDirectoryEntry{},
		RecoverySeeds:          []client.PeerRecoverySeed{},
		RendezvousLeases:       []client.PeerRendezvousLease{},
		RoutePathDecisions:     pathDecisions,
		ServiceChannelFeedback: feedback,
		MeshListener:           nil,
		Routes:                 []client.SyntheticMeshRouteConfig{},
		ProductionForwarding:   false,
	}

	// The hash is taken before the authority payload and signature are attached.
	configHash, err := syntheticMeshConfigAuthorityHash(remote)
	if err != nil {
		t.Fatalf("config hash: %v", err)
	}
	payload, err := json.Marshal(controlPlaneMeshConfigAuthorityPayload{
		SchemaVersion:        "rap.cluster.mesh_config_snapshot.v1",
		ClusterID:            "cluster-1",
		LocalNodeID:          "node-a",
		ConfigVersion:        "config-v1",
		ConfigSHA256:         configHash,
		IssuedAt:             now,
		ExpiresAt:            now.Add(time.Hour),
		ControlPlaneOnly:     true,
		ProductionForwarding: false,
	})
	if err != nil {
		t.Fatalf("marshal payload: %v", err)
	}
	canonical, err := agentauthority.CanonicalJSON(payload)
	if err != nil {
		t.Fatalf("canonical json: %v", err)
	}

	remote.AuthorityPayload = payload
	remote.AuthoritySignature = &client.ClusterSignature{
		SchemaVersion:  agentauthority.SignatureSchemaVersion,
		Algorithm:      agentauthority.AlgorithmEd25519,
		KeyFingerprint: fingerprint,
		Signature:      base64.StdEncoding.EncodeToString(ed25519.Sign(privateKey, canonical)),
		SignedAt:       now,
	}

	identity := state.Identity{
		ClusterID:                   "cluster-1",
		NodeID:                      "node-a",
		ClusterAuthorityPublicKey:   encodedPublicKey,
		ClusterAuthorityFingerprint: fingerprint,
	}
	err = verifyControlPlaneSyntheticMeshConfig(remote, identity, config.Config{})
	if err != nil {
		t.Fatalf("verify control-plane synthetic mesh config: %v", err)
	}
}
// TestVerifyEnrollmentJoinContractRejectsPinnedAuthorityMismatch expects
// verifyEnrollmentJoinContract to return an error when the join contract
// advertises a cluster authority fingerprint other than the one pinned in the
// agent config.
func TestVerifyEnrollmentJoinContractRejectsPinnedAuthorityMismatch(t *testing.T) {
	// The advertised fingerprint deliberately differs from the pinned one below.
	advertised := &client.ClusterAuthorityDescriptor{
		SchemaVersion:        agentauthority.AuthoritySchemaVersion,
		ClusterID:            "cluster-1",
		KeyAlgorithm:         agentauthority.AlgorithmEd25519,
		PublicKeyFingerprint: "rap-ca-ed25519-other",
	}
	contract := client.NodeJoinContract{
		NodeID:           "node-1",
		ClusterID:        "cluster-1",
		IdentityStatus:   "active",
		ClusterAuthority: advertised,
	}
	local := state.Identity{
		ClusterID:       "cluster-1",
		NodeFingerprint: "fp-1",
	}
	pinned := config.Config{ClusterAuthorityFingerprint: "rap-ca-ed25519-expected"}

	if err := verifyEnrollmentJoinContract(contract, local, pinned); err == nil {
		t.Fatal("expected pinned authority mismatch")
	}
}
// TestLoadFabricRegistryBootstrapAcceptsSignedCandidate signs one control-API
// registry record, hands it to loadFabricRegistryBootstrap through the config
// JSON field, and expects it to be counted as a single candidate that is not
// reported by Active until MarkLiveVerified has been called.
func TestLoadFabricRegistryBootstrapAcceptsSignedCandidate(t *testing.T) {
	const clusterID = "cluster-1"
	now := time.Now().UTC()

	authorityPub, authorityPriv, err := ed25519.GenerateKey(rand.Reader)
	if err != nil {
		t.Fatalf("GenerateKey: %v", err)
	}

	unsigned := mesh.FabricRegistryGossipRecord{
		SchemaVersion: mesh.FabricRegistryGossipRecordSchema,
		ClusterID:     clusterID,
		Service:       mesh.FabricRegistryServiceControlAPI,
		Scope:         mesh.FabricRegistryScopeCluster,
		Epoch:         1,
		IssuedAt:      now.Add(-time.Minute),
		ExpiresAt:     now.Add(time.Hour),
		IssuerNodeID:  "authority-node",
		IssuerRole:    mesh.FabricRegistryAuthorityControl,
		Endpoints: []mesh.FabricRegistryEndpoint{
			{EndpointID: "control-a", Address: "quic://control.example.test:19443", Transport: "direct_quic"},
		},
	}
	issuer := mesh.FabricRegistryTrustedIssuer{
		IssuerID: "cluster-authority",
		Role:     mesh.FabricRegistryAuthorityControl,
	}
	signed, err := mesh.SignFabricRegistryGossipRecord(unsigned, issuer, authorityPriv)
	if err != nil {
		t.Fatalf("sign registry record: %v", err)
	}
	encoded, err := json.Marshal([]mesh.FabricRegistryGossipRecord{signed})
	if err != nil {
		t.Fatalf("marshal registry records: %v", err)
	}

	cfg := config.Config{
		ClusterAuthorityPublicKey: base64.StdEncoding.EncodeToString(authorityPub),
		FabricRegistryRecordsJSON: string(encoded),
	}
	registry, report, diag := loadFabricRegistryBootstrap(cfg, state.Identity{ClusterID: clusterID})

	switch {
	case registry == nil, report.Total != 1, report.Candidate != 1, report.Rejected != 0:
		t.Fatalf("unexpected registry join report: %+v registry=%v", report, registry)
	case !diag.ConfigPresent, diag.ConfigBytes == 0, diag.LoadError != "":
		t.Fatalf("unexpected registry join diagnostics: %+v", diag)
	}

	// isActive reports whether the registry currently returns an active
	// control-API record for the cluster scope.
	isActive := func() bool {
		_, ok := registry.Active(clusterID, mesh.FabricRegistryServiceControlAPI, mesh.FabricRegistryScopeCluster, "", now)
		return ok
	}

	if isActive() {
		t.Fatal("join record should remain candidate until live verification")
	}
	if !registry.MarkLiveVerified(clusterID, mesh.FabricRegistryServiceControlAPI, mesh.FabricRegistryScopeCluster, "", now) {
		t.Fatal("MarkLiveVerified = false")
	}
	if !isActive() {
		t.Fatal("expected active record after live verification")
	}
}
func TestLoadFabricRegistryBootstrapReportsDecodeError(t *testing.T) {
_, report, diag := loadFabricRegistryBootstrap(config.Config{
ClusterAuthorityPublicKey: base64.StdEncoding.EncodeToString(make([]byte, ed25519.PublicKeySize)),
FabricRegistryRecordsJSON: `{"not":"an array"}`,
}, state.Identity{ClusterID: "cluster-1"})
if !diag.ConfigPresent || diag.ConfigBytes == 0 {
t.Fatalf("expected config presence diagnostics: %+v", diag)
}
if diag.LoadError == "" {
t.Fatalf("expected load error diagnostics: %+v", diag)
}
if report.Rejected != 1 {
t.Fatalf("expected rejected report, got %+v", report)
}
}
func TestValidateLoadedSyntheticMeshConfigAcceptsQUICOnlyControlPlaneSurfaces(t *testing.T) {
err := validateLoadedSyntheticMeshConfigQUICOnly(loadedSyntheticMeshConfig{
PeerEndpoints: map[string]string{
"node-a": "quic://node-a.example.test:443",
},
PeerEndpointCandidates: map[string][]mesh.PeerEndpointCandidate{
"node-b": {
{
EndpointID: "node-b-http-migration",
NodeID: "node-b",
Transport: "direct_quic",
Address: "quic://node-b.example.test:443",
Reachability: "public",
ConnectivityMode: "direct",
},
},
},
RecoverySeeds: []mesh.PeerRecoverySeed{
{
NodeID: "node-c",
Endpoint: "quic://node-c.example.test:19001",
Transport: "reverse_quic",
},
},
RendezvousLeases: []mesh.PeerRendezvousLease{
{
LeaseID: "lease-http-migration",
PeerNodeID: "node-b",
RelayNodeID: "node-r",
RelayEndpoint: "quic://node-r.example.test:19001",
Transport: "relay_quic",
},
},
RoutePathDecisions: &client.RoutePathDecisionReport{
Decisions: []client.RoutePathDecision{{DecisionID: "decision-http-migration", SelectedRelayEndpoint: "quic://node-r.example.test:19001"}},
},
})
if err != nil {
t.Fatalf("expected QUIC-only config to validate, got %v", err)
}
}
func TestEnsureApprovedIdentityKeepsPollingWhenTimeoutDisabled(t *testing.T) {
var joinPolls int
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
publicKey, privateKey, err := ed25519.GenerateKey(rand.Reader)
if err != nil {
t.Fatalf("generate authority key: %v", err)
}
tlsConfig := testNodeAgentJoinContractQUICTLSConfig(t)
server, err := mesh.StartQUICFabricServer(ctx, mesh.QUICFabricServerConfig{
ListenAddr: "127.0.0.1:0",
TLSConfig: tlsConfig,
FabricControlHandler: func(_ context.Context, payload []byte) ([]byte, error) {
var request client.RawControlRequest
if err := json.Unmarshal(payload, &request); err != nil {
return nil, err
}
switch request.Path {
case "/node-agents/enroll":
return json.Marshal(client.RawControlResponse{
StatusCode: http.StatusOK,
Body: mustJSONRawMain(t, client.EnrollResponse{
Status: "pending",
JoinRequest: mustJSONRawMain(t, map[string]any{"id": "join-request-1"}),
}),
})
case "/node-agents/enrollments/join-request-1/join":
joinPolls++
if joinPolls >= 2 {
cancel()
}
return json.Marshal(client.RawControlResponse{
StatusCode: http.StatusOK,
Body: mustJSONRawMain(t, client.EnrollmentJoinResponse{
Status: "pending",
JoinRequest: mustJSONRawMain(t, map[string]any{"id": "join-request-1"}),
}),
})
default:
return json.Marshal(map[string]any{"error": "unexpected path"})
}
},
})
if err != nil {
t.Fatalf("start quic fabric server: %v", err)
}
defer server.Close()
_, port, err := net.SplitHostPort(server.Addr().String())
if err != nil {
t.Fatalf("split server addr: %v", err)
}
dir := t.TempDir()
identity, err := state.LoadOrCreate(dir, "cluster-1", "node-a")
if err != nil {
t.Fatalf("load identity: %v", err)
}
_, err = ensureApprovedIdentity(ctx, config.Config{
ClusterID: "cluster-1",
ClusterAuthorityPublicKey: base64.StdEncoding.EncodeToString(publicKey),
JoinToken: "join-token",
NodeName: "node-a",
StateDir: dir,
FabricRegistryRecordsJSON: signedNodeJoinContractControlRegistry(t, "cluster-1", "quic://127.0.0.1:"+port, testNodeAgentQUICCertSHA256(t, tlsConfig), publicKey, privateKey),
EnrollmentPollInterval: time.Millisecond,
EnrollmentPollTimeout: 0,
}, identity, client.New(""))
if err == nil || !strings.Contains(err.Error(), "context canceled") {
t.Fatalf("ensureApprovedIdentity err = %v, want context canceled", err)
}
if joinPolls < 2 {
t.Fatalf("join polls = %d, want at least 2", joinPolls)
}
}
// TestEnsureApprovedIdentityUsesFabricJoinContractWithoutBackendURL runs
// ensureApprovedIdentity with a client built from an empty backend URL. The
// only control endpoint it is given is the signed fabric registry record that
// points at a local QUIC fabric server. The server answers the enroll request
// with "pending" and the join poll with a signed, approved join contract; the
// test expects the returned identity to carry the approved node id and status.
func TestEnsureApprovedIdentityUsesFabricJoinContractWithoutBackendURL(t *testing.T) {
	publicKey, privateKey, err := ed25519.GenerateKey(rand.Reader)
	if err != nil {
		t.Fatalf("generate authority key: %v", err)
	}
	dir := t.TempDir()
	identity, err := state.LoadOrCreate(dir, "cluster-1", "node-a")
	if err != nil {
		t.Fatalf("load identity: %v", err)
	}
	tlsConfig := testNodeAgentJoinContractQUICTLSConfig(t)
	server, err := mesh.StartQUICFabricServer(context.Background(), mesh.QUICFabricServerConfig{
		// Bind to loopback only: the client below dials 127.0.0.1 and the test
		// certificate is issued for 127.0.0.1, so a wildcard listener would
		// only expose the test server on every interface for no benefit.
		ListenAddr: "127.0.0.1:0",
		TLSConfig:  tlsConfig,
		FabricControlHandler: func(_ context.Context, payload []byte) ([]byte, error) {
			var req client.RawControlRequest
			if err := json.Unmarshal(payload, &req); err != nil {
				return nil, err
			}
			switch req.Path {
			case "/node-agents/enroll":
				return json.Marshal(client.RawControlResponse{
					StatusCode: http.StatusOK,
					Body: mustJSONRawMain(t, client.EnrollResponse{
						Status:      "pending",
						JoinRequest: mustJSONRawMain(t, map[string]any{"id": "join-request-1"}),
					}),
				})
			case "/node-agents/enrollments/join-request-1/join":
				// The contract is signed for this node's fingerprint.
				return json.Marshal(client.RawControlResponse{
					StatusCode: http.StatusOK,
					Body: mustJSONRawMain(t, client.EnrollmentJoinResponse{
						Status:       "approved",
						JoinContract: testSignedNodeJoinContract(t, publicKey, privateKey, "cluster-1", "join-request-1", identity.NodeFingerprint),
					}),
				})
			default:
				return json.Marshal(map[string]any{"error": "unexpected path"})
			}
		},
	})
	if err != nil {
		t.Fatalf("start quic fabric server: %v", err)
	}
	defer server.Close()
	_, port, err := net.SplitHostPort(server.Addr().String())
	if err != nil {
		t.Fatalf("split server addr: %v", err)
	}
	approved, err := ensureApprovedIdentity(context.Background(), config.Config{
		ClusterID:                 "cluster-1",
		ClusterAuthorityPublicKey: base64.StdEncoding.EncodeToString(publicKey),
		JoinToken:                 "join-token",
		NodeName:                  "node-a",
		StateDir:                  dir,
		FabricRegistryRecordsJSON: signedNodeJoinContractControlRegistry(t, "cluster-1", "quic://127.0.0.1:"+port, testNodeAgentQUICCertSHA256(t, tlsConfig), publicKey, privateKey),
		EnrollmentPollInterval:    time.Millisecond,
		EnrollmentPollTimeout:     time.Second,
	}, identity, client.New(""))
	if err != nil {
		t.Fatalf("ensureApprovedIdentity: %v", err)
	}
	if approved.NodeID != "node-approved-1" || approved.IdentityStatus != "approved" {
		t.Fatalf("approved identity = %+v", approved)
	}
}
// testNodeAgentJoinContractQUICTLSConfig returns a server TLS config holding a
// freshly generated, self-signed RSA-2048 certificate for 127.0.0.1 that is
// valid from one hour ago until one hour from now, with the single ALPN
// protocol "rap-fabric-data-session-v1". Any generation failure aborts the
// calling test.
func testNodeAgentJoinContractQUICTLSConfig(t *testing.T) *tls.Config {
	t.Helper()

	privateKey, err := rsa.GenerateKey(rand.Reader, 2048)
	if err != nil {
		t.Fatalf("generate rsa key: %v", err)
	}

	// Self-signed: the same certificate serves as template and as parent.
	cert := &x509.Certificate{
		SerialNumber: big.NewInt(1),
		Subject:      pkix.Name{CommonName: "127.0.0.1"},
		NotBefore:    time.Now().Add(-time.Hour),
		NotAfter:     time.Now().Add(time.Hour),
		KeyUsage:     x509.KeyUsageKeyEncipherment | x509.KeyUsageDigitalSignature,
		ExtKeyUsage:  []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth},
		IPAddresses:  []net.IP{net.ParseIP("127.0.0.1")},
	}
	encoded, err := x509.CreateCertificate(rand.Reader, cert, cert, &privateKey.PublicKey, privateKey)
	if err != nil {
		t.Fatalf("create cert: %v", err)
	}

	serverCert := tls.Certificate{
		Certificate: [][]byte{encoded},
		PrivateKey:  privateKey,
	}
	return &tls.Config{
		Certificates: []tls.Certificate{serverCert},
		NextProtos:   []string{"rap-fabric-data-session-v1"},
	}
}
// testNodeAgentQUICCertSHA256 returns the lowercase hex SHA-256 digest of the
// first DER blob of the first certificate in cfg. It aborts the calling test
// when cfg holds no certificate or the first certificate has an empty chain.
func testNodeAgentQUICCertSHA256(t *testing.T, cfg *tls.Config) string {
	t.Helper()

	var leaf []byte
	if chains := cfg.Certificates; len(chains) > 0 && len(chains[0].Certificate) > 0 {
		leaf = chains[0].Certificate[0]
	} else {
		t.Fatal("missing test certificate")
	}

	digest := sha256.Sum256(leaf)
	return hex.EncodeToString(digest[:])
}
// signedNodeJoinContractControlRegistry returns the JSON encoding of a
// one-element array holding a signed control-API registry gossip record for
// clusterID. The record lists a single direct_quic endpoint at the given
// address pinned to certSHA256, is issued one minute in the past, expires in
// one hour, and is signed with privateKey on behalf of the "cluster-authority"
// issuer. Signing or marshalling failures abort the calling test.
func signedNodeJoinContractControlRegistry(t *testing.T, clusterID, endpoint, certSHA256 string, publicKey ed25519.PublicKey, privateKey ed25519.PrivateKey) string {
	t.Helper()

	issuedAt := time.Now().UTC()
	controlEndpoint := mesh.FabricRegistryEndpoint{
		EndpointID:     "control-a",
		Address:        endpoint,
		Transport:      "direct_quic",
		PeerCertSHA256: certSHA256,
	}
	unsigned := mesh.FabricRegistryGossipRecord{
		SchemaVersion: mesh.FabricRegistryGossipRecordSchema,
		ClusterID:     clusterID,
		Service:       mesh.FabricRegistryServiceControlAPI,
		Scope:         mesh.FabricRegistryScopeCluster,
		Epoch:         1,
		IssuedAt:      issuedAt.Add(-time.Minute),
		ExpiresAt:     issuedAt.Add(time.Hour),
		IssuerNodeID:  "cluster-authority",
		IssuerRole:    mesh.FabricRegistryAuthorityControl,
		Endpoints:     []mesh.FabricRegistryEndpoint{controlEndpoint},
	}
	authority := mesh.FabricRegistryTrustedIssuer{
		IssuerID:  "cluster-authority",
		Role:      mesh.FabricRegistryAuthorityControl,
		PublicKey: publicKey,
	}

	signed, err := mesh.SignFabricRegistryGossipRecord(unsigned, authority, privateKey)
	if err != nil {
		t.Fatalf("sign registry record: %v", err)
	}
	encoded, err := json.Marshal([]mesh.FabricRegistryGossipRecord{signed})
	if err != nil {
		t.Fatalf("marshal registry record: %v", err)
	}
	return string(encoded)
}
// testSignedNodeJoinContract returns an approved join contract for the fixed
// node id "node-approved-1" in clusterID. The authority payload
// (nodeApprovalAuthorityPayload) is marshalled, canonicalised with
// agentauthority.CanonicalJSON and signed with privateKey; publicKey is
// embedded in the contract's cluster authority descriptor.
//
// The heartbeat endpoint is derived from clusterID so that the signed payload
// and the contract stay consistent for clusters other than "cluster-1". All
// timestamps in the contract share one instant. Marshalling or
// canonicalisation failures abort the calling test.
func testSignedNodeJoinContract(t *testing.T, publicKey ed25519.PublicKey, privateKey ed25519.PrivateKey, clusterID, joinRequestID, nodeFingerprint string) *client.NodeJoinContract {
	t.Helper()
	const (
		approvedNodeID = "node-approved-1"
		approvedStatus = "approved"
	)
	heartbeatEndpoint := "/clusters/" + clusterID + "/nodes/" + approvedNodeID + "/heartbeats"
	fingerprint := agentauthority.Fingerprint(publicKey)
	now := time.Now().UTC()

	payload := nodeApprovalAuthorityPayload{
		SchemaVersion:     "rap.cluster.node_approval.v1",
		ClusterID:         clusterID,
		JoinRequestID:     joinRequestID,
		NodeID:            approvedNodeID,
		NodeFingerprint:   nodeFingerprint,
		IdentityStatus:    approvedStatus,
		HeartbeatEndpoint: heartbeatEndpoint,
		ApprovedByUserID:  "admin",
		ControlPlaneOnly:  true,
	}
	raw, err := json.Marshal(payload)
	if err != nil {
		t.Fatalf("marshal payload: %v", err)
	}
	// The signature covers the canonical form; the contract carries the raw bytes.
	canonical, err := agentauthority.CanonicalJSON(raw)
	if err != nil {
		t.Fatalf("canonical payload: %v", err)
	}
	signature := ed25519.Sign(privateKey, canonical)

	return &client.NodeJoinContract{
		NodeID:            approvedNodeID,
		ClusterID:         clusterID,
		IdentityStatus:    approvedStatus,
		HeartbeatEndpoint: heartbeatEndpoint,
		ClusterAuthority: &client.ClusterAuthorityDescriptor{
			SchemaVersion:        agentauthority.AuthoritySchemaVersion,
			ClusterID:            clusterID,
			AuthorityState:       "active",
			KeyAlgorithm:         agentauthority.AlgorithmEd25519,
			PublicKey:            base64.StdEncoding.EncodeToString(publicKey),
			PublicKeyFingerprint: fingerprint,
			CreatedAt:            now,
			UpdatedAt:            now,
		},
		AuthorityPayload: raw,
		AuthoritySignature: &client.ClusterSignature{
			SchemaVersion:  agentauthority.SignatureSchemaVersion,
			Algorithm:      agentauthority.AlgorithmEd25519,
			KeyFingerprint: fingerprint,
			Signature:      base64.StdEncoding.EncodeToString(signature),
			SignedAt:       now,
		},
	}
}
// mustJSONRawMain marshals value to JSON and returns the encoded bytes as a
// json.RawMessage. A marshalling error aborts the calling test via t.Fatalf.
func mustJSONRawMain(t *testing.T, value any) json.RawMessage {
	t.Helper()
	encoded, marshalErr := json.Marshal(value)
	if marshalErr == nil {
		return encoded
	}
	t.Fatalf("marshal json: %v", marshalErr)
	return nil // not reached: Fatalf stops the calling goroutine
}
// TestSyntheticQualityScoreIsBounded checks syntheticQualityScore against an
// inclusive [min, max] window for a zero, a moderate and a very large latency
// input.
func TestSyntheticQualityScoreIsBounded(t *testing.T) {
	type bound struct {
		latency int
		min     int
		max     int
	}
	bounds := []bound{
		{latency: 0, min: 100, max: 100},
		{latency: 50, min: 90, max: 100},
		{latency: 10000, min: 1, max: 1},
	}
	for i := range bounds {
		want := bounds[i]
		got := syntheticQualityScore(want.latency)
		if got >= want.min && got <= want.max {
			continue
		}
		t.Fatalf("syntheticQualityScore(%d) = %d, want [%d,%d]", want.latency, got, want.min, want.max)
	}
}
// TestProductionEnvelopeObservationSinkFromConfigIsDisabledByDefault expects
// productionEnvelopeObservationSinkFromConfig to return nil for a zero-value
// config.
func TestProductionEnvelopeObservationSinkFromConfigIsDisabledByDefault(t *testing.T) {
	if sink := productionEnvelopeObservationSinkFromConfig(config.Config{}); sink != nil {
		t.Fatal("sink is enabled by default")
	}
}
func TestHeartbeatPayloadIncludesMeshEndpointReport(t *testing.T) {
payload := heartbeatPayload(config.Config{
MeshAdvertiseEndpoint: "quic://node-a.example.test:19443",
MeshAdvertiseTransport: "direct_quic",
MeshConnectivityMode: "outbound_only",
MeshNATType: "symmetric",
MeshRegion: "eu",
FabricRuntimeEnabled: true,
MeshProductionForwardingEnabled: true,
VPNFabricSessionTransportEnabled: true,
VPNFabricSessionStreamShards: 6,
VPNFabricQUICMaxStreamsPerConn: 24,
MeshQUICFabricEnabled: true,
MeshQUICFabricListenAddr: ":19443",
}, state.Identity{
ClusterID: "cluster-1",
NodeID: "node-a",
}, &syntheticMeshState{
VPNFabricQUICTransport: func() *mesh.QUICFabricTransport {
transport := mesh.NewQUICFabricTransport(nil)
transport.MaxStreamsPerConn = 24
return transport
}(),
VPNFabricIngress: &vpnruntime.FabricClientPacketIngress{
FlowScheduler: vpnruntime.NewFabricFlowScheduler(0, 0),
ClusterID: "cluster-1",
LocalNodeID: "node-a",
},
QUICFabricListenAddr: "127.0.0.1:19443",
}, time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC))
report, ok := payload.Metadata["mesh_endpoint_report"].(map[string]any)
if !ok {
t.Fatalf("mesh endpoint report missing: %+v", payload.Metadata)
}
if report["peer_endpoint"] != "quic://node-a.example.test:19443" ||
report["transport"] != "direct_quic" ||
report["connectivity_mode"] != "outbound_only" ||
report["nat_type"] != "symmetric" ||
report["region"] != "eu" {
t.Fatalf("unexpected endpoint report: %+v", report)
}
candidates, ok := report["endpoint_candidates"].([]mesh.PeerEndpointCandidate)
if !ok || len(candidates) != 2 || candidates[0].Transport != "direct_quic" || candidates[1].EndpointID != "node-a-quic-fabric" {
t.Fatalf("unexpected endpoint candidates: %+v", report["endpoint_candidates"])
}
if payload.Capabilities["mesh_dynamic_endpoint_reporting"] != true {
t.Fatalf("dynamic endpoint capability missing: %+v", payload.Capabilities)
}
if payload.Capabilities["fabric_session_websocket_endpoint"] == true || payload.Capabilities["fabric_data_session_v1"] != true {
t.Fatalf("fabric session capabilities missing: %+v", payload.Capabilities)
}
if report, ok := payload.Metadata["fabric_session_endpoint_report"].(map[string]any); !ok || report["transport"] != "quic" {
t.Fatalf("fabric session endpoint report missing: %+v", payload.Metadata)
} else if quic, ok := report["quic"].(map[string]any); !ok || quic["listen_addr"] != ":19443" || quic["effective_listen_addr"] != "127.0.0.1:19443" {
t.Fatalf("fabric quic endpoint report missing: %+v", report)
}
if payload.Capabilities["fabric_quic_endpoint"] != true {
t.Fatalf("fabric quic capability missing: %+v", payload.Capabilities)
}
if payload.Capabilities["web_ingress_runtime_receiver"] != true {
t.Fatalf("web ingress runtime receiver capability missing: %+v", payload.Capabilities)
}
if report, ok := payload.Metadata["web_ingress_runtime_receiver_report"].(map[string]any); !ok ||
report["schema_version"] != "rap.web_ingress.runtime_receiver_report.v1" ||
report["quic_stream_id"] != mesh.WebIngressForwardQUICStreamID ||
report["reason"] != "trusted_keys_required" {
t.Fatalf("web ingress runtime receiver report missing: %+v", payload.Metadata)
}
if payload.Capabilities["vpn_fabric_session_transport"] != true || payload.Capabilities["vpn_packet_batch_binary_frames"] != true {
t.Fatalf("vpn fabric session capabilities missing: %+v", payload.Capabilities)
}
if report, ok := payload.Metadata["vpn_fabric_session_transport_report"].(map[string]any); !ok ||
report["packet_payload"] != "rap.vpn_packet_batch.fabric.v1" ||
report["transport"] != "fabric_session_binary_frames" ||
report["stream_shards_per_class"] != 6 {
t.Fatalf("vpn fabric session report missing: %+v", payload.Metadata)
} else if report["quic_sessions"] == nil || report["quic_max_streams_per_conn"] != 24 {
t.Fatalf("vpn fabric quic session report missing: %+v", report)
} else if report["quic_capacity_pressure"] == nil {
t.Fatalf("vpn fabric quic pressure report missing: %+v", report)
} else if pressure, ok := report["flow_pressure"].(map[string]any); !ok ||
pressure["schema_version"] != "rap.vpn_fabric_flow_pressure.v1" ||
pressure["pressure_level"] != "nominal" ||
pressure["pressure_score"] != 0 ||
pressure["recommended_action"] != "observe" {
t.Fatalf("vpn fabric flow pressure report missing: %+v", report)
}
if payload.Capabilities["vpn_fabric_session_stream_shards"] != true {
t.Fatalf("vpn fabric stream shard capability missing: %+v", payload.Capabilities)
}
if payload.Capabilities["vpn_fabric_flow_pressure"] != true {
t.Fatalf("vpn fabric flow pressure capability missing: %+v", payload.Capabilities)
}
if payload.Capabilities["vpn_fabric_endpoint_health_feedback"] != true {
t.Fatalf("vpn fabric endpoint health capability missing: %+v", payload.Capabilities)
}
if report, ok := payload.Metadata["vpn_fabric_endpoint_health_report"].(map[string]any); !ok ||
report["schema_version"] != "rap.vpn_fabric_endpoint_health_report.v1" {
t.Fatalf("vpn fabric endpoint health report missing: %+v", payload.Metadata)
}
}
func TestHeartbeatPayloadReportsWebIngressReceiverWithoutSyntheticRuntime(t *testing.T) {
publicKey, _, err := ed25519.GenerateKey(rand.Reader)
if err != nil {
t.Fatalf("generate key: %v", err)
}
payload := heartbeatPayload(config.Config{
WebIngressTrustedKeysJSON: webingress.TrustedKeysJSONForPublicKey("web-key-1", publicKey),
WebIngressRuntimeServiceClasses: "admin-ingress",
}, state.Identity{ClusterID: "cluster-1", NodeID: "node-a"}, nil, time.Date(2026, 5, 17, 1, 2, 3, 0, time.UTC))
report, ok := payload.Metadata["web_ingress_runtime_receiver_report"].(map[string]any)
if !ok {
t.Fatalf("web ingress runtime receiver report missing: %+v", payload.Metadata)
}
if payload.Capabilities["web_ingress_runtime_receiver"] != true ||
report["enabled"] != true ||
report["trusted_key_count"] != 1 {
t.Fatalf("payload=%+v report=%+v", payload, report)
}
classes, ok := report["service_classes"].([]string)
if !ok || len(classes) != 1 || classes[0] != "admin-ingress" {
t.Fatalf("service_classes = %#v", report["service_classes"])
}
}
// TestVPNFabricSessionDialStatsReport records one attempt, two capacity-limited
// observations for the same endpoint, one session-open failure and one
// selection with a pinned certificate, then checks the counters, the "last_*"
// fields and the per-endpoint capacity pressure entry in the generated report.
func TestVPNFabricSessionDialStatsReport(t *testing.T) {
	const endpoint = "quic://node-b.example.test:19443"

	stats := newVPNFabricSessionDialStats()
	stats.Attempts.Add(1)

	limited := mesh.FabricTransportTarget{
		EndpointID: "node-b-quic",
		Endpoint:   endpoint,
		Transport:  "direct_quic",
	}
	for i := 0; i < 2; i++ {
		stats.ObserveCapacityLimited(limited)
	}
	stats.ObserveCandidateFailure("session_open_failed")
	stats.ObserveSelected(mesh.FabricTransportTarget{
		Endpoint:       endpoint,
		Transport:      "direct_quic",
		PeerCertSHA256: "abcdef",
	})

	report := stats.Report(time.Date(2026, 5, 16, 12, 0, 0, 0, time.UTC))

	countersMatch := report["attempts"] == int64(1) &&
		report["selected"] == int64(1) &&
		report["candidate_failures"] == int64(3) &&
		report["capacity_limited"] == int64(2) &&
		report["session_open_failures"] == int64(1) &&
		report["quic_selected"] == int64(1) &&
		report["pinned_cert_selected"] == int64(1)
	lastFieldsMatch := report["last_transport"] == "direct_quic" &&
		report["last_endpoint"] == endpoint &&
		report["last_capacity_endpoint"] == endpoint &&
		report["last_capacity_transport"] == "direct_quic" &&
		report["last_failure_reason"] == "session_open_failed"
	if !countersMatch || !lastFieldsMatch {
		t.Fatalf("unexpected dial stats report: %+v", report)
	}

	counters, ok := report["capacity_pressure"].([]vpnFabricCapacityCounter)
	if !ok || len(counters) != 1 {
		t.Fatalf("capacity pressure counters missing: %+v", report["capacity_pressure"])
	}
	counter := counters[0]
	counterMatches := counter.EndpointID == "node-b-quic" &&
		counter.Endpoint == endpoint &&
		counter.Transport == "direct_quic" &&
		counter.Count == 2 &&
		counter.LastSeenUnixSec > 0
	if !counterMatches {
		t.Fatalf("unexpected capacity pressure counter: %+v", counters[0])
	}
}
// TestFabricSessionOpenFailureReasonClassifiesCapacity expects
// fabricSessionOpenFailureReason to map mesh.ErrQUICFabricStreamLimitReached to
// "capacity_limited" and an unrelated error to "session_open_failed".
func TestFabricSessionOpenFailureReasonClassifiesCapacity(t *testing.T) {
	limitReason := fabricSessionOpenFailureReason(mesh.ErrQUICFabricStreamLimitReached)
	if limitReason != "capacity_limited" {
		t.Fatalf("failure reason = %q, want capacity_limited", limitReason)
	}

	genericReason := fabricSessionOpenFailureReason(errors.New("dial failed"))
	if genericReason != "session_open_failed" {
		t.Fatalf("failure reason = %q, want session_open_failed", genericReason)
	}
}
func TestVPNFabricEndpointObservationReportIsBoundedAndNewestFirst(t *testing.T) {
store := newVPNFabricEndpointObservationStore("node-a")
base := time.Date(2026, 5, 16, 12, 0, 0, 0, time.UTC)
store.observations["old"] = mesh.EndpointCandidateHealthObservation{
EndpointID: "old",
SuccessCount: 1,
ObservedAt: base.Add(-time.Minute),
}
store.observations["new"] = mesh.EndpointCandidateHealthObservation{
EndpointID: "new",
FailureCount: 1,
ObservedAt: base,
ReliabilityScore: 35,
}
report := store.Report(base, 1)
if report["schema_version"] != "rap.vpn_fabric_endpoint_health_report.v1" ||
report["reporter_node_id"] != "node-a" ||
report["total"] != 2 ||
report["reported"] != 1 ||
report["dropped"] != 1 {
t.Fatalf("unexpected report counters: %+v", report)
}
observations, ok := report["observations"].([]mesh.EndpointCandidateHealthObservation)
if !ok || len(observations) != 1 || observations[0].EndpointID != "new" {
t.Fatalf("unexpected observations: %+v", report["observations"])
}
}
// TestVPNFabricEndpointObservationStoreTagsLocalSource records one failure and
// expects the stored observation to carry the local session source tag together
// with the reporter node id and region the store was created with.
func TestVPNFabricEndpointObservationStoreTagsLocalSource(t *testing.T) {
	store := newVPNFabricEndpointObservationStore("node-a", "home-test")
	store.ObserveFailure("endpoint-a", "session_open_failed")

	observation := store.Snapshot()["endpoint-a"]
	tagged := observation.Source == "local_vpn_fabric_session" &&
		observation.ReporterNodeID == "node-a" &&
		observation.ReporterRegion == "home-test"
	if !tagged {
		t.Fatalf("unexpected local observation source: %+v", observation)
	}
}
// TestVPNFabricEndpointObservationStoreRecordsCapacityWithoutFailure records a
// capacity observation and expects it to be stored with the "capacity_limited"
// reason, a zero failure count and a reliability score of 90, tagged with the
// local source and the store's reporter node id and region.
func TestVPNFabricEndpointObservationStoreRecordsCapacityWithoutFailure(t *testing.T) {
	store := newVPNFabricEndpointObservationStore("node-a", "home-test")
	store.ObserveCapacity("endpoint-a")

	observation := store.Snapshot()["endpoint-a"]
	capacityOnly := observation.LastFailureReason == "capacity_limited" &&
		observation.FailureCount == 0 &&
		observation.ReliabilityScore == 90
	tagged := observation.Source == "local_vpn_fabric_session" &&
		observation.ReporterNodeID == "node-a" &&
		observation.ReporterRegion == "home-test"
	if !capacityOnly || !tagged {
		t.Fatalf("unexpected capacity observation: %+v", observation)
	}
}
// TestVPNFabricEndpointObservationStorePrunesOldAndExcessEntries seeds the
// store with one observation older than vpnFabricEndpointObservationMaxAge and
// ten more fresh observations than maxVPNFabricEndpointObservationEntries. It
// expects Snapshot to return exactly the maximum number of entries, without the
// stale one and without the earliest fresh one, but with the latest one.
func TestVPNFabricEndpointObservationStorePrunesOldAndExcessEntries(t *testing.T) {
	store := newVPNFabricEndpointObservationStore("")
	now := time.Now().UTC()

	store.observations["old"] = mesh.EndpointCandidateHealthObservation{
		EndpointID: "old",
		ObservedAt: now.Add(-vpnFabricEndpointObservationMaxAge - time.Second),
	}

	// Later indexes get later timestamps, so index 0 is the oldest fresh entry.
	seeded := maxVPNFabricEndpointObservationEntries + 10
	for idx := 0; idx < seeded; idx++ {
		id := fmt.Sprintf("endpoint-%03d", idx)
		store.observations[id] = mesh.EndpointCandidateHealthObservation{
			EndpointID: id,
			ObservedAt: now.Add(time.Duration(idx) * time.Second),
		}
	}

	snapshot := store.Snapshot()
	if len(snapshot) != maxVPNFabricEndpointObservationEntries {
		t.Fatalf("snapshot size = %d, want %d", len(snapshot), maxVPNFabricEndpointObservationEntries)
	}
	if _, stalePresent := snapshot["old"]; stalePresent {
		t.Fatalf("old observation was not pruned: %+v", snapshot["old"])
	}
	if _, oldestPresent := snapshot["endpoint-000"]; oldestPresent {
		t.Fatalf("oldest excess observation was not pruned")
	}
	newestID := fmt.Sprintf("endpoint-%03d", seeded-1)
	if _, newestPresent := snapshot[newestID]; !newestPresent {
		t.Fatalf("newest observation was pruned")
	}
}
func TestVPNFabricSessionTargetPrefersRankedQUICCandidate(t *testing.T) {
now := time.Now().UTC()
target, ok := vpnFabricSessionTarget(&syntheticMeshState{
PeerEndpoints: map[string]string{
"node-b": "quic://node-b.example.test:443",
},
PeerEndpointCandidates: map[string][]mesh.PeerEndpointCandidate{
"node-b": {
{
EndpointID: "node-b-relay",
NodeID: "node-b",
Transport: "relay_quic",
Address: "quic://relay.example.test:19443",
Reachability: "public",
ConnectivityMode: "direct",
Priority: 10,
LastVerifiedAt: &now,
},
{
EndpointID: "node-b-quic",
NodeID: "node-b",
Transport: "direct_quic",
Address: "quic://node-b.example.test:19443",
Reachability: "public",
ConnectivityMode: "direct",
Priority: 10,
LastVerifiedAt: &now,
Metadata: json.RawMessage(`{"tls_cert_sha256":"abcdef"}`),
},
},
},
}, "node-b")
if !ok {
t.Fatal("target missing")
}
if target.Endpoint != "quic://node-b.example.test:19443" || target.Transport != "direct_quic" || target.PeerCertSHA256 != "abcdef" {
t.Fatalf("target = %+v, want direct quic candidate", target)
}
}
func TestVPNFabricSessionTargetRejectsNonQUICPeerEndpoint(t *testing.T) {
_, ok := vpnFabricSessionTarget(&syntheticMeshState{
PeerEndpoints: map[string]string{
"node-b": "quic://node-b.example.test:443/",
},
}, "node-b")
if ok {
t.Fatal("non-QUIC peer endpoint unexpectedly produced a QUIC target")
}
}
func TestVPNFabricSessionTargetsIncludeRankedQUICCandidatesWithoutDisallowedFallback(t *testing.T) {
now := time.Now().UTC()
targets := vpnFabricSessionTargets(&syntheticMeshState{
PeerEndpoints: map[string]string{
"node-b": "quic://node-b-http-migration.example.test:443/",
},
PeerEndpointCandidates: map[string][]mesh.PeerEndpointCandidate{
"node-b": {
{
EndpointID: "node-b-relay",
NodeID: "node-b",
Transport: "relay_quic",
Address: "quic://relay.example.test:19443",
Reachability: "public",
ConnectivityMode: "direct",
Priority: 10,
LastVerifiedAt: &now,
},
{
EndpointID: "node-b-quic",
NodeID: "node-b",
Transport: "direct_quic",
Address: "quic://node-b.example.test:19443",
Reachability: "public",
ConnectivityMode: "direct",
Priority: 10,
LastVerifiedAt: &now,
},
},
},
}, "node-b")
if len(targets) != 2 {
t.Fatalf("target count = %d, want 2 ranked QUIC candidates: %+v", len(targets), targets)
}
if targets[0].Transport != "direct_quic" || targets[0].Endpoint != "quic://node-b.example.test:19443" || targets[1].Transport != "relay_quic" {
t.Fatalf("targets were not ranked as direct QUIC then relay fallback: %+v", targets)
}
}
// TestVPNFabricSessionTargetsUseLocalHealthObservations records two local
// session-open failures against the direct_quic candidate and expects
// vpnFabricSessionTargets to rank the ice_quic candidate first while still
// returning the direct_quic candidate second.
func TestVPNFabricSessionTargetsUseLocalHealthObservations(t *testing.T) {
	now := time.Now().UTC()

	localHealth := newVPNFabricEndpointObservationStore("")
	for i := 0; i < 2; i++ {
		localHealth.ObserveFailure("node-b-quic", "session_open_failed")
	}

	direct := mesh.PeerEndpointCandidate{
		EndpointID:       "node-b-quic",
		NodeID:           "node-b",
		Transport:        "direct_quic",
		Address:          "quic://node-b.example.test:19443",
		Reachability:     "public",
		ConnectivityMode: "direct",
		Priority:         10,
		LastVerifiedAt:   &now,
	}
	ice := mesh.PeerEndpointCandidate{
		EndpointID:       "node-b-ice",
		NodeID:           "node-b",
		Transport:        "ice_quic",
		Address:          "quic://node-b.example.test:19444",
		Reachability:     "public",
		ConnectivityMode: "direct",
		Priority:         10,
		LastVerifiedAt:   &now,
	}
	meshState := &syntheticMeshState{
		VPNFabricEndpointObservations: localHealth,
		PeerEndpointCandidates: map[string][]mesh.PeerEndpointCandidate{
			"node-b": {direct, ice},
		},
	}

	targets := vpnFabricSessionTargets(meshState, "node-b")
	if len(targets) != 2 || targets[0].EndpointID != "node-b-ice" || targets[1].EndpointID != "node-b-quic" {
		t.Fatalf("targets must prefer healthy ICE QUIC while keeping direct QUIC fallback: %+v", targets)
	}
}
func TestVPNFabricSessionTargetsUseRemoteHealthObservations(t *testing.T) {
now := time.Now().UTC()
targets := vpnFabricSessionTargets(&syntheticMeshState{
PeerEndpointObservations: map[string]mesh.EndpointCandidateHealthObservation{
"node-b-quic": {
EndpointID: "node-b-quic",
FailureCount: 2,
LastFailureReason: "control_plane_session_open_failed",
ReliabilityScore: 35,
ObservedAt: now,
},
},
PeerEndpointCandidates: map[string][]mesh.PeerEndpointCandidate{
"node-b": {
{
EndpointID: "node-b-quic",
NodeID: "node-b",
Transport: "direct_quic",
Address: "quic://node-b.example.test:19443",
Reachability: "public",
ConnectivityMode: "direct",
Priority: 10,
LastVerifiedAt: &now,
},
{
EndpointID: "node-b-ice",
NodeID: "node-b",
Transport: "ice_quic",
Address: "quic://node-b.example.test:19444",
Reachability: "public",
ConnectivityMode: "direct",
Priority: 10,
LastVerifiedAt: &now,
},
},
},
}, "node-b")
if len(targets) != 2 || targets[0].EndpointID != "node-b-ice" || targets[1].EndpointID != "node-b-quic" {
t.Fatalf("targets must prefer remotely healthy ICE QUIC while keeping direct QUIC fallback: %+v", targets)
}
}
// TestVPNFabricSessionTargetsUseCapacityPressureForLoadSpread records eight
// capacity-limited observations against endpoint "a" in the dial stats and
// expects vpnFabricSessionTargets to rank endpoint "b" first while still
// returning endpoint "a" second.
func TestVPNFabricSessionTargetsUseCapacityPressureForLoadSpread(t *testing.T) {
	now := time.Now().UTC()

	dialStats := newVPNFabricSessionDialStats()
	busyTarget := mesh.FabricTransportTarget{
		EndpointID: "node-b-quic-a",
		Endpoint:   "quic://node-b-a.example.test:19443",
		Transport:  "direct_quic",
	}
	for n := 0; n < 8; n++ {
		dialStats.ObserveCapacityLimited(busyTarget)
	}

	busy := mesh.PeerEndpointCandidate{
		EndpointID:       "node-b-quic-a",
		NodeID:           "node-b",
		Transport:        "direct_quic",
		Address:          "quic://node-b-a.example.test:19443",
		Reachability:     "public",
		ConnectivityMode: "direct",
		LastVerifiedAt:   &now,
	}
	idle := mesh.PeerEndpointCandidate{
		EndpointID:       "node-b-quic-b",
		NodeID:           "node-b",
		Transport:        "direct_quic",
		Address:          "quic://node-b-b.example.test:19443",
		Reachability:     "public",
		ConnectivityMode: "direct",
		Priority:         5,
		LastVerifiedAt:   &now,
	}
	meshState := &syntheticMeshState{
		VPNFabricSessionDialStats: dialStats,
		PeerEndpointCandidates: map[string][]mesh.PeerEndpointCandidate{
			"node-b": {busy, idle},
		},
	}

	targets := vpnFabricSessionTargets(meshState, "node-b")
	if len(targets) != 2 || targets[0].EndpointID != "node-b-quic-b" || targets[1].EndpointID != "node-b-quic-a" {
		t.Fatalf("targets must prefer less pressured QUIC endpoint while keeping busy fallback: %+v", targets)
	}
}
func TestQUICEndpointCapacityPressureForScoringUsesLiveSnapshot(t *testing.T) {
now := time.Now().UTC()
pressure := quicEndpointCapacityPressureForScoringFromSnapshot([]mesh.PeerEndpointCandidate{
{
EndpointID: "node-b-quic-a",
Transport: "direct_quic",
Address: "quic://NODE-B-A.example.test:19443/",
},
{
EndpointID: "node-b-quic-b",
Transport: "direct_quic",
Address: "quic://node-b-b.example.test:19443",
},
}, mesh.QUICFabricTransportSnapshot{
Connections: []mesh.QUICFabricConnSnapshot{
{
Endpoint: "node-b-a.example.test:19443",
ActiveStreams: 5,
MaxStreams: 10,
CapacityPressurePercent: 50,
},
},
}, now)
if len(pressure) != 1 {
t.Fatalf("pressure count = %d, want 1: %+v", len(pressure), pressure)
}
got := pressure["node-b-quic-a"]
if got.EndpointID != "node-b-quic-a" || got.Count != 5 || got.LastSeenUnixSec != now.Unix() {
t.Fatalf("unexpected pressure: %+v", got)
}
if _, ok := pressure["node-b-quic-b"]; ok {
t.Fatalf("unpressured endpoint was included: %+v", pressure)
}
}
func TestMergeEndpointCapacityPressureKeepsStrongerSignal(t *testing.T) {
merged := mergeEndpointCapacityPressure(
map[string]mesh.EndpointCandidateCapacityPressure{
"node-b-quic": {EndpointID: "node-b-quic", Count: 9, LastSeenUnixSec: 10},
},
map[string]mesh.EndpointCandidateCapacityPressure{
"node-b-quic": {EndpointID: "node-b-quic", Count: 1, LastSeenUnixSec: 20},
"node-b-ice": {EndpointID: "node-b-ice", Count: 2, LastSeenUnixSec: 20},
},
)
if merged["node-b-quic"].Count != 9 || merged["node-b-quic"].LastSeenUnixSec != 10 {
t.Fatalf("weaker fresh pressure replaced stronger signal: %+v", merged["node-b-quic"])
}
if merged["node-b-ice"].Count != 2 {
t.Fatalf("new pressure missing: %+v", merged)
}
}
func TestFabricCandidateTransportPolicyStatsCountsRejectedDisallowedCandidates(t *testing.T) {
nodes, total, quicTotal, rejectedTotal, rejectedByTransport := fabricCandidateTransportPolicyStats(map[string][]mesh.PeerEndpointCandidate{
"node-b": {
{EndpointID: "node-b-direct", Transport: "direct_quic"},
{EndpointID: "node-b-relay", Transport: "relay"},
{EndpointID: "node-b-wss", Transport: "wss"},
},
"node-c": {
{EndpointID: "node-c-ice", Transport: "ice_quic"},
{EndpointID: "node-c-empty"},
},
})
if nodes != 2 || total != 5 || quicTotal != 2 || rejectedTotal != 3 {
t.Fatalf("stats = nodes:%d total:%d quic:%d rejected:%d", nodes, total, quicTotal, rejectedTotal)
}
if rejectedByTransport["relay"] != 1 || rejectedByTransport["wss"] != 1 || rejectedByTransport["empty"] != 1 {
t.Fatalf("rejected transports = %+v", rejectedByTransport)
}
}
func TestVPNFabricQUICPressureReportRanksBusyConnections(t *testing.T) {
report := vpnFabricQUICPressureReport(mesh.QUICFabricTransportSnapshot{
Connections: []mesh.QUICFabricConnSnapshot{
{
PeerID: "node-c",
Endpoint: "node-c.example.test:19443",
ActiveStreams: 1,
MaxStreams: 10,
CapacityPressurePercent: 10,
},
{
PeerID: "node-b",
Endpoint: "node-b.example.test:19443",
ActiveStreams: 9,
MaxStreams: 10,
CapacityPressurePercent: 90,
Saturated: true,
LastUsedUnixSec: 100,
},
{
PeerID: "idle",
Endpoint: "idle.example.test:19443",
ActiveStreams: 0,
MaxStreams: 10,
},
},
}, 1)
if len(report) != 1 {
t.Fatalf("report count = %d, want 1: %+v", len(report), report)
}
if report[0].PeerID != "node-b" ||
report[0].CapacityPressurePercent != 90 ||
!report[0].Saturated ||
report[0].LastUsedUnixSec != 100 {
t.Fatalf("unexpected pressure report: %+v", report[0])
}
}
func TestVPNFabricFlowPressureReportIncludesRecommendedAction(t *testing.T) {
report := vpnFabricFlowPressureReport(vpnruntime.FabricFlowSchedulerSnapshot{
PressureLevel: "warning",
PressureScore: 35,
PressureReasons: []string{"bulk_pressure", "backpressure"},
RecommendedAction: "throttle_bulk",
BackpressureActive: true,
BulkPressureActive: true,
BulkPressureChannelCount: 16,
InteractiveOrControlCount: 1,
RouteSwitchReasonCounts: map[string]int{"peer_unavailable": 2},
RecommendedParallelWindows: map[string]int{vpnruntime.FabricTrafficClassBulk: 1},
AdaptiveBackpressureActive: true,
AdaptiveBackpressureReason: "bulk_window_reduced_to_protect_interactive",
AdaptivePolicyFingerprint: "policy-fp",
QualityWindowFailureCount: 0,
QualityWindowDropCount: 0,
QualityWindowSlowCount: 0,
RouteRecoveredChannelCount: 0,
RouteRecoveryMaxMillis: 0,
RouteRecoveryAvgMillis: 0,
PressureHistory: []vpnruntime.FabricFlowPressureHistorySample{
{
ObservedAt: "2026-05-16T12:00:00Z",
PressureLevel: "warning",
PressureScore: 35,
PressureReasons: []string{"bulk_pressure"},
RecommendedAction: "throttle_bulk",
},
},
})
if report["recommended_action"] != "throttle_bulk" ||
report["pressure_score"] != 35 ||
report["bulk_pressure_channel_count"] != 16 {
t.Fatalf("unexpected flow pressure report: %+v", report)
}
history, ok := report["pressure_history"].([]vpnruntime.FabricFlowPressureHistorySample)
if !ok || len(history) != 1 || history[0].RecommendedAction != "throttle_bulk" {
t.Fatalf("unexpected flow pressure history: %+v", report["pressure_history"])
}
}
func TestMergedEndpointCandidateObservationsKeepsNewest(t *testing.T) {
now := time.Now().UTC()
merged := mergedEndpointCandidateObservations(
map[string]mesh.EndpointCandidateHealthObservation{
"endpoint-a": {EndpointID: "endpoint-a", ReliabilityScore: 90, ObservedAt: now},
},
map[string]mesh.EndpointCandidateHealthObservation{
"endpoint-a": {EndpointID: "endpoint-a", ReliabilityScore: 35, ObservedAt: now.Add(-time.Minute)},
"endpoint-b": {EndpointID: "endpoint-b", ReliabilityScore: 80, ObservedAt: now},
},
)
if merged["endpoint-a"].ReliabilityScore != 90 || merged["endpoint-b"].ReliabilityScore != 80 {
t.Fatalf("unexpected merged observations: %+v", merged)
}
}
func TestHeartbeatPayloadReportsMeshListenerFailureWithoutKillingHeartbeat(t *testing.T) {
now := time.Date(2026, 4, 30, 9, 0, 0, 0, time.UTC)
payload := heartbeatPayload(config.Config{
MeshConnectivityMode: "private_lan",
}, state.Identity{
ClusterID: "cluster-1",
NodeID: "node-a",
}, &syntheticMeshState{
ListenerReport: fabricListenerReport{
SchemaVersion: "c17z21.mesh_listener_report.v1",
ConfiguredListenAddr: ":19131",
ListenPortMode: "manual",
Status: "listen_failed",
InboundReachability: "unavailable",
ControlPlaneReachable: true,
OneWayConnectivity: true,
FailureReason: "bind_failed",
FailureError: "listen tcp :19131: bind: address already in use",
PortConflict: true,
},
}, now)
report, ok := payload.Metadata["mesh_listener_report"].(fabricListenerReport)
if !ok {
t.Fatalf("mesh listener report missing: %+v", payload.Metadata)
}
if payload.HealthStatus != "warning" || report.Status != "listen_failed" || !report.PortConflict {
t.Fatalf("unexpected listener health report: status=%s report=%+v", payload.HealthStatus, report)
}
if payload.Capabilities["mesh_listener_diagnostics"] != true || payload.Capabilities["mesh_one_way_connectivity"] != true {
t.Fatalf("listener capabilities missing: %+v", payload.Capabilities)
}
}
func TestAdvertisedEndpointCandidatesPreferManualEndpoints(t *testing.T) {
now := time.Date(2026, 4, 30, 9, 0, 0, 0, time.UTC)
candidates, err := advertisedEndpointCandidates(config.Config{
MeshAdvertiseEndpointsJSON: `[{"endpoint_id":"node-a-json","node_id":"node-a","transport":"direct_quic","address":"quic://10.10.10.10:19443","priority":12,"connectivity_mode":"private_lan","reachability":"private"}]`,
MeshAdvertiseEndpoint: "quic://203.0.113.10:19443",
MeshAdvertiseTransport: "direct_quic",
MeshConnectivityMode: "direct",
MeshNATType: "port_restricted",
MeshRegion: "edge",
}, state.Identity{
ClusterID: "cluster-1",
NodeID: "node-a",
}, nil, now)
if err != nil {
t.Fatalf("advertised endpoint candidates failed: %v", err)
}
if len(candidates) != 2 {
t.Fatalf("expected manual and JSON QUIC candidates, got %d: %+v", len(candidates), candidates)
}
if candidates[0].Address != "quic://203.0.113.10:19443" || candidates[0].Priority != 10 {
t.Fatalf("explicit advertise endpoint must win: %+v", candidates)
}
if candidates[1].EndpointID != "node-a-json" {
t.Fatalf("configured endpoint must remain as fallback candidate: %+v", candidates)
}
}
func TestAdvertisedEndpointCandidatesRejectsDisallowedConfiguredCandidateTransport(t *testing.T) {
_, err := advertisedEndpointCandidates(config.Config{
MeshAdvertiseEndpointsJSON: `[{"endpoint_id":"node-a-ws","node_id":"node-a","transport":"websocket","address":"quic://10.10.10.10:19443","connectivity_mode":"direct","reachability":"public"}]`,
MeshAdvertiseTransport: "direct_quic",
}, state.Identity{ClusterID: "cluster-1", NodeID: "node-a"}, nil, time.Date(2026, 5, 16, 12, 0, 0, 0, time.UTC))
if err == nil || !strings.Contains(err.Error(), "QUIC transport") {
t.Fatalf("expected QUIC transport validation error, got %v", err)
}
}
func TestAdvertisedEndpointCandidatesRejectsDisallowedConfiguredCandidateScheme(t *testing.T) {
_, err := advertisedEndpointCandidates(config.Config{
MeshAdvertiseEndpointsJSON: `[{"endpoint_id":"node-a-https","node_id":"node-a","transport":"direct_quic","address":"https://node-a.example.test:443","connectivity_mode":"direct","reachability":"public"}]`,
MeshAdvertiseTransport: "direct_quic",
}, state.Identity{ClusterID: "cluster-1", NodeID: "node-a"}, nil, time.Date(2026, 5, 16, 12, 0, 0, 0, time.UTC))
if err == nil || !strings.Contains(err.Error(), "QUIC endpoint") {
t.Fatalf("expected QUIC endpoint validation error, got %v", err)
}
}
// TestNetworkInterfaceClassificationSkipsContainerNoise checks the class that
// classifyNetworkInterface assigns to representative interface names.
//
// Each name runs as its own subtest and mismatches are reported with Errorf,
// so a single run lists every misclassified interface instead of aborting on
// whichever map entry happens to be visited first.
func TestNetworkInterfaceClassificationSkipsContainerNoise(t *testing.T) {
	tests := map[string]string{
		"ens160":      "physical",
		"wg0":         "vpn",
		"tailscale0":  "vpn",
		"docker0":     "container",
		"br-a1b2c3d4": "container",
		"vethabc123":  "container",
	}
	for name, want := range tests {
		name, want := name, want // keep per-iteration copies for the closure on older Go versions
		t.Run(name, func(t *testing.T) {
			if got := classifyNetworkInterface(name); got != want {
				t.Errorf("classifyNetworkInterface(%q)=%q, want %q", name, got, want)
			}
		})
	}
}
func TestHeartbeatPayloadTreatsOutboundOnlyListenerFailureAsOneWayConnectivity(t *testing.T) {
payload := heartbeatPayload(config.Config{
FabricRuntimeEnabled: true,
MeshConnectivityMode: "outbound_only",
}, state.Identity{
ClusterID: "cluster-1",
NodeID: "node-a",
}, &syntheticMeshState{
ListenerReport: fabricListenerReport{
SchemaVersion: "c17z21.mesh_listener_report.v1",
ConfiguredListenAddr: ":19131",
ListenPortMode: "manual",
Status: "listen_failed",
InboundReachability: "unavailable",
ControlPlaneReachable: true,
OneWayConnectivity: true,
FailureReason: "bind_failed",
},
}, time.Date(2026, 4, 30, 9, 0, 0, 0, time.UTC))
if payload.HealthStatus != "healthy" {
t.Fatalf("HealthStatus = %q, want healthy for outbound-only listener failure", payload.HealthStatus)
}
report, ok := payload.Metadata["mesh_outbound_session_report"].(meshOutboundSessionReport)
if !ok {
t.Fatalf("mesh outbound session report missing: %+v", payload.Metadata)
}
if report.Status != "ready" || !report.UsableForInboundControl || report.ListenerStatus != "listen_failed" {
t.Fatalf("unexpected outbound session report: %+v", report)
}
if payload.Capabilities["mesh_outbound_control_session"] != true ||
payload.Capabilities["mesh_reverse_control_channel_contract"] != true {
t.Fatalf("outbound session capabilities missing: %+v", payload.Capabilities)
}
}
func TestHeartbeatPayloadReportsMeshConfigLoadFailureWithoutDroppingPresence(t *testing.T) {
payload := heartbeatPayload(config.Config{
FabricRuntimeEnabled: true,
MeshConnectivityMode: "private_lan",
}, state.Identity{
ClusterID: "cluster-1",
NodeID: "node-a",
}, &syntheticMeshState{
ConfigLoadError: "control-plane synthetic mesh config unavailable",
ListenerReport: fabricListenerReport{
SchemaVersion: "c17z21.mesh_listener_report.v1",
ConfiguredListenAddr: ":19131",
ListenPortMode: "manual",
Status: "listening",
InboundReachability: "private",
ControlPlaneReachable: true,
},
}, time.Date(2026, 4, 30, 9, 0, 0, 0, time.UTC))
report, ok := payload.Metadata["mesh_outbound_session_report"].(meshOutboundSessionReport)
if !ok {
t.Fatalf("mesh outbound session report missing: %+v", payload.Metadata)
}
if payload.HealthStatus != "warning" || report.Status != "degraded" || report.ConfigLoadError == "" {
t.Fatalf("unexpected config-load diagnostic heartbeat: health=%s report=%+v", payload.HealthStatus, report)
}
}
// TestHeartbeatPayloadReportsFabricRegistryWithoutSyntheticRuntime checks that
// a fabric registry attached to the mesh state is reported in the heartbeat
// even when FabricRuntimeEnabled is false, and that the resolver capability is
// advertised.
func TestHeartbeatPayloadReportsFabricRegistryWithoutSyntheticRuntime(t *testing.T) {
	tlsConfig := testMainQUICTLSConfig(t)
	certSHA256 := testMainQUICCertSHA256(t, tlsConfig)
	registry := signedTestControlRegistry(t, "cluster-1", "quic://127.0.0.1:19131", certSHA256)

	cfg := config.Config{
		FabricRuntimeEnabled: false,
	}
	identity := state.Identity{
		ClusterID: "cluster-1",
		NodeID:    "node-a",
	}
	meshState := &syntheticMeshState{
		FabricRegistry: registry,
		ListenerReport: fabricListenerReport{
			SchemaVersion:       "c17z21.mesh_listener_report.v1",
			Status:              "listening",
			InboundReachability: "public",
		},
		ListenerRuntimeConfig: config.Config{MeshRegion: "test"},
	}
	heartbeatAt := time.Date(2026, 5, 19, 0, 0, 0, 0, time.UTC)

	payload := heartbeatPayload(cfg, identity, meshState, heartbeatAt)

	runtimeReport, found := payload.Metadata["fabric_registry_runtime_report"].(map[string]any)
	if !found {
		t.Fatalf("fabric registry runtime report missing: %+v", payload.Metadata)
	}
	if runtimeReport["status"] != "degraded" {
		t.Fatalf("registry runtime status = %+v, want degraded", runtimeReport)
	}
	if payload.Capabilities["fabric_registry_service_resolver"] != true {
		t.Fatalf("fabric registry capability missing: %+v", payload.Capabilities)
	}
}
func TestOutboundSessionReportTreatsListeningPrivateLANAsUsable(t *testing.T) {
report := meshOutboundSessionReportFromState(config.Config{
MeshConnectivityMode: "private_lan",
FabricRuntimeEnabled: true,
}, &syntheticMeshState{
ListenerReport: fabricListenerReport{
SchemaVersion: "c17z21.mesh_listener_report.v1",
Status: "listening",
InboundReachability: reachabilityFromConnectivityMode("private_lan"),
},
}, time.Date(2026, 4, 30, 9, 0, 0, 0, time.UTC))
if !report.UsableForInboundControl {
t.Fatalf("listening private LAN listener must be usable: %+v", report)
}
if reachabilityFromConnectivityMode("private_lan") != "private" {
t.Fatalf("private_lan reachability mismatch")
}
}
func TestHeartbeatPayloadReportsMultipleMeshEndpoints(t *testing.T) {
payload := heartbeatPayload(config.Config{
MeshAdvertiseEndpointsJSON: `[{
"endpoint_id": "node-a-lan",
"address": "quic://10.24.10.10:19443",
"transport": "direct_quic",
"reachability": "private",
"connectivity_mode": "direct",
"nat_type": "none",
"region": "corp-eu",
"priority": 1,
"policy_tags": ["corp-lan", "same-site"]
},{
"endpoint_id": "node-a-public",
"address": "quic://node-a.example.test:19443",
"transport": "direct_quic",
"reachability": "public",
"connectivity_mode": "direct",
"nat_type": "none",
"priority": 10
}]`,
MeshRegion: "corp-eu",
}, state.Identity{
ClusterID: "cluster-1",
NodeID: "node-a",
}, nil, time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC))
report, ok := payload.Metadata["mesh_endpoint_report"].(map[string]any)
if !ok {
t.Fatalf("mesh endpoint report missing: %+v", payload.Metadata)
}
candidates, ok := report["endpoint_candidates"].([]mesh.PeerEndpointCandidate)
if !ok || len(candidates) != 2 {
t.Fatalf("unexpected endpoint candidates: %#v", report["endpoint_candidates"])
}
if candidates[0].EndpointID != "node-a-lan" || candidates[0].Reachability != "private" {
t.Fatalf("internal endpoint candidate not preserved: %+v", candidates[0])
}
if report["peer_endpoint"] != "quic://10.24.10.10:19443" {
t.Fatalf("default peer endpoint = %v", report["peer_endpoint"])
}
}
// TestFabricRegistryRuntimeStatusClassification checks the status and reason
// strings returned by fabricRegistryRuntimeStatus and
// fabricRegistryRuntimeStatusReason for several registry snapshots.
//
// Every table entry runs as a named subtest and uses Errorf, so one failing
// case does not hide the results of the remaining cases, and a wrong status
// does not hide a wrong reason for the same case.
func TestFabricRegistryRuntimeStatusClassification(t *testing.T) {
	tests := []struct {
		name             string
		snapshot         mesh.FabricRegistrySnapshot
		requiredResolved int
		requiredTotal    int
		resolvedTotal    int
		wantStatus       string
		wantReason       string
	}{
		{
			name:             "candidate only",
			snapshot:         mesh.FabricRegistrySnapshot{Candidate: 3},
			requiredResolved: 0,
			requiredTotal:    3,
			resolvedTotal:    0,
			wantStatus:       "candidate_only",
			wantReason:       "signed_records_loaded_but_core_services_not_live_verified",
		},
		{
			name:             "active",
			snapshot:         mesh.FabricRegistrySnapshot{Active: 3},
			requiredResolved: 3,
			requiredTotal:    3,
			resolvedTotal:    4,
			wantStatus:       "active",
			wantReason:       "required_core_services_resolved",
		},
		{
			name:             "degraded",
			snapshot:         mesh.FabricRegistrySnapshot{Active: 1, Candidate: 2},
			requiredResolved: 1,
			requiredTotal:    3,
			resolvedTotal:    1,
			wantStatus:       "degraded",
			wantReason:       "partial_core_service_resolution",
		},
		{
			name:             "missing",
			snapshot:         mesh.FabricRegistrySnapshot{},
			requiredResolved: 0,
			requiredTotal:    3,
			resolvedTotal:    0,
			wantStatus:       "missing",
			wantReason:       "no_registry_resolution",
		},
	}
	for _, tc := range tests {
		tc := tc // keep a per-iteration copy for the closure on older Go versions
		t.Run(tc.name, func(t *testing.T) {
			if got := fabricRegistryRuntimeStatus(tc.snapshot, tc.requiredResolved, tc.requiredTotal, tc.resolvedTotal); got != tc.wantStatus {
				t.Errorf("%s: status = %q, want %q", tc.name, got, tc.wantStatus)
			}
			if got := fabricRegistryRuntimeStatusReason(tc.snapshot, tc.requiredResolved, tc.requiredTotal, tc.resolvedTotal); got != tc.wantReason {
				t.Errorf("%s: reason = %q, want %q", tc.name, got, tc.wantReason)
			}
		})
	}
}
func TestAdvertisedEndpointCandidatesIncludeSTUNAndRelayFallback(t *testing.T) {
now := time.Date(2026, 5, 16, 12, 0, 0, 0, time.UTC)
candidates, err := advertisedEndpointCandidates(config.Config{
MeshAdvertiseTransport: "quic",
MeshConnectivityMode: "outbound_only",
MeshNATType: "symmetric",
MeshSiteID: "home",
MeshLocalityGroupID: "home-lan",
MeshNATGroupID: "nat-a",
MeshSTUNReflexiveEndpoint: "quic://203.0.113.22:19443",
MeshSTUNServer: "stun.example.test:3478",
MeshRelayNodeID: "node-r",
MeshRelayEndpoint: "quic://node-r.example.test:19443",
MeshProductionForwardingEnabled: true,
}, state.Identity{ClusterID: "cluster-1", NodeID: "node-a"}, nil, now)
if err != nil {
t.Fatalf("advertised endpoint candidates: %v", err)
}
if len(candidates) != 2 {
t.Fatalf("candidates = %+v, want STUN and relay fallback", candidates)
}
if candidates[0].EndpointID != "node-a-stun-reflexive" || candidates[0].Transport != "ice_quic" || candidates[0].Reachability != "public" {
t.Fatalf("unexpected STUN candidate: %+v", candidates[0])
}
if candidates[1].EndpointID != "node-a-relay-fallback" || candidates[1].Transport != "relay_quic" || candidates[1].ConnectivityMode != "relay_required" {
t.Fatalf("unexpected relay candidate: %+v", candidates[1])
}
var metadata map[string]any
if err := json.Unmarshal(candidates[0].Metadata, &metadata); err != nil {
t.Fatalf("metadata parse: %v", err)
}
if metadata["site_id"] != "home" || metadata["locality_group_id"] != "home-lan" || metadata["nat_group_id"] != "nat-a" || metadata["stun_server"] != "stun.example.test:3478" {
t.Fatalf("missing discovery metadata: %+v", metadata)
}
}
func TestHeartbeatPayloadIncludesPeerRecoveryReportWithoutAdvertisedEndpoint(t *testing.T) {
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
local := mesh.PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"}
peerCache := mesh.NewPeerCache(mesh.PeerCacheConfig{
Local: local,
PeerEndpoints: map[string]string{
"node-b": "quic://node-b:19443",
"node-c": "quic://node-c:19443",
"node-d": "quic://node-d:19443",
},
WarmPeerLimit: 3,
Now: now,
})
peerConnections := mesh.NewPeerConnectionTracker(peerCache.Snapshot(), now)
peerConnections.RecordSuccess("node-b", 20, now)
meshState := &syntheticMeshState{
PeerCache: peerCache,
PeerConnections: peerConnections,
}
payload := heartbeatPayload(config.Config{}, state.Identity{
ClusterID: "cluster-1",
NodeID: "node-a",
}, meshState, now)
report, ok := payload.Metadata["mesh_peer_recovery_report"].(map[string]any)
if !ok {
t.Fatalf("mesh peer recovery report missing: %+v", payload.Metadata)
}
if report["schema_version"] != "c17z9.mesh_peer_recovery_report.v1" ||
report["mode"] != mesh.PeerRecoveryModeRecovery ||
report["ready_peer_count"] != 1 ||
report["target_ready_peers"] != mesh.DefaultStablePeerTarget ||
report["deficit"] != 2 {
t.Fatalf("unexpected recovery report: %+v", report)
}
if payload.Capabilities["mesh_peer_recovery_planning"] != true {
t.Fatalf("peer recovery capability missing: %+v", payload.Capabilities)
}
intentReport, ok := payload.Metadata["mesh_peer_connection_intent_report"].(map[string]any)
if !ok {
t.Fatalf("mesh peer connection intent report missing: %+v", payload.Metadata)
}
if intentReport["schema_version"] != "c17z12.mesh_peer_connection_intent_report.v1" ||
intentReport["intent_count"] != 3 ||
intentReport["recover_count"] != 2 {
t.Fatalf("unexpected connection intent report: %+v", intentReport)
}
if payload.Capabilities["mesh_peer_connection_intent_planning"] != true {
t.Fatalf("connection intent capability missing: %+v", payload.Capabilities)
}
if payload.Capabilities["mesh_peer_cache_endpoint_health_ranking"] != true {
t.Fatalf("peer cache endpoint health capability missing: %+v", payload.Capabilities)
}
if _, ok := payload.Metadata["mesh_endpoint_report"]; ok {
t.Fatalf("endpoint report should not be emitted without advertised endpoint: %+v", payload.Metadata)
}
}
// TestHeartbeatPayloadIncludesRendezvousLeaseAdmissionReport checks the
// rendezvous lease report in the heartbeat for three leases: one where the
// local node is the relay, one where it is the peer and close to expiry, and
// one already expired.
func TestHeartbeatPayloadIncludesRendezvousLeaseAdmissionReport(t *testing.T) {
	now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
	identity := state.Identity{ClusterID: "cluster-1", NodeID: "node-a"}

	// Local node (node-a) acts as relay for node-b.
	relayLease := mesh.PeerRendezvousLease{
		LeaseID:          "lease-node-b-via-node-a",
		PeerNodeID:       "node-b",
		RelayNodeID:      "node-a",
		RelayEndpoint:    "quic://node-a:19443",
		Transport:        "relay_quic",
		ConnectivityMode: "relay_required",
		RouteIDs:         []string{"route-ab"},
		AllowedChannels:  []string{mesh.SyntheticChannelFabricControl},
		Priority:         10,
		ControlPlaneOnly: true,
		IssuedAt:         now.Add(-time.Minute),
		ExpiresAt:        now.Add(5 * time.Minute),
	}
	// Local node is the peer; this lease expires 30 seconds after now.
	peerLease := mesh.PeerRendezvousLease{
		LeaseID:          "lease-node-a-via-node-r",
		PeerNodeID:       "node-a",
		RelayNodeID:      "node-r",
		RelayEndpoint:    "quic://node-r:19443",
		Transport:        "relay_quic",
		ConnectivityMode: "relay_required",
		RouteIDs:         []string{"route-ra"},
		Priority:         20,
		ControlPlaneOnly: true,
		IssuedAt:         now.Add(-2 * time.Minute),
		ExpiresAt:        now.Add(30 * time.Second),
	}
	// This lease expired one second before now.
	expiredLease := mesh.PeerRendezvousLease{
		LeaseID:          "lease-node-c-via-node-r-expired",
		PeerNodeID:       "node-c",
		RelayNodeID:      "node-r",
		RelayEndpoint:    "quic://node-r:19443",
		Transport:        "relay_quic",
		ConnectivityMode: "relay_required",
		RouteIDs:         []string{"route-cr"},
		Priority:         30,
		ControlPlaneOnly: true,
		IssuedAt:         now.Add(-10 * time.Minute),
		ExpiresAt:        now.Add(-time.Second),
	}
	leases := []mesh.PeerRendezvousLease{relayLease, peerLease, expiredLease}

	cache := mesh.NewPeerCache(mesh.PeerCacheConfig{
		Local:            mesh.PeerIdentity{ClusterID: identity.ClusterID, NodeID: identity.NodeID},
		RendezvousLeases: leases,
		WarmPeerLimit:    3,
		Now:              now,
	})
	tracker := mesh.NewPeerConnectionTracker(cache.Snapshot(), now)
	readyPeer := mesh.PeerCacheEntry{
		NodeID:             "node-b",
		Endpoint:           "quic://node-a:19443",
		Warm:               true,
		RendezvousLeaseID:  "lease-node-b-via-node-a",
		RelayNodeID:        "node-a",
		RelayEndpoint:      "quic://node-a:19443",
		RelayQUIC:          true,
		BestTransport:      "relay_quic",
		BestReachability:   "relay",
		BestConnectivity:   "relay_required",
		BestCandidateScore: 500,
	}
	tracker.RecordRelayReady(readyPeer, 12, now.Add(time.Second))

	payload := heartbeatPayload(config.Config{}, identity, &syntheticMeshState{
		PeerCache:        cache,
		RendezvousLeases: leases,
		PeerConnections:  tracker,
	}, now)

	report, found := payload.Metadata["mesh_rendezvous_lease_report"].(map[string]any)
	if !found {
		t.Fatalf("rendezvous lease report missing: %+v", payload.Metadata)
	}
	wantCounts := []struct {
		key  string
		want any
	}{
		{"schema_version", meshRendezvousLeaseReportSchema},
		{"lease_count", 3},
		{"active_count", 2},
		{"expired_count", 1},
		{"admitted_as_relay_count", 1},
		{"admitted_as_peer_count", 1},
		{"renewal_needed_count", 1},
		{"relay_quic_ready_count", 1},
	}
	for _, field := range wantCounts {
		if report[field.key] != field.want {
			t.Fatalf("unexpected lease report: %+v", report)
		}
	}
	boundaryOK := report["control_plane_only"] == true &&
		report["relay_payload_forwarding"] == false &&
		report["production_payload_forwarding"] == false
	if !boundaryOK {
		t.Fatalf("payload boundary flags not preserved: %+v", report)
	}

	details, typed := report["leases"].([]map[string]any)
	if !(typed && len(details) == 3) {
		t.Fatalf("unexpected lease details: %#v", report["leases"])
	}
	relayDetail := details[0]
	relayAdmitted := relayDetail["role"] == "relay" &&
		relayDetail["status"] == "admitted" &&
		relayDetail["admitted"] == true &&
		relayDetail["relay_ready"] == true
	if !relayAdmitted {
		t.Fatalf("relay admission detail missing: %+v", relayDetail)
	}
	peerDetail := details[1]
	peerRenewal := peerDetail["role"] == "peer" &&
		peerDetail["status"] == "renewal_needed" &&
		peerDetail["renewal_needed"] == true
	if !peerRenewal {
		t.Fatalf("peer renewal detail missing: %+v", peerDetail)
	}
	if payload.Capabilities[meshRendezvousLeaseTelemetryCapability] != true {
		t.Fatalf("lease telemetry capability missing: %+v", payload.Capabilities)
	}
}
// TestHeartbeatPayloadReportsStaleRelayWithdrawalTelemetry checks that, after
// a relay lease was recorded as ready and then failed three times, the
// heartbeat lease report marks that lease as a stale relay needing withdrawal
// while the alternate lease through another relay stays unaffected.
//
// The report and its lease details are extracted with checked type assertions
// and a length guard, so a missing or differently shaped report fails the test
// with a message instead of panicking.
func TestHeartbeatPayloadReportsStaleRelayWithdrawalTelemetry(t *testing.T) {
	now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
	identity := state.Identity{ClusterID: "cluster-1", NodeID: "node-r"}
	lease := mesh.PeerRendezvousLease{
		LeaseID:          "lease-node-b-via-node-r",
		PeerNodeID:       "node-b",
		RelayNodeID:      "node-r",
		RelayEndpoint:    "quic://node-r:19443",
		Transport:        "relay_quic",
		ConnectivityMode: "relay_required",
		RouteIDs:         []string{"route-rb"},
		Priority:         10,
		ControlPlaneOnly: true,
		IssuedAt:         now.Add(-time.Minute),
		ExpiresAt:        now.Add(10 * time.Minute),
	}
	// Same peer and route, but routed through a second relay.
	altLease := lease
	altLease.LeaseID = "lease-node-b-via-node-r2"
	altLease.RelayNodeID = "node-r2"
	altLease.RelayEndpoint = "quic://node-r2:19443"
	altLease.Priority = 20
	cache := mesh.NewPeerCache(mesh.PeerCacheConfig{
		Local:            mesh.PeerIdentity{ClusterID: identity.ClusterID, NodeID: identity.NodeID},
		RendezvousLeases: []mesh.PeerRendezvousLease{lease, altLease},
		WarmPeerLimit:    1,
		Now:              now,
	})
	tracker := mesh.NewPeerConnectionTracker(cache.Snapshot(), now)
	peer := mesh.PeerCacheEntry{
		NodeID:            "node-b",
		Endpoint:          "quic://node-r:19443",
		Warm:              true,
		RendezvousLeaseID: "lease-node-b-via-node-r",
		RelayNodeID:       "node-r",
		RelayEndpoint:     "quic://node-r:19443",
		RelayQUIC:         true,
	}
	// One successful relay handshake followed by three consecutive failures.
	tracker.RecordRelayReady(peer, 10, now.Add(time.Second))
	tracker.RecordFailure("node-b", "relay health failed", now.Add(2*time.Second))
	tracker.RecordFailure("node-b", "relay health failed", now.Add(3*time.Second))
	tracker.RecordFailure("node-b", "relay health failed", now.Add(4*time.Second))
	meshState := &syntheticMeshState{
		PeerCache:        cache,
		RendezvousLeases: []mesh.PeerRendezvousLease{lease, altLease},
		PeerConnections:  tracker,
		Source:           "control_plane",
	}
	payload := heartbeatPayload(config.Config{}, identity, meshState, now.Add(5*time.Second))
	report, ok := payload.Metadata["mesh_rendezvous_lease_report"].(map[string]any)
	if !ok {
		t.Fatalf("rendezvous lease report missing: %+v", payload.Metadata)
	}
	if report["stale_relay_count"] != 1 ||
		report["withdrawal_needed_count"] != 1 ||
		report["reselection_needed_count"] != 0 ||
		report["refresh_needed_count"] != 1 {
		t.Fatalf("unexpected stale relay report: %+v", report)
	}
	leaseDetails, ok := report["leases"].([]map[string]any)
	// Both indexes 0 and 1 are read below, so at least two entries are required.
	if !ok || len(leaseDetails) < 2 {
		t.Fatalf("unexpected lease details: %#v", report["leases"])
	}
	if leaseDetails[0]["stale_relay"] != true ||
		leaseDetails[0]["withdrawal_needed"] != true ||
		leaseDetails[0]["connection_state"] != mesh.PeerConnectionBackoff {
		t.Fatalf("stale relay detail missing: %+v", leaseDetails[0])
	}
	if leaseDetails[1]["stale_relay"] != false ||
		leaseDetails[1]["withdrawal_needed"] != false {
		t.Fatalf("alternate relay lease should not inherit stale state: %+v", leaseDetails[1])
	}
}
// TestRefreshRendezvousLeasesIfNeededReloadsControlPlaneConfig pins the
// failure path of refreshRendezvousLeasesIfNeeded: with a client created from
// an empty base URL, the call must return a "fabric control synthetic mesh
// config route unavailable" error, must not hit the HTTP backend, and must
// leave the previously loaded mesh state (config version and leases) intact.
//
// The HTTP test server is kept as a tripwire: it counts requests and would
// serve a refreshed configuration if anything contacted it. Handler failures
// are reported with Errorf because FailNow/Fatalf may only be called from the
// goroutine running the test, not from the server's handler goroutine.
//
// Only the failure path is asserted here; this test makes no claims about the
// state after a successful refresh.
func TestRefreshRendezvousLeasesIfNeededReloadsControlPlaneConfig(t *testing.T) {
	now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
	identity := state.Identity{ClusterID: "cluster-1", NodeID: "node-a"}
	// The existing lease expires 30 seconds after now.
	oldLease := mesh.PeerRendezvousLease{
		LeaseID:          "lease-node-b-via-node-r-old",
		PeerNodeID:       "node-b",
		RelayNodeID:      "node-r-old",
		RelayEndpoint:    "quic://node-r-old:19443",
		Transport:        "relay_quic",
		ConnectivityMode: "relay_required",
		RouteIDs:         []string{"route-ab"},
		Priority:         10,
		ControlPlaneOnly: true,
		IssuedAt:         now.Add(-2 * time.Minute),
		ExpiresAt:        now.Add(30 * time.Second),
	}
	local := mesh.PeerIdentity{ClusterID: identity.ClusterID, NodeID: identity.NodeID}
	oldCache := mesh.NewPeerCache(mesh.PeerCacheConfig{
		Local:            local,
		RendezvousLeases: []mesh.PeerRendezvousLease{oldLease},
		WarmPeerLimit:    1,
		Now:              now,
	})
	tracker := mesh.NewPeerConnectionTracker(oldCache.Snapshot(), now)
	oldPathDecisions := &client.RoutePathDecisionReport{
		SchemaVersion:            "c17z18.route_path_decisions.v1",
		DecisionMode:             "control_plane_effective_path_from_relay_policy",
		Generation:               "old-config",
		DecisionCount:            1,
		ReplacementDecisionCount: 0,
		ControlPlaneOnly:         true,
		ProductionForwarding:     false,
		Decisions: []client.RoutePathDecision{
			{
				DecisionID:           "route-ab-path-node-a-via-node-r-old",
				RouteID:              "route-ab",
				ClusterID:            "cluster-1",
				LocalNodeID:          "node-a",
				SourceNodeID:         "node-a",
				DestinationNodeID:    "node-b",
				OriginalHops:         []string{"node-a", "node-r-old", "node-b"},
				EffectiveHops:        []string{"node-a", "node-r-old", "node-b"},
				NextHopID:            "node-r-old",
				LocalRole:            "entry",
				DecisionSource:       "route_intent",
				Generation:           "old-config",
				PathScore:            1000,
				ControlPlaneOnly:     true,
				ProductionForwarding: false,
				ExpiresAt:            now.Add(10 * time.Minute),
			},
		},
	}
	meshState := &syntheticMeshState{
		PeerCache:          oldCache,
		RendezvousLeases:   []mesh.PeerRendezvousLease{oldLease},
		RoutePathDecisions: oldPathDecisions,
		RouteGenerationTracker: newMeshRouteGenerationTracker(
			oldPathDecisions,
			now.Add(-time.Minute),
		),
		PeerConnections:       tracker,
		PeerConnectionManager: mesh.NewPeerConnectionManager(mesh.PeerConnectionManagerConfig{Local: local, PeerCache: oldCache, Tracker: tracker, RendezvousLeases: []mesh.PeerRendezvousLease{oldLease}}),
		Source:                "control_plane",
		ConfigVersion:         "old-config",
	}
	requests := 0
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path != "/clusters/cluster-1/nodes/node-a/mesh/synthetic-config" {
			// Errorf, not Fatalf: this runs on the server's handler goroutine.
			t.Errorf("unexpected path: %s", r.URL.Path)
			http.Error(w, "unexpected path", http.StatusNotFound)
			return
		}
		requests++
		response := map[string]any{
			"synthetic_mesh_config": map[string]any{
				"enabled":                true,
				"schema_version":         "c17z18.synthetic.v1",
				"cluster_id":             "cluster-1",
				"local_node_id":          "node-a",
				"config_version":         "new-config",
				"peer_directory_version": "new-config",
				"policy_version":         "new-config",
				"peer_endpoints":         map[string]string{"node-r-new": "quic://node-r-new:19443"},
				"peer_endpoint_candidates": map[string]any{
					"node-b": []map[string]any{
						{
							"endpoint_id":       "node-b-outbound-only",
							"node_id":           "node-b",
							"transport":         "reverse_quic",
							"address":           "quic://node-b:19002",
							"address_family":    "ipv4",
							"reachability":      "outbound_only",
							"connectivity_mode": "outbound_only",
							"nat_type":          "symmetric",
							"region":            "test",
							"priority":          5,
						},
					},
				},
				"peer_directory": []map[string]any{
					{"node_id": "node-b", "route_ids": []string{"route-ab"}, "endpoint_count": 0, "candidate_count": 1, "connectivity_modes": []string{"relay_required"}, "recovery_seed": false},
				},
				"rendezvous_leases": []map[string]any{
					{
						"lease_id":           "lease-node-b-via-node-r-new",
						"peer_node_id":       "node-b",
						"relay_node_id":      "node-r-new",
						"relay_endpoint":     "quic://node-r-new:19443",
						"transport":          "relay_quic",
						"connectivity_mode":  "relay_required",
						"route_ids":          []string{"route-ab"},
						"allowed_channels":   []string{mesh.SyntheticChannelFabricControl},
						"priority":           5,
						"control_plane_only": true,
						"issued_at":          now,
						"expires_at":         now.Add(10 * time.Minute),
						"reason":             "refresh_test",
					},
				},
				"route_path_decisions": map[string]any{
					"schema_version":             "c17z18.route_path_decisions.v1",
					"decision_mode":              "control_plane_effective_path_from_relay_policy",
					"generation":                 "new-config",
					"decision_count":             1,
					"replacement_decision_count": 1,
					"control_plane_only":         true,
					"production_forwarding":      false,
					"decisions": []map[string]any{
						{
							"decision_id":             "route-ab-path-node-a-via-node-r-new",
							"route_id":                "route-ab",
							"cluster_id":              "cluster-1",
							"local_node_id":           "node-a",
							"source_node_id":          "node-a",
							"destination_node_id":     "node-b",
							"original_hops":           []string{"node-a", "node-r-old", "node-r-new", "node-b"},
							"effective_hops":          []string{"node-a", "node-r-new", "node-b"},
							"next_hop_id":             "node-r-new",
							"local_role":              "entry",
							"selected_relay_id":       "node-r-new",
							"selected_relay_endpoint": "quic://node-r-new:19443",
							"stale_relay_node_id":     "node-r-old",
							"rendezvous_lease_id":     "lease-node-b-via-node-r-new",
							"rendezvous_lease_reason": "stale_relay_replacement",
							"decision_source":         "stale_relay_replacement",
							"generation":              "new-config",
							"path_score":              900,
							"score_reasons":           []string{"relay_replacement_policy"},
							"control_plane_only":      true,
							"production_forwarding":   false,
							"expires_at":              now.Add(10 * time.Minute),
						},
					},
				},
				"routes": []map[string]any{
					{
						"route_id":            "route-ab",
						"cluster_id":          "cluster-1",
						"source_node_id":      "node-a",
						"destination_node_id": "node-b",
						"hops":                []string{"node-a", "node-r-old", "node-r-new", "node-b"},
						"allowed_channels":    []string{mesh.SyntheticChannelFabricControl},
						"expires_at":          now.Add(10 * time.Minute),
						"max_ttl":             6,
						"max_hops":            6,
					},
				},
				"production_forwarding": false,
			},
		}
		if err := json.NewEncoder(w).Encode(response); err != nil {
			t.Errorf("write response: %v", err)
		}
	}))
	defer server.Close()

	// The client is deliberately created without a base URL, so the refresh
	// has no backend to fall back to.
	err := refreshRendezvousLeasesIfNeeded(context.Background(), config.Config{}, identity, client.New(""), meshState, now)
	if err == nil || !strings.Contains(err.Error(), "fabric control synthetic mesh config route unavailable") {
		t.Fatalf("refresh leases error = %v, want fabric route failure", err)
	}
	if requests != 0 {
		t.Fatalf("requests = %d, want 0 backend requests", requests)
	}
	if meshState.ConfigVersion != "old-config" ||
		len(meshState.RendezvousLeases) != 1 ||
		meshState.RendezvousLeases[0].RelayNodeID != "node-r-old" {
		t.Fatalf("mesh state should not refresh without fabric control: version=%s leases=%+v", meshState.ConfigVersion, meshState.RendezvousLeases)
	}
}
// TestRouteHealthFeedbackRefreshAppliesReplacementConfig drives
// refreshSyntheticMeshConfigForRouteHealthFeedback with a route-health drift
// trigger while the client is constructed with an empty base URL
// (client.New("")).
//
// Despite its name, the test pins the failure path: the refresh must return an
// error containing "fabric control synthetic mesh config route unavailable",
// the stub backend must see zero requests, and the mesh state must keep its
// "old-config" version.
//
// The success-path assertions that used to follow an unconditional return were
// unreachable and have been removed; restore them from history if the refresh
// is expected to succeed again.
func TestRouteHealthFeedbackRefreshAppliesReplacementConfig(t *testing.T) {
	now := time.Date(2026, 4, 28, 22, 0, 0, 0, time.UTC)
	identity := state.Identity{ClusterID: "cluster-1", NodeID: "node-a"}
	local := mesh.PeerIdentity{ClusterID: identity.ClusterID, NodeID: identity.NodeID}
	oldRoute := mesh.SyntheticRoute{
		RouteID:           "route-ab",
		ClusterID:         "cluster-1",
		SourceNodeID:      "node-a",
		DestinationNodeID: "node-b",
		Hops:              []string{"node-a", "node-r-old", "node-b"},
		AllowedChannels:   []string{mesh.SyntheticChannelFabricControl},
		ExpiresAt:         now.Add(10 * time.Minute),
		RouteVersion:      "old-config",
	}
	oldCache := mesh.NewPeerCache(mesh.PeerCacheConfig{
		Local:         local,
		PeerEndpoints: map[string]string{"node-r-old": "quic://node-r-old:19443"},
		Routes:        []mesh.SyntheticRoute{oldRoute},
		WarmPeerLimit: 1,
		Now:           now,
	})
	meshState := &syntheticMeshState{
		PeerCache:         oldCache,
		Routes:            []mesh.SyntheticRoute{oldRoute},
		RouteHealthRoutes: []mesh.SyntheticRoute{oldRoute},
		Source:            "control_plane",
		ConfigVersion:     "old-config",
		PeerConnections:   mesh.NewPeerConnectionTracker(oldCache.Snapshot(), now),
	}

	// Stub backend describing the replacement configuration. server.URL is
	// never passed to the client below, so the request counter is expected to
	// stay at zero.
	requests := 0
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// Handlers run on a server goroutine, where t.Fatalf must not be
		// called; record the failure with t.Errorf and reply with an error.
		if r.URL.Path != "/clusters/cluster-1/nodes/node-a/mesh/synthetic-config" {
			t.Errorf("unexpected path: %s", r.URL.Path)
			http.Error(w, "unexpected path", http.StatusNotFound)
			return
		}
		requests++
		response := map[string]any{
			"synthetic_mesh_config": map[string]any{
				"enabled":                true,
				"schema_version":         "c17z20.synthetic.v1",
				"cluster_id":             "cluster-1",
				"local_node_id":          "node-a",
				"config_version":         "new-config",
				"peer_directory_version": "new-config",
				"policy_version":         "new-config",
				"peer_endpoints":         map[string]string{"node-r-new": "quic://node-r-new:19443"},
				"rendezvous_leases": []map[string]any{
					{
						"lease_id":           "lease-node-b-via-node-r-new",
						"peer_node_id":       "node-b",
						"relay_node_id":      "node-r-new",
						"relay_endpoint":     "quic://node-r-new:19443",
						"transport":          "relay_quic",
						"connectivity_mode":  "relay_required",
						"route_ids":          []string{"route-ab"},
						"allowed_channels":   []string{mesh.SyntheticChannelFabricControl},
						"priority":           5,
						"control_plane_only": true,
						"issued_at":          now,
						"expires_at":         now.Add(10 * time.Minute),
						"reason":             "stale_relay_replacement",
					},
				},
				"route_path_decisions": map[string]any{
					"schema_version":             "c17z20.route_path_decisions.v1",
					"decision_mode":              "control_plane_effective_path_from_relay_policy",
					"generation":                 "new-config",
					"decision_count":             1,
					"replacement_decision_count": 1,
					"control_plane_only":         true,
					"production_forwarding":      false,
					"decisions": []map[string]any{
						{
							"decision_id":             "route-ab-path-node-a-via-node-r-new",
							"route_id":                "route-ab",
							"cluster_id":              "cluster-1",
							"local_node_id":           "node-a",
							"source_node_id":          "node-a",
							"destination_node_id":     "node-b",
							"original_hops":           []string{"node-a", "node-r-old", "node-r-new", "node-b"},
							"effective_hops":          []string{"node-a", "node-r-new", "node-b"},
							"next_hop_id":             "node-r-new",
							"local_role":              "entry",
							"selected_relay_id":       "node-r-new",
							"selected_relay_endpoint": "quic://node-r-new:19443",
							"stale_relay_node_id":     "node-r-old",
							"rendezvous_peer_node_id": "node-b",
							"rendezvous_lease_id":     "lease-node-b-via-node-r-new",
							"rendezvous_lease_reason": "stale_relay_replacement",
							"decision_source":         "stale_relay_replacement",
							"generation":              "new-config",
							"path_score":              900,
							"score_reasons":           []string{"route_health_feedback"},
							"control_plane_only":      true,
							"production_forwarding":   false,
							"expires_at":              now.Add(10 * time.Minute),
						},
					},
				},
				"routes": []map[string]any{
					{
						"route_id":            "route-ab",
						"cluster_id":          "cluster-1",
						"source_node_id":      "node-a",
						"destination_node_id": "node-b",
						"hops":                []string{"node-a", "node-r-old", "node-r-new", "node-b"},
						"allowed_channels":    []string{mesh.SyntheticChannelFabricControl},
						"expires_at":          now.Add(10 * time.Minute),
						"max_ttl":             6,
						"max_hops":            6,
					},
				},
				"production_forwarding": false,
			},
		}
		if err := json.NewEncoder(w).Encode(response); err != nil {
			t.Errorf("write response: %v", err)
		}
	}))
	defer server.Close()

	trigger := meshRouteHealthFeedbackTrigger{
		Reason:          "synthetic_route_health_drift",
		RouteID:         "route-ab",
		PeerNodeID:      "node-b",
		SelectedRelayID: "node-r-old",
		LinkStatus:      "reachable",
		DriftDetected:   true,
		ObservedAt:      now,
	}
	err := refreshSyntheticMeshConfigForRouteHealthFeedback(context.Background(), config.Config{}, identity, client.New(""), meshState, trigger, now)
	if err == nil || !strings.Contains(err.Error(), "fabric control synthetic mesh config route unavailable") {
		t.Fatalf("refresh route health feedback error = %v, want fabric route failure", err)
	}
	if requests != 0 {
		t.Fatalf("requests = %d, want 0 backend requests", requests)
	}
	if meshState.ConfigVersion != "old-config" {
		t.Fatalf("config version = %s, want old-config", meshState.ConfigVersion)
	}
}
func TestRouteHealthFeedbackRefreshBackoffSuppressesRepeatedTrigger(t *testing.T) {
now := time.Date(2026, 4, 28, 22, 5, 0, 0, time.UTC)
local := mesh.PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"}
cache := mesh.NewPeerCache(mesh.PeerCacheConfig{
Local: local,
PeerEndpoints: map[string]string{"node-b": "quic://node-b:19443"},
WarmPeerLimit: 1,
Now: now,
})
meshState := &syntheticMeshState{
PeerCache: cache,
Source: "control_plane",
LastRouteHealthRefresh: &meshRouteHealthFeedbackRefreshState{
Status: "succeeded",
Reason: "synthetic_route_health_drift",
AttemptedAt: now.Add(-time.Second),
RouteID: "route-ab",
},
}
trigger := meshRouteHealthFeedbackTrigger{
Reason: "synthetic_route_health_failure",
RouteID: "route-ab",
PeerNodeID: "node-b",
LinkStatus: "unreachable",
FailureReason: "probe failed",
ObservedAt: now,
}
err := refreshSyntheticMeshConfigForRouteHealthFeedback(context.Background(), config.Config{}, state.Identity{ClusterID: "cluster-1", NodeID: "node-a"}, client.New("quic://127.0.0.1:1"), meshState, trigger, now)
if err != nil {
t.Fatalf("refresh should have been suppressed without backend call: %v", err)
}
if meshState.RouteHealthRefreshAttempts != 0 || meshState.RouteHealthRefreshSuppressed != 1 {
t.Fatalf("unexpected counters: attempts=%d suppressed=%d", meshState.RouteHealthRefreshAttempts, meshState.RouteHealthRefreshSuppressed)
}
if meshState.LastRouteHealthRefresh.Reason != "synthetic_route_health_drift" {
t.Fatalf("suppressed refresh should not replace last state: %+v", meshState.LastRouteHealthRefresh)
}
}
// TestRouteHealthFeedbackTriggerFromObservation checks
// routeHealthFeedbackTriggerFromObservation for two "reachable" observations:
// one flagged with route_path_drift_detected=true, which must yield a drift
// trigger carrying the route, peer and relay identifiers, and one with the flag
// false, which must yield no trigger.
func TestRouteHealthFeedbackTriggerFromObservation(t *testing.T) {
	now := time.Date(2026, 4, 28, 22, 10, 0, 0, time.UTC)
	route := mesh.SyntheticRoute{
		RouteID:           "route-ab",
		SourceNodeID:      "node-a",
		DestinationNodeID: "node-b",
		Hops:              []string{"node-a", "node-r-old", "node-b"},
	}
	decision := client.RoutePathDecision{
		RouteID:               "route-ab",
		RendezvousPeerNodeID:  "node-b",
		SelectedRelayID:       "node-r-old",
		RendezvousLeaseID:     "lease-old",
		RendezvousLeaseReason: "auto_outbound_only",
	}

	drifted := map[string]any{"route_path_drift_detected": true}
	trigger, ok := routeHealthFeedbackTriggerFromObservation(route, decision, true, "reachable", drifted, now)
	switch {
	case !ok,
		trigger.Reason != "synthetic_route_health_drift",
		trigger.RouteID != "route-ab",
		trigger.PeerNodeID != "node-b",
		trigger.SelectedRelayID != "node-r-old",
		!trigger.DriftDetected:
		t.Fatalf("unexpected drift trigger: %+v ok=%t", trigger, ok)
	}

	healthy := map[string]any{"route_path_drift_detected": false}
	_, triggered := routeHealthFeedbackTriggerFromObservation(route, decision, true, "reachable", healthy, now)
	if triggered {
		t.Fatal("healthy route-health observation should not trigger refresh")
	}
}
// TestMeshRouteGenerationTrackerReportsReplacementWithdrawOnFirstApply builds a
// route generation tracker from a single stale-relay replacement decision and
// asserts that the generation report produced one second later counts one
// active, one applied and one withdrawn decision (also in the running total)
// and flags the generation as changed.
func TestMeshRouteGenerationTrackerReportsReplacementWithdrawOnFirstApply(t *testing.T) {
	now := time.Date(2026, 4, 28, 13, 50, 0, 0, time.UTC)

	replacement := client.RoutePathDecision{
		DecisionID:            "route-1-path-node-a-via-node-r-new",
		RouteID:               "route-1",
		LocalNodeID:           "node-a",
		LocalRole:             "entry",
		OriginalHops:          []string{"node-a", "node-r-old", "node-r-new", "node-c"},
		EffectiveHops:         []string{"node-a", "node-r-new", "node-c"},
		NextHopID:             "node-r-new",
		SelectedRelayID:       "node-r-new",
		SelectedRelayEndpoint: "quic://node-r-new:19443",
		StaleRelayNodeID:      "node-r-old",
		RendezvousLeaseID:     "lease-node-c-via-node-r-new",
		RendezvousLeaseReason: "stale_relay_replacement",
		DecisionSource:        "stale_relay_replacement",
		Generation:            "config-replacement",
		PathScore:             760,
		ControlPlaneOnly:      true,
		ProductionForwarding:  false,
	}
	report := &client.RoutePathDecisionReport{
		SchemaVersion:            "c17z18.route_path_decisions.v1",
		Generation:               "config-replacement",
		ReplacementDecisionCount: 1,
		Decisions:                []client.RoutePathDecision{replacement},
	}
	tracker := newMeshRouteGenerationTracker(report, now)
	meshState := &syntheticMeshState{
		Source:                 "control_plane",
		ConfigVersion:          "config-replacement",
		RoutePathDecisions:     report,
		RouteGenerationTracker: tracker,
	}

	identity := state.Identity{ClusterID: "cluster-1", NodeID: "node-a"}
	generationReport := meshRouteGenerationReport(meshState, identity, now.Add(time.Second))
	switch {
	case generationReport["active_decision_count"] != 1,
		generationReport["applied_decision_count"] != 1,
		generationReport["withdrawn_decision_count"] != 1,
		generationReport["total_withdrawn_decision_count"] != 1,
		generationReport["generation_changed"] != true:
		t.Fatalf("unexpected first-apply route generation report: %+v", generationReport)
	}
}
func TestProductionEnvelopeObservationSinkFromConfigCreatesBoundedSink(t *testing.T) {
sink := productionEnvelopeObservationSinkFromConfig(config.Config{
MeshProductionObservationSinkCapacity: 2,
})
if sink == nil {
t.Fatal("sink is nil")
}
if sink.Capacity() != 2 {
t.Fatalf("sink capacity = %d, want 2", sink.Capacity())
}
}
func TestProductionForwardingLogStateDistinguishesGateFromRuntime(t *testing.T) {
gateEnabled, runtimeEnabled := productionForwardingLogState(config.Config{
MeshProductionForwardingEnabled: true,
}, false)
if !gateEnabled {
t.Fatal("gateEnabled = false, want true")
}
if !runtimeEnabled {
t.Fatal("runtimeEnabled = false, want true")
}
gateEnabled, runtimeEnabled = productionForwardingLogState(config.Config{}, false)
if gateEnabled || runtimeEnabled {
t.Fatalf("default log state = gate:%t runtime:%t, want false/false", gateEnabled, runtimeEnabled)
}
gateEnabled, runtimeEnabled = productionForwardingLogState(config.Config{}, true)
if !gateEnabled || !runtimeEnabled {
t.Fatalf("signed control-plane log state = gate:%t runtime:%t, want true/true", gateEnabled, runtimeEnabled)
}
}
// TestMeshLinkStatusFromPeerProbeMapsDeferredForLatestLinks checks the mapping
// from peer probe results to link status strings, including the deferred probe
// mapping to "degraded" and unrecognised input falling back to "unknown".
func TestMeshLinkStatusFromPeerProbeMapsDeferredForLatestLinks(t *testing.T) {
	wantByProbe := map[string]string{
		mesh.PeerConnectionProbeReachable:   "reachable",
		mesh.PeerConnectionProbeUnreachable: "unreachable",
		mesh.PeerConnectionProbeDeferred:    "degraded",
		mesh.PeerConnectionProbeSkipped:     "unknown",
		"unexpected":                        "unknown",
	}
	for probe, expected := range wantByProbe {
		actual := meshLinkStatusFromPeerProbe(probe)
		if actual == expected {
			continue
		}
		t.Fatalf("meshLinkStatusFromPeerProbe(%q) = %q, want %q", probe, actual, expected)
	}
}
// TestLogProductionObservationSinkMetricsToleratesNilState calls
// logProductionObservationSinkMetrics with a nil state and with a zero-value
// state; the test passes as long as neither call panics.
func TestLogProductionObservationSinkMetricsToleratesNilState(t *testing.T) {
	// No mesh state at all.
	logProductionObservationSinkMetrics(nil)

	// Mesh state present but with every field left at its zero value.
	zeroState := &syntheticMeshState{}
	logProductionObservationSinkMetrics(zeroState)
}
// TestLogProductionObservationSinkMetricsOnlyWhenChanged redirects the standard
// logger into a buffer and asserts that logProductionObservationSinkMetrics
// writes on the first call, stays silent on an immediate second call, and
// writes again after the sink has accepted an observation.
func TestLogProductionObservationSinkMetricsOnlyWhenChanged(t *testing.T) {
	sink := mesh.NewProductionEnvelopeObservationSink(2)
	meshState := &syntheticMeshState{ProductionObservationSink: sink}

	captured := &strings.Builder{}
	originalWriter := log.Writer()
	log.SetOutput(captured)
	// Deferred calls run last-in-first-out: output is first pointed at
	// io.Discard and then restored to the writer that was active on entry.
	defer log.SetOutput(originalWriter)
	defer log.SetOutput(io.Discard)

	logProductionObservationSinkMetrics(meshState)
	baseline := captured.Len()
	if baseline == 0 {
		t.Fatal("first metrics log was not written")
	}

	logProductionObservationSinkMetrics(meshState)
	if captured.Len() != baseline {
		t.Fatal("metrics log was written again without metric changes")
	}

	observation := mesh.ProductionEnvelopeObservation{MessageID: "message-1"}
	if err := sink.Observe(context.Background(), observation); err != nil {
		t.Fatalf("observe: %v", err)
	}
	logProductionObservationSinkMetrics(meshState)
	if captured.Len() == baseline {
		t.Fatal("metrics log was not written after metric changes")
	}
}
// TestProductionObservationSinkMetricsEqual asserts that
// productionObservationSinkMetricsEqual reports a metrics value as equal to
// itself and as unequal to a copy whose DroppedOldest counter differs.
func TestProductionObservationSinkMetricsEqual(t *testing.T) {
	base := mesh.ProductionEnvelopeObservationSinkMetrics{
		Capacity:      2,
		CurrentDepth:  1,
		AcceptedTotal: 1,
		DroppedOldest: 0,
	}
	if same := productionObservationSinkMetricsEqual(base, base); !same {
		t.Fatal("identical metrics were not equal")
	}

	changed := base
	changed.DroppedOldest = 1
	if same := productionObservationSinkMetricsEqual(base, changed); same {
		t.Fatal("different metrics were equal")
	}
}
func TestWebIngressForwardHandlerFromConfigVerifiesSignedEnvelope(t *testing.T) {
publicKey, privateKey, err := ed25519.GenerateKey(rand.Reader)
if err != nil {
t.Fatalf("generate key: %v", err)
}
keyID := "web-key-1"
handler := webIngressForwardHandlerFromConfig(config.Config{
WebIngressTrustedKeysJSON: webingress.TrustedKeysJSONForPublicKey(keyID, publicKey),
}, state.Identity{ClusterID: "cluster-1", NodeID: "node-1"}, nil, nil)
if handler == nil {
t.Fatal("handler is nil")
}
envelope := webingress.FabricServiceChannelEnvelope{
SchemaVersion: webingress.FabricServiceChannelEnvelopeSchema,
RequestSchema: "rap.web_ingress.fabric_request.v1",
Method: http.MethodGet,
Path: "/admin/root",
ServiceType: "admin-ingress",
Scope: "platform",
ServiceClass: "admin-ingress",
ObservedAt: time.Now().UTC().Format(time.RFC3339Nano),
EnvelopedAt: time.Now().UTC().Format(time.RFC3339Nano),
}
canonical, err := json.Marshal(envelope)
if err != nil {
t.Fatalf("marshal envelope: %v", err)
}
payload, err := json.Marshal(webingress.SignedFabricServiceChannelEnvelope{
SchemaVersion: webingress.SignedFabricServiceChannelEnvelopeSchema,
Envelope: envelope,
Signature: webingress.FabricEnvelopeSignature{
KeyID: keyID,
Alg: "ed25519",
Signature: base64.StdEncoding.EncodeToString(ed25519.Sign(privateKey, canonical)),
},
})
if err != nil {
t.Fatalf("marshal signed envelope: %v", err)
}
responsePayload, err := handler(context.Background(), payload)
if err != nil {
t.Fatalf("handler: %v", err)
}
var response struct {
SchemaVersion string `json:"schema_version"`
StatusCode int `json:"status_code"`
BodyBase64 string `json:"body_b64"`
}
if err := json.Unmarshal(responsePayload, &response); err != nil {
t.Fatalf("decode response: %v", err)
}
if response.SchemaVersion != webingress.FabricRuntimeResponseSchema || response.StatusCode != http.StatusBadGateway || response.BodyBase64 == "" {
t.Fatalf("response = %+v", response)
}
}
// TestWebIngressForwardHandlerFromConfigDisabledWithoutTrustedKeys asserts that
// no handler is returned when the trusted-keys JSON is absent or is the
// `{"bad":"key"}` document.
func TestWebIngressForwardHandlerFromConfigDisabledWithoutTrustedKeys(t *testing.T) {
	withoutKeys := webIngressForwardHandlerFromConfig(config.Config{}, state.Identity{}, nil, nil)
	if withoutKeys != nil {
		t.Fatal("handler should be nil without trusted keys")
	}

	badKeys := config.Config{WebIngressTrustedKeysJSON: `{"bad":"key"}`}
	withBadKeys := webIngressForwardHandlerFromConfig(badKeys, state.Identity{}, nil, nil)
	if withBadKeys != nil {
		t.Fatal("handler should be nil with invalid trusted keys")
	}
}
// TestWebIngressRuntimeServiceClassesFromConfig asserts that the zero config
// yields classes including "admin-ingress" and "public-ingress", and that the
// string " admin-ingress, unknown, admin-ingress " yields exactly
// ["admin-ingress"].
func TestWebIngressRuntimeServiceClassesFromConfig(t *testing.T) {
	defaults := webIngressRuntimeServiceClassesFromConfig(config.Config{})
	if !(containsString(defaults, "admin-ingress") && containsString(defaults, "public-ingress")) {
		t.Fatalf("default classes = %v", defaults)
	}

	restricted := config.Config{
		WebIngressRuntimeServiceClasses: " admin-ingress, unknown, admin-ingress ",
	}
	limited := webIngressRuntimeServiceClassesFromConfig(restricted)
	// Check the length first so the index access below cannot go out of range.
	if len(limited) != 1 || limited[0] != "admin-ingress" {
		t.Fatalf("limited classes = %v", limited)
	}
}
func TestWebIngressRuntimeReceiverReport(t *testing.T) {
publicKey, _, err := ed25519.GenerateKey(rand.Reader)
if err != nil {
t.Fatalf("generate key: %v", err)
}
report := webIngressRuntimeReceiverReport(config.Config{
WebIngressTrustedKeysJSON: webingress.TrustedKeysJSONForPublicKey("web-key-1", publicKey),
WebIngressRuntimeServiceClasses: "admin-ingress",
}, &syntheticMeshState{QUICFabricListenAddr: "127.0.0.1:19443"}, time.Date(2026, 5, 17, 1, 2, 3, 0, time.UTC))
if report["schema_version"] != "rap.web_ingress.runtime_receiver_report.v1" ||
report["enabled"] != true ||
report["handler_installed"] != true ||
report["status"] != "degraded" ||
report["reason"] != "quic_fabric_not_ready" ||
report["trusted_key_count"] != 1 ||
report["quic_stream_id"] != mesh.WebIngressForwardQUICStreamID ||
report["quic_fabric_enabled"] != true {
t.Fatalf("report = %+v", report)
}
classes, ok := report["service_classes"].([]string)
if !ok || len(classes) != 1 || classes[0] != "admin-ingress" {
t.Fatalf("service_classes = %#v", report["service_classes"])
}
}
// TestWebIngressRuntimeReceiverReportWithoutTrustedKeys asserts that a zero
// config with a nil mesh state produces a report that is disabled, has no
// handler installed, and is "blocked" with reason "trusted_keys_required".
func TestWebIngressRuntimeReceiverReportWithoutTrustedKeys(t *testing.T) {
	reportedAt := time.Date(2026, 5, 17, 1, 2, 3, 0, time.UTC)
	report := webIngressRuntimeReceiverReport(config.Config{}, nil, reportedAt)
	switch {
	case report["enabled"] != false,
		report["handler_installed"] != false,
		report["status"] != "blocked",
		report["reason"] != "trusted_keys_required":
		t.Fatalf("report = %+v", report)
	}
}