Files
rdp-proxy/backend/internal/modules/cluster/service_test.go
T
m 20d361a886
build / backend (push) Has been cancelled
build / node-agent (push) Has been cancelled
build / worker (push) Has been cancelled
рабочий вариант, но скорость 10 МБит
2026-05-22 21:46:49 +03:00

12412 lines
450 KiB
Go

package cluster
import (
"context"
"encoding/json"
"errors"
"reflect"
"sort"
"strconv"
"strings"
"testing"
"time"
"github.com/example/remote-access-platform/backend/internal/platform/clusterauth"
"github.com/example/remote-access-platform/backend/internal/platform/secrets"
"github.com/jackc/pgx/v5"
)
// TestHashJoinTokenDoesNotStoreRawToken checks that hashJoinToken returns a
// value that differs from the raw token, starts with joinTokenHashPrefix, and
// is identical when the same raw token is hashed a second time.
func TestHashJoinTokenDoesNotStoreRawToken(t *testing.T) {
	raw := "rap_join_example"
	hashed, err := hashJoinToken(raw)
	if err != nil {
		t.Fatalf("hash join token: %v", err)
	}
	if hashed == raw {
		t.Fatal("hash must not equal raw token")
	}
	// HasPrefix rather than slicing: a hash shorter than the prefix must fail
	// the test with a readable message instead of panicking on an
	// out-of-range slice expression.
	if !strings.HasPrefix(hashed, joinTokenHashPrefix) {
		t.Fatalf("hash = %q, want prefix %q", hashed, joinTokenHashPrefix)
	}
	hashedAgain, err := hashJoinToken(raw)
	if err != nil {
		t.Fatalf("hash join token again: %v", err)
	}
	if hashed != hashedAgain {
		t.Fatal("hash must be deterministic")
	}
}
// TestClusterAuthorityPrivateKeyEncodingUsesSecretEncryptor round-trips a
// private key through a PostgresStore configured with a secrets encryptor.
// The encoded form must differ from the plaintext and carry
// encryptedClusterAuthorityKeyPrefix, decoding with the same cluster ID must
// return the plaintext, and decoding with a different cluster ID must fail.
func TestClusterAuthorityPrivateKeyEncodingUsesSecretEncryptor(t *testing.T) {
	enc, err := secrets.NewEncryptor("MDEyMzQ1Njc4OWFiY2RlZjAxMjM0NTY3ODlhYmNkZWY=", "test-key")
	if err != nil {
		t.Fatalf("NewEncryptor: %v", err)
	}
	store := (&PostgresStore{}).WithClusterKeyEncryptor(enc)

	ciphertext, err := store.encodeClusterAuthorityPrivateKey("cluster-1", "private-key")
	if err != nil {
		t.Fatalf("encodeClusterAuthorityPrivateKey: %v", err)
	}
	encrypted := ciphertext != "private-key" && strings.HasPrefix(ciphertext, encryptedClusterAuthorityKeyPrefix)
	if !encrypted {
		t.Fatalf("private key was not encrypted: %q", ciphertext)
	}

	plaintext, err := store.decodeClusterAuthorityPrivateKey("cluster-1", ciphertext)
	if err != nil {
		t.Fatalf("decodeClusterAuthorityPrivateKey: %v", err)
	}
	if plaintext != "private-key" {
		t.Fatalf("decoded private key = %q", plaintext)
	}

	// The same ciphertext presented under another cluster ID must be rejected.
	_, err = store.decodeClusterAuthorityPrivateKey("cluster-2", ciphertext)
	if err == nil {
		t.Fatal("expected wrong cluster AAD to fail")
	}
}
func TestNodeUpdateHintAssignsUpdateServiceSubscription(t *testing.T) {
targetVersion := "0.2.15"
now := time.Date(2026, 5, 2, 8, 0, 0, 0, time.UTC)
repo := &fakeRepository{
nodeUpdatePolicies: map[string]NodeUpdatePolicy{
"node-1|rap-node-agent": {
ClusterID: "cluster-1",
NodeID: "node-1",
Product: "rap-node-agent",
Channel: "dev",
TargetVersion: &targetVersion,
Enabled: true,
UpdatedAt: now,
},
},
updateServiceCandidates: []NodeUpdateServiceCandidate{{
NodeID: "update-1",
NodeName: "update-cache-1",
Endpoint: "quic://10.0.0.5:19131",
Region: "office",
}},
}
service := NewService(repo)
service.now = func() time.Time { return now }
hint := service.GetNodeUpdateHint(context.Background(), "cluster-1", "node-1")
if !hint.CheckNow || hint.Generation == "" {
t.Fatalf("expected update hint generation, got %+v", hint)
}
if hint.DeliveryMode != "update_service_subscription" || hint.SubscriptionStatus != "subscribed" {
t.Fatalf("unexpected subscription state: %+v", hint)
}
if hint.RescuePollSeconds != 21600 {
t.Fatalf("rescue poll seconds = %d", hint.RescuePollSeconds)
}
if hint.UpdateService == nil || hint.UpdateService.NodeID != "update-1" || hint.UpdateService.Status != "assigned" {
t.Fatalf("unexpected update service assignment: %+v", hint.UpdateService)
}
if len(hint.UpdateServiceCandidates) != 1 || hint.UpdateServiceCandidates[0].NodeID != "update-1" || hint.UpdateServiceCandidates[0].Status != "assigned" {
t.Fatalf("unexpected update service candidates: %+v", hint.UpdateServiceCandidates)
}
if hint.AuthoritySignature == nil || len(hint.AuthorityPayload) == 0 {
t.Fatalf("expected signed update hint: %+v", hint)
}
if err := clusterauth.VerifyRaw(repo.clusterAuthority.PublicKey, hint.AuthorityPayload, *hint.AuthoritySignature); err != nil {
t.Fatalf("verify update hint authority signature: %v", err)
}
}
func TestTriggerNodeUpdateHintBumpsGeneration(t *testing.T) {
targetVersion := "0.2.15"
now := time.Date(2026, 5, 2, 8, 0, 0, 0, time.UTC)
repo := &fakeRepository{
platformRole: PlatformRoleAdmin,
clusterNodes: []ClusterNode{{
ID: "node-1",
Name: "node-1",
RegistrationStatus: NodeRegistrationActive,
MembershipStatus: "active",
}},
nodeUpdatePolicies: map[string]NodeUpdatePolicy{
"node-1|rap-node-agent": {
ClusterID: "cluster-1",
NodeID: "node-1",
Product: "rap-node-agent",
Channel: "dev",
TargetVersion: &targetVersion,
Enabled: true,
UpdatedAt: now,
},
},
}
service := NewService(repo)
service.now = func() time.Time { return now }
before := service.GetNodeUpdateHint(context.Background(), "cluster-1", "node-1")
service.now = func() time.Time { return now.Add(time.Second) }
after, err := service.TriggerNodeUpdateHint(context.Background(), "admin-1", "cluster-1", "node-1")
if err != nil {
t.Fatalf("trigger update hint: %v", err)
}
if !after.CheckNow || after.Generation == "" {
t.Fatalf("expected check-now hint, got %+v", after)
}
if after.Generation == before.Generation {
t.Fatalf("expected manual trigger to bump generation, before=%q after=%q", before.Generation, after.Generation)
}
if len(repo.auditEvents) != 1 || repo.auditEvents[0].EventType != "node_update_hint.triggered" {
t.Fatalf("expected trigger audit event, got %+v", repo.auditEvents)
}
}
// TestNodeUpdateHintIncludesOrderedFallbackUpdateServices supplies three
// update service candidates, one of which is the requesting node itself
// (node-2). The hint must name node-2 as the primary update service and list
// it first with status "assigned", followed by update-1 and update-3 with
// status "standby".
func TestNodeUpdateHintIncludesOrderedFallbackUpdateServices(t *testing.T) {
	fixedNow := time.Date(2026, 5, 2, 8, 0, 0, 0, time.UTC)
	wantVersion := "0.2.15"
	fake := &fakeRepository{
		nodeUpdatePolicies: map[string]NodeUpdatePolicy{
			"node-2|rap-node-agent": {
				ClusterID:     "cluster-1",
				NodeID:        "node-2",
				Product:       "rap-node-agent",
				Channel:       "dev",
				TargetVersion: &wantVersion,
				Enabled:       true,
				UpdatedAt:     fixedNow,
			},
		},
		updateServiceCandidates: []NodeUpdateServiceCandidate{
			{NodeID: "update-1", NodeName: "update-cache-1", Endpoint: "quic://10.0.0.5:19443", Region: "office"},
			{NodeID: "node-2", NodeName: "self-update-cache", Endpoint: "quic://10.0.0.9:19443", Region: "office"},
			{NodeID: "update-3", NodeName: "update-cache-3", Endpoint: "quic://10.0.0.6:19443", Region: "office"},
		},
	}
	svc := NewService(fake)
	svc.now = func() time.Time { return fixedNow }

	got := svc.GetNodeUpdateHint(context.Background(), "cluster-1", "node-2")

	if primary := got.UpdateService; primary == nil || primary.NodeID != "node-2" {
		t.Fatalf("unexpected primary update service: %+v", primary)
	}
	ordered := got.UpdateServiceCandidates
	if count := len(ordered); count != 3 {
		t.Fatalf("candidate count = %d, want 3", count)
	}
	if first := ordered[0]; first.NodeID != "node-2" || first.Status != "assigned" {
		t.Fatalf("unexpected primary candidate ordering: %+v", ordered)
	}
	if second := ordered[1]; second.NodeID != "update-1" || second.Status != "standby" {
		t.Fatalf("unexpected fallback candidate[1]: %+v", second)
	}
	if third := ordered[2]; third.NodeID != "update-3" || third.Status != "standby" {
		t.Fatalf("unexpected fallback candidate[2]: %+v", third)
	}
}
func TestNodeUpdateHintFallsBackWhenNoUpdateServiceHealthy(t *testing.T) {
targetVersion := "0.2.15"
now := time.Date(2026, 5, 2, 8, 0, 0, 0, time.UTC)
repo := &fakeRepository{
nodeUpdatePolicies: map[string]NodeUpdatePolicy{
"node-1|rap-host-agent": {
ClusterID: "cluster-1",
NodeID: "node-1",
Product: "rap-host-agent",
Channel: "dev",
TargetVersion: &targetVersion,
Enabled: true,
UpdatedAt: now,
},
},
fabricRebuildAttempts: []FabricServiceChannelRouteRebuildAttempt{{
ID: "fsc-rebuild-guard-1",
ClusterID: "cluster-1",
ReporterNodeID: "entry-1",
ServiceClass: FabricServiceClassVPNPackets,
RouteID: "route-bad",
ReplacementRouteID: "route-outside-exit",
RebuildRequestID: "fsc-remediation:channel-guard:rebuild_route:route-outside-exit",
RebuildStatus: "rejected",
RebuildReason: "replacement_exit_outside_signed_pool_policy",
DecisionSource: "service_channel_remediation_command",
Outcome: "policy_guard_rejected",
PolicyFingerprint: "pool-fingerprint-1",
CreatedAt: now,
UpdatedAt: now,
}},
}
service := NewService(repo)
service.now = func() time.Time { return now }
hint := service.GetNodeUpdateHint(context.Background(), "cluster-1", "node-1")
if !hint.CheckNow || hint.UpdateService == nil {
t.Fatalf("expected fallback hint with update service object, got %+v", hint)
}
if hint.UpdateService.Status != "control_plane_fallback" {
t.Fatalf("fallback status = %q", hint.UpdateService.Status)
}
}
// TestNodeUpdateHintBumpsGenerationForDisallowedLaggingNode models a healthy
// node that reports a version older than the policy target and whose last
// update status is a "plan"/"noop". The hint fetched at 01:00 must request a
// check with reason "update_execution_retry", and the hint fetched at 01:11
// must carry a different generation.
func TestNodeUpdateHintBumpsGenerationForDisallowedLaggingNode(t *testing.T) {
	target := "0.2.327-registryjoinrewrite"
	reported := "0.2.322-controlendpointsrewrite"
	policyUpdatedAt := time.Date(2026, 5, 19, 0, 0, 0, 0, time.UTC)
	lastSeen := time.Date(2026, 5, 19, 1, 0, 0, 0, time.UTC)

	fake := &fakeRepository{
		nodeUpdatePolicies: map[string]NodeUpdatePolicy{
			"node-1|rap-node-agent": {
				ClusterID:     "cluster-1",
				NodeID:        "node-1",
				Product:       "rap-node-agent",
				Channel:       "stable",
				TargetVersion: &target,
				Enabled:       true,
				UpdatedAt:     policyUpdatedAt,
			},
		},
		clusterNodes: []ClusterNode{{
			ID:              "node-1",
			Name:            "ifcm-rufms-s-mo1cr",
			HealthStatus:    "healthy",
			LastSeenAt:      &lastSeen,
			ReportedVersion: &reported,
		}},
		updateStatuses: []NodeUpdateStatus{{
			ClusterID:      "cluster-1",
			NodeID:         "node-1",
			Product:        "rap-node-agent",
			CurrentVersion: "0.2.322-controlendpointsrewrite",
			TargetVersion:  "0.2.327-registryjoinrewrite",
			Phase:          "plan",
			Status:         "noop",
			ObservedAt:     time.Date(2026, 5, 19, 0, 0, 0, 0, time.UTC),
		}},
	}
	svc := NewService(fake)

	svc.now = func() time.Time { return time.Date(2026, 5, 19, 1, 0, 0, 0, time.UTC) }
	first := svc.GetNodeUpdateHint(context.Background(), "cluster-1", "node-1")
	if first.Generation == "" || !first.CheckNow {
		t.Fatalf("expected wake-capable generation for lagging standard node, got %+v", first)
	}
	if first.Reason != "update_execution_retry" {
		t.Fatalf("hint reason = %q, want update_execution_retry", first.Reason)
	}

	// Eleven minutes later the generation is expected to have changed.
	svc.now = func() time.Time { return time.Date(2026, 5, 19, 1, 11, 0, 0, time.UTC) }
	second := svc.GetNodeUpdateHint(context.Background(), "cluster-1", "node-1")
	if first.Generation == second.Generation {
		t.Fatalf("expected generation bump across wake epochs, got %q", second.Generation)
	}
}
// TestNodeUpdateHintBumpsGenerationForWakeCapableLaggingNode models a healthy
// node with a fresh heartbeat that still reports a version older than the
// policy target. The hint fetched at 02:48 must request a check with reason
// "update_execution_retry", and the hint fetched at 03:01 must carry a
// different generation.
func TestNodeUpdateHintBumpsGenerationForWakeCapableLaggingNode(t *testing.T) {
	target := "0.2.332-relaycertintentfix"
	reported := "0.2.327-registryjoinrewrite"
	policyUpdatedAt := time.Date(2026, 5, 19, 2, 40, 0, 0, time.UTC)
	lastSeen := time.Date(2026, 5, 19, 2, 20, 0, 0, time.UTC)

	fake := &fakeRepository{
		nodeUpdatePolicies: map[string]NodeUpdatePolicy{
			"node-1|rap-node-agent": {
				ClusterID:     "cluster-1",
				NodeID:        "node-1",
				Product:       "rap-node-agent",
				Channel:       "stable",
				TargetVersion: &target,
				Enabled:       true,
				UpdatedAt:     policyUpdatedAt,
			},
		},
		clusterNodes: []ClusterNode{{
			ID:              "node-1",
			Name:            "home-1",
			HealthStatus:    "healthy",
			LastSeenAt:      &lastSeen,
			ReportedVersion: &reported,
		}},
		heartbeats: map[string][]NodeHeartbeat{
			"node-1": {{
				NodeID:          "node-1",
				HealthStatus:    "healthy",
				ReportedVersion: strPtr("0.2.327-registryjoinrewrite"),
				ObservedAt:      time.Date(2026, 5, 19, 2, 48, 0, 0, time.UTC),
			}},
		},
		updateStatuses: []NodeUpdateStatus{{
			ClusterID:      "cluster-1",
			NodeID:         "node-1",
			Product:        "rap-node-agent",
			CurrentVersion: "0.2.327-registryjoinrewrite",
			TargetVersion:  "0.2.332-relaycertintentfix",
			Phase:          "plan",
			Status:         "noop",
			ObservedAt:     time.Date(2026, 5, 19, 2, 30, 0, 0, time.UTC),
		}},
	}
	svc := NewService(fake)

	svc.now = func() time.Time { return time.Date(2026, 5, 19, 2, 48, 0, 0, time.UTC) }
	first := svc.GetNodeUpdateHint(context.Background(), "cluster-1", "node-1")
	if first.Generation == "" || !first.CheckNow {
		t.Fatalf("expected wake-capable generation for lagging node, got %+v", first)
	}
	if first.Reason != "update_execution_retry" {
		t.Fatalf("hint reason = %q, want update_execution_retry", first.Reason)
	}

	// Thirteen minutes later the generation is expected to have changed.
	svc.now = func() time.Time { return time.Date(2026, 5, 19, 3, 1, 0, 0, time.UTC) }
	second := svc.GetNodeUpdateHint(context.Background(), "cluster-1", "node-1")
	if first.Generation == second.Generation {
		t.Fatalf("expected generation bump across wake epochs for wake-capable node, got %q", second.Generation)
	}
}
// TestNodeUpdateHintUsesCurrentPolicyTargetForWakeCapableLaggingNode covers a
// node whose last update status succeeded for an older target while the
// policy now points at a newer version. The hint must still request a check
// and carry a non-empty generation.
func TestNodeUpdateHintUsesCurrentPolicyTargetForWakeCapableLaggingNode(t *testing.T) {
	target := "0.2.333-dockerjoinfix"
	reported := "0.2.332-relaycertintentfix"
	policyUpdatedAt := time.Date(2026, 5, 19, 3, 20, 0, 0, time.UTC)
	lastSeen := time.Date(2026, 5, 19, 3, 27, 0, 0, time.UTC)

	fake := &fakeRepository{
		nodeUpdatePolicies: map[string]NodeUpdatePolicy{
			"node-1|rap-node-agent": {
				ClusterID:     "cluster-1",
				NodeID:        "node-1",
				Product:       "rap-node-agent",
				Channel:       "stable",
				TargetVersion: &target,
				Enabled:       true,
				UpdatedAt:     policyUpdatedAt,
			},
		},
		clusterNodes: []ClusterNode{{
			ID:              "node-1",
			Name:            "home-1",
			HealthStatus:    "healthy",
			LastSeenAt:      &lastSeen,
			ReportedVersion: &reported,
		}},
		// The recorded status still refers to the previous target version.
		updateStatuses: []NodeUpdateStatus{{
			ClusterID:      "cluster-1",
			NodeID:         "node-1",
			Product:        "rap-node-agent",
			CurrentVersion: "0.2.327-registryjoinrewrite",
			TargetVersion:  "0.2.332-relaycertintentfix",
			Phase:          "health_check",
			Status:         "succeeded",
			ObservedAt:     time.Date(2026, 5, 19, 3, 4, 0, 0, time.UTC),
		}},
	}
	svc := NewService(fake)
	svc.now = func() time.Time { return time.Date(2026, 5, 19, 3, 27, 0, 0, time.UTC) }

	got := svc.GetNodeUpdateHint(context.Background(), "cluster-1", "node-1")
	if got.Generation == "" || !got.CheckNow {
		t.Fatalf("expected update hint generation for node lagging behind current policy target, got %+v", got)
	}
}
// TestNodeUpdateHintMarksFreshNodeWithOldUpdateStatusAsExecutorStalled covers
// a node with a heartbeat at the current time whose only update status was
// observed hours earlier and targets an older version than the policy. The
// hint reason must be "update_executor_stalled".
func TestNodeUpdateHintMarksFreshNodeWithOldUpdateStatusAsExecutorStalled(t *testing.T) {
	target := "0.2.346-updaterrescueintent"
	reported := "0.2.339-dockerretagfix"
	policyUpdatedAt := time.Date(2026, 5, 19, 13, 0, 0, 0, time.UTC)

	fake := &fakeRepository{
		nodeUpdatePolicies: map[string]NodeUpdatePolicy{
			"node-1|rap-node-agent": {
				ClusterID:     "cluster-1",
				NodeID:        "node-1",
				Product:       "rap-node-agent",
				Channel:       "stable",
				TargetVersion: &target,
				Enabled:       true,
				UpdatedAt:     policyUpdatedAt,
			},
		},
		clusterNodes: []ClusterNode{{
			ID:              "node-1",
			Name:            "ifcm-rufms-s-mo1cr",
			HealthStatus:    "healthy",
			LastSeenAt:      ptrTime(time.Date(2026, 5, 19, 13, 30, 0, 0, time.UTC)),
			ReportedVersion: &reported,
		}},
		heartbeats: map[string][]NodeHeartbeat{
			"node-1": {{
				NodeID:          "node-1",
				HealthStatus:    "healthy",
				ReportedVersion: strPtr("0.2.339-dockerretagfix"),
				ObservedAt:      time.Date(2026, 5, 19, 13, 30, 0, 0, time.UTC),
			}},
		},
		// Observed about four hours before the heartbeat above.
		updateStatuses: []NodeUpdateStatus{{
			ClusterID:      "cluster-1",
			NodeID:         "node-1",
			Product:        "rap-node-agent",
			CurrentVersion: "0.2.322-controlendpointsrewrite",
			TargetVersion:  "0.2.339-dockerretagfix",
			Phase:          "health_check",
			Status:         "succeeded",
			ObservedAt:     time.Date(2026, 5, 19, 9, 27, 36, 0, time.UTC),
		}},
	}
	svc := NewService(fake)
	svc.now = func() time.Time { return time.Date(2026, 5, 19, 13, 30, 0, 0, time.UTC) }

	got := svc.GetNodeUpdateHint(context.Background(), "cluster-1", "node-1")
	if got.Reason != "update_executor_stalled" {
		t.Fatalf("hint reason = %q, want update_executor_stalled: %+v", got.Reason, got)
	}
}
func TestNodeUpdateHintMarksFabricExecutorUpgradeRequired(t *testing.T) {
targetVersion := "0.2.354-relay-quic-labels"
now := time.Date(2026, 5, 20, 16, 40, 0, 0, time.UTC)
reportedVersion := "0.2.347-quicfarmupdate"
repo := &fakeRepository{
clusterNodes: []ClusterNode{{
ID: "node-1",
Name: "home-1",
RegistrationStatus: NodeRegistrationActive,
HealthStatus: "healthy",
ReportedVersion: &reportedVersion,
LastSeenAt: ptrTime(now.Add(-15 * time.Second)),
}},
nodeUpdatePolicies: map[string]NodeUpdatePolicy{
"node-1|rap-node-agent": {
ClusterID: "cluster-1",
NodeID: "node-1",
Product: "rap-node-agent",
Channel: "stable",
TargetVersion: &targetVersion,
Enabled: true,
UpdatedAt: now.Add(-time.Hour),
},
},
updateStatuses: []NodeUpdateStatus{{
ClusterID: "cluster-1",
NodeID: "node-1",
Product: "rap-node-agent",
CurrentVersion: "0.2.353-sticky-fabric-control",
TargetVersion: targetVersion,
Phase: "apply",
Status: "failed",
Payload: json.RawMessage(`{"failure_class":"fabric_executor_upgrade_required","rescue_required":true}`),
ObservedAt: now.Add(-time.Minute),
}},
}
service := NewService(repo)
service.now = func() time.Time { return now }
hint := service.GetNodeUpdateHint(context.Background(), "cluster-1", "node-1")
if hint.Reason != "fabric_executor_upgrade_required" {
t.Fatalf("hint reason = %q, want fabric_executor_upgrade_required: %+v", hint.Reason, hint)
}
if !hint.CheckNow || hint.Generation == "" {
t.Fatalf("expected signed rescue hint generation: %+v", hint)
}
}
// TestNodeUpdateHintBumpsGenerationForDockerLaggingNode covers a node whose
// update status payload carries a container ID and image name and whose
// reported version is behind the policy target. Hints fetched at 03:27 and
// 03:31 must carry different generations.
func TestNodeUpdateHintBumpsGenerationForDockerLaggingNode(t *testing.T) {
	target := "0.2.333-dockerjoinfix"
	reported := "0.2.332-relaycertintentfix"
	policyUpdatedAt := time.Date(2026, 5, 19, 3, 20, 0, 0, time.UTC)
	lastSeen := time.Date(2026, 5, 19, 3, 27, 0, 0, time.UTC)

	fake := &fakeRepository{
		nodeUpdatePolicies: map[string]NodeUpdatePolicy{
			"node-1|rap-node-agent": {
				ClusterID:     "cluster-1",
				NodeID:        "node-1",
				Product:       "rap-node-agent",
				Channel:       "stable",
				TargetVersion: &target,
				Enabled:       true,
				UpdatedAt:     policyUpdatedAt,
			},
		},
		clusterNodes: []ClusterNode{{
			ID:              "node-1",
			Name:            "home-1",
			HealthStatus:    "healthy",
			LastSeenAt:      &lastSeen,
			ReportedVersion: &reported,
		}},
		updateStatuses: []NodeUpdateStatus{{
			ClusterID:      "cluster-1",
			NodeID:         "node-1",
			Product:        "rap-node-agent",
			CurrentVersion: "0.2.327-registryjoinrewrite",
			TargetVersion:  "0.2.332-relaycertintentfix",
			Phase:          "health_check",
			Status:         "succeeded",
			ObservedAt:     time.Date(2026, 5, 19, 3, 4, 0, 0, time.UTC),
			Payload: json.RawMessage(`{
"container_id":"docker-container-1",
"image":"rap-node-agent:0.2.332-relaycertintentfix"
}`),
		}},
	}
	svc := NewService(fake)

	svc.now = func() time.Time { return time.Date(2026, 5, 19, 3, 27, 0, 0, time.UTC) }
	first := svc.GetNodeUpdateHint(context.Background(), "cluster-1", "node-1")

	// Four minutes later the generation is expected to have changed.
	svc.now = func() time.Time { return time.Date(2026, 5, 19, 3, 31, 0, 0, time.UTC) }
	second := svc.GetNodeUpdateHint(context.Background(), "cluster-1", "node-1")

	if second.Generation == first.Generation {
		t.Fatalf("expected docker lagging node to get repeated wake-epoch bump, got %q", first.Generation)
	}
}
func TestInferUpdateProfileDetectsDockerNodeAgentFromStatusPayload(t *testing.T) {
osValue, arch, installType, known := inferUpdateProfile("rap-node-agent", []NodeUpdateStatus{{
Product: "rap-node-agent",
Payload: json.RawMessage(`{
"container_id":"docker-container-1",
"image":"rap-node-agent:0.2.332-relaycertintentfix"
}`),
}})
if !known {
t.Fatal("expected docker node-agent profile to be known")
}
if osValue != "linux" || arch != "amd64" || installType != "docker" {
t.Fatalf("unexpected docker profile: os=%q arch=%q install=%q", osValue, arch, installType)
}
}
// TestGetStaleNodeRiskReportTreatsDockerLaggingNodeAsWakeUnsupported builds a
// cluster with one healthy node that reports a version behind the policy
// target, an active release with a linux/amd64 docker artifact, a heartbeat
// whose update_runtime metadata contains only a reason and current version,
// and a two-hour-old update status with a docker payload. The stale-node risk
// report must contain exactly one node whose first product is detected as a
// docker install with a compatible artifact, and the node must be flagged
// UpdaterWakeUnsupported with the "updater_wake_unsupported" alert.
func TestGetStaleNodeRiskReportTreatsDockerLaggingNodeAsWakeUnsupported(t *testing.T) {
	now := time.Now().UTC()
	reportedVersion := "0.2.332-relaycertintentfix"
	store := &fakeRepository{
		platformRole: PlatformRoleAdmin,
		clusterNodes: []ClusterNode{
			{
				ID:                 "node-1",
				NodeKey:            "node-key-1",
				Name:               "home-1",
				RegistrationStatus: NodeRegistrationActive,
				HealthStatus:       "healthy",
				ReportedVersion:    &reportedVersion,
				LastSeenAt:         ptrTime(now.Add(-2 * time.Minute)),
				MembershipStatus:   "active",
			},
		},
		releaseVersions: []ReleaseVersion{
			{
				ID:        "release-node",
				ClusterID: "cluster-1",
				Product:   "rap-node-agent",
				Version:   "0.2.334-updaterruntimestate",
				Channel:   "stable",
				Status:    "active",
				Artifacts: []ReleaseArtifact{
					{ID: "node-docker", ClusterID: "cluster-1", Product: "rap-node-agent", Version: "0.2.334-updaterruntimestate", OS: "linux", Arch: "amd64", InstallType: "docker", Kind: "image_tar", URL: "/downloads/rap-node-agent.tar", SHA256: "node-sha"},
				},
			},
		},
		nodeUpdatePolicies: map[string]NodeUpdatePolicy{
			"node-1|rap-node-agent": {
				ClusterID:       "cluster-1",
				NodeID:          "node-1",
				Product:         "rap-node-agent",
				Channel:         "stable",
				Strategy:        "canary",
				Enabled:         true,
				RollbackAllowed: true,
				HealthWindowSec: 180,
				TargetVersion:   strPtr("0.2.334-updaterruntimestate"),
			},
		},
		heartbeats: map[string][]NodeHeartbeat{
			"node-1": {{
				ClusterID:       "cluster-1",
				NodeID:          "node-1",
				HealthStatus:    "healthy",
				ReportedVersion: &reportedVersion,
				ObservedAt:      now,
				Metadata: json.RawMessage(`{
"mesh_endpoint_report":{"region":"home","peer_cache_peers":7,"endpoint_candidates":[{"transport":"direct_quic","reachability":"public"}]},
"mesh_peer_connection_manager_report":{"probe_results":[{"node_id":"peer-a","candidate_results":[{"transport":"direct_quic","connection_state":"ready"}]}]},
"fabric_registry_runtime_report":{"status":"missing","resolved_service_count":0},
"update_runtime":{"reason":"host-agent updater active","current_version":"0.2.332-relaycertintentfix"}
}`),
			}},
		},
		updateStatuses: []NodeUpdateStatus{{
			ClusterID:      "cluster-1",
			NodeID:         "node-1",
			Product:        "rap-node-agent",
			CurrentVersion: "0.2.327-registryjoinrewrite",
			TargetVersion:  "0.2.332-relaycertintentfix",
			Phase:          "health_check",
			Status:         "succeeded",
			ObservedAt:     now.Add(-2 * time.Hour),
			Payload: json.RawMessage(`{
"container_id":"docker-container-1",
"image":"rap-node-agent:0.2.332-relaycertintentfix"
}`),
		}},
	}
	service := NewService(store)
	report, err := service.GetStaleNodeRiskReport(context.Background(), GetStaleNodeRiskReportInput{
		ActorUserID: "admin-user",
		ClusterID:   "cluster-1",
	})
	if err != nil {
		t.Fatalf("stale risk report: %v", err)
	}
	if len(report.Nodes) != 1 {
		t.Fatalf("expected one node, got %+v", report.Nodes)
	}
	node := report.Nodes[0]
	// Guard the index below: an empty product list must fail the test with a
	// message rather than panic.
	if len(node.Products) == 0 {
		t.Fatalf("expected at least one product entry, got %+v", node)
	}
	product := node.Products[0]
	if product.DetectedInstallType != "docker" {
		t.Fatalf("expected docker install type, got %+v", product)
	}
	if !product.CompatibleArtifactFound {
		t.Fatalf("expected compatible docker artifact to be reported, got %+v", product)
	}
	if !node.UpdaterWakeUnsupported {
		t.Fatalf("expected updater wake unsupported for docker lagging node, got %+v", node)
	}
	if !containsString(node.Alerts, "updater_wake_unsupported") {
		t.Fatalf("expected updater_wake_unsupported alert, got %+v", node.Alerts)
	}
}
// TestUpdateRuntimePresentFromHeartbeatMetadataRequiresRealStateOrTrigger
// checks three metadata shapes: a bare "reason" must not count as runtime
// present, while "trigger_generation" or "host_agent_state_present" must.
func TestUpdateRuntimePresentFromHeartbeatMetadataRequiresRealStateOrTrigger(t *testing.T) {
	checks := []struct {
		metadata map[string]any
		want     bool
		failure  string
	}{
		{
			metadata: map[string]any{"reason": "host-agent updater active"},
			want:     false,
			failure:  "static update runtime reason alone must not count as runtime present",
		},
		{
			metadata: map[string]any{"trigger_generation": "gen-1"},
			want:     true,
			failure:  "trigger generation should count as runtime present",
		},
		{
			metadata: map[string]any{"host_agent_state_present": true},
			want:     true,
			failure:  "host agent state presence should count as runtime present",
		},
	}
	// Checked in order; the first mismatch aborts the test.
	for _, check := range checks {
		if updateRuntimePresentFromHeartbeatMetadata(check.metadata) != check.want {
			t.Fatal(check.failure)
		}
	}
}
func TestStaleNodeRiskUsesFreshHeartbeatHealthOverStoredOfflineState(t *testing.T) {
now := time.Date(2026, 5, 19, 2, 30, 0, 0, time.UTC)
reportedVersion := "0.2.327-registryjoinrewrite"
store := &fakeRepository{
platformRole: PlatformRoleAdmin,
clusterNodes: []ClusterNode{{
ID: "node-home-1",
Name: "home-1",
RegistrationStatus: NodeRegistrationActive,
HealthStatus: "offline",
ReportedVersion: &reportedVersion,
LastSeenAt: ptrTime(now.Add(-5 * time.Minute)),
MembershipStatus: "active",
}},
nodeUpdatePolicies: map[string]NodeUpdatePolicy{
"node-home-1|rap-node-agent": {
ClusterID: "cluster-1",
NodeID: "node-home-1",
Product: "rap-node-agent",
Channel: "stable",
TargetVersion: &reportedVersion,
Enabled: true,
UpdatedAt: now.Add(-time.Hour),
},
},
heartbeats: map[string][]NodeHeartbeat{
"node-home-1": {{
ClusterID: "cluster-1",
NodeID: "node-home-1",
HealthStatus: "healthy",
ReportedVersion: &reportedVersion,
ObservedAt: now,
Metadata: json.RawMessage(`{
"mesh_endpoint_report":{"region":"home","peer_cache_peers":7},
"fabric_registry_runtime_report":{"status":"active","resolved_service_count":3}
}`),
}},
},
updateStatuses: []NodeUpdateStatus{{
ClusterID: "cluster-1",
NodeID: "node-home-1",
Product: "rap-node-agent",
CurrentVersion: "0.2.327-registryjoinrewrite",
TargetVersion: "0.2.327-registryjoinrewrite",
Phase: "plan",
Status: "noop",
ObservedAt: now.Add(-time.Minute),
}},
}
service := NewService(store)
service.now = func() time.Time { return now }
report, err := service.GetStaleNodeRiskReport(context.Background(), GetStaleNodeRiskReportInput{
ActorUserID: "admin-1",
ClusterID: "cluster-1",
})
if err != nil {
t.Fatalf("GetStaleNodeRiskReport: %v", err)
}
if len(report.Nodes) != 1 {
t.Fatalf("nodes len = %d, want 1", len(report.Nodes))
}
if report.Nodes[0].HealthStatus != "healthy" {
t.Fatalf("node health status = %q, want healthy", report.Nodes[0].HealthStatus)
}
if report.Nodes[0].Blocked {
t.Fatalf("fresh healthy heartbeat should clear stale blocker, got %+v", report.Nodes[0])
}
}
func TestCreateJoinTokenRequiresPlatformAdmin(t *testing.T) {
store := &fakeRepository{platformRole: "user"}
service := NewService(store)
_, err := service.CreateJoinToken(context.Background(), CreateJoinTokenInput{
ActorUserID: "user-1",
ClusterID: "cluster-1",
})
if !errors.Is(err, ErrAccessDenied) {
t.Fatalf("err = %v, want ErrAccessDenied", err)
}
}
// TestCreateJoinTokenStoresHashOnlyAndReturnsRawOnce creates a join token as
// an admin and checks that the raw token is returned, that the repository
// recorded a non-empty hash different from the raw token, and that the result
// carries an authority signature that verifies against the repository's
// cluster authority public key.
func TestCreateJoinTokenStoresHashOnlyAndReturnsRawOnce(t *testing.T) {
	fake := &fakeRepository{platformRole: PlatformRoleAdmin}
	svc := NewService(fake)
	svc.now = func() time.Time { return time.Date(2026, 4, 26, 12, 0, 0, 0, time.UTC) }

	request := CreateJoinTokenInput{
		ActorUserID: "admin-1",
		ClusterID:   "cluster-1",
		Scope:       json.RawMessage(`{"roles":["rdp-worker"]}`),
		MaxUses:     1,
	}
	issued, err := svc.CreateJoinToken(context.Background(), request)
	if err != nil {
		t.Fatalf("create join token: %v", err)
	}

	if issued.Token == "" {
		t.Fatal("raw token must be returned to caller once")
	}
	if stored := fake.lastTokenHash; stored == "" || stored == issued.Token {
		t.Fatalf("stored token hash = %q, raw token = %q", stored, issued.Token)
	}
	// The signature pointer is dereferenced below, so it must be checked first.
	if issued.AuthoritySignature == nil || len(issued.AuthorityPayload) == 0 {
		t.Fatalf("created token missing authority signature: %+v", issued.NodeJoinToken)
	}
	err = clusterauth.VerifyRaw(fake.clusterAuthority.PublicKey, issued.AuthorityPayload, *issued.AuthoritySignature)
	if err != nil {
		t.Fatalf("verify token authority signature: %v", err)
	}
}
func TestUpdateClusterRequiresMutableAuthority(t *testing.T) {
store := &fakeRepository{
platformRole: PlatformRoleAdmin,
authorityState: ClusterAuthorityState{
ClusterID: "cluster-1",
AuthorityState: "minority",
MutationMode: "read_only",
},
}
service := NewService(store)
_, err := service.UpdateCluster(context.Background(), UpdateClusterInput{
ActorUserID: "admin-1",
ClusterID: "cluster-1",
Name: "Cluster One",
Status: ClusterStatusActive,
Metadata: json.RawMessage(`{}`),
})
if !errors.Is(err, ErrClusterReadOnly) {
t.Fatalf("err = %v, want ErrClusterReadOnly", err)
}
}
func TestUpdateClusterValidatesStatusAndMetadata(t *testing.T) {
store := &fakeRepository{platformRole: PlatformRoleAdmin}
service := NewService(store)
_, err := service.UpdateCluster(context.Background(), UpdateClusterInput{
ActorUserID: "admin-1",
ClusterID: "cluster-1",
Name: "Cluster One",
Status: "unknown",
Metadata: json.RawMessage(`{}`),
})
if !errors.Is(err, ErrInvalidPayload) {
t.Fatalf("err = %v, want ErrInvalidPayload", err)
}
_, err = service.UpdateCluster(context.Background(), UpdateClusterInput{
ActorUserID: "admin-1",
ClusterID: "cluster-1",
Name: "Cluster One",
Status: ClusterStatusActive,
Metadata: json.RawMessage(`{`),
})
if err == nil || !strings.Contains(err.Error(), "metadata") {
t.Fatalf("err = %v, want metadata validation error", err)
}
}
func TestCreateNodeGroupValidatesNameAndMetadata(t *testing.T) {
store := &fakeRepository{platformRole: PlatformRoleAdmin}
service := NewService(store)
_, err := service.CreateNodeGroup(context.Background(), CreateNodeGroupInput{
ActorUserID: "admin-1",
ClusterID: "cluster-1",
Name: " ",
})
if !errors.Is(err, ErrInvalidPayload) {
t.Fatalf("err = %v, want ErrInvalidPayload", err)
}
_, err = service.CreateNodeGroup(context.Background(), CreateNodeGroupInput{
ActorUserID: "admin-1",
ClusterID: "cluster-1",
Name: "DC-1",
Metadata: json.RawMessage(`{`),
})
if err == nil || !strings.Contains(err.Error(), "metadata") {
t.Fatalf("err = %v, want metadata validation error", err)
}
}
// TestAssignNodeToGroupPreservesConcreteMembership assigns node-1 to group-1
// and checks both the returned node (ID and NodeGroupID) and the input the
// repository recorded in lastAssignGroupInput.
func TestAssignNodeToGroupPreservesConcreteMembership(t *testing.T) {
	wantGroup := "group-1"
	fake := &fakeRepository{platformRole: PlatformRoleAdmin}
	svc := NewService(fake)

	assigned, err := svc.AssignNodeToGroup(context.Background(), AssignNodeGroupInput{
		ActorUserID: "admin-1",
		ClusterID:   "cluster-1",
		NodeID:      "node-1",
		GroupID:     &wantGroup,
	})
	if err != nil {
		t.Fatalf("assign node group: %v", err)
	}

	// NodeGroupID is a pointer; the nil check guards the dereference.
	if assigned.ID != "node-1" || assigned.NodeGroupID == nil || *assigned.NodeGroupID != wantGroup {
		t.Fatalf("unexpected node group assignment: %+v", assigned)
	}
	recorded := fake.lastAssignGroupInput
	if recorded.NodeID != "node-1" || recorded.GroupID == nil {
		t.Fatalf("assignment input not preserved: %+v", recorded)
	}
}
func TestCreateFabricEntryPointValidatesControlPlanePayload(t *testing.T) {
store := &fakeRepository{platformRole: PlatformRoleAdmin}
service := NewService(store)
_, err := service.CreateFabricEntryPoint(context.Background(), CreateFabricEntryPointInput{
ActorUserID: "admin-1",
ClusterID: "cluster-1",
Name: " ",
EndpointType: "client_access",
})
if !errors.Is(err, ErrInvalidPayload) {
t.Fatalf("err = %v, want ErrInvalidPayload", err)
}
_, err = service.CreateFabricEntryPoint(context.Background(), CreateFabricEntryPointInput{
ActorUserID: "admin-1",
ClusterID: "cluster-1",
Name: "Main Entry",
EndpointType: "client_access",
Policy: json.RawMessage(`{`),
})
if err == nil || !strings.Contains(err.Error(), "valid json") {
t.Fatalf("err = %v, want json validation error", err)
}
}
// TestCreateFabricEgressPoolDefaultsAndAudits creates an egress pool with
// only a name and expects Status "active", RouteScope "{}", and a trailing
// "fabric.egress_pool.created" audit event in the repository.
func TestCreateFabricEgressPoolDefaultsAndAudits(t *testing.T) {
	fake := &fakeRepository{platformRole: PlatformRoleAdmin}
	svc := NewService(fake)

	pool, err := svc.CreateFabricEgressPool(context.Background(), CreateFabricEgressPoolInput{
		ActorUserID: "admin-1",
		ClusterID:   "cluster-1",
		Name:        "Office Moscow",
	})
	if err != nil {
		t.Fatalf("create egress pool: %v", err)
	}

	defaulted := pool.Status == "active" && string(pool.RouteScope) == "{}"
	if !defaulted {
		t.Fatalf("unexpected egress pool defaults: %+v", pool)
	}
	// Only the most recent audit event is inspected.
	events := fake.auditEvents
	if len(events) == 0 || events[len(events)-1].EventType != "fabric.egress_pool.created" {
		t.Fatalf("missing egress pool audit event: %+v", events)
	}
}
func TestAssignNodeRoleRejectsUnknownRole(t *testing.T) {
store := &fakeRepository{platformRole: PlatformRoleAdmin}
service := NewService(store)
_, err := service.AssignNodeRole(context.Background(), AssignNodeRoleInput{
ActorUserID: "admin-1",
ClusterID: "cluster-1",
NodeID: "node-1",
Role: "can_run_rdp_worker",
})
if !errors.Is(err, ErrInvalidNodeRole) {
t.Fatalf("err = %v, want ErrInvalidNodeRole", err)
}
}
func TestAssignNodeRoleAllowsFabricNodeFunctions(t *testing.T) {
roles := []string{
"entry-node",
"relay-node",
"core-mesh",
"update-cache",
"file-storage-cache",
"public-ingress",
"admin-ingress",
"rdp-worker",
"vnc-worker",
"vpn-exit",
"vpn-connector",
"vpn-client",
"ipv4-egress",
"video-relay",
}
for _, role := range roles {
t.Run(role, func(t *testing.T) {
store := &fakeRepository{platformRole: PlatformRoleAdmin}
service := NewService(store)
item, err := service.AssignNodeRole(context.Background(), AssignNodeRoleInput{
ActorUserID: "admin-1",
ClusterID: "cluster-1",
NodeID: "node-1",
Role: role,
})
if err != nil {
t.Fatalf("assign role: %v", err)
}
if item.Role != role {
t.Fatalf("role = %q, want %q", item.Role, role)
}
})
}
}
// TestFabricAdminServiceClassesAreScopedToAdminRoles checks, for the admin and
// public ingress service classes, that the class is allowed, that normalizing
// nil required roles yields exactly the expected single role, that the
// normalized channels include control, interactive and reliable, and that the
// HTTP ingress descriptor exposes no HTTP/WebSocket paths or methods.
func TestFabricAdminServiceClassesAreScopedToAdminRoles(t *testing.T) {
	cases := []struct {
		serviceClass string
		requiredRole string
		pathNeedle   string
	}{
		{FabricServiceClassAdminIngress, "admin-ingress", "admin"},
		{FabricServiceClassAdminIngress, "admin-ingress", "clusters"},
		{FabricServiceClassPublicIngress, "public-ingress", "organizations"},
		{FabricServiceClassPublicIngress, "public-ingress", "users"},
	}
	for _, tc := range cases {
		// pathNeedle is part of the subtest name so that the two cases sharing a
		// service class get distinct, stable names instead of auto-suffixed
		// duplicates ("name", "name#01").
		t.Run(tc.serviceClass+"_"+tc.pathNeedle, func(t *testing.T) {
			if !isAllowedFabricServiceClass(tc.serviceClass) {
				t.Fatalf("service class %q is not allowed", tc.serviceClass)
			}
			roles := normalizeFabricRequiredRoles(nil, tc.serviceClass)
			if len(roles) != 1 || !containsString(roles, tc.requiredRole) {
				t.Fatalf("required roles = %+v", roles)
			}
			channels := normalizeFabricServiceChannels(nil, tc.serviceClass)
			if !containsString(channels, FabricChannelControl) || !containsString(channels, FabricChannelInteractive) || !containsString(channels, FabricChannelReliable) {
				t.Fatalf("channels = %+v", channels)
			}
			ingress := fabricServiceChannelHTTPIngress(tc.serviceClass)
			if ingress.Type != "fabric_quic_only" || ingress.PathTemplate != "" || ingress.WebSocketPathTemplate != "" || len(ingress.SupportedMethods) != 0 {
				t.Fatalf("ingress must not expose HTTP/WebSocket transport paths: %+v", ingress)
			}
		})
	}
}
func TestAttachExistingNodeRequiresPlatformAdmin(t *testing.T) {
store := &fakeRepository{platformRole: "user"}
service := NewService(store)
_, err := service.AttachExistingNodeToCluster(context.Background(), AttachExistingNodeInput{
ActorUserID: "user-1",
ClusterID: "cluster-1",
NodeID: "node-1",
})
if !errors.Is(err, ErrAccessDenied) {
t.Fatalf("err = %v, want ErrAccessDenied", err)
}
}
func TestAttachExistingNodeRejectsUnknownRole(t *testing.T) {
store := &fakeRepository{platformRole: PlatformRoleAdmin}
service := NewService(store)
_, err := service.AttachExistingNodeToCluster(context.Background(), AttachExistingNodeInput{
ActorUserID: "admin-1",
ClusterID: "cluster-1",
NodeID: "node-1",
Roles: []string{"can_run_rdp_worker"},
})
if !errors.Is(err, ErrInvalidNodeRole) {
t.Fatalf("err = %v, want ErrInvalidNodeRole", err)
}
}
// TestAttachExistingNodeUsesConcreteNodeAndRoles attaches node-1 with two
// roles as an admin and checks both the returned node and the input that
// reached the repository.
func TestAttachExistingNodeUsesConcreteNodeAndRoles(t *testing.T) {
	repo := &fakeRepository{platformRole: PlatformRoleAdmin}
	svc := NewService(repo)

	input := AttachExistingNodeInput{
		ActorUserID: "admin-1",
		ClusterID:   "cluster-1",
		NodeID:      "node-1",
		Roles:       []string{"entry-node", "rdp-worker"},
	}
	attached, err := svc.AttachExistingNodeToCluster(context.Background(), input)
	if err != nil {
		t.Fatalf("attach existing node: %v", err)
	}

	if attached.ID != "node-1" || attached.MembershipStatus != "active" {
		t.Fatalf("unexpected node: %+v", attached)
	}

	// The repository must have been handed the same node ID and both roles.
	if repo.lastAttachInput.NodeID != "node-1" || len(repo.lastAttachInput.Roles) != 2 {
		t.Fatalf("attach input not preserved: %+v", repo.lastAttachInput)
	}
}
// TestGetDockerInstallProfileBuildsRuntimeProfileFromTokenScope requests a
// Docker install profile using a join token whose scope carries roles, image,
// artifact endpoints, fabric registry records and mesh settings. It checks
// that the token is looked up by its hash and that the profile reflects the
// scope, including the derived container name and image artifact file/URL.
func TestGetDockerInstallProfileBuildsRuntimeProfileFromTokenScope(t *testing.T) {
	rawToken := "rap_join_profile"
	tokenHash, err := hashJoinToken(rawToken)
	if err != nil {
		t.Fatalf("hash token: %v", err)
	}
	store := &fakeRepository{
		validJoinToken: NodeJoinToken{
			ID:        "token-1",
			ClusterID: "cluster-1",
			Status:    "active",
			Scope: json.RawMessage(`{
"roles": ["core-mesh"],
"image": "registry.example.test/rap-node-agent:1",
"artifact_endpoints": ["quic://cache-a.example.test/artifacts/"],
"fabric_registry_records": [{"schema":"rap.fabric.registry.gossip_record.v1","service_class":"control-api","service_id":"control-a"}],
"docker_image_artifact_sha256": "abc123",
"mesh_connectivity_mode": "outbound_only",
"mesh_region": "customer-a",
"pull_image": true
}`),
		},
	}
	service := NewService(store)
	profile, err := service.GetDockerInstallProfile(context.Background(), DockerInstallProfileRequest{
		ClusterID:    "cluster-1",
		InstallToken: rawToken,
		NodeName:     "Customer Node 1",
	})
	if err != nil {
		t.Fatalf("profile: %v", err)
	}
	// The repository must be queried with the token hash, never the raw token.
	if store.lastLookupTokenHash != tokenHash {
		t.Fatalf("token hash lookup = %q, want %q", store.lastLookupTokenHash, tokenHash)
	}
	// The URLs length guard keeps an empty URL list a regular test failure with
	// the full profile dump rather than an index-out-of-range panic.
	if profile.JoinToken != rawToken ||
		profile.ClusterAuthorityPublicKey == "" ||
		profile.NodeName != "Customer Node 1" ||
		profile.ContainerName != "rap-node-agent-customer-node-1" ||
		profile.MeshConnectivityMode != "outbound_only" ||
		profile.MeshRegion != "customer-a" ||
		len(profile.ArtifactEndpoints) != 1 ||
		profile.DockerImageArtifact == nil ||
		profile.DockerImageArtifact.FileName != "registry.example.test-rap-node-agent-1.tar" ||
		len(profile.DockerImageArtifact.URLs) == 0 ||
		profile.DockerImageArtifact.URLs[0] != "quic://cache-a.example.test/artifacts/registry.example.test-rap-node-agent-1.tar" ||
		profile.DockerImageArtifact.SHA256 != "abc123" ||
		string(profile.FabricRegistryRecords) != `[{"schema":"rap.fabric.registry.gossip_record.v1","service_class":"control-api","service_id":"control-a"}]` ||
		profile.EnrollmentPollTimeoutSeconds != 0 ||
		!profile.PullImage ||
		profile.FabricRuntimeEnabled ||
		profile.FabricListenAddr != "" ||
		profile.MeshProductionForwardingEnabled {
		t.Fatalf("unexpected profile: %+v", profile)
	}
}
func TestGetDockerJoinBundleBuildsSignedBundleFromTokenScope(t *testing.T) {
rawToken := "rap_join_profile"
store := &fakeRepository{
validJoinToken: NodeJoinToken{
ID: "token-1",
ClusterID: "cluster-1",
Status: "active",
Scope: json.RawMessage(`{
"roles": ["core-mesh"],
"image": "registry.example.test/rap-node-agent:1",
"artifact_endpoints": ["quic://cache-a.example.test/artifacts/"],
"fabric_registry_records": [{"schema":"rap.fabric.registry.gossip_record.v1","service_class":"control-api","service_id":"control-a"}]
}`),
},
}
service := NewService(store)
bundle, err := service.GetDockerJoinBundle(context.Background(), DockerInstallProfileRequest{
ClusterID: "cluster-1",
InstallToken: rawToken,
NodeName: "bundle-node-1",
})
if err != nil {
t.Fatalf("bundle: %v", err)
}
if bundle.SchemaVersion != "rap.install_join_bundle.v1" ||
bundle.BundleKind != "docker" ||
bundle.ClusterID != "cluster-1" ||
bundle.ClusterAuthority == nil ||
bundle.AuthoritySignature == nil ||
bundle.DockerInstallProfile == nil ||
bundle.DockerInstallProfile.NodeName != "bundle-node-1" ||
bundle.DockerInstallProfile.ClusterAuthorityPublicKey == "" {
t.Fatalf("unexpected bundle: %+v", bundle)
}
var payload struct {
DockerInstallProfile DockerInstallProfile `json:"docker_install_profile"`
}
if err := json.Unmarshal(bundle.AuthorityPayload, &payload); err != nil {
t.Fatalf("decode authority payload: %v", err)
}
if payload.DockerInstallProfile.NodeName != "bundle-node-1" {
t.Fatalf("unexpected authority payload: %+v", payload)
}
if err := clusterauth.VerifyRaw(bundle.ClusterAuthority.PublicKey, bundle.AuthorityPayload, *bundle.AuthoritySignature); err != nil {
t.Fatalf("verify authority signature: %v", err)
}
}
func TestGetDockerInstallProfileRequiresArtifactEndpoints(t *testing.T) {
rawToken := "rap_join_profile"
store := &fakeRepository{
validJoinToken: NodeJoinToken{
ID: "token-1",
ClusterID: "cluster-1",
Status: "active",
Scope: json.RawMessage(`{
"image": "rap-node-agent:dev"
}`),
},
}
service := NewService(store)
_, err := service.GetDockerInstallProfile(context.Background(), DockerInstallProfileRequest{
ClusterID: "cluster-1",
InstallToken: rawToken,
NodeName: "node-a",
})
if err == nil {
t.Fatal("expected missing artifact endpoints to be rejected")
}
}
func TestGetDockerInstallProfileDoesNotPinFloatingDevArtifactMetadata(t *testing.T) {
rawToken := "rap_join_profile"
store := &fakeRepository{
validJoinToken: NodeJoinToken{
ID: "token-1",
ClusterID: "cluster-1",
Status: "active",
Scope: json.RawMessage(`{
"artifact_endpoints": ["quic://cache-a.example.test/artifacts/"],
"image": "rap-node-agent:dev-enrollment-join-smoke"
}`),
},
}
service := NewService(store)
profile, err := service.GetDockerInstallProfile(context.Background(), DockerInstallProfileRequest{
ClusterID: "cluster-1",
InstallToken: rawToken,
NodeName: "node-a",
})
if err != nil {
t.Fatalf("profile: %v", err)
}
if profile.DockerImageArtifact == nil ||
profile.DockerImageArtifact.SHA256 != "" ||
profile.DockerImageArtifact.SizeBytes != 0 {
t.Fatalf("unexpected artifact metadata: %+v", profile.DockerImageArtifact)
}
}
func TestCreateJoinRequestRejectsExpiredOrRevokedToken(t *testing.T) {
store := &fakeRepository{validTokenErr: ErrInvalidJoinToken}
service := NewService(store)
_, err := service.CreateJoinRequest(context.Background(), CreateJoinRequestInput{
ClusterID: "cluster-1",
JoinToken: "rap_join_invalid",
NodeName: "node-a",
NodeFingerprint: "fingerprint-a",
PublicKey: "public-key",
})
if !errors.Is(err, ErrInvalidJoinToken) {
t.Fatalf("err = %v, want ErrInvalidJoinToken", err)
}
}
func TestRevokeJoinTokenRequiresPlatformAdmin(t *testing.T) {
store := &fakeRepository{platformRole: "user"}
service := NewService(store)
_, err := service.RevokeJoinToken(context.Background(), RevokeJoinTokenInput{
ActorUserID: "user-1",
ClusterID: "cluster-1",
TokenID: "token-1",
})
if !errors.Is(err, ErrAccessDenied) {
t.Fatalf("err = %v, want ErrAccessDenied", err)
}
}
// TestApproveJoinRequestReturnsJoinContractContract approves a join request as
// an admin and checks that the returned join contract names the cluster, has
// an identity status, carries authority descriptor/signature/payload, and that
// the signature verifies against the repository's cluster authority key.
func TestApproveJoinRequestReturnsJoinContractContract(t *testing.T) {
	repo := &fakeRepository{platformRole: PlatformRoleAdmin}
	svc := NewService(repo)

	approved, err := svc.ApproveJoinRequest(context.Background(), ApproveJoinRequestInput{
		ActorUserID:   "admin-1",
		ClusterID:     "cluster-1",
		JoinRequestID: "join-request-1",
		NodeKey:       "node-key-1",
	})
	if err != nil {
		t.Fatalf("approve join request: %v", err)
	}

	contract := approved.JoinContract
	if contract.ClusterID != "cluster-1" || contract.IdentityStatus == "" {
		t.Fatalf("unexpected join contract: %+v", contract)
	}
	if contract.ClusterAuthority == nil || contract.AuthoritySignature == nil || len(contract.AuthorityPayload) == 0 {
		t.Fatalf("join missing authority contract: %+v", contract)
	}

	err = clusterauth.VerifyRaw(repo.clusterAuthority.PublicKey, contract.AuthorityPayload, *contract.AuthoritySignature)
	if err != nil {
		t.Fatalf("verify approval authority signature: %v", err)
	}
}
// TestApproveJoinRequestReturnsSignedQuorumDescriptor seeds the repository
// with a cluster authority key that has a one-member quorum descriptor,
// approves a join request, and checks that the join contract carries the
// quorum descriptor and that the signed payload records its hash.
func TestApproveJoinRequestReturnsSignedQuorumDescriptor(t *testing.T) {
	keys, err := clusterauth.GenerateKeyPair()
	if err != nil {
		t.Fatalf("generate key: %v", err)
	}

	member := clusterauth.QuorumMember{
		NodeID:               "authority-1",
		Role:                 "update-authority",
		PublicKey:            keys.PublicKeyB64,
		PublicKeyFingerprint: keys.Fingerprint,
		Scopes:               []string{"update-authority"},
	}
	quorum := &QuorumDescriptor{
		SchemaVersion: clusterauth.QuorumSchemaVersion,
		ClusterID:     "cluster-1",
		Epoch:         "epoch-1",
		Threshold:     1,
		Members:       []clusterauth.QuorumMember{member},
	}
	descriptor := ClusterAuthorityDescriptor{
		SchemaVersion:        clusterauth.AuthoritySchemaVersion,
		ClusterID:            "cluster-1",
		AuthorityState:       "active",
		KeyAlgorithm:         clusterauth.AlgorithmEd25519,
		PublicKey:            keys.PublicKeyB64,
		PublicKeyFingerprint: keys.Fingerprint,
		CreatedAt:            time.Now().UTC(),
		UpdatedAt:            time.Now().UTC(),
	}
	repo := &fakeRepository{
		platformRole: PlatformRoleAdmin,
		clusterAuthority: ClusterAuthorityKey{
			ClusterAuthorityDescriptor: descriptor,
			PrivateKey:                 keys.PrivateKeyB64,
			QuorumDescriptor:           quorum,
		},
	}
	svc := NewService(repo)

	approved, err := svc.ApproveJoinRequest(context.Background(), ApproveJoinRequestInput{
		ActorUserID:   "admin-1",
		ClusterID:     "cluster-1",
		JoinRequestID: "join-request-1",
		NodeKey:       "node-key-1",
	})
	if err != nil {
		t.Fatalf("approve join request: %v", err)
	}
	if approved.JoinContract.ClusterAuthorityQuorum == nil {
		t.Fatalf("join missing quorum descriptor: %+v", approved.JoinContract)
	}

	var signed clusterNodeApprovalAuthorityPayload
	if err := json.Unmarshal(approved.JoinContract.AuthorityPayload, &signed); err != nil {
		t.Fatalf("decode authority payload: %v", err)
	}

	// The payload must reference the hash of the descriptor seeded above.
	wantHash, err := clusterauth.QuorumDescriptorHash(*quorum)
	if err != nil {
		t.Fatalf("hash quorum: %v", err)
	}
	if signed.ClusterAuthorityQuorumSHA256 != wantHash {
		t.Fatalf("quorum hash = %q, want %q", signed.ClusterAuthorityQuorumSHA256, wantHash)
	}
}
func TestGetJoinRequestJoinReturnsSignedApproval(t *testing.T) {
nodeID := "node-1"
store := &fakeRepository{
platformRole: PlatformRoleAdmin,
joinJoinRequest: NodeJoinRequest{
ID: "join-request-1",
ClusterID: "cluster-1",
NodeFingerprint: "node-fp",
PublicKey: "node-public-key",
Status: JoinRequestStatusApproved,
ApprovedNodeID: &nodeID,
},
}
service := NewService(store)
result, err := service.GetJoinRequestJoin(context.Background(), GetJoinRequestJoinInput{
ClusterID: "cluster-1",
JoinRequestID: "join-request-1",
NodeFingerprint: "node-fp",
PublicKey: "node-public-key",
})
if err != nil {
t.Fatalf("get join request join: %v", err)
}
if result.JoinContract == nil || result.JoinContract.NodeID != nodeID || result.JoinContract.ClusterAuthority == nil {
t.Fatalf("unexpected join result: %+v", result)
}
if result.JoinContract.AuthoritySignature == nil || len(result.JoinContract.AuthorityPayload) == 0 {
t.Fatalf("join missing authority signature: %+v", result.JoinContract)
}
if err := clusterauth.VerifyRaw(store.clusterAuthority.PublicKey, result.JoinContract.AuthorityPayload, *result.JoinContract.AuthoritySignature); err != nil {
t.Fatalf("verify join authority signature: %v", err)
}
}
func TestSetDesiredWorkloadRequiresPlatformAdmin(t *testing.T) {
store := &fakeRepository{platformRole: "user"}
service := NewService(store)
_, err := service.SetDesiredWorkload(context.Background(), SetDesiredWorkloadInput{
ActorUserID: "user-1",
ClusterID: "cluster-1",
NodeID: "node-1",
ServiceType: "rdp-worker",
})
if !errors.Is(err, ErrAccessDenied) {
t.Fatalf("err = %v, want ErrAccessDenied", err)
}
}
func TestSetDesiredWorkloadUnknownActorReturnsAccessDenied(t *testing.T) {
store := &fakeRepository{platformRoleErr: pgx.ErrNoRows}
service := NewService(store)
_, err := service.SetDesiredWorkload(context.Background(), SetDesiredWorkloadInput{
ActorUserID: "missing-user",
ClusterID: "cluster-1",
NodeID: "node-1",
ServiceType: "fabric-listener",
DesiredState: "enabled",
RuntimeMode: "container",
})
if !errors.Is(err, ErrAccessDenied) {
t.Fatalf("err = %v, want ErrAccessDenied", err)
}
}
func TestSetDesiredWorkloadInvalidActorUUIDReturnsAccessDenied(t *testing.T) {
store := &fakeRepository{}
service := NewService(store)
_, err := service.SetDesiredWorkload(context.Background(), SetDesiredWorkloadInput{
ActorUserID: "codex",
ClusterID: "cluster-1",
NodeID: "node-1",
ServiceType: "fabric-listener",
DesiredState: "enabled",
RuntimeMode: "container",
})
if !errors.Is(err, ErrAccessDenied) {
t.Fatalf("err = %v, want ErrAccessDenied", err)
}
}
func TestSetDesiredWorkloadRejectsUnsupportedRuntimeMode(t *testing.T) {
store := &fakeRepository{platformRole: PlatformRoleAdmin}
service := NewService(store)
_, err := service.SetDesiredWorkload(context.Background(), SetDesiredWorkloadInput{
ActorUserID: "admin-1",
ClusterID: "cluster-1",
NodeID: "node-1",
ServiceType: "fabric-listener",
DesiredState: "enabled",
RuntimeMode: "systemd",
Config: json.RawMessage(`{}`),
Environment: json.RawMessage(`{}`),
})
if !errors.Is(err, ErrInvalidPayload) {
t.Fatalf("err = %v, want ErrInvalidPayload", err)
}
}
func TestListDesiredWorkloadsAllowsNodeScopedAgentReadWithoutActor(t *testing.T) {
store := &fakeRepository{
desiredWorkloads: []NodeWorkloadDesiredState{{
ClusterID: "cluster-1",
NodeID: "node-1",
ServiceType: "synthetic.echo",
DesiredState: "enabled",
RuntimeMode: "native",
Config: json.RawMessage(`{}`),
Environment: json.RawMessage(`{}`),
}},
}
service := NewService(store)
items, err := service.ListDesiredWorkloads(context.Background(), "", "cluster-1", "node-1")
if err != nil {
t.Fatalf("list desired workloads: %v", err)
}
if len(items) != 1 || items[0].ServiceType != "synthetic.echo" {
t.Fatalf("unexpected desired workloads: %+v", items)
}
}
func TestReportWorkloadStatusDefaultsToSafeStubState(t *testing.T) {
store := &fakeRepository{}
service := NewService(store)
status, err := service.ReportWorkloadStatus(context.Background(), ReportWorkloadStatusInput{
ClusterID: "cluster-1",
NodeID: "node-1",
ServiceType: "rdp-worker",
})
if err != nil {
t.Fatalf("report workload status: %v", err)
}
if status.ReportedState != "unknown" || status.RuntimeMode != "container" {
t.Fatalf("unexpected status defaults: %+v", status)
}
}
func TestReportWorkloadStatusRejectsUnsupportedRuntimeMode(t *testing.T) {
store := &fakeRepository{}
service := NewService(store)
_, err := service.ReportWorkloadStatus(context.Background(), ReportWorkloadStatusInput{
ClusterID: "cluster-1",
NodeID: "node-1",
ServiceType: "fabric-listener",
RuntimeMode: "systemd",
})
if !errors.Is(err, ErrInvalidPayload) {
t.Fatalf("err = %v, want ErrInvalidPayload", err)
}
}
func TestReportMeshLinkDoesNotRequirePlatformAdmin(t *testing.T) {
store := &fakeRepository{}
service := NewService(store)
link, err := service.ReportMeshLink(context.Background(), ReportMeshLinkInput{
ClusterID: "cluster-1",
SourceNodeID: "node-a",
TargetNodeID: "node-b",
LinkStatus: "reachable",
})
if err != nil {
t.Fatalf("report mesh link: %v", err)
}
if link.LinkStatus != "reachable" {
t.Fatalf("LinkStatus = %q", link.LinkStatus)
}
}
func TestCreateRouteIntentRequiresPlatformAdmin(t *testing.T) {
store := &fakeRepository{platformRole: "user"}
service := NewService(store)
_, err := service.CreateRouteIntent(context.Background(), CreateRouteIntentInput{
ActorUserID: "user-1",
ClusterID: "cluster-1",
ServiceClass: "input",
})
if !errors.Is(err, ErrAccessDenied) {
t.Fatalf("err = %v, want ErrAccessDenied", err)
}
}
// TestGetVPNClientProfileEnsuresFabricVPNPacketRouteIntents fetches a VPN
// client profile for a connection whose stored client config has a planned
// fabric route and one public direct_quic entry endpoint candidate. It then
// decodes the returned client config and asserts the shape of the
// "vpn_dataplane_session" object (transports, service channel request, route
// bundle/lease, service tunnel, entry and transport candidates, auth) and that
// two "vpn_packets" route intents were created through the repository.
func TestGetVPNClientProfileEnsuresFabricVPNPacketRouteIntents(t *testing.T) {
// Seed the repository with a single VPN connection targeting the
// "home-ipv4" exit pool.
repo := &fakeRepository{
vpnClientProfile: VPNClientProfile{
SchemaVersion: "rap.vpn_client_profile.v1",
Connections: []VPNClientConnection{{
ID: "vpn-1",
TargetEndpoint: json.RawMessage(`{"type":"fabric_ipv4_exit_pool","exit_pool_ids":["home-ipv4"]}`),
ClientConfig: json.RawMessage(`{
"vpn_fabric_route": {
"status": "planned",
"selected_entry_node_id": "entry-1",
"selected_exit_node_id": "exit-1"
},
"vpn_entry_endpoint_candidates": [{
"node_id": "entry-1",
"endpoint_id": "public-http",
"transport": "direct_quic",
"address": "quic://entry.example.test:19131",
"reachability": "public",
"priority": 0
}]
}`),
}},
},
}
service := NewService(repo)
// Replace the service's now func with a fixed timestamp.
service.now = func() time.Time { return time.Date(2026, 5, 3, 12, 0, 0, 0, time.UTC) }
profile, err := service.GetVPNClientProfile(context.Background(), "cluster-1", "org-1", "user-1", "entry-1")
if err != nil {
t.Fatalf("GetVPNClientProfile: %v", err)
}
if len(profile.Connections) != 1 {
t.Fatalf("profile connections = %d, want 1", len(profile.Connections))
}
// Decode the returned client config generically; all further assertions
// operate on map[string]any / []any values produced by encoding/json.
var cfg map[string]any
if err := json.Unmarshal(profile.Connections[0].ClientConfig, &cfg); err != nil {
t.Fatalf("unmarshal client config: %v", err)
}
session, ok := cfg["vpn_dataplane_session"].(map[string]any)
if !ok {
t.Fatalf("missing vpn_dataplane_session in %#v", cfg)
}
// Session-level transport selection: fabric mesh route only, no fallback,
// no backend relay.
if session["preferred_transport"] != "fabric_mesh_node_route_v1" || session["fallback_transport"] != "none" || session["backend_relay_allowed"] != false {
t.Fatalf("unexpected dataplane session transports: %#v", session)
}
// Fabric service channel request: class, source role, pool target and
// adapter contract.
request, ok := session["fabric_service_channel_request"].(map[string]any)
if !ok {
t.Fatalf("missing fabric service channel request in %#v", session)
}
if request["service_class"] != "vpn_packets" || request["source_role"] != "ipv4-ingress" {
t.Fatalf("unexpected fabric service channel request: %#v", request)
}
// NOTE: the unchecked type assertions below panic (rather than fail with a
// message) if the key is missing or has a different JSON type.
target := request["target"].(map[string]any)
poolIDs := target["pool_ids"].([]any)
if target["kind"] != "pool" || target["service_role"] != "ipv4-egress" || len(poolIDs) != 1 || poolIDs[0] != "home-ipv4" {
t.Fatalf("unexpected fabric service channel target: %#v", target)
}
adapter := request["adapter_contract"].(map[string]any)
if adapter["adapter_may_select_endpoint"] != false || adapter["adapter_may_use_degraded_route"] != false {
t.Fatalf("vpn adapter must not own transport decisions: %#v", adapter)
}
if adapter["adapter"] != "ipv4-ingress" || adapter["adapter_role"] != "local_ipv4_packet_adapter" {
t.Fatalf("unexpected ipv4 ingress adapter contract: %#v", adapter)
}
// Route bundle and route lease must be marked opaque to service adapters.
routeBundle, ok := session["fabric_route_bundle"].(map[string]any)
if !ok || routeBundle["standard_visibility"] != "opaque_to_service_adapters" {
t.Fatalf("missing opaque route bundle: %#v", session["fabric_route_bundle"])
}
routeLease, ok := routeBundle["route_lease"].(map[string]any)
if !ok || routeLease["schema_version"] != "rap.fabric_route_lease.v1" || routeLease["service_visibility"] != "opaque_route_lease" {
t.Fatalf("missing route lease: %#v", routeBundle["route_lease"])
}
rebuildPolicy := routeLease["rebuild_policy"].(map[string]any)
if rebuildPolicy["owner"] != "fabric_farm" || rebuildPolicy["service_adapter_action"] != "keep_sending_packets_to_channel" {
t.Fatalf("unexpected route lease rebuild policy: %#v", rebuildPolicy)
}
// The session names the entry node and pool but must not contain an
// "exit_node_id" key.
if session["entry_node_id"] != "entry-1" || session["pool_id"] != "home-ipv4" || session["service_id"] == "" || session["tunnel_id"] == "" {
t.Fatalf("unexpected dataplane session route: %#v", session)
}
if _, ok := session["exit_node_id"]; ok {
t.Fatalf("dataplane session must not expose selected exit node to service adapters: %#v", session)
}
// Service tunnel contract: schema, pool, hidden node selection, stream
// policy and lease identifiers shared with the session.
serviceTunnel, ok := session["fabric_service_tunnel"].(map[string]any)
if !ok || serviceTunnel["schema_version"] != "rap.fabric_service_tunnel.v1" || serviceTunnel["pool_id"] != "home-ipv4" {
t.Fatalf("missing service tunnel contract: %#v", session["fabric_service_tunnel"])
}
if serviceTunnel["selected_node_visible"] != false || serviceTunnel["route_visibility"] != "opaque_to_service" {
t.Fatalf("service tunnel must hide node selection: %#v", serviceTunnel)
}
// JSON numbers decode to float64, hence the float64(8) comparison.
if serviceTunnel["data_plane"] != "fabric_quic_streams" ||
serviceTunnel["transport_owner"] != "fabric_farm" ||
serviceTunnel["stream_shards"] != float64(8) {
t.Fatalf("service tunnel must expose fabric-owned stream policy: %#v", serviceTunnel)
}
if serviceTunnel["route_lease_id"] == "" || serviceTunnel["route_generation"] == "" ||
session["route_lease_id"] != serviceTunnel["route_lease_id"] ||
session["route_generation"] != serviceTunnel["route_generation"] {
t.Fatalf("service tunnel/session must carry route lease epoch: session=%#v tunnel=%#v", session, serviceTunnel)
}
trafficClasses := serviceTunnel["traffic_classes"].([]any)
if len(trafficClasses) != 6 || trafficClasses[1] != "dns" {
t.Fatalf("service tunnel traffic classes missing dns/reliable split: %#v", trafficClasses)
}
// Exactly one entry candidate, derived from the seeded public endpoint.
entryCandidates := session["entry_candidates"].([]any)
if len(entryCandidates) != 1 {
t.Fatalf("entry candidate count = %d, want 1", len(entryCandidates))
}
entryCandidate := entryCandidates[0].(map[string]any)
if entryCandidate["status"] != "selected_endpoint_public" {
t.Fatalf("unexpected entry candidate: %#v", entryCandidate)
}
// Exactly one transport candidate, of the fabric mesh route type, targeting
// a pool and not exposing an exit node.
transportCandidates := session["transport_candidates"].([]any)
var foundQUICRoute bool
for _, rawCandidate := range transportCandidates {
candidate := rawCandidate.(map[string]any)
if candidate["type"] == "fabric_mesh_node_route_v1" {
foundQUICRoute = true
if candidate["status"] != "contract_ready_quic_fabric_route_required" || candidate["backend_relay_allowed"] != false {
t.Fatalf("unexpected QUIC fabric route candidate: %#v", candidate)
}
if _, ok := candidate["exit_node_id"]; ok {
t.Fatalf("transport candidate must not expose selected exit node to service adapters: %#v", candidate)
}
if candidate["target_kind"] != "pool" {
t.Fatalf("transport candidate must target a pool: %#v", candidate)
}
}
}
if !foundQUICRoute || len(transportCandidates) != 1 {
t.Fatalf("missing single QUIC fabric route candidate in %#v", transportCandidates)
}
auth := session["auth"].(map[string]any)
if auth["type"] != "control_plane_issued_bearer" || auth["node_validation"] != "ipv4_ingress_node_identity_and_policy" {
t.Fatalf("unexpected dataplane session auth: %#v", auth)
}
// Side effect: fetching the profile must have created two route intents,
// each for vpn_packets at priority 10 with a two-hop synthetic policy that
// allows the vpn_packet and fabric_control channels.
if got := len(repo.createdRouteIntents); got != 2 {
t.Fatalf("created route intents = %d, want 2", got)
}
for _, input := range repo.createdRouteIntents {
if input.ClusterID != "cluster-1" || input.ServiceClass != "vpn_packets" || input.Priority != 10 {
t.Fatalf("unexpected route intent input: %+v", input)
}
var policy syntheticRoutePolicy
if err := json.Unmarshal(input.Policy, &policy); err != nil {
t.Fatalf("unmarshal policy: %v", err)
}
if !policy.SyntheticEnabled || !containsString(policy.AllowedChannels, "vpn_packet") || !containsString(policy.AllowedChannels, "fabric_control") || len(policy.Hops) != 2 {
t.Fatalf("policy = %+v", policy)
}
}
}
func TestGetVPNClientProfileForwardsPreferredExit(t *testing.T) {
repo := &fakeRepository{
vpnClientProfile: VPNClientProfile{
SchemaVersion: "rap.vpn_client_profile.v1",
Connections: []VPNClientConnection{{
ID: "vpn-1",
ClientConfig: json.RawMessage(`{
"vpn_fabric_route": {
"status": "planned",
"selected_entry_node_id": "entry-1",
"selected_exit_node_id": "exit-1"
}
}`),
}},
},
}
service := NewService(repo)
if _, err := service.GetVPNClientProfile(context.Background(), "cluster-1", "org-1", "user-1", "entry-1", "exit-2"); err != nil {
t.Fatalf("GetVPNClientProfile: %v", err)
}
if repo.lastPreferredEntryNodeID != "entry-1" {
t.Fatalf("preferred entry = %q, want entry-1", repo.lastPreferredEntryNodeID)
}
if repo.lastPreferredExitNodeID != "exit-2" {
t.Fatalf("preferred exit = %q, want exit-2", repo.lastPreferredExitNodeID)
}
}
func TestVPNDirectHTTPEntryTransportUsesFarmLocalRouteWhenEntryIsExit(t *testing.T) {
t.Skip("direct HTTP entry transport removed from the QUIC-only fabric dataplane")
candidate := vpnDirectHTTPEntryTransportCandidate(vpnClientFabricRoute{
SelectedEntryNodeID: "node-1",
SelectedExitNodeID: "node-1",
}, []map[string]any{{
"node_id": "node-1",
"reachability": "public",
}})
if candidate == nil {
t.Fatal("candidate is nil")
}
if candidate["safe_client_switch"] != true || candidate["status"] != "available_farm_local_route" {
t.Fatalf("unexpected farm local route guard: %#v", candidate)
}
}
func TestVPNDirectHTTPEntryTransportIgnoresDisallowedLocalGatewayShortcut(t *testing.T) {
t.Skip("direct HTTP entry transport removed from the QUIC-only fabric dataplane")
candidate := vpnDirectHTTPEntryTransportCandidate(vpnClientFabricRoute{
SelectedEntryNodeID: "node-1",
SelectedExitNodeID: "node-1",
}, []map[string]any{{
"node_id": "node-1",
"reachability": "public",
"local_gateway_shortcut": true,
}})
if candidate == nil {
t.Fatal("candidate is nil")
}
if candidate["safe_client_switch"] != true || candidate["status"] != "available_farm_local_route" {
t.Fatalf("unexpected farm route candidate: %#v", candidate)
}
}
// TestGetNodeSyntheticMeshConfigRequiresTestingFlag fetches the synthetic mesh
// config from a default repository and expects a disabled config that carries
// no routes and no peer endpoints.
func TestGetNodeSyntheticMeshConfigRequiresTestingFlag(t *testing.T) {
	svc := NewService(&fakeRepository{})

	input := GetNodeSyntheticMeshConfigInput{
		ClusterID: "cluster-1",
		NodeID:    "node-a",
	}
	meshCfg, err := svc.GetNodeSyntheticMeshConfig(context.Background(), input)
	if err != nil {
		t.Fatalf("get synthetic config: %v", err)
	}

	if meshCfg.Enabled {
		t.Fatal("config must be disabled when synthetic testing flag is off")
	}
	if len(meshCfg.Routes) != 0 || len(meshCfg.PeerEndpoints) != 0 {
		t.Fatalf("disabled config must not leak topology: %+v", meshCfg)
	}
}
func TestNodeUpdatePlanSelectsMatchingReleaseArtifact(t *testing.T) {
store := &fakeRepository{
platformRole: PlatformRoleAdmin,
releaseVersions: []ReleaseVersion{
{
ID: "release-1",
ClusterID: "cluster-1",
Product: "rap-node-agent",
Version: "0.1.0-c17z26",
Channel: "dev",
Status: "active",
Artifacts: []ReleaseArtifact{
{ID: "linux", ClusterID: "cluster-1", Product: "rap-node-agent", Version: "0.1.0-c17z26", OS: "linux", Arch: "amd64", InstallType: "service", Kind: "binary", URL: "quic://cache/agent", SHA256: "linux-sha"},
{ID: "docker", ClusterID: "cluster-1", Product: "rap-node-agent", Version: "0.1.0-c17z26", OS: "linux", Arch: "amd64", InstallType: "docker", Kind: "docker_image_tar", URL: "quic://cache/agent.tar", SHA256: "docker-sha"},
},
},
},
nodeUpdatePolicies: map[string]NodeUpdatePolicy{
"node-1|rap-node-agent": {
ClusterID: "cluster-1",
NodeID: "node-1",
Product: "rap-node-agent",
Channel: "dev",
Strategy: "manual",
Enabled: true,
RollbackAllowed: true,
HealthWindowSec: 180,
},
},
}
service := NewService(store)
plan, err := service.GetNodeUpdatePlan(context.Background(), GetNodeUpdatePlanInput{
ClusterID: "cluster-1",
NodeID: "node-1",
Product: "rap-node-agent",
CurrentVersion: "0.1.0-c17z25",
OS: "linux",
Arch: "amd64",
InstallType: "docker",
})
if err != nil {
t.Fatalf("update plan: %v", err)
}
if plan.Action != "update" ||
plan.TargetVersion != "0.1.0-c17z26" ||
plan.Artifact == nil ||
plan.Artifact.ID != "docker" ||
plan.ProductionForwarding {
t.Fatalf("unexpected update plan: %+v", plan)
}
if plan.AuthoritySignature == nil || len(plan.AuthorityPayload) == 0 {
t.Fatalf("update plan must be signed: %+v", plan)
}
}
func TestNodeUpdatePlanRollingWaitsWhenAnotherNodeHasActiveLease(t *testing.T) {
now := time.Now().UTC()
target := "0.2.342-dockerrescuereconcile"
store := &fakeRepository{
platformRole: PlatformRoleAdmin,
clusterNodes: []ClusterNode{
{ID: "node-1", Name: "node-1", RegistrationStatus: NodeRegistrationActive, MembershipStatus: "active"},
{ID: "node-2", Name: "node-2", RegistrationStatus: NodeRegistrationActive, MembershipStatus: "active"},
},
releaseVersions: []ReleaseVersion{{
ID: "release-1",
ClusterID: "cluster-1",
Product: "rap-node-agent",
Version: target,
Channel: "stable",
Status: "active",
Artifacts: []ReleaseArtifact{{
ID: "docker", ClusterID: "cluster-1", Product: "rap-node-agent", Version: target,
OS: "linux", Arch: "amd64", InstallType: "docker", Kind: "image_tar",
URL: "quic://cache/agent.tar", SHA256: "docker-sha",
}},
}},
nodeUpdatePolicies: map[string]NodeUpdatePolicy{
"node-1|rap-node-agent": {
ClusterID: "cluster-1",
NodeID: "node-1",
Product: "rap-node-agent",
Channel: "stable",
TargetVersion: &target,
Strategy: "rolling",
Enabled: true,
RollbackAllowed: true,
HealthWindowSec: 180,
UpdatedAt: now,
},
"node-2|rap-node-agent": {
ClusterID: "cluster-1",
NodeID: "node-2",
Product: "rap-node-agent",
Channel: "stable",
TargetVersion: &target,
Strategy: "rolling",
Enabled: true,
RollbackAllowed: true,
HealthWindowSec: 180,
UpdatedAt: now,
},
},
updateStatuses: []NodeUpdateStatus{{
ClusterID: "cluster-1",
NodeID: "node-1",
Product: "rap-node-agent",
CurrentVersion: "0.2.341-hostrescuepoll",
TargetVersion: target,
Phase: "download",
Status: "started",
ObservedAt: now,
}},
}
service := NewService(store)
plan, err := service.GetNodeUpdatePlan(context.Background(), GetNodeUpdatePlanInput{
ClusterID: "cluster-1",
NodeID: "node-2",
Product: "rap-node-agent",
CurrentVersion: "0.2.341-hostrescuepoll",
OS: "linux",
Arch: "amd64",
InstallType: "docker",
})
if err != nil {
t.Fatalf("update plan: %v", err)
}
if plan.Action != "none" || plan.Reason != "rollout_lease_wait" {
t.Fatalf("plan should wait for rolling lease, got %+v", plan)
}
if plan.UpdateIntent == nil || !plan.UpdateIntent.RequiredLease || plan.UpdateIntent.TargetVersion != target {
t.Fatalf("missing signed update intent: %+v", plan.UpdateIntent)
}
if plan.RolloutLease == nil || plan.RolloutLease.Status != "waiting" || plan.RolloutLease.ActiveUpdateCnt != 1 || plan.RolloutLease.MaxParallel != 1 {
t.Fatalf("unexpected rollout lease: %+v", plan.RolloutLease)
}
if plan.Artifact != nil {
t.Fatalf("waiting plan must not include executable artifact: %+v", plan.Artifact)
}
}
// TestNodeUpdatePlanRollingGrantsLeaseWhenNoActiveUpdate checks that a single
// node with a "rolling" policy, and a repository that holds no update
// statuses, receives an "update" plan carrying a granted rollout lease and an
// update intent that requires the lease.
func TestNodeUpdatePlanRollingGrantsLeaseWhenNoActiveUpdate(t *testing.T) {
	targetVersion := "0.2.342-dockerrescuereconcile"
	repo := &fakeRepository{
		platformRole: PlatformRoleAdmin,
		clusterNodes: []ClusterNode{{ID: "node-1", Name: "node-1", RegistrationStatus: NodeRegistrationActive, MembershipStatus: "active"}},
		releaseVersions: []ReleaseVersion{{
			ID:        "release-1",
			ClusterID: "cluster-1",
			Product:   "rap-node-agent",
			Version:   targetVersion,
			Channel:   "stable",
			Status:    "active",
			Artifacts: []ReleaseArtifact{{
				ID: "docker", ClusterID: "cluster-1", Product: "rap-node-agent", Version: targetVersion,
				OS: "linux", Arch: "amd64", InstallType: "docker", Kind: "image_tar",
				URL: "quic://cache/agent.tar", SHA256: "docker-sha",
			}},
		}},
		nodeUpdatePolicies: map[string]NodeUpdatePolicy{
			"node-1|rap-node-agent": {
				ClusterID:       "cluster-1",
				NodeID:          "node-1",
				Product:         "rap-node-agent",
				Channel:         "stable",
				TargetVersion:   &targetVersion,
				Strategy:        "rolling",
				Enabled:         true,
				RollbackAllowed: true,
				HealthWindowSec: 180,
			},
		},
	}

	// The node reports an older version than the policy target.
	req := GetNodeUpdatePlanInput{
		ClusterID:      "cluster-1",
		NodeID:         "node-1",
		Product:        "rap-node-agent",
		CurrentVersion: "0.2.341-hostrescuepoll",
		OS:             "linux",
		Arch:           "amd64",
		InstallType:    "docker",
	}
	svc := NewService(repo)
	plan, err := svc.GetNodeUpdatePlan(context.Background(), req)
	if err != nil {
		t.Fatalf("update plan: %v", err)
	}

	// Case expressions are evaluated left to right, so the nil check guards
	// the Status dereference exactly like a chained || would.
	switch {
	case plan.Action != "update", plan.RolloutLease == nil, plan.RolloutLease.Status != "granted":
		t.Fatalf("plan should grant rolling lease, got %+v", plan)
	}
	if intent := plan.UpdateIntent; intent == nil || !intent.RequiredLease {
		t.Fatalf("missing update intent: %+v", intent)
	}
}
func TestNodeUpdatePlanRollingReservesLeaseAtPlanFetch(t *testing.T) {
target := "0.2.343-updateintentstate"
store := &fakeRepository{
platformRole: PlatformRoleAdmin,
clusterNodes: []ClusterNode{
{ID: "node-1", Name: "node-1", RegistrationStatus: NodeRegistrationActive, MembershipStatus: "active"},
{ID: "node-2", Name: "node-2", RegistrationStatus: NodeRegistrationActive, MembershipStatus: "active"},
},
releaseVersions: []ReleaseVersion{{
ID: "release-1",
ClusterID: "cluster-1",
Product: "rap-node-agent",
Version: target,
Channel: "stable",
Status: "active",
Artifacts: []ReleaseArtifact{{
ID: "docker", ClusterID: "cluster-1", Product: "rap-node-agent", Version: target,
OS: "linux", Arch: "amd64", InstallType: "docker", Kind: "image_tar",
URL: "quic://cache/agent.tar", SHA256: "docker-sha",
}},
}},
nodeUpdatePolicies: map[string]NodeUpdatePolicy{},
}
for _, nodeID := range []string{"node-1", "node-2"} {
store.nodeUpdatePolicies[nodeID+"|rap-node-agent"] = NodeUpdatePolicy{
ClusterID: "cluster-1",
NodeID: nodeID,
Product: "rap-node-agent",
Channel: "stable",
TargetVersion: &target,
Strategy: "rolling",
Enabled: true,
RollbackAllowed: true,
HealthWindowSec: 180,
}
}
service := NewService(store)
first, err := service.GetNodeUpdatePlan(context.Background(), GetNodeUpdatePlanInput{
ClusterID: "cluster-1",
NodeID: "node-1",
Product: "rap-node-agent",
CurrentVersion: "0.2.342-dockerrescuereconcile",
OS: "linux",
Arch: "amd64",
InstallType: "docker",
})
if err != nil {
t.Fatalf("first update plan: %v", err)
}
if first.Action != "update" || first.RolloutLease == nil || first.RolloutLease.Status != "granted" {
t.Fatalf("first plan should reserve granted lease, got %+v", first)
}
second, err := service.GetNodeUpdatePlan(context.Background(), GetNodeUpdatePlanInput{
ClusterID: "cluster-1",
NodeID: "node-2",
Product: "rap-node-agent",
CurrentVersion: "0.2.342-dockerrescuereconcile",
OS: "linux",
Arch: "amd64",
InstallType: "docker",
})
if err != nil {
t.Fatalf("second update plan: %v", err)
}
if second.Action != "none" || second.Reason != "rollout_lease_wait" || second.Artifact != nil {
t.Fatalf("second plan should wait on reserved lease, got %+v", second)
}
if second.RolloutLease == nil || second.RolloutLease.Status != "waiting" || second.RolloutLease.ActiveUpdateCnt != 1 {
t.Fatalf("second plan should include waiting lease, got %+v", second.RolloutLease)
}
store.updateStatuses = []NodeUpdateStatus{{
ClusterID: "cluster-1",
NodeID: "node-1",
Product: "rap-node-agent",
TargetVersion: target,
Phase: "health_check",
Status: "succeeded",
ObservedAt: time.Now().UTC(),
}}
afterTerminal, err := service.GetNodeUpdatePlan(context.Background(), GetNodeUpdatePlanInput{
ClusterID: "cluster-1",
NodeID: "node-2",
Product: "rap-node-agent",
CurrentVersion: "0.2.342-dockerrescuereconcile",
OS: "linux",
Arch: "amd64",
InstallType: "docker",
})
if err != nil {
t.Fatalf("second update plan after terminal status: %v", err)
}
if afterTerminal.Action != "update" || afterTerminal.RolloutLease == nil || afterTerminal.RolloutLease.Status != "granted" {
t.Fatalf("terminal status should release reserved lease, got %+v", afterTerminal)
}
}
// TestNodeUpdatePlanRollingIgnoresOlderActiveStatusAfterTerminalStatus seeds
// node-1 with a succeeded health_check status and a "started" download status
// observed five seconds earlier, then expects node-2 to be granted the
// rolling lease.
func TestNodeUpdatePlanRollingIgnoresOlderActiveStatusAfterTerminalStatus(t *testing.T) {
	observedAt := time.Now().UTC()
	targetVersion := "0.2.343-updateintentstate"
	repo := &fakeRepository{
		platformRole: PlatformRoleAdmin,
		clusterNodes: []ClusterNode{
			{ID: "node-1", Name: "node-1", RegistrationStatus: NodeRegistrationActive, MembershipStatus: "active"},
			{ID: "node-2", Name: "node-2", RegistrationStatus: NodeRegistrationActive, MembershipStatus: "active"},
		},
		releaseVersions: []ReleaseVersion{{
			ID:        "release-1",
			ClusterID: "cluster-1",
			Product:   "rap-node-agent",
			Version:   targetVersion,
			Channel:   "stable",
			Status:    "active",
			Artifacts: []ReleaseArtifact{{
				ID: "docker", ClusterID: "cluster-1", Product: "rap-node-agent", Version: targetVersion,
				OS: "linux", Arch: "amd64", InstallType: "docker", Kind: "image_tar",
				URL: "quic://cache/agent.tar", SHA256: "docker-sha",
			}},
		}},
		nodeUpdatePolicies: map[string]NodeUpdatePolicy{
			"node-2|rap-node-agent": {
				ClusterID:       "cluster-1",
				NodeID:          "node-2",
				Product:         "rap-node-agent",
				Channel:         "stable",
				TargetVersion:   &targetVersion,
				Strategy:        "rolling",
				Enabled:         true,
				RollbackAllowed: true,
				HealthWindowSec: 180,
			},
		},
		// The terminal status is listed first and is the newer of the two.
		updateStatuses: []NodeUpdateStatus{
			{ClusterID: "cluster-1", NodeID: "node-1", Product: "rap-node-agent", TargetVersion: targetVersion, Phase: "health_check", Status: "succeeded", ObservedAt: observedAt},
			{ClusterID: "cluster-1", NodeID: "node-1", Product: "rap-node-agent", TargetVersion: targetVersion, Phase: "download", Status: "started", ObservedAt: observedAt.Add(-5 * time.Second)},
		},
	}

	req := GetNodeUpdatePlanInput{
		ClusterID:      "cluster-1",
		NodeID:         "node-2",
		Product:        "rap-node-agent",
		CurrentVersion: "0.2.342-dockerrescuereconcile",
		OS:             "linux",
		Arch:           "amd64",
		InstallType:    "docker",
	}
	svc := NewService(repo)
	plan, err := svc.GetNodeUpdatePlan(context.Background(), req)
	if err != nil {
		t.Fatalf("update plan: %v", err)
	}
	switch {
	case plan.Action != "update", plan.RolloutLease == nil, plan.RolloutLease.Status != "granted":
		t.Fatalf("terminal status should release older active status, got %+v", plan)
	}
}
// TestNodeUpdatePlanIncludesQuorumAuthorityWhenConfigured configures the fake
// repository with a cluster authority key and a one-member quorum descriptor
// (threshold 1, scope "update-authority"), then expects the returned plan to
// carry an AuthorityQuorum envelope that clusterauth.VerifyQuorumRaw accepts
// over the plan's AuthorityPayload.
func TestNodeUpdatePlanIncludesQuorumAuthorityWhenConfigured(t *testing.T) {
	keyPair, err := clusterauth.GenerateKeyPair()
	if err != nil {
		t.Fatalf("generate key: %v", err)
	}

	repo := &fakeRepository{
		platformRole: PlatformRoleAdmin,
		clusterAuthority: ClusterAuthorityKey{
			ClusterAuthorityDescriptor: ClusterAuthorityDescriptor{
				SchemaVersion:        clusterauth.AuthoritySchemaVersion,
				ClusterID:            "cluster-1",
				AuthorityState:       "active",
				KeyAlgorithm:         clusterauth.AlgorithmEd25519,
				PublicKey:            keyPair.PublicKeyB64,
				PublicKeyFingerprint: keyPair.Fingerprint,
				CreatedAt:            time.Now().UTC(),
				UpdatedAt:            time.Now().UTC(),
			},
			PrivateKey: keyPair.PrivateKeyB64,
			// The single quorum member reuses the authority key pair.
			QuorumDescriptor: &QuorumDescriptor{
				SchemaVersion: clusterauth.QuorumSchemaVersion,
				ClusterID:     "cluster-1",
				Epoch:         "epoch-1",
				Threshold:     1,
				Members: []clusterauth.QuorumMember{
					{
						NodeID:               "authority-1",
						Role:                 "update-authority",
						PublicKey:            keyPair.PublicKeyB64,
						PublicKeyFingerprint: keyPair.Fingerprint,
						Scopes:               []string{"update-authority"},
					},
				},
			},
		},
		releaseVersions: []ReleaseVersion{
			{
				ID:        "release-1",
				ClusterID: "cluster-1",
				Product:   "rap-node-agent",
				Version:   "0.1.0-c17z26",
				Channel:   "dev",
				Status:    "active",
				Artifacts: []ReleaseArtifact{
					{ID: "docker", ClusterID: "cluster-1", Product: "rap-node-agent", Version: "0.1.0-c17z26", OS: "linux", Arch: "amd64", InstallType: "docker", Kind: "docker_image_tar", URL: "quic://cache/agent.tar", SHA256: "docker-sha"},
				},
			},
		},
		nodeUpdatePolicies: map[string]NodeUpdatePolicy{
			"node-1|rap-node-agent": {
				ClusterID:       "cluster-1",
				NodeID:          "node-1",
				Product:         "rap-node-agent",
				Channel:         "dev",
				Strategy:        "manual",
				Enabled:         true,
				RollbackAllowed: true,
			},
		},
	}

	req := GetNodeUpdatePlanInput{
		ClusterID:      "cluster-1",
		NodeID:         "node-1",
		Product:        "rap-node-agent",
		CurrentVersion: "0.1.0-c17z25",
		OS:             "linux",
		Arch:           "amd64",
		InstallType:    "docker",
	}
	svc := NewService(repo)
	plan, err := svc.GetNodeUpdatePlan(context.Background(), req)
	if err != nil {
		t.Fatalf("update plan: %v", err)
	}
	if plan.AuthorityQuorum == nil {
		t.Fatalf("update plan must include quorum envelope: %+v", plan)
	}

	// Verify against the descriptor as currently held by the repository.
	verifyErr := clusterauth.VerifyQuorumRaw(*repo.clusterAuthority.QuorumDescriptor, plan.AuthorityPayload, *plan.AuthorityQuorum, "update-authority")
	if verifyErr != nil {
		t.Fatalf("verify quorum authority: %v", verifyErr)
	}
}
// TestNodeUpdatePlanAbsolutizesRelativeArtifactURLs requests a plan with
// ArtifactOrigin "quic://vpn.cin.su:19191/api/v1" for an artifact whose URL
// and first metadata mirror are relative paths. It expects both to come back
// as "quic://vpn.cin.su:19191/downloads/..." and the already absolute CDN
// reference to come back unchanged.
func TestNodeUpdatePlanAbsolutizesRelativeArtifactURLs(t *testing.T) {
	repo := &fakeRepository{
		platformRole: PlatformRoleAdmin,
		releaseVersions: []ReleaseVersion{
			{
				ID:        "release-1",
				ClusterID: "cluster-1",
				Product:   "rap-node-agent",
				Version:   "0.2.93",
				Channel:   "stable",
				Status:    "active",
				Artifacts: []ReleaseArtifact{
					{
						ID:          "docker",
						ClusterID:   "cluster-1",
						Product:     "rap-node-agent",
						Version:     "0.2.93",
						OS:          "linux",
						Arch:        "amd64",
						InstallType: "docker",
						Kind:        "docker_image_tar",
						URL:         "/downloads/rap-node-agent-0.2.93-docker-amd64.tar",
						SHA256:      "docker-sha",
						Metadata:    json.RawMessage(`{"urls":["/downloads/mirror.tar","quic://cdn.example.test/agent.tar"]}`),
					},
				},
			},
		},
		nodeUpdatePolicies: map[string]NodeUpdatePolicy{
			"node-1|rap-node-agent": {
				ClusterID:       "cluster-1",
				NodeID:          "node-1",
				Product:         "rap-node-agent",
				Channel:         "stable",
				Strategy:        "rolling",
				Enabled:         true,
				RollbackAllowed: true,
				HealthWindowSec: 180,
			},
		},
	}

	req := GetNodeUpdatePlanInput{
		ClusterID:      "cluster-1",
		NodeID:         "node-1",
		Product:        "rap-node-agent",
		CurrentVersion: "0.2.92",
		OS:             "linux",
		Arch:           "amd64",
		InstallType:    "docker",
		ArtifactOrigin: "quic://vpn.cin.su:19191/api/v1",
	}
	svc := NewService(repo)
	plan, err := svc.GetNodeUpdatePlan(context.Background(), req)
	if err != nil {
		t.Fatalf("update plan: %v", err)
	}
	if plan.Artifact == nil {
		t.Fatal("expected artifact")
	}

	const (
		wantPrimary = "quic://vpn.cin.su:19191/downloads/rap-node-agent-0.2.93-docker-amd64.tar"
		wantMirror  = "quic://vpn.cin.su:19191/downloads/mirror.tar"
		wantCDN     = "quic://cdn.example.test/agent.tar"
	)
	if got := plan.Artifact.URL; got != wantPrimary {
		t.Fatalf("artifact reference was not absolutized: %q", got)
	}
	// Index 0 is not inspected here; the mirror and CDN references are
	// expected at indexes 1 and 2.
	switch urls := plan.Artifact.URLs; {
	case len(urls) < 3, urls[1] != wantMirror, urls[2] != wantCDN:
		t.Fatalf("artifact references were not preserved/absolutized: %#v", urls)
	}
}
// TestHostAgentUpdatePlanRejectsLinuxArtifactForObservedWindowsNode stores a
// rap-node-agent status whose payload holds a Windows binary path, then asks
// for a rap-host-agent plan as linux/linux_binary while the release offers
// only a Linux artifact. The plan is expected to be "none" with reason
// "host_agent_artifact_platform_mismatch" and no artifact.
func TestHostAgentUpdatePlanRejectsLinuxArtifactForObservedWindowsNode(t *testing.T) {
	repo := &fakeRepository{
		platformRole: PlatformRoleAdmin,
		releaseVersions: []ReleaseVersion{
			{
				ID:        "release-host",
				ClusterID: "cluster-1",
				Product:   "rap-host-agent",
				Version:   "0.2.95",
				Channel:   "stable",
				Status:    "active",
				Artifacts: []ReleaseArtifact{
					{ID: "linux", ClusterID: "cluster-1", Product: "rap-host-agent", Version: "0.2.95", OS: "linux", Arch: "amd64", InstallType: "linux_binary", Kind: "binary", URL: "/downloads/rap-host-agent-linux", SHA256: "linux-sha"},
				},
			},
		},
		nodeUpdatePolicies: map[string]NodeUpdatePolicy{
			"node-1|rap-host-agent": {
				ClusterID:       "cluster-1",
				NodeID:          "node-1",
				Product:         "rap-host-agent",
				Channel:         "stable",
				Strategy:        "rolling",
				Enabled:         true,
				RollbackAllowed: true,
				HealthWindowSec: 180,
			},
		},
		// Status reported by the node agent on the same node.
		updateStatuses: []NodeUpdateStatus{
			{
				ClusterID: "cluster-1",
				NodeID:    "node-1",
				Product:   "rap-node-agent",
				Phase:     "plan",
				Status:    "noop",
				Payload:   json.RawMessage(`{"binary_path":"C:\\Program Files\\RAP\\node\\rap-node-agent.exe","task":"RAP Node Agent node"}`),
			},
		},
	}

	req := GetNodeUpdatePlanInput{
		ClusterID:      "cluster-1",
		NodeID:         "node-1",
		Product:        "rap-host-agent",
		CurrentVersion: "0.2.92",
		OS:             "linux",
		Arch:           "amd64",
		InstallType:    "linux_binary",
	}
	svc := NewService(repo)
	plan, err := svc.GetNodeUpdatePlan(context.Background(), req)
	if err != nil {
		t.Fatalf("update plan: %v", err)
	}
	switch {
	case plan.Action != "none", plan.Reason != "host_agent_artifact_platform_mismatch", plan.Artifact != nil:
		t.Fatalf("unexpected mismatch plan: %+v", plan)
	}
}
// TestHostAgentUpdatePlanAllowsWindowsArtifactForObservedWindowsNode stores a
// rap-node-agent status whose payload holds a Windows binary path and asks
// for a rap-host-agent plan as windows/windows_binary. The release offers a
// matching windows_binary artifact, which the "update" plan is expected to
// select (artifact ID "windows").
func TestHostAgentUpdatePlanAllowsWindowsArtifactForObservedWindowsNode(t *testing.T) {
	repo := &fakeRepository{
		platformRole: PlatformRoleAdmin,
		releaseVersions: []ReleaseVersion{
			{
				ID:        "release-host",
				ClusterID: "cluster-1",
				Product:   "rap-host-agent",
				Version:   "0.2.95",
				Channel:   "stable",
				Status:    "active",
				Artifacts: []ReleaseArtifact{
					{ID: "windows", ClusterID: "cluster-1", Product: "rap-host-agent", Version: "0.2.95", OS: "windows", Arch: "amd64", InstallType: "windows_binary", Kind: "binary", URL: "/downloads/rap-host-agent.exe", SHA256: "windows-sha"},
				},
			},
		},
		nodeUpdatePolicies: map[string]NodeUpdatePolicy{
			"node-1|rap-host-agent": {
				ClusterID:       "cluster-1",
				NodeID:          "node-1",
				Product:         "rap-host-agent",
				Channel:         "stable",
				Strategy:        "rolling",
				Enabled:         true,
				RollbackAllowed: true,
				HealthWindowSec: 180,
			},
		},
		// Status reported by the node agent on the same node.
		updateStatuses: []NodeUpdateStatus{
			{
				ClusterID: "cluster-1",
				NodeID:    "node-1",
				Product:   "rap-node-agent",
				Phase:     "plan",
				Status:    "noop",
				Payload:   json.RawMessage(`{"binary_path":"C:\\Program Files\\RAP\\node\\rap-node-agent.exe"}`),
			},
		},
	}

	req := GetNodeUpdatePlanInput{
		ClusterID:      "cluster-1",
		NodeID:         "node-1",
		Product:        "rap-host-agent",
		CurrentVersion: "0.2.92",
		OS:             "windows",
		Arch:           "amd64",
		InstallType:    "windows_binary",
	}
	svc := NewService(repo)
	plan, err := svc.GetNodeUpdatePlan(context.Background(), req)
	if err != nil {
		t.Fatalf("update plan: %v", err)
	}
	switch {
	case plan.Action != "update", plan.Artifact == nil, plan.Artifact.ID != "windows":
		t.Fatalf("unexpected windows plan: %+v", plan)
	}
}
// TestHostAgentUpdatePlanAcceptsWindowsServiceArtifactForWindowsBinaryRequest
// asks for a rap-host-agent plan with install type "windows_binary" while the
// release only offers a "windows_service" artifact. The plan is still
// expected to be "update" and to select that artifact (ID "windows-service").
func TestHostAgentUpdatePlanAcceptsWindowsServiceArtifactForWindowsBinaryRequest(t *testing.T) {
	repo := &fakeRepository{
		platformRole: PlatformRoleAdmin,
		releaseVersions: []ReleaseVersion{
			{
				ID:        "release-host",
				ClusterID: "cluster-1",
				Product:   "rap-host-agent",
				Version:   "0.2.95",
				Channel:   "stable",
				Status:    "active",
				Artifacts: []ReleaseArtifact{
					{ID: "windows-service", ClusterID: "cluster-1", Product: "rap-host-agent", Version: "0.2.95", OS: "windows", Arch: "amd64", InstallType: "windows_service", Kind: "binary", URL: "/downloads/rap-host-agent.exe", SHA256: "windows-sha"},
				},
			},
		},
		nodeUpdatePolicies: map[string]NodeUpdatePolicy{
			"node-1|rap-host-agent": {
				ClusterID:       "cluster-1",
				NodeID:          "node-1",
				Product:         "rap-host-agent",
				Channel:         "stable",
				Strategy:        "rolling",
				Enabled:         true,
				RollbackAllowed: true,
				HealthWindowSec: 180,
			},
		},
		// Status reported by the node agent on the same node.
		updateStatuses: []NodeUpdateStatus{
			{
				ClusterID: "cluster-1",
				NodeID:    "node-1",
				Product:   "rap-node-agent",
				Phase:     "plan",
				Status:    "noop",
				Payload:   json.RawMessage(`{"binary_path":"C:\\Program Files\\RAP\\node\\rap-node-agent.exe"}`),
			},
		},
	}

	req := GetNodeUpdatePlanInput{
		ClusterID:      "cluster-1",
		NodeID:         "node-1",
		Product:        "rap-host-agent",
		CurrentVersion: "0.2.92",
		OS:             "windows",
		Arch:           "amd64",
		InstallType:    "windows_binary",
	}
	svc := NewService(repo)
	plan, err := svc.GetNodeUpdatePlan(context.Background(), req)
	if err != nil {
		t.Fatalf("update plan: %v", err)
	}
	switch {
	case plan.Action != "update", plan.Artifact == nil, plan.Artifact.ID != "windows-service":
		t.Fatalf("unexpected windows Compatibility plan: %+v", plan)
	}
}
// TestStaleNodeRiskReportTracksWindowsRecoveryCompatibility builds a fake
// repository containing one node last seen an hour ago, node-agent and
// host-agent releases that each ship only a "windows_service" artifact,
// update statuses whose payloads hold Windows binary paths, and one heartbeat
// with mesh/registry metadata. It then calls GetStaleNodeRiskReport and
// checks the node-level fields, the summary counters, the blocked operations
// and the host-agent product risks.
func TestStaleNodeRiskReportTracksWindowsRecoveryCompatibility(t *testing.T) {
// The node was last seen one hour before the test runs.
staleAt := time.Now().UTC().Add(-time.Hour)
reportedVersion := "0.2.309-latencyaware"
store := &fakeRepository{
platformRole: PlatformRoleAdmin,
clusterNodes: []ClusterNode{
{
ID: "node-1",
NodeKey: "node-key-1",
Name: "ifcm-rufms-s-mo1cr",
RegistrationStatus: NodeRegistrationActive,
HealthStatus: "healthy",
ReportedVersion: &reportedVersion,
LastSeenAt: &staleAt,
MembershipStatus: "active",
},
},
// Both releases only publish a windows_service artifact.
releaseVersions: []ReleaseVersion{
{
ID: "release-node",
ClusterID: "cluster-1",
Product: "rap-node-agent",
Version: "0.2.318-quic-decoupled",
Channel: "stable",
Status: "active",
Artifacts: []ReleaseArtifact{
{ID: "node-win-svc", ClusterID: "cluster-1", Product: "rap-node-agent", Version: "0.2.318-quic-decoupled", OS: "windows", Arch: "amd64", InstallType: "windows_service", Kind: "binary", URL: "/downloads/rap-node-agent.exe", SHA256: "node-sha"},
},
},
{
ID: "release-host",
ClusterID: "cluster-1",
Product: "rap-host-agent",
Version: "0.2.284-quorumauthority",
Channel: "stable",
Status: "active",
Artifacts: []ReleaseArtifact{
{ID: "host-win-svc", ClusterID: "cluster-1", Product: "rap-host-agent", Version: "0.2.284-quorumauthority", OS: "windows", Arch: "amd64", InstallType: "windows_service", Kind: "binary", URL: "/downloads/rap-host-agent.exe", SHA256: "host-sha"},
},
},
},
nodeUpdatePolicies: map[string]NodeUpdatePolicy{
"node-1|rap-node-agent": {
ClusterID: "cluster-1",
NodeID: "node-1",
Product: "rap-node-agent",
Channel: "stable",
Strategy: "manual",
Enabled: true,
RollbackAllowed: true,
HealthWindowSec: 180,
},
"node-1|rap-host-agent": {
ClusterID: "cluster-1",
NodeID: "node-1",
Product: "rap-host-agent",
Channel: "stable",
Strategy: "rolling",
Enabled: true,
RollbackAllowed: true,
HealthWindowSec: 180,
},
},
// One "plan"/"noop" status per product, each observed 30 minutes ago with a
// Windows-style binary_path in its payload.
updateStatuses: []NodeUpdateStatus{
{
ClusterID: "cluster-1",
NodeID: "node-1",
Product: "rap-node-agent",
CurrentVersion: "0.2.309-latencyaware",
Phase: "plan",
Status: "noop",
ObservedAt: time.Now().UTC().Add(-30 * time.Minute),
Payload: json.RawMessage(`{"binary_path":"C:\\Program Files\\RAP\\ifcm-rufms-s-mo1cr\\rap-node-agent.exe","task":"RAP Node Agent ifcm-rufms-s-mo1cr","reason":"already_current"}`),
},
{
ClusterID: "cluster-1",
NodeID: "node-1",
Product: "rap-host-agent",
CurrentVersion: "0.2.183",
Phase: "plan",
Status: "noop",
ObservedAt: time.Now().UTC().Add(-30 * time.Minute),
Payload: json.RawMessage(`{"binary_path":"C:\\Program Files\\RAP\\ifcm-rufms-s-mo1cr\\rap-host-agent.exe","reason":"no_matching_artifact"}`),
},
},
// Heartbeat metadata: 2 ready recovery peers against a target of 3 (deficit
// 1), a candidate_only registry runtime with zero resolved services, and an
// outbound session that reports a fabric control endpoint.
heartbeats: map[string][]NodeHeartbeat{
"node-1": {
{
ClusterID: "cluster-1",
NodeID: "node-1",
Metadata: json.RawMessage(`{
"mesh_endpoint_report": {
"peer_recovery_ready": 2,
"peer_recovery_deficit": 1,
"region": "ifcm"
},
"mesh_peer_recovery_report": {
"target_ready_peers": 3
},
"fabric_registry_runtime_report": {
"status": "candidate_only",
"resolved_service_count": 0
},
"mesh_outbound_session_report": {
"fabric_control_endpoint": "quic://vpn.cin.su:19191/api/v1",
"status": "ready"
}
}`),
},
},
},
}
service := NewService(store)
report, err := service.GetStaleNodeRiskReport(context.Background(), GetStaleNodeRiskReportInput{
ActorUserID: "admin-1",
ClusterID: "cluster-1",
})
if err != nil {
t.Fatalf("risk report: %v", err)
}
if len(report.Nodes) != 1 {
t.Fatalf("expected one node, got %d", len(report.Nodes))
}
// Node-level expectations; the values mirror the heartbeat metadata above.
node := report.Nodes[0]
if !node.HeartbeatStale {
t.Fatalf("expected stale heartbeat: %+v", node)
}
if !node.DirectPeerAlert || node.DirectPeerReadyCount != 2 || node.DirectPeerTargetCount != 3 || node.DirectPeerDeficit != 1 {
t.Fatalf("expected direct peer alert 2/3 with deficit 1: %+v", node)
}
if node.Area != "ifcm" {
t.Fatalf("node area = %q, want ifcm", node.Area)
}
if !node.StandardControlDependency || node.StandardControlURL != "quic://vpn.cin.su:19191/api/v1" {
t.Fatalf("expected standard control dependency on node: %+v", node)
}
if node.RegistryRuntimeStatus != "candidate_only" || node.ResolvedServiceCount != 0 {
t.Fatalf("expected candidate-only registry status on node: %+v", node)
}
if !node.Blocked {
t.Fatalf("expected stale node to be marked blocked: %+v", node)
}
// Report-level expectations: cleanup stays disallowed and exactly four
// operations are blocked.
if report.FabricStandardCleanupAllowed {
t.Fatalf("fabric standard cleanup must stay blocked while stale nodes remain: %+v", report)
}
if len(report.BlockedOperations) != 4 {
t.Fatalf("expected blocked operations to include standard control and bridge hold removal: %+v", report)
}
// Summary counters.
if report.Summary.ArtifactGapNodes != 0 {
t.Fatalf("artifact gap nodes = %d, want 0", report.Summary.ArtifactGapNodes)
}
if report.Summary.StandardControlDependencyNodes != 1 {
t.Fatalf("standard control dependency nodes = %d, want 1", report.Summary.StandardControlDependencyNodes)
}
if report.Summary.RegistryCandidateOnlyNodes != 1 {
t.Fatalf("registry candidate-only nodes = %d, want 1", report.Summary.RegistryCandidateOnlyNodes)
}
if report.Summary.DirectPeerAlertNodes != 1 {
t.Fatalf("direct peer alert nodes = %d, want 1", report.Summary.DirectPeerAlertNodes)
}
if !containsString(report.BlockedOperations, "remove_standard_control_overlap") {
t.Fatalf("expected remove_standard_control_overlap to be blocked: %+v", report.BlockedOperations)
}
if report.Summary.UnknownProfileNodes != 0 {
t.Fatalf("unknown profile nodes = %d, want 0", report.Summary.UnknownProfileNodes)
}
if report.Summary.WaitingUpdateStatusNodes != 0 {
t.Fatalf("waiting update status nodes = %d, want 0", report.Summary.WaitingUpdateStatusNodes)
}
if report.Summary.UnknownVersionNodes != 0 {
t.Fatalf("unknown version nodes = %d, want 0", report.Summary.UnknownVersionNodes)
}
if report.Summary.StandardRecoveryContractNodes != 1 {
t.Fatalf("standard recovery contract nodes = %d, want 1", report.Summary.StandardRecoveryContractNodes)
}
// Bridge hold must be required and must list exactly node-1.
if !report.BridgeHoldRequired {
t.Fatalf("expected bridge hold to be active: %+v", report)
}
if len(report.BridgeHoldNodeIDs) != 1 || report.BridgeHoldNodeIDs[0] != "node-1" {
t.Fatalf("bridge hold node ids = %+v, want node-1", report.BridgeHoldNodeIDs)
}
if !containsString(report.BlockedOperations, "remove_recovery_bridge_overlap") {
t.Fatalf("expected remove_recovery_bridge_overlap to be blocked: %+v", report.BlockedOperations)
}
if report.Summary.WaitingRecoveryHeartbeatNodes != 0 {
t.Fatalf("waiting recovery heartbeat nodes = %d, want 0", report.Summary.WaitingRecoveryHeartbeatNodes)
}
// Locate the host-agent entry; a pointer into the slice is taken so the
// failure messages print the actual element.
var hostProduct *StaleNodeRiskProduct
for i := range node.Products {
if node.Products[i].Product == "rap-host-agent" {
hostProduct = &node.Products[i]
break
}
}
if hostProduct == nil {
t.Fatalf("host-agent product risk missing: %+v", node)
}
// The windows_service artifact must count as compatible for the host agent,
// so no artifact-gap risk may remain, while a standard recovery contract
// risk is still expected.
if !hostProduct.CompatibleArtifactFound {
t.Fatalf("expected compatible host-agent artifact via windows Compatibility alias: %+v", hostProduct)
}
if containsAnyRiskWithPrefix(hostProduct.Risks, "stale_node_no_compatible_rap-host-agent_artifact") {
t.Fatalf("host-agent artifact gap risk should be cleared: %+v", hostProduct)
}
if !containsAnyRiskWithPrefix(hostProduct.Risks, "stale_node_standard_recovery_contract_") {
t.Fatalf("expected stale standard recovery contract risk on host-agent: %+v", hostProduct)
}
}
// TestGetStaleNodeRiskReportFlagsStandardControlDependencyOnHealthyNode uses
// a node last seen 30 seconds ago whose heartbeat reports a fabric control
// endpoint. The report is expected to keep fabric standard cleanup blocked,
// count one standard-control-dependency node, block
// "remove_standard_control_overlap", and not mark the heartbeat as stale.
func TestGetStaleNodeRiskReportFlagsStandardControlDependencyOnHealthyNode(t *testing.T) {
	startedAt := time.Now().UTC()
	repo := &fakeRepository{
		platformRole: PlatformRoleAdmin,
		clusterNodes: []ClusterNode{
			{
				ID:                 "node-1",
				Name:               "ifcm-rufms-s-mo1cr",
				RegistrationStatus: NodeRegistrationActive,
				HealthStatus:       "healthy",
				LastSeenAt:         ptrTime(startedAt.Add(-30 * time.Second)),
			},
		},
		heartbeats: map[string][]NodeHeartbeat{
			"node-1": {
				{
					ClusterID: "cluster-1",
					NodeID:    "node-1",
					Metadata: json.RawMessage(`{
"mesh_endpoint_report": {
"region": "ifcm",
"peer_recovery_ready": 3,
"peer_recovery_deficit": 0
},
"mesh_peer_recovery_report": {
"target_ready_peers": 3
},
"fabric_registry_runtime_report": {
"status": "candidate_only",
"resolved_service_count": 0
},
"mesh_outbound_session_report": {
"fabric_control_endpoint": "quic://vpn.cin.su:19191/api/v1",
"status": "ready"
}
}`),
				},
			},
		},
	}

	req := GetStaleNodeRiskReportInput{
		ActorUserID: "admin-1",
		ClusterID:   "cluster-1",
	}
	svc := NewService(repo)
	report, err := svc.GetStaleNodeRiskReport(context.Background(), req)
	if err != nil {
		t.Fatalf("risk report: %v", err)
	}

	if report.FabricStandardCleanupAllowed {
		t.Fatalf("fabric standard cleanup should stay blocked for active standard control dependency: %+v", report)
	}
	if got := report.Summary.StandardControlDependencyNodes; got != 1 {
		t.Fatalf("standard control dependency nodes = %d, want 1", got)
	}
	if blocked := containsString(report.BlockedOperations, "remove_standard_control_overlap"); !blocked {
		t.Fatalf("expected remove_standard_control_overlap to be blocked: %+v", report.BlockedOperations)
	}
	// The length check comes first so the element is only read when present.
	switch nodes := report.Nodes; {
	case len(nodes) != 1, !nodes[0].StandardControlDependency, nodes[0].HeartbeatStale:
		t.Fatalf("unexpected risk node: %+v", nodes)
	}
}
// TestCanonicalAreaLabelGroupsHomeAndTest is a table-driven check of
// canonicalAreaLabel. The table expects "home", "test" and "docker-test"
// areas (or an empty area with a home-/test- node name) to map to
// "home-test", "usa-los" to map to "usa", and "ifcm" to map to itself.
//
// Mismatches are reported with t.Errorf so that one failing row does not
// hide failures in the rows after it.
func TestCanonicalAreaLabelGroupsHomeAndTest(t *testing.T) {
	cases := []struct {
		rawArea  string
		nodeName string
		want     string
	}{
		{rawArea: "home", nodeName: "home-1", want: "home-test"},
		{rawArea: "docker-test", nodeName: "test-1", want: "home-test"},
		{rawArea: "test", nodeName: "test-2", want: "home-test"},
		{rawArea: "", nodeName: "home-2", want: "home-test"},
		{rawArea: "", nodeName: "test-3", want: "home-test"},
		{rawArea: "usa-los", nodeName: "usa-los-1", want: "usa"},
		{rawArea: "ifcm", nodeName: "ifcm-rufms-s-mo1cr", want: "ifcm"},
	}
	for _, tc := range cases {
		if got := canonicalAreaLabel(tc.rawArea, tc.nodeName); got != tc.want {
			t.Errorf("canonicalAreaLabel(%q, %q) = %q, want %q", tc.rawArea, tc.nodeName, got, tc.want)
		}
	}
}
func TestDirectCandidatePeerNodeIDsFromManager(t *testing.T) {
report := heartbeatMeshPeerConnectionManagerReport{
ProbeResults: []heartbeatMeshPeerConnectionProbeResult{
{
NodeID: "node-home",
DirectCandidate: true,
TransportMode: "direct",
CandidateResults: []heartbeatMeshPeerConnectionCandidate{{Transport: "direct_quic"}},
},
{
NodeID: "node-relay-upgrade",
TransportMode: "relay_quic",
DirectCandidate: false,
CandidateResults: []heartbeatMeshPeerConnectionCandidate{
{Transport: "relay_quic"},
{Transport: "direct_quic"},
},
},
{
NodeID: "node-relay-only",
TransportMode: "relay_quic",
DirectCandidate: false,
CandidateResults: []heartbeatMeshPeerConnectionCandidate{
{Transport: "relay_quic"},
},
},
},
}
got := directCandidatePeerNodeIDsFromManager(report)
want := []string{"node-home", "node-relay-upgrade"}
if !reflect.DeepEqual(got, want) {
t.Fatalf("directCandidatePeerNodeIDsFromManager() = %#v, want %#v", got, want)
}
}
// TestGetStaleNodeRiskReportFlagsRegistryJoinContractMissingOnNewNode uses a
// node last seen 30 seconds ago whose heartbeat reports registry runtime
// status "missing". The report is expected to flag
// RegistryJoinContractMissing on the node, count it once in the summary, and
// raise the "registry_join_missing" alert.
func TestGetStaleNodeRiskReportFlagsRegistryJoinContractMissingOnNewNode(t *testing.T) {
	startedAt := time.Now().UTC()
	agentVersion := "0.2.327-registryjoinrewrite"
	repo := &fakeRepository{
		platformRole: PlatformRoleAdmin,
		clusterNodes: []ClusterNode{
			{
				ID:                 "node-1",
				Name:               "usa-los-1",
				RegistrationStatus: NodeRegistrationActive,
				HealthStatus:       "healthy",
				ReportedVersion:    &agentVersion,
				LastSeenAt:         ptrTime(startedAt.Add(-30 * time.Second)),
			},
		},
		heartbeats: map[string][]NodeHeartbeat{
			"node-1": {
				{
					ClusterID:       "cluster-1",
					NodeID:          "node-1",
					ReportedVersion: &agentVersion,
					ObservedAt:      startedAt,
					Metadata: json.RawMessage(`{
"mesh_endpoint_report":{"region":"usa-los","peer_cache_peers":7},
"fabric_registry_runtime_report":{"status":"missing","resolved_service_count":0}
}`),
				},
			},
		},
	}

	req := GetStaleNodeRiskReportInput{
		ActorUserID: "admin-1",
		ClusterID:   "cluster-1",
	}
	svc := NewService(repo)
	report, err := svc.GetStaleNodeRiskReport(context.Background(), req)
	if err != nil {
		t.Fatalf("risk report: %v", err)
	}

	// The length check comes first so the element is only read when present.
	switch nodes := report.Nodes; {
	case len(nodes) != 1, !nodes[0].RegistryJoinContractMissing:
		t.Fatalf("expected registry join missing node, got %+v", nodes)
	}
	if got := report.Summary.RegistryJoinContractMissingNodes; got != 1 {
		t.Fatalf("registry join missing nodes = %d, want 1", got)
	}
	if alerts := report.Nodes[0].Alerts; !containsString(alerts, "registry_join_missing") {
		t.Fatalf("expected registry join missing alert, got %+v", alerts)
	}
}
// TestGetStaleNodeRiskReportFlagsPostUpdateHeartbeatGap seeds a node whose
// latest heartbeat (200 seconds ago) is older than its succeeded
// health_check update status (150 seconds ago). The report is expected to
// count one post-update heartbeat gap node, flag the gap on the node and on
// its first product entry, and list the
// "post_update_heartbeat_gap_rap-node-agent" risk.
func TestGetStaleNodeRiskReportFlagsPostUpdateHeartbeatGap(t *testing.T) {
	now := time.Now().UTC()
	reportedVersion := "0.2.329-updatepathjoinfix"
	targetVersion := "0.2.330-joindiag"
	store := &fakeRepository{
		platformRole: PlatformRoleAdmin,
		clusterNodes: []ClusterNode{
			{
				ID:                 "node-1",
				Name:               "usa-los-1",
				RegistrationStatus: NodeRegistrationActive,
				HealthStatus:       "healthy",
				ReportedVersion:    &reportedVersion,
				LastSeenAt:         ptrTime(now.Add(-3 * time.Minute)),
			},
		},
		nodeUpdatePolicies: map[string]NodeUpdatePolicy{
			"node-1|rap-node-agent": {
				NodeID:        "node-1",
				Product:       "rap-node-agent",
				Channel:       "stable",
				Strategy:      "manual",
				Enabled:       true,
				TargetVersion: &targetVersion,
			},
		},
		// The update finished after the last heartbeat was observed.
		updateStatuses: []NodeUpdateStatus{
			{
				ClusterID:      "cluster-1",
				NodeID:         "node-1",
				Product:        "rap-node-agent",
				CurrentVersion: reportedVersion,
				TargetVersion:  targetVersion,
				Phase:          "health_check",
				Status:         "succeeded",
				ObservedAt:     now.Add(-150 * time.Second),
				Payload:        json.RawMessage(`{"systemd_unit":"rap-node-agent-usa-los-1.service"}`),
			},
		},
		heartbeats: map[string][]NodeHeartbeat{
			"node-1": {
				{
					ClusterID:       "cluster-1",
					NodeID:          "node-1",
					ReportedVersion: &reportedVersion,
					ObservedAt:      now.Add(-200 * time.Second),
					Metadata:        json.RawMessage(`{"mesh_endpoint_report":{"region":"usa","peer_cache_peers":7},"fabric_registry_runtime_report":{"status":"missing","resolved_service_count":0}}`),
				},
			},
		},
	}
	service := NewService(store)
	report, err := service.GetStaleNodeRiskReport(context.Background(), GetStaleNodeRiskReportInput{
		ActorUserID: "admin-1",
		ClusterID:   "cluster-1",
	})
	if err != nil {
		t.Fatalf("risk report: %v", err)
	}
	if report.Summary.PostUpdateHeartbeatGapNodes != 1 {
		t.Fatalf("post update heartbeat gap nodes = %d, want 1", report.Summary.PostUpdateHeartbeatGapNodes)
	}
	if len(report.Nodes) != 1 || !report.Nodes[0].PostUpdateHeartbeatGap {
		t.Fatalf("expected post update heartbeat gap node, got %+v", report.Nodes)
	}
	// Guard the index so an empty product list fails with a diagnostic
	// instead of panicking with an index-out-of-range error.
	products := report.Nodes[0].Products
	if len(products) == 0 {
		t.Fatalf("expected at least one product risk entry, got %+v", report.Nodes[0])
	}
	nodeProduct := products[0]
	if !nodeProduct.PostUpdateHeartbeatGap {
		t.Fatalf("expected product post update heartbeat gap, got %+v", nodeProduct)
	}
	if !containsString(report.Nodes[0].Risks, "post_update_heartbeat_gap_rap-node-agent") {
		t.Fatalf("expected post update heartbeat gap risk, got %+v", report.Nodes[0].Risks)
	}
}
// TestFabricRegistryRecordsForUpdatePlanIncludeDesiredFabricListenerPublicSeeds
// sets up two nodes: "node-home" only reports a private LAN endpoint in its
// heartbeat but has a desired "fabric-listener" workload that declares a
// public endpoint, and "node-usa" reports a public endpoint directly. The
// JSON returned by fabricRegistryRecordsForUpdatePlan is expected to decode
// into three records, each listing both public addresses.
func TestFabricRegistryRecordsForUpdatePlanIncludeDesiredFabricListenerPublicSeeds(t *testing.T) {
	repo := &fakeRepository{
		clusterNodes: []ClusterNode{
			{
				ID:                 "node-home",
				Name:               "home-1",
				RegistrationStatus: NodeRegistrationActive,
				HealthStatus:       "healthy",
				MembershipStatus:   "active",
			},
			{
				ID:                 "node-usa",
				Name:               "usa-los-1",
				RegistrationStatus: NodeRegistrationActive,
				HealthStatus:       "healthy",
				MembershipStatus:   "active",
			},
		},
		heartbeats: map[string][]NodeHeartbeat{
			"node-home": {
				{
					ClusterID:  "cluster-1",
					NodeID:     "node-home",
					ObservedAt: time.Now().UTC(),
					Metadata: json.RawMessage(`{
"mesh_endpoint_report":{
"region":"home",
"peer_endpoint":"quic://192.168.200.85:18080",
"endpoint_candidates":[
{
"endpoint_id":"home-1-lan",
"address":"quic://192.168.200.85:18080",
"transport":"direct_quic",
"reachability":"private",
"connectivity_mode":"private_lan",
"region":"home"
}
]
}
}`),
				},
			},
			"node-usa": {
				{
					ClusterID:  "cluster-1",
					NodeID:     "node-usa",
					ObservedAt: time.Now().UTC(),
					Metadata: json.RawMessage(`{
"mesh_endpoint_report":{
"region":"usa",
"peer_endpoint":"quic://195.123.240.88:19131",
"endpoint_candidates":[
{
"endpoint_id":"usa-public",
"address":"quic://195.123.240.88:19131",
"transport":"direct_quic",
"reachability":"public",
"connectivity_mode":"direct",
"region":"usa",
"metadata":{"tls_cert_sha256":"usa-cert"}
}
]
}
}`),
				},
			},
		},
		// Desired listener config is the only place the public home address appears.
		desiredWorkloads: []NodeWorkloadDesiredState{
			{
				ClusterID:    "cluster-1",
				NodeID:       "node-home",
				ServiceType:  "fabric-listener",
				DesiredState: "enabled",
				Config: json.RawMessage(`{
"listen_addr":"0.0.0.0:18080",
"region":"home",
"endpoint_candidates":[
{
"endpoint_id":"home-1-public",
"address":"quic://94.141.118.222:19199",
"transport":"direct_quic",
"reachability":"public",
"connectivity_mode":"direct",
"region":"home",
"priority":20,
"metadata":{"tls_cert_sha256":"home-cert"}
}
]
}`),
			},
		},
	}

	svc := NewService(repo)
	payload, err := svc.fabricRegistryRecordsForUpdatePlan(context.Background(), "cluster-1")
	if err != nil {
		t.Fatalf("fabricRegistryRecordsForUpdatePlan: %v", err)
	}
	var records []fabricRegistryJoinContractRecord
	if decodeErr := json.Unmarshal(payload, &records); decodeErr != nil {
		t.Fatalf("unmarshal records: %v", decodeErr)
	}
	if n := len(records); n != 3 {
		t.Fatalf("records len = %d, want 3", n)
	}

	const (
		homePublic = "quic://94.141.118.222:19199"
		usaPublic  = "quic://195.123.240.88:19131"
	)
	for _, record := range records {
		// Collect every endpoint address advertised by this record.
		addresses := make([]string, 0, len(record.Endpoints))
		for _, ep := range record.Endpoints {
			addresses = append(addresses, ep.Address)
		}
		if !containsString(addresses, homePublic) {
			t.Fatalf("record %s missing desired public home endpoint: %+v", record.Service, addresses)
		}
		if !containsString(addresses, usaPublic) {
			t.Fatalf("record %s missing usa endpoint: %+v", record.Service, addresses)
		}
	}
}
func TestGetStaleNodeRiskReportFlagsUpdaterSubscriptionGap(t *testing.T) {
now := time.Now().UTC()
reportedVersion := "0.2.324-wakeupdater"
store := &fakeRepository{
platformRole: PlatformRoleAdmin,
clusterNodes: []ClusterNode{
{
ID: "node-1",
NodeKey: "node-key-1",
Name: "ifcm-rufms-s-mo1cr",
RegistrationStatus: NodeRegistrationActive,
HealthStatus: "healthy",
ReportedVersion: &reportedVersion,
LastSeenAt: ptrTime(now.Add(-2 * time.Minute)),
MembershipStatus: "active",
},
},
releaseVersions: []ReleaseVersion{
{
ID: "release-node",
ClusterID: "cluster-1",
Product: "rap-node-agent",
Version: "0.2.324-wakeupdater",
Channel: "stable",
Status: "active",
Artifacts: []ReleaseArtifact{
{ID: "node-win-svc", ClusterID: "cluster-1", Product: "rap-node-agent", Version: "0.2.324-wakeupdater", OS: "windows", Arch: "amd64", InstallType: "windows_service", Kind: "binary", URL: "/downloads/rap-node-agent.exe", SHA256: "node-sha"},
},
},
{
ID: "release-host",
ClusterID: "cluster-1",
Product: "rap-host-agent",
Version: "0.2.287-wakeupdater",
Channel: "stable",
Status: "active",
Artifacts: []ReleaseArtifact{
{ID: "host-win-svc", ClusterID: "cluster-1", Product: "rap-host-agent", Version: "0.2.287-wakeupdater", OS: "windows", Arch: "amd64", InstallType: "windows_service", Kind: "binary", URL: "/downloads/rap-host-agent.exe", SHA256: "host-sha"},
},
},
},
nodeUpdatePolicies: map[string]NodeUpdatePolicy{
"node-1|rap-node-agent": {
ClusterID: "cluster-1",
NodeID: "node-1",
Product: "rap-node-agent",
Channel: "stable",
Strategy: "rolling",
Enabled: true,
RollbackAllowed: true,
HealthWindowSec: 180,
TargetVersion: strPtr("0.2.324-wakeupdater"),
},
"node-1|rap-host-agent": {
ClusterID: "cluster-1",
NodeID: "node-1",
Product: "rap-host-agent",
Channel: "stable",
Strategy: "rolling",
Enabled: true,
RollbackAllowed: true,
HealthWindowSec: 180,
TargetVersion: strPtr("0.2.287-wakeupdater"),
},
},
heartbeats: map[string][]NodeHeartbeat{
"node-1": {
{
ClusterID: "cluster-1",
NodeID: "node-1",
HealthStatus: "healthy",
ReportedVersion: &reportedVersion,
ObservedAt: now,
Metadata: json.RawMessage(`{
"mesh_endpoint_report":{"region":"ifcm","peer_cache_peers":7,"endpoint_candidates":[{"transport":"direct_quic","reachability":"public"}]},
"mesh_peer_connection_manager_report":{"probe_results":[{"node_id":"peer-a","candidate_results":[{"transport":"direct_quic","connection_state":"ready"}]}]},
"fabric_registry_runtime_report":{"status":"missing","resolved_service_count":0}
}`),
},
},
},
updateStatuses: []NodeUpdateStatus{
{
ClusterID: "cluster-1",
NodeID: "node-1",
Product: "rap-node-agent",
CurrentVersion: "0.2.322-controlendpointsrewrite",
TargetVersion: "0.2.322-controlendpointsrewrite",
Phase: "plan",
Status: "noop",
ObservedAt: now.Add(-2 * time.Hour),
},
{
ClusterID: "cluster-1",
NodeID: "node-1",
Product: "rap-host-agent",
CurrentVersion: "0.2.285-controlendpointsrewrite",
TargetVersion: "0.2.285-controlendpointsrewrite",
Phase: "plan",
Status: "noop",
ObservedAt: now.Add(-2 * time.Hour),
},
},
}
service := NewService(store)
report, err := service.GetStaleNodeRiskReport(context.Background(), GetStaleNodeRiskReportInput{
ActorUserID: "admin-user",
ClusterID: "cluster-1",
})
if err != nil {
t.Fatalf("stale risk report: %v", err)
}
if report.Summary.UpdaterSubscriptionAlertNodes != 1 {
t.Fatalf("expected updater subscription summary count 1, got %d", report.Summary.UpdaterSubscriptionAlertNodes)
}
if report.Summary.UpdaterWakeUnsupportedNodes != 1 {
t.Fatalf("expected updater wake unsupported summary count 1, got %d", report.Summary.UpdaterWakeUnsupportedNodes)
}
if report.Summary.UpdaterRuntimeMissingNodes != 1 {
t.Fatalf("expected updater runtime missing summary count 1, got %d", report.Summary.UpdaterRuntimeMissingNodes)
}
if len(report.Nodes) != 1 || !report.Nodes[0].UpdaterSubscriptionAlert {
t.Fatalf("expected node updater subscription alert, got %+v", report.Nodes)
}
if !report.Nodes[0].UpdaterWakeUnsupported {
t.Fatalf("expected updater wake unsupported node flag, got %+v", report.Nodes[0])
}
if !report.Nodes[0].UpdaterRuntimeMissing {
t.Fatalf("expected updater runtime missing node flag, got %+v", report.Nodes[0])
}
if !containsString(report.Nodes[0].Alerts, "updater_subscription_gap") {
t.Fatalf("expected updater subscription alert tag, got %+v", report.Nodes[0].Alerts)
}
if !containsString(report.Nodes[0].Alerts, "updater_wake_unsupported") {
t.Fatalf("expected updater wake unsupported alert tag, got %+v", report.Nodes[0].Alerts)
}
if !containsString(report.Nodes[0].Alerts, "updater_runtime_missing") {
t.Fatalf("expected updater runtime missing alert tag, got %+v", report.Nodes[0].Alerts)
}
}
func TestGetStaleNodeRiskReportIgnoresUpdaterSubscriptionGapWhenCurrentVersionIsNewerThanTarget(t *testing.T) {
now := time.Now().UTC()
reportedVersion := "0.2.339-dockerretagfix"
store := &fakeRepository{
platformRole: PlatformRoleAdmin,
clusterNodes: []ClusterNode{{
ID: "node-1",
NodeKey: "node-key-1",
Name: "home-1",
RegistrationStatus: NodeRegistrationActive,
HealthStatus: "healthy",
ReportedVersion: &reportedVersion,
LastSeenAt: ptrTime(now.Add(-2 * time.Minute)),
MembershipStatus: "active",
}},
nodeUpdatePolicies: map[string]NodeUpdatePolicy{
"node-1|rap-host-agent": {
ClusterID: "cluster-1",
NodeID: "node-1",
Product: "rap-host-agent",
Channel: "stable",
Strategy: "rolling",
Enabled: true,
RollbackAllowed: true,
HealthWindowSec: 180,
TargetVersion: strPtr("0.2.327-registryjoinrewrite"),
},
},
heartbeats: map[string][]NodeHeartbeat{
"node-1": {{
ClusterID: "cluster-1",
NodeID: "node-1",
HealthStatus: "healthy",
ReportedVersion: &reportedVersion,
ObservedAt: now,
Metadata: json.RawMessage(`{
"mesh_endpoint_report":{"region":"home","peer_cache_peers":7},
"fabric_registry_runtime_report":{"status":"active","resolved_service_count":3},
"update_runtime":{"host_agent_state_present":true}
}`),
}},
},
updateStatuses: []NodeUpdateStatus{{
ClusterID: "cluster-1",
NodeID: "node-1",
Product: "rap-host-agent",
CurrentVersion: "0.2.334-updaterruntimestate",
TargetVersion: "0.2.327-registryjoinrewrite",
Phase: "plan",
Status: "noop",
ObservedAt: now.Add(-3 * time.Hour),
}},
}
service := NewService(store)
report, err := service.GetStaleNodeRiskReport(context.Background(), GetStaleNodeRiskReportInput{
ActorUserID: "admin-user",
ClusterID: "cluster-1",
})
if err != nil {
t.Fatalf("stale risk report: %v", err)
}
if report.Summary.UpdaterSubscriptionAlertNodes != 0 {
t.Fatalf("expected no updater subscription alert nodes, got %d", report.Summary.UpdaterSubscriptionAlertNodes)
}
if len(report.Nodes) != 1 || report.Nodes[0].UpdaterSubscriptionAlert {
t.Fatalf("did not expect updater subscription alert, got %+v", report.Nodes)
}
}
func TestGetStaleNodeRiskReportFlagsStagedSelfUpdatePending(t *testing.T) {
now := time.Now().UTC()
reportedVersion := "0.2.322-controlendpointsrewrite"
store := &fakeRepository{
platformRole: PlatformRoleAdmin,
clusterNodes: []ClusterNode{
{
ID: "node-1",
NodeKey: "node-key-1",
Name: "ifcm-rufms-s-mo1cr",
RegistrationStatus: NodeRegistrationActive,
HealthStatus: "healthy",
ReportedVersion: &reportedVersion,
LastSeenAt: ptrTime(now.Add(-2 * time.Minute)),
MembershipStatus: "active",
},
},
nodeUpdatePolicies: map[string]NodeUpdatePolicy{
"node-1|rap-host-agent": {
ClusterID: "cluster-1",
NodeID: "node-1",
Product: "rap-host-agent",
Channel: "stable",
Strategy: "rolling",
Enabled: true,
RollbackAllowed: true,
HealthWindowSec: 180,
TargetVersion: strPtr("0.2.287-wakeupdater"),
},
},
heartbeats: map[string][]NodeHeartbeat{
"node-1": {
{
ClusterID: "cluster-1",
NodeID: "node-1",
HealthStatus: "healthy",
ReportedVersion: &reportedVersion,
ObservedAt: now,
Metadata: json.RawMessage(`{
"mesh_endpoint_report":{"region":"ifcm","peer_cache_peers":7},
"fabric_registry_runtime_report":{"status":"missing","resolved_service_count":0}
}`),
},
},
},
updateStatuses: []NodeUpdateStatus{
{
ClusterID: "cluster-1",
NodeID: "node-1",
Product: "rap-host-agent",
CurrentVersion: "0.2.285-controlendpointsrewrite",
TargetVersion: "0.2.287-wakeupdater",
Phase: "apply",
Status: "staged",
ObservedAt: now.Add(-2 * time.Hour),
Payload: json.RawMessage(`{
"staged_path":"C:\\Program Files\\RAP\\ifcm-rufms-s-mo1cr\\rap-host-agent.exe.next",
"replace_error":"rename rap-host-agent.exe.tmp: Access is denied.",
"restart_needed":true
}`),
},
},
}
service := NewService(store)
report, err := service.GetStaleNodeRiskReport(context.Background(), GetStaleNodeRiskReportInput{
ActorUserID: "admin-user",
ClusterID: "cluster-1",
})
if err != nil {
t.Fatalf("stale risk report: %v", err)
}
if report.Summary.StagedSelfUpdatePendingNodes != 1 {
t.Fatalf("expected staged self update pending summary count 1, got %d", report.Summary.StagedSelfUpdatePendingNodes)
}
if len(report.Nodes) != 1 || !report.Nodes[0].StagedSelfUpdatePending {
t.Fatalf("expected node staged self update pending, got %+v", report.Nodes)
}
if !containsString(report.Nodes[0].Alerts, "staged_self_update_pending") {
t.Fatalf("expected staged self update alert tag, got %+v", report.Nodes[0].Alerts)
}
if len(report.Nodes[0].Products) != 1 || !report.Nodes[0].Products[0].StagedSelfUpdatePending {
t.Fatalf("expected product staged self update pending, got %+v", report.Nodes[0].Products)
}
if !containsString(report.Nodes[0].Risks, "staged_self_update_pending_rap-host-agent") {
t.Fatalf("expected staged self update pending risk, got %+v", report.Nodes[0].Risks)
}
}
// TestGetStaleNodeRiskReportTracksAreaDiversityFromDirectPeers sets up four
// nodes across three reported regions. The home node's heartbeat lists two
// directly reachable peers (one in its own region, one in "usa-los") and one
// public direct_quic candidate. The test expects no area diversity alert, one
// external ready area ("usa"), and an independent ingress deficit of 1 of 2.
func TestGetStaleNodeRiskReportTracksAreaDiversityFromDirectPeers(t *testing.T) {
	now := time.Now().UTC()

	homeMetadata := json.RawMessage(`{
"mesh_endpoint_report": {
"region": "home-lan",
"endpoint_candidates": [
{"transport":"direct_quic","address":"94.141.118.222:19199","connectivity_mode":"direct","reachability":"public"}
]
},
"mesh_peer_recovery_report": {
"target_ready_peers": 3
},
"mesh_peer_connection_manager_report": {
"probe_results": [
{"node_id":"node-home-2","link_status":"reachable","transport_mode":"direct"},
{"node_id":"node-usa","link_status":"reachable","transport_mode":"direct"}
]
},
"fabric_registry_runtime_report": {
"status": "ready",
"resolved_service_count": 3
}
}`)

	repo := &fakeRepository{
		platformRole: PlatformRoleAdmin,
		clusterNodes: []ClusterNode{
			{
				ID:                 "node-home",
				Name:               "home-1",
				RegistrationStatus: NodeRegistrationActive,
				HealthStatus:       "healthy",
				LastSeenAt:         ptrTime(now.Add(-30 * time.Second)),
			},
			{
				ID:                 "node-home-2",
				Name:               "home-2",
				RegistrationStatus: NodeRegistrationActive,
				HealthStatus:       "healthy",
				LastSeenAt:         ptrTime(now.Add(-30 * time.Second)),
			},
			{
				ID:                 "node-usa",
				Name:               "usa-los-1",
				RegistrationStatus: NodeRegistrationActive,
				HealthStatus:       "healthy",
				LastSeenAt:         ptrTime(now.Add(-30 * time.Second)),
			},
			{
				ID:                 "node-ifcm",
				Name:               "ifcm-rufms-s-mo1cr",
				RegistrationStatus: NodeRegistrationActive,
				HealthStatus:       "healthy",
				LastSeenAt:         ptrTime(now.Add(-30 * time.Second)),
			},
		},
		heartbeats: map[string][]NodeHeartbeat{
			"node-home": {{
				ClusterID: "cluster-1",
				NodeID:    "node-home",
				Metadata:  homeMetadata,
			}},
			// The remaining nodes only need to report their region.
			"node-home-2": {{
				ClusterID: "cluster-1",
				NodeID:    "node-home-2",
				Metadata:  json.RawMessage(`{"mesh_endpoint_report":{"region":"home-lan"}}`),
			}},
			"node-usa": {{
				ClusterID: "cluster-1",
				NodeID:    "node-usa",
				Metadata:  json.RawMessage(`{"mesh_endpoint_report":{"region":"usa-los"}}`),
			}},
			"node-ifcm": {{
				ClusterID: "cluster-1",
				NodeID:    "node-ifcm",
				Metadata:  json.RawMessage(`{"mesh_endpoint_report":{"region":"iFCM"}}`),
			}},
		},
	}

	svc := NewService(repo)
	report, err := svc.GetStaleNodeRiskReport(context.Background(), GetStaleNodeRiskReportInput{
		ActorUserID: "admin-1",
		ClusterID:   "cluster-1",
	})
	if err != nil {
		t.Fatalf("risk report: %v", err)
	}

	// Locate the home node's entry; keep a pointer into the report slice.
	var homeNode *StaleNodeRiskNode
	for idx := 0; idx < len(report.Nodes) && homeNode == nil; idx++ {
		if report.Nodes[idx].NodeID == "node-home" {
			homeNode = &report.Nodes[idx]
		}
	}
	if homeNode == nil {
		t.Fatalf("home node missing from report: %+v", report.Nodes)
	}

	// Area diversity expectations.
	if homeNode.AreaDiversityAlert {
		t.Fatalf("did not expect area diversity alert when only one external area is directly available: %+v", homeNode)
	}
	if homeNode.ExternalAreaReadyCount != 1 || homeNode.RequiredExternalAreaCount != 1 {
		t.Fatalf("unexpected external area counts on home node: %+v", homeNode)
	}
	if areas := homeNode.DirectReadyAreas; len(areas) != 1 || areas[0] != "usa" {
		t.Fatalf("unexpected direct ready areas on home node: %+v", areas)
	}
	// NOTE(review): the heartbeat reports region "home-lan" while the expected
	// canonical area is "home-test"; the mapping is not visible here — confirm.
	if homeNode.Area != "home-test" {
		t.Fatalf("unexpected canonical area on home node: %q", homeNode.Area)
	}

	// Independent ingress expectations.
	if homeNode.IndependentIngressCount != 1 {
		t.Fatalf("independent ingress count = %d, want 1", homeNode.IndependentIngressCount)
	}
	if !homeNode.IndependentIngressAlert {
		t.Fatalf("expected independent ingress alert on home node: %+v", homeNode)
	}
	if homeNode.RequiredIndependentIngressCount != 2 {
		t.Fatalf("required independent ingress count = %d, want 2", homeNode.RequiredIndependentIngressCount)
	}

	// Summary counters and alert tag.
	if got := report.Summary.AreaDiversityAlertNodes; got != 0 {
		t.Fatalf("area diversity alert nodes = %d, want 0", got)
	}
	if got := report.Summary.IndependentIngressAlertNodes; got != 1 {
		t.Fatalf("independent ingress alert nodes = %d, want 1", got)
	}
	if !containsString(homeNode.Alerts, "independent_ingress_deficit:1_of_2") {
		t.Fatalf("expected independent ingress deficit alert, got %+v", homeNode.Alerts)
	}
}
func TestDirectPeerRecoveryFromHeartbeatCountsReverseQUICAsDirect(t *testing.T) {
heartbeat := NodeHeartbeat{
Metadata: json.RawMessage(`{
"mesh_peer_recovery_report": {
"target_ready_peers": 3
},
"mesh_peer_connection_manager_report": {
"probe_results": [
{
"node_id": "peer-a",
"link_status": "reachable",
"transport_mode": "direct"
},
{
"node_id": "peer-b",
"link_status": "reachable",
"transport_mode": "relay_quic",
"candidate_results": [
{
"transport": "reverse_quic",
"link_status": "reachable"
}
]
},
{
"node_id": "peer-c",
"link_status": "reachable",
"transport_mode": "relay_quic",
"candidate_results": [
{
"transport": "relay_quic",
"link_status": "reachable"
}
]
}
]
}
}`),
}
readyCount, targetCount, deficit, alert := directPeerRecoveryFromHeartbeat(heartbeat)
if readyCount != 2 || targetCount != 3 || deficit != 1 || !alert {
t.Fatalf("unexpected direct peer recovery metrics: ready=%d target=%d deficit=%d alert=%t", readyCount, targetCount, deficit, alert)
}
}
func TestDirectPeerRecoveryFromHeartbeatUsesManagerReadyCountWithoutFreshProbeResults(t *testing.T) {
heartbeat := NodeHeartbeat{
Metadata: json.RawMessage(`{
"mesh_peer_recovery_report": {
"target_ready_peers": 3
},
"mesh_peer_connection_manager_report": {
"peer_connection_ready": 1,
"probe_results": []
}
}`),
}
readyCount, targetCount, deficit, alert := directPeerRecoveryFromHeartbeat(heartbeat)
if readyCount != 1 || targetCount != 3 || deficit != 2 || !alert {
t.Fatalf("unexpected direct peer recovery metrics: ready=%d target=%d deficit=%d alert=%t", readyCount, targetCount, deficit, alert)
}
}
func TestDirectPeerRecoveryFromHeartbeatClearsStaleDeficitWhenReadyCountMeetsTarget(t *testing.T) {
heartbeat := NodeHeartbeat{
Metadata: json.RawMessage(`{
"mesh_endpoint_report": {
"peer_recovery_deficit": 3
},
"mesh_peer_recovery_report": {
"target_ready_peers": 3
},
"mesh_peer_connection_manager_report": {
"peer_connection_ready": 3,
"probe_results": [
{"node_id":"peer-a","link_status":"reachable","transport_mode":"direct_quic","candidate_results":[{"transport":"direct_quic","link_status":"reachable"}]},
{"node_id":"peer-b","link_status":"reachable","transport_mode":"direct_quic","candidate_results":[{"transport":"direct_quic","link_status":"reachable"}]},
{"node_id":"peer-c","link_status":"reachable","transport_mode":"direct_quic","candidate_results":[{"transport":"direct_quic","link_status":"reachable"}]}
]
}
}`),
}
readyCount, targetCount, deficit, alert := directPeerRecoveryFromHeartbeat(heartbeat)
if readyCount != 3 || targetCount != 3 || deficit != 0 || alert {
t.Fatalf("unexpected direct peer recovery metrics: ready=%d target=%d deficit=%d alert=%t", readyCount, targetCount, deficit, alert)
}
}
func TestGetNodeUpdatePlanPrefersHeartbeatControlOriginForLocalRequests(t *testing.T) {
store := &fakeRepository{
nodeUpdatePolicies: map[string]NodeUpdatePolicy{
"node-1|rap-node-agent": {
ClusterID: "cluster-1",
NodeID: "node-1",
Product: "rap-node-agent",
Channel: "stable",
Strategy: "pinned",
Enabled: true,
RollbackAllowed: true,
HealthWindowSec: 180,
},
},
releaseVersions: []ReleaseVersion{
{
ID: "release-node",
ClusterID: "cluster-1",
Product: "rap-node-agent",
Version: "0.2.318-quic-decoupled",
Channel: "stable",
Status: "active",
Artifacts: []ReleaseArtifact{
{ID: "node-win-svc", ClusterID: "cluster-1", Product: "rap-node-agent", Version: "0.2.318-quic-decoupled", OS: "windows", Arch: "amd64", InstallType: "windows_service", Kind: "binary", URL: "quic://192.168.200.61:18080/downloads/rap-node-agent.exe", SHA256: "node-sha"},
},
},
},
heartbeats: map[string][]NodeHeartbeat{
"node-1": {
{
ClusterID: "cluster-1",
NodeID: "node-1",
Metadata: json.RawMessage(`{
"mesh_outbound_session_report": {
"fabric_control_endpoint": "quic://vpn.cin.su:19191/api/v1",
"status": "ready"
}
}`),
},
},
},
}
service := NewService(store)
plan, err := service.GetNodeUpdatePlan(context.Background(), GetNodeUpdatePlanInput{
ClusterID: "cluster-1",
NodeID: "node-1",
Product: "rap-node-agent",
CurrentVersion: "0.2.309-latencyaware",
OS: "windows",
Arch: "amd64",
InstallType: "windows_service",
Channel: "stable",
ArtifactOrigin: "quic://192.168.200.61:18121",
})
if err != nil {
t.Fatalf("update plan: %v", err)
}
if plan.Artifact == nil || !strings.HasPrefix(plan.Artifact.URL, "quic://vpn.cin.su:19191/") {
t.Fatalf("expected node-reachable artifact origin, got %+v", plan.Artifact)
}
}
func TestStaleNodeRiskReportKeepsRecoveryBridgeForOfflineNodeWithoutHeartbeatStale(t *testing.T) {
now := time.Now().UTC()
recentSeen := now.Add(-5 * time.Minute)
reportedVersion := "0.2.309-latencyaware"
store := &fakeRepository{
platformRole: PlatformRoleAdmin,
clusterNodes: []ClusterNode{
{
ID: "node-1",
NodeKey: "node-key-1",
Name: "ifcm-rufms-s-mo1cr",
RegistrationStatus: NodeRegistrationActive,
HealthStatus: "offline",
ReportedVersion: &reportedVersion,
LastSeenAt: &recentSeen,
MembershipStatus: "active",
},
},
releaseVersions: []ReleaseVersion{
{
ID: "release-host",
ClusterID: "cluster-1",
Product: "rap-host-agent",
Version: "0.2.284-quorumauthority",
Channel: "stable",
Status: "active",
Artifacts: []ReleaseArtifact{
{ID: "host-win-svc", ClusterID: "cluster-1", Product: "rap-host-agent", Version: "0.2.284-quorumauthority", OS: "windows", Arch: "amd64", InstallType: "windows_service", Kind: "binary", URL: "/downloads/rap-host-agent.exe", SHA256: "host-sha"},
},
},
},
nodeUpdatePolicies: map[string]NodeUpdatePolicy{
"node-1|rap-host-agent": {
ClusterID: "cluster-1",
NodeID: "node-1",
Product: "rap-host-agent",
Channel: "stable",
Strategy: "rolling",
Enabled: true,
RollbackAllowed: true,
HealthWindowSec: 180,
},
},
updateStatuses: []NodeUpdateStatus{
{
ClusterID: "cluster-1",
NodeID: "node-1",
Product: "rap-host-agent",
CurrentVersion: "0.2.183",
Phase: "plan",
Status: "noop",
ObservedAt: now.Add(-10 * time.Minute),
Payload: json.RawMessage(`{"binary_path":"C:\\Program Files\\RAP\\ifcm-rufms-s-mo1cr\\rap-host-agent.exe","reason":"no_matching_artifact"}`),
},
},
heartbeats: map[string][]NodeHeartbeat{
"node-1": {
{
ClusterID: "cluster-1",
NodeID: "node-1",
Metadata: json.RawMessage(`{
"mesh_outbound_session_report": {
"fabric_control_endpoint": "quic://vpn.cin.su:19191/api/v1",
"status": "ready"
}
}`),
},
},
},
}
service := NewService(store)
service.now = func() time.Time { return now }
report, err := service.GetStaleNodeRiskReport(context.Background(), GetStaleNodeRiskReportInput{
ActorUserID: "admin-1",
ClusterID: "cluster-1",
})
if err != nil {
t.Fatalf("risk report: %v", err)
}
if len(report.Nodes) != 1 {
t.Fatalf("expected one node, got %d", len(report.Nodes))
}
node := report.Nodes[0]
if node.HeartbeatStale {
t.Fatalf("heartbeat should be fresh in this test: %+v", node)
}
if !node.RecoveryBridgeRequired {
t.Fatalf("expected recovery bridge to remain required for offline node: %+v", node)
}
if !report.BridgeHoldRequired {
t.Fatalf("expected report-level bridge hold to stay active: %+v", report)
}
if report.Summary.RecoveryBridgeRequiredNodes != 1 {
t.Fatalf("recovery bridge required nodes = %d, want 1", report.Summary.RecoveryBridgeRequiredNodes)
}
if !containsAnyRiskWithPrefix(node.Risks, "stale_node_standard_recovery_contract_") {
t.Fatalf("expected standard recovery contract risk to survive without heartbeat stale: %+v", node.Risks)
}
}
func TestGetNodeBridgeReplayPlanBuildsReplayForDisallowedContractNode(t *testing.T) {
now := time.Now().UTC()
staleAt := now.Add(-time.Hour)
reportedVersion := "0.2.309-latencyaware"
store := &fakeRepository{
platformRole: PlatformRoleAdmin,
clusterNodes: []ClusterNode{
{
ID: "node-1",
NodeKey: "node-key-1",
Name: "ifcm-rufms-s-mo1cr",
RegistrationStatus: NodeRegistrationActive,
HealthStatus: "offline",
ReportedVersion: &reportedVersion,
LastSeenAt: &staleAt,
MembershipStatus: "active",
},
},
releaseVersions: []ReleaseVersion{
{
ID: "release-host",
ClusterID: "cluster-1",
Product: "rap-host-agent",
Version: "0.2.284-quorumauthority",
Channel: "stable",
Status: "active",
Artifacts: []ReleaseArtifact{
{ID: "host-win-svc", ClusterID: "cluster-1", Product: "rap-host-agent", Version: "0.2.284-quorumauthority", OS: "windows", Arch: "amd64", InstallType: "windows_service", Kind: "binary", URL: "/downloads/rap-host-agent.exe", SHA256: "host-sha"},
},
},
},
nodeUpdatePolicies: map[string]NodeUpdatePolicy{
"node-1|rap-host-agent": {
ClusterID: "cluster-1",
NodeID: "node-1",
Product: "rap-host-agent",
Channel: "stable",
Strategy: "rolling",
Enabled: true,
RollbackAllowed: true,
HealthWindowSec: 180,
},
},
updateStatuses: []NodeUpdateStatus{
{
ClusterID: "cluster-1",
NodeID: "node-1",
Product: "rap-host-agent",
CurrentVersion: "0.2.183",
Phase: "plan",
Status: "noop",
ObservedAt: now.Add(-10 * time.Minute),
Payload: json.RawMessage(`{"binary_path":"C:\\Program Files\\RAP\\ifcm-rufms-s-mo1cr\\rap-host-agent.exe","reason":"no_matching_artifact"}`),
},
},
heartbeats: map[string][]NodeHeartbeat{
"node-1": {
{
ClusterID: "cluster-1",
NodeID: "node-1",
Metadata: json.RawMessage(`{
"mesh_outbound_session_report": {
"fabric_control_endpoint": "quic://vpn.cin.su:19191/api/v1",
"status": "ready"
}
}`),
},
},
},
}
service := NewService(store)
service.now = func() time.Time { return now }
if origin := artifactOriginFromHeartbeat(store.heartbeats["node-1"][0]); origin != "quic://vpn.cin.su:19191" {
t.Fatalf("artifact origin = %q, want %q", origin, "quic://vpn.cin.su:19191")
}
plan, err := service.GetNodeBridgeReplayPlan(context.Background(), GetNodeBridgeReplayPlanInput{
ActorUserID: "admin-1",
ClusterID: "cluster-1",
NodeID: "node-1",
})
if err != nil {
t.Fatalf("bridge replay plan: %v", err)
}
if !plan.BridgeHoldRequired {
t.Fatalf("expected bridge hold required: %+v", plan)
}
if !plan.RecoveryBridgeReplayReady {
t.Fatalf("expected replay ready: %+v", plan)
}
if len(plan.Products) != 1 {
t.Fatalf("expected one replay product, got %+v", plan.Products)
}
product := plan.Products[0]
if product.Product != "rap-host-agent" {
t.Fatalf("unexpected replay product: %+v", product)
}
if product.RecoveryBridgeMode != "standard_contract_overlap" {
t.Fatalf("unexpected replay mode: %+v", product)
}
if product.UpdatePlan.Action != "update" {
t.Fatalf("expected update action, got %+v", product.UpdatePlan)
}
if product.UpdatePlan.TargetVersion != "0.2.284-quorumauthority" {
t.Fatalf("unexpected target version: %+v", product.UpdatePlan)
}
if product.UpdatePlan.Artifact == nil || product.UpdatePlan.Artifact.SHA256 != "host-sha" {
t.Fatalf("expected matching artifact in replay plan: %+v", product.UpdatePlan)
}
if product.UpdatePlan.Artifact.URL != "quic://vpn.cin.su:19191/downloads/rap-host-agent.exe" {
t.Fatalf("unexpected replay artifact url: %+v", product.UpdatePlan.Artifact)
}
}
// TestCreateReleaseVersionBlocksDisallowedRemovalWhileStaleNodesRemain tries to
// create a release whose compatibility sets fabric_standard_cleanup while an
// offline node last seen an hour ago is still registered. It expects
// ErrFabricStandardCleanupBlocked and a "fabric_standard_cleanup.blocked" audit
// event as the first recorded event, whose payload reports one node waiting for
// a recovery heartbeat, zero standard recovery contract nodes and
// bridge_hold_required = false.
func TestCreateReleaseVersionBlocksDisallowedRemovalWhileStaleNodesRemain(t *testing.T) {
	staleAt := time.Now().UTC().Add(-time.Hour)
	reportedVersion := "0.2.309-latencyaware"

	repo := &fakeRepository{
		platformRole: PlatformRoleAdmin,
		clusterNodes: []ClusterNode{{
			ID:                 "node-1",
			NodeKey:            "node-key-1",
			Name:               "ifcm-rufms-s-mo1cr",
			RegistrationStatus: NodeRegistrationActive,
			HealthStatus:       "offline",
			ReportedVersion:    &reportedVersion,
			LastSeenAt:         &staleAt,
			MembershipStatus:   "active",
		}},
	}

	request := CreateReleaseVersionInput{
		ActorUserID: "admin-1",
		ClusterID:   "cluster-1",
		Product:     "rap-node-agent",
		Version:     "0.2.400-breaking",
		Channel:     "stable",
		Status:      "active",
		Compatibility: json.RawMessage(`{
"fabric_standard_cleanup": true
}`),
		Artifacts: []ReleaseArtifactInput{{
			OS:          "linux",
			Arch:        "amd64",
			InstallType: "docker",
			Kind:        "image",
			URL:         "quic://example.test/rap-node-agent.tar",
			SHA256:      "sha256-1",
			SizeBytes:   123,
			Metadata:    json.RawMessage(`{}`),
		}},
	}

	_, err := NewService(repo).CreateReleaseVersion(context.Background(), request)
	if !errors.Is(err, ErrFabricStandardCleanupBlocked) {
		t.Fatalf("err = %v, want ErrFabricStandardCleanupBlocked", err)
	}
	if len(repo.auditEvents) == 0 || repo.auditEvents[0].EventType != "fabric_standard_cleanup.blocked" {
		t.Fatalf("expected blocked audit event, got %+v", repo.auditEvents)
	}

	var payload map[string]any
	if err := json.Unmarshal(repo.auditEvents[0].Payload, &payload); err != nil {
		t.Fatalf("unmarshal audit payload: %v", err)
	}

	// Numbers decoded into map[string]any arrive as float64.
	if waiting, ok := payload["waiting_recovery_heartbeat_nodes"].(float64); !ok || int(waiting) != 1 {
		t.Fatalf("waiting_recovery_heartbeat_nodes = %v, want 1 in audit payload", payload["waiting_recovery_heartbeat_nodes"])
	}
	if contractNodes, ok := payload["standard_recovery_contract_nodes"].(float64); !ok || int(contractNodes) != 0 {
		t.Fatalf("standard_recovery_contract_nodes = %v, want 0 in audit payload", payload["standard_recovery_contract_nodes"])
	}
	if holdRequired, ok := payload["bridge_hold_required"].(bool); !ok || holdRequired {
		t.Fatalf("bridge_hold_required = %v, want false in audit payload", payload["bridge_hold_required"])
	}
}
// TestAbsolutizeArtifactURLPreservesPublicAbsoluteURL expects an artifact URL
// that is already absolute on a public address to come back unchanged even
// when a different origin is supplied.
func TestAbsolutizeArtifactURLPreservesPublicAbsoluteURL(t *testing.T) {
	const (
		publicURL = "quic://94.141.118.222:19191/downloads/rap-node-agent.exe"
		origin    = "quic://vpn.cin.su:19191"
	)

	got := absolutizeArtifactURL(publicURL, origin)
	if got != publicURL {
		t.Fatalf("artifact url = %q, want %q", got, publicURL)
	}
}
func TestAbsolutizeReleaseArtifactAddsPrivateMirrorBeforeCanonicalPublicURL(t *testing.T) {
artifact := ReleaseArtifact{
URL: "quic://vpn.cin.su/downloads/rap-node-agent.exe",
}
resolved := absolutizeReleaseArtifact(artifact, "quic://192.168.200.61:18080")
if resolved.URL != "quic://192.168.200.61:18090/downloads/rap-node-agent.exe" {
t.Fatalf("primary artifact url = %q", resolved.URL)
}
if len(resolved.URLs) < 2 {
t.Fatalf("artifact urls = %+v, want local + public mirrors", resolved.URLs)
}
if resolved.URLs[0] != "quic://192.168.200.61:18090/downloads/rap-node-agent.exe" {
t.Fatalf("first artifact url = %q", resolved.URLs[0])
}
if resolved.URLs[1] != "quic://vpn.cin.su/downloads/rap-node-agent.exe" {
t.Fatalf("second artifact url = %q", resolved.URLs[1])
}
}
// TestAbsolutizeReleaseArtifactKeepsFabricArtifactOnly resolves an artifact
// whose primary URL is a fabric-artifact:// reference and whose URL list also
// contains an http:// entry, and expects only the fabric-artifact reference to
// remain, both as the primary URL and as the sole list entry.
func TestAbsolutizeReleaseArtifactKeepsFabricArtifactOnly(t *testing.T) {
	const fabricRef = "fabric-artifact://release/rap-node-agent-0.2.354-docker-amd64.tar"

	input := ReleaseArtifact{
		URL: fabricRef,
		URLs: []string{
			"http://192.168.200.61:18090/legacy.tar",
			fabricRef,
		},
	}
	resolved := absolutizeReleaseArtifact(input, "http://192.168.200.61:18080")

	if resolved.URL != fabricRef {
		t.Fatalf("primary artifact url = %q", resolved.URL)
	}
	kept := resolved.URLs
	if len(kept) != 1 {
		t.Fatalf("artifact urls = %+v, want only canonical fabric reference", kept)
	}
	if only := kept[0]; only != fabricRef {
		t.Fatalf("first artifact url = %q", only)
	}
}
func TestReportNodeUpdateStatusRemovesHTTPArtifactPayload(t *testing.T) {
store := &fakeRepository{}
service := NewService(store)
item, err := service.ReportNodeUpdateStatus(context.Background(), ReportNodeUpdateStatusInput{
ClusterID: "cluster-1",
NodeID: "node-1",
Product: "rap-node-agent",
CurrentVersion: "0.2.353",
TargetVersion: "0.2.354",
Phase: "download",
Status: "started",
Payload: json.RawMessage(`{
"artifact_url":"http://192.168.200.61:18090/legacy.tar",
"artifact_urls":[
"http://192.168.200.61:18090/legacy.tar",
"fabric-artifact://release/rap-node-agent-0.2.354-docker-amd64.tar"
],
"image":"rap-node-agent:0.2.354"
}`),
})
if err != nil {
t.Fatalf("report update status: %v", err)
}
payload := jsonObject(item.Payload)
if got := jsonString(payload, "artifact_url"); got != "fabric-artifact://release/rap-node-agent-0.2.354-docker-amd64.tar" {
t.Fatalf("artifact_url = %q", got)
}
raw := string(item.Payload)
if strings.Contains(raw, "http://") || strings.Contains(raw, "https://") {
t.Fatalf("payload still contains HTTP artifact reference: %s", raw)
}
}
func TestReportNodeUpdateStatusClassifiesFabricArtifactUnsupportedExecutor(t *testing.T) {
store := &fakeRepository{}
service := NewService(store)
errMessage := `download artifact fabric-artifact://release/rap-node-agent.tar: Get "fabric-artifact://release/rap-node-agent.tar": unsupported protocol scheme "fabric-artifact"`
item, err := service.ReportNodeUpdateStatus(context.Background(), ReportNodeUpdateStatusInput{
ClusterID: "cluster-1",
NodeID: "node-1",
Product: "rap-node-agent",
CurrentVersion: "0.2.353",
TargetVersion: "0.2.354",
Phase: "apply",
Status: "failed",
ErrorMessage: &errMessage,
Payload: json.RawMessage(`{}`),
})
if err != nil {
t.Fatalf("report update status: %v", err)
}
payload := jsonObject(item.Payload)
if got := jsonString(payload, "failure_class"); got != "fabric_executor_upgrade_required" {
t.Fatalf("failure_class = %q", got)
}
if got := jsonString(payload, "operator_action"); got != "rescue_update_executor_without_http" {
t.Fatalf("operator_action = %q", got)
}
if value, ok := payload["rescue_required"].(bool); !ok || !value {
t.Fatalf("rescue_required = %#v", payload["rescue_required"])
}
}
func TestEvaluateStaleNodeRiskProductFlagsFabricExecutorUpgradeRequired(t *testing.T) {
targetVersion := "0.2.354-relay-quic-labels"
now := time.Date(2026, 5, 20, 16, 45, 0, 0, time.UTC)
reportedVersion := "0.2.347-quicfarmupdate"
store := &fakeRepository{
releaseVersions: []ReleaseVersion{{
ID: "release-1",
ClusterID: "cluster-1",
Product: "rap-node-agent",
Version: targetVersion,
Channel: "stable",
Status: "active",
Artifacts: []ReleaseArtifact{{
ID: "docker",
ClusterID: "cluster-1",
Product: "rap-node-agent",
Version: targetVersion,
OS: "linux",
Arch: "amd64",
InstallType: "docker",
Kind: "docker_image_tar",
URL: "fabric-artifact://release/rap-node-agent.tar",
SHA256: "sha",
}},
}},
}
service := NewService(store)
policy := NodeUpdatePolicy{
ClusterID: "cluster-1",
NodeID: "node-1",
Product: "rap-node-agent",
Channel: "stable",
TargetVersion: &targetVersion,
Enabled: true,
RollbackAllowed: true,
}
statuses := []NodeUpdateStatus{{
ClusterID: "cluster-1",
NodeID: "node-1",
Product: "rap-node-agent",
CurrentVersion: "0.2.353-sticky-fabric-control",
TargetVersion: targetVersion,
Phase: "apply",
Status: "failed",
Payload: json.RawMessage(`{"image":"rap-node-agent:0.2.354-relay-quic-labels","failure_class":"fabric_executor_upgrade_required","operator_action":"rescue_update_executor_without_http","rescue_required":true}`),
ObservedAt: now,
}}
item, err := service.evaluateStaleNodeRiskProduct(context.Background(), "cluster-1", ClusterNode{
ID: "node-1",
Name: "home-1",
RegistrationStatus: NodeRegistrationActive,
HealthStatus: "healthy",
ReportedVersion: &reportedVersion,
}, now, nil, true, "rap-node-agent", policy, statuses, map[string][]ReleaseVersion{})
if err != nil {
t.Fatalf("evaluate risk product: %v", err)
}
if !containsString(item.Risks, "fabric_executor_upgrade_required_rap-node-agent") {
t.Fatalf("expected fabric executor rescue risk: %+v", item)
}
if !item.RecoveryBridgeRequired || item.RecoveryBridgeMode != "rescue_update_executor_without_http" {
t.Fatalf("expected rescue bridge requirement: %+v", item)
}
}
// TestNodeUpdatePlanHoldsFabricArtifactForExecutorWithoutCapability requests
// the same update plan twice: once without executor capabilities, expecting a
// held plan (action "none", reason fabric_executor_upgrade_required, no
// artifact), and once advertising "fabric_artifact", expecting an update plan
// that carries an artifact.
func TestNodeUpdatePlanHoldsFabricArtifactForExecutorWithoutCapability(t *testing.T) {
	const (
		clusterID      = "cluster-1"
		nodeID         = "node-1"
		product        = "rap-node-agent"
		runningVersion = "0.2.353-sticky-fabric-control"
	)
	fixedNow := time.Date(2026, 5, 20, 17, 5, 0, 0, time.UTC)
	wantVersion := "0.2.354-relay-quic-labels"

	repo := &fakeRepository{
		releaseVersions: []ReleaseVersion{{
			ID:        "release-1",
			ClusterID: clusterID,
			Product:   product,
			Version:   wantVersion,
			Channel:   "stable",
			Status:    "active",
			Artifacts: []ReleaseArtifact{{
				ID:          "docker",
				ClusterID:   clusterID,
				Product:     product,
				Version:     wantVersion,
				OS:          "linux",
				Arch:        "amd64",
				InstallType: "docker",
				Kind:        "docker_image_tar",
				URL:         "fabric-artifact://release/rap-node-agent.tar",
				SHA256:      "sha",
			}},
		}},
		nodeUpdatePolicies: map[string]NodeUpdatePolicy{
			"node-1|rap-node-agent": {
				ClusterID:       clusterID,
				NodeID:          nodeID,
				Product:         product,
				Channel:         "stable",
				TargetVersion:   &wantVersion,
				Strategy:        "rolling",
				Enabled:         true,
				RollbackAllowed: true,
				UpdatedAt:       fixedNow.Add(-time.Hour),
			},
		},
		// A previous apply attempt failed with the executor-upgrade failure class.
		updateStatuses: []NodeUpdateStatus{{
			ClusterID:      clusterID,
			NodeID:         nodeID,
			Product:        product,
			CurrentVersion: runningVersion,
			TargetVersion:  wantVersion,
			Phase:          "apply",
			Status:         "failed",
			Payload:        json.RawMessage(`{"failure_class":"fabric_executor_upgrade_required","rescue_required":true}`),
			ObservedAt:     fixedNow.Add(-time.Minute),
		}},
	}
	svc := NewService(repo)
	svc.now = func() time.Time { return fixedNow }

	// planInput builds the request; only the executor capabilities differ
	// between the two calls below.
	planInput := func(capabilities []string) GetNodeUpdatePlanInput {
		return GetNodeUpdatePlanInput{
			ClusterID:            clusterID,
			NodeID:               nodeID,
			Product:              product,
			CurrentVersion:       runningVersion,
			OS:                   "linux",
			Arch:                 "amd64",
			InstallType:          "docker",
			ExecutorCapabilities: capabilities,
		}
	}

	held, err := svc.GetNodeUpdatePlan(context.Background(), planInput(nil))
	if err != nil {
		t.Fatalf("held update plan: %v", err)
	}
	switch {
	case held.Action != "none", held.Reason != "fabric_executor_upgrade_required", held.Artifact != nil:
		t.Fatalf("unexpected held plan: %+v", held)
	}

	allowed, err := svc.GetNodeUpdatePlan(context.Background(), planInput([]string{"fabric_artifact"}))
	if err != nil {
		t.Fatalf("allowed update plan: %v", err)
	}
	if !(allowed.Action == "update" && allowed.Artifact != nil) {
		t.Fatalf("expected fabric-capable executor to receive update plan: %+v", allowed)
	}
}
// TestBridgeReplayPlanCanonicalizesDisallowedPublicArtifactURL builds an
// offline node with a rap-host-agent policy, a release whose only artifact
// uses a quic:// download URL, and a heartbeat reporting a fabric control
// endpoint, then asserts that GetNodeBridgeReplayPlan returns exactly one
// product whose update-plan artifact URL equals the release's URL.
// NOTE(review): the name says "canonicalizes" but the assertion expects the
// URL to come back unchanged — confirm the intended contract.
func TestBridgeReplayPlanCanonicalizesDisallowedPublicArtifactURL(t *testing.T) {
now := time.Date(2026, 5, 18, 15, 0, 0, 0, time.UTC)
reportedVersion := "0.2.318-quic-decoupled"
store := &fakeRepository{
platformRole: PlatformRoleAdmin,
// The node is offline and was last seen one hour before the fixed clock.
clusterNodes: []ClusterNode{{
ID: "node-1",
NodeKey: "node-key-1",
Name: "ifcm-rufms-s-mo1cr",
RegistrationStatus: NodeRegistrationActive,
HealthStatus: "offline",
ReportedVersion: &reportedVersion,
LastSeenAt: ptrTime(now.Add(-time.Hour)),
MembershipStatus: "active",
}},
releaseVersions: []ReleaseVersion{{
ID: "release-host",
ClusterID: "cluster-1",
Product: "rap-host-agent",
Version: "0.2.284-quorumauthority",
Channel: "stable",
Status: "active",
Artifacts: []ReleaseArtifact{{
ID: "host-win-svc",
ClusterID: "cluster-1",
Product: "rap-host-agent",
Version: "0.2.284-quorumauthority",
OS: "windows",
Arch: "amd64",
InstallType: "windows_service",
Kind: "binary",
URL: "quic://94.141.118.222:19191/downloads/rap-host-agent.exe",
SHA256: "host-sha",
}},
}},
// The policy sets no TargetVersion.
nodeUpdatePolicies: map[string]NodeUpdatePolicy{
"node-1|rap-host-agent": {
ClusterID: "cluster-1",
NodeID: "node-1",
Product: "rap-host-agent",
Channel: "stable",
Strategy: "rolling",
Enabled: true,
RollbackAllowed: true,
HealthWindowSec: 180,
},
},
// Last recorded status: a "plan" phase no-op with reason no_matching_artifact.
updateStatuses: []NodeUpdateStatus{{
ClusterID: "cluster-1",
NodeID: "node-1",
Product: "rap-host-agent",
CurrentVersion: "0.2.183",
Phase: "plan",
Status: "noop",
ObservedAt: now.Add(-10 * time.Minute),
Payload: json.RawMessage(`{"binary_path":"C:\\Program Files\\RAP\\ifcm-rufms-s-mo1cr\\rap-host-agent.exe","reason":"no_matching_artifact"}`),
}},
heartbeats: map[string][]NodeHeartbeat{
"node-1": {{
ClusterID: "cluster-1",
NodeID: "node-1",
Metadata: json.RawMessage(`{
"mesh_outbound_session_report": {
"fabric_control_endpoint": "quic://vpn.cin.su:19191/api/v1",
"status": "ready"
}
}`),
}},
},
}
service := NewService(store)
// Pin the service clock so time-relative fixtures are deterministic.
service.now = func() time.Time { return now }
plan, err := service.GetNodeBridgeReplayPlan(context.Background(), GetNodeBridgeReplayPlanInput{
ActorUserID: "admin-1",
ClusterID: "cluster-1",
NodeID: "node-1",
})
if err != nil {
t.Fatalf("bridge replay plan: %v", err)
}
// Exactly one product with a resolved artifact is expected.
if len(plan.Products) != 1 || plan.Products[0].UpdatePlan.Artifact == nil {
t.Fatalf("unexpected replay plan: %+v", plan)
}
if got := plan.Products[0].UpdatePlan.Artifact.URL; got != "quic://94.141.118.222:19191/downloads/rap-host-agent.exe" {
t.Fatalf("artifact url = %q, want operator public artifact url", got)
}
}
func TestUpsertNodeUpdatePolicyBlocksTargetingBreakingReleaseWhileStaleNodesRemain(t *testing.T) {
staleAt := time.Now().UTC().Add(-time.Hour)
reportedVersion := "0.2.309-latencyaware"
store := &fakeRepository{
platformRole: PlatformRoleAdmin,
clusterNodes: []ClusterNode{
{
ID: "node-1",
NodeKey: "node-key-1",
Name: "ifcm-rufms-s-mo1cr",
RegistrationStatus: NodeRegistrationActive,
HealthStatus: "offline",
ReportedVersion: &reportedVersion,
LastSeenAt: &staleAt,
MembershipStatus: "active",
},
},
releaseVersions: []ReleaseVersion{
{
ID: "release-breaking",
ClusterID: "cluster-1",
Product: "rap-node-agent",
Version: "0.2.400-breaking",
Channel: "stable",
Status: "active",
Compatibility: json.RawMessage(`{"fabric_standard_cleanup": true}`),
Artifacts: []ReleaseArtifact{
{ID: "docker", ClusterID: "cluster-1", Product: "rap-node-agent", Version: "0.2.400-breaking", OS: "linux", Arch: "amd64", InstallType: "docker", Kind: "image", URL: "quic://example.test/rap-node-agent.tar", SHA256: "sha256-1"},
},
},
},
}
service := NewService(store)
targetVersion := "0.2.400-breaking"
_, err := service.UpsertNodeUpdatePolicy(context.Background(), UpsertNodeUpdatePolicyInput{
ActorUserID: "admin-1",
ClusterID: "cluster-1",
NodeID: "node-1",
Product: "rap-node-agent",
Channel: "stable",
TargetVersion: &targetVersion,
Strategy: "manual",
Enabled: true,
RollbackAllowed: true,
HealthWindowSec: 180,
})
if !errors.Is(err, ErrFabricStandardCleanupBlocked) {
t.Fatalf("err = %v, want ErrFabricStandardCleanupBlocked", err)
}
if len(store.auditEvents) == 0 || store.auditEvents[0].EventType != "fabric_standard_cleanup.blocked" {
t.Fatalf("expected blocked audit event, got %+v", store.auditEvents)
}
}
func TestUpsertNodeUpdatePolicyRejectsUnknownTargetVersion(t *testing.T) {
service := NewService(&fakeRepository{
platformRole: PlatformRoleAdmin,
clusterNodes: []ClusterNode{
{
ID: "node-1",
NodeKey: "node-key-1",
Name: "home-1",
RegistrationStatus: NodeRegistrationActive,
HealthStatus: "healthy",
MembershipStatus: "active",
},
},
releaseVersions: []ReleaseVersion{
{
ID: "release-known",
ClusterID: "cluster-1",
Product: "rap-node-agent",
Version: "0.2.309-latencyaware",
Channel: "stable",
Status: "active",
},
},
})
targetVersion := "0.2.400-breaking"
_, err := service.UpsertNodeUpdatePolicy(context.Background(), UpsertNodeUpdatePolicyInput{
ActorUserID: "admin-1",
ClusterID: "cluster-1",
NodeID: "node-1",
Product: "rap-node-agent",
Channel: "stable",
TargetVersion: &targetVersion,
Strategy: "manual",
Enabled: true,
RollbackAllowed: true,
HealthWindowSec: 180,
})
if !errors.Is(err, ErrInvalidPayload) {
t.Fatalf("err = %v, want ErrInvalidPayload", err)
}
}
// TestNodeUpdatePlanNoopsWhenPolicyMissing asserts that, against an empty
// repository, GetNodeUpdatePlan returns action "none" with reason
// "no_update_policy" and leaves ProductionForwarding false.
func TestNodeUpdatePlanNoopsWhenPolicyMissing(t *testing.T) {
	svc := NewService(&fakeRepository{})
	request := GetNodeUpdatePlanInput{
		ClusterID:      "cluster-1",
		NodeID:         "node-1",
		Product:        "rap-node-agent",
		CurrentVersion: "0.1.0-c17z25",
		OS:             "linux",
		Arch:           "amd64",
		InstallType:    "docker",
	}
	got, err := svc.GetNodeUpdatePlan(context.Background(), request)
	if err != nil {
		t.Fatalf("update plan: %v", err)
	}
	switch {
	case got.Action != "none", got.Reason != "no_update_policy", got.ProductionForwarding:
		t.Fatalf("unexpected missing-policy plan: %+v", got)
	}
}
func TestGetNodeSyntheticMeshConfigIncludesDesiredFabricListener(t *testing.T) {
now := time.Date(2026, 4, 30, 6, 0, 0, 0, time.UTC)
version := "listener-v1"
updatedBy := "admin-1"
service := NewService(&fakeRepository{
desiredWorkloads: []NodeWorkloadDesiredState{
{
ClusterID: "cluster-1",
NodeID: "node-a",
ServiceType: "fabric-listener",
DesiredState: "enabled",
Version: &version,
Config: json.RawMessage(`{"listen_addr":":19140","listen_port_mode":"manual","auto_port_start":19140,"auto_port_end":19149,"connectivity_mode":"private_lan","nat_type":"none","region":"site-a"}`),
UpdatedByUserID: &updatedBy,
UpdatedAt: now,
},
},
})
service.now = func() time.Time { return now }
cfg, err := service.GetNodeSyntheticMeshConfig(context.Background(), GetNodeSyntheticMeshConfigInput{
ClusterID: "cluster-1",
NodeID: "node-a",
})
if err != nil {
t.Fatalf("get synthetic config: %v", err)
}
if cfg.FabricListener == nil {
t.Fatal("expected fabric listener desired config")
}
if cfg.FabricListener.ListenAddr != ":19140" ||
cfg.FabricListener.ListenPortMode != "manual" ||
cfg.FabricListener.ConnectivityMode != "private_lan" ||
cfg.FabricListener.ConfigVersion != "listener-v1" ||
!cfg.FabricListener.ControlPlaneOnly ||
cfg.FabricListener.ProductionForwarding {
t.Fatalf("unexpected listener config: %+v", cfg.FabricListener)
}
if cfg.AuthoritySignature == nil || len(cfg.AuthorityPayload) == 0 {
t.Fatal("listener-bearing synthetic config must remain signed")
}
}
// TestGetNodeSyntheticMeshConfigIsNodeScoped stores two active synthetic route
// intents (node-a -> node-r -> node-b and node-x -> node-y) and requests the
// config for node-a. It asserts that only route-a-b is returned, that peer
// endpoints, endpoint candidates and peer-directory entries cover node-r and
// node-b but never node-y or node-a itself, that both recovery seeds are
// returned in order, and that ProductionForwarding stays false.
func TestGetNodeSyntheticMeshConfigIsNodeScoped(t *testing.T) {
now := time.Date(2026, 4, 27, 12, 0, 0, 0, time.UTC)
service := NewService(&fakeRepository{
testingFlags: EffectiveNodeTestingFlags{
Enabled: true,
SyntheticLinksEnabled: true,
},
routeIntents: []MeshRouteIntent{
// Route involving node-a; its policy also lists an endpoint for node-y,
// which is not one of this route's hops.
{
ID: "route-a-b",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"node-a"}`),
DestinationSelector: json.RawMessage(`{"node_id":"node-b"}`),
ServiceClass: "synthetic",
Status: "active",
Policy: json.RawMessage(`{
"synthetic_enabled": true,
"hops": ["node-a", "node-r", "node-b"],
"allowed_channels": ["fabric_control", "route_control"],
"peer_endpoints": {
"node-r": "quic://node-r:19000",
"node-b": "quic://node-b:19000",
"node-y": "quic://node-y:19000"
},
"peer_endpoint_candidates": {
"node-r": [
{
"endpoint_id": "node-r-public",
"node_id": "node-r",
"transport": "direct_quic",
"address": "203.0.113.10:443",
"address_family": "ipv4",
"reachability": "public",
"nat_type": "none",
"connectivity_mode": "direct",
"region": "eu",
"priority": 10,
"policy_tags": ["fast-path"],
"metadata": {"source":"test"}
}
],
"node-b": [
{
"endpoint_id": "node-b-outbound",
"node_id": "node-b",
"transport": "reverse_quic",
"address": "node-b.reverse.local",
"reachability": "outbound_only",
"nat_type": "symmetric",
"connectivity_mode": "outbound_only",
"priority": 20
}
]
},
"recovery_seeds": [
{
"node_id": "node-r",
"endpoint": "quic://node-r.example.test:443",
"transport": "direct_quic",
"connectivity_mode": "direct",
"region": "eu",
"priority": 10,
"metadata": {"role":"stable-recovery"}
},
{
"node_id": "node-seed",
"endpoint": "quic://seed.example.test/mesh",
"transport": "reverse_quic",
"connectivity_mode": "direct",
"priority": 20
}
],
"route_version": "route-v1",
"policy_version": "policy-v1",
"peer_directory_version": "peers-v1"
}`),
UpdatedAt: now,
},
// Route between node-x and node-y only; node-a is not among its hops.
{
ID: "route-x-y",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"node-x"}`),
DestinationSelector: json.RawMessage(`{"node_id":"node-y"}`),
ServiceClass: "synthetic",
Status: "active",
Policy: json.RawMessage(`{
"synthetic_enabled": true,
"hops": ["node-x", "node-y"],
"peer_endpoints": {"node-y": "quic://node-y:19000"}
}`),
UpdatedAt: now,
},
},
})
// Pin the service clock to the fixture timestamp.
service.now = func() time.Time { return now }
cfg, err := service.GetNodeSyntheticMeshConfig(context.Background(), GetNodeSyntheticMeshConfigInput{
ClusterID: "cluster-1",
NodeID: "node-a",
})
if err != nil {
t.Fatalf("get synthetic config: %v", err)
}
if !cfg.Enabled {
t.Fatal("config should be enabled")
}
// Only the route that involves node-a may be returned.
if len(cfg.Routes) != 1 || cfg.Routes[0].RouteID != "route-a-b" {
t.Fatalf("routes = %+v", cfg.Routes)
}
if cfg.PeerEndpoints["node-r"] == "" || cfg.PeerEndpoints["node-b"] == "" {
t.Fatalf("peer endpoints missing: %+v", cfg.PeerEndpoints)
}
// node-y appears in route-a-b's policy endpoints but must not be exposed.
if _, leaked := cfg.PeerEndpoints["node-y"]; leaked {
t.Fatalf("unrelated topology leaked: %+v", cfg.PeerEndpoints)
}
nodeRCandidates := cfg.PeerEndpointCandidates["node-r"]
if len(nodeRCandidates) != 1 {
t.Fatalf("node-r candidates = %+v", cfg.PeerEndpointCandidates)
}
// The candidate's fields must match the values given in the policy JSON.
if got := nodeRCandidates[0]; got.EndpointID != "node-r-public" ||
got.Transport != "direct_quic" ||
got.Reachability != "public" ||
got.NATType != "none" ||
got.ConnectivityMode != "direct" ||
got.Priority != 10 {
t.Fatalf("unexpected node-r candidate: %+v", got)
}
if _, leaked := cfg.PeerEndpointCandidates["node-y"]; leaked {
t.Fatalf("unrelated candidate topology leaked: %+v", cfg.PeerEndpointCandidates)
}
// Both seeds are expected, in the order they appear in the policy.
if len(cfg.RecoverySeeds) != 2 || cfg.RecoverySeeds[0].NodeID != "node-r" || cfg.RecoverySeeds[1].NodeID != "node-seed" {
t.Fatalf("unexpected recovery seeds: %+v", cfg.RecoverySeeds)
}
nodeRDirectory, ok := findPeerDirectoryEntry(cfg.PeerDirectory, "node-r")
if !ok || nodeRDirectory.CandidateCount != 1 || !nodeRDirectory.RecoverySeed {
t.Fatalf("node-r peer directory missing recovery/candidate metadata: %+v", cfg.PeerDirectory)
}
// The requesting node itself must not be listed as its own peer.
if _, ok := findPeerDirectoryEntry(cfg.PeerDirectory, "node-a"); ok {
t.Fatalf("local node leaked into peer directory: %+v", cfg.PeerDirectory)
}
if _, ok := findPeerDirectoryEntry(cfg.PeerDirectory, "node-y"); ok {
t.Fatalf("unrelated node leaked into peer directory: %+v", cfg.PeerDirectory)
}
if cfg.ProductionForwarding {
t.Fatal("production forwarding must remain false")
}
}
// TestGetNodeSyntheticMeshConfigSkipsExpiredRouteIntent stores two active
// node-a -> node-b route intents that differ in their policy "expires_at"
// (one minute before and five minutes after the pinned clock) and asserts that
// only the unexpired route appears in node-a's synthetic mesh config.
func TestGetNodeSyntheticMeshConfigSkipsExpiredRouteIntent(t *testing.T) {
now := time.Date(2026, 5, 7, 18, 20, 0, 0, time.UTC)
service := NewService(&fakeRepository{
testingFlags: EffectiveNodeTestingFlags{
Enabled: true,
SyntheticLinksEnabled: true,
},
routeIntents: []MeshRouteIntent{
// expires_at is 18:19:00Z, one minute before `now`.
{
ID: "expired-route",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"node-a"}`),
DestinationSelector: json.RawMessage(`{"node_id":"node-b"}`),
ServiceClass: "vpn_packets",
Status: "active",
Policy: json.RawMessage(`{
"synthetic_enabled": true,
"hops": ["node-a", "node-b"],
"allowed_channels": ["vpn_packet"],
"expires_at": "2026-05-07T18:19:00Z"
}`),
UpdatedAt: now.Add(-time.Minute),
},
// expires_at is 18:25:00Z, five minutes after `now`.
{
ID: "fresh-route",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"node-a"}`),
DestinationSelector: json.RawMessage(`{"node_id":"node-b"}`),
ServiceClass: "vpn_packets",
Status: "active",
Policy: json.RawMessage(`{
"synthetic_enabled": true,
"hops": ["node-a", "node-b"],
"allowed_channels": ["vpn_packet"],
"expires_at": "2026-05-07T18:25:00Z"
}`),
UpdatedAt: now,
},
},
})
// Pin the service clock so the expiry comparison is deterministic.
service.now = func() time.Time { return now }
cfg, err := service.GetNodeSyntheticMeshConfig(context.Background(), GetNodeSyntheticMeshConfigInput{
ClusterID: "cluster-1",
NodeID: "node-a",
})
if err != nil {
t.Fatalf("get synthetic config: %v", err)
}
if containsRouteID(cfg.Routes, "expired-route") {
t.Fatalf("expired route leaked into synthetic config: %+v", cfg.Routes)
}
if !containsRouteID(cfg.Routes, "fresh-route") {
t.Fatalf("fresh route missing from synthetic config: %+v", cfg.Routes)
}
}
func TestRouteIntentLifecycleActionsMarkExpiredAndDisabled(t *testing.T) {
now := time.Date(2026, 5, 7, 18, 30, 0, 0, time.UTC)
repo := &fakeRepository{
platformRole: PlatformRoleAdmin,
authorityState: ClusterAuthorityState{
ClusterID: "cluster-1",
AuthorityState: "authoritative",
MutationMode: "normal",
},
routeIntents: []MeshRouteIntent{
{
ID: "route-a",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"node-a"}`),
DestinationSelector: json.RawMessage(`{"node_id":"node-b"}`),
ServiceClass: "vpn_packets",
Status: "active",
Policy: json.RawMessage(`{"synthetic_enabled":true}`),
UpdatedAt: now,
},
{
ID: "route-b",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"node-a"}`),
DestinationSelector: json.RawMessage(`{"node_id":"node-c"}`),
ServiceClass: "vpn_packets",
Status: "active",
Policy: json.RawMessage(`{"synthetic_enabled":true}`),
UpdatedAt: now,
},
},
}
service := NewService(repo)
service.now = func() time.Time { return now }
expired, err := service.ExpireRouteIntent(context.Background(), RouteIntentLifecycleInput{
ActorUserID: "admin",
ClusterID: "cluster-1",
RouteIntentID: "route-a",
Reason: "test cleanup",
})
if err != nil {
t.Fatalf("expire route intent: %v", err)
}
if expired.LifecycleStatus != "expired" || !expired.IsExpired || expired.PolicyExpiresAt == nil {
t.Fatalf("expired lifecycle = %+v", expired)
}
disabled, err := service.DisableRouteIntent(context.Background(), RouteIntentLifecycleInput{
ActorUserID: "admin",
ClusterID: "cluster-1",
RouteIntentID: "route-b",
Reason: "test cleanup",
})
if err != nil {
t.Fatalf("disable route intent: %v", err)
}
if disabled.Status != "disabled" || disabled.LifecycleStatus != "disabled" {
t.Fatalf("disabled lifecycle = %+v", disabled)
}
items, err := service.ListRouteIntents(context.Background(), "admin", "cluster-1")
if err != nil {
t.Fatalf("list route intents: %v", err)
}
if len(items) != 2 || items[0].LifecycleStatus == "" || items[1].LifecycleStatus == "" {
t.Fatalf("list lifecycle enrichment missing: %+v", items)
}
}
// TestGetNodeSyntheticMeshConfigUsesReportedMeshEndpoint gives the route
// policy no peer endpoints and instead supplies node-b's endpoint and one
// endpoint candidate through a heartbeat "mesh_endpoint_report". It asserts
// that node-a's config exposes that endpoint, that candidate, and a
// peer-directory entry counting one of each.
func TestGetNodeSyntheticMeshConfigUsesReportedMeshEndpoint(t *testing.T) {
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
service := NewService(&fakeRepository{
testingFlags: EffectiveNodeTestingFlags{
Enabled: true,
SyntheticLinksEnabled: true,
},
// The route policy carries only hops, no peer endpoints.
routeIntents: []MeshRouteIntent{
{
ID: "route-a-b",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"node-a"}`),
DestinationSelector: json.RawMessage(`{"node_id":"node-b"}`),
ServiceClass: "synthetic",
Status: "active",
Policy: json.RawMessage(`{
"synthetic_enabled": true,
"hops": ["node-a", "node-b"]
}`),
UpdatedAt: now,
},
},
// node-b's heartbeat reports its own endpoint and a single candidate.
heartbeats: map[string][]NodeHeartbeat{
"node-b": {
{
ClusterID: "cluster-1",
NodeID: "node-b",
Metadata: json.RawMessage(`{
"mesh_endpoint_report": {
"schema_version": "c17z6.mesh_endpoint_report.v1",
"cluster_id": "cluster-1",
"node_id": "node-b",
"peer_endpoint": "quic://node-b.dynamic.example.test:443",
"transport": "direct_quic",
"connectivity_mode": "direct",
"nat_type": "none",
"endpoint_candidates": [
{
"endpoint_id": "node-b-dynamic",
"node_id": "node-b",
"transport": "direct_quic",
"address": "quic://node-b.dynamic.example.test:443",
"reachability": "public",
"connectivity_mode": "direct",
"nat_type": "none",
"priority": 1,
"metadata": {"source":"heartbeat"}
}
]
}
}`),
ObservedAt: now,
},
},
},
})
// Pin the service clock to the heartbeat's observation time.
service.now = func() time.Time { return now }
cfg, err := service.GetNodeSyntheticMeshConfig(context.Background(), GetNodeSyntheticMeshConfigInput{
ClusterID: "cluster-1",
NodeID: "node-a",
})
if err != nil {
t.Fatalf("get synthetic config: %v", err)
}
if cfg.PeerEndpoints["node-b"] != "quic://node-b.dynamic.example.test:443" {
t.Fatalf("reported endpoint not projected: %+v", cfg.PeerEndpoints)
}
if got := cfg.PeerEndpointCandidates["node-b"]; len(got) != 1 || got[0].EndpointID != "node-b-dynamic" {
t.Fatalf("reported candidates not projected: %+v", cfg.PeerEndpointCandidates)
}
// The directory entry must count the reported endpoint and candidate.
entry, ok := findPeerDirectoryEntry(cfg.PeerDirectory, "node-b")
if !ok || entry.EndpointCount != 1 || entry.CandidateCount != 1 {
t.Fatalf("peer directory did not include reported endpoint/candidate: %+v", cfg.PeerDirectory)
}
}
// TestGetNodeSyntheticMeshConfigUsesDesiredFabricListenerAdvertiseEndpointForPeer
// gives peer home-1 both a desired fabric-listener workload with a public
// "advertise_endpoint" and a heartbeat reporting a private LAN endpoint. It
// asserts that node-a's config uses the advertised endpoint as home-1's peer
// endpoint and lists two candidates, the first being the desired-listener
// candidate (public, direct, port_restricted, priority 0).
func TestGetNodeSyntheticMeshConfigUsesDesiredFabricListenerAdvertiseEndpointForPeer(t *testing.T) {
now := time.Date(2026, 5, 1, 9, 0, 0, 0, time.UTC)
version := "home-1-external-19199"
service := NewService(&fakeRepository{
testingFlags: EffectiveNodeTestingFlags{
Enabled: true,
SyntheticLinksEnabled: true,
},
routeIntents: []MeshRouteIntent{
{
ID: "route-a-home",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"node-a"}`),
DestinationSelector: json.RawMessage(`{"node_id":"home-1"}`),
ServiceClass: "synthetic",
Status: "active",
Policy: json.RawMessage(`{"synthetic_enabled":true,"hops":["node-a","home-1"]}`),
UpdatedAt: now,
},
},
// Desired listener config for home-1 advertises a public endpoint.
desiredWorkloads: []NodeWorkloadDesiredState{
{
ClusterID: "cluster-1",
NodeID: "home-1",
ServiceType: "fabric-listener",
DesiredState: "enabled",
Version: &version,
Config: json.RawMessage(`{
"listen_addr":"0.0.0.0:19131",
"listen_port_mode":"manual",
"advertise_endpoint":"quic://94.141.118.222:19199",
"advertise_transport":"direct_quic",
"connectivity_mode":"direct",
"nat_type":"port_restricted",
"region":"home"
}`),
UpdatedAt: now,
},
},
// The heartbeat from home-1 reports only a private LAN endpoint.
heartbeats: map[string][]NodeHeartbeat{
"home-1": {
{
ClusterID: "cluster-1",
NodeID: "home-1",
Metadata: json.RawMessage(`{
"mesh_endpoint_report": {
"cluster_id": "cluster-1",
"node_id": "home-1",
"peer_endpoint": "quic://192.168.200.85:19131",
"transport": "direct_quic",
"connectivity_mode": "private_lan",
"nat_type": "none",
"endpoint_candidates": [
{
"endpoint_id": "home-1-private",
"node_id": "home-1",
"transport": "direct_quic",
"address": "quic://192.168.200.85:19131",
"reachability": "private",
"connectivity_mode": "private_lan",
"nat_type": "none",
"priority": 35
}
]
}
}`),
ObservedAt: now,
},
},
},
})
// Pin the service clock to the fixture timestamp.
service.now = func() time.Time { return now }
cfg, err := service.GetNodeSyntheticMeshConfig(context.Background(), GetNodeSyntheticMeshConfigInput{
ClusterID: "cluster-1",
NodeID: "node-a",
})
if err != nil {
t.Fatalf("get synthetic config: %v", err)
}
if cfg.PeerEndpoints["home-1"] != "quic://94.141.118.222:19199" {
t.Fatalf("desired advertise endpoint should win over private heartbeat endpoint: %+v", cfg.PeerEndpoints)
}
got := cfg.PeerEndpointCandidates["home-1"]
if len(got) != 2 {
t.Fatalf("expected desired and reported candidates: %+v", got)
}
// The desired-listener candidate must come first and mirror the desired config.
if got[0].EndpointID != "home-1-desired-fabric-listener" ||
got[0].Address != "quic://94.141.118.222:19199" ||
got[0].Reachability != "public" ||
got[0].ConnectivityMode != "direct" ||
got[0].NATType != "port_restricted" ||
got[0].Priority != 0 {
t.Fatalf("unexpected desired candidate: %+v", got[0])
}
}
// TestGetNodeSyntheticMeshConfigUsesDesiredFabricListenerEndpointCandidates
// configures home-1's desired fabric-listener with two explicit
// "endpoint_candidates" and no heartbeat. It asserts that node-a's config
// lists both candidates for home-1, uses the first candidate's address as the
// peer endpoint, and preserves the second candidate's fields and its
// "maps_to" metadata.
func TestGetNodeSyntheticMeshConfigUsesDesiredFabricListenerEndpointCandidates(t *testing.T) {
now := time.Date(2026, 5, 1, 9, 15, 0, 0, time.UTC)
version := "home-1-multi-provider"
service := NewService(&fakeRepository{
testingFlags: EffectiveNodeTestingFlags{
Enabled: true,
SyntheticLinksEnabled: true,
},
routeIntents: []MeshRouteIntent{
{
ID: "route-a-home",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"node-a"}`),
DestinationSelector: json.RawMessage(`{"node_id":"home-1"}`),
ServiceClass: "synthetic",
Status: "active",
Policy: json.RawMessage(`{"synthetic_enabled":true,"hops":["node-a","home-1"]}`),
UpdatedAt: now,
},
},
// The desired config lists a private LAN candidate and a public candidate.
desiredWorkloads: []NodeWorkloadDesiredState{
{
ClusterID: "cluster-1",
NodeID: "home-1",
ServiceType: "fabric-listener",
DesiredState: "enabled",
Version: &version,
Config: json.RawMessage(`{
"listen_addr":"0.0.0.0:18080",
"listen_port_mode":"manual",
"advertise_transport":"direct_quic",
"connectivity_mode":"private_lan",
"nat_type":"none",
"region":"home",
"endpoint_candidates":[
{
"endpoint_id":"home-1-lan",
"address":"quic://192.168.200.85:18080",
"transport":"direct_quic",
"reachability":"private",
"connectivity_mode":"private_lan",
"nat_type":"none",
"priority":1
},
{
"endpoint_id":"home-1-isp1",
"address":"quic://94.141.118.222:19199",
"transport":"direct_quic",
"reachability":"public",
"connectivity_mode":"direct",
"nat_type":"port_restricted",
"priority":2,
"metadata":{"provider":"isp1","maps_to":"192.168.200.85:18080"}
}
]
}`),
UpdatedAt: now,
},
},
})
// Pin the service clock to the fixture timestamp.
service.now = func() time.Time { return now }
cfg, err := service.GetNodeSyntheticMeshConfig(context.Background(), GetNodeSyntheticMeshConfigInput{
ClusterID: "cluster-1",
NodeID: "node-a",
})
if err != nil {
t.Fatalf("get synthetic config: %v", err)
}
candidates := cfg.PeerEndpointCandidates["home-1"]
if len(candidates) != 2 {
t.Fatalf("expected two desired candidates: %+v", candidates)
}
// The first listed candidate's address is expected as the primary endpoint.
if cfg.PeerEndpoints["home-1"] != "quic://192.168.200.85:18080" {
t.Fatalf("expected first candidate as primary endpoint: %+v", cfg.PeerEndpoints)
}
if candidates[1].Address != "quic://94.141.118.222:19199" ||
candidates[1].Reachability != "public" ||
candidates[1].ConnectivityMode != "direct" ||
candidates[1].NATType != "port_restricted" {
t.Fatalf("unexpected public NAT candidate: %+v", candidates[1])
}
// Candidate metadata must still be valid JSON and keep the maps_to key.
if !json.Valid(candidates[1].Metadata) || !strings.Contains(string(candidates[1].Metadata), "maps_to") {
t.Fatalf("expected NAT mapping metadata to survive: %s", candidates[1].Metadata)
}
}
// TestEnrichPeerEndpointCandidateCertPinsUsesMapsToAlias passes two
// candidates to enrichPeerEndpointCandidateCertPins: a public one whose
// metadata "maps_to" names the private candidate's host:port, and the private
// one carrying a tls_cert_sha256 pin. It asserts that the public candidate
// reports the same pin after enrichment and keeps its "maps_to" metadata.
func TestEnrichPeerEndpointCandidateCertPinsUsesMapsToAlias(t *testing.T) {
	const wantPin = "98dedb5916486f97fb732b1603c9850f806be6954b96dc24968da7caca4090ef"

	publicMapped := PeerEndpointCandidate{
		EndpointID:       "home-1-public-isp1-19199",
		NodeID:           "home-1",
		Transport:        "direct_quic",
		Address:          "quic://94.141.118.222:19199",
		Reachability:     "public",
		ConnectivityMode: "direct",
		Metadata:         json.RawMessage(`{"provider":"isp1","maps_to":"192.168.200.85:18080"}`),
	}
	privateAdvertised := PeerEndpointCandidate{
		EndpointID:       "home-1-advertised",
		NodeID:           "home-1",
		Transport:        "direct_quic",
		Address:          "quic://192.168.200.85:18080",
		Reachability:     "private",
		ConnectivityMode: "private_lan",
		Metadata:         json.RawMessage(`{"tls_cert_sha256":"98dedb5916486f97fb732b1603c9850f806be6954b96dc24968da7caca4090ef"}`),
	}

	enriched := enrichPeerEndpointCandidateCertPins([]PeerEndpointCandidate{publicMapped, privateAdvertised})
	if len(enriched) != 2 {
		t.Fatalf("enriched candidates = %+v", enriched)
	}
	// The public candidate declares no pin of its own in the fixture above.
	if pin := peerEndpointCandidateTLSCertSHA256(enriched[0]); pin != wantPin {
		t.Fatalf("public NAT-mapped candidate tls cert = %q", pin)
	}
	if mapped := strings.Contains(string(enriched[0].Metadata), "maps_to"); !mapped {
		t.Fatalf("maps_to metadata lost after enrichment: %s", enriched[0].Metadata)
	}
}
// TestGetNodeSyntheticMeshConfigKeepsOperatorPublicJoinContractPeerBeyondWarmPeerTarget
// sets up five healthy active nodes with no route intents: "remote" (the only
// node with a core-mesh role, reporting outbound-only connectivity), three
// test nodes and home-1, all four reporting private LAN endpoints. home-1
// additionally has a desired fabric-listener with a public advertise endpoint.
// It asserts that remote's config still contains home-1 with that public
// endpoint, the desired-listener candidate first, and a peer-directory entry.
// NOTE(review): the "warm-peer target" limit named in the test is not visible
// here — presumably the peer count exceeds it; confirm against the service.
func TestGetNodeSyntheticMeshConfigKeepsOperatorPublicJoinContractPeerBeyondWarmPeerTarget(t *testing.T) {
now := time.Date(2026, 5, 1, 9, 30, 0, 0, time.UTC)
version := "home-1-external-19199"
// privateHeartbeat returns a single heartbeat for nodeID whose mesh endpoint
// report advertises one private LAN endpoint at 192.168.200.61:<port>.
privateHeartbeat := func(nodeID string, port string) []NodeHeartbeat {
return []NodeHeartbeat{{
ClusterID: "cluster-1",
NodeID: nodeID,
ObservedAt: now,
Metadata: json.RawMessage(`{
"mesh_endpoint_report": {
"cluster_id": "cluster-1",
"node_id": "` + nodeID + `",
"peer_endpoint": "quic://192.168.200.61:` + port + `",
"transport": "direct_quic",
"connectivity_mode": "private_lan",
"nat_type": "none",
"endpoint_candidates": [{
"endpoint_id": "` + nodeID + `-private",
"node_id": "` + nodeID + `",
"transport": "direct_quic",
"address": "quic://192.168.200.61:` + port + `",
"reachability": "private",
"connectivity_mode": "private_lan",
"nat_type": "none",
"priority": 35
}]
}
}`),
}}
}
service := NewService(&fakeRepository{
testingFlags: EffectiveNodeTestingFlags{
Enabled: true,
SyntheticLinksEnabled: true,
},
// home-1 is the most recently created node and has the oldest LastSeenAt.
clusterNodes: []ClusterNode{
{ID: "remote", RegistrationStatus: NodeRegistrationActive, HealthStatus: "healthy", MembershipStatus: "active", CreatedAt: now.Add(-5 * time.Hour), LastSeenAt: ptrTime(now)},
{ID: "test-1", RegistrationStatus: NodeRegistrationActive, HealthStatus: "healthy", MembershipStatus: "active", CreatedAt: now.Add(-4 * time.Hour), LastSeenAt: ptrTime(now.Add(-time.Second))},
{ID: "test-2", RegistrationStatus: NodeRegistrationActive, HealthStatus: "healthy", MembershipStatus: "active", CreatedAt: now.Add(-3 * time.Hour), LastSeenAt: ptrTime(now.Add(-2 * time.Second))},
{ID: "test-3", RegistrationStatus: NodeRegistrationActive, HealthStatus: "healthy", MembershipStatus: "active", CreatedAt: now.Add(-2 * time.Hour), LastSeenAt: ptrTime(now.Add(-3 * time.Second))},
{ID: "home-1", RegistrationStatus: NodeRegistrationActive, HealthStatus: "healthy", MembershipStatus: "active", CreatedAt: now.Add(-time.Hour), LastSeenAt: ptrTime(now.Add(-4 * time.Second))},
},
nodeRoles: map[string][]NodeRoleAssignment{
"remote": {{NodeID: "remote", Role: "core-mesh", Status: "active"}},
},
heartbeats: map[string][]NodeHeartbeat{
// The requesting node reports outbound-only connectivity and no peer endpoint.
"remote": {{
ClusterID: "cluster-1",
NodeID: "remote",
ObservedAt: now,
Metadata: json.RawMessage(`{
"mesh_endpoint_report": {
"cluster_id": "cluster-1",
"node_id": "remote",
"connectivity_mode": "outbound_only",
"region": "office"
},
"fabric_listener_report": {
"inbound_reachability": "outbound_only",
"one_way_connectivity": true
},
"mesh_outbound_session_report": {
"status": "ready",
"fabric_control_endpoint": "quic://control.example.test/api/v1"
}
}`),
}},
"test-1": privateHeartbeat("test-1", "19131"),
"test-2": privateHeartbeat("test-2", "19132"),
"test-3": privateHeartbeat("test-3", "19133"),
"home-1": privateHeartbeat("home-1", "19131"),
},
// Desired listener config for home-1 advertises a public endpoint.
desiredWorkloads: []NodeWorkloadDesiredState{{
ClusterID: "cluster-1",
NodeID: "home-1",
ServiceType: "fabric-listener",
DesiredState: "enabled",
Version: &version,
Config: json.RawMessage(`{
"listen_addr":"0.0.0.0:19131",
"listen_port_mode":"manual",
"advertise_endpoint":"quic://94.141.118.222:19199",
"advertise_transport":"direct_quic",
"connectivity_mode":"direct",
"nat_type":"port_restricted",
"region":"home"
}`),
UpdatedAt: now,
}},
})
// Pin the service clock to the fixture timestamp.
service.now = func() time.Time { return now }
cfg, err := service.GetNodeSyntheticMeshConfig(context.Background(), GetNodeSyntheticMeshConfigInput{
ClusterID: "cluster-1",
NodeID: "remote",
})
if err != nil {
t.Fatalf("get synthetic config: %v", err)
}
if cfg.PeerEndpoints["home-1"] != "quic://94.141.118.222:19199" {
t.Fatalf("operator public home peer should survive warm-peer target: %+v", cfg.PeerEndpoints)
}
// The desired-listener candidate must be present and listed first.
homeCandidates := cfg.PeerEndpointCandidates["home-1"]
if len(homeCandidates) == 0 || homeCandidates[0].EndpointID != "home-1-desired-fabric-listener" {
t.Fatalf("home desired public candidate missing: %+v", homeCandidates)
}
if _, ok := findPeerDirectoryEntry(cfg.PeerDirectory, "home-1"); !ok {
t.Fatalf("home peer directory entry missing: %+v", cfg.PeerDirectory)
}
}
// TestGetNodeSyntheticMeshConfigFiltersLoopbackReportedMeshEndpoint has node-b
// report a 127.0.0.1 peer endpoint plus two candidates, one loopback and one
// on a LAN address. It asserts that node-a's config omits the loopback peer
// endpoint, keeps only the LAN candidate, and that the peer-directory entry
// counts zero endpoints and one candidate.
func TestGetNodeSyntheticMeshConfigFiltersLoopbackReportedMeshEndpoint(t *testing.T) {
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
service := NewService(&fakeRepository{
testingFlags: EffectiveNodeTestingFlags{
Enabled: true,
SyntheticLinksEnabled: true,
},
// The route policy carries only hops, no peer endpoints.
routeIntents: []MeshRouteIntent{
{
ID: "route-a-b",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"node-a"}`),
DestinationSelector: json.RawMessage(`{"node_id":"node-b"}`),
ServiceClass: "synthetic",
Status: "active",
Policy: json.RawMessage(`{
"synthetic_enabled": true,
"hops": ["node-a", "node-b"]
}`),
UpdatedAt: now,
},
},
// node-b's report mixes a loopback endpoint and candidate with one LAN candidate.
heartbeats: map[string][]NodeHeartbeat{
"node-b": {
{
ClusterID: "cluster-1",
NodeID: "node-b",
Metadata: json.RawMessage(`{
"mesh_endpoint_report": {
"schema_version": "c17z25.mesh_endpoint_report.v1",
"cluster_id": "cluster-1",
"node_id": "node-b",
"peer_endpoint": "quic://127.0.0.1:19131",
"transport": "direct_quic",
"connectivity_mode": "private_lan",
"nat_type": "none",
"endpoint_candidates": [
{
"endpoint_id": "node-b-loopback",
"node_id": "node-b",
"transport": "direct_quic",
"address": "quic://127.0.0.1:19131",
"reachability": "private",
"connectivity_mode": "private_lan",
"nat_type": "none",
"priority": 1
},
{
"endpoint_id": "node-b-lan",
"node_id": "node-b",
"transport": "direct_quic",
"address": "quic://192.168.10.20:19131",
"reachability": "private",
"connectivity_mode": "private_lan",
"nat_type": "none",
"priority": 2
}
]
}
}`),
ObservedAt: now,
},
},
},
})
// Pin the service clock to the heartbeat's observation time.
service.now = func() time.Time { return now }
cfg, err := service.GetNodeSyntheticMeshConfig(context.Background(), GetNodeSyntheticMeshConfigInput{
ClusterID: "cluster-1",
NodeID: "node-a",
})
if err != nil {
t.Fatalf("get synthetic config: %v", err)
}
// The only reported peer endpoint is loopback, so none may be exposed.
if _, leaked := cfg.PeerEndpoints["node-b"]; leaked {
t.Fatalf("loopback peer endpoint leaked: %+v", cfg.PeerEndpoints)
}
if got := cfg.PeerEndpointCandidates["node-b"]; len(got) != 1 || got[0].EndpointID != "node-b-lan" || got[0].Address != "quic://192.168.10.20:19131" {
t.Fatalf("loopback candidates not filtered correctly: %+v", cfg.PeerEndpointCandidates)
}
entry, ok := findPeerDirectoryEntry(cfg.PeerDirectory, "node-b")
if !ok || entry.EndpointCount != 0 || entry.CandidateCount != 1 {
t.Fatalf("peer directory should expose only usable candidate: %+v", cfg.PeerDirectory)
}
}
// TestScopedPeerEndpointsFiltersLoopbackPolicyEndpoints feeds
// scopedPeerEndpoints a policy map that mixes loopback, wildcard, localhost
// and LAN addresses and expects only the LAN entry for node-c to be returned.
func TestScopedPeerEndpointsFiltersLoopbackPolicyEndpoints(t *testing.T) {
	const lanEndpoint = "quic://192.168.10.20:19133"

	policyEndpoints := map[string]string{
		"node-a": "quic://127.0.0.1:19131",
		"node-b": "quic://0.0.0.0:19132",
		"node-c": "quic://192.168.10.20:19133",
		"node-d": "quic://localhost:19134",
	}
	scope := []string{"node-a", "node-b", "node-c", "node-d"}

	scoped := scopedPeerEndpoints(policyEndpoints, scope)

	// Exactly one endpoint may survive, and it has to be the LAN one.
	onlyLAN := len(scoped) == 1 && scoped["node-c"] == lanEndpoint
	if !onlyLAN {
		t.Fatalf("loopback/wildcard policy endpoints leaked: %+v", scoped)
	}
}
// TestGetNodeSyntheticMeshConfigJoinContractsCoreMeshPeersFromHealthyNodes
// requests the synthetic mesh config for node-a, which holds the active
// "core-mesh" role, in a cluster of three healthy nodes with no route intents
// configured. The other two nodes publish their endpoints only through
// heartbeat "mesh_endpoint_report" metadata: node-b via peer_endpoint, node-c
// via endpoint_candidates.
//
// The test expects both reports to surface in the returned config (peer
// endpoint, endpoint candidates, two recovery seeds) and both peers to be
// listed in the peer directory.
func TestGetNodeSyntheticMeshConfigJoinContractsCoreMeshPeersFromHealthyNodes(t *testing.T) {
	now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
	service := NewService(&fakeRepository{
		testingFlags: EffectiveNodeTestingFlags{
			Enabled:               true,
			SyntheticLinksEnabled: true,
		},
		clusterNodes: []ClusterNode{
			{ID: "node-a", RegistrationStatus: NodeRegistrationActive, HealthStatus: "healthy", MembershipStatus: "active", CreatedAt: now.Add(-2 * time.Hour), LastSeenAt: ptrTime(now)},
			{ID: "node-b", RegistrationStatus: NodeRegistrationActive, HealthStatus: "healthy", MembershipStatus: "active", CreatedAt: now.Add(-time.Hour), LastSeenAt: ptrTime(now.Add(-time.Second))},
			{ID: "node-c", RegistrationStatus: NodeRegistrationActive, HealthStatus: "healthy", MembershipStatus: "active", CreatedAt: now.Add(-30 * time.Minute), LastSeenAt: ptrTime(now.Add(-2 * time.Second))},
		},
		nodeRoles: map[string][]NodeRoleAssignment{
			"node-a": {{NodeID: "node-a", Role: "core-mesh", Status: "active"}},
		},
		heartbeats: map[string][]NodeHeartbeat{
			// node-b reports a single peer endpoint and no candidate list.
			"node-b": {{
				ClusterID:  "cluster-1",
				NodeID:     "node-b",
				ObservedAt: now,
				Metadata: json.RawMessage(`{
"mesh_endpoint_report": {
"cluster_id": "cluster-1",
"node_id": "node-b",
"peer_endpoint": "quic://10.0.0.2:19131",
"transport": "direct_quic",
"connectivity_mode": "private_lan"
}
}`),
			}},
			// node-c reports only an endpoint candidate, no peer_endpoint.
			"node-c": {{
				ClusterID:  "cluster-1",
				NodeID:     "node-c",
				ObservedAt: now,
				Metadata: json.RawMessage(`{
"mesh_endpoint_report": {
"cluster_id": "cluster-1",
"node_id": "node-c",
"endpoint_candidates": [{
"endpoint_id": "node-c-lan",
"node_id": "node-c",
"transport": "direct_quic",
"address": "quic://10.0.0.3:19131",
"reachability": "private",
"connectivity_mode": "private_lan",
"priority": 1
}]
}
}`),
			}},
		},
	})
	// Pin the service clock so the result does not depend on wall time.
	service.now = func() time.Time { return now }
	cfg, err := service.GetNodeSyntheticMeshConfig(context.Background(), GetNodeSyntheticMeshConfigInput{
		ClusterID: "cluster-1",
		NodeID:    "node-a",
	})
	if err != nil {
		t.Fatalf("get synthetic config: %v", err)
	}
	if cfg.PeerEndpoints["node-b"] != "quic://10.0.0.2:19131" {
		t.Fatalf("reported peer endpoint not joined: %+v", cfg.PeerEndpoints)
	}
	if got := cfg.PeerEndpointCandidates["node-c"]; len(got) != 1 || got[0].EndpointID != "node-c-lan" {
		t.Fatalf("reported peer candidates not joined: %+v", cfg.PeerEndpointCandidates)
	}
	if len(cfg.RecoverySeeds) != 2 {
		t.Fatalf("RecoverySeeds = %+v, want two core mesh join seeds", cfg.RecoverySeeds)
	}
	if _, ok := findPeerDirectoryEntry(cfg.PeerDirectory, "node-b"); !ok {
		t.Fatalf("peer directory missing node-b: %+v", cfg.PeerDirectory)
	}
	if _, ok := findPeerDirectoryEntry(cfg.PeerDirectory, "node-c"); !ok {
		t.Fatalf("peer directory missing node-c: %+v", cfg.PeerDirectory)
	}
}
func TestGetNodeSyntheticMeshConfigScopesPrivateJoinContractPeersForOutboundOnlyNode(t *testing.T) {
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
service := NewService(&fakeRepository{
testingFlags: EffectiveNodeTestingFlags{
Enabled: true,
SyntheticLinksEnabled: true,
},
clusterNodes: []ClusterNode{
{ID: "node-local", RegistrationStatus: NodeRegistrationActive, HealthStatus: "healthy", MembershipStatus: "active", CreatedAt: now.Add(-2 * time.Hour), LastSeenAt: ptrTime(now)},
{ID: "node-peer", RegistrationStatus: NodeRegistrationActive, HealthStatus: "healthy", MembershipStatus: "active", CreatedAt: now.Add(-time.Hour), LastSeenAt: ptrTime(now.Add(-time.Second))},
{ID: "node-relay", RegistrationStatus: NodeRegistrationActive, HealthStatus: "healthy", MembershipStatus: "active", CreatedAt: now.Add(-90 * time.Minute), LastSeenAt: ptrTime(now.Add(-2 * time.Second))},
},
nodeRoles: map[string][]NodeRoleAssignment{
"node-local": {{NodeID: "node-local", Role: "core-mesh", Status: "active"}},
},
heartbeats: map[string][]NodeHeartbeat{
"node-local": {{
ClusterID: "cluster-1",
NodeID: "node-local",
ObservedAt: now,
Metadata: json.RawMessage(`{
"mesh_endpoint_report": {
"cluster_id": "cluster-1",
"node_id": "node-local",
"connectivity_mode": "outbound_only",
"region": "office"
},
"fabric_listener_report": {
"inbound_reachability": "outbound_only",
"one_way_connectivity": true
},
"mesh_outbound_session_report": {
"status": "ready",
"fabric_control_endpoint": "quic://control.example.test/api/v1"
}
}`),
}},
"node-peer": {{
ClusterID: "cluster-1",
NodeID: "node-peer",
ObservedAt: now,
Metadata: json.RawMessage(`{
"mesh_endpoint_report": {
"cluster_id": "cluster-1",
"node_id": "node-peer",
"peer_endpoint": "quic://192.168.200.61:19133",
"transport": "direct_quic",
"connectivity_mode": "private_lan",
"endpoint_candidates": [{
"endpoint_id": "node-peer-lan",
"node_id": "node-peer",
"transport": "direct_quic",
"address": "quic://192.168.200.61:19133",
"reachability": "private",
"connectivity_mode": "private_lan",
"priority": 1
}]
}
}`),
}},
"node-relay": {{
ClusterID: "cluster-1",
NodeID: "node-relay",
ObservedAt: now,
Metadata: json.RawMessage(`{
"mesh_endpoint_report": {
"cluster_id": "cluster-1",
"node_id": "node-relay",
"peer_endpoint": "quic://relay.example.test:19131",
"transport": "direct_quic",
"connectivity_mode": "direct",
"region": "public",
"endpoint_candidates": [{
"endpoint_id": "node-relay-public",
"node_id": "node-relay",
"transport": "direct_quic",
"address": "quic://relay.example.test:19131",
"reachability": "public",
"connectivity_mode": "direct",
"priority": 1
}]
}
}`),
}},
},
})
service.now = func() time.Time { return now }
cfg, err := service.GetNodeSyntheticMeshConfig(context.Background(), GetNodeSyntheticMeshConfigInput{
ClusterID: "cluster-1",
NodeID: "node-local",
})
if err != nil {
t.Fatalf("get synthetic config: %v", err)
}
if endpoint := cfg.PeerEndpoints["node-peer"]; endpoint != "" {
t.Fatalf("private peer endpoint leaked to outbound-only node: %q", endpoint)
}
candidates := cfg.PeerEndpointCandidates["node-peer"]
if len(candidates) != 1 {
t.Fatalf("peer candidates = %+v, want relay-required candidate", cfg.PeerEndpointCandidates)
}
candidate := candidates[0]
if candidate.Transport != "relay_quic" || candidate.Reachability != "relay" || candidate.ConnectivityMode != "relay_required" {
t.Fatalf("candidate not converted to relay required: %+v", candidate)
}
if !containsString(candidate.PolicyTags, "offsite-private-lan-blocked") || !containsString(candidate.PolicyTags, "relay-required") {
t.Fatalf("candidate missing offsite relay tags: %+v", candidate.PolicyTags)
}
for _, seed := range cfg.RecoverySeeds {
if seed.NodeID == "node-peer" {
t.Fatalf("private recovery seed leaked to outbound-only node: %+v", cfg.RecoverySeeds)
}
}
entry, ok := findPeerDirectoryEntry(cfg.PeerDirectory, "node-peer")
if !ok || entry.EndpointCount != 0 || entry.CandidateCount != 2 {
t.Fatalf("peer directory should show relay-required candidate and join lease: %+v", cfg.PeerDirectory)
}
if len(cfg.RendezvousLeases) != 1 {
t.Fatalf("rendezvous leases = %+v, want one control-plane join lease", cfg.RendezvousLeases)
}
lease := cfg.RendezvousLeases[0]
if lease.PeerNodeID != "node-peer" ||
lease.RelayNodeID != "node-relay" ||
lease.RelayEndpoint != "quic://relay.example.test:19131" ||
lease.Transport != "relay_quic" ||
lease.Reason != "farm_mesh_join_relay" ||
!lease.ControlPlaneOnly {
t.Fatalf("unexpected join rendezvous lease: %+v", lease)
}
}
func TestGetNodeSyntheticMeshConfigIssuesRendezvousRelayLeases(t *testing.T) {
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
service := NewService(&fakeRepository{
testingFlags: EffectiveNodeTestingFlags{
Enabled: true,
SyntheticLinksEnabled: true,
},
routeIntents: []MeshRouteIntent{
{
ID: "route-a-b",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"node-a"}`),
DestinationSelector: json.RawMessage(`{"node_id":"node-b"}`),
ServiceClass: "synthetic",
Status: "active",
Policy: json.RawMessage(`{
"synthetic_enabled": true,
"hops": ["node-a", "node-r", "node-b"],
"allowed_channels": ["fabric_control", "route_control", "service_payload"],
"peer_endpoints": {
"node-r": "quic://node-r:19000"
},
"peer_endpoint_candidates": {
"node-b": [
{
"endpoint_id": "node-b-outbound",
"node_id": "node-b",
"transport": "reverse_quic",
"address": "node-b.reverse.local",
"reachability": "outbound_only",
"nat_type": "symmetric",
"connectivity_mode": "outbound_only",
"priority": 20
}
]
},
"rendezvous_leases": [
{
"peer_node_id": "node-b",
"relay_node_id": "node-r",
"relay_endpoint": "quic://node-r:19000",
"priority": 5
}
]
}`),
UpdatedAt: now,
},
{
ID: "route-x-y",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"node-x"}`),
DestinationSelector: json.RawMessage(`{"node_id":"node-y"}`),
ServiceClass: "synthetic",
Status: "active",
Policy: json.RawMessage(`{
"synthetic_enabled": true,
"hops": ["node-x", "node-y"],
"peer_endpoints": {"node-x": "quic://node-x:19000"},
"rendezvous_leases": [
{
"peer_node_id": "node-y",
"relay_node_id": "node-x",
"relay_endpoint": "quic://node-x:19000"
}
]
}`),
UpdatedAt: now,
},
},
})
service.now = func() time.Time { return now }
cfg, err := service.GetNodeSyntheticMeshConfig(context.Background(), GetNodeSyntheticMeshConfigInput{
ClusterID: "cluster-1",
NodeID: "node-a",
})
if err != nil {
t.Fatalf("get synthetic config: %v", err)
}
if cfg.SchemaVersion != "c17z18.synthetic.v1" {
t.Fatalf("schema version = %s, want c17z18.synthetic.v1", cfg.SchemaVersion)
}
if len(cfg.RendezvousLeases) != 1 {
t.Fatalf("unexpected rendezvous leases: %+v", cfg.RendezvousLeases)
}
lease := cfg.RendezvousLeases[0]
if lease.LeaseID != "route-a-b-rv-node-b-via-node-r" ||
lease.PeerNodeID != "node-b" ||
lease.RelayNodeID != "node-r" ||
lease.RelayEndpoint != "quic://node-r:19000" ||
lease.Transport != "relay_quic" ||
lease.Priority != 5 ||
!lease.ControlPlaneOnly ||
!containsString(lease.AllowedChannels, "fabric_control") ||
containsString(lease.AllowedChannels, "service_payload") {
t.Fatalf("unexpected rendezvous lease contract: %+v", lease)
}
if _, ok := findPeerDirectoryEntry(cfg.PeerDirectory, "node-y"); ok {
t.Fatalf("unrelated rendezvous lease leaked into peer directory: %+v", cfg.PeerDirectory)
}
nodeB, ok := findPeerDirectoryEntry(cfg.PeerDirectory, "node-b")
if !ok || !containsString(nodeB.ConnectivityModes, "relay_required") {
t.Fatalf("peer directory missing rendezvous peer mode: %+v", cfg.PeerDirectory)
}
nodeR, ok := findPeerDirectoryEntry(cfg.PeerDirectory, "node-r")
if !ok || !containsString(nodeR.ConnectivityModes, "relay_quic") {
t.Fatalf("peer directory missing relay control mode: %+v", cfg.PeerDirectory)
}
}
// TestGetNodeSyntheticMeshConfigReplacesStaleRendezvousRelay covers relay
// reselection driven by node feedback. node-a's latest heartbeat carries a
// "mesh_rendezvous_lease_report" that flags the lease via node-r-old as stale
// and in need of reselection, while the route policy still lists that lease
// and offers node-r-new as another hop with a known endpoint.
//
// The returned config is expected to contain a single lease via node-r-new
// with reason "stale_relay_replacement", replacement markers in the lease
// metadata, matching counters in the relay policy report, and one route path
// decision whose effective hops skip node-r-old.
func TestGetNodeSyntheticMeshConfigReplacesStaleRendezvousRelay(t *testing.T) {
	now := time.Date(2026, 4, 28, 12, 30, 0, 0, time.UTC)
	staleHeartbeatMetadata, err := json.Marshal(map[string]any{
		"mesh_rendezvous_lease_report": map[string]any{
			"schema_version": "c17z18.mesh_rendezvous_lease_report.v1",
			"cluster_id":     "cluster-1",
			"node_id":        "node-a",
			"observed_at":    now.Format(time.RFC3339Nano),
			"leases": []map[string]any{
				{
					"lease_id":           "route-a-b-rv-node-b-via-node-r-old",
					"peer_node_id":       "node-b",
					"relay_node_id":      "node-r-old",
					"route_ids":          []string{"route-a-b"},
					"stale_relay":        true,
					"reselection_needed": true,
					"connection_state":   "degraded",
					"reason":             "auto_outbound_only",
				},
			},
		},
	})
	if err != nil {
		t.Fatalf("marshal heartbeat metadata: %v", err)
	}
	service := NewService(&fakeRepository{
		testingFlags: EffectiveNodeTestingFlags{
			Enabled:               true,
			SyntheticLinksEnabled: true,
		},
		heartbeats: map[string][]NodeHeartbeat{
			"node-a": {
				{
					ClusterID:  "cluster-1",
					NodeID:     "node-a",
					Metadata:   staleHeartbeatMetadata,
					ObservedAt: now.Add(-10 * time.Second),
				},
			},
		},
		routeIntents: []MeshRouteIntent{
			{
				ID:                  "route-a-b",
				ClusterID:           "cluster-1",
				SourceSelector:      json.RawMessage(`{"node_id":"node-a"}`),
				DestinationSelector: json.RawMessage(`{"node_id":"node-b"}`),
				ServiceClass:        "synthetic",
				Status:              "active",
				Policy: json.RawMessage(`{
"synthetic_enabled": true,
"hops": ["node-a", "node-r-old", "node-r-new", "node-b"],
"allowed_channels": ["fabric_control", "route_control"],
"peer_endpoints": {
"node-r-old": "quic://node-r-old:19000",
"node-r-new": "quic://node-r-new:19000"
},
"peer_endpoint_candidates": {
"node-b": [
{
"endpoint_id": "node-b-outbound",
"node_id": "node-b",
"transport": "reverse_quic",
"address": "node-b.reverse.local",
"reachability": "outbound_only",
"nat_type": "symmetric",
"connectivity_mode": "outbound_only",
"priority": 5
}
]
},
"rendezvous_leases": [
{
"lease_id": "route-a-b-rv-node-b-via-node-r-old",
"peer_node_id": "node-b",
"relay_node_id": "node-r-old",
"relay_endpoint": "quic://node-r-old:19000",
"priority": 4
}
]
}`),
				UpdatedAt: now,
			},
		},
	})
	// Pin the service clock so the result does not depend on wall time.
	service.now = func() time.Time { return now }
	cfg, err := service.GetNodeSyntheticMeshConfig(context.Background(), GetNodeSyntheticMeshConfigInput{
		ClusterID: "cluster-1",
		NodeID:    "node-a",
	})
	if err != nil {
		t.Fatalf("get synthetic config: %v", err)
	}
	if len(cfg.RendezvousLeases) != 1 {
		t.Fatalf("unexpected rendezvous leases: %+v", cfg.RendezvousLeases)
	}
	lease := cfg.RendezvousLeases[0]
	if lease.RelayNodeID != "node-r-new" ||
		lease.LeaseID != "route-a-b-rv-node-b-via-node-r-new" ||
		lease.Reason != "stale_relay_replacement" {
		t.Fatalf("stale relay was not replaced: %+v", lease)
	}
	var metadata map[string]any
	if err := json.Unmarshal(lease.Metadata, &metadata); err != nil {
		t.Fatalf("unmarshal lease metadata: %v", err)
	}
	if metadata["replacement_for_stale_relay"] != true ||
		metadata["relay_replacement_contract"] != "stale_relay_feedback_policy" {
		t.Fatalf("replacement metadata missing: %+v", metadata)
	}
	if cfg.RendezvousRelayPolicy == nil ||
		cfg.RendezvousRelayPolicy.StaleRelayCount != 1 ||
		cfg.RendezvousRelayPolicy.WithdrawnLeaseCount != 1 ||
		cfg.RendezvousRelayPolicy.ReplacementLeaseCount != 1 {
		t.Fatalf("unexpected relay policy report: %+v", cfg.RendezvousRelayPolicy)
	}
	// Pick the replacement decision; if none exists the zero value fails the
	// check below.
	var decision RendezvousRelayPolicyDecision
	for _, item := range cfg.RendezvousRelayPolicy.Decisions {
		if item.Reason == "stale_relay_replacement" {
			decision = item
			break
		}
	}
	if decision.SelectedRelayID != "node-r-new" || decision.StaleRelayNodeID != "node-r-old" {
		t.Fatalf("unexpected relay replacement decision: %+v", cfg.RendezvousRelayPolicy.Decisions)
	}
	// The length guard keeps a counter/slice mismatch a test failure rather
	// than an index-out-of-range panic on Decisions[0].
	if cfg.RoutePathDecisions == nil ||
		cfg.RoutePathDecisions.SchemaVersion != "c17z18.route_path_decisions.v1" ||
		cfg.RoutePathDecisions.DecisionCount != 1 ||
		cfg.RoutePathDecisions.ReplacementDecisionCount != 1 ||
		len(cfg.RoutePathDecisions.Decisions) == 0 {
		t.Fatalf("unexpected route path decisions: %+v", cfg.RoutePathDecisions)
	}
	pathDecision := cfg.RoutePathDecisions.Decisions[0]
	if pathDecision.DecisionSource != "stale_relay_replacement" ||
		pathDecision.SelectedRelayID != "node-r-new" ||
		pathDecision.StaleRelayNodeID != "node-r-old" ||
		pathDecision.RendezvousPeerNodeID != "node-b" ||
		pathDecision.RendezvousLeaseID != "route-a-b-rv-node-b-via-node-r-new" ||
		pathDecision.NextHopID != "node-r-new" ||
		pathDecision.ProductionForwarding ||
		!pathDecision.ControlPlaneOnly ||
		strings.Join(pathDecision.EffectiveHops, ",") != "node-a,node-r-new,node-b" {
		t.Fatalf("unexpected route path decision: %+v", pathDecision)
	}
}
func TestGetNodeSyntheticMeshConfigAppliesReplacementPathHintForExit(t *testing.T) {
now := time.Date(2026, 4, 28, 12, 30, 0, 0, time.UTC)
hintMetadata, err := json.Marshal(map[string]any{
"mesh_route_path_decision_report": map[string]any{
"cluster_id": "cluster-1",
"node_id": "node-a",
"decisions": []map[string]any{
{
"decision_id": "route-a-b-path-node-a-via-node-r-new",
"route_id": "route-a-b",
"cluster_id": "cluster-1",
"local_node_id": "node-a",
"source_node_id": "node-a",
"destination_node_id": "node-b",
"original_hops": []string{"node-a", "node-r-old", "node-r-new", "node-b"},
"effective_hops": []string{"node-a", "node-r-new", "node-b"},
"next_hop_id": "node-r-new",
"local_role": "entry",
"selected_relay_id": "node-r-new",
"selected_relay_endpoint": "quic://node-r-new:19000",
"stale_relay_node_id": "node-r-old",
"rendezvous_peer_node_id": "node-b",
"rendezvous_lease_id": "route-a-b-rv-node-b-via-node-r-new",
"rendezvous_lease_reason": "stale_relay_replacement",
"decision_source": "stale_relay_replacement",
"generation": "hint-generation",
"path_score": 900,
"score_reasons": []string{"route_path_decision_hint"},
"control_plane_only": true,
"production_forwarding": false,
"expires_at": now.Add(time.Hour).UTC().Format(time.RFC3339Nano),
},
},
},
})
if err != nil {
t.Fatalf("marshal hint metadata: %v", err)
}
service := NewService(&fakeRepository{
testingFlags: EffectiveNodeTestingFlags{
Enabled: true,
SyntheticLinksEnabled: true,
},
heartbeats: map[string][]NodeHeartbeat{
"node-a": {
{
ClusterID: "cluster-1",
NodeID: "node-a",
Metadata: hintMetadata,
ObservedAt: now.Add(-10 * time.Second),
},
},
},
routeIntents: []MeshRouteIntent{
{
ID: "route-a-b",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"node-a"}`),
DestinationSelector: json.RawMessage(`{"node_id":"node-b"}`),
ServiceClass: "synthetic",
Status: "active",
Policy: json.RawMessage(`{
"synthetic_enabled": true,
"hops": ["node-a", "node-r-old", "node-r-new", "node-b"],
"allowed_channels": ["fabric_control", "route_control"],
"peer_endpoints": {
"node-r-old": "quic://node-r-old:19000",
"node-r-new": "quic://node-r-new:19000"
},
"peer_endpoint_candidates": {
"node-b": [
{
"endpoint_id": "node-b-outbound",
"node_id": "node-b",
"transport": "reverse_quic",
"address": "node-b.reverse.local",
"reachability": "outbound_only",
"nat_type": "symmetric",
"connectivity_mode": "outbound_only",
"priority": 5
}
]
},
"rendezvous_leases": [
{
"lease_id": "route-a-b-rv-node-b-via-node-r-old",
"peer_node_id": "node-b",
"relay_node_id": "node-r-old",
"relay_endpoint": "quic://node-r-old:19000",
"priority": 4
}
]
}`),
UpdatedAt: now,
},
},
})
service.now = func() time.Time { return now }
cfg, err := service.GetNodeSyntheticMeshConfig(context.Background(), GetNodeSyntheticMeshConfigInput{
ClusterID: "cluster-1",
NodeID: "node-b",
})
if err != nil {
t.Fatalf("get synthetic config: %v", err)
}
if len(cfg.RendezvousLeases) != 1 ||
cfg.RendezvousLeases[0].RelayNodeID != "node-r-new" ||
cfg.RendezvousLeases[0].Reason != "stale_relay_replacement" {
t.Fatalf("replacement hint did not withdraw stale relay lease: %+v", cfg.RendezvousLeases)
}
if cfg.RoutePathDecisions == nil ||
cfg.RoutePathDecisions.ReplacementDecisionCount != 1 ||
len(cfg.RoutePathDecisions.Decisions) != 1 {
t.Fatalf("unexpected route path decisions: %+v", cfg.RoutePathDecisions)
}
decision := cfg.RoutePathDecisions.Decisions[0]
if decision.DecisionSource != "stale_relay_replacement" ||
decision.LocalRole != "exit" ||
decision.PreviousHopID != "node-r-new" ||
decision.SelectedRelayID != "node-r-new" ||
decision.StaleRelayNodeID != "node-r-old" ||
decision.RendezvousPeerNodeID != "node-b" ||
strings.Join(decision.EffectiveHops, ",") != "node-a,node-r-new,node-b" {
t.Fatalf("unexpected hinted route path decision: %+v", decision)
}
}
// TestRoutePathDecisionUsesRendezvousLeaseForPassiveNATRoute calls
// routePathDecisionForRoute directly for a two-hop route (node-a -> node-b)
// together with one rendezvous lease that reaches node-b via node-r. The
// decision computed for node-a is expected to insert node-r as relay and next
// hop, copy the lease details, and stay control-plane only.
func TestRoutePathDecisionUsesRendezvousLeaseForPassiveNATRoute(t *testing.T) {
	now := time.Date(2026, 5, 17, 3, 45, 0, 0, time.UTC)
	expiry := now.Add(time.Hour)

	directRoute := SyntheticMeshRouteConfig{
		RouteID:           "route-a-b",
		ClusterID:         "cluster-1",
		SourceNodeID:      "node-a",
		DestinationNodeID: "node-b",
		Hops:              []string{"node-a", "node-b"},
		AllowedChannels:   []string{"fabric_control", "route_control"},
		ExpiresAt:         expiry,
	}
	relayLeases := []PeerRendezvousLease{{
		LeaseID:          "route-a-b-rv-node-b-via-node-r",
		PeerNodeID:       "node-b",
		RelayNodeID:      "node-r",
		RelayEndpoint:    "quic://node-r.example.test:19443",
		Transport:        "relay_quic",
		ConnectivityMode: "relay_required",
		RouteIDs:         []string{"route-a-b"},
		Priority:         10,
		ControlPlaneOnly: true,
		IssuedAt:         now,
		ExpiresAt:        expiry,
		Reason:           "auto_rendezvous_required",
	}}
	relayPolicy := newRendezvousRelayPolicy("node-a", nil, now)

	got := routePathDecisionForRoute(directRoute, "node-a", relayLeases, relayPolicy, "generation-1", fabricServiceChannelRouteFeedback{})

	viaRelay := got.DecisionSource == "rendezvous_relay_required" &&
		got.SelectedRelayID == "node-r" &&
		got.SelectedRelayEndpoint == "quic://node-r.example.test:19443" &&
		got.RendezvousPeerNodeID == "node-b" &&
		got.RendezvousLeaseID == "route-a-b-rv-node-b-via-node-r" &&
		got.RendezvousLeaseReason == "auto_rendezvous_required" &&
		got.NextHopID == "node-r" &&
		got.LocalRole == "entry" &&
		strings.Join(got.EffectiveHops, ",") == "node-a,node-r,node-b" &&
		got.ControlPlaneOnly &&
		!got.ProductionForwarding
	if !viaRelay {
		t.Fatalf("unexpected rendezvous route path decision: %+v", got)
	}
}
// TestScopedRendezvousLeasesKeepsOperatorPassiveNATLeaseWhenRelayFeedbackIsStale
// registers relay feedback for a lease whose reason is
// "operator_rendezvous_required_for_passive_nat" and then runs
// scopedRendezvousLeases for node-a. The lease is expected to stay in the
// scoped result, and the policy report, if one is produced, must not count
// any withdrawn lease.
func TestScopedRendezvousLeasesKeepsOperatorPassiveNATLeaseWhenRelayFeedbackIsStale(t *testing.T) {
	now := time.Date(2026, 5, 17, 5, 15, 0, 0, time.UTC)
	expiry := now.Add(time.Hour)

	directRoute := SyntheticMeshRouteConfig{
		RouteID:           "route-a-b",
		ClusterID:         "cluster-1",
		SourceNodeID:      "node-a",
		DestinationNodeID: "node-b",
		Hops:              []string{"node-a", "node-b"},
		AllowedChannels:   []string{"fabric_control", "route_control"},
		ExpiresAt:         expiry,
	}
	operatorLease := PeerRendezvousLease{
		LeaseID:          "route-a-b-rv-node-b-via-node-r",
		PeerNodeID:       "node-b",
		RelayNodeID:      "node-r",
		RelayEndpoint:    "quic://node-r.example.test:19443",
		Transport:        "relay_quic",
		ConnectivityMode: "relay_required",
		RouteIDs:         []string{"route-a-b"},
		Priority:         10,
		ControlPlaneOnly: true,
		IssuedAt:         now,
		ExpiresAt:        expiry,
		Reason:           "operator_rendezvous_required_for_passive_nat",
	}
	// Feedback entry that names the very same lease, relay and peer.
	feedback := []rendezvousRelayFeedbackEntry{{
		RouteIDs:       []string{"route-a-b"},
		PeerNodeID:     "node-b",
		RelayNodeID:    "node-r",
		LeaseID:        "route-a-b-rv-node-b-via-node-r",
		ReporterNodeID: "node-a",
	}}

	policy := newRendezvousRelayPolicy("node-a", nil, now)
	policy.addFeedback(feedback)

	scoped := scopedRendezvousLeases([]PeerRendezvousLease{operatorLease}, directRoute, "node-a", policy, now)
	if kept := len(scoped) == 1 && scoped[0].LeaseID == operatorLease.LeaseID; !kept {
		t.Fatalf("operator passive NAT lease must remain scoped despite stale feedback: %+v", scoped)
	}

	// A nil report is acceptable; a non-nil one must show zero withdrawals.
	policyReport := policy.report()
	if policyReport != nil && policyReport.WithdrawnLeaseCount != 0 {
		t.Fatalf("operator passive NAT lease must not be withdrawn: %+v", policyReport)
	}
}
// TestDerivedRendezvousLeaseCanSelectRelayOutsideOriginalPath calls
// derivedRendezvousLeases for a two-hop route (node-a -> node-b) where node-b
// only has a private-LAN candidate and node-r, which is not one of the route
// hops, has a public candidate. A single lease reaching node-b through
// node-r's public address with reason "auto_rendezvous_required" is expected.
func TestDerivedRendezvousLeaseCanSelectRelayOutsideOriginalPath(t *testing.T) {
	now := time.Date(2026, 5, 17, 4, 30, 0, 0, time.UTC)

	directRoute := SyntheticMeshRouteConfig{
		RouteID:           "route-a-b",
		ClusterID:         "cluster-1",
		SourceNodeID:      "node-a",
		DestinationNodeID: "node-b",
		Hops:              []string{"node-a", "node-b"},
		AllowedChannels:   []string{"fabric_control", "route_control"},
		ExpiresAt:         now.Add(time.Hour),
	}
	privatePeer := PeerEndpointCandidate{
		EndpointID:       "node-b-private",
		NodeID:           "node-b",
		Transport:        "direct_quic",
		Address:          "quic://10.10.10.20:19131",
		Reachability:     "private",
		ConnectivityMode: "private_lan",
		Region:           "remote-lan",
		Priority:         5,
		LastVerifiedAt:   &now,
	}
	publicRelay := PeerEndpointCandidate{
		EndpointID:       "node-r-public",
		NodeID:           "node-r",
		Transport:        "direct_quic",
		Address:          "quic://203.0.113.10:19131",
		Reachability:     "public",
		ConnectivityMode: "direct",
		Region:           "internet",
		Priority:         10,
		PolicyTags:       []string{"fast-path"},
		LastVerifiedAt:   &now,
	}
	candidatesByNode := map[string][]PeerEndpointCandidate{
		"node-b": {privatePeer},
		"node-r": {publicRelay},
	}
	// No plain peer endpoints are supplied; only candidates are available.
	noEndpoints := map[string]string{}
	localView := endpointPerspective{Region: "home-lan"}
	policy := newRendezvousRelayPolicy("node-a", nil, now)

	derived := derivedRendezvousLeases(directRoute, noEndpoints, candidatesByNode, "node-a", localView, policy, now)

	relaySelected := len(derived) == 1 &&
		derived[0].PeerNodeID == "node-b" &&
		derived[0].RelayNodeID == "node-r" &&
		derived[0].RelayEndpoint == "quic://203.0.113.10:19131" &&
		derived[0].Reason == "auto_rendezvous_required"
	if !relaySelected {
		t.Fatalf("unexpected derived rendezvous leases: %+v", derived)
	}
}
func TestGetNodeSyntheticMeshConfigIncludesRendezvousRelayOutsideOriginalHops(t *testing.T) {
now := time.Date(2026, 5, 17, 4, 15, 0, 0, time.UTC)
service := NewService(&fakeRepository{
testingFlags: EffectiveNodeTestingFlags{
Enabled: true,
SyntheticLinksEnabled: true,
},
routeIntents: []MeshRouteIntent{
{
ID: "route-a-b",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"node-a"}`),
DestinationSelector: json.RawMessage(`{"node_id":"node-b"}`),
ServiceClass: "vpn_packets",
Status: "active",
Policy: json.RawMessage(`{
"synthetic_enabled": true,
"hops": ["node-a", "node-b"],
"allowed_channels": ["fabric_control", "route_control"],
"expires_at": "2026-05-17T05:15:00Z",
"rendezvous_leases": [
{
"lease_id": "route-a-b-rv-node-b-via-node-r",
"peer_node_id": "node-b",
"relay_node_id": "node-r",
"relay_endpoint": "quic://node-r.example.test:19443",
"transport": "relay_quic",
"connectivity_mode": "relay_required",
"route_ids": ["route-a-b"],
"allowed_channels": ["fabric_control", "route_control"],
"priority": 10,
"control_plane_only": true,
"expires_at": "2026-05-17T05:15:00Z",
"reason": "auto_rendezvous_required"
}
]
}`),
UpdatedAt: now,
},
},
})
service.now = func() time.Time { return now }
cfg, err := service.GetNodeSyntheticMeshConfig(context.Background(), GetNodeSyntheticMeshConfigInput{
ClusterID: "cluster-1",
NodeID: "node-r",
})
if err != nil {
t.Fatalf("get synthetic config: %v", err)
}
if len(cfg.Routes) != 1 || strings.Join(cfg.Routes[0].Hops, ",") != "node-a,node-r,node-b" {
t.Fatalf("relay scoped route missing effective hops: %+v", cfg.Routes)
}
if cfg.RoutePathDecisions == nil || len(cfg.RoutePathDecisions.Decisions) != 1 {
t.Fatalf("relay route path decision missing: %+v", cfg.RoutePathDecisions)
}
decision := cfg.RoutePathDecisions.Decisions[0]
if decision.SelectedRelayID != "node-r" ||
decision.LocalRole != "selected_relay" ||
decision.PreviousHopID != "node-a" ||
decision.NextHopID != "node-b" ||
strings.Join(decision.EffectiveHops, ",") != "node-a,node-r,node-b" {
t.Fatalf("unexpected relay scoped decision: %+v", decision)
}
}
// TestGetNodeSyntheticMeshConfigUsesRouteHealthDriftToReselectRelay covers
// relay reselection driven by synthetic route-health feedback. A mesh link
// observation for node-a -> node-b reports that the applied path decision
// selected node-s as relay, but the observed ack path went through node-t
// and path drift was detected.
//
// The config for node-a is expected to contain one lease via node-t with
// reason "stale_relay_replacement", a relay policy report that counts one
// stale relay and one replacement under the route-health scoring mode, and a
// replacement route path decision whose effective hops go through node-t.
func TestGetNodeSyntheticMeshConfigUsesRouteHealthDriftToReselectRelay(t *testing.T) {
	now := time.Date(2026, 4, 28, 12, 30, 0, 0, time.UTC)
	routeHealthMetadata, err := json.Marshal(map[string]any{
		"observation_type": "synthetic_route_health",
		"route_id": "route-a-b",
		"route_path_decision_applied": true,
		"route_path_decision_selected_relay_id": "node-s",
		"route_path_decision_rendezvous_peer_node_id": "node-b",
		"route_path_decision_rendezvous_lease_id": "route-a-b-rv-node-b-via-node-s",
		"route_path_decision_rendezvous_lease_reason": "auto_rendezvous_required",
		"expected_effective_hops": []string{"node-a", "node-s", "node-b"},
		"observed_ack_path": []string{"node-a", "node-t", "node-b"},
		"route_path_drift_detected": true,
		"control_plane_only": true,
		"production_forwarding": false,
		"production_payload_forwarding": false,
		"route_health_production_payload_forwarding": false,
		"route_health_service_payload_forwarding": false,
		"synthetic_route_health_route_path_runtime": true,
		"production_route_path_forwarding_runtime": false,
		"route_health_route_config_contract": "control_plane_route_path_decisions_to_synthetic_route_health",
	})
	if err != nil {
		t.Fatalf("marshal route health metadata: %v", err)
	}
	service := NewService(&fakeRepository{
		testingFlags: EffectiveNodeTestingFlags{
			Enabled:               true,
			SyntheticLinksEnabled: true,
		},
		meshLinks: []MeshLinkObservation{
			{
				ClusterID:    "cluster-1",
				SourceNodeID: "node-a",
				TargetNodeID: "node-b",
				LinkStatus:   "reachable",
				Metadata:     routeHealthMetadata,
				ObservedAt:   now.Add(-10 * time.Second),
			},
		},
		routeIntents: []MeshRouteIntent{
			{
				ID:                  "route-a-b",
				ClusterID:           "cluster-1",
				SourceSelector:      json.RawMessage(`{"node_id":"node-a"}`),
				DestinationSelector: json.RawMessage(`{"node_id":"node-b"}`),
				ServiceClass:        "synthetic",
				Status:              "active",
				Policy: json.RawMessage(`{
"synthetic_enabled": true,
"hops": ["node-a", "node-s", "node-t", "node-b"],
"allowed_channels": ["fabric_control", "route_control"],
"peer_endpoint_candidates": {
"node-b": [
{
"endpoint_id": "node-b-outbound",
"node_id": "node-b",
"transport": "reverse_quic",
"address": "node-b.reverse.local",
"reachability": "outbound_only",
"nat_type": "symmetric",
"connectivity_mode": "outbound_only",
"priority": 5
}
],
"node-s": [
{
"endpoint_id": "node-s-public",
"node_id": "node-s",
"transport": "direct_quic",
"address": "quic://node-s:19000",
"reachability": "public",
"nat_type": "none",
"connectivity_mode": "direct",
"priority": 1,
"policy_tags": ["fast-path"]
}
],
"node-t": [
{
"endpoint_id": "node-t-public",
"node_id": "node-t",
"transport": "direct_quic",
"address": "quic://node-t:19000",
"reachability": "public",
"nat_type": "none",
"connectivity_mode": "direct",
"priority": 50
}
]
}
}`),
				UpdatedAt: now,
			},
		},
	})
	// Pin the service clock so the result does not depend on wall time.
	service.now = func() time.Time { return now }
	cfg, err := service.GetNodeSyntheticMeshConfig(context.Background(), GetNodeSyntheticMeshConfigInput{
		ClusterID: "cluster-1",
		NodeID:    "node-a",
	})
	if err != nil {
		t.Fatalf("get synthetic config: %v", err)
	}
	if len(cfg.RendezvousLeases) != 1 {
		t.Fatalf("unexpected rendezvous leases: %+v", cfg.RendezvousLeases)
	}
	lease := cfg.RendezvousLeases[0]
	if lease.RelayNodeID != "node-t" || lease.Reason != "stale_relay_replacement" {
		t.Fatalf("route health drift did not reselect relay: %+v", lease)
	}
	if cfg.RendezvousRelayPolicy == nil ||
		cfg.RendezvousRelayPolicy.StaleRelayCount != 1 ||
		cfg.RendezvousRelayPolicy.ReplacementLeaseCount != 1 ||
		cfg.RendezvousRelayPolicy.ScoringMode != "route_adjacency_endpoint_priority_mesh_link_health_synthetic_route_health_feedback" {
		t.Fatalf("unexpected relay policy report: %+v", cfg.RendezvousRelayPolicy)
	}
	// Pick the replacement decision; if none exists the zero value fails the
	// check below.
	var policyDecision RendezvousRelayPolicyDecision
	for _, item := range cfg.RendezvousRelayPolicy.Decisions {
		if item.Reason == "stale_relay_replacement" {
			policyDecision = item
			break
		}
	}
	if policyDecision.StaleRelayNodeID != "node-s" || policyDecision.SelectedRelayID != "node-t" || policyDecision.PeerNodeID != "node-b" {
		t.Fatalf("unexpected route health replacement decision: %+v", cfg.RendezvousRelayPolicy.Decisions)
	}
	// The length guard keeps a counter/slice mismatch a test failure rather
	// than an index-out-of-range panic on Decisions[0].
	if cfg.RoutePathDecisions == nil ||
		cfg.RoutePathDecisions.ReplacementDecisionCount != 1 ||
		len(cfg.RoutePathDecisions.Decisions) == 0 {
		t.Fatalf("expected replacement route path decision: %+v", cfg.RoutePathDecisions)
	}
	decision := cfg.RoutePathDecisions.Decisions[0]
	if decision.SelectedRelayID != "node-t" ||
		decision.StaleRelayNodeID != "node-s" ||
		decision.RendezvousPeerNodeID != "node-b" ||
		strings.Join(decision.EffectiveHops, ",") != "node-a,node-t,node-b" ||
		decision.ProductionForwarding ||
		!decision.ControlPlaneOnly {
		t.Fatalf("unexpected route path decision from route health feedback: %+v", decision)
	}
}
// TestGetNodeSyntheticMeshConfigUsesRouteHealthLatencyForRelayScore feeds a
// reachable, drift-free synthetic route health observation (carrying latency
// and quality values) into the service and asserts that the single rendezvous
// lease returned for node-a selects node-t as relay, is not flagged as a stale
// relay replacement, and records the route health score reasons in its
// metadata.
func TestGetNodeSyntheticMeshConfigUsesRouteHealthLatencyForRelayScore(t *testing.T) {
now := time.Date(2026, 4, 28, 12, 30, 0, 0, time.UTC)
// Observation metadata: the applied path decision went through node-t and the
// observed ack path equals the expected hops, so no drift is reported.
routeHealthMetadata, err := json.Marshal(map[string]any{
"observation_type": "synthetic_route_health",
"route_id": "route-a-b",
"route_path_decision_applied": true,
"route_path_decision_selected_relay_id": "node-t",
"route_path_decision_rendezvous_peer_node_id": "node-b",
"expected_effective_hops": []string{"node-a", "node-t", "node-b"},
"observed_ack_path": []string{"node-a", "node-t", "node-b"},
"route_path_drift_detected": false,
"control_plane_only": true,
"production_forwarding": false,
"production_payload_forwarding": false,
"route_health_production_payload_forwarding": false,
"route_health_service_payload_forwarding": false,
})
if err != nil {
t.Fatalf("marshal route health metadata: %v", err)
}
// The observation fields are pointers, so the values need addressable locals.
latency := 5
quality := 99
service := NewService(&fakeRepository{
testingFlags: EffectiveNodeTestingFlags{
Enabled: true,
SyntheticLinksEnabled: true,
},
// One reachable node-a -> node-b observation taken 10 seconds before "now".
meshLinks: []MeshLinkObservation{
{
ClusterID: "cluster-1",
SourceNodeID: "node-a",
TargetNodeID: "node-b",
LinkStatus: "reachable",
LatencyMs: &latency,
QualityScore: &quality,
Metadata: routeHealthMetadata,
ObservedAt: now.Add(-10 * time.Second),
},
},
// The route policy lists both node-s and node-t as intermediate hops. node-s
// has the numerically lower endpoint priority value (1 vs 50), yet the
// assertions below expect node-t to be selected.
routeIntents: []MeshRouteIntent{
{
ID: "route-a-b",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"node-a"}`),
DestinationSelector: json.RawMessage(`{"node_id":"node-b"}`),
ServiceClass: "synthetic",
Status: "active",
Policy: json.RawMessage(`{
"synthetic_enabled": true,
"hops": ["node-a", "node-s", "node-t", "node-b"],
"allowed_channels": ["fabric_control", "route_control"],
"peer_endpoint_candidates": {
"node-b": [
{
"endpoint_id": "node-b-outbound",
"node_id": "node-b",
"transport": "reverse_quic",
"address": "node-b.reverse.local",
"reachability": "outbound_only",
"nat_type": "symmetric",
"connectivity_mode": "outbound_only",
"priority": 5
}
],
"node-s": [
{
"endpoint_id": "node-s-public",
"node_id": "node-s",
"transport": "direct_quic",
"address": "quic://node-s:19000",
"reachability": "public",
"nat_type": "none",
"connectivity_mode": "direct",
"priority": 1,
"policy_tags": ["fast-path"]
}
],
"node-t": [
{
"endpoint_id": "node-t-public",
"node_id": "node-t",
"transport": "direct_quic",
"address": "quic://node-t:19000",
"reachability": "public",
"nat_type": "none",
"connectivity_mode": "direct",
"priority": 50
}
]
}
}`),
UpdatedAt: now,
},
},
})
// Pin the service clock so the observation age is deterministic.
service.now = func() time.Time { return now }
cfg, err := service.GetNodeSyntheticMeshConfig(context.Background(), GetNodeSyntheticMeshConfigInput{
ClusterID: "cluster-1",
NodeID: "node-a",
})
if err != nil {
t.Fatalf("get synthetic config: %v", err)
}
if len(cfg.RendezvousLeases) != 1 {
t.Fatalf("unexpected rendezvous leases: %+v", cfg.RendezvousLeases)
}
lease := cfg.RendezvousLeases[0]
// node-t must be chosen, and not via the stale relay replacement path.
if lease.RelayNodeID != "node-t" || lease.Reason == "stale_relay_replacement" {
t.Fatalf("route health latency did not influence relay score: %+v", lease)
}
var metadata map[string]any
if err := json.Unmarshal(lease.Metadata, &metadata); err != nil {
t.Fatalf("unmarshal lease metadata: %v", err)
}
// A missing or non-array value leaves reasons nil, which makes every
// anyString lookup below false and fails the test.
reasons, _ := metadata["relay_selection_score_reasons"].([]any)
if !anyString(reasons, "route_health_reachable") ||
!anyString(reasons, "route_health_no_drift") ||
!anyString(reasons, "route_health_latency") {
t.Fatalf("route health score reasons missing: %+v", metadata)
}
}
// anyString reports whether values holds at least one element whose dynamic
// type is string and whose value equals want. Elements of any other type are
// ignored; a nil or empty slice yields false.
func anyString(values []any, want string) bool {
	for i := range values {
		text, isString := values[i].(string)
		if !isString {
			continue
		}
		if text == want {
			return true
		}
	}
	return false
}
// findPeerDirectoryEntry returns the first entry whose NodeID equals nodeID
// together with true. When no entry matches it returns the zero
// PeerDirectoryEntry and false.
func findPeerDirectoryEntry(entries []PeerDirectoryEntry, nodeID string) (PeerDirectoryEntry, bool) {
	for i := range entries {
		if entries[i].NodeID != nodeID {
			continue
		}
		return entries[i], true
	}
	return PeerDirectoryEntry{}, false
}
// TestValidatePeerEndpointCandidates exercises validatePeerEndpointCandidates
// against the route path [node-a, node-b]: a well-formed candidate set must
// validate without error, and every malformed variant in the table must be
// rejected with ErrInvalidPayload.
func TestValidatePeerEndpointCandidates(t *testing.T) {
	// Accepted set: a public candidate and a private LAN candidate for node-b.
	accepted := map[string][]PeerEndpointCandidate{
		"node-b": {
			{
				EndpointID:       "node-b-public",
				NodeID:           "node-b",
				Transport:        "direct_quic",
				Address:          "203.0.113.20:443",
				AddressFamily:    "ipv4",
				Reachability:     "public",
				NATType:          "restricted",
				ConnectivityMode: "direct",
				Priority:         10,
				Metadata:         json.RawMessage(`{"source":"test"}`),
			},
			{
				EndpointID:       "node-b-lan",
				NodeID:           "node-b",
				Transport:        "direct_quic",
				Address:          "quic://192.168.10.20:19131",
				Reachability:     "private",
				NATType:          "private_lan",
				ConnectivityMode: "private_lan",
				Priority:         5,
				Metadata:         json.RawMessage(`{"source":"heartbeat"}`),
			},
		},
	}
	if err := validatePeerEndpointCandidates(accepted, []string{"node-a", "node-b"}); err != nil {
		t.Fatalf("validate valid candidates: %v", err)
	}

	// single wraps one candidate into the per-node map shape the validator takes.
	single := func(nodeKey string, candidate PeerEndpointCandidate) map[string][]PeerEndpointCandidate {
		return map[string][]PeerEndpointCandidate{nodeKey: {candidate}}
	}
	type rejection struct {
		name       string
		candidates map[string][]PeerEndpointCandidate
	}
	rejected := []rejection{
		{
			name: "unknown transport",
			candidates: single("node-b", PeerEndpointCandidate{
				EndpointID:       "node-b-public",
				NodeID:           "node-b",
				Transport:        "udp-hole-punch",
				Address:          "203.0.113.20:443",
				Reachability:     "public",
				ConnectivityMode: "direct",
			}),
		},
		{
			name: "unknown nat",
			candidates: single("node-b", PeerEndpointCandidate{
				EndpointID:       "node-b-public",
				NodeID:           "node-b",
				Transport:        "direct_quic",
				Address:          "203.0.113.20:443",
				Reachability:     "public",
				NATType:          "mystery_nat",
				ConnectivityMode: "direct",
			}),
		},
		{
			name: "node outside route path",
			candidates: single("node-y", PeerEndpointCandidate{
				EndpointID:       "node-y-public",
				NodeID:           "node-y",
				Transport:        "direct_quic",
				Address:          "203.0.113.30:443",
				Reachability:     "public",
				ConnectivityMode: "direct",
			}),
		},
		{
			// Map key and candidate NodeID disagree.
			name: "node mismatch",
			candidates: single("node-b", PeerEndpointCandidate{
				EndpointID:       "node-b-public",
				NodeID:           "node-c",
				Transport:        "direct_quic",
				Address:          "203.0.113.20:443",
				Reachability:     "public",
				ConnectivityMode: "direct",
			}),
		},
		{
			// Metadata is truncated JSON.
			name: "invalid metadata",
			candidates: single("node-b", PeerEndpointCandidate{
				EndpointID:       "node-b-public",
				NodeID:           "node-b",
				Transport:        "direct_quic",
				Address:          "203.0.113.20:443",
				Reachability:     "public",
				ConnectivityMode: "direct",
				Metadata:         json.RawMessage(`{`),
			}),
		},
	}
	for _, tc := range rejected {
		t.Run(tc.name, func(t *testing.T) {
			if err := validatePeerEndpointCandidates(tc.candidates, []string{"node-a", "node-b"}); !errors.Is(err, ErrInvalidPayload) {
				t.Fatalf("err = %v, want ErrInvalidPayload", err)
			}
		})
	}
}
// TestDedupePeerEndpointCandidatesKeepsDistinctFabricTargets passes two
// identical LAN candidates plus one public candidate for node-b through
// dedupePeerEndpointCandidatesMap and expects two candidates back, with the
// LAN candidate first.
func TestDedupePeerEndpointCandidatesKeepsDistinctFabricTargets(t *testing.T) {
	lan := PeerEndpointCandidate{
		EndpointID:       "node-b-lan",
		NodeID:           "node-b",
		Address:          "quic://192.168.10.20:19131",
		Transport:        "direct_quic",
		ConnectivityMode: "private_lan",
		Priority:         10,
	}
	public := PeerEndpointCandidate{
		EndpointID:       "node-b-public",
		NodeID:           "node-b",
		Address:          "quic://203.0.113.20:19131",
		Transport:        "direct_quic",
		ConnectivityMode: "direct",
		Priority:         20,
	}
	// The LAN candidate is listed twice on purpose; the copy is the duplicate.
	input := map[string][]PeerEndpointCandidate{
		"node-b": {lan, lan, public},
	}

	deduped := dedupePeerEndpointCandidatesMap(input)["node-b"]
	if len(deduped) != 2 {
		t.Fatalf("deduped candidates = %+v, want 2 distinct targets", deduped)
	}
	if first := deduped[0]; first.EndpointID != "node-b-lan" {
		t.Fatalf("first candidate = %+v, want priority ordered lan candidate", first)
	}
}
func TestMinorityClusterBlocksPolicyMutation(t *testing.T) {
store := &fakeRepository{
platformRole: PlatformRoleAdmin,
authorityState: ClusterAuthorityState{
ClusterID: "cluster-1",
AuthorityState: "minority",
MutationMode: "read_only",
},
}
service := NewService(store)
_, err := service.AssignNodeRole(context.Background(), AssignNodeRoleInput{
ActorUserID: "admin-1",
ClusterID: "cluster-1",
NodeID: "node-1",
Role: "rdp-worker",
})
if !errors.Is(err, ErrClusterReadOnly) {
t.Fatalf("err = %v, want ErrClusterReadOnly", err)
}
}
func TestRecoveryAdminCanMutateReadOnlyCluster(t *testing.T) {
store := &fakeRepository{
platformRole: PlatformRoleRecoveryAdmin,
authorityState: ClusterAuthorityState{
ClusterID: "cluster-1",
AuthorityState: "isolated",
MutationMode: "read_only",
},
}
service := NewService(store)
_, err := service.AssignNodeRole(context.Background(), AssignNodeRoleInput{
ActorUserID: "recovery-1",
ClusterID: "cluster-1",
NodeID: "node-1",
Role: "rdp-worker",
})
if err != nil {
t.Fatalf("recovery admin mutate: %v", err)
}
}
// TestCreateVPNConnectionRequiresPlatformAdmin checks that an actor whose
// platform role is "user" is refused with ErrAccessDenied when creating a VPN
// connection.
func TestCreateVPNConnectionRequiresPlatformAdmin(t *testing.T) {
	repo := &fakeRepository{platformRole: "user"}
	svc := NewService(repo)

	input := CreateVPNConnectionInput{
		ActorUserID:    "user-1",
		ClusterID:      "cluster-1",
		OrganizationID: "org-1",
		Name:           "office-a",
	}
	if _, err := svc.CreateVPNConnection(context.Background(), input); !errors.Is(err, ErrAccessDenied) {
		t.Fatalf("err = %v, want ErrAccessDenied", err)
	}
}
// TestCreateVPNConnectionDefaultsToDisabledSingleActive creates a VPN
// connection without specifying mode, desired state or policies and expects
// the result to be single_active/disabled, with non-empty AllowedNodePolicy
// and RoutingUsage values handed to the repository.
func TestCreateVPNConnectionDefaultsToDisabledSingleActive(t *testing.T) {
	repo := &fakeRepository{platformRole: PlatformRoleAdmin}
	svc := NewService(repo)

	input := CreateVPNConnectionInput{
		ActorUserID:    "admin-1",
		ClusterID:      "cluster-1",
		OrganizationID: "org-1",
		Name:           "office-a",
	}
	item, err := svc.CreateVPNConnection(context.Background(), input)
	if err != nil {
		t.Fatalf("create vpn connection: %v", err)
	}

	defaultsApplied := item.Mode == VPNConnectionModeSingleActive &&
		item.DesiredState == VPNConnectionDesiredDisabled
	if !defaultsApplied {
		t.Fatalf("unexpected defaults: %+v", item)
	}

	// Inspect what the service actually passed down to the repository.
	allowedPolicy := string(repo.lastVPNConnectionInput.AllowedNodePolicy)
	routingUsage := string(repo.lastVPNConnectionInput.RoutingUsage)
	if allowedPolicy == "" || routingUsage == "" {
		t.Fatalf("expected default json policies, got %+v", repo.lastVPNConnectionInput)
	}
}
// TestCreateVPNConnectionRequiresClusterAndOrganizationScope omits the
// organization ID from the create request and expects ErrInvalidPayload.
func TestCreateVPNConnectionRequiresClusterAndOrganizationScope(t *testing.T) {
	repo := &fakeRepository{platformRole: PlatformRoleAdmin}
	svc := NewService(repo)

	// OrganizationID is intentionally left unset.
	input := CreateVPNConnectionInput{
		ActorUserID: "admin-1",
		ClusterID:   "cluster-1",
		Name:        "office-a",
	}
	if _, err := svc.CreateVPNConnection(context.Background(), input); !errors.Is(err, ErrInvalidPayload) {
		t.Fatalf("err = %v, want ErrInvalidPayload", err)
	}
}
func TestCreateVPNConnectionBlockedInReadOnlyCluster(t *testing.T) {
store := &fakeRepository{
platformRole: PlatformRoleAdmin,
authorityState: ClusterAuthorityState{
ClusterID: "cluster-1",
AuthorityState: "minority",
MutationMode: "read_only",
},
}
service := NewService(store)
_, err := service.CreateVPNConnection(context.Background(), CreateVPNConnectionInput{
ActorUserID: "admin-1",
ClusterID: "cluster-1",
OrganizationID: "org-1",
Name: "office-a",
})
if !errors.Is(err, ErrClusterReadOnly) {
t.Fatalf("err = %v, want ErrClusterReadOnly", err)
}
}
func TestAcquireVPNLeaseRequiresEnabledConnection(t *testing.T) {
store := &fakeRepository{
platformRole: PlatformRoleAdmin,
vpnConnection: VPNConnection{
ID: "vpn-1",
ClusterID: "cluster-1",
Mode: VPNConnectionModeSingleActive,
DesiredState: VPNConnectionDesiredDisabled,
},
}
service := NewService(store)
_, err := service.AcquireVPNConnectionLease(context.Background(), AcquireVPNConnectionLeaseInput{
ActorUserID: "admin-1",
ClusterID: "cluster-1",
VPNConnectionID: "vpn-1",
OwnerNodeID: "node-1",
})
if err == nil || !strings.Contains(err.Error(), "enabled single_active") {
t.Fatalf("err = %v, want enabled single_active validation", err)
}
}
func TestAcquireVPNLeaseRejectsSecondActiveOwner(t *testing.T) {
store := &fakeRepository{
platformRole: PlatformRoleAdmin,
vpnConnection: VPNConnection{
ID: "vpn-1",
ClusterID: "cluster-1",
Mode: VPNConnectionModeSingleActive,
DesiredState: VPNConnectionDesiredEnabled,
},
acquireVPNLeaseErr: ErrVPNLeaseAlreadyActive,
}
service := NewService(store)
_, err := service.AcquireVPNConnectionLease(context.Background(), AcquireVPNConnectionLeaseInput{
ActorUserID: "admin-1",
ClusterID: "cluster-1",
VPNConnectionID: "vpn-1",
OwnerNodeID: "node-2",
})
if !errors.Is(err, ErrVPNLeaseAlreadyActive) {
t.Fatalf("err = %v, want ErrVPNLeaseAlreadyActive", err)
}
}
func TestAcquireVPNLeaseRejectsOwnerOutsideAllowedPolicy(t *testing.T) {
store := &fakeRepository{
platformRole: PlatformRoleAdmin,
vpnConnection: VPNConnection{
ID: "vpn-1",
ClusterID: "cluster-1",
Mode: VPNConnectionModeSingleActive,
DesiredState: VPNConnectionDesiredEnabled,
},
ownerEligibility: VPNLeaseOwnerEligibility{
VPNConnectionID: "vpn-1",
ClusterID: "cluster-1",
OwnerNodeID: "node-1",
MembershipStatus: "active",
NodeRegistrationStatus: NodeRegistrationActive,
AllowedByPolicy: false,
HasAuthorizedRole: true,
},
}
service := NewService(store)
_, err := service.AcquireVPNConnectionLease(context.Background(), AcquireVPNConnectionLeaseInput{
ActorUserID: "admin-1",
ClusterID: "cluster-1",
VPNConnectionID: "vpn-1",
OwnerNodeID: "node-1",
})
if !errors.Is(err, ErrVPNLeaseOwnerNotAllowed) {
t.Fatalf("err = %v, want ErrVPNLeaseOwnerNotAllowed", err)
}
}
func TestAcquireVPNLeaseRejectsOwnerWithoutVPNRole(t *testing.T) {
store := &fakeRepository{
platformRole: PlatformRoleAdmin,
vpnConnection: VPNConnection{
ID: "vpn-1",
ClusterID: "cluster-1",
Mode: VPNConnectionModeSingleActive,
DesiredState: VPNConnectionDesiredEnabled,
},
ownerEligibility: VPNLeaseOwnerEligibility{
VPNConnectionID: "vpn-1",
ClusterID: "cluster-1",
OwnerNodeID: "node-1",
MembershipStatus: "active",
NodeRegistrationStatus: NodeRegistrationActive,
AllowedByPolicy: true,
HasAuthorizedRole: false,
},
}
service := NewService(store)
_, err := service.AcquireVPNConnectionLease(context.Background(), AcquireVPNConnectionLeaseInput{
ActorUserID: "admin-1",
ClusterID: "cluster-1",
VPNConnectionID: "vpn-1",
OwnerNodeID: "node-1",
})
if !errors.Is(err, ErrVPNLeaseOwnerRoleRequired) {
t.Fatalf("err = %v, want ErrVPNLeaseOwnerRoleRequired", err)
}
}
func TestAcquireVPNLeaseRejectsWrongCluster(t *testing.T) {
store := &fakeRepository{
platformRole: PlatformRoleAdmin,
vpnConnection: VPNConnection{
ID: "vpn-1",
ClusterID: "cluster-1",
Mode: VPNConnectionModeSingleActive,
DesiredState: VPNConnectionDesiredEnabled,
},
ownerEligibilityErr: pgx.ErrNoRows,
}
service := NewService(store)
_, err := service.AcquireVPNConnectionLease(context.Background(), AcquireVPNConnectionLeaseInput{
ActorUserID: "admin-1",
ClusterID: "cluster-other",
VPNConnectionID: "vpn-1",
OwnerNodeID: "node-1",
})
if !errors.Is(err, ErrInvalidVPNConnection) {
t.Fatalf("err = %v, want ErrInvalidVPNConnection", err)
}
}
func TestRenewVPNLeaseRejectsExpiredLease(t *testing.T) {
store := &fakeRepository{
platformRole: PlatformRoleAdmin,
ownerEligibility: VPNLeaseOwnerEligibility{
VPNConnectionID: "vpn-1",
ClusterID: "cluster-1",
OwnerNodeID: "node-1",
MembershipStatus: "active",
NodeRegistrationStatus: NodeRegistrationActive,
AllowedByPolicy: true,
HasAuthorizedRole: true,
},
renewVPNLeaseErr: pgx.ErrNoRows,
}
service := NewService(store)
_, err := service.RenewVPNConnectionLease(context.Background(), RenewVPNConnectionLeaseInput{
ActorUserID: "admin-1",
ClusterID: "cluster-1",
VPNConnectionID: "vpn-1",
LeaseID: "lease-1",
OwnerNodeID: "node-1",
FencingToken: "token-1",
})
if !errors.Is(err, ErrInvalidVPNLease) {
t.Fatalf("err = %v, want ErrInvalidVPNLease", err)
}
}
// TestFenceVPNLeaseRequiresRecoveryAdmin checks that an actor holding only the
// platform admin role is refused with ErrAccessDenied when fencing a lease.
func TestFenceVPNLeaseRequiresRecoveryAdmin(t *testing.T) {
	repo := &fakeRepository{platformRole: PlatformRoleAdmin}
	svc := NewService(repo)

	input := FenceVPNConnectionLeaseInput{
		ActorUserID:     "admin-1",
		ClusterID:       "cluster-1",
		VPNConnectionID: "vpn-1",
		LeaseID:         "lease-1",
	}
	if _, err := svc.FenceVPNConnectionLease(context.Background(), input); !errors.Is(err, ErrAccessDenied) {
		t.Fatalf("err = %v, want ErrAccessDenied", err)
	}
}
func TestExpireStaleVPNConnectionLeasesAuditsEachExpiredLease(t *testing.T) {
store := &fakeRepository{
platformRole: PlatformRoleAdmin,
expiredVPNLeases: []VPNConnectionLease{
{ID: "lease-1", ClusterID: "cluster-1", VPNConnectionID: "vpn-1", Status: VPNLeaseStatusExpired},
{ID: "lease-2", ClusterID: "cluster-1", VPNConnectionID: "vpn-2", Status: VPNLeaseStatusExpired},
},
}
service := NewService(store)
items, err := service.ExpireStaleVPNConnectionLeases(context.Background(), ExpireStaleVPNConnectionLeasesInput{
ActorUserID: "admin-1",
ClusterID: "cluster-1",
})
if err != nil {
t.Fatalf("expire stale vpn leases: %v", err)
}
if got, want := len(items), 2; got != want {
t.Fatalf("expired leases = %d, want %d", got, want)
}
var auditCount int
for _, event := range store.auditEvents {
if event.EventType == "vpn_connection.lease_expired" {
auditCount++
}
}
if got, want := auditCount, 2; got != want {
t.Fatalf("lease_expired audit count = %d, want %d", got, want)
}
}
// TestSetVPNConnectionAllowedNodesDeduplicatesScope submits a node list with a
// repeated ID and a blank entry and expects two node IDs to reach the
// repository and two items to be returned.
func TestSetVPNConnectionAllowedNodesDeduplicatesScope(t *testing.T) {
	repo := &fakeRepository{platformRole: PlatformRoleAdmin}
	svc := NewService(repo)

	input := SetVPNConnectionAllowedNodesInput{
		ActorUserID:     "admin-1",
		ClusterID:       "cluster-1",
		VPNConnectionID: "vpn-1",
		// One duplicate and one whitespace-only entry.
		NodeIDs: []string{"node-1", "node-1", " ", "node-2"},
	}
	items, err := svc.SetVPNConnectionAllowedNodes(context.Background(), input)
	if err != nil {
		t.Fatalf("set allowed nodes: %v", err)
	}

	if stored := len(repo.lastAllowedNodesInput.NodeIDs); stored != 2 {
		t.Fatalf("deduped nodes = %d, want %d", stored, 2)
	}
	if returned := len(items); returned != 2 {
		t.Fatalf("allowed nodes returned = %d, want %d", returned, 2)
	}
}
// TestUpsertVPNRoutePolicyRejectsInvalidType submits a route policy with the
// route type "submarine" and expects ErrInvalidPayload.
func TestUpsertVPNRoutePolicyRejectsInvalidType(t *testing.T) {
	repo := &fakeRepository{platformRole: PlatformRoleAdmin}
	svc := NewService(repo)

	input := UpsertVPNConnectionRoutePolicyInput{
		ActorUserID:     "admin-1",
		ClusterID:       "cluster-1",
		VPNConnectionID: "vpn-1",
		RouteType:       "submarine",
		Destination:     "10.0.0.0/24",
	}
	if _, err := svc.UpsertVPNConnectionRoutePolicy(context.Background(), input); !errors.Is(err, ErrInvalidPayload) {
		t.Fatalf("err = %v, want ErrInvalidPayload", err)
	}
}
func TestListNodeVPNAssignmentsDoesNotRequirePlatformAdmin(t *testing.T) {
store := &fakeRepository{
platformRole: "user",
nodeVPNAssignments: []NodeVPNAssignment{
{VPNConnectionID: "vpn-1", ClusterID: "cluster-1", OrganizationID: "org-1", AssignmentReason: "eligible_candidate"},
},
}
service := NewService(store)
items, err := service.ListNodeVPNAssignments(context.Background(), "cluster-1", "node-1")
if err != nil {
t.Fatalf("list node vpn assignments: %v", err)
}
if got, want := len(items), 1; got != want {
t.Fatalf("assignments = %d, want %d", got, want)
}
}
func TestAcquireNodeVPNAssignmentLeaseAllowsEligibleCandidateWithoutPlatformAdmin(t *testing.T) {
store := &fakeRepository{
platformRole: "user",
vpnConnection: VPNConnection{
ID: "vpn-1",
ClusterID: "cluster-1",
Mode: VPNConnectionModeSingleActive,
DesiredState: VPNConnectionDesiredEnabled,
},
nodeVPNAssignments: []NodeVPNAssignment{
{
VPNConnectionID: "vpn-1",
ClusterID: "cluster-1",
OrganizationID: "org-1",
DesiredState: VPNConnectionDesiredEnabled,
AssignmentReason: "eligible_candidate",
},
},
}
service := NewService(store)
lease, err := service.AcquireNodeVPNAssignmentLease(context.Background(), AcquireNodeVPNAssignmentLeaseInput{
ClusterID: "cluster-1",
VPNConnectionID: "vpn-1",
OwnerNodeID: "node-1",
TTL: time.Minute,
Metadata: json.RawMessage(`{"reason":"test"}`),
})
if err != nil {
t.Fatalf("acquire node vpn assignment lease: %v", err)
}
if lease.OwnerNodeID != "node-1" || lease.VPNConnectionID != "vpn-1" || lease.Status != VPNLeaseStatusActive {
t.Fatalf("unexpected lease: %+v", lease)
}
}
func TestAcquireNodeVPNAssignmentLeaseRejectsInvisibleAssignment(t *testing.T) {
store := &fakeRepository{
platformRole: "user",
vpnConnection: VPNConnection{
ID: "vpn-1",
ClusterID: "cluster-1",
Mode: VPNConnectionModeSingleActive,
DesiredState: VPNConnectionDesiredEnabled,
},
nodeVPNAssignments: []NodeVPNAssignment{
{VPNConnectionID: "other-vpn", ClusterID: "cluster-1", AssignmentReason: "eligible_candidate"},
},
}
service := NewService(store)
_, err := service.AcquireNodeVPNAssignmentLease(context.Background(), AcquireNodeVPNAssignmentLeaseInput{
ClusterID: "cluster-1",
VPNConnectionID: "vpn-1",
OwnerNodeID: "node-1",
TTL: time.Minute,
})
if !errors.Is(err, ErrVPNLeaseOwnerNotAllowed) {
t.Fatalf("err = %v, want ErrVPNLeaseOwnerNotAllowed", err)
}
}
func TestRenewNodeVPNAssignmentLeaseAllowsActiveOwnerWithoutPlatformAdmin(t *testing.T) {
store := &fakeRepository{
platformRole: "user",
nodeVPNAssignments: []NodeVPNAssignment{
{
VPNConnectionID: "vpn-1",
ClusterID: "cluster-1",
OrganizationID: "org-1",
AssignmentReason: "active_owner",
ActiveLease: &NodeVPNAssignmentLease{
LeaseID: "lease-1",
OwnerNodeID: "node-1",
Status: VPNLeaseStatusActive,
},
},
},
}
service := NewService(store)
lease, err := service.RenewNodeVPNAssignmentLease(context.Background(), RenewNodeVPNAssignmentLeaseInput{
ClusterID: "cluster-1",
VPNConnectionID: "vpn-1",
LeaseID: "lease-1",
OwnerNodeID: "node-1",
TTL: time.Minute,
})
if err != nil {
t.Fatalf("renew node vpn assignment lease: %v", err)
}
if lease.ID != "lease-1" {
t.Fatalf("lease.ID = %q, want lease-1", lease.ID)
}
}
func TestRenewNodeVPNAssignmentLeaseRejectsNonOwner(t *testing.T) {
store := &fakeRepository{
nodeVPNAssignments: []NodeVPNAssignment{
{VPNConnectionID: "vpn-1", ClusterID: "cluster-1", OrganizationID: "org-1", AssignmentReason: "eligible_candidate"},
},
}
service := NewService(store)
_, err := service.RenewNodeVPNAssignmentLease(context.Background(), RenewNodeVPNAssignmentLeaseInput{
ClusterID: "cluster-1",
VPNConnectionID: "vpn-1",
LeaseID: "lease-1",
OwnerNodeID: "node-1",
})
if !errors.Is(err, ErrVPNLeaseOwnerNotAllowed) {
t.Fatalf("err = %v, want ErrVPNLeaseOwnerNotAllowed", err)
}
}
// TestReportNodeVPNAssignmentStatusRejectsInvisibleAssignment reports status
// for "vpn-foreign" against a repository with no assignments configured and
// expects ErrVPNLeaseOwnerNotAllowed.
func TestReportNodeVPNAssignmentStatusRejectsInvisibleAssignment(t *testing.T) {
	repo := &fakeRepository{}
	svc := NewService(repo)

	input := ReportNodeVPNAssignmentStatusInput{
		ClusterID:       "cluster-1",
		NodeID:          "node-1",
		VPNConnectionID: "vpn-foreign",
		ObservedStatus:  VPNAssignmentStatusAssigned,
	}
	if _, err := svc.ReportNodeVPNAssignmentStatus(context.Background(), input); !errors.Is(err, ErrVPNLeaseOwnerNotAllowed) {
		t.Fatalf("err = %v, want ErrVPNLeaseOwnerNotAllowed", err)
	}
}
func TestReportNodeVPNAssignmentStatusAcceptsExplicitStates(t *testing.T) {
store := &fakeRepository{
nodeVPNAssignments: []NodeVPNAssignment{
{VPNConnectionID: "vpn-1", ClusterID: "cluster-1", OrganizationID: "org-1"},
},
}
service := NewService(store)
status, err := service.ReportNodeVPNAssignmentStatus(context.Background(), ReportNodeVPNAssignmentStatusInput{
ClusterID: "cluster-1",
NodeID: "node-1",
VPNConnectionID: "vpn-1",
ObservedStatus: VPNAssignmentStatusLeaseRequired,
StatusPayload: json.RawMessage(`{"reason":"no_lease"}`),
})
if err != nil {
t.Fatalf("report node vpn assignment status: %v", err)
}
if status.ObservedStatus != VPNAssignmentStatusLeaseRequired {
t.Fatalf("ObservedStatus = %q, want %q", status.ObservedStatus, VPNAssignmentStatusLeaseRequired)
}
}
func TestReportNodeVPNAssignmentStatusRejectsInvalidStatus(t *testing.T) {
store := &fakeRepository{
nodeVPNAssignments: []NodeVPNAssignment{
{VPNConnectionID: "vpn-1", ClusterID: "cluster-1", OrganizationID: "org-1"},
},
}
service := NewService(store)
_, err := service.ReportNodeVPNAssignmentStatus(context.Background(), ReportNodeVPNAssignmentStatusInput{
ClusterID: "cluster-1",
NodeID: "node-1",
VPNConnectionID: "vpn-1",
ObservedStatus: "running_tunnel",
})
if !errors.Is(err, ErrInvalidPayload) {
t.Fatalf("err = %v, want ErrInvalidPayload", err)
}
}
// TestIssueFabricServiceChannelLeaseSelectsAuthorizedRoute issues a fabric
// service channel lease with two active VPN packet route intents available and
// a preferred entry/exit pair of usa-los-1 -> home-1. It asserts the selected
// nodes and primary route, the recovery policy provenance, the inactive
// fallback, the channel/role defaults, the token TTL, the QUIC-only entry and
// data-plane contracts, and that the signed authority payload binds the token
// hash and channel ID and verifies against the store's cluster authority key.
func TestIssueFabricServiceChannelLeaseSelectsAuthorizedRoute(t *testing.T) {
now := time.Date(2026, 5, 7, 12, 0, 0, 0, time.UTC)
service := NewService(&fakeRepository{
routeIntents: []MeshRouteIntent{
// Route matching the preferred entry/exit pair, via relay-1.
{
ID: "route-usa-home",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"usa-los-1"}`),
DestinationSelector: json.RawMessage(`{"node_id":"home-1"}`),
ServiceClass: FabricServiceClassVPNPackets,
Priority: 20,
Status: "active",
Policy: json.RawMessage(`{
"synthetic_enabled": true,
"hops": ["usa-los-1", "relay-1", "home-1"],
"allowed_channels": ["vpn_packet", "fabric_control"],
"route_version": "rv-1",
"policy_version": "pv-1"
}`),
UpdatedAt: now,
},
// Second route whose source and destination are both home-1; the assertions
// below expect it NOT to become the primary route.
{
ID: "route-home-home",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"home-1"}`),
DestinationSelector: json.RawMessage(`{"node_id":"home-1"}`),
ServiceClass: FabricServiceClassVPNPackets,
Priority: 5,
Status: "active",
Policy: json.RawMessage(`{
"synthetic_enabled": true,
"hops": ["home-1", "home-1"],
"allowed_channels": ["vpn_packet", "fabric_control"]
}`),
UpdatedAt: now,
},
},
})
// Pin the service clock so issued timestamps are deterministic.
service.now = func() time.Time { return now }
lease, err := service.IssueFabricServiceChannelLease(context.Background(), IssueFabricServiceChannelLeaseInput{
ClusterID: "cluster-1",
OrganizationID: "org-home",
UserID: "user-m",
ResourceID: "vpn-home",
ServiceClass: FabricServiceClassVPNPackets,
EntryNodeIDs: []string{"home-1", "usa-los-1"},
ExitNodeIDs: []string{"home-1", "ifcm-1"},
PreferredEntryNodeID: "usa-los-1",
PreferredExitNodeID: "home-1",
TTL: 90 * time.Second,
})
if err != nil {
t.Fatalf("issue lease: %v", err)
}
if lease.Status != FabricServiceChannelStatusReady {
t.Fatalf("lease.Status = %q, want ready", lease.Status)
}
// The preferred entry/exit nodes must be honored.
if lease.SelectedEntryNodeID != "usa-los-1" || lease.SelectedExitNodeID != "home-1" {
t.Fatalf("selected nodes = %s -> %s", lease.SelectedEntryNodeID, lease.SelectedExitNodeID)
}
if lease.PrimaryRoute.RouteID != "route-usa-home" {
t.Fatalf("primary route = %q, want route-usa-home", lease.PrimaryRoute.RouteID)
}
// Recovery policy values must match the package-level constants on both the
// lease and its primary route.
if lease.RecoveryPolicy == nil || lease.RecoveryPolicy.HysteresisPenalty != fabricServiceChannelRecoveryHysteresisPenalty {
t.Fatalf("lease recovery policy provenance = %+v", lease.RecoveryPolicy)
}
if lease.PrimaryRoute.RecoveryPolicy == nil || lease.PrimaryRoute.RecoveryPolicy.PromotionMinSamples != fabricServiceChannelRecoveryPromotionMinSamples {
t.Fatalf("primary route recovery policy provenance = %+v", lease.PrimaryRoute.RecoveryPolicy)
}
if lease.Fallback.Active || lease.Fallback.Degraded {
t.Fatalf("fallback should be available but inactive: %+v", lease.Fallback)
}
// The request set neither allowed channels nor required roles, so these are
// defaults chosen by the service.
if !containsString(lease.AllowedChannels, "vpn_packet") || !containsString(lease.RequiredRoles, "vpn-exit") {
t.Fatalf("unexpected channel/role defaults: channels=%v roles=%v", lease.AllowedChannels, lease.RequiredRoles)
}
// 90 matches the requested TTL of 90 * time.Second.
if lease.Token.Token == "" || lease.Token.TTLSeconds != 90 {
t.Fatalf("unexpected token contract: %+v", lease.Token)
}
if lease.EntryHTTP.Type != "fabric_quic_only" || lease.EntryHTTP.PathTemplate != "" || lease.EntryHTTP.WebSocketPathTemplate != "" || len(lease.EntryHTTP.SupportedMethods) != 0 {
t.Fatalf("entry http contract must be disabled for QUIC-only fabric: %+v", lease.EntryHTTP)
}
if lease.DataPlane.SchemaVersion != "rap.fabric_service_channel_data_plane.v1" ||
lease.DataPlane.Mode != "fabric_quic_only" ||
lease.DataPlane.ControlPlaneTransport != "fabric_control_quic" ||
lease.DataPlane.WorkingDataTransport != "fabric_quic_route" ||
lease.DataPlane.SteadyStateTransport != "fabric_route" ||
lease.DataPlane.BackendRelayPolicy != "disabled" ||
!lease.DataPlane.ProductionForwardingRequired ||
!lease.DataPlane.ServiceNeutral ||
!lease.DataPlane.ProtocolAgnostic ||
lease.DataPlane.LogicalFlowMode != "multi_flow_isolated" ||
!containsString(lease.DataPlane.RequiredFlowIsolationClasses, "vpn_packet") {
t.Fatalf("unexpected data-plane contract: %+v", lease.DataPlane)
}
if lease.AuthoritySignature == nil || len(lease.AuthorityPayload) == 0 {
t.Fatalf("lease must be signed: payload=%s signature=%+v", string(lease.AuthorityPayload), lease.AuthoritySignature)
}
// Decode the signed payload and check it agrees with the lease fields.
var signedPayload FabricServiceChannelLeaseAuthorityPayload
if err := json.Unmarshal(lease.AuthorityPayload, &signedPayload); err != nil {
t.Fatalf("unmarshal signed payload: %v", err)
}
if signedPayload.TokenHash != fabricServiceChannelTokenHash(lease.Token.Token) || signedPayload.ChannelID != lease.ChannelID {
t.Fatalf("signed payload does not bind token/channel: %+v", signedPayload)
}
if signedPayload.RecoveryPolicy == nil || signedPayload.RecoveryPolicy.Source != "defaults" {
t.Fatalf("signed payload recovery policy provenance = %+v", signedPayload.RecoveryPolicy)
}
if signedPayload.DataPlane.SchemaVersion != lease.DataPlane.SchemaVersion ||
signedPayload.DataPlane.WorkingDataTransport != "fabric_quic_route" ||
signedPayload.DataPlane.BackendRelayPolicy != "disabled" {
t.Fatalf("signed payload data-plane contract = %+v", signedPayload.DataPlane)
}
// Recover the fake repository from the service to read the cluster authority
// public key used for signature verification.
store := service.store.(*fakeRepository)
if err := clusterauth.VerifyRaw(store.clusterAuthority.PublicKey, lease.AuthorityPayload, *lease.AuthoritySignature); err != nil {
t.Fatalf("verify lease authority: %v", err)
}
}
// TestFabricServiceChannelLeaseIntrospectionAllowsFreshToken issues a lease
// over a single usa-1 -> home-1 route and immediately introspects it with the
// issued token. The result must be allowed, accepted by introspection, prefer
// route-usa-home without forcing compat fallback, and carry the QUIC data-plane
// contract.
func TestFabricServiceChannelLeaseIntrospectionAllowsFreshToken(t *testing.T) {
store := &fakeRepository{}
service := NewService(store)
// Fixed clock: issue and introspection happen at the same instant.
service.now = func() time.Time { return time.Date(2026, 5, 8, 14, 0, 0, 0, time.UTC) }
// Here the source and destination node IDs are given inside the policy JSON;
// no SourceSelector/DestinationSelector is set on the intent.
store.routeIntents = []MeshRouteIntent{{
ID: "route-usa-home",
ClusterID: "cluster-1",
ServiceClass: FabricServiceClassVPNPackets,
Status: "active",
Policy: json.RawMessage(`{
"schema_version":"rap.synthetic_route_policy.v1",
"source_node_id":"usa-1",
"destination_node_id":"home-1",
"hops":["usa-1","home-1"],
"allowed_channels":["vpn_packet","fabric_control"],
"synthetic_enabled":true
}`),
CreatedAt: service.now(),
UpdatedAt: service.now(),
}}
lease, err := service.IssueFabricServiceChannelLease(context.Background(), IssueFabricServiceChannelLeaseInput{
ActorUserID: "admin-1",
ClusterID: "cluster-1",
OrganizationID: "org-1",
UserID: "user-1",
ResourceID: "vpn-1",
ServiceClass: FabricServiceClassVPNPackets,
EntryNodeIDs: []string{"usa-1"},
ExitNodeIDs: []string{"home-1"},
AllowedChannels: []string{
"vpn_packet",
FabricChannelControl,
},
TTL: 90 * time.Second,
})
if err != nil {
t.Fatalf("issue lease: %v", err)
}
// Introspect with the exact token and channel ID returned by the issue call.
result, err := service.IntrospectFabricServiceChannelLease(context.Background(), IntrospectFabricServiceChannelLeaseInput{
ClusterID: "cluster-1",
ChannelID: lease.ChannelID,
ResourceID: "vpn-1",
ServiceClass: FabricServiceClassVPNPackets,
ChannelClass: "vpn_packet",
Token: lease.Token.Token,
EntryNodeID: "usa-1",
})
if err != nil {
t.Fatalf("introspect lease: %v", err)
}
if !result.Allowed || result.AcceptedBy != "introspection" || result.PreferredRouteID != "route-usa-home" || result.ForceCompatFallback {
t.Fatalf("unexpected introspection result: %+v", result)
}
if result.DataPlane.SchemaVersion != "rap.fabric_service_channel_data_plane.v1" ||
result.DataPlane.WorkingDataTransport != "fabric_quic_route" ||
result.DataPlane.SteadyStateTransport != "fabric_route" ||
result.DataPlane.BackendRelayPolicy != "disabled" {
t.Fatalf("unexpected introspection data-plane contract: %+v", result.DataPlane)
}
}
// TestFabricServiceChannelLeaseIntrospectionRejectsWrongToken issues a lease
// and then introspects it with a token other than the issued one. The call
// itself must succeed, but the result must be disallowed with the reason
// "lease_token_mismatch".
func TestFabricServiceChannelLeaseIntrospectionRejectsWrongToken(t *testing.T) {
	repo := &fakeRepository{}
	svc := NewService(repo)
	svc.now = func() time.Time { return time.Date(2026, 5, 8, 14, 0, 0, 0, time.UTC) }
	ctx := context.Background()

	issueInput := IssueFabricServiceChannelLeaseInput{
		ActorUserID:    "admin-1",
		ClusterID:      "cluster-1",
		OrganizationID: "org-1",
		UserID:         "user-1",
		ResourceID:     "vpn-1",
		ServiceClass:   FabricServiceClassVPNPackets,
		EntryNodeIDs:   []string{"usa-1"},
		ExitNodeIDs:    []string{"home-1"},
		TTL:            90 * time.Second,
	}
	lease, err := svc.IssueFabricServiceChannelLease(ctx, issueInput)
	if err != nil {
		t.Fatalf("issue lease: %v", err)
	}

	// Same channel and resource as the issued lease, but a different token.
	introspectInput := IntrospectFabricServiceChannelLeaseInput{
		ClusterID:    "cluster-1",
		ChannelID:    lease.ChannelID,
		ResourceID:   "vpn-1",
		ServiceClass: FabricServiceClassVPNPackets,
		ChannelClass: "vpn_packet",
		Token:        "rap_fsc_wrong",
		EntryNodeID:  "usa-1",
	}
	result, err := svc.IntrospectFabricServiceChannelLease(ctx, introspectInput)
	if err != nil {
		t.Fatalf("introspect lease: %v", err)
	}

	rejected := !result.Allowed && result.Reason == "lease_token_mismatch"
	if !rejected {
		t.Fatalf("unexpected introspection result: %+v", result)
	}
}
// TestFabricServiceChannelLeaseIntrospectionSurvivesServiceRestart issues a
// lease through one Service instance and introspects it through a second
// instance built on the same repository, five seconds later on the injected
// clock. The introspection must be allowed, and the record held by the
// repository must not contain the raw bearer token.
func TestFabricServiceChannelLeaseIntrospectionSurvivesServiceRestart(t *testing.T) {
	store := &fakeRepository{}
	now := time.Date(2026, 5, 8, 14, 30, 0, 0, time.UTC)
	service := NewService(store)
	service.now = func() time.Time { return now }
	lease, err := service.IssueFabricServiceChannelLease(context.Background(), IssueFabricServiceChannelLeaseInput{
		ActorUserID:    "admin-1",
		ClusterID:      "cluster-1",
		OrganizationID: "org-1",
		UserID:         "user-1",
		ResourceID:     "vpn-1",
		ServiceClass:   FabricServiceClassVPNPackets,
		EntryNodeIDs:   []string{"usa-1"},
		ExitNodeIDs:    []string{"home-1"},
		TTL:            90 * time.Second,
	})
	if err != nil {
		t.Fatalf("issue lease: %v", err)
	}

	// A fresh Service over the same repository stands in for a process restart.
	restarted := NewService(store)
	restarted.now = func() time.Time { return now.Add(5 * time.Second) }
	result, err := restarted.IntrospectFabricServiceChannelLease(context.Background(), IntrospectFabricServiceChannelLeaseInput{
		ClusterID:    "cluster-1",
		ChannelID:    lease.ChannelID,
		ResourceID:   "vpn-1",
		ServiceClass: FabricServiceClassVPNPackets,
		ChannelClass: "vpn_packet",
		Token:        lease.Token.Token,
		EntryNodeID:  "usa-1",
	})
	if err != nil {
		t.Fatalf("introspect lease after restart: %v", err)
	}
	if !result.Allowed || result.Reason != "lease_introspection_allowed" {
		t.Fatalf("unexpected introspection result: %+v", result)
	}

	// Require the record to exist before inspecting it: a plain map index on a
	// missing key yields a zero value whose token is empty, which would make the
	// raw-token assertion below pass without checking anything.
	leaseKey := fabricServiceChannelLeaseCacheKey("cluster-1", lease.ChannelID)
	stored, ok := store.fabricLeases[leaseKey]
	if !ok {
		t.Fatalf("durable lease %q was not found in the repository", leaseKey)
	}
	if stored.Lease.Token.Token != "" {
		t.Fatalf("stored durable lease must not include raw bearer token: %+v", stored.Lease.Token)
	}
}
// TestFabricServiceChannelLeaseMaintenanceListsAndCleansExpired stores one
// lease that expires a minute after the injected clock and one that expired a
// second before it. Listing with IncludeExpired must count one of each and
// report status "degraded"; the cleanup call must then report one deleted
// expired lease, zero remaining expired, one active, and status "ready".
func TestFabricServiceChannelLeaseMaintenanceListsAndCleansExpired(t *testing.T) {
	ctx := context.Background()
	now := time.Date(2026, 5, 8, 15, 0, 0, 0, time.UTC)
	repo := &fakeRepository{platformRole: PlatformRoleAdmin}

	live := FabricServiceChannelLease{
		ChannelID:           "channel-active",
		ClusterID:           "cluster-1",
		ResourceID:          "vpn-active",
		ServiceClass:        FabricServiceClassVPNPackets,
		Status:              FabricServiceChannelStatusReady,
		SelectedEntryNodeID: "entry-1",
		SelectedExitNodeID:  "exit-1",
		AllowedChannels:     []string{"vpn_packet"},
		PrimaryRoute:        FabricServiceChannelRoute{RouteID: "route-active", Status: "ready"},
		Token:               FabricServiceChannelToken{Token: "rap_fsc_active"},
		IssuedAt:            now.Add(-time.Minute),
		ExpiresAt:           now.Add(time.Minute),
	}

	// The stale lease starts as a copy of the live one and overrides only its
	// identity, token and expiry.
	stale := live
	stale.ChannelID = "channel-expired"
	stale.ResourceID = "vpn-expired"
	stale.Token = FabricServiceChannelToken{Token: "rap_fsc_expired"}
	stale.ExpiresAt = now.Add(-time.Second)

	liveInput := StoreFabricServiceChannelLeaseInput{
		Lease:     live,
		TokenHash: fabricServiceChannelTokenHash(live.Token.Token),
	}
	if _, err := repo.StoreFabricServiceChannelLease(ctx, liveInput); err != nil {
		t.Fatalf("store active lease: %v", err)
	}
	staleInput := StoreFabricServiceChannelLeaseInput{
		Lease:     stale,
		TokenHash: fabricServiceChannelTokenHash(stale.Token.Token),
	}
	if _, err := repo.StoreFabricServiceChannelLease(ctx, staleInput); err != nil {
		t.Fatalf("store expired lease: %v", err)
	}

	svc := NewService(repo)
	svc.now = func() time.Time { return now }

	listInput := ListFabricServiceChannelLeasesInput{
		ClusterID:      "cluster-1",
		IncludeExpired: true,
		Limit:          10,
	}
	health, err := svc.ListFabricServiceChannelLeases(ctx, "admin-1", listInput)
	if err != nil {
		t.Fatalf("list leases: %v", err)
	}
	if listedOK := health.ActiveCount == 1 && health.ExpiredCount == 1 && health.Status == "degraded"; !listedOK {
		t.Fatalf("unexpected lease maintenance health: %+v", health)
	}

	cleanupInput := CleanupFabricServiceChannelLeasesInput{
		ActorUserID: "admin-1",
		ClusterID:   "cluster-1",
		Limit:       10,
		Now:         now,
	}
	cleanup, err := svc.CleanupFabricServiceChannelLeases(ctx, cleanupInput)
	if err != nil {
		t.Fatalf("cleanup leases: %v", err)
	}
	switch {
	case cleanup.DeletedExpiredCount != 1,
		cleanup.ExpiredCount != 0,
		cleanup.ActiveCount != 1,
		cleanup.Status != "ready":
		t.Fatalf("unexpected cleanup result: %+v", cleanup)
	}
}
// TestFabricServiceChannelAccessTelemetryAggregatesNodeReports seeds the fake
// repository with a heartbeat carrying a flow-scheduler runtime report, a node
// telemetry access report, one active lease and a healthy route feedback
// record, then checks the access telemetry returned by the service.
//
// It asserts the aggregate counters, the per-node projection, the active
// channel correlation, and that no rebuild incidents are listed for this
// scenario.
func TestFabricServiceChannelAccessTelemetryAggregatesNodeReports(t *testing.T) {
	now := time.Date(2026, 5, 8, 15, 20, 0, 0, time.UTC)
	store := &fakeRepository{
		platformRole: PlatformRoleAdmin,
		clusterNodes: []ClusterNode{
			{ID: "node-1", Name: "entry-1"},
			{ID: "node-2", Name: "entry-2"},
		},
		heartbeats: map[string][]NodeHeartbeat{
			"node-1": {
				{
					ClusterID:  "cluster-1",
					NodeID:     "node-1",
					ObservedAt: now.Add(-2 * time.Second),
					Metadata: json.RawMessage(`{
"fabric_service_channel_runtime_report": {
"schema_version": "c18z64.fabric_service_channel_runtime_report.v1",
"ingress": {
"flow_scheduler": {
"traffic_class_counts": {"bulk": 32, "interactive": 12},
"recommended_parallel_windows": {"bulk": 1, "interactive": 4, "control": 4, "reliable": 3, "droppable": 1},
"adaptive_backpressure_active": true,
"adaptive_backpressure_reason": "bulk_window_reduced_to_protect_interactive",
"channel_count": 44,
"dropped": 0,
"high_watermark": 25,
"max_in_flight": 4,
"channel_stats": {}
}
}
}
}`),
				},
			},
		},
	}
	service := NewService(store)
	service.now = func() time.Time { return now }

	// Access report submitted by node-1 through the telemetry path.
	_, err := store.RecordNodeTelemetry(context.Background(), RecordNodeTelemetryInput{
		ClusterID: "cluster-1",
		NodeID:    "node-1",
		Payload: json.RawMessage(`{
"fabric_service_channel_access_report": {
"schema_version": "c18z52.fabric_service_channel_access_report.v1",
"total": 7,
"signed": 3,
"introspection": 4,
"standard_unsigned": 0,
"degraded_route_count": 0,
"data_plane_contract": 5,
"last_data_plane_mode": "fabric_quic_only",
"last_working_data_transport": "fabric_quic_route",
"last_steady_state_transport": "fabric_route",
"last_degraded_route_policy": "disabled",
"last_logical_flow_mode": "multi_flow_isolated",
"last_accepted_at": "2026-05-08T15:19:59Z"
}
}`),
		ObservedAt: now,
	})
	if err != nil {
		t.Fatalf("record telemetry: %v", err)
	}

	// One unexpired lease entering at node-1 over route-1.
	expiresAt := now.Add(5 * time.Minute)
	store.fabricLeases = map[string]FabricServiceChannelLeaseRecord{
		fabricServiceChannelLeaseCacheKey("cluster-1", "channel-1"): {
			ClusterID:           "cluster-1",
			ChannelID:           "channel-1",
			ResourceID:          "vpn-home",
			ServiceClass:        FabricServiceClassVPNPackets,
			SelectedEntryNodeID: "node-1",
			ExpiresAt:           expiresAt,
			Lease: FabricServiceChannelLease{
				ClusterID:           "cluster-1",
				ChannelID:           "channel-1",
				ResourceID:          "vpn-home",
				ServiceClass:        FabricServiceClassVPNPackets,
				Status:              FabricServiceChannelStatusReady,
				SelectedEntryNodeID: "node-1",
				SelectedExitNodeID:  "node-2",
				PrimaryRoute: FabricServiceChannelRoute{
					RouteID: "route-1",
					Status:  "ready",
				},
				ExpiresAt: expiresAt,
			},
		},
	}

	// Healthy feedback for route-1 whose quality window includes one slow sample.
	_, err = store.RecordFabricServiceChannelRouteFeedback(context.Background(), RecordFabricServiceChannelRouteFeedbackInput{
		ClusterID:          "cluster-1",
		ReporterNodeID:     "node-1",
		RouteID:            "route-1",
		ServiceClass:       FabricServiceClassVPNPackets,
		FeedbackStatus:     "healthy",
		ScoreAdjustment:    15,
		LastSendDurationMs: 42,
		Payload:            json.RawMessage(`{"quality_window_sample_count":5,"quality_window_failure_count":0,"quality_window_drop_count":0,"quality_window_slow_count":1}`),
		ObservedAt:         now,
		ExpiresAt:          expiresAt,
	})
	if err != nil {
		t.Fatalf("record route feedback: %v", err)
	}

	report, err := service.GetFabricServiceChannelAccessTelemetry(context.Background(), "admin-1", GetFabricServiceChannelAccessTelemetryInput{
		ClusterID: "cluster-1",
		Limit:     10,
		Now:       now,
	})
	if err != nil {
		t.Fatalf("get access telemetry: %v", err)
	}

	// Aggregate counters.
	if report.ReportingNodeCount != 1 || report.TotalAccepted != 7 || report.SignedAccepted != 3 || report.IntrospectionAccepted != 4 || report.CompatFallbackCount != 0 {
		t.Fatalf("unexpected access telemetry: %+v", report)
	}
	if report.DataPlaneContractCount != 5 || report.LastDataPlaneMode != "fabric_quic_only" || report.LastWorkingDataTransport != "fabric_quic_route" || report.LastSteadyStateTransport != "fabric_route" || report.LastCompatRelayPolicy != "disabled" || report.LastLogicalFlowMode != "multi_flow_isolated" {
		t.Fatalf("unexpected aggregate data-plane telemetry: %+v", report)
	}

	// Guard the per-node assertions: indexing an empty Nodes slice would panic
	// and hide the report contents from the failure output.
	if len(report.Nodes) == 0 {
		t.Fatalf("expected at least one node entry in access telemetry: %+v", report)
	}
	node := report.Nodes[0]

	if node.DataPlaneContractCount != 5 || node.LastWorkingDataTransport != "fabric_quic_route" || node.LastCompatRelayPolicy != "disabled" || node.LastLogicalFlowMode != "multi_flow_isolated" {
		t.Fatalf("unexpected node data-plane telemetry: %+v", node)
	}
	if got := node.TrafficClassCounts["bulk"]; got != 32 {
		t.Fatalf("bulk traffic class count = %d, want 32: %+v", got, node)
	}
	if report.TrafficClassCounts["bulk"] != 32 || report.TrafficClassCounts["interactive"] != 12 || report.FlowChannelCount != 44 || report.FlowMaxInFlight != 4 {
		t.Fatalf("unexpected aggregate flow telemetry: %+v", report)
	}
	// The aggregate flow health is expected to be "degraded" while the node's
	// own flow health (asserted further down) is expected to be "watch".
	if report.FlowHealthStatus != "degraded" || report.FlowHealthReason != "route_quality_window_slow_samples_reported" {
		t.Fatalf("unexpected aggregate flow health: %+v", report)
	}
	if !report.AdaptiveBackpressureActive || report.AdaptiveBackpressureReason != "bulk_window_reduced_to_protect_interactive" || report.RecommendedParallelWindows["bulk"] != 1 || report.RecommendedParallelWindows["interactive"] != 4 {
		t.Fatalf("unexpected aggregate adaptive backpressure: %+v", report)
	}
	if node.FlowChannelCount != 44 || node.FlowHighWatermark != 25 || node.FlowMaxInFlight != 4 {
		t.Fatalf("unexpected flow telemetry on node: %+v", node)
	}
	if node.FlowHealthStatus != "watch" || node.FlowHealthReason != "bulk_pressure_with_interactive_qos_observed" {
		t.Fatalf("unexpected node flow health: %+v", node)
	}
	if !node.AdaptiveBackpressureActive || node.RecommendedParallelWindows["control"] != 4 || node.RecommendedParallelWindows["droppable"] != 1 {
		t.Fatalf("unexpected node adaptive backpressure: %+v", node)
	}

	// Active channel correlation.
	if report.ActiveChannelCount != 1 || report.CorrelatedRouteCount != 1 || report.DegradedRouteCount != 0 {
		t.Fatalf("unexpected channel correlation counters: %+v", report)
	}
	if len(report.ActiveChannels) != 1 {
		t.Fatalf("expected one active channel, got %d", len(report.ActiveChannels))
	}
	channel := report.ActiveChannels[0]
	if channel.ChannelID != "channel-1" || channel.EntryNodeTotalAccepted != 7 || channel.RouteFeedbackStatus != "healthy" || channel.RouteQualityWindowSampleCount != 5 || channel.LastSendDurationMs != 42 {
		t.Fatalf("unexpected active channel correlation: %+v", channel)
	}
	if channel.EntryNodeDataPlaneContractCount != 5 || channel.EntryNodeLastDataPlaneMode != "fabric_quic_only" || channel.EntryNodeLastWorkingDataTransport != "fabric_quic_route" || channel.EntryNodeLastSteadyStateTransport != "fabric_route" || channel.EntryNodeLastCompatRelayPolicy != "disabled" || channel.EntryNodeLastLogicalFlowMode != "multi_flow_isolated" {
		t.Fatalf("unexpected active channel data-plane telemetry: %+v", channel)
	}
	if channel.EntryNodeTrafficClassCounts["interactive"] != 12 || channel.EntryNodeFlowChannelCount != 44 || channel.EntryNodeFlowMaxInFlight != 4 {
		t.Fatalf("unexpected active channel flow telemetry: %+v", channel)
	}
	if channel.EntryNodeFlowHealthStatus != "degraded" || channel.EntryNodeFlowHealthReason != "route_quality_window_slow_samples_reported" {
		t.Fatalf("unexpected channel flow health: %+v", channel)
	}
	if !channel.EntryNodeAdaptiveBackpressureActive || channel.EntryNodeAdaptiveBackpressureReason != "bulk_window_reduced_to_protect_interactive" || channel.EntryNodeRecommendedParallelWindows["bulk"] != 1 {
		t.Fatalf("unexpected channel adaptive backpressure: %+v", channel)
	}
	if channel.RemediationAction != "none" {
		t.Fatalf("healthy route should not need remediation: %+v", channel)
	}

	incidents, err := service.ListFabricServiceChannelRouteRebuildIncidents(context.Background(), "admin-1", ListFabricServiceChannelRouteRebuildIncidentsInput{
		ClusterID: "cluster-1",
		Limit:     10,
	})
	if err != nil {
		t.Fatalf("list rebuild incidents: %v", err)
	}
	if len(incidents) != 0 {
		t.Fatalf("degraded backend relay incidents must not be projected in QUIC-only fabric: %+v", incidents)
	}
}
// TestFabricServiceChannelFlowHealthPolicyClassifiesPressure calls
// fabricServiceChannelFlowHealth with three fixed argument sets and checks the
// returned status and reason for each: "watch", "critical" and "degraded". The
// first case additionally requires a non-empty third return value.
func TestFabricServiceChannelFlowHealthPolicyClassifiesPressure(t *testing.T) {
	mixedTraffic := map[string]int{"bulk": 32, "interactive": 12}
	watchStatus, watchReason, watchAction := fabricServiceChannelFlowHealth(mixedTraffic, 0, 25, 4, 0, 132, 0, 0, 0)
	switch {
	case watchStatus != "watch",
		watchReason != "bulk_pressure_with_interactive_qos_observed",
		watchAction == "":
		t.Fatalf("unexpected healthy pressure classification: status=%q reason=%q action=%q", watchStatus, watchReason, watchAction)
	}

	dropStatus, dropReason, _ := fabricServiceChannelFlowHealth(map[string]int{"bulk": 32}, 1, 25, 4, 0, 0, 0, 0, 0)
	if dropsFlagged := dropStatus == "critical" && dropReason == "flow_drops_reported"; !dropsFlagged {
		t.Fatalf("unexpected drop classification: status=%q reason=%q", dropStatus, dropReason)
	}

	slowStatus, slowReason, _ := fabricServiceChannelFlowHealth(map[string]int{"bulk": 2}, 0, 4, 1, 0, 1500, 0, 0, 0)
	if latencyFlagged := slowStatus == "degraded" && slowReason == "route_send_latency_high"; !latencyFlagged {
		t.Fatalf("unexpected latency classification: status=%q reason=%q", slowStatus, slowReason)
	}
}
func TestFabricServiceChannelAccessTelemetryRecommendsAlternateForDegradedRoute(t *testing.T) {
now := time.Date(2026, 5, 8, 15, 55, 0, 0, time.UTC)
expiresAt := now.Add(5 * time.Minute)
store := &fakeRepository{
platformRole: PlatformRoleAdmin,
clusterNodes: []ClusterNode{
{ID: "node-1", Name: "entry-1"},
{ID: "node-2", Name: "exit-1"},
},
fabricLeases: map[string]FabricServiceChannelLeaseRecord{
fabricServiceChannelLeaseCacheKey("cluster-1", "channel-1"): {
ClusterID: "cluster-1",
ChannelID: "channel-1",
ResourceID: "vpn-home",
ServiceClass: FabricServiceClassVPNPackets,
SelectedEntryNodeID: "node-1",
ExpiresAt: expiresAt,
Lease: FabricServiceChannelLease{
ClusterID: "cluster-1",
ChannelID: "channel-1",
ResourceID: "vpn-home",
ServiceClass: FabricServiceClassVPNPackets,
Status: FabricServiceChannelStatusReady,
SelectedEntryNodeID: "node-1",
SelectedExitNodeID: "node-2",
PrimaryRoute: FabricServiceChannelRoute{
RouteID: "route-bad",
Status: "authorized",
},
AlternateRoutes: []FabricServiceChannelRoute{{
RouteID: "route-alt",
Status: "authorized",
}},
ExpiresAt: expiresAt,
},
},
},
}
service := NewService(store)
service.now = func() time.Time { return now }
_, err := store.RecordNodeTelemetry(context.Background(), RecordNodeTelemetryInput{
ClusterID: "cluster-1",
NodeID: "node-1",
Payload: json.RawMessage(`{
"fabric_service_channel_access_report": {
"total": 4,
"introspection": 4,
"degraded_route_count": 0
}
}`),
ObservedAt: now,
})
if err != nil {
t.Fatalf("record telemetry: %v", err)
}
_, err = store.RecordFabricServiceChannelRouteFeedback(context.Background(), RecordFabricServiceChannelRouteFeedbackInput{
ClusterID: "cluster-1",
ReporterNodeID: "node-1",
RouteID: "route-bad",
ServiceClass: FabricServiceClassVPNPackets,
FeedbackStatus: "fenced",
ScoreAdjustment: -1030,
Reasons: []string{"service_channel_route_rebuild_recommended"},
LastSendDurationMs: 1200,
Payload: json.RawMessage(`{"quality_window_sample_count":7,"quality_window_failure_count":3,"quality_window_drop_count":1}`),
ObservedAt: now,
ExpiresAt: expiresAt,
})
if err != nil {
t.Fatalf("record route feedback: %v", err)
}
_, err = store.RecordHeartbeat(context.Background(), RecordHeartbeatInput{
ClusterID: "cluster-1",
NodeID: "node-1",
HealthStatus: "healthy",
Metadata: json.RawMessage(`{
"fabric_service_channel_runtime_report": {
"ingress": {
"route_manager": {
"last_applied_at": "2026-05-08T15:55:01Z",
"decisions": [{
"route_id": "route-bad",
"replacement_route_id": "route-alt",
"rebuild_request_id": "fsc-remediation:channel-1:prefer_alternate_route:route-alt",
"rebuild_status": "applied",
"rebuild_reason": "authorized_alternate_route_available",
"decision_source": "service_channel_remediation_command",
"generation": "config-c18z74"
}]
},
"route_manager_transition": {
"status": "applied_rebuild",
"generation": "config-c18z74",
"observed_at": "2026-05-08T15:55:01Z"
}
}
}
}`),
})
if err != nil {
t.Fatalf("record heartbeat: %v", err)
}
report, err := service.GetFabricServiceChannelAccessTelemetry(context.Background(), "admin-1", GetFabricServiceChannelAccessTelemetryInput{
ClusterID: "cluster-1",
Limit: 10,
Now: now,
})
if err != nil {
t.Fatalf("get access telemetry: %v", err)
}
if report.DegradedRouteCount != 1 || report.Status != "degraded" {
t.Fatalf("expected degraded route aggregate: %+v", report)
}
if len(report.ActiveChannels) != 1 {
t.Fatalf("expected one active channel, got %d", len(report.ActiveChannels))
}
channel := report.ActiveChannels[0]
if channel.RemediationAction != "prefer_alternate_route" || channel.RemediationRouteID != "route-alt" {
t.Fatalf("expected alternate remediation, got %+v", channel)
}
if channel.RemediationCommand == nil {
t.Fatalf("expected bounded remediation command, got %+v", channel)
}
if channel.RemediationCommand.Action != "prefer_alternate_route" ||
channel.RemediationCommand.ReplacementRouteID != "route-alt" ||
channel.RemediationCommand.PrimaryRouteID != "route-bad" ||
channel.RemediationCommand.ClusterID != "cluster-1" {
t.Fatalf("unexpected remediation command: %+v", channel.RemediationCommand)
}
if channel.RemediationExecutionStatus != "applied" ||
channel.RemediationExecutionReason != "authorized_alternate_route_available" ||
channel.RemediationExecutionGeneration != "config-c18z74" ||
channel.RemediationCommand.ExecutionStatus != "applied" {
t.Fatalf("unexpected remediation execution: channel=%+v command=%+v", channel, channel.RemediationCommand)
}
if !channel.RemediationCommand.IssuedAt.Equal(now) || channel.RemediationCommand.ExpiresAt.After(expiresAt) || !channel.RemediationCommand.ExpiresAt.After(now) {
t.Fatalf("unexpected remediation command ttl: %+v", channel.RemediationCommand)
}
}
func TestFabricServiceChannelAccessTelemetryRejectsAlternateOutsideSignedPoolPolicy(t *testing.T) {
now := time.Date(2026, 5, 8, 16, 15, 0, 0, time.UTC)
expiresAt := now.Add(5 * time.Minute)
store := &fakeRepository{
platformRole: PlatformRoleAdmin,
clusterNodes: []ClusterNode{
{ID: "entry-1", Name: "entry-1"},
{ID: "exit-1", Name: "exit-1"},
{ID: "exit-2", Name: "exit-2"},
},
fabricLeases: map[string]FabricServiceChannelLeaseRecord{
fabricServiceChannelLeaseCacheKey("cluster-1", "channel-guard"): {
ClusterID: "cluster-1",
ChannelID: "channel-guard",
ResourceID: "vpn-home",
ServiceClass: FabricServiceClassVPNPackets,
SelectedEntryNodeID: "entry-1",
ExpiresAt: expiresAt,
Lease: FabricServiceChannelLease{
ClusterID: "cluster-1",
ChannelID: "channel-guard",
ResourceID: "vpn-home",
ServiceClass: FabricServiceClassVPNPackets,
Status: FabricServiceChannelStatusReady,
SelectedEntryNodeID: "entry-1",
SelectedExitNodeID: "exit-1",
EntryPool: []FabricServiceChannelNodeCandidate{{
NodeID: "entry-1",
Status: "selected",
}},
ExitPool: []FabricServiceChannelNodeCandidate{{
NodeID: "exit-1",
Status: "selected",
}},
PoolPolicy: &FabricServiceChannelPoolPolicy{
SchemaVersion: "rap.fabric_service_channel_pool_policy.v1",
Fingerprint: "pool-fingerprint-1",
EntryPoolNodeIDs: []string{"entry-1"},
ExitPoolNodeIDs: []string{"exit-1"},
SelectionStrategy: "fastest_healthy",
RouteRebuild: "automatic",
EntryFailover: "automatic",
ExitFailover: "automatic",
CompatFallbackAllowed: true,
StickySession: true,
Source: "cluster_metadata",
},
PrimaryRoute: FabricServiceChannelRoute{
RouteID: "route-bad",
ClusterID: "cluster-1",
ServiceClass: FabricServiceClassVPNPackets,
SourceNodeID: "entry-1",
DestinationNodeID: "exit-1",
Status: "authorized",
},
AlternateRoutes: []FabricServiceChannelRoute{{
RouteID: "route-outside-exit",
ClusterID: "cluster-1",
ServiceClass: FabricServiceClassVPNPackets,
SourceNodeID: "entry-1",
DestinationNodeID: "exit-2",
Status: "authorized",
}},
ExpiresAt: expiresAt,
},
},
},
fabricRebuildAttempts: []FabricServiceChannelRouteRebuildAttempt{{
ID: "fsc-rebuild-guard-1",
ClusterID: "cluster-1",
ReporterNodeID: "entry-1",
ServiceClass: FabricServiceClassVPNPackets,
RouteID: "route-bad",
ReplacementRouteID: "route-outside-exit",
RebuildRequestID: "fsc-remediation:channel-guard:rebuild_route:route-outside-exit",
RebuildStatus: "rejected",
RebuildReason: "replacement_exit_outside_signed_pool_policy",
DecisionSource: "service_channel_remediation_command",
Outcome: "policy_guard_rejected",
PolicyFingerprint: "pool-fingerprint-1",
CreatedAt: now,
UpdatedAt: now,
}},
}
service := NewService(store)
service.now = func() time.Time { return now }
_, err := store.RecordFabricServiceChannelRouteFeedback(context.Background(), RecordFabricServiceChannelRouteFeedbackInput{
ClusterID: "cluster-1",
ReporterNodeID: "entry-1",
RouteID: "route-bad",
ServiceClass: FabricServiceClassVPNPackets,
FeedbackStatus: "fenced",
ScoreAdjustment: -1030,
Reasons: []string{"service_channel_route_rebuild_recommended"},
LastSendDurationMs: 1200,
ObservedAt: now,
ExpiresAt: expiresAt,
})
if err != nil {
t.Fatalf("record route feedback: %v", err)
}
report, err := service.GetFabricServiceChannelAccessTelemetry(context.Background(), "admin-1", GetFabricServiceChannelAccessTelemetryInput{
ClusterID: "cluster-1",
Limit: 10,
Now: now,
})
if err != nil {
t.Fatalf("get access telemetry: %v", err)
}
if len(report.ActiveChannels) != 1 {
t.Fatalf("expected one active channel, got %d", len(report.ActiveChannels))
}
channel := report.ActiveChannels[0]
if channel.RemediationAction != "rebuild_route" ||
channel.RemediationReason != "alternate_route_rejected_by_pool_policy" ||
channel.RemediationRouteID != "route-outside-exit" ||
channel.RemediationGuardStatus != "rejected" ||
channel.RemediationGuardReason != "replacement_exit_outside_signed_pool_policy" ||
channel.PoolPolicyFingerprint != "pool-fingerprint-1" {
t.Fatalf("expected guarded rebuild remediation, got %+v", channel)
}
if channel.RemediationCommand == nil {
t.Fatalf("expected guarded remediation command, got %+v", channel)
}
if channel.RemediationCommand.Action != "rebuild_route" ||
channel.RemediationCommand.GuardStatus != "rejected" ||
channel.RemediationCommand.GuardReason != "replacement_exit_outside_signed_pool_policy" ||
channel.RemediationCommand.PoolPolicyFingerprint != "pool-fingerprint-1" ||
channel.RemediationCommand.ExecutionStatus != "rebuild_request_rejected" {
t.Fatalf("unexpected guarded remediation command: %+v", channel.RemediationCommand)
}
}
func TestFabricServiceChannelAccessTelemetryShowsRebuildRouteNodePending(t *testing.T) {
now := time.Date(2026, 5, 8, 16, 50, 0, 0, time.UTC)
expiresAt := now.Add(5 * time.Minute)
commandID := "fsc-remediation:channel-pending:rebuild_route:route-bad"
store := &fakeRepository{
platformRole: PlatformRoleAdmin,
clusterNodes: []ClusterNode{
{ID: "entry-1", Name: "entry-1"},
{ID: "exit-1", Name: "exit-1"},
},
fabricLeases: map[string]FabricServiceChannelLeaseRecord{
fabricServiceChannelLeaseCacheKey("cluster-1", "channel-pending"): {
ClusterID: "cluster-1",
ChannelID: "channel-pending",
ResourceID: "vpn-home",
ServiceClass: FabricServiceClassVPNPackets,
SelectedEntryNodeID: "entry-1",
ExpiresAt: expiresAt,
Lease: FabricServiceChannelLease{
ClusterID: "cluster-1",
ChannelID: "channel-pending",
ResourceID: "vpn-home",
ServiceClass: FabricServiceClassVPNPackets,
Status: FabricServiceChannelStatusReady,
SelectedEntryNodeID: "entry-1",
SelectedExitNodeID: "exit-1",
PrimaryRoute: FabricServiceChannelRoute{
RouteID: "route-bad",
ClusterID: "cluster-1",
ServiceClass: FabricServiceClassVPNPackets,
SourceNodeID: "entry-1",
DestinationNodeID: "exit-1",
Status: "authorized",
},
ExpiresAt: expiresAt,
},
},
},
fabricRebuildAttempts: []FabricServiceChannelRouteRebuildAttempt{{
ID: "fsc-rebuild-pending-1",
ClusterID: "cluster-1",
ReporterNodeID: "entry-1",
ServiceClass: FabricServiceClassVPNPackets,
RouteID: "route-bad",
RebuildRequestID: commandID,
RebuildStatus: "requested",
RebuildReason: "route_feedback_recommends_rebuild",
DecisionSource: "service_channel_remediation_command",
Outcome: "rebuild_requested",
Generation: commandID,
CreatedAt: now,
UpdatedAt: now,
}},
}
service := NewService(store)
service.now = func() time.Time { return now }
_, err := store.RecordFabricServiceChannelRouteFeedback(context.Background(), RecordFabricServiceChannelRouteFeedbackInput{
ClusterID: "cluster-1",
ReporterNodeID: "entry-1",
RouteID: "route-bad",
ServiceClass: FabricServiceClassVPNPackets,
FeedbackStatus: "fenced",
ScoreAdjustment: -1030,
Reasons: []string{"service_channel_route_rebuild_recommended"},
ObservedAt: now,
ExpiresAt: expiresAt,
})
if err != nil {
t.Fatalf("record route feedback: %v", err)
}
_, err = store.RecordHeartbeat(context.Background(), RecordHeartbeatInput{
ClusterID: "cluster-1",
NodeID: "entry-1",
HealthStatus: "healthy",
Metadata: json.RawMessage(`{
"fabric_service_channel_runtime_report": {
"ingress": {
"route_manager": {
"last_applied_at": "2026-05-08T16:50:01Z",
"decisions": [{
"route_id": "route-bad",
"rebuild_request_id": "fsc-remediation:channel-pending:rebuild_route:route-bad",
"rebuild_status": "pending_degraded_fallback",
"rebuild_reason": "route_feedback_recommends_rebuild",
"decision_source": "service_channel_remediation_command",
"generation": "fsc-remediation:channel-pending:rebuild_route:route-bad"
}]
},
"route_manager_transition": {
"status": "pending_degraded_fallback",
"generation": "fsc-remediation:channel-pending:rebuild_route:route-bad",
"observed_at": "2026-05-08T16:50:01Z"
}
}
}
}`),
})
if err != nil {
t.Fatalf("record heartbeat: %v", err)
}
report, err := service.GetFabricServiceChannelAccessTelemetry(context.Background(), "admin-1", GetFabricServiceChannelAccessTelemetryInput{
ClusterID: "cluster-1",
Limit: 10,
Now: now,
})
if err != nil {
t.Fatalf("get access telemetry: %v", err)
}
if len(report.ActiveChannels) != 1 {
t.Fatalf("expected one active channel, got %d", len(report.ActiveChannels))
}
channel := report.ActiveChannels[0]
if channel.RemediationAction != "rebuild_route" ||
channel.RemediationExecutionStatus != "rebuild_request_recorded_node_pending" ||
channel.RemediationExecutionGeneration != commandID ||
channel.RouteDecisionSource != "service_channel_remediation_command" ||
channel.RouteDecisionRebuildStatus != "pending_degraded_fallback" ||
channel.RemediationCommand == nil ||
channel.RemediationCommand.ExecutionStatus != "rebuild_request_recorded_node_pending" {
t.Fatalf("unexpected rebuild route execution: channel=%+v command=%+v", channel, channel.RemediationCommand)
}
}
// TestFabricServiceChannelAccessTelemetryProjectsNoSafeRecoveryDecision walks a
// "no safe recovery" route decision through its alert lifecycle.
//
// Steps, in order (each one depends on repository state left by the previous):
//  1. Seed one active lease and a heartbeat whose route-manager decision comes
//     from "service_channel_feedback_no_alternate".
//  2. Check the access telemetry projection and aggregate counters.
//  3. Check the rebuild health summary and the incident list.
//  4. Silence the incident and check health, incidents and the silence list.
//  5. Unsilence it and check that health counts it as active again.
//  6. Silence it again, record a heartbeat with a new generation, and check
//     that the incident is reported as resurfaced with "generation_changed".
func TestFabricServiceChannelAccessTelemetryProjectsNoSafeRecoveryDecision(t *testing.T) {
now := time.Date(2026, 5, 9, 3, 10, 0, 0, time.UTC)
expiresAt := now.Add(5 * time.Minute)
// Step 1: a single unexpired lease with primary route "route-primary" and no alternates.
store := &fakeRepository{
platformRole: PlatformRoleAdmin,
clusterNodes: []ClusterNode{
{ID: "entry-1", Name: "entry-1"},
{ID: "exit-1", Name: "exit-1"},
},
fabricLeases: map[string]FabricServiceChannelLeaseRecord{
fabricServiceChannelLeaseCacheKey("cluster-1", "channel-no-safe"): {
ClusterID: "cluster-1",
ChannelID: "channel-no-safe",
ResourceID: "vpn-home",
ServiceClass: FabricServiceClassVPNPackets,
SelectedEntryNodeID: "entry-1",
ExpiresAt: expiresAt,
Lease: FabricServiceChannelLease{
ClusterID: "cluster-1",
ChannelID: "channel-no-safe",
ResourceID: "vpn-home",
ServiceClass: FabricServiceClassVPNPackets,
Status: FabricServiceChannelStatusReady,
SelectedEntryNodeID: "entry-1",
SelectedExitNodeID: "exit-1",
PrimaryRoute: FabricServiceChannelRoute{
RouteID: "route-primary",
ClusterID: "cluster-1",
ServiceClass: FabricServiceClassVPNPackets,
SourceNodeID: "entry-1",
DestinationNodeID: "exit-1",
Status: "authorized",
},
ExpiresAt: expiresAt,
},
},
},
}
service := NewService(store)
service.now = func() time.Time { return now }
// Heartbeat from entry-1 reporting a pending_degraded_fallback decision for
// "route-replacement" at generation "c18z82-generation".
_, err := store.RecordHeartbeat(context.Background(), RecordHeartbeatInput{
ClusterID: "cluster-1",
NodeID: "entry-1",
HealthStatus: "healthy",
Metadata: json.RawMessage(`{
"fabric_service_channel_runtime_report": {
"ingress": {
"route_manager": {
"last_applied_at": "2026-05-09T03:10:01Z",
"decisions": [{
"route_id": "route-replacement",
"source_node_id": "entry-1",
"destination_node_id": "exit-1",
"local_node_id": "entry-1",
"decision_source": "service_channel_feedback_no_alternate",
"rebuild_status": "pending_degraded_fallback",
"rebuild_reason": "service_channel_feedback_rebuild_requested",
"generation": "c18z82-generation",
"score_reasons": [
"service_channel_fenced_route",
"no_unfenced_alternate_route",
"backend_relay_degraded_fallback_until_rebuild"
]
}]
},
"route_manager_transition": {
"status": "pending_degraded_fallback",
"generation": "c18z82-generation",
"observed_at": "2026-05-09T03:10:01Z"
}
}
}
}`),
})
if err != nil {
t.Fatalf("record heartbeat: %v", err)
}
// Step 2: access telemetry must project the decision onto the channel and the aggregate.
report, err := service.GetFabricServiceChannelAccessTelemetry(context.Background(), "admin-1", GetFabricServiceChannelAccessTelemetryInput{
ClusterID: "cluster-1",
Limit: 10,
Now: now,
})
if err != nil {
t.Fatalf("get access telemetry: %v", err)
}
if len(report.ActiveChannels) != 1 {
t.Fatalf("expected one active channel, got %d", len(report.ActiveChannels))
}
channel := report.ActiveChannels[0]
if channel.RouteDecisionSource != "service_channel_feedback_no_alternate" ||
channel.RouteDecisionRouteID != "route-replacement" ||
channel.RouteDecisionRebuildStatus != "pending_degraded_fallback" ||
!containsString(channel.RouteDecisionScoreReasons, "no_unfenced_alternate_route") ||
channel.RemediationAction != "use_degraded_route" ||
channel.RemediationExecutionStatus != "route_rebuild_no_safe_recovery" {
t.Fatalf("unexpected no-safe route decision projection: %+v", channel)
}
if report.RouteDecisionChannelCount != 1 ||
report.NoSafeRecoveryDecisionCount != 1 ||
report.ReplacementDecisionCount != 0 ||
report.AppliedRebuildDecisionCount != 0 ||
report.Status != "degraded" ||
report.Reason != "active_channels_no_safe_recovery" {
t.Fatalf("unexpected no-safe route decision aggregate: %+v", report)
}
// Step 3: the rebuild health summary and incident list must reflect the same decision.
health, err := service.GetFabricServiceChannelRouteRebuildHealthSummary(context.Background(), "admin-1", GetFabricServiceChannelRouteRebuildHealthSummaryInput{
ClusterID: "cluster-1",
Limit: 10,
})
if err != nil {
t.Fatalf("get rebuild health: %v", err)
}
if health.AccessRouteDecisionCount != 1 ||
health.AccessNoSafeCount != 1 ||
health.ActiveBadCount != 1 ||
health.RecommendedOperatorAction != "inspect_access_no_safe_recovery_route_pool_and_signed_policy" {
t.Fatalf("unexpected rebuild health access decision projection: %+v", health)
}
incidents, err := service.ListFabricServiceChannelRouteRebuildIncidents(context.Background(), "admin-1", ListFabricServiceChannelRouteRebuildIncidentsInput{
ClusterID: "cluster-1",
Limit: 10,
})
if err != nil {
t.Fatalf("list rebuild incidents: %v", err)
}
if len(incidents) == 0 ||
incidents[0].IncidentSource != "access_decision" ||
incidents[0].ChannelID != "channel-no-safe" ||
incidents[0].GuardStatus != "access_no_safe_recovery" ||
incidents[0].GuardSeverity != "bad" {
t.Fatalf("unexpected access decision incident projection: %+v", incidents)
}
// Step 4: silence the incident using the identifiers returned by the incident list.
silence, err := service.SilenceFabricServiceChannelRouteRebuildAlert(context.Background(), SilenceFabricServiceChannelRouteRebuildAlertInput{
ActorUserID: "admin-1",
ClusterID: "cluster-1",
IncidentSource: "access_decision",
ChannelID: incidents[0].ChannelID,
ReporterNodeID: incidents[0].ReporterNodeID,
RouteID: incidents[0].RouteID,
GuardStatus: incidents[0].GuardStatus,
Generation: incidents[0].Generation,
Reason: "operator acknowledged access no-safe",
TTL: 6 * time.Hour,
Now: now,
})
if err != nil {
t.Fatalf("silence access decision incident: %v", err)
}
// While silenced, the no-safe count is expected to stay at 1 but the active bad count drops to 0.
health, err = service.GetFabricServiceChannelRouteRebuildHealthSummary(context.Background(), "admin-1", GetFabricServiceChannelRouteRebuildHealthSummaryInput{
ClusterID: "cluster-1",
Limit: 10,
})
if err != nil {
t.Fatalf("get silenced rebuild health: %v", err)
}
if health.AccessNoSafeCount != 1 || health.ActiveBadCount != 0 || health.SilencedCount != 1 {
t.Fatalf("unexpected silenced access decision health: %+v", health)
}
incidents, err = service.ListFabricServiceChannelRouteRebuildIncidents(context.Background(), "admin-1", ListFabricServiceChannelRouteRebuildIncidentsInput{
ClusterID: "cluster-1",
Limit: 10,
})
if err != nil {
t.Fatalf("list silenced rebuild incidents: %v", err)
}
if len(incidents) == 0 || !incidents[0].AlertSilenced {
t.Fatalf("expected silenced access decision incident: %+v", incidents)
}
silences, err := service.ListFabricServiceChannelRouteRebuildAlertSilences(context.Background(), "admin-1", "cluster-1", now)
if err != nil {
t.Fatalf("list rebuild alert silences: %v", err)
}
if len(silences) != 1 ||
silences[0].ID != silence.ID ||
silences[0].IncidentSource != "access_decision" ||
silences[0].ChannelID != "channel-no-safe" ||
silences[0].DisplayRouteID != "route-replacement" {
t.Fatalf("unexpected listed access decision silence: %+v", silences)
}
// Step 5: unsilence one minute later; the incident must count as active bad again.
_, err = service.UnsilenceFabricServiceChannelRouteRebuildAlert(context.Background(), UnsilenceFabricServiceChannelRouteRebuildAlertInput{
ActorUserID: "admin-1",
ClusterID: "cluster-1",
SilenceID: silence.ID,
Reason: "operator reopened access no-safe",
Now: now.Add(time.Minute),
})
if err != nil {
t.Fatalf("unsilence access decision incident: %v", err)
}
health, err = service.GetFabricServiceChannelRouteRebuildHealthSummary(context.Background(), "admin-1", GetFabricServiceChannelRouteRebuildHealthSummaryInput{
ClusterID: "cluster-1",
Limit: 10,
})
if err != nil {
t.Fatalf("get unsilenced rebuild health: %v", err)
}
if health.ActiveBadCount != 1 || health.SilencedCount != 0 {
t.Fatalf("unexpected unsilenced access decision health: %+v", health)
}
// Step 6: silence again. This reuses `incidents` from the listing taken while
// the first silence was active; the returned silence is not read afterwards.
silence, err = service.SilenceFabricServiceChannelRouteRebuildAlert(context.Background(), SilenceFabricServiceChannelRouteRebuildAlertInput{
ActorUserID: "admin-1",
ClusterID: "cluster-1",
IncidentSource: "access_decision",
ChannelID: incidents[0].ChannelID,
ReporterNodeID: incidents[0].ReporterNodeID,
RouteID: incidents[0].RouteID,
GuardStatus: incidents[0].GuardStatus,
Generation: incidents[0].Generation,
Reason: "operator acknowledged access no-safe again",
TTL: 6 * time.Hour,
Now: now,
})
if err != nil {
t.Fatalf("resilence access decision incident: %v", err)
}
// A later heartbeat repeats the decision with generation "c18z82-generation-next"
// and without a route_manager_transition section.
_, err = store.RecordHeartbeat(context.Background(), RecordHeartbeatInput{
ClusterID: "cluster-1",
NodeID: "entry-1",
HealthStatus: "healthy",
Metadata: json.RawMessage(`{
"fabric_service_channel_runtime_report": {
"ingress": {
"route_manager": {
"last_applied_at": "2026-05-09T03:11:01Z",
"decisions": [{
"route_id": "route-replacement",
"source_node_id": "entry-1",
"destination_node_id": "exit-1",
"local_node_id": "entry-1",
"decision_source": "service_channel_feedback_no_alternate",
"rebuild_status": "pending_degraded_fallback",
"rebuild_reason": "service_channel_feedback_rebuild_requested",
"generation": "c18z82-generation-next",
"score_reasons": ["service_channel_fenced_route", "no_unfenced_alternate_route"]
}]
}
}
}
}`),
})
if err != nil {
t.Fatalf("record resurfaced heartbeat: %v", err)
}
incidents, err = service.ListFabricServiceChannelRouteRebuildIncidents(context.Background(), "admin-1", ListFabricServiceChannelRouteRebuildIncidentsInput{
ClusterID: "cluster-1",
Limit: 10,
})
if err != nil {
t.Fatalf("list resurfaced rebuild incidents: %v", err)
}
// The incident on the new generation must be unsilenced, flagged as resurfaced,
// and must carry the previous generation, route and channel identifiers.
if len(incidents) == 0 || incidents[0].AlertSilenced || !incidents[0].AlertResurfaced || incidents[0].Generation != "c18z82-generation-next" ||
incidents[0].AlertResurfacedCause != "generation_changed" ||
incidents[0].AlertResurfacedPreviousGeneration != "c18z82-generation" ||
incidents[0].AlertResurfacedPreviousRouteID != "route-replacement" ||
incidents[0].AlertResurfacedPreviousChannelID != "channel-no-safe" {
t.Fatalf("expected resurfaced access decision incident on new generation: %+v", incidents)
}
}
// TestRecordFabricServiceChannelRemediationRebuildIntentsPersistsRequestedAndRejected
// passes one guard-allowed and one guard-rejected rebuild_route command to
// recordFabricServiceChannelRemediationRebuildIntents and verifies that the
// fake repository ends up with one rebuild attempt per command, in command
// order, carrying the expected status, outcome and policy fingerprint.
func TestRecordFabricServiceChannelRemediationRebuildIntentsPersistsRequestedAndRejected(t *testing.T) {
	recordedAt := time.Date(2026, 5, 8, 16, 45, 0, 0, time.UTC)
	commandExpiry := recordedAt.Add(time.Minute)
	repo := &fakeRepository{}
	svc := NewService(repo)

	// Command whose guard allowed the rebuild.
	allowedCommand := FabricServiceChannelAccessRemediationCommand{
		CommandID:             "cmd-requested",
		Action:                "rebuild_route",
		ChannelID:             "channel-1",
		ServiceClass:          FabricServiceClassVPNPackets,
		PrimaryRouteID:        "route-a",
		PoolPolicyFingerprint: "pool-fp-1",
		GuardStatus:           "allowed",
		GuardReason:           "lease_pool_policy_allows_route",
		Reason:                "route_feedback_recommends_rebuild",
		ExpiresAt:             commandExpiry,
	}
	// Command whose guard rejected the proposed replacement route.
	rejectedCommand := FabricServiceChannelAccessRemediationCommand{
		CommandID:             "cmd-rejected",
		Action:                "rebuild_route",
		ChannelID:             "channel-2",
		ServiceClass:          FabricServiceClassVPNPackets,
		PrimaryRouteID:        "route-b",
		ReplacementRouteID:    "route-outside",
		PoolPolicyFingerprint: "pool-fp-2",
		GuardStatus:           "rejected",
		GuardReason:           "replacement_exit_outside_signed_pool_policy",
		Reason:                "alternate_route_rejected_by_pool_policy",
		ExpiresAt:             commandExpiry,
	}
	commands := []FabricServiceChannelAccessRemediationCommand{allowedCommand, rejectedCommand}

	if err := svc.recordFabricServiceChannelRemediationRebuildIntents(context.Background(), "cluster-1", "entry-1", commands, recordedAt); err != nil {
		t.Fatalf("record rebuild intents: %v", err)
	}
	if got := len(repo.fabricRebuildAttempts); got != 2 {
		t.Fatalf("rebuild attempts = %+v, want two", repo.fabricRebuildAttempts)
	}

	requested := repo.fabricRebuildAttempts[0]
	requestedOK := requested.RebuildRequestID == "cmd-requested" &&
		requested.RebuildStatus == "requested" &&
		requested.Outcome == "rebuild_requested" &&
		requested.DecisionSource == "service_channel_remediation_command" &&
		requested.PolicyFingerprint == "pool-fp-1"
	if !requestedOK {
		t.Fatalf("unexpected requested rebuild intent: %+v", requested)
	}

	rejected := repo.fabricRebuildAttempts[1]
	rejectedOK := rejected.RebuildRequestID == "cmd-rejected" &&
		rejected.RebuildStatus == "rejected" &&
		rejected.Outcome == "policy_guard_rejected" &&
		rejected.ReplacementRouteID == "route-outside" &&
		rejected.PolicyFingerprint == "pool-fp-2"
	if !rejectedOK {
		t.Fatalf("unexpected rejected rebuild intent: %+v", rejected)
	}
}
func TestResolveFabricServiceChannelRemediationRebuildIntentsRecordsNoAlternate(t *testing.T) {
now := time.Date(2026, 5, 9, 1, 10, 0, 0, time.UTC)
store := &fakeRepository{
fabricLeases: map[string]FabricServiceChannelLeaseRecord{
fabricServiceChannelLeaseCacheKey("cluster-1", "channel-no-alt"): {
ClusterID: "cluster-1",
ChannelID: "channel-no-alt",
ResourceID: "vpn-home",
ServiceClass: FabricServiceClassVPNPackets,
SelectedEntryNodeID: "entry-1",
ExpiresAt: now.Add(time.Minute),
Lease: FabricServiceChannelLease{
ClusterID: "cluster-1",
ChannelID: "channel-no-alt",
ResourceID: "vpn-home",
ServiceClass: FabricServiceClassVPNPackets,
Status: FabricServiceChannelStatusReady,
SelectedEntryNodeID: "entry-1",
SelectedExitNodeID: "exit-1",
EntryPool: []FabricServiceChannelNodeCandidate{{NodeID: "entry-1", Status: "selected"}},
ExitPool: []FabricServiceChannelNodeCandidate{{NodeID: "exit-1", Status: "selected"}},
ExpiresAt: now.Add(time.Minute),
},
},
},
}
service := NewService(store)
decisions, err := service.resolveFabricServiceChannelRemediationRebuildIntents(context.Background(), GetNodeSyntheticMeshConfigInput{
ClusterID: "cluster-1",
NodeID: "entry-1",
}, []FabricServiceChannelAccessRemediationCommand{{
CommandID: "cmd-no-alt",
Action: "rebuild_route",
ChannelID: "channel-no-alt",
ResourceID: "vpn-home",
ServiceClass: FabricServiceClassVPNPackets,
PrimaryRouteID: "route-bad",
GuardStatus: "allowed",
Reason: "route_feedback_recommends_rebuild",
ExpiresAt: now.Add(time.Minute),
}}, []MeshRouteIntent{{
ID: "route-bad",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"entry-1"}`),
DestinationSelector: json.RawMessage(`{"node_id":"exit-1"}`),
ServiceClass: FabricServiceClassVPNPackets,
Priority: 100,
Status: "active",
Policy: json.RawMessage(`{
"synthetic_enabled": true,
"hops": ["entry-1", "exit-1"],
"allowed_channels": ["vpn_packet", "fabric_control"]
}`),
UpdatedAt: now,
}}, map[string]fabricServiceChannelRouteFeedback{
"route-bad": {
RouteID: "route-bad",
Fenced: true,
RouteRebuildRecommended: true,
ScoreAdjustment: -1030,
Reasons: []string{"service_channel_route_rebuild_recommended"},
ConsecutiveFailures: 3,
},
}, "config-c18z77", now)
if err != nil {
t.Fatalf("resolve rebuild intents: %v", err)
}
if len(decisions) != 0 {
t.Fatalf("decisions = %+v, want none without alternate", decisions)
}
if len(store.fabricRebuildAttempts) != 1 {
t.Fatalf("rebuild attempts = %+v, want one", store.fabricRebuildAttempts)
}
attempt := store.fabricRebuildAttempts[0]
if attempt.RebuildStatus != "no_alternate" ||
attempt.Outcome != "no_alternate" ||
attempt.RebuildReason != "no_unfenced_alternate_route" ||
attempt.ConsecutiveFailures != 3 {
t.Fatalf("unexpected no-alternate rebuild resolution: %+v", attempt)
}
}
// TestResolveFabricServiceChannelRemediationRebuildIntentsAppliesAlternateDecision
// resolves a rebuild_route command for a fenced primary ("route-bad") when a
// second route intent ("route-good") with positive feedback is available. It
// expects one returned decision selecting route-good and a persisted rebuild
// attempt recording the applied replacement together with both hop lists.
func TestResolveFabricServiceChannelRemediationRebuildIntentsAppliesAlternateDecision(t *testing.T) {
	now := time.Date(2026, 5, 9, 1, 15, 0, 0, time.UTC)
	store := &fakeRepository{
		fabricLeases: map[string]FabricServiceChannelLeaseRecord{
			fabricServiceChannelLeaseCacheKey("cluster-1", "channel-apply"): {
				ClusterID:           "cluster-1",
				ChannelID:           "channel-apply",
				ResourceID:          "vpn-home",
				ServiceClass:        FabricServiceClassVPNPackets,
				SelectedEntryNodeID: "entry-1",
				ExpiresAt:           now.Add(time.Minute),
				Lease: FabricServiceChannelLease{
					ClusterID:           "cluster-1",
					ChannelID:           "channel-apply",
					ResourceID:          "vpn-home",
					ServiceClass:        FabricServiceClassVPNPackets,
					Status:              FabricServiceChannelStatusReady,
					SelectedEntryNodeID: "entry-1",
					SelectedExitNodeID:  "exit-1",
					EntryPool:           []FabricServiceChannelNodeCandidate{{NodeID: "entry-1", Status: "selected"}},
					ExitPool:            []FabricServiceChannelNodeCandidate{{NodeID: "exit-1", Status: "selected"}},
					ExpiresAt:           now.Add(time.Minute),
				},
			},
		},
	}
	service := NewService(store)
	// Two intents between the same entry and exit: the fenced primary and the
	// candidate replacement.
	intents := []MeshRouteIntent{
		{
			ID:                  "route-bad",
			ClusterID:           "cluster-1",
			SourceSelector:      json.RawMessage(`{"node_id":"entry-1"}`),
			DestinationSelector: json.RawMessage(`{"node_id":"exit-1"}`),
			ServiceClass:        FabricServiceClassVPNPackets,
			Priority:            100,
			Status:              "active",
			Policy: json.RawMessage(`{
"synthetic_enabled": true,
"hops": ["entry-1", "exit-1"],
"allowed_channels": ["vpn_packet", "fabric_control"]
}`),
			UpdatedAt: now,
		},
		{
			ID:                  "route-good",
			ClusterID:           "cluster-1",
			SourceSelector:      json.RawMessage(`{"node_id":"entry-1"}`),
			DestinationSelector: json.RawMessage(`{"node_id":"exit-1"}`),
			ServiceClass:        FabricServiceClassVPNPackets,
			Priority:            90,
			Status:              "active",
			Policy: json.RawMessage(`{
"synthetic_enabled": true,
"hops": ["entry-1", "exit-1"],
"allowed_channels": ["vpn_packet", "fabric_control"]
}`),
			UpdatedAt: now,
		},
	}
	decisions, err := service.resolveFabricServiceChannelRemediationRebuildIntents(context.Background(), GetNodeSyntheticMeshConfigInput{
		ClusterID: "cluster-1",
		NodeID:    "entry-1",
	}, []FabricServiceChannelAccessRemediationCommand{{
		CommandID:      "cmd-apply",
		Action:         "rebuild_route",
		ChannelID:      "channel-apply",
		ResourceID:     "vpn-home",
		ServiceClass:   FabricServiceClassVPNPackets,
		PrimaryRouteID: "route-bad",
		GuardStatus:    "allowed",
		Reason:         "route_feedback_recommends_rebuild",
		ExpiresAt:      now.Add(time.Minute),
	}}, intents, map[string]fabricServiceChannelRouteFeedback{
		"route-bad": {
			RouteID:                 "route-bad",
			Fenced:                  true,
			RouteRebuildRecommended: true,
			ScoreAdjustment:         -1030,
			Reasons:                 []string{"service_channel_route_rebuild_recommended"},
		},
		"route-good": {
			RouteID:         "route-good",
			ScoreAdjustment: 100,
			Reasons:         []string{"service_channel_recent_success"},
		},
	}, "config-c18z77", now)
	if err != nil {
		t.Fatalf("resolve rebuild intents: %v", err)
	}
	if len(decisions) != 1 {
		t.Fatalf("decisions = %+v, want one applied alternate", decisions)
	}
	decision := decisions[0]
	if decision.RebuildRequestID != "cmd-apply" ||
		decision.RebuildStatus != "applied" ||
		decision.ReplacementRouteID != "route-good" ||
		decision.DecisionSource != "service_channel_remediation_command" {
		t.Fatalf("unexpected applied remediation decision: %+v", decision)
	}
	// Guard the index below: without it a missing attempt surfaces as an
	// index-out-of-range panic instead of a readable test failure.
	if len(store.fabricRebuildAttempts) == 0 {
		t.Fatalf("rebuild attempts = %+v, want at least one", store.fabricRebuildAttempts)
	}
	attempt := store.fabricRebuildAttempts[0]
	if attempt.RebuildStatus != "applied" ||
		attempt.Outcome != "replacement_selected" ||
		attempt.ReplacementRouteID != "route-good" ||
		!reflect.DeepEqual(attempt.OldHops, []string{"entry-1", "exit-1"}) ||
		!reflect.DeepEqual(attempt.ReplacementHops, []string{"entry-1", "exit-1"}) {
		t.Fatalf("unexpected applied rebuild resolution: %+v", attempt)
	}
}
// TestIssueFabricServiceChannelLeaseMarksBackendRelayAsDegradedFallbackWhenRouteMissing
// issues a remote-workspace lease against an empty fake repository (no route
// intents) and checks that the lease reports degraded_fallback with an active
// backend-relay fallback, a "missing_route_intent" primary route, the
// asserted channel and role entries, and a QUIC-only EntryHTTP descriptor.
func TestIssueFabricServiceChannelLeaseMarksBackendRelayAsDegradedFallbackWhenRouteMissing(t *testing.T) {
	fixedNow := time.Date(2026, 5, 7, 12, 30, 0, 0, time.UTC)
	svc := NewService(&fakeRepository{})
	svc.now = func() time.Time { return fixedNow }

	request := IssueFabricServiceChannelLeaseInput{
		ClusterID:      "cluster-1",
		OrganizationID: "org-home",
		UserID:         "user-m",
		ServiceClass:   FabricServiceClassRemoteWorkspace,
		EntryNodeIDs:   []string{"entry-a"},
		ExitNodeIDs:    []string{"exit-b"},
	}
	lease, err := svc.IssueFabricServiceChannelLease(context.Background(), request)
	if err != nil {
		t.Fatalf("issue lease: %v", err)
	}

	if got := lease.Status; got != FabricServiceChannelStatusDegradedFallback {
		t.Fatalf("lease.Status = %q, want degraded_fallback", got)
	}
	// The primary route must be a placeholder with no route ID.
	if !(lease.PrimaryRoute.Status == "missing_route_intent" && lease.PrimaryRoute.RouteID == "") {
		t.Fatalf("unexpected primary route fallback: %+v", lease.PrimaryRoute)
	}
	if lease.PrimaryRoute.RecoveryPolicy == nil {
		t.Fatal("fallback primary route must include recovery policy provenance")
	}
	if !(lease.Fallback.Active && lease.Fallback.Degraded && lease.Fallback.BackendRelay) {
		t.Fatalf("fallback should be active degraded backend relay: %+v", lease.Fallback)
	}
	if !(containsString(lease.AllowedChannels, FabricChannelInteractive) && containsString(lease.RequiredRoles, "rdp-worker")) {
		t.Fatalf("remote workspace defaults not applied: channels=%v roles=%v", lease.AllowedChannels, lease.RequiredRoles)
	}
	if !(lease.EntryHTTP.Type == "fabric_quic_only" && lease.EntryHTTP.PathTemplate == "" && lease.EntryHTTP.WebSocketPathTemplate == "") {
		t.Fatalf("remote workspace ingress must not expose HTTP/WebSocket transport paths: %+v", lease.EntryHTTP)
	}
	dataPlaneOK := lease.DataPlane.StableContractForServiceClass == FabricServiceClassRemoteWorkspace &&
		lease.DataPlane.ServiceNeutral &&
		lease.DataPlane.ProtocolAgnostic &&
		containsString(lease.DataPlane.RequiredFlowIsolationClasses, FabricChannelInteractive)
	if !dataPlaneOK {
		t.Fatalf("unexpected remote workspace data-plane contract: %+v", lease.DataPlane)
	}
}
// TestIssueFabricServiceChannelLeaseUsesServiceClassAwareIngressDescriptors
// issues one lease per service class and checks, for each, that EntryHTTP is
// the QUIC-only descriptor with empty path/format fields and that the
// data-plane contract names the requested service class.
func TestIssueFabricServiceChannelLeaseUsesServiceClassAwareIngressDescriptors(t *testing.T) {
	fixedNow := time.Date(2026, 5, 12, 14, 10, 0, 0, time.UTC)
	svc := NewService(&fakeRepository{})
	svc.now = func() time.Time { return fixedNow }

	// NOTE(review): pathNeedle and packetMedia are populated for every case but
	// never asserted below; confirm whether they are intentionally retained.
	type ingressCase struct {
		name        string
		service     string
		pathNeedle  string
		packetMedia string
	}
	cases := []ingressCase{
		{name: "vpn", service: FabricServiceClassVPNPackets, pathNeedle: "vpn-connections", packetMedia: "application/vnd.rap.vpn-packet-batch.v1"},
		{name: "remote workspace", service: FabricServiceClassRemoteWorkspace, pathNeedle: "remote-workspaces", packetMedia: "application/vnd.rap.remote-workspace-frame-batch.v1"},
		{name: "file transfer", service: FabricServiceClassFileTransfer, pathNeedle: "file-transfers", packetMedia: "application/vnd.rap.file-transfer-chunk-batch.v1"},
		{name: "video", service: FabricServiceClassVideo, pathNeedle: "video-sessions", packetMedia: "application/vnd.rap.video-frame-batch.v1"},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			request := IssueFabricServiceChannelLeaseInput{
				ClusterID:      "cluster-1",
				OrganizationID: "org-home",
				UserID:         "user-m",
				ResourceID:     "resource-1",
				ServiceClass:   tc.service,
				EntryNodeIDs:   []string{"entry-a"},
				ExitNodeIDs:    []string{"exit-b"},
			}
			lease, err := svc.IssueFabricServiceChannelLease(context.Background(), request)
			if err != nil {
				t.Fatalf("issue lease: %v", err)
			}
			quicOnly := lease.EntryHTTP.Type == "fabric_quic_only" &&
				lease.EntryHTTP.PathTemplate == "" &&
				lease.EntryHTTP.WebSocketPathTemplate == "" &&
				lease.EntryHTTP.PacketBatchFormat == ""
			if !quicOnly {
				t.Fatalf("EntryHTTP must be disabled for QUIC-only fabric: %+v", lease.EntryHTTP)
			}
			if got := lease.DataPlane.StableContractForServiceClass; got != tc.service {
				t.Fatalf("StableContractForServiceClass = %q, want %q", lease.DataPlane.StableContractForServiceClass, tc.service)
			}
		})
	}
}
func TestIssueFabricServiceChannelLeaseFencesRouteFromFlowFeedback(t *testing.T) {
now := time.Date(2026, 5, 7, 13, 0, 0, 0, time.UTC)
store := &fakeRepository{
routeIntents: []MeshRouteIntent{
{
ID: "route-bad",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"entry-1"}`),
DestinationSelector: json.RawMessage(`{"node_id":"exit-1"}`),
ServiceClass: FabricServiceClassVPNPackets,
Priority: 50,
Status: "active",
Policy: json.RawMessage(`{
"hops": ["entry-1", "relay-bad", "exit-1"],
"allowed_channels": ["vpn_packet", "fabric_control"]
}`),
UpdatedAt: now,
},
{
ID: "route-good",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"entry-1"}`),
DestinationSelector: json.RawMessage(`{"node_id":"exit-1"}`),
ServiceClass: FabricServiceClassVPNPackets,
Priority: 10,
Status: "active",
Policy: json.RawMessage(`{
"hops": ["entry-1", "relay-good", "exit-1"],
"allowed_channels": ["vpn_packet", "fabric_control"]
}`),
UpdatedAt: now,
},
},
heartbeats: map[string][]NodeHeartbeat{
"entry-1": {
{
ClusterID: "cluster-1",
NodeID: "entry-1",
ObservedAt: now.Add(-15 * time.Second),
Metadata: json.RawMessage(`{
"fabric_service_channel_runtime_report": {
"schema_version": "c18l.fabric_service_channel_runtime_report.v1",
"ingress": {
"flow_scheduler": {
"channel_stats": {
"flow-7": {
"last_failed_route_id": "route-bad",
"last_error": "forward peer unavailable",
"consecutive_failures": 2,
"route_rebuild_recommended": true,
"degraded_fallback_recommended": true
},
"flow-9": {
"last_route_id": "route-good",
"last_next_hop": "relay-good",
"consecutive_failures": 0
}
}
}
}
}
}`),
},
},
},
}
service := NewService(store)
service.now = func() time.Time { return now }
lease, err := service.IssueFabricServiceChannelLease(context.Background(), IssueFabricServiceChannelLeaseInput{
ClusterID: "cluster-1",
OrganizationID: "org-home",
UserID: "user-m",
ResourceID: "vpn-home",
ServiceClass: FabricServiceClassVPNPackets,
EntryNodeIDs: []string{"entry-1"},
ExitNodeIDs: []string{"exit-1"},
PreferredEntryNodeID: "entry-1",
PreferredExitNodeID: "exit-1",
TTL: 90 * time.Second,
})
if err != nil {
t.Fatalf("issue lease: %v", err)
}
if lease.PrimaryRoute.RouteID != "route-good" {
t.Fatalf("primary route = %q, want route-good after route-bad feedback fence", lease.PrimaryRoute.RouteID)
}
if !containsString(lease.PrimaryRoute.ScoreReasons, "service_channel_recent_success") {
t.Fatalf("primary route should include service-channel success feedback: %+v", lease.PrimaryRoute)
}
for _, alternate := range lease.AlternateRoutes {
if alternate.RouteID == "route-bad" {
t.Fatalf("fenced route must not be offered as alternate: %+v", lease.AlternateRoutes)
}
}
if lease.Fallback.Active || lease.Fallback.Degraded {
t.Fatalf("healthy alternate should avoid degraded fallback: %+v", lease.Fallback)
}
}
func TestIssueFabricServiceChannelLeasePrefersFastHealthyRouteFeedback(t *testing.T) {
now := time.Date(2026, 5, 7, 16, 10, 0, 0, time.UTC)
store := &fakeRepository{
routeIntents: []MeshRouteIntent{
{
ID: "route-slow",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"entry-1"}`),
DestinationSelector: json.RawMessage(`{"node_id":"exit-1"}`),
ServiceClass: FabricServiceClassVPNPackets,
Priority: 120,
Status: "active",
Policy: json.RawMessage(`{
"hops": ["entry-1", "relay-slow", "exit-1"],
"allowed_channels": ["vpn_packet", "fabric_control"]
}`),
UpdatedAt: now,
},
{
ID: "route-fast",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"entry-1"}`),
DestinationSelector: json.RawMessage(`{"node_id":"exit-1"}`),
ServiceClass: FabricServiceClassVPNPackets,
Priority: 80,
Status: "active",
Policy: json.RawMessage(`{
"hops": ["entry-1", "relay-fast", "exit-1"],
"allowed_channels": ["vpn_packet", "fabric_control"]
}`),
UpdatedAt: now,
},
},
heartbeats: map[string][]NodeHeartbeat{
"entry-1": {
{
ClusterID: "cluster-1",
NodeID: "entry-1",
ObservedAt: now.Add(-10 * time.Second),
Metadata: json.RawMessage(`{
"fabric_service_channel_runtime_report": {
"schema_version": "c18l.fabric_service_channel_runtime_report.v1",
"ingress": {
"flow_scheduler": {
"channel_stats": {
"flow-fast": {
"last_route_id": "route-fast",
"last_next_hop": "relay-fast",
"last_send_duration_ms": 8,
"consecutive_failures": 0,
"stall_count": 0
},
"flow-slow": {
"last_route_id": "route-slow",
"last_next_hop": "relay-slow",
"last_send_duration_ms": 900,
"consecutive_failures": 0,
"stall_count": 0
}
}
}
}
}
}`),
},
},
},
}
service := NewService(store)
service.now = func() time.Time { return now }
lease, err := service.IssueFabricServiceChannelLease(context.Background(), IssueFabricServiceChannelLeaseInput{
ClusterID: "cluster-1",
OrganizationID: "org-home",
UserID: "user-m",
ResourceID: "vpn-home",
ServiceClass: FabricServiceClassVPNPackets,
EntryNodeIDs: []string{"entry-1"},
ExitNodeIDs: []string{"exit-1"},
PreferredEntryNodeID: "entry-1",
PreferredExitNodeID: "exit-1",
TTL: 90 * time.Second,
})
if err != nil {
t.Fatalf("issue lease: %v", err)
}
if lease.PrimaryRoute.RouteID != "route-fast" {
t.Fatalf("primary route = %q, want route-fast from quality feedback; route=%+v alternates=%+v", lease.PrimaryRoute.RouteID, lease.PrimaryRoute, lease.AlternateRoutes)
}
if !containsString(lease.PrimaryRoute.ScoreReasons, "service_channel_quality_latency_le_10ms") {
t.Fatalf("fast route should include latency quality reason: %+v", lease.PrimaryRoute)
}
var slow FabricServiceChannelRoute
for _, route := range lease.AlternateRoutes {
if route.RouteID == "route-slow" {
slow = route
break
}
}
if slow.RouteID == "" || !containsString(slow.ScoreReasons, "service_channel_quality_latency_very_slow") {
t.Fatalf("slow alternate should retain quality penalty reason: %+v", lease.AlternateRoutes)
}
}
func TestIssueFabricServiceChannelLeaseDecaysOlderHealthyRouteFeedback(t *testing.T) {
now := time.Date(2026, 5, 8, 9, 0, 0, 0, time.UTC)
store := &fakeRepository{
routeIntents: []MeshRouteIntent{
{
ID: "route-old-fast",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"entry-1"}`),
DestinationSelector: json.RawMessage(`{"node_id":"exit-1"}`),
ServiceClass: FabricServiceClassVPNPackets,
Priority: 80,
Status: "active",
Policy: json.RawMessage(`{
"hops": ["entry-1", "exit-1"],
"allowed_channels": ["vpn_packet", "fabric_control"]
}`),
UpdatedAt: now,
},
{
ID: "route-fresh",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"entry-1"}`),
DestinationSelector: json.RawMessage(`{"node_id":"exit-1"}`),
ServiceClass: FabricServiceClassVPNPackets,
Priority: 80,
Status: "active",
Policy: json.RawMessage(`{
"hops": ["entry-1", "relay-fresh", "exit-1"],
"allowed_channels": ["vpn_packet", "fabric_control"]
}`),
UpdatedAt: now,
},
},
fabricRouteFeedback: []FabricServiceChannelRouteFeedbackObservation{
{
ID: "feedback-old",
ClusterID: "cluster-1",
ReporterNodeID: "entry-1",
RouteID: "route-old-fast",
ServiceClass: FabricServiceClassVPNPackets,
FeedbackStatus: "healthy",
ScoreAdjustment: 90,
Reasons: []string{"service_channel_recent_success", "service_channel_quality_latency_le_10ms"},
LastSendDurationMs: 1,
ObservedAt: now.Add(-90 * time.Second),
ExpiresAt: now.Add(30 * time.Second),
},
{
ID: "feedback-fresh",
ClusterID: "cluster-1",
ReporterNodeID: "entry-1",
RouteID: "route-fresh",
ServiceClass: FabricServiceClassVPNPackets,
FeedbackStatus: "healthy",
ScoreAdjustment: 40,
Reasons: []string{"service_channel_recent_success", "service_channel_quality_latency_le_50ms"},
LastSendDurationMs: 40,
ObservedAt: now.Add(-5 * time.Second),
ExpiresAt: now.Add(115 * time.Second),
},
},
}
service := NewService(store)
service.now = func() time.Time { return now }
lease, err := service.IssueFabricServiceChannelLease(context.Background(), IssueFabricServiceChannelLeaseInput{
ClusterID: "cluster-1",
OrganizationID: "org-home",
UserID: "user-m",
ResourceID: "vpn-home",
ServiceClass: FabricServiceClassVPNPackets,
EntryNodeIDs: []string{"entry-1"},
ExitNodeIDs: []string{"exit-1"},
PreferredEntryNodeID: "entry-1",
PreferredExitNodeID: "exit-1",
TTL: 90 * time.Second,
})
if err != nil {
t.Fatalf("issue lease: %v", err)
}
if lease.PrimaryRoute.RouteID != "route-fresh" {
t.Fatalf("primary route = %q, want fresher feedback route after age decay; route=%+v alternates=%+v", lease.PrimaryRoute.RouteID, lease.PrimaryRoute, lease.AlternateRoutes)
}
var oldRoute FabricServiceChannelRoute
for _, route := range lease.AlternateRoutes {
if route.RouteID == "route-old-fast" {
oldRoute = route
break
}
}
if oldRoute.RouteID == "" || !containsString(oldRoute.ScoreReasons, "service_channel_feedback_age_decay") {
t.Fatalf("old route should carry age decay reason: %+v", lease.AlternateRoutes)
}
}
func TestServiceChannelRouteFeedbackReportIncludesEffectiveDecayedScore(t *testing.T) {
now := time.Date(2026, 5, 8, 9, 3, 0, 0, time.UTC)
report := serviceChannelRouteFeedbackReport([]FabricServiceChannelRouteFeedbackObservation{{
ID: "feedback-old",
ClusterID: "cluster-1",
ReporterNodeID: "entry-1",
RouteID: "route-old-fast",
ServiceClass: FabricServiceClassVPNPackets,
FeedbackStatus: "healthy",
ScoreAdjustment: 90,
Reasons: []string{"service_channel_recent_success"},
LastSendDurationMs: 1,
ObservedAt: now.Add(-90 * time.Second),
ExpiresAt: now.Add(30 * time.Second),
}}, now)
if report == nil || len(report.Observations) != 1 {
t.Fatalf("report observations = %+v, want one observation", report)
}
observation := report.Observations[0]
if observation.ScoreAdjustment != 90 || observation.EffectiveScoreAdjustment != 23 {
t.Fatalf("scores raw/effective = %d/%d, want 90/23", observation.ScoreAdjustment, observation.EffectiveScoreAdjustment)
}
if !containsString(observation.Reasons, "service_channel_feedback_age_decay") {
t.Fatalf("reasons = %+v, want age decay reason", observation.Reasons)
}
}
func TestIssueFabricServiceChannelLeaseFallsBackWhenOnlyRouteFencedByFlowFeedback(t *testing.T) {
now := time.Date(2026, 5, 7, 13, 30, 0, 0, time.UTC)
store := &fakeRepository{
routeIntents: []MeshRouteIntent{
{
ID: "route-bad",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"entry-1"}`),
DestinationSelector: json.RawMessage(`{"node_id":"exit-1"}`),
ServiceClass: FabricServiceClassVPNPackets,
Priority: 50,
Status: "active",
Policy: json.RawMessage(`{
"hops": ["entry-1", "relay-bad", "exit-1"],
"allowed_channels": ["vpn_packet", "fabric_control"]
}`),
UpdatedAt: now,
},
},
heartbeats: map[string][]NodeHeartbeat{
"entry-1": {
{
ClusterID: "cluster-1",
NodeID: "entry-1",
ObservedAt: now.Add(-10 * time.Second),
Metadata: json.RawMessage(`{
"fabric_service_channel_runtime_report": {
"schema_version": "c18l.fabric_service_channel_runtime_report.v1",
"ingress": {
"flow_scheduler": {
"channel_stats": {
"flow-7": {
"last_failed_route_id": "route-bad",
"consecutive_failures": 2,
"route_rebuild_recommended": true
}
}
}
}
}
}`),
},
},
},
}
service := NewService(store)
service.now = func() time.Time { return now }
lease, err := service.IssueFabricServiceChannelLease(context.Background(), IssueFabricServiceChannelLeaseInput{
ClusterID: "cluster-1",
OrganizationID: "org-home",
UserID: "user-m",
ResourceID: "vpn-home",
ServiceClass: FabricServiceClassVPNPackets,
EntryNodeIDs: []string{"entry-1"},
ExitNodeIDs: []string{"exit-1"},
PreferredEntryNodeID: "entry-1",
PreferredExitNodeID: "exit-1",
})
if err != nil {
t.Fatalf("issue lease: %v", err)
}
if lease.Status != FabricServiceChannelStatusDegradedFallback ||
lease.Fallback.Reason != "fabric_route_rebuild_pending_backend_relay" {
t.Fatalf("lease should degrade because the only route is fenced: status=%s fallback=%+v", lease.Status, lease.Fallback)
}
}
func TestIssueFabricServiceChannelLeaseSelectsHealthyAlternateExitFromPool(t *testing.T) {
now := time.Date(2026, 5, 7, 13, 45, 0, 0, time.UTC)
store := &fakeRepository{
routeIntents: []MeshRouteIntent{
{
ID: "route-entry-exit-a",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"entry-1"}`),
DestinationSelector: json.RawMessage(`{"node_id":"exit-a"}`),
ServiceClass: FabricServiceClassVPNPackets,
Priority: 100,
Status: "active",
Policy: json.RawMessage(`{
"hops": ["entry-1", "exit-a"],
"allowed_channels": ["vpn_packet", "fabric_control"],
"metadata": {"exit_pool_id": "pool-home"}
}`),
UpdatedAt: now,
},
{
ID: "route-entry-exit-b",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"entry-1"}`),
DestinationSelector: json.RawMessage(`{"node_id":"exit-b"}`),
ServiceClass: FabricServiceClassVPNPackets,
Priority: 30,
Status: "active",
Policy: json.RawMessage(`{
"hops": ["entry-1", "exit-b"],
"allowed_channels": ["vpn_packet", "fabric_control"],
"metadata": {"exit_pool_id": "pool-home"}
}`),
UpdatedAt: now,
},
},
fabricRouteFeedback: []FabricServiceChannelRouteFeedbackObservation{
{
ClusterID: "cluster-1",
ReporterNodeID: "entry-1",
RouteID: "route-entry-exit-a",
ServiceClass: FabricServiceClassVPNPackets,
FeedbackStatus: "fenced",
ScoreAdjustment: -1030,
Reasons: []string{"service_channel_route_rebuild_recommended"},
ConsecutiveFailures: 2,
ObservedAt: now,
ExpiresAt: now.Add(time.Minute),
},
{
ClusterID: "cluster-1",
ReporterNodeID: "entry-1",
RouteID: "route-entry-exit-b",
ServiceClass: FabricServiceClassVPNPackets,
FeedbackStatus: "healthy",
ScoreAdjustment: 10,
Reasons: []string{"service_channel_recent_success"},
ObservedAt: now,
ExpiresAt: now.Add(time.Minute),
},
},
}
service := NewService(store)
service.now = func() time.Time { return now }
lease, err := service.IssueFabricServiceChannelLease(context.Background(), IssueFabricServiceChannelLeaseInput{
ClusterID: "cluster-1",
OrganizationID: "org-home",
UserID: "user-m",
ResourceID: "vpn-home",
ServiceClass: FabricServiceClassVPNPackets,
EntryNodeIDs: []string{"entry-1"},
ExitNodeIDs: []string{"exit-a", "exit-b"},
PreferredEntryNodeID: "entry-1",
PreferredExitNodeID: "exit-a",
})
if err != nil {
t.Fatalf("issue lease: %v", err)
}
if lease.PrimaryRoute.RouteID != "route-entry-exit-b" || lease.SelectedExitNodeID != "exit-b" {
t.Fatalf("lease should select alternate exit from pool: selected_exit=%s primary=%+v", lease.SelectedExitNodeID, lease.PrimaryRoute)
}
for _, candidate := range lease.ExitPool {
if candidate.NodeID == "exit-b" && candidate.Status != "selected" {
t.Fatalf("alternate exit should be marked selected in exit pool: %+v", lease.ExitPool)
}
}
var signedPayload FabricServiceChannelLeaseAuthorityPayload
if err := json.Unmarshal(lease.AuthorityPayload, &signedPayload); err != nil {
t.Fatalf("unmarshal signed payload: %v", err)
}
if signedPayload.SelectedExitNodeID != "exit-b" || len(signedPayload.ExitPool) != 2 {
t.Fatalf("signed payload must bind selected exit and authorized exit pool: %+v", signedPayload)
}
if lease.Fallback.Active || lease.Fallback.Degraded {
t.Fatalf("healthy exit-pool alternate should avoid degraded fallback: %+v", lease.Fallback)
}
}
func TestIssueFabricServiceChannelLeaseSelectsHealthyAlternateEntryFromPool(t *testing.T) {
now := time.Date(2026, 5, 7, 14, 45, 0, 0, time.UTC)
store := &fakeRepository{
routeIntents: []MeshRouteIntent{
{
ID: "route-entry-a",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"entry-a"}`),
DestinationSelector: json.RawMessage(`{"node_id":"exit-1"}`),
ServiceClass: FabricServiceClassVPNPackets,
Priority: 100,
Status: "active",
Policy: json.RawMessage(`{
"hops": ["entry-a", "exit-1"],
"allowed_channels": ["vpn_packet", "fabric_control"],
"metadata": {"entry_pool_id": "pool-edge"}
}`),
UpdatedAt: now,
},
{
ID: "route-entry-b",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"entry-b"}`),
DestinationSelector: json.RawMessage(`{"node_id":"exit-1"}`),
ServiceClass: FabricServiceClassVPNPackets,
Priority: 30,
Status: "active",
Policy: json.RawMessage(`{
"hops": ["entry-b", "exit-1"],
"allowed_channels": ["vpn_packet", "fabric_control"],
"metadata": {"entry_pool_id": "pool-edge"}
}`),
UpdatedAt: now,
},
},
fabricRouteFeedback: []FabricServiceChannelRouteFeedbackObservation{
{
ClusterID: "cluster-1",
ReporterNodeID: "entry-a",
RouteID: "route-entry-a",
ServiceClass: FabricServiceClassVPNPackets,
FeedbackStatus: "fenced",
ScoreAdjustment: -1030,
Reasons: []string{"service_channel_entry_unreachable"},
ConsecutiveFailures: 3,
ObservedAt: now,
ExpiresAt: now.Add(time.Minute),
},
{
ClusterID: "cluster-1",
ReporterNodeID: "entry-b",
RouteID: "route-entry-b",
ServiceClass: FabricServiceClassVPNPackets,
FeedbackStatus: "healthy",
ScoreAdjustment: 10,
Reasons: []string{"service_channel_recent_success"},
ObservedAt: now,
ExpiresAt: now.Add(time.Minute),
},
},
}
service := NewService(store)
service.now = func() time.Time { return now }
lease, err := service.IssueFabricServiceChannelLease(context.Background(), IssueFabricServiceChannelLeaseInput{
ClusterID: "cluster-1",
OrganizationID: "org-home",
UserID: "user-m",
ResourceID: "vpn-home",
ServiceClass: FabricServiceClassVPNPackets,
EntryNodeIDs: []string{"entry-a", "entry-b"},
ExitNodeIDs: []string{"exit-1"},
PreferredEntryNodeID: "entry-a",
PreferredExitNodeID: "exit-1",
})
if err != nil {
t.Fatalf("issue lease: %v", err)
}
if lease.PrimaryRoute.RouteID != "route-entry-b" || lease.SelectedEntryNodeID != "entry-b" {
t.Fatalf("lease should select alternate entry from pool: selected_entry=%s primary=%+v", lease.SelectedEntryNodeID, lease.PrimaryRoute)
}
for _, candidate := range lease.EntryPool {
if candidate.NodeID == "entry-b" && candidate.Status != "selected" {
t.Fatalf("alternate entry should be marked selected in entry pool: %+v", lease.EntryPool)
}
}
var signedPayload FabricServiceChannelLeaseAuthorityPayload
if err := json.Unmarshal(lease.AuthorityPayload, &signedPayload); err != nil {
t.Fatalf("unmarshal signed payload: %v", err)
}
if signedPayload.SelectedEntryNodeID != "entry-b" || len(signedPayload.EntryPool) != 2 {
t.Fatalf("signed payload must bind selected entry and authorized entry pool: %+v", signedPayload)
}
if lease.Fallback.Active || lease.Fallback.Degraded {
t.Fatalf("healthy entry-pool alternate should avoid degraded fallback: %+v", lease.Fallback)
}
}
// TestIssueFabricServiceChannelLeaseAppliesClusterPoolPolicy stores a pool
// policy in the cluster metadata that restricts the pools to entry-b/exit-b,
// then requests a lease offering both a and b nodes. The lease must select
// entry-b, exit-b and route-b, expose pools of exactly those nodes, and carry
// the policy fingerprint on both the lease and the signed authority payload.
func TestIssueFabricServiceChannelLeaseAppliesClusterPoolPolicy(t *testing.T) {
	fixedNow := time.Date(2026, 5, 8, 20, 10, 0, 0, time.UTC)

	// Start from the default policy and override the fields under test.
	poolPolicy := defaultFabricServiceChannelPoolPolicy()
	poolPolicy.Source = "cluster_metadata"
	poolPolicy.EntryPoolNodeIDs = []string{"entry-b"}
	poolPolicy.ExitPoolNodeIDs = []string{"exit-b"}
	poolPolicy.PreferredEntryNodeID = "entry-b"
	poolPolicy.PreferredExitNodeID = "exit-b"
	poolPolicy.SelectionStrategy = "preferred_first"
	poolPolicy.RouteRebuild = "automatic"
	poolPolicy.EntryFailover = "automatic"
	poolPolicy.ExitFailover = "automatic"
	poolPolicy.CompatFallbackAllowed = true
	poolPolicy.StickySession = true
	poolPolicy = normalizeFabricServiceChannelPoolPolicy(poolPolicy, defaultFabricServiceChannelPoolPolicy())

	clusterMetadata, err := upsertFabricServiceChannelPoolPolicyMetadata(json.RawMessage(`{}`), poolPolicy)
	if err != nil {
		t.Fatalf("policy metadata: %v", err)
	}

	intents := []MeshRouteIntent{
		{
			ID:                  "route-a",
			ClusterID:           "cluster-1",
			SourceSelector:      json.RawMessage(`{"node_id":"entry-a"}`),
			DestinationSelector: json.RawMessage(`{"node_id":"exit-a"}`),
			ServiceClass:        FabricServiceClassVPNPackets,
			Priority:            100,
			Status:              "active",
			Policy:              json.RawMessage(`{"hops":["entry-a","exit-a"],"allowed_channels":["vpn_packet","fabric_control"]}`),
			UpdatedAt:           fixedNow,
		},
		{
			ID:                  "route-b",
			ClusterID:           "cluster-1",
			SourceSelector:      json.RawMessage(`{"node_id":"entry-b"}`),
			DestinationSelector: json.RawMessage(`{"node_id":"exit-b"}`),
			ServiceClass:        FabricServiceClassVPNPackets,
			Priority:            10,
			Status:              "active",
			Policy:              json.RawMessage(`{"hops":["entry-b","exit-b"],"allowed_channels":["vpn_packet","fabric_control"]}`),
			UpdatedAt:           fixedNow,
		},
	}
	repo := &fakeRepository{
		cluster: Cluster{
			ID:       "cluster-1",
			Slug:     "cluster-1",
			Name:     "Cluster 1",
			Status:   ClusterStatusActive,
			Metadata: clusterMetadata,
		},
		routeIntents: intents,
	}
	svc := NewService(repo)
	svc.now = func() time.Time { return fixedNow }

	request := IssueFabricServiceChannelLeaseInput{
		ClusterID:      "cluster-1",
		OrganizationID: "org-home",
		UserID:         "user-m",
		ResourceID:     "vpn-home",
		ServiceClass:   FabricServiceClassVPNPackets,
		EntryNodeIDs:   []string{"entry-a", "entry-b"},
		ExitNodeIDs:    []string{"exit-a", "exit-b"},
	}
	lease, err := svc.IssueFabricServiceChannelLease(context.Background(), request)
	if err != nil {
		t.Fatalf("issue lease: %v", err)
	}

	if !(lease.SelectedEntryNodeID == "entry-b" && lease.SelectedExitNodeID == "exit-b" && lease.PrimaryRoute.RouteID == "route-b") {
		t.Fatalf("lease did not apply pool policy: selected_entry=%s selected_exit=%s primary=%+v", lease.SelectedEntryNodeID, lease.SelectedExitNodeID, lease.PrimaryRoute)
	}
	if !(len(lease.EntryPool) == 1 && lease.EntryPool[0].NodeID == "entry-b" && len(lease.ExitPool) == 1 && lease.ExitPool[0].NodeID == "exit-b") {
		t.Fatalf("lease pools should be constrained by pool policy: entry=%+v exit=%+v", lease.EntryPool, lease.ExitPool)
	}
	if !(lease.PoolPolicy != nil && lease.PoolPolicy.Fingerprint == poolPolicy.Fingerprint) {
		t.Fatalf("lease missing pool policy provenance: %+v want %s", lease.PoolPolicy, poolPolicy.Fingerprint)
	}

	var bound FabricServiceChannelLeaseAuthorityPayload
	if decodeErr := json.Unmarshal(lease.AuthorityPayload, &bound); decodeErr != nil {
		t.Fatalf("unmarshal signed payload: %v", decodeErr)
	}
	if !(bound.PoolPolicy != nil && bound.PoolPolicy.Fingerprint == poolPolicy.Fingerprint) {
		t.Fatalf("signed payload missing pool policy provenance: %+v want %s", bound.PoolPolicy, poolPolicy.Fingerprint)
	}
}
func TestRecordHeartbeatPersistsServiceChannelRouteFeedbackForLaterLease(t *testing.T) {
now := time.Now().UTC().Truncate(time.Second)
store := &fakeRepository{
routeIntents: []MeshRouteIntent{
{
ID: "route-bad",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"entry-1"}`),
DestinationSelector: json.RawMessage(`{"node_id":"exit-1"}`),
ServiceClass: FabricServiceClassVPNPackets,
Priority: 100,
Status: "active",
Policy: json.RawMessage(`{
"hops": ["entry-1", "relay-bad", "exit-1"],
"allowed_channels": ["vpn_packet", "fabric_control"]
}`),
UpdatedAt: now,
},
{
ID: "route-good",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"entry-1"}`),
DestinationSelector: json.RawMessage(`{"node_id":"exit-1"}`),
ServiceClass: FabricServiceClassVPNPackets,
Priority: 10,
Status: "active",
Policy: json.RawMessage(`{
"hops": ["entry-1", "relay-good", "exit-1"],
"allowed_channels": ["vpn_packet", "fabric_control"]
}`),
UpdatedAt: now,
},
},
}
service := NewService(store)
service.now = func() time.Time { return now }
_, err := service.RecordHeartbeat(context.Background(), RecordHeartbeatInput{
ClusterID: "cluster-1",
NodeID: "entry-1",
HealthStatus: "healthy",
Metadata: json.RawMessage(`{
"fabric_service_channel_runtime_report": {
"schema_version": "c18l.fabric_service_channel_runtime_report.v1",
"ingress": {
"flow_scheduler": {
"channel_stats": {
"flow-1": {
"last_failed_route_id": "route-bad",
"consecutive_failures": 2,
"route_rebuild_recommended": true
}
}
}
}
}
}`),
})
if err != nil {
t.Fatalf("record heartbeat: %v", err)
}
if len(store.fabricRouteFeedback) != 1 || store.fabricRouteFeedback[0].RouteID != "route-bad" ||
store.fabricRouteFeedback[0].FeedbackStatus != "fenced" {
t.Fatalf("service-channel route feedback was not persisted: %+v", store.fabricRouteFeedback)
}
lease, err := service.IssueFabricServiceChannelLease(context.Background(), IssueFabricServiceChannelLeaseInput{
ClusterID: "cluster-1",
OrganizationID: "org-home",
UserID: "user-m",
ResourceID: "vpn-home",
ServiceClass: FabricServiceClassVPNPackets,
EntryNodeIDs: []string{"entry-1"},
ExitNodeIDs: []string{"exit-1"},
PreferredEntryNodeID: "entry-1",
PreferredExitNodeID: "exit-1",
})
if err != nil {
t.Fatalf("issue lease: %v", err)
}
if lease.PrimaryRoute.RouteID != "route-good" {
t.Fatalf("primary route = %q, want durable feedback to fence route-bad and select route-good", lease.PrimaryRoute.RouteID)
}
}
// TestRecordHeartbeatTurnsBlockedFallbackSendFailureIntoRebuildFeedback feeds a
// heartbeat whose access report records a fabric route send failure with the
// degraded fallback blocked, then checks the resulting chain: one fenced
// feedback observation for route-bad, a replacement decision towards
// route-good in the synthetic mesh config, one correlated rebuild attempt, and
// one feedback breakdown group in the rebuild health summary.
func TestRecordHeartbeatTurnsBlockedFallbackSendFailureIntoRebuildFeedback(t *testing.T) {
	now := time.Now().UTC().Truncate(time.Second)
	store := &fakeRepository{
		platformRole: PlatformRoleAdmin,
		testingFlags: EffectiveNodeTestingFlags{
			Enabled:               true,
			SyntheticLinksEnabled: true,
		},
		routeIntents: []MeshRouteIntent{
			{
				ID:                  "route-bad",
				ClusterID:           "cluster-1",
				SourceSelector:      json.RawMessage(`{"node_id":"entry-1"}`),
				DestinationSelector: json.RawMessage(`{"node_id":"exit-1"}`),
				ServiceClass:        FabricServiceClassVPNPackets,
				Priority:            100,
				Status:              "active",
				Policy:              json.RawMessage(`{"synthetic_enabled":true,"hops":["entry-1","exit-1"],"allowed_channels":["vpn_packet","fabric_control"]}`),
				UpdatedAt:           now,
			},
			{
				ID:                  "route-good",
				ClusterID:           "cluster-1",
				SourceSelector:      json.RawMessage(`{"node_id":"entry-1"}`),
				DestinationSelector: json.RawMessage(`{"node_id":"exit-1"}`),
				ServiceClass:        FabricServiceClassVPNPackets,
				Priority:            10,
				Status:              "active",
				Policy:              json.RawMessage(`{"synthetic_enabled":true,"hops":["entry-1","exit-1"],"allowed_channels":["vpn_packet","fabric_control"]}`),
				UpdatedAt:           now,
			},
		},
		// An unexpired lease for channel-1 whose primary route is route-bad and
		// whose only alternate is route-good.
		fabricLeases: map[string]FabricServiceChannelLeaseRecord{
			fabricServiceChannelLeaseCacheKey("cluster-1", "channel-1"): {
				ClusterID:           "cluster-1",
				ChannelID:           "channel-1",
				ResourceID:          "vpn-home",
				ServiceClass:        FabricServiceClassVPNPackets,
				SelectedEntryNodeID: "entry-1",
				ExpiresAt:           now.Add(time.Minute),
				Lease: FabricServiceChannelLease{
					ClusterID:           "cluster-1",
					ChannelID:           "channel-1",
					ResourceID:          "vpn-home",
					ServiceClass:        FabricServiceClassVPNPackets,
					Status:              FabricServiceChannelStatusReady,
					SelectedEntryNodeID: "entry-1",
					SelectedExitNodeID:  "exit-1",
					PrimaryRoute: FabricServiceChannelRoute{
						RouteID: "route-bad",
						Status:  "authorized",
					},
					AlternateRoutes: []FabricServiceChannelRoute{{
						RouteID: "route-good",
						Status:  "authorized",
					}},
					ExpiresAt: now.Add(time.Minute),
				},
			},
		},
	}
	service := NewService(store)
	service.now = func() time.Time { return now }
	_, err := service.RecordHeartbeat(context.Background(), RecordHeartbeatInput{
		ClusterID:    "cluster-1",
		NodeID:       "entry-1",
		HealthStatus: "healthy",
		Metadata: json.RawMessage(`{
"fabric_service_channel_access_report": {
"schema_version": "c18z52.fabric_service_channel_access_report.v1",
"total": 1,
"signed": 1,
"degraded_route_count": 0,
"degraded_route_blocked_count": 1,
"fabric_route_send_failure": 1,
"data_plane_contract": 1,
"last_degraded_route_policy": "disabled",
"last_working_data_transport": "fabric_service_channel",
"last_steady_state_transport": "fabric_route",
"last_data_plane_violation_status": "fabric_route_send_failed_degraded_route_blocked",
"last_data_plane_violation_reason": "mesh synthetic route not found"
}
}`),
	})
	if err != nil {
		t.Fatalf("record heartbeat: %v", err)
	}
	if len(store.fabricRouteFeedback) != 1 {
		t.Fatalf("route feedback count = %d, want one blocked fallback feedback: %+v", len(store.fabricRouteFeedback), store.fabricRouteFeedback)
	}
	feedback := store.fabricRouteFeedback[0]
	if feedback.RouteID != "route-bad" ||
		feedback.FeedbackStatus != "fenced" ||
		feedback.ScoreAdjustment != -1030 ||
		!containsString(feedback.Reasons, "data_plane_fabric_route_send_failed") ||
		!containsString(feedback.Reasons, "degraded_route_blocked_by_policy") {
		t.Fatalf("unexpected route feedback from blocked fallback: %+v", feedback)
	}
	cfg, err := service.GetNodeSyntheticMeshConfig(context.Background(), GetNodeSyntheticMeshConfigInput{
		ClusterID: "cluster-1",
		NodeID:    "entry-1",
	})
	if err != nil {
		t.Fatalf("synthetic config: %v", err)
	}
	if cfg.RoutePathDecisions == nil || cfg.RoutePathDecisions.ReplacementDecisionCount != 1 {
		t.Fatalf("expected blocked fallback feedback to drive replacement decision: %+v", cfg.RoutePathDecisions)
	}
	// The counter and the slice are separate fields; guard the index so an
	// inconsistent report fails the test with a message instead of panicking.
	if len(cfg.RoutePathDecisions.Decisions) == 0 {
		t.Fatalf("replacement decision count is 1 but decisions list is empty: %+v", cfg.RoutePathDecisions)
	}
	decision := cfg.RoutePathDecisions.Decisions[0]
	if decision.RouteID != "route-bad" ||
		decision.ReplacementRouteID != "route-good" ||
		decision.RebuildStatus != "applied" ||
		decision.FeedbackObservationID == "" ||
		decision.FeedbackSource != "fabric_service_channel_access_report" ||
		decision.FeedbackChannelID != "channel-1" ||
		decision.FeedbackResourceID != "vpn-home" ||
		decision.FeedbackViolationStatus != "fabric_route_send_failed_degraded_route_blocked" ||
		!containsString(decision.ScoreReasons, "service_channel_fenced_route") ||
		!containsString(decision.ScoreReasons, "service_channel_rebuild_applied") {
		t.Fatalf("unexpected replacement decision: %+v", decision)
	}
	if len(store.fabricRebuildAttempts) != 1 {
		t.Fatalf("rebuild attempt count = %d, want one correlated attempt: %+v", len(store.fabricRebuildAttempts), store.fabricRebuildAttempts)
	}
	// The rebuild attempt must carry the same feedback correlation as the decision.
	attempt := store.fabricRebuildAttempts[0]
	if attempt.FeedbackObservationID != decision.FeedbackObservationID ||
		attempt.FeedbackSource != "fabric_service_channel_access_report" ||
		attempt.FeedbackChannelID != "channel-1" ||
		attempt.FeedbackResourceID != "vpn-home" ||
		attempt.FeedbackViolationStatus != "fabric_route_send_failed_degraded_route_blocked" {
		t.Fatalf("unexpected rebuild attempt feedback correlation: %+v", attempt)
	}
	if jsonString(jsonObject(attempt.Payload), "feedback_observation_id") != decision.FeedbackObservationID ||
		jsonString(jsonObject(attempt.Payload), "feedback_source") != "fabric_service_channel_access_report" {
		t.Fatalf("rebuild attempt payload missing feedback correlation: %s", string(attempt.Payload))
	}
	health, err := service.GetFabricServiceChannelRouteRebuildHealthSummary(context.Background(), "admin-1", GetFabricServiceChannelRouteRebuildHealthSummaryInput{
		ClusterID: "cluster-1",
		Limit:     10,
	})
	if err != nil {
		t.Fatalf("get rebuild health: %v", err)
	}
	if len(health.FeedbackBreakdowns) != 1 {
		t.Fatalf("feedback breakdowns = %+v, want one access-report group", health.FeedbackBreakdowns)
	}
	breakdown := health.FeedbackBreakdowns[0]
	if breakdown.FeedbackSource != "fabric_service_channel_access_report" ||
		breakdown.FeedbackChannelID != "channel-1" ||
		breakdown.FeedbackViolationStatus != "fabric_route_send_failed_degraded_route_blocked" ||
		breakdown.TotalCount != 1 ||
		len(breakdown.AffectedReporterNodeIDs) != 1 ||
		breakdown.AffectedReporterNodeIDs[0] != "entry-1" ||
		len(breakdown.AffectedRouteIDs) != 1 ||
		breakdown.AffectedRouteIDs[0] != "route-bad" {
		t.Fatalf("unexpected feedback breakdown: %+v", breakdown)
	}
}
func TestRecordHeartbeatDeduplicatesBlockedFallbackAccessFeedback(t *testing.T) {
now := time.Now().UTC().Truncate(time.Second)
store := &fakeRepository{
platformRole: PlatformRoleAdmin,
testingFlags: EffectiveNodeTestingFlags{
Enabled: true,
SyntheticLinksEnabled: true,
},
routeIntents: []MeshRouteIntent{
{
ID: "route-bad",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"entry-1"}`),
DestinationSelector: json.RawMessage(`{"node_id":"exit-1"}`),
ServiceClass: FabricServiceClassVPNPackets,
Priority: 100,
Status: "active",
Policy: json.RawMessage(`{"synthetic_enabled":true,"hops":["entry-1","exit-1"],"allowed_channels":["vpn_packet","fabric_control"]}`),
UpdatedAt: now,
},
},
fabricLeases: map[string]FabricServiceChannelLeaseRecord{
fabricServiceChannelLeaseCacheKey("cluster-1", "channel-1"): {
ClusterID: "cluster-1",
ChannelID: "channel-1",
ResourceID: "vpn-home",
ServiceClass: FabricServiceClassVPNPackets,
SelectedEntryNodeID: "entry-1",
ExpiresAt: now.Add(time.Minute),
Lease: FabricServiceChannelLease{
ClusterID: "cluster-1",
ChannelID: "channel-1",
ResourceID: "vpn-home",
ServiceClass: FabricServiceClassVPNPackets,
Status: FabricServiceChannelStatusReady,
SelectedEntryNodeID: "entry-1",
SelectedExitNodeID: "exit-1",
PrimaryRoute: FabricServiceChannelRoute{
RouteID: "route-bad",
Status: "authorized",
},
ExpiresAt: now.Add(time.Minute),
},
},
},
}
service := NewService(store)
heartbeat := RecordHeartbeatInput{
ClusterID: "cluster-1",
NodeID: "entry-1",
HealthStatus: "healthy",
Metadata: json.RawMessage(`{
"fabric_service_channel_access_report": {
"schema_version": "c18z52.fabric_service_channel_access_report.v1",
"total": 1,
"signed": 1,
"degraded_route_count": 0,
"degraded_route_blocked_count": 1,
"fabric_route_send_failure": 1,
"data_plane_contract": 1,
"last_degraded_route_policy": "disabled",
"last_working_data_transport": "fabric_service_channel",
"last_steady_state_transport": "fabric_route",
"last_data_plane_violation_status": "fabric_route_send_failed_degraded_route_blocked",
"last_data_plane_violation_reason": "mesh synthetic route not found"
}
}`),
}
if _, err := service.RecordHeartbeat(context.Background(), heartbeat); err != nil {
t.Fatalf("record first heartbeat: %v", err)
}
if _, err := service.RecordHeartbeat(context.Background(), heartbeat); err != nil {
t.Fatalf("record duplicate heartbeat: %v", err)
}
if len(store.fabricRouteFeedback) != 1 {
t.Fatalf("route feedback count = %d, want duplicate access-report feedback suppressed: %+v", len(store.fabricRouteFeedback), store.fabricRouteFeedback)
}
feedback := store.fabricRouteFeedback[0]
if feedback.RouteID != "route-bad" ||
feedback.FeedbackStatus != "fenced" ||
!containsString(feedback.Reasons, "data_plane_fabric_route_send_failed") ||
jsonString(jsonObject(feedback.Payload), "source") != "fabric_service_channel_access_report" {
t.Fatalf("unexpected deduplicated feedback: %+v", feedback)
}
}
// TestRecordHeartbeatUsesRollingQualityWindowForRouteFeedback sends a runtime
// report that contains both stale failure counters and a clean quality window,
// and expects the persisted observation to be a healthy one for route-good
// whose counters come from the window rather than the stale fields.
func TestRecordHeartbeatUsesRollingQualityWindowForRouteFeedback(t *testing.T) {
	now := time.Now().UTC().Truncate(time.Second)
	repo := &fakeRepository{}
	svc := NewService(repo)
	svc.now = func() time.Time { return now }

	metadata := json.RawMessage(`{
"fabric_service_channel_runtime_report": {
"schema_version": "c18z21.fabric_service_channel_runtime_report.v1",
"ingress": {
"flow_scheduler": {
"channel_stats": {
"vpn:vpn-1:flow-01": {
"last_route_id": "route-good",
"last_failed_route_id": "route-bad",
"last_error": "old failure",
"consecutive_failures": 2,
"stall_count": 2,
"last_send_duration_ms": 1500,
"route_rebuild_recommended": true,
"degraded_fallback_recommended": true,
"quality_window_sample_count": 32,
"quality_window_success_count": 32,
"quality_window_failure_count": 0,
"quality_window_slow_count": 0,
"quality_window_drop_count": 0,
"quality_window_avg_latency_ms": 1
}
}
}
}
}
}`)
	if _, err := svc.RecordHeartbeat(context.Background(), RecordHeartbeatInput{
		ClusterID:    "cluster-1",
		NodeID:       "entry-1",
		HealthStatus: "healthy",
		Metadata:     metadata,
	}); err != nil {
		t.Fatalf("record heartbeat: %v", err)
	}

	if n := len(repo.fabricRouteFeedback); n != 1 {
		t.Fatalf("route feedback count = %d, want one healthy fresh observation: %+v", n, repo.fabricRouteFeedback)
	}
	got := repo.fabricRouteFeedback[0]

	// Route identity and status must reflect the fresh window.
	if !(got.RouteID == "route-good" && got.FeedbackStatus == "healthy") {
		t.Fatalf("route feedback = %+v, want rolling window to ignore old failed route", got)
	}
	// Counters are expected to be the window values (0 failures, 0 stalls, 1ms).
	if !(got.ConsecutiveFailures == 0 && got.StallCount == 0 && got.LastSendDurationMs == 1) {
		t.Fatalf("rolling counters = failures:%d stalls:%d latency:%d, want fresh window values", got.ConsecutiveFailures, got.StallCount, got.LastSendDurationMs)
	}
	hasWindowReason := containsString(got.Reasons, "service_channel_rolling_quality_window")
	if !hasWindowReason || !containsString(got.Reasons, "service_channel_quality_latency_le_10ms") {
		t.Fatalf("feedback reasons = %+v, want rolling window quality reasons", got.Reasons)
	}
}
// TestRecordHeartbeatSanitizesNullUnicodeEscapes records a heartbeat whose
// metadata contains a `\u0000` escape and checks that the escape is absent both
// from the returned heartbeat and from the copy kept by the fake repository.
func TestRecordHeartbeatSanitizesNullUnicodeEscapes(t *testing.T) {
	store := &fakeRepository{}
	service := NewService(store)
	heartbeat, err := service.RecordHeartbeat(context.Background(), RecordHeartbeatInput{
		ClusterID:    "cluster-1",
		NodeID:       "node-1",
		HealthStatus: "healthy",
		Metadata:     json.RawMessage(`{"fabric_registry_runtime_report":{"status":"candidate_only","bad":"prefix\u0000suffix"}}`),
	})
	if err != nil {
		t.Fatalf("record heartbeat: %v", err)
	}
	if strings.Contains(string(heartbeat.Metadata), `\u0000`) {
		t.Fatalf("heartbeat metadata still contains null escape: %s", string(heartbeat.Metadata))
	}
	// Guard the index: if nothing was stored for node-1 the test should fail
	// with a readable message rather than panic on an out-of-range access.
	stored := store.heartbeats["node-1"]
	if len(stored) == 0 {
		t.Fatalf("no heartbeat stored for node-1: %+v", store.heartbeats)
	}
	if strings.Contains(string(stored[0].Metadata), `\u0000`) {
		t.Fatalf("stored heartbeat metadata still contains null escape: %s", string(stored[0].Metadata))
	}
}
func TestGetNodeSyntheticMeshConfigSkipsFencedServiceChannelRoute(t *testing.T) {
now := time.Now().UTC().Truncate(time.Second)
store := &fakeRepository{
platformRole: PlatformRoleAdmin,
testingFlags: EffectiveNodeTestingFlags{
Enabled: true,
SyntheticLinksEnabled: true,
},
routeIntents: []MeshRouteIntent{
{
ID: "route-bad",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"entry-1"}`),
DestinationSelector: json.RawMessage(`{"node_id":"exit-1"}`),
ServiceClass: FabricServiceClassVPNPackets,
Priority: 100,
Status: "active",
Policy: json.RawMessage(`{
"synthetic_enabled": true,
"hops": ["entry-1", "exit-1"],
"allowed_channels": ["vpn_packet", "fabric_control"]
}`),
UpdatedAt: now,
},
{
ID: "route-good",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"entry-1"}`),
DestinationSelector: json.RawMessage(`{"node_id":"exit-1"}`),
ServiceClass: FabricServiceClassVPNPackets,
Priority: 10,
Status: "active",
Policy: json.RawMessage(`{
"synthetic_enabled": true,
"hops": ["entry-1", "exit-1"],
"allowed_channels": ["vpn_packet", "fabric_control"]
}`),
UpdatedAt: now,
},
{
ID: "route-unproven",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"entry-1"}`),
DestinationSelector: json.RawMessage(`{"node_id":"exit-1"}`),
ServiceClass: FabricServiceClassVPNPackets,
Priority: 900,
Status: "active",
Policy: json.RawMessage(`{
"synthetic_enabled": true,
"hops": ["entry-1", "exit-1"],
"allowed_channels": ["vpn_packet", "fabric_control"]
}`),
UpdatedAt: now,
},
},
fabricRouteFeedback: []FabricServiceChannelRouteFeedbackObservation{
{
ClusterID: "cluster-1",
ReporterNodeID: "entry-1",
RouteID: "route-bad",
ServiceClass: FabricServiceClassVPNPackets,
FeedbackStatus: "fenced",
ScoreAdjustment: -1030,
Reasons: []string{"service_channel_route_rebuild_recommended"},
ConsecutiveFailures: 2,
ObservedAt: now,
ExpiresAt: now.Add(time.Minute),
},
{
ClusterID: "cluster-1",
ReporterNodeID: "entry-1",
RouteID: "route-good",
ServiceClass: FabricServiceClassVPNPackets,
FeedbackStatus: "healthy",
ScoreAdjustment: 10,
Reasons: []string{"service_channel_recent_success"},
ObservedAt: now,
ExpiresAt: now.Add(time.Minute),
},
},
fabricLeases: map[string]FabricServiceChannelLeaseRecord{
fabricServiceChannelLeaseCacheKey("cluster-1", "channel-1"): {
ClusterID: "cluster-1",
ChannelID: "channel-1",
ResourceID: "vpn-home",
ServiceClass: FabricServiceClassVPNPackets,
SelectedEntryNodeID: "entry-1",
ExpiresAt: now.Add(time.Minute),
Lease: FabricServiceChannelLease{
ClusterID: "cluster-1",
ChannelID: "channel-1",
ResourceID: "vpn-home",
ServiceClass: FabricServiceClassVPNPackets,
Status: FabricServiceChannelStatusReady,
SelectedEntryNodeID: "entry-1",
SelectedExitNodeID: "exit-1",
PrimaryRoute: FabricServiceChannelRoute{
RouteID: "route-bad",
Status: "authorized",
},
AlternateRoutes: []FabricServiceChannelRoute{{
RouteID: "route-good",
Status: "authorized",
}},
ExpiresAt: now.Add(time.Minute),
},
},
},
}
service := NewService(store)
service.now = func() time.Time { return now }
cfg, err := service.GetNodeSyntheticMeshConfig(context.Background(), GetNodeSyntheticMeshConfigInput{
ClusterID: "cluster-1",
NodeID: "entry-1",
})
if err != nil {
t.Fatalf("synthetic config: %v", err)
}
if len(cfg.Routes) != 2 || containsRouteID(cfg.Routes, "route-bad") || !containsRouteID(cfg.Routes, "route-good") {
t.Fatalf("routes = %+v, want route-bad excluded and route-good retained", cfg.Routes)
}
if cfg.ServiceChannelFeedback == nil || cfg.ServiceChannelFeedback.FencedRouteCount != 1 || cfg.ServiceChannelFeedback.HealthyRouteCount != 1 {
t.Fatalf("feedback report missing fenced count: %+v", cfg.ServiceChannelFeedback)
}
if cfg.RoutePathDecisions == nil || cfg.RoutePathDecisions.ReplacementDecisionCount != 1 {
t.Fatalf("expected one service-channel replacement decision: %+v", cfg.RoutePathDecisions)
}
if len(cfg.ServiceChannelRemediationCommands) != 1 {
t.Fatalf("remediation commands = %+v, want one", cfg.ServiceChannelRemediationCommands)
}
command := cfg.ServiceChannelRemediationCommands[0]
if command.Action != "prefer_alternate_route" ||
command.PrimaryRouteID != "route-bad" ||
command.ReplacementRouteID != "route-good" ||
command.ChannelID != "channel-1" ||
!command.ExpiresAt.After(now) {
t.Fatalf("unexpected remediation command: %+v", command)
}
var replacement RoutePathDecision
for _, decision := range cfg.RoutePathDecisions.Decisions {
if decision.DecisionSource == "service_channel_feedback_replacement" {
replacement = decision
break
}
}
if replacement.RouteID != "route-bad" || replacement.ReplacementRouteID != "route-good" ||
replacement.RebuildStatus != "applied" ||
replacement.RebuildRequestID == "" ||
!containsString(replacement.ScoreReasons, "selected_unfenced_alternate_route") ||
!containsString(replacement.ScoreReasons, "service_channel_rebuild_applied") ||
!containsString(replacement.ScoreReasons, "active_healthy_feedback_dampening_window") {
t.Fatalf("unexpected replacement decision: %+v", replacement)
}
attempts, err := service.ListFabricServiceChannelRouteRebuildAttempts(context.Background(), "admin-1", ListFabricServiceChannelRouteRebuildAttemptsInput{
ClusterID: "cluster-1",
ReporterNodeID: "entry-1",
RouteID: "route-bad",
})
if err != nil {
t.Fatalf("list rebuild attempts: %v", err)
}
if len(attempts) != 1 {
t.Fatalf("rebuild attempts = %+v, want one", attempts)
}
attempt := attempts[0]
if attempt.RebuildStatus != "applied" ||
attempt.Outcome != "replacement_selected" ||
attempt.ReplacementRouteID != "route-good" ||
attempt.RebuildRequestID != replacement.RebuildRequestID ||
attempt.FeedbackStatus != "fenced" ||
attempt.ConsecutiveFailures != 2 ||
!containsString(attempt.FeedbackReasons, "service_channel_route_rebuild_recommended") ||
!reflect.DeepEqual(attempt.OldHops, []string{"entry-1", "exit-1"}) ||
!reflect.DeepEqual(attempt.ReplacementHops, []string{"entry-1", "exit-1"}) {
t.Fatalf("unexpected rebuild ledger attempt: %+v", attempt)
}
}
func TestGetNodeSyntheticMeshConfigReportsRebuildPendingWhenNoAlternateExists(t *testing.T) {
now := time.Date(2026, 5, 7, 14, 0, 0, 0, time.UTC)
store := &fakeRepository{
testingFlags: EffectiveNodeTestingFlags{
Enabled: true,
SyntheticLinksEnabled: true,
},
routeIntents: []MeshRouteIntent{
{
ID: "route-bad",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"entry-1"}`),
DestinationSelector: json.RawMessage(`{"node_id":"exit-1"}`),
ServiceClass: FabricServiceClassVPNPackets,
Priority: 100,
Status: "active",
Policy: json.RawMessage(`{
"synthetic_enabled": true,
"hops": ["entry-1", "exit-1"],
"allowed_channels": ["vpn_packet", "fabric_control"]
}`),
UpdatedAt: now,
},
},
fabricRouteFeedback: []FabricServiceChannelRouteFeedbackObservation{
{
ClusterID: "cluster-1",
ReporterNodeID: "entry-1",
RouteID: "route-bad",
ServiceClass: FabricServiceClassVPNPackets,
FeedbackStatus: "fenced",
ScoreAdjustment: -1030,
Reasons: []string{"service_channel_route_rebuild_recommended", "service_channel_degraded_fallback_recommended"},
ConsecutiveFailures: 3,
ObservedAt: now,
ExpiresAt: now.Add(time.Minute),
},
},
}
service := NewService(store)
service.now = func() time.Time { return now }
cfg, err := service.GetNodeSyntheticMeshConfig(context.Background(), GetNodeSyntheticMeshConfigInput{
ClusterID: "cluster-1",
NodeID: "entry-1",
})
if err != nil {
t.Fatalf("synthetic config: %v", err)
}
if !containsRouteID(cfg.Routes, "route-bad") {
t.Fatalf("fenced route should be retained until an alternate exists: %+v", cfg.Routes)
}
if cfg.RoutePathDecisions == nil || cfg.RoutePathDecisions.RebuildRequestCount != 1 || cfg.RoutePathDecisions.DegradedDecisionCount != 0 {
t.Fatalf("expected rebuild/degraded decision counts: %+v", cfg.RoutePathDecisions)
}
var decision RoutePathDecision
for _, item := range cfg.RoutePathDecisions.Decisions {
if item.DecisionSource == "service_channel_feedback_no_alternate_keep_primary" {
decision = item
break
}
}
if decision.DecisionSource != "service_channel_feedback_no_alternate_keep_primary" ||
decision.RebuildStatus != "requested" ||
decision.RebuildRequestID == "" ||
decision.RebuildAttempt != 3 ||
!containsString(decision.ScoreReasons, "primary_route_retained_until_rebuild") {
t.Fatalf("unexpected rebuild decision: %+v", decision)
}
}
func TestGetNodeSyntheticMeshConfigReplacesFencedServiceChannelRouteAcrossExitPool(t *testing.T) {
now := time.Date(2026, 5, 7, 14, 15, 0, 0, time.UTC)
store := &fakeRepository{
testingFlags: EffectiveNodeTestingFlags{
Enabled: true,
SyntheticLinksEnabled: true,
},
routeIntents: []MeshRouteIntent{
{
ID: "route-exit-a",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"entry-1"}`),
DestinationSelector: json.RawMessage(`{"node_id":"exit-a"}`),
ServiceClass: FabricServiceClassVPNPackets,
Priority: 100,
Status: "active",
Policy: json.RawMessage(`{
"synthetic_enabled": true,
"hops": ["entry-1", "exit-a"],
"allowed_channels": ["vpn_packet", "fabric_control"],
"metadata": {"exit_pool_id": "pool-home"}
}`),
UpdatedAt: now,
},
{
ID: "route-exit-b",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"entry-1"}`),
DestinationSelector: json.RawMessage(`{"node_id":"exit-b"}`),
ServiceClass: FabricServiceClassVPNPackets,
Priority: 20,
Status: "active",
Policy: json.RawMessage(`{
"synthetic_enabled": true,
"hops": ["entry-1", "exit-b"],
"allowed_channels": ["vpn_packet", "fabric_control"],
"metadata": {"exit_pool_id": "pool-home"}
}`),
UpdatedAt: now,
},
{
ID: "route-other-pool",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"entry-1"}`),
DestinationSelector: json.RawMessage(`{"node_id":"exit-c"}`),
ServiceClass: FabricServiceClassVPNPackets,
Priority: 900,
Status: "active",
Policy: json.RawMessage(`{
"synthetic_enabled": true,
"hops": ["entry-1", "exit-c"],
"allowed_channels": ["vpn_packet", "fabric_control"],
"metadata": {"exit_pool_id": "pool-other"}
}`),
UpdatedAt: now,
},
},
fabricRouteFeedback: []FabricServiceChannelRouteFeedbackObservation{
{
ClusterID: "cluster-1",
ReporterNodeID: "entry-1",
RouteID: "route-exit-a",
ServiceClass: FabricServiceClassVPNPackets,
FeedbackStatus: "fenced",
ScoreAdjustment: -1030,
Reasons: []string{"service_channel_route_rebuild_recommended"},
ConsecutiveFailures: 2,
ObservedAt: now,
ExpiresAt: now.Add(time.Minute),
},
{
ClusterID: "cluster-1",
ReporterNodeID: "entry-1",
RouteID: "route-exit-b",
ServiceClass: FabricServiceClassVPNPackets,
FeedbackStatus: "healthy",
ScoreAdjustment: 10,
Reasons: []string{"service_channel_recent_success"},
ObservedAt: now,
ExpiresAt: now.Add(time.Minute),
},
},
}
service := NewService(store)
service.now = func() time.Time { return now }
cfg, err := service.GetNodeSyntheticMeshConfig(context.Background(), GetNodeSyntheticMeshConfigInput{
ClusterID: "cluster-1",
NodeID: "entry-1",
})
if err != nil {
t.Fatalf("synthetic config: %v", err)
}
if cfg.RoutePathDecisions == nil || cfg.RoutePathDecisions.ReplacementDecisionCount != 1 {
t.Fatalf("expected one exit-pool replacement decision: %+v", cfg.RoutePathDecisions)
}
var replacement RoutePathDecision
for _, decision := range cfg.RoutePathDecisions.Decisions {
if decision.RouteID == "route-exit-a" {
replacement = decision
break
}
}
if replacement.ReplacementRouteID != "route-exit-b" ||
replacement.DecisionSource != "service_channel_feedback_exit_pool_replacement" ||
replacement.RebuildStatus != "applied" ||
!containsString(replacement.ScoreReasons, "selected_unfenced_exit_pool_route") {
t.Fatalf("unexpected exit-pool replacement decision: %+v", replacement)
}
}
func TestGetNodeSyntheticMeshConfigReplacesFencedServiceChannelRouteAcrossEntryPool(t *testing.T) {
now := time.Date(2026, 5, 7, 15, 5, 0, 0, time.UTC)
store := &fakeRepository{
testingFlags: EffectiveNodeTestingFlags{
Enabled: true,
SyntheticLinksEnabled: true,
},
routeIntents: []MeshRouteIntent{
{
ID: "route-entry-a",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"entry-a"}`),
DestinationSelector: json.RawMessage(`{"node_id":"exit-1"}`),
ServiceClass: FabricServiceClassVPNPackets,
Priority: 100,
Status: "active",
Policy: json.RawMessage(`{
"synthetic_enabled": true,
"hops": ["entry-a", "exit-1"],
"allowed_channels": ["vpn_packet", "fabric_control"],
"metadata": {"entry_pool_id": "pool-edge"}
}`),
UpdatedAt: now,
},
{
ID: "route-entry-b",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"entry-b"}`),
DestinationSelector: json.RawMessage(`{"node_id":"exit-1"}`),
ServiceClass: FabricServiceClassVPNPackets,
Priority: 20,
Status: "active",
Policy: json.RawMessage(`{
"synthetic_enabled": true,
"hops": ["entry-b", "exit-1"],
"allowed_channels": ["vpn_packet", "fabric_control"],
"metadata": {"entry_pool_id": "pool-edge"}
}`),
UpdatedAt: now,
},
{
ID: "route-other-pool",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"entry-c"}`),
DestinationSelector: json.RawMessage(`{"node_id":"exit-1"}`),
ServiceClass: FabricServiceClassVPNPackets,
Priority: 900,
Status: "active",
Policy: json.RawMessage(`{
"synthetic_enabled": true,
"hops": ["entry-c", "exit-1"],
"allowed_channels": ["vpn_packet", "fabric_control"],
"metadata": {"entry_pool_id": "pool-other"}
}`),
UpdatedAt: now,
},
},
fabricRouteFeedback: []FabricServiceChannelRouteFeedbackObservation{
{
ClusterID: "cluster-1",
ReporterNodeID: "exit-1",
RouteID: "route-entry-a",
ServiceClass: FabricServiceClassVPNPackets,
FeedbackStatus: "fenced",
ScoreAdjustment: -1030,
Reasons: []string{"service_channel_route_rebuild_recommended"},
ConsecutiveFailures: 2,
ObservedAt: now,
ExpiresAt: now.Add(time.Minute),
},
{
ClusterID: "cluster-1",
ReporterNodeID: "exit-1",
RouteID: "route-entry-b",
ServiceClass: FabricServiceClassVPNPackets,
FeedbackStatus: "healthy",
ScoreAdjustment: 10,
Reasons: []string{"service_channel_recent_success"},
ObservedAt: now,
ExpiresAt: now.Add(time.Minute),
},
},
}
service := NewService(store)
service.now = func() time.Time { return now }
cfg, err := service.GetNodeSyntheticMeshConfig(context.Background(), GetNodeSyntheticMeshConfigInput{
ClusterID: "cluster-1",
NodeID: "exit-1",
})
if err != nil {
t.Fatalf("synthetic config: %v", err)
}
if cfg.RoutePathDecisions == nil || cfg.RoutePathDecisions.ReplacementDecisionCount != 1 {
t.Fatalf("expected one entry-pool replacement decision: %+v", cfg.RoutePathDecisions)
}
var replacement RoutePathDecision
for _, decision := range cfg.RoutePathDecisions.Decisions {
if decision.RouteID == "route-entry-a" {
replacement = decision
break
}
}
if replacement.ReplacementRouteID != "route-entry-b" ||
replacement.DecisionSource != "service_channel_feedback_entry_pool_replacement" ||
replacement.RebuildStatus != "applied" ||
!containsString(replacement.ScoreReasons, "selected_unfenced_entry_pool_route") {
t.Fatalf("unexpected entry-pool replacement decision: %+v", replacement)
}
if replacement.LocalRole != "exit" || replacement.PreviousHopID != "entry-b" {
t.Fatalf("entry-pool replacement should be visible from shared exit perspective: %+v", replacement)
}
}
// TestExpireFabricServiceChannelRouteFeedbackRemovesActiveFeedback expires the
// feedback for route-bad and then checks the result fields, the trailing audit
// event type, that only route-other remains in the active listing, and that the
// expired observation is still listed with ExpiresAt equal to the pinned clock.
func TestExpireFabricServiceChannelRouteFeedbackRemovesActiveFeedback(t *testing.T) {
	ctx := context.Background()
	now := time.Date(2026, 5, 7, 12, 0, 0, 0, time.UTC)
	observedAt := now.Add(-time.Minute)
	expiresAt := now.Add(time.Minute)

	repo := &fakeRepository{
		platformRole: PlatformRoleAdmin,
		authorityState: ClusterAuthorityState{
			ClusterID:      "cluster-1",
			AuthorityState: "authoritative",
			MutationMode:   "normal",
		},
		fabricRouteFeedback: []FabricServiceChannelRouteFeedbackObservation{
			{
				ClusterID:      "cluster-1",
				ReporterNodeID: "entry-1",
				RouteID:        "route-bad",
				ServiceClass:   FabricServiceClassVPNPackets,
				FeedbackStatus: "fenced",
				ObservedAt:     observedAt,
				ExpiresAt:      expiresAt,
			},
			{
				ClusterID:      "cluster-1",
				ReporterNodeID: "entry-2",
				RouteID:        "route-other",
				ServiceClass:   FabricServiceClassVPNPackets,
				FeedbackStatus: "healthy",
				ObservedAt:     observedAt,
				ExpiresAt:      expiresAt,
			},
		},
	}
	svc := NewService(repo)
	svc.now = func() time.Time { return now }

	outcome, err := svc.ExpireFabricServiceChannelRouteFeedback(ctx, ExpireFabricServiceChannelRouteFeedbackInput{
		ActorUserID:    "admin-1",
		ClusterID:      "cluster-1",
		ReporterNodeID: "entry-1",
		RouteID:        "route-bad",
		ServiceClass:   FabricServiceClassVPNPackets,
		Reason:         "operator verified route is healthy",
	})
	if err != nil {
		t.Fatalf("expire feedback: %v", err)
	}
	wantCooldown := now.Add(fabricServiceChannelOperatorExpireCooldown)
	if outcome.ExpiredCount != 1 || !outcome.ExpiredAt.Equal(now) || !outcome.CooldownUntil.Equal(wantCooldown) {
		t.Fatalf("unexpected expire result: %+v", outcome)
	}

	// The most recent audit event must be the expire event.
	if n := len(repo.auditEvents); n == 0 || repo.auditEvents[n-1].EventType != "fabric.service_channel_route_feedback.expired" {
		t.Fatalf("missing feedback expire audit event: %+v", repo.auditEvents)
	}

	stillActive, err := svc.ListFabricServiceChannelRouteFeedback(ctx, "admin-1", ListFabricServiceChannelRouteFeedbackInput{
		ClusterID: "cluster-1",
		Now:       now,
	})
	if err != nil {
		t.Fatalf("list feedback: %v", err)
	}
	if len(stillActive) != 1 || stillActive[0].RouteID != "route-other" {
		t.Fatalf("active feedback = %+v, want only route-other", stillActive)
	}

	withExpired, err := svc.ListFabricServiceChannelRouteFeedback(ctx, "admin-1", ListFabricServiceChannelRouteFeedbackInput{
		ClusterID:      "cluster-1",
		RouteID:        "route-bad",
		IncludeExpired: true,
		Now:            now,
	})
	if err != nil {
		t.Fatalf("list expired feedback: %v", err)
	}
	if len(withExpired) != 1 || !withExpired[0].ExpiresAt.Equal(now) {
		t.Fatalf("expired feedback = %+v, want route-bad expired at now", withExpired)
	}
}
// TestRecordFabricServiceChannelRouteRebuildFeedbackBreakdownInvestigationAudit
// records a rebuild investigation opened from the rebuild-health feedback
// breakdown and expects exactly one audit event whose type, target, payload
// fields and creation time match the request and the injected clock.
func TestRecordFabricServiceChannelRouteRebuildFeedbackBreakdownInvestigationAudit(t *testing.T) {
	now := time.Date(2026, 5, 9, 13, 30, 0, 0, time.UTC)
	store := &fakeRepository{platformRole: PlatformRoleAdmin}
	service := NewService(store)
	// Pin the service clock so CreatedAt can be compared exactly below.
	service.now = func() time.Time { return now }

	err := service.RecordFabricServiceChannelRouteRebuildInvestigation(context.Background(), RecordFabricServiceChannelRouteRebuildInvestigationInput{
		ActorUserID:             "admin-1",
		ClusterID:               "cluster-1",
		FeedbackSource:          "fabric_service_channel_access_report",
		FeedbackChannelID:       "channel-1",
		FeedbackViolationStatus: "fabric_route_send_failed_degraded_route_blocked",
		DrilldownSource:         "rebuild_health_feedback_breakdown",
		Reason:                  "operator opened rebuild-health feedback breakdown ledger",
	})
	if err != nil {
		t.Fatalf("record investigation: %v", err)
	}
	if len(store.auditEvents) != 1 {
		t.Fatalf("audit events = %d, want 1", len(store.auditEvents))
	}

	event := store.auditEvents[0]
	if event.EventType != "fabric.service_channel_rebuild_feedback_breakdown.investigation_opened" {
		t.Fatalf("event type = %q", event.EventType)
	}
	if event.TargetType != "fabric_service_channel_rebuild_feedback_breakdown" || event.TargetID == nil || *event.TargetID != "channel-1" {
		// Report the pointed-to ID: formatting the pointer itself with %v
		// would print an address instead of the value under test.
		targetID := "<nil>"
		if event.TargetID != nil {
			targetID = *event.TargetID
		}
		t.Fatalf("unexpected target: type=%q id=%v", event.TargetType, targetID)
	}

	// The payload must echo the feedback identifiers supplied in the request.
	payload := jsonObject(event.Payload)
	if jsonString(payload, "feedback_source") != "fabric_service_channel_access_report" ||
		jsonString(payload, "feedback_channel_id") != "channel-1" ||
		jsonString(payload, "feedback_violation_status") != "fabric_route_send_failed_degraded_route_blocked" ||
		jsonString(payload, "drilldown_source") != "rebuild_health_feedback_breakdown" {
		t.Fatalf("unexpected audit payload: %s", string(event.Payload))
	}
	if !event.CreatedAt.Equal(now) {
		t.Fatalf("created_at = %s, want %s", event.CreatedAt, now)
	}
}
func TestListAuditEventsFiltersFabricInvestigationBreadcrumbs(t *testing.T) {
clusterID := "cluster-1"
otherClusterID := "cluster-other"
now := time.Date(2026, 5, 9, 14, 20, 0, 0, time.UTC)
store := &fakeRepository{
platformRole: PlatformRoleAdmin,
fabricRebuildAttempts: []FabricServiceChannelRouteRebuildAttempt{
{
ID: "attempt-1",
ClusterID: clusterID,
ReporterNodeID: "entry-1",
RouteID: "route-1",
ServiceClass: FabricServiceClassVPNPackets,
RebuildStatus: "applied",
Outcome: "replacement_selected",
FeedbackSource: "fabric_service_channel_access_report",
FeedbackChannelID: "channel-1",
FeedbackViolationStatus: "fabric_route_send_failed_degraded_route_blocked",
FeedbackObservedAt: &now,
UpdatedAt: now,
CreatedAt: now,
Payload: json.RawMessage(`{}`),
},
},
auditEvents: []ClusterAuditEvent{
{
ID: "audit-1",
ClusterID: &clusterID,
EventType: "fabric.service_channel_rebuild_feedback_breakdown.investigation_opened",
TargetType: "fabric_service_channel_rebuild_feedback_breakdown",
TargetID: stringPtr("channel-1"),
Payload: json.RawMessage(`{"feedback_source":"fabric_service_channel_access_report","feedback_channel_id":"channel-1","feedback_violation_status":"fabric_route_send_failed_degraded_route_blocked"}`),
CreatedAt: now.Add(-5 * time.Minute),
},
{
ID: "audit-2",
ClusterID: &clusterID,
EventType: "fabric.service_channel_rebuild_incident.investigation_opened",
TargetType: "fabric_service_channel_route_rebuild_incident",
TargetID: stringPtr("route-1"),
Payload: json.RawMessage(`{}`),
CreatedAt: now.Add(-2 * time.Hour),
},
{
ID: "audit-3",
ClusterID: &clusterID,
EventType: "fabric.service_channel_route_feedback.expired",
TargetType: "fabric_service_channel_route_feedback",
TargetID: stringPtr("route-2"),
Payload: json.RawMessage(`{}`),
CreatedAt: now,
},
{
ID: "audit-4",
ClusterID: &otherClusterID,
EventType: "fabric.service_channel_rebuild_feedback_breakdown.investigation_opened",
TargetType: "fabric_service_channel_rebuild_feedback_breakdown",
TargetID: stringPtr("channel-other"),
Payload: json.RawMessage(`{}`),
CreatedAt: now,
},
},
}
service := NewService(store)
service.now = func() time.Time { return now }
events, err := service.ListAuditEvents(context.Background(), "admin-1", ListAuditEventsInput{
ClusterID: clusterID,
EventTypes: []string{
"fabric.service_channel_rebuild_feedback_breakdown.investigation_opened",
"fabric.service_channel_rebuild_incident.investigation_opened",
},
Correlation: "fabric_diagnostics",
Limit: 10,
})
if err != nil {
t.Fatalf("list audit events: %v", err)
}
if len(events) != 2 || events[0].ID != "audit-1" || events[1].ID != "audit-2" {
t.Fatalf("events = %+v, want only fabric investigation breadcrumbs", events)
}
if events[0].CorrelationHints == nil || events[0].CorrelationHints.CurrentDiagnosticStatus != "breakdown_active" ||
events[0].CorrelationHints.FeedbackBreakdown == nil || events[0].CorrelationHints.FeedbackBreakdown.FeedbackChannelID != "channel-1" {
t.Fatalf("feedback breadcrumb correlation hints = %+v", events[0].CorrelationHints)
}
breadcrumbs, err := service.ListFabricServiceChannelRebuildInvestigationBreadcrumbs(context.Background(), "admin-1", ListFabricServiceChannelRebuildInvestigationBreadcrumbsInput{
ClusterID: clusterID,
Limit: 10,
CurrentWindowSeconds: int64((30 * time.Minute).Seconds()),
HistoryWindowSeconds: int64((24 * time.Hour).Seconds()),
})
if err != nil {
t.Fatalf("list breadcrumbs: %v", err)
}
if len(breadcrumbs.Events) != 2 || breadcrumbs.Summary.TotalCount != 2 || breadcrumbs.Summary.CorrelatedCount != 2 {
t.Fatalf("breadcrumbs = %+v", breadcrumbs)
}
if breadcrumbs.Summary.CountsByCurrentDiagnosticStatus["breakdown_active"] != 1 ||
breadcrumbs.Summary.CountsByCurrentDiagnosticStatus["incident_visible"] != 1 {
t.Fatalf("breadcrumb summary statuses = %+v", breadcrumbs.Summary.CountsByCurrentDiagnosticStatus)
}
if breadcrumbs.CurrentCount != 1 || breadcrumbs.StaleCount != 1 || breadcrumbs.ExpiredCount != 0 ||
breadcrumbs.Summary.CountsByBreadcrumbStatus["current"] != 1 ||
breadcrumbs.Summary.CountsByBreadcrumbStatus["stale"] != 1 {
t.Fatalf("breadcrumb freshness = %+v summary=%+v", breadcrumbs, breadcrumbs.Summary.CountsByBreadcrumbStatus)
}
}
func TestRebuildHealthSilenceIsGenerationScoped(t *testing.T) {
now := time.Date(2026, 5, 8, 12, 0, 0, 0, time.UTC)
store := &fakeRepository{
platformRole: PlatformRoleAdmin,
fabricRebuildAttempts: []FabricServiceChannelRouteRebuildAttempt{
{
ID: "attempt-old",
ClusterID: "cluster-1",
ReporterNodeID: "entry-1",
ServiceClass: FabricServiceClassVPNPackets,
RouteID: "route-1",
RebuildStatus: "applied",
Generation: "gen-old",
UpdatedAt: now.Add(-5 * time.Minute),
CreatedAt: now.Add(-5 * time.Minute),
Payload: json.RawMessage(`{}`),
},
{
ID: "attempt-new",
ClusterID: "cluster-1",
ReporterNodeID: "entry-1",
ServiceClass: FabricServiceClassVPNPackets,
RouteID: "route-1",
RebuildStatus: "applied",
Generation: "gen-new",
UpdatedAt: now.Add(-4 * time.Minute),
CreatedAt: now.Add(-4 * time.Minute),
Payload: json.RawMessage(`{}`),
},
},
}
service := NewService(store)
service.now = func() time.Time { return now }
_, err := service.SilenceFabricServiceChannelRouteRebuildAlert(context.Background(), SilenceFabricServiceChannelRouteRebuildAlertInput{
ActorUserID: "admin-1",
ClusterID: "cluster-1",
ReporterNodeID: "entry-1",
RouteID: "route-1",
GuardStatus: "missing_node_transition",
Generation: "gen-old",
Reason: "known old test route",
TTL: time.Hour,
Now: now,
})
if err != nil {
t.Fatalf("silence rebuild alert: %v", err)
}
summary, err := service.GetFabricServiceChannelRouteRebuildHealthSummary(context.Background(), "admin-1", GetFabricServiceChannelRouteRebuildHealthSummaryInput{
ClusterID: "cluster-1",
Limit: 20,
})
if err != nil {
t.Fatalf("get rebuild health: %v", err)
}
if summary.BadCount != 2 || summary.SilencedCount != 1 || summary.ActiveBadCount != 1 {
t.Fatalf("summary counts = %+v, want bad=2 silenced=1 active_bad=1", summary)
}
if len(summary.MostRecentBadAttempts) != 1 || summary.MostRecentBadAttempts[0].Generation != "gen-new" {
t.Fatalf("active bad attempts = %+v, want only fresh generation", summary.MostRecentBadAttempts)
}
if summary.ResurfacedCount != 1 || len(summary.ResurfacedAttempts) != 1 || summary.ResurfacedAttempts[0].AlertResurfacedPreviousGeneration != "gen-old" {
t.Fatalf("resurfaced attempts = %+v / count %d, want gen-new resurfaced from gen-old", summary.ResurfacedAttempts, summary.ResurfacedCount)
}
readiness, err := service.GetFabricServiceChannelReadiness(context.Background(), "admin-1", GetFabricServiceChannelReadinessInput{
ClusterID: "cluster-1",
Limit: 20,
})
if err != nil {
t.Fatalf("get readiness: %v", err)
}
if readiness.Status != "blocked" || readiness.Reason != "resurfaced_rebuild_alert" || readiness.ResurfacedCount != 1 {
t.Fatalf("readiness = %+v, want blocked by resurfaced alert", readiness)
}
}
// TestOperatorExpiredFabricServiceChannelFeedbackAllowsRetryAndSuppressesImmediateChurn
// starts from fenced feedback that expires exactly at the test clock but still
// carries an operator retry cooldown. It expects the synthetic mesh config to
// keep offering the route with a manual-retry score reason, and expects new
// fenced feedback recorded inside the cooldown to be listed with status
// "operator_retry_cooldown" and a zero score adjustment.
func TestOperatorExpiredFabricServiceChannelFeedbackAllowsRetryAndSuppressesImmediateChurn(t *testing.T) {
	const (
		clusterID  = "cluster-1"
		entryNode  = "entry-1"
		retryRoute = "route-retry"
	)
	now := time.Date(2026, 5, 7, 12, 30, 0, 0, time.UTC)
	cooldownUntil := now.Add(fabricServiceChannelOperatorExpireCooldown)
	retryAt := now.Add(30 * time.Second)
	ctx := context.Background()

	routePolicy := json.RawMessage(`{
"synthetic_enabled": true,
"hops": ["entry-1", "exit-1"],
"allowed_channels": ["vpn_packet", "fabric_control"]
}`)
	store := &fakeRepository{
		platformRole: PlatformRoleAdmin,
		authorityState: ClusterAuthorityState{
			ClusterID:      clusterID,
			AuthorityState: "authoritative",
			MutationMode:   "normal",
		},
		testingFlags: EffectiveNodeTestingFlags{
			Enabled:               true,
			SyntheticLinksEnabled: true,
		},
		routeIntents: []MeshRouteIntent{
			{
				ID:                  retryRoute,
				ClusterID:           clusterID,
				SourceSelector:      json.RawMessage(`{"node_id":"entry-1"}`),
				DestinationSelector: json.RawMessage(`{"node_id":"exit-1"}`),
				ServiceClass:        FabricServiceClassVPNPackets,
				Priority:            100,
				Status:              "active",
				Policy:              routePolicy,
				UpdatedAt:           now,
			},
		},
		fabricRouteFeedback: []FabricServiceChannelRouteFeedbackObservation{
			{
				// Already expired at "now", but the cooldown is still running.
				ClusterID:          clusterID,
				ReporterNodeID:     entryNode,
				RouteID:            retryRoute,
				ServiceClass:       FabricServiceClassVPNPackets,
				FeedbackStatus:     "fenced",
				ScoreAdjustment:    -1030,
				Reasons:            []string{"service_channel_route_rebuild_recommended"},
				ObservedAt:         now.Add(-time.Minute),
				ExpiresAt:          now,
				RetryCooldownUntil: &cooldownUntil,
			},
		},
	}
	service := NewService(store)
	service.now = func() time.Time { return now }

	cfg, err := service.GetNodeSyntheticMeshConfig(ctx, GetNodeSyntheticMeshConfigInput{
		ClusterID: clusterID,
		NodeID:    entryNode,
	})
	if err != nil {
		t.Fatalf("synthetic config: %v", err)
	}
	if !containsRouteID(cfg.Routes, "route-retry") {
		t.Fatalf("route-retry should be retried during operator cooldown: %+v", cfg.Routes)
	}
	decisions := cfg.RoutePathDecisions
	if decisions == nil || len(decisions.Decisions) != 1 ||
		!containsString(decisions.Decisions[0].ScoreReasons, "service_channel_route_retry_after_operator_expire") {
		t.Fatalf("missing manual retry decision reason: %+v", decisions)
	}

	// Report the route as fenced again while the cooldown is still active.
	refenced := RecordFabricServiceChannelRouteFeedbackInput{
		ClusterID:       clusterID,
		ReporterNodeID:  entryNode,
		RouteID:         retryRoute,
		ServiceClass:    FabricServiceClassVPNPackets,
		FeedbackStatus:  "fenced",
		ScoreAdjustment: -1030,
		Reasons:         []string{"service_channel_route_rebuild_recommended"},
		ObservedAt:      retryAt,
		ExpiresAt:       now.Add(3 * time.Minute),
		Payload:         json.RawMessage(`{"last_error":"retry failed"}`),
	}
	if _, err = store.RecordFabricServiceChannelRouteFeedback(ctx, refenced); err != nil {
		t.Fatalf("record feedback: %v", err)
	}

	active, err := service.ListFabricServiceChannelRouteFeedback(ctx, "admin-1", ListFabricServiceChannelRouteFeedbackInput{
		ClusterID: clusterID,
		RouteID:   retryRoute,
		Now:       retryAt,
	})
	if err != nil {
		t.Fatalf("list feedback: %v", err)
	}
	suppressed := len(active) == 1 && active[0].FeedbackStatus == "operator_retry_cooldown" && active[0].ScoreAdjustment == 0
	if !suppressed {
		t.Fatalf("feedback not suppressed during cooldown: %+v", active)
	}
}
func TestIssueFabricServiceChannelLeaseDampensRecoveredRouteDuringRetryCooldown(t *testing.T) {
now := time.Now().UTC().Truncate(time.Second)
cooldownUntil := now.Add(2 * time.Minute)
store := &fakeRepository{
routeIntents: []MeshRouteIntent{
{
ID: "route-recovered",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"entry-1"}`),
DestinationSelector: json.RawMessage(`{"node_id":"exit-1"}`),
ServiceClass: FabricServiceClassVPNPackets,
Priority: 100,
Status: "active",
Policy: json.RawMessage(`{
"hops": ["entry-1", "exit-1"],
"allowed_channels": ["vpn_packet", "fabric_control"]
}`),
UpdatedAt: now,
},
{
ID: "route-steady",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"entry-1"}`),
DestinationSelector: json.RawMessage(`{"node_id":"exit-1"}`),
ServiceClass: FabricServiceClassVPNPackets,
Priority: 80,
Status: "active",
Policy: json.RawMessage(`{
"hops": ["entry-1", "exit-1"],
"allowed_channels": ["vpn_packet", "fabric_control"]
}`),
UpdatedAt: now,
},
},
fabricRouteFeedback: []FabricServiceChannelRouteFeedbackObservation{
{
ClusterID: "cluster-1",
ReporterNodeID: "entry-1",
RouteID: "route-recovered",
ServiceClass: FabricServiceClassVPNPackets,
FeedbackStatus: "fenced",
ScoreAdjustment: -1030,
Reasons: []string{"service_channel_route_rebuild_recommended"},
ObservedAt: now.Add(-time.Minute),
ExpiresAt: now,
RetryCooldownUntil: &cooldownUntil,
},
{
ClusterID: "cluster-1",
ReporterNodeID: "entry-1",
RouteID: "route-recovered",
ServiceClass: FabricServiceClassVPNPackets,
FeedbackStatus: "healthy",
ScoreAdjustment: 90,
Reasons: []string{"service_channel_recent_success", "service_channel_quality_latency_le_10ms", "service_channel_rolling_quality_window"},
LastSendDurationMs: 1,
Payload: json.RawMessage(`{"quality_window_sample_count":32,"quality_window_success_count":32,"quality_window_failure_count":0,"quality_window_drop_count":0,"quality_window_avg_latency_ms":1}`),
ObservedAt: now,
ExpiresAt: now.Add(2 * time.Minute),
},
},
}
service := NewService(store)
service.now = func() time.Time { return now }
lease, err := service.IssueFabricServiceChannelLease(context.Background(), IssueFabricServiceChannelLeaseInput{
ClusterID: "cluster-1",
OrganizationID: "org-home",
UserID: "user-m",
ResourceID: "vpn-home",
ServiceClass: FabricServiceClassVPNPackets,
EntryNodeIDs: []string{"entry-1"},
ExitNodeIDs: []string{"exit-1"},
PreferredEntryNodeID: "entry-1",
PreferredExitNodeID: "exit-1",
})
if err != nil {
t.Fatalf("issue lease: %v", err)
}
if lease.PrimaryRoute.RouteID != "route-steady" {
t.Fatalf("primary route = %q, want steady route while recovered route is dampened", lease.PrimaryRoute.RouteID)
}
var recovered FabricServiceChannelRoute
for _, route := range append([]FabricServiceChannelRoute{lease.PrimaryRoute}, lease.AlternateRoutes...) {
if route.RouteID == "route-recovered" {
recovered = route
break
}
}
if recovered.RouteID == "" || recovered.Status != "authorized" {
t.Fatalf("recovered route should be authorized alternate during hysteresis: primary=%+v alternates=%+v", lease.PrimaryRoute, lease.AlternateRoutes)
}
if recovered.RecoveryState != "recovered" || recovered.RecoveryPenalty != fabricServiceChannelRecoveryHysteresisPenalty {
t.Fatalf("recovered telemetry state=%q penalty=%d, want recovered penalty %d", recovered.RecoveryState, recovered.RecoveryPenalty, fabricServiceChannelRecoveryHysteresisPenalty)
}
if !containsString(recovered.ScoreReasons, "service_channel_recovery_hysteresis") ||
!containsString(recovered.ScoreReasons, "service_channel_rolling_quality_window") ||
!containsString(recovered.ScoreReasons, "manual_feedback_expired_retry_cooldown") {
t.Fatalf("recovered route score reasons = %+v, want hysteresis + rolling feedback reasons", recovered.ScoreReasons)
}
if recovered.PathScore >= lease.PrimaryRoute.PathScore {
t.Fatalf("recovered score = %d primary score = %d, want recovered route dampened below steady primary", recovered.PathScore, lease.PrimaryRoute.PathScore)
}
}
func TestServiceChannelRouteFeedbackReportExposesRecoveryState(t *testing.T) {
now := time.Now().UTC().Truncate(time.Second)
cooldownUntil := now.Add(2 * time.Minute)
report := serviceChannelRouteFeedbackReport([]FabricServiceChannelRouteFeedbackObservation{
{
ClusterID: "cluster-1",
ReporterNodeID: "entry-1",
RouteID: "route-recovered",
ServiceClass: FabricServiceClassVPNPackets,
FeedbackStatus: "healthy",
ScoreAdjustment: 90,
Reasons: []string{"service_channel_recent_success", "service_channel_rolling_quality_window"},
ObservedAt: now,
ExpiresAt: now.Add(2 * time.Minute),
RetryCooldownUntil: &cooldownUntil,
},
{
ClusterID: "cluster-1",
ReporterNodeID: "entry-1",
RouteID: "route-promoted",
ServiceClass: FabricServiceClassVPNPackets,
FeedbackStatus: "healthy",
ScoreAdjustment: 90,
Reasons: []string{"service_channel_recent_success", "service_channel_rolling_quality_window"},
Payload: json.RawMessage(`{"quality_window_sample_count":96,"quality_window_success_count":96,"quality_window_failure_count":0,"quality_window_slow_count":0,"quality_window_drop_count":0}`),
ObservedAt: now,
ExpiresAt: now.Add(2 * time.Minute),
RetryCooldownUntil: &cooldownUntil,
},
{
ClusterID: "cluster-1",
ReporterNodeID: "entry-1",
RouteID: "route-demoted",
ServiceClass: FabricServiceClassVPNPackets,
FeedbackStatus: "degraded",
ScoreAdjustment: -30,
Reasons: []string{"service_channel_recent_route_failure", "service_channel_rolling_quality_window"},
Payload: json.RawMessage(`{"quality_window_sample_count":96,"quality_window_success_count":95,"quality_window_failure_count":1,"quality_window_slow_count":0,"quality_window_drop_count":0}`),
ObservedAt: now,
ExpiresAt: now.Add(2 * time.Minute),
RetryCooldownUntil: &cooldownUntil,
},
}, now)
if report.RecoveredRouteCount != 1 || report.RecoveryHysteresisCount != 1 || report.RecoveryPromotedCount != 1 || report.RecoveryDemotedCount != 1 {
t.Fatalf("recovery counters = recovered:%d hysteresis:%d promoted:%d demoted:%d, want 1/1/1/1", report.RecoveredRouteCount, report.RecoveryHysteresisCount, report.RecoveryPromotedCount, report.RecoveryDemotedCount)
}
if len(report.Observations) != 3 {
t.Fatalf("observations = %d, want 3", len(report.Observations))
}
observation := report.Observations[0]
if observation.RecoveryState != "recovered" || !observation.RecoveryHysteresisActive || observation.RecoveryHysteresisPenalty != fabricServiceChannelRecoveryHysteresisPenalty {
t.Fatalf("observation recovery telemetry = state:%q active:%t penalty:%d", observation.RecoveryState, observation.RecoveryHysteresisActive, observation.RecoveryHysteresisPenalty)
}
promoted := report.Observations[1]
if promoted.RecoveryState != "healthy" || !promoted.RecoveryPromoted || promoted.RecoveryHysteresisActive {
t.Fatalf("promoted recovery telemetry = state:%q promoted:%t hysteresis:%t", promoted.RecoveryState, promoted.RecoveryPromoted, promoted.RecoveryHysteresisActive)
}
demoted := report.Observations[2]
if demoted.RecoveryState != "degraded" || !demoted.RecoveryDemoted || demoted.RecoveryReason != "service_channel_recovery_demoted_failure" {
t.Fatalf("demoted recovery telemetry = state:%q demoted:%t reason:%q", demoted.RecoveryState, demoted.RecoveryDemoted, demoted.RecoveryReason)
}
}
func TestFabricServiceChannelRecoveryPromotionRemovesHysteresisPenalty(t *testing.T) {
now := time.Now().UTC().Truncate(time.Second)
route, ok := fabricServiceChannelRouteFromIntent(MeshRouteIntent{
ID: "route-promoted",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"entry-1"}`),
DestinationSelector: json.RawMessage(`{"node_id":"exit-1"}`),
ServiceClass: FabricServiceClassVPNPackets,
Priority: 100,
Status: "active",
Policy: json.RawMessage(`{
"hops": ["entry-1", "exit-1"],
"allowed_channels": ["vpn_packet", "fabric_control"]
}`),
UpdatedAt: now,
}, FabricServiceClassVPNPackets, []string{"entry-1"}, []string{"exit-1"}, []string{"vpn_packet"}, "generation-1", now, now.Add(time.Minute), map[string]fabricServiceChannelRouteFeedback{
"route-promoted": {
RouteID: "route-promoted",
ManualRetry: true,
ScoreAdjustment: 90,
Reasons: []string{"service_channel_recent_success", "service_channel_rolling_quality_window", "manual_feedback_expired_retry_cooldown"},
QualityWindowSampleCount: 96,
QualityWindowSuccessCount: 96,
},
}, defaultFabricServiceChannelRecoveryPolicy())
if !ok {
t.Fatal("route was not built")
}
if route.RecoveryState != "healthy" || !route.RecoveryPromoted || route.RecoveryPenalty != 0 {
t.Fatalf("promoted route recovery = state:%q promoted:%t penalty:%d", route.RecoveryState, route.RecoveryPromoted, route.RecoveryPenalty)
}
if containsString(route.ScoreReasons, "service_channel_recovery_hysteresis") || !containsString(route.ScoreReasons, "service_channel_recovery_promoted") {
t.Fatalf("promoted route reasons = %+v", route.ScoreReasons)
}
}
func TestFabricServiceChannelRecoveryDemotionMarksRouteReason(t *testing.T) {
now := time.Now().UTC().Truncate(time.Second)
route, ok := fabricServiceChannelRouteFromIntent(MeshRouteIntent{
ID: "route-demoted",
ClusterID: "cluster-1",
SourceSelector: json.RawMessage(`{"node_id":"entry-1"}`),
DestinationSelector: json.RawMessage(`{"node_id":"exit-1"}`),
ServiceClass: FabricServiceClassVPNPackets,
Priority: 100,
Status: "active",
Policy: json.RawMessage(`{
"hops": ["entry-1", "exit-1"],
"allowed_channels": ["vpn_packet", "fabric_control"]
}`),
UpdatedAt: now,
}, FabricServiceClassVPNPackets, []string{"entry-1"}, []string{"exit-1"}, []string{"vpn_packet"}, "generation-1", now, now.Add(time.Minute), map[string]fabricServiceChannelRouteFeedback{
"route-demoted": {
RouteID: "route-demoted",
ManualRetry: true,
ScoreAdjustment: -30,
Reasons: []string{"service_channel_recent_route_failure", "service_channel_rolling_quality_window", "manual_feedback_expired_retry_cooldown"},
QualityWindowSampleCount: 96,
QualityWindowSuccessCount: 95,
QualityWindowFailureCount: 1,
},
}, defaultFabricServiceChannelRecoveryPolicy())
if !ok {
t.Fatal("route was not built")
}
if !route.RecoveryDemoted || route.RecoveryReason != "service_channel_recovery_demoted_failure" || route.RecoveryPromoted {
t.Fatalf("demoted route recovery = demoted:%t reason:%q promoted:%t", route.RecoveryDemoted, route.RecoveryReason, route.RecoveryPromoted)
}
if !containsString(route.ScoreReasons, "service_channel_recovery_demoted") || !containsString(route.ScoreReasons, "service_channel_recovery_demoted_failure") {
t.Fatalf("demoted route reasons = %+v", route.ScoreReasons)
}
}
// TestFabricServiceChannelRecoveryPolicyControlsPromotionAndPenalty overrides
// the default recovery policy (hysteresis penalty 40, promotion after 4
// samples) and builds a feedback report from two healthy observations under an
// active retry cooldown. The observation with a clean 4-sample quality window
// is expected to be promoted, the one without a quality window is expected to
// stay in hysteresis with the overridden penalty, and the report is expected
// to expose the policy values it was built with.
func TestFabricServiceChannelRecoveryPolicyControlsPromotionAndPenalty(t *testing.T) {
	now := time.Now().UTC().Truncate(time.Second)
	policy := defaultFabricServiceChannelRecoveryPolicy()
	policy.HysteresisPenalty = 40
	policy.PromotionMinSamples = 4
	cooldownUntil := now.Add(2 * time.Minute)
	report := serviceChannelRouteFeedbackReportWithPolicy([]FabricServiceChannelRouteFeedbackObservation{
		{
			ClusterID:          "cluster-1",
			ReporterNodeID:     "entry-1",
			RouteID:            "route-promoted",
			ServiceClass:       FabricServiceClassVPNPackets,
			FeedbackStatus:     "healthy",
			ScoreAdjustment:    90,
			Reasons:            []string{"service_channel_recent_success", "service_channel_rolling_quality_window"},
			Payload:            json.RawMessage(`{"quality_window_sample_count":4,"quality_window_success_count":4,"quality_window_failure_count":0,"quality_window_slow_count":0,"quality_window_drop_count":0}`),
			ObservedAt:         now,
			ExpiresAt:          now.Add(2 * time.Minute),
			RetryCooldownUntil: &cooldownUntil,
		},
		{
			ClusterID:          "cluster-1",
			ReporterNodeID:     "entry-1",
			RouteID:            "route-recovered",
			ServiceClass:       FabricServiceClassVPNPackets,
			FeedbackStatus:     "healthy",
			ScoreAdjustment:    90,
			Reasons:            []string{"service_channel_recent_success", "service_channel_rolling_quality_window"},
			ObservedAt:         now,
			ExpiresAt:          now.Add(2 * time.Minute),
			RetryCooldownUntil: &cooldownUntil,
		},
	}, now, policy)
	if report.RecoveryPromotedCount != 1 || report.RecoveryHysteresisCount != 1 {
		t.Fatalf("policy counters promoted/hysteresis = %d/%d, want 1/1", report.RecoveryPromotedCount, report.RecoveryHysteresisCount)
	}
	// Fail with a readable message instead of an index-out-of-range panic if
	// the report drops an observation before the second entry is inspected.
	if len(report.Observations) < 2 {
		t.Fatalf("observations = %d, want at least 2", len(report.Observations))
	}
	if report.Observations[1].RecoveryHysteresisPenalty != 40 {
		t.Fatalf("hysteresis penalty = %d, want policy penalty 40", report.Observations[1].RecoveryHysteresisPenalty)
	}
	if report.RecoveryPolicy == nil || report.RecoveryPolicy.HysteresisPenalty != 40 || report.RecoveryPolicy.PromotionMinSamples != 4 {
		t.Fatalf("report recovery policy provenance = %+v", report.RecoveryPolicy)
	}
}
// TestFabricServiceChannelFeedbackStalePolicyIsConservative feeds a fenced
// observation whose payload names a different recovery policy fingerprint and
// an older route policy version than the current route provenance. It expects
// the report to count the observation as stale for both policy and generation
// and to reduce its effective score adjustment to -10, and expects the derived
// route feedback neither to fence the route nor to recommend a rebuild.
func TestFabricServiceChannelFeedbackStalePolicyIsConservative(t *testing.T) {
	now := time.Now().UTC().Truncate(time.Second)
	policy := defaultFabricServiceChannelRecoveryPolicy()
	policy.HysteresisPenalty = 44
	policy = normalizeFabricServiceChannelRecoveryPolicy(policy, defaultFabricServiceChannelRecoveryPolicy())
	routeProvenance := map[string]fabricServiceChannelRouteProvenance{
		"route-1": {RouteID: "route-1", RouteGeneration: "policy-v2", PolicyVersion: "policy-v2", RouteVersion: "route-v2"},
	}
	report := serviceChannelRouteFeedbackReportWithPolicyAndProvenance([]FabricServiceChannelRouteFeedbackObservation{
		{
			ClusterID:           "cluster-1",
			ReporterNodeID:      "entry-1",
			RouteID:             "route-1",
			ServiceClass:        FabricServiceClassVPNPackets,
			FeedbackStatus:      "fenced",
			ScoreAdjustment:     -1030,
			Reasons:             []string{"service_channel_recent_route_failure", "service_channel_route_rebuild_recommended"},
			Payload:             json.RawMessage(`{"recovery_policy_fingerprint":"old-policy","route_policy_version":"policy-v1","quality_window_failure_count":3}`),
			ObservedAt:          now,
			ExpiresAt:           now.Add(time.Minute),
			ConsecutiveFailures: 3,
			LastSendDurationMs:  900,
		},
	}, now, policy, routeProvenance)
	if report == nil || report.StalePolicyCount != 1 || report.StaleGenerationCount != 1 {
		t.Fatalf("stale counters = %+v, want policy/generation stale", report)
	}
	// Fail with a readable message instead of an index-out-of-range panic if
	// the report carries no observations.
	if len(report.Observations) == 0 {
		t.Fatalf("observations = %d, want at least 1", len(report.Observations))
	}
	if report.Observations[0].EffectiveScoreAdjustment != -10 || !report.Observations[0].StalePolicy || !report.Observations[0].StaleGeneration {
		t.Fatalf("stale observation = %+v", report.Observations[0])
	}
	feedback := fabricServiceChannelRouteFeedbackFromObservationsWithProvenance(report.Observations, now, policy, routeProvenance)
	item := feedback["route-1"]
	if item.Fenced || item.RouteRebuildRecommended || item.ScoreAdjustment != -10 {
		t.Fatalf("stale feedback should not fence/rebuild current route: %+v", item)
	}
}
// TestFabricServiceChannelFeedbackMissingProvenanceIsVisibleButcompatible feeds
// a healthy observation whose payload carries no policy or route provenance
// keys. It expects the report to count it as missing provenance without
// marking it stale, and expects the derived route feedback to keep the full
// score adjustment while flagging the missing provenance.
func TestFabricServiceChannelFeedbackMissingProvenanceIsVisibleButcompatible(t *testing.T) {
	now := time.Now().UTC().Truncate(time.Second)
	policy := defaultFabricServiceChannelRecoveryPolicy()
	routeProvenance := map[string]fabricServiceChannelRouteProvenance{
		"route-1": {RouteID: "route-1", RouteGeneration: "policy-v2", PolicyVersion: "policy-v2", RouteVersion: "route-v2"},
	}
	legacySamples := []FabricServiceChannelRouteFeedbackObservation{
		{
			ClusterID:       "cluster-1",
			ReporterNodeID:  "entry-1",
			RouteID:         "route-1",
			ServiceClass:    FabricServiceClassVPNPackets,
			FeedbackStatus:  "healthy",
			ScoreAdjustment: 42,
			Reasons:         []string{"service_channel_recent_success"},
			Payload:         json.RawMessage(`{"quality_window_success_count":8}`),
			ObservedAt:      now,
			ExpiresAt:       now.Add(time.Minute),
		},
	}

	report := serviceChannelRouteFeedbackReportWithPolicyAndProvenance(legacySamples, now, policy, routeProvenance)
	if report == nil || report.MissingProvenanceCount != 1 || report.StalePolicyCount != 0 || report.StaleGenerationCount != 0 {
		t.Fatalf("missing provenance counters = %+v", report)
	}

	feedback := fabricServiceChannelRouteFeedbackFromObservationsWithProvenance(report.Observations, now, policy, routeProvenance)
	stillCompatible := feedback["route-1"].ScoreAdjustment == 42 && feedback["route-1"].ProvenanceMissing
	if !stillCompatible {
		t.Fatalf("missing provenance should stay compatible for old agents: %+v", feedback["route-1"])
	}
}
func TestUpdateFabricServiceChannelRecoveryPolicyPersistsClusterMetadata(t *testing.T) {
store := &fakeRepository{
platformRole: PlatformRoleAdmin,
cluster: Cluster{
ID: "cluster-1",
Slug: "cluster-1",
Name: "Cluster 1",
Status: ClusterStatusActive,
Metadata: json.RawMessage(`{"existing":true}`),
},
}
service := NewService(store)
enabled := true
policy, err := service.UpdateFabricServiceChannelRecoveryPolicy(context.Background(), UpdateFabricServiceChannelRecoveryPolicyInput{
ActorUserID: "admin-1",
ClusterID: "cluster-1",
HysteresisPenalty: 42,
PromotionMinSamples: 7,
DemotionFailureThreshold: 3,
DemotionDropThreshold: 2,
DemotionSlowThreshold: 5,
DemotionRebuildEnabled: &enabled,
DemotionFencedEnabled: &enabled,
})
if err != nil {
t.Fatalf("update recovery policy: %v", err)
}
if policy.HysteresisPenalty != 42 || policy.PromotionMinSamples != 7 || policy.DemotionFailureThreshold != 3 {
t.Fatalf("policy = %+v, want configured values", policy)
}
var metadata map[string]any
if err := json.Unmarshal(store.cluster.Metadata, &metadata); err != nil {
t.Fatalf("metadata json: %v", err)
}
if metadata["existing"] != true || metadata["fabric_service_channel_recovery_policy"] == nil {
t.Fatalf("metadata = %+v, want existing value plus policy", metadata)
}
}
func TestUpdateFabricServiceChannelAdaptivePolicyPersistsClusterMetadata(t *testing.T) {
store := &fakeRepository{
platformRole: PlatformRoleAdmin,
cluster: Cluster{
ID: "cluster-1",
Slug: "cluster-1",
Name: "Cluster 1",
Status: ClusterStatusActive,
Metadata: json.RawMessage(`{"existing":true}`),
},
}
service := NewService(store)
policy, err := service.UpdateFabricServiceChannelAdaptivePolicy(context.Background(), UpdateFabricServiceChannelAdaptivePolicyInput{
ActorUserID: "admin-1",
ClusterID: "cluster-1",
MaxParallelWindow: 6,
BulkPressureChannelThreshold: 8,
QueuePressureHighWatermark: 9,
QueuePressureMaxInFlight: 10,
ClassWindows: map[string]int{
"control": 6,
"interactive": 6,
"reliable": 4,
"bulk": 2,
"droppable": 1,
},
})
if err != nil {
t.Fatalf("update adaptive policy: %v", err)
}
if policy.MaxParallelWindow != 6 || policy.ClassWindows["bulk"] != 2 || policy.QueuePressureHighWatermark != 9 {
t.Fatalf("policy = %+v, want configured values", policy)
}
if policy.Fingerprint == "" || policy.Source != "cluster_metadata" {
t.Fatalf("policy provenance = %+v", policy)
}
var metadata map[string]any
if err := json.Unmarshal(store.cluster.Metadata, &metadata); err != nil {
t.Fatalf("metadata json: %v", err)
}
if metadata["existing"] != true || metadata["fabric_service_channel_adaptive_policy"] == nil {
t.Fatalf("metadata = %+v, want existing value plus adaptive policy", metadata)
}
}
func TestUpdateFabricServiceChannelPoolPolicyPersistsClusterMetadata(t *testing.T) {
store := &fakeRepository{
platformRole: PlatformRoleAdmin,
cluster: Cluster{
ID: "cluster-1",
Slug: "cluster-1",
Name: "Cluster 1",
Status: ClusterStatusActive,
Metadata: json.RawMessage(`{"existing":true}`),
},
}
service := NewService(store)
enabled := true
sticky := false
policy, err := service.UpdateFabricServiceChannelPoolPolicy(context.Background(), UpdateFabricServiceChannelPoolPolicyInput{
ActorUserID: "admin-1",
ClusterID: "cluster-1",
EntryPoolNodeIDs: []string{"entry-a", "entry-b"},
ExitPoolNodeIDs: []string{"exit-b"},
PreferredEntryNodeID: "entry-b",
PreferredExitNodeID: "exit-b",
SelectionStrategy: "preferred_first",
RouteRebuild: "automatic",
EntryFailover: "automatic",
ExitFailover: "manual",
CompatFallbackAllowed: &enabled,
StickySession: &sticky,
})
if err != nil {
t.Fatalf("update pool policy: %v", err)
}
if policy.PreferredEntryNodeID != "entry-b" || policy.PreferredExitNodeID != "exit-b" || policy.ExitFailover != "manual" || policy.StickySession {
t.Fatalf("policy = %+v, want configured values", policy)
}
if policy.Fingerprint == "" || policy.Source != "cluster_metadata" {
t.Fatalf("policy provenance = %+v", policy)
}
var metadata map[string]any
if err := json.Unmarshal(store.cluster.Metadata, &metadata); err != nil {
t.Fatalf("metadata json: %v", err)
}
if metadata["existing"] != true || metadata["fabric_service_channel_pool_policy"] == nil {
t.Fatalf("metadata = %+v, want existing value plus pool policy", metadata)
}
}
// TestUpdateFabricServiceChannelBreadcrumbWindowPolicyPersistsClusterMetadata
// checks that a breadcrumb window policy update returns the requested windows,
// is attributed to cluster metadata, and is stored in the cluster metadata
// without dropping the key that was already present.
func TestUpdateFabricServiceChannelBreadcrumbWindowPolicyPersistsClusterMetadata(t *testing.T) {
	repo := &fakeRepository{
		platformRole: PlatformRoleAdmin,
		cluster: Cluster{
			ID:       "cluster-1",
			Slug:     "cluster-1",
			Name:     "Cluster 1",
			Status:   ClusterStatusActive,
			Metadata: json.RawMessage(`{"existing":true}`),
		},
	}
	svc := NewService(repo)

	request := UpdateFabricServiceChannelBreadcrumbWindowPolicyInput{
		ActorUserID:          "admin-1",
		ClusterID:            "cluster-1",
		CurrentWindowSeconds: 600,
		HistoryWindowSeconds: 7200,
	}
	policy, err := svc.UpdateFabricServiceChannelBreadcrumbWindowPolicy(context.Background(), request)
	if err != nil {
		t.Fatalf("update breadcrumb window policy: %v", err)
	}

	if windowsMatch := policy.CurrentWindowSeconds == 600 && policy.HistoryWindowSeconds == 7200; !windowsMatch {
		t.Fatalf("policy = %+v, want configured windows", policy)
	}
	if provenanceOK := policy.Fingerprint != "" && policy.Source == "cluster_metadata"; !provenanceOK {
		t.Fatalf("policy provenance = %+v", policy)
	}

	// The fake keeps whatever metadata the service handed to UpdateCluster.
	var metadata map[string]any
	if err := json.Unmarshal(repo.cluster.Metadata, &metadata); err != nil {
		t.Fatalf("metadata json: %v", err)
	}
	if keptExisting, hasPolicy := metadata["existing"] == true, metadata["fabric_service_channel_breadcrumb_window_policy"] != nil; !keptExisting || !hasPolicy {
		t.Fatalf("metadata = %+v, want existing value plus breadcrumb window policy", metadata)
	}
}
// TestListFabricBreadcrumbsUsesClusterDefaultWindowPolicy stores a 600s/1800s
// breadcrumb window policy in the cluster metadata and lists breadcrumbs
// without window overrides. Audit events aged 5, 20 and 40 minutes are then
// expected to be counted as one current, one stale and one expired breadcrumb.
func TestListFabricBreadcrumbsUsesClusterDefaultWindowPolicy(t *testing.T) {
	clusterID := "cluster-1"
	now := time.Date(2026, 5, 9, 14, 20, 0, 0, time.UTC)

	windowPolicy := defaultFabricServiceChannelBreadcrumbWindowPolicy()
	windowPolicy.Source = "cluster_metadata"
	windowPolicy.CurrentWindowSeconds = 600
	windowPolicy.HistoryWindowSeconds = 1800
	windowPolicy = normalizeFabricServiceChannelBreadcrumbWindowPolicy(windowPolicy, defaultFabricServiceChannelBreadcrumbWindowPolicy())
	clusterMetadata, err := upsertFabricServiceChannelBreadcrumbWindowPolicyMetadata(json.RawMessage(`{}`), windowPolicy)
	if err != nil {
		t.Fatalf("policy metadata: %v", err)
	}

	// investigationOpened builds an "investigation opened" audit event that was
	// created the given duration before now.
	investigationOpened := func(id string, age time.Duration) ClusterAuditEvent {
		return ClusterAuditEvent{
			ID:         id,
			ClusterID:  &clusterID,
			EventType:  "fabric.service_channel_rebuild_incident.investigation_opened",
			TargetType: "fabric_service_channel_route_rebuild_incident",
			Payload:    json.RawMessage(`{}`),
			CreatedAt:  now.Add(-age),
		}
	}
	repo := &fakeRepository{
		platformRole: PlatformRoleAdmin,
		cluster: Cluster{
			ID:       clusterID,
			Slug:     "cluster-1",
			Name:     "Cluster 1",
			Status:   ClusterStatusActive,
			Metadata: clusterMetadata,
		},
		auditEvents: []ClusterAuditEvent{
			investigationOpened("audit-current", 5*time.Minute),
			investigationOpened("audit-stale", 20*time.Minute),
			investigationOpened("audit-expired", 40*time.Minute),
		},
	}

	svc := NewService(repo)
	svc.now = func() time.Time { return now }
	query := ListFabricServiceChannelRebuildInvestigationBreadcrumbsInput{
		ClusterID: clusterID,
		Limit:     10,
	}
	breadcrumbs, err := svc.ListFabricServiceChannelRebuildInvestigationBreadcrumbs(context.Background(), "admin-1", query)
	if err != nil {
		t.Fatalf("list breadcrumbs: %v", err)
	}

	if usesPolicy := breadcrumbs.CurrentWindowSeconds == 600 && breadcrumbs.HistoryWindowSeconds == 1800; !usesPolicy {
		t.Fatalf("breadcrumb windows = %d/%d, want cluster policy", breadcrumbs.CurrentWindowSeconds, breadcrumbs.HistoryWindowSeconds)
	}
	if countsOK := breadcrumbs.CurrentCount == 1 && breadcrumbs.StaleCount == 1 && breadcrumbs.ExpiredCount == 1; !countsOK {
		t.Fatalf("breadcrumb freshness counts = current %d stale %d expired %d", breadcrumbs.CurrentCount, breadcrumbs.StaleCount, breadcrumbs.ExpiredCount)
	}
}
// TestListFabricBreadcrumbsKeepsQueryWindowOverrides stores a 3600s/7200s
// breadcrumb window policy in the cluster metadata but queries with explicit
// 600s/1800s windows. The explicit windows must be reported back, and a
// 20-minute-old audit event must be counted as stale.
func TestListFabricBreadcrumbsKeepsQueryWindowOverrides(t *testing.T) {
	clusterID := "cluster-1"
	now := time.Date(2026, 5, 9, 14, 20, 0, 0, time.UTC)

	windowPolicy := defaultFabricServiceChannelBreadcrumbWindowPolicy()
	windowPolicy.Source = "cluster_metadata"
	windowPolicy.CurrentWindowSeconds = 3600
	windowPolicy.HistoryWindowSeconds = 7200
	windowPolicy = normalizeFabricServiceChannelBreadcrumbWindowPolicy(windowPolicy, defaultFabricServiceChannelBreadcrumbWindowPolicy())
	clusterMetadata, err := upsertFabricServiceChannelBreadcrumbWindowPolicyMetadata(json.RawMessage(`{}`), windowPolicy)
	if err != nil {
		t.Fatalf("policy metadata: %v", err)
	}

	staleByOverride := ClusterAuditEvent{
		ID:         "audit-stale-by-override",
		ClusterID:  &clusterID,
		EventType:  "fabric.service_channel_rebuild_incident.investigation_opened",
		TargetType: "fabric_service_channel_route_rebuild_incident",
		Payload:    json.RawMessage(`{}`),
		CreatedAt:  now.Add(-20 * time.Minute),
	}
	repo := &fakeRepository{
		platformRole: PlatformRoleAdmin,
		cluster: Cluster{
			ID:       clusterID,
			Slug:     "cluster-1",
			Name:     "Cluster 1",
			Status:   ClusterStatusActive,
			Metadata: clusterMetadata,
		},
		auditEvents: []ClusterAuditEvent{staleByOverride},
	}

	svc := NewService(repo)
	svc.now = func() time.Time { return now }
	query := ListFabricServiceChannelRebuildInvestigationBreadcrumbsInput{
		ClusterID:            clusterID,
		Limit:                10,
		CurrentWindowSeconds: 600,
		HistoryWindowSeconds: 1800,
	}
	breadcrumbs, err := svc.ListFabricServiceChannelRebuildInvestigationBreadcrumbs(context.Background(), "admin-1", query)
	if err != nil {
		t.Fatalf("list breadcrumbs: %v", err)
	}

	if usesOverride := breadcrumbs.CurrentWindowSeconds == 600 && breadcrumbs.HistoryWindowSeconds == 1800; !usesOverride {
		t.Fatalf("breadcrumb windows = %d/%d, want query override", breadcrumbs.CurrentWindowSeconds, breadcrumbs.HistoryWindowSeconds)
	}
	if countsOK := breadcrumbs.CurrentCount == 0 && breadcrumbs.StaleCount == 1 && breadcrumbs.ExpiredCount == 0; !countsOK {
		t.Fatalf("breadcrumb override freshness counts = current %d stale %d expired %d", breadcrumbs.CurrentCount, breadcrumbs.StaleCount, breadcrumbs.ExpiredCount)
	}
}
// TestRoutePathDecisionReportCountsRecoveryHysteresis feeds three decisions
// whose score reasons mark them as hysteresis, promoted and demoted recoveries
// and expects the report to count one of each and to carry the recovery policy
// (source and hysteresis penalty) it was built with.
func TestRoutePathDecisionReportCountsRecoveryHysteresis(t *testing.T) {
	now := time.Now().UTC().Truncate(time.Second)

	recoveryPolicy := defaultFabricServiceChannelRecoveryPolicy()
	recoveryPolicy.Source = "cluster_metadata"
	recoveryPolicy.HysteresisPenalty = 33

	// decision builds an entry-1 -> exit-1 decision; the three fixtures differ
	// only in their IDs and score reasons.
	decision := func(decisionID, routeID string, reasons ...string) RoutePathDecision {
		return RoutePathDecision{
			DecisionID:           decisionID,
			RouteID:              routeID,
			ClusterID:            "cluster-1",
			LocalNodeID:          "entry-1",
			SourceNodeID:         "entry-1",
			DestinationNodeID:    "exit-1",
			OriginalHops:         []string{"entry-1", "exit-1"},
			EffectiveHops:        []string{"entry-1", "exit-1"},
			LocalRole:            "entry",
			DecisionSource:       "service_channel_feedback_replacement",
			Generation:           "generation-1",
			ScoreReasons:         reasons,
			ControlPlaneOnly:     true,
			ProductionForwarding: false,
			ExpiresAt:            now.Add(time.Minute),
		}
	}
	decisions := []RoutePathDecision{
		decision("decision-1", "route-recovered", "service_channel_recovery_hysteresis"),
		decision("decision-2", "route-promoted", "service_channel_recovery_promoted"),
		decision("decision-3", "route-demoted", "service_channel_recovery_demoted", "service_channel_recovery_demoted_failure"),
	}

	report := routePathDecisionReportWithRecoveryPolicy("generation-1", decisions, recoveryPolicy)
	if report == nil || report.RecoveryHysteresisCount != 1 || report.RecoveryPromotedCount != 1 || report.RecoveryDemotedCount != 1 {
		t.Fatalf("recovery counts = %+v, want hysteresis/promoted/demoted 1/1/1", report)
	}
	if got := report.RecoveryPolicy; got == nil || got.Source != "cluster_metadata" || got.HysteresisPenalty != 33 {
		t.Fatalf("route path decision recovery policy provenance = %+v", report.RecoveryPolicy)
	}
}
// containsRouteID reports whether any entry of routes has the given RouteID.
func containsRouteID(routes []SyntheticMeshRouteConfig, routeID string) bool {
	for i := 0; i < len(routes); i++ {
		if routes[i].RouteID != routeID {
			continue
		}
		return true
	}
	return false
}
// ptrTime returns a pointer to a fresh copy of value.
func ptrTime(value time.Time) *time.Time {
	ptr := new(time.Time)
	*ptr = value
	return ptr
}
// strPtr returns a pointer to a fresh copy of value.
func strPtr(value string) *string {
	ptr := new(string)
	*ptr = value
	return ptr
}
// fakeRepository is the in-memory repository double that this file's tests pass
// to NewService. Its fields fall into three groups:
//
//   - canned results that the methods hand back (for example platformRole,
//     cluster, clusterNodes, nodeRoles, meshLinks, testingFlags),
//   - errors to inject (for example platformRoleErr, validTokenErr),
//   - records of what the service passed in (for example lastTokenHash,
//     lastLookupTokenHash, lastNodeGroupInput, createdRouteIntents).
//
// Several slices and maps (heartbeats, nodeTelemetry, releaseVersions,
// nodeUpdatePolicies, updateStatuses, routeIntents, fabricRouteFeedback,
// fabricLeases) double as mutable state that the Record/Create/Upsert/Store
// methods write and the List/Get methods read back.
//
// NOTE(review): fields that none of the nearby methods touch are presumably
// used by methods defined elsewhere in the file; confirm before removing any.
type fakeRepository struct {
	platformRole             string
	lastTokenHash            string
	lastLookupTokenHash      string
	validJoinToken           NodeJoinToken
	validTokenErr            error
	createJoinRequestID      string
	joinJoinRequest          NodeJoinRequest
	clusterAuthority         ClusterAuthorityKey
	lastTokenAuthority       json.RawMessage
	lastApprovalAuthority    json.RawMessage
	authorityState           ClusterAuthorityState
	vpnConnection            VPNConnection
	lastVPNConnectionInput   CreateVPNConnectionInput
	lastAllowedNodesInput    SetVPNConnectionAllowedNodesInput
	lastAttachInput          AttachExistingNodeInput
	lastNodeGroupInput       CreateNodeGroupInput
	lastAssignGroupInput     AssignNodeGroupInput
	lastEntryPointInput      CreateFabricEntryPointInput
	lastEgressPoolInput      CreateFabricEgressPoolInput
	acquireVPNLeaseErr       error
	ownerEligibility         VPNLeaseOwnerEligibility
	ownerEligibilityErr      error
	renewVPNLeaseErr         error
	expiredVPNLeases         []VPNConnectionLease
	nodeVPNAssignments       []NodeVPNAssignment
	vpnClientProfile         VPNClientProfile
	testingFlags             EffectiveNodeTestingFlags
	routeIntents             []MeshRouteIntent
	createdRouteIntents      []CreateRouteIntentInput
	clusterNodes             []ClusterNode
	nodeRoles                map[string][]NodeRoleAssignment
	releaseVersions          []ReleaseVersion
	updateServiceCandidates  []NodeUpdateServiceCandidate
	nodeUpdatePolicies       map[string]NodeUpdatePolicy
	updateStatuses           []NodeUpdateStatus
	meshLinks                []MeshLinkObservation
	fabricRouteFeedback      []FabricServiceChannelRouteFeedbackObservation
	fabricLeases             map[string]FabricServiceChannelLeaseRecord
	fabricRebuildAttempts    []FabricServiceChannelRouteRebuildAttempt
	fabricRebuildSilences    []FabricServiceChannelRouteRebuildAlertSilence
	heartbeats               map[string][]NodeHeartbeat
	nodeTelemetry            map[string][]NodeTelemetryObservation
	desiredWorkloads         []NodeWorkloadDesiredState
	auditEvents              []ClusterAuditEvent
	cluster                  Cluster
	lastPreferredEntryNodeID string
	lastPreferredExitNodeID  string
	platformRoleErr          error
}
// GetPlatformRole returns the configured platformRole together with
// platformRoleErr, regardless of the user asked about.
func (f *fakeRepository) GetPlatformRole(_ context.Context, _ string) (string, error) {
	role, err := f.platformRole, f.platformRoleErr
	return role, err
}
// ListClusters is a stub: it always yields a nil slice and no error.
func (f *fakeRepository) ListClusters(_ context.Context) ([]Cluster, error) {
	var none []Cluster
	return none, nil
}
// GetCluster returns the configured cluster when it has an ID; otherwise it
// falls back to a default active "cluster-1" with empty JSON metadata. The
// requested cluster ID is ignored.
func (f *fakeRepository) GetCluster(_ context.Context, _ string) (Cluster, error) {
	if f.cluster.ID == "" {
		fallback := Cluster{
			ID:       "cluster-1",
			Slug:     "cluster-1",
			Name:     "Cluster 1",
			Status:   ClusterStatusActive,
			Metadata: json.RawMessage(`{}`),
		}
		return fallback, nil
	}
	return f.cluster, nil
}
// CreateCluster is a stub: it returns the zero Cluster and no error.
func (f *fakeRepository) CreateCluster(_ context.Context, _ CreateClusterInput) (Cluster, error) {
	var created Cluster
	return created, nil
}
// UpdateCluster replaces the stored cluster with one built from input (the slug
// is always "cluster-1") and returns the stored value.
func (f *fakeRepository) UpdateCluster(_ context.Context, input UpdateClusterInput) (Cluster, error) {
	var updated Cluster
	updated.ID = input.ClusterID
	updated.Slug = "cluster-1"
	updated.Name = input.Name
	updated.Status = input.Status
	updated.Region = input.Region
	updated.Metadata = input.Metadata

	f.cluster = updated
	return f.cluster, nil
}
// GetClusterAuthority returns the fake's cluster authority key. When no private
// key has been configured it generates a key pair with
// clusterauth.GenerateKeyPair, stores an active authority for clusterID and
// reuses it on later calls. A configured authority without a ClusterID gets
// clusterID filled in.
func (f *fakeRepository) GetClusterAuthority(_ context.Context, clusterID string) (ClusterAuthorityKey, error) {
	if f.clusterAuthority.PrivateKey == "" {
		keys, err := clusterauth.GenerateKeyPair()
		if err != nil {
			return ClusterAuthorityKey{}, err
		}
		descriptor := ClusterAuthorityDescriptor{
			SchemaVersion:        clusterauth.AuthoritySchemaVersion,
			ClusterID:            clusterID,
			AuthorityState:       "active",
			KeyAlgorithm:         clusterauth.AlgorithmEd25519,
			PublicKey:            keys.PublicKeyB64,
			PublicKeyFingerprint: keys.Fingerprint,
			CreatedAt:            time.Now().UTC(),
			UpdatedAt:            time.Now().UTC(),
		}
		f.clusterAuthority = ClusterAuthorityKey{
			ClusterAuthorityDescriptor: descriptor,
			PrivateKey:                 keys.PrivateKeyB64,
		}
	}
	if f.clusterAuthority.ClusterID == "" {
		f.clusterAuthority.ClusterID = clusterID
	}
	return f.clusterAuthority, nil
}
// EnsureClusterAuthority delegates to GetClusterAuthority; the third argument
// is ignored.
func (f *fakeRepository) EnsureClusterAuthority(ctx context.Context, clusterID string, _ *string) (ClusterAuthorityKey, error) {
	authority, err := f.GetClusterAuthority(ctx, clusterID)
	return authority, err
}
// ListClusterNodes returns the configured clusterNodes for any cluster ID.
func (f *fakeRepository) ListClusterNodes(_ context.Context, _ string) ([]ClusterNode, error) {
	nodes := f.clusterNodes
	return nodes, nil
}
// ListNodeGroups is a stub: it always yields a nil slice and no error.
func (f *fakeRepository) ListNodeGroups(_ context.Context, _ string) ([]ClusterNodeGroup, error) {
	var none []ClusterNodeGroup
	return none, nil
}
// CreateNodeGroup remembers input in lastNodeGroupInput and returns a group
// with the fixed ID "group-1" whose remaining fields mirror input.
func (f *fakeRepository) CreateNodeGroup(_ context.Context, input CreateNodeGroupInput) (ClusterNodeGroup, error) {
	f.lastNodeGroupInput = input

	group := ClusterNodeGroup{ID: "group-1"}
	group.ClusterID = input.ClusterID
	group.ParentGroupID = input.ParentGroupID
	group.Name = input.Name
	group.Description = input.Description
	group.SortOrder = input.SortOrder
	group.Metadata = input.Metadata
	return group, nil
}
// AssignNodeToGroup remembers input in lastAssignGroupInput and returns a
// canned active node carrying the requested node ID and group ID.
func (f *fakeRepository) AssignNodeToGroup(_ context.Context, input AssignNodeGroupInput) (ClusterNode, error) {
	f.lastAssignGroupInput = input

	node := ClusterNode{
		NodeKey:            "node-key-1",
		Name:               "Node One",
		RegistrationStatus: NodeRegistrationActive,
		MembershipStatus:   "active",
	}
	node.ID = input.NodeID
	node.NodeGroupID = input.GroupID
	return node, nil
}
// CreateJoinToken remembers tokenHash in lastTokenHash and returns an active
// token "token-1" populated from input and stamped with the current UTC time.
func (f *fakeRepository) CreateJoinToken(_ context.Context, input CreateJoinTokenInput, tokenHash string) (NodeJoinToken, error) {
	f.lastTokenHash = tokenHash

	token := NodeJoinToken{ID: "token-1", Status: "active"}
	token.ClusterID = input.ClusterID
	token.Scope = input.Scope
	token.ExpiresAt = input.ExpiresAt
	token.MaxUses = input.MaxUses
	token.CreatedByUserID = &input.ActorUserID
	token.CreatedAt = time.Now().UTC()
	return token, nil
}
// SetJoinTokenAuthority remembers payload in lastTokenAuthority and returns an
// active single-use token for the given IDs that expires one hour from now and
// carries the payload and signature.
func (f *fakeRepository) SetJoinTokenAuthority(_ context.Context, clusterID, tokenID string, payload json.RawMessage, signature ClusterSignature) (NodeJoinToken, error) {
	f.lastTokenAuthority = payload

	token := NodeJoinToken{
		ID:        tokenID,
		ClusterID: clusterID,
		Scope:     json.RawMessage(`{"roles":["rdp-worker"]}`),
		ExpiresAt: time.Now().UTC().Add(time.Hour),
		MaxUses:   1,
		Status:    "active",
	}
	token.AuthorityPayload = payload
	token.AuthoritySignature = &signature
	return token, nil
}
// GetValidJoinTokenByHash remembers tokenHash in lastLookupTokenHash, then
// answers in priority order: validTokenErr if set, validJoinToken if it has an
// ID, otherwise a default active single-use token expiring in one hour.
func (f *fakeRepository) GetValidJoinTokenByHash(_ context.Context, _ string, tokenHash string) (NodeJoinToken, error) {
	f.lastLookupTokenHash = tokenHash
	switch {
	case f.validTokenErr != nil:
		return NodeJoinToken{}, f.validTokenErr
	case f.validJoinToken.ID != "":
		return f.validJoinToken, nil
	default:
		return NodeJoinToken{ID: "token-1", Status: "active", ExpiresAt: time.Now().Add(time.Hour), MaxUses: 1}, nil
	}
}
// RevokeJoinToken always reports "token-1" as revoked.
func (f *fakeRepository) RevokeJoinToken(_ context.Context, _ RevokeJoinTokenInput) (NodeJoinToken, error) {
	revoked := NodeJoinToken{ID: "token-1", Status: "revoked"}
	return revoked, nil
}
// ListJoinTokens always returns one active single-use token "token-1" that
// expires one hour from now.
func (f *fakeRepository) ListJoinTokens(_ context.Context, _ string) ([]NodeJoinToken, error) {
	token := NodeJoinToken{ID: "token-1", Status: "active", ExpiresAt: time.Now().Add(time.Hour), MaxUses: 1}
	return []NodeJoinToken{token}, nil
}
// ExpireJoinTokens is a no-op that always succeeds.
func (f *fakeRepository) ExpireJoinTokens(_ context.Context, _ string) error {
	var err error
	return err
}
// CreateJoinRequest returns a pending join request mirroring input. Its ID is
// createJoinRequestID when configured, otherwise "join-request-1".
func (f *fakeRepository) CreateJoinRequest(_ context.Context, input CreateJoinRequestInput, joinTokenID string) (NodeJoinRequest, error) {
	requestID := "join-request-1"
	if f.createJoinRequestID != "" {
		requestID = f.createJoinRequestID
	}

	request := NodeJoinRequest{ID: requestID, Status: JoinRequestStatusPending}
	request.ClusterID = input.ClusterID
	request.JoinTokenID = &joinTokenID
	request.NodeName = input.NodeName
	request.NodeFingerprint = input.NodeFingerprint
	request.PublicKey = input.PublicKey
	request.ReportedCapabilities = input.ReportedCapabilities
	request.ReportedFacts = input.ReportedFacts
	request.RequestedRoles = input.RequestedRoles
	return request, nil
}
// RegisterFabricNode returns a healthy, active node "fabric-node-1" whose
// identity fields mirror input and whose timestamps are the current UTC time.
func (f *fakeRepository) RegisterFabricNode(_ context.Context, input RegisterFabricNodeInput) (ClusterNode, error) {
	node := ClusterNode{
		ID:                 "fabric-node-1",
		RegistrationStatus: NodeRegistrationActive,
		HealthStatus:       "healthy",
		VersionState:       "current",
		PartitionState:     "healthy",
		MembershipStatus:   "active",
		MembershipMetadata: json.RawMessage(`{"source":"fabric_control_register"}`),
	}
	node.OwnerOrganizationID = input.OwnerOrganizationID
	node.NodeKey = input.NodeKey
	node.Name = input.Name
	node.OwnershipType = input.OwnershipType
	node.ReportedVersion = input.ReportedVersion
	node.CreatedAt = time.Now().UTC()
	node.UpdatedAt = time.Now().UTC()
	return node, nil
}
// GetJoinRequestForJoin returns joinJoinRequest when it has an ID; otherwise a
// default pending request "join-request-1" in "cluster-1".
func (f *fakeRepository) GetJoinRequestForJoin(_ context.Context, _ GetJoinRequestJoinInput) (NodeJoinRequest, error) {
	if f.joinJoinRequest.ID == "" {
		fallback := NodeJoinRequest{ID: "join-request-1", ClusterID: "cluster-1", Status: JoinRequestStatusPending}
		return fallback, nil
	}
	return f.joinJoinRequest, nil
}
// ListJoinRequests is a stub: it always yields a nil slice and no error.
func (f *fakeRepository) ListJoinRequests(_ context.Context, _ string) ([]NodeJoinRequest, error) {
	var none []NodeJoinRequest
	return none, nil
}
// ApproveJoinRequest returns an approved join request and an active join
// contract, both derived from input; input.NodeKey is used as the node ID.
func (f *fakeRepository) ApproveJoinRequest(_ context.Context, input ApproveJoinRequestInput) (ApprovedJoinRequest, error) {
	request := NodeJoinRequest{
		ID:             input.JoinRequestID,
		ClusterID:      input.ClusterID,
		Status:         JoinRequestStatusApproved,
		ApprovedNodeID: &input.NodeKey,
	}
	contract := NodeJoinContract{
		NodeID:         input.NodeKey,
		ClusterID:      input.ClusterID,
		IdentityStatus: "active",
	}
	return ApprovedJoinRequest{JoinRequest: request, JoinContract: contract}, nil
}
// SetJoinRequestApprovalAuthority remembers payload in lastApprovalAuthority
// and returns an approved join request for node "node-1" that carries the
// payload and the JSON-encoded signature.
func (f *fakeRepository) SetJoinRequestApprovalAuthority(_ context.Context, clusterID, joinRequestID string, payload json.RawMessage, signature ClusterSignature) (NodeJoinRequest, error) {
	f.lastApprovalAuthority = payload

	// The marshal error is deliberately dropped, as in the rest of this fake.
	signatureRaw, _ := json.Marshal(signature)
	approvedNode := "node-1"

	request := NodeJoinRequest{ID: joinRequestID, ClusterID: clusterID}
	request.Status = JoinRequestStatusApproved
	request.ApprovedNodeID = &approvedNode
	request.ApprovalPayload = payload
	request.ApprovalSignature = signatureRaw
	return request, nil
}
// RejectJoinRequest is a stub: it returns the zero NodeJoinRequest and no error.
func (f *fakeRepository) RejectJoinRequest(_ context.Context, _ RejectJoinRequestInput) (NodeJoinRequest, error) {
	var rejected NodeJoinRequest
	return rejected, nil
}
// AssignNodeRole echoes the cluster, node and role from input as an assignment
// without storing anything.
func (f *fakeRepository) AssignNodeRole(_ context.Context, input AssignNodeRoleInput) (NodeRoleAssignment, error) {
	var assignment NodeRoleAssignment
	assignment.ClusterID = input.ClusterID
	assignment.NodeID = input.NodeID
	assignment.Role = input.Role
	return assignment, nil
}
// ListNodeRoleAssignments returns the nodeRoles entry for nodeID (nil when
// there is none); the cluster ID is ignored.
func (f *fakeRepository) ListNodeRoleAssignments(_ context.Context, _ string, nodeID string) ([]NodeRoleAssignment, error) {
	assignments := f.nodeRoles[nodeID]
	return assignments, nil
}
// AttachExistingNodeToCluster remembers input in lastAttachInput and returns a
// canned active node carrying the requested node ID.
func (f *fakeRepository) AttachExistingNodeToCluster(_ context.Context, input AttachExistingNodeInput) (ClusterNode, error) {
	f.lastAttachInput = input

	node := ClusterNode{
		NodeKey:            "node-key-1",
		Name:               "Node One",
		RegistrationStatus: NodeRegistrationActive,
		MembershipStatus:   "active",
	}
	node.ID = input.NodeID
	return node, nil
}
// RecordHeartbeat builds a heartbeat from input, observed at the current UTC
// time, and puts it at the front of the node's heartbeat history so the newest
// entry comes first.
func (f *fakeRepository) RecordHeartbeat(_ context.Context, input RecordHeartbeatInput) (NodeHeartbeat, error) {
	observedAt := time.Now().UTC()
	beat := NodeHeartbeat{ID: "heartbeat-" + input.NodeID, ObservedAt: observedAt}
	beat.ClusterID = input.ClusterID
	beat.NodeID = input.NodeID
	beat.HealthStatus = input.HealthStatus
	beat.ReportedVersion = input.ReportedVersion
	beat.Capabilities = input.Capabilities
	beat.ServiceStates = input.ServiceStates
	beat.Metadata = input.Metadata

	if f.heartbeats == nil {
		f.heartbeats = map[string][]NodeHeartbeat{}
	}
	history := f.heartbeats[input.NodeID]
	f.heartbeats[input.NodeID] = append([]NodeHeartbeat{beat}, history...)
	return beat, nil
}
// ListNodeHeartbeats returns the whole stored heartbeat history for nodeID.
// The cluster ID and the limit argument are ignored.
func (f *fakeRepository) ListNodeHeartbeats(_ context.Context, _ string, nodeID string, _ int) ([]NodeHeartbeat, error) {
	history := f.heartbeats[nodeID]
	return history, nil
}
// CreateReleaseVersion builds a release "release-<version>" from input, converts
// each input artifact into a ReleaseArtifact, and puts the release at the front
// of releaseVersions.
//
// Every artifact receives the same ID ("<release ID>-artifact"), so
// GetReleaseArtifact can only ever find the first artifact of a release.
// Artifact CreatedAt values are spaced one second apart by index.
func (f *fakeRepository) CreateReleaseVersion(_ context.Context, input CreateReleaseVersionInput) (ReleaseVersion, error) {
	release := ReleaseVersion{
		ID:              "release-" + input.Version,
		ClusterID:       input.ClusterID,
		Product:         input.Product,
		Version:         input.Version,
		Channel:         input.Channel,
		Status:          input.Status,
		Compatibility:   input.Compatibility,
		Changelog:       input.Changelog,
		CreatedByUserID: &input.ActorUserID,
		CreatedAt:       time.Now().UTC(),
	}
	for idx := range input.Artifacts {
		src := input.Artifacts[idx]
		offset := time.Duration(idx) * time.Second
		converted := ReleaseArtifact{
			ID:          release.ID + "-artifact",
			ReleaseID:   release.ID,
			ClusterID:   input.ClusterID,
			Product:     input.Product,
			Version:     input.Version,
			OS:          src.OS,
			Arch:        src.Arch,
			InstallType: src.InstallType,
			Kind:        src.Kind,
			URL:         src.URL,
			SHA256:      src.SHA256,
			SizeBytes:   src.SizeBytes,
			Signature:   src.Signature,
			Metadata:    src.Metadata,
			CreatedAt:   time.Now().UTC().Add(offset),
		}
		release.Artifacts = append(release.Artifacts, converted)
	}
	f.releaseVersions = append([]ReleaseVersion{release}, f.releaseVersions...)
	return release, nil
}
// ListReleaseVersions returns the stored releases of clusterID in stored order.
// A non-empty product or channel narrows the result to exact matches; the
// result is nil when nothing matches.
func (f *fakeRepository) ListReleaseVersions(_ context.Context, clusterID, product, channel string) ([]ReleaseVersion, error) {
	var matched []ReleaseVersion
	for _, release := range f.releaseVersions {
		sameCluster := release.ClusterID == clusterID
		productOK := product == "" || release.Product == product
		channelOK := channel == "" || release.Channel == channel
		if sameCluster && productOK && channelOK {
			matched = append(matched, release)
		}
	}
	return matched, nil
}
// GetReleaseArtifact returns the first artifact with the given ID among the
// releases of clusterID, or pgx.ErrNoRows when there is none.
func (f *fakeRepository) GetReleaseArtifact(_ context.Context, clusterID, artifactID string) (ReleaseArtifact, error) {
	for r := range f.releaseVersions {
		release := f.releaseVersions[r]
		if release.ClusterID != clusterID {
			continue
		}
		for a := range release.Artifacts {
			if candidate := release.Artifacts[a]; candidate.ID == artifactID {
				return candidate, nil
			}
		}
	}
	return ReleaseArtifact{}, pgx.ErrNoRows
}
// ListNodeUpdateServiceCandidates returns the configured
// updateServiceCandidates for any cluster ID.
func (f *fakeRepository) ListNodeUpdateServiceCandidates(_ context.Context, _ string) ([]NodeUpdateServiceCandidate, error) {
	candidates := f.updateServiceCandidates
	return candidates, nil
}
// UpsertNodeUpdatePolicy builds a policy from input, stamps it with the current
// UTC time and stores it under the key "<node ID>|<product>", replacing any
// previous policy for that pair.
func (f *fakeRepository) UpsertNodeUpdatePolicy(_ context.Context, input UpsertNodeUpdatePolicyInput) (NodeUpdatePolicy, error) {
	var policy NodeUpdatePolicy
	policy.ClusterID = input.ClusterID
	policy.NodeID = input.NodeID
	policy.Product = input.Product
	policy.Channel = input.Channel
	policy.TargetVersion = input.TargetVersion
	policy.Strategy = input.Strategy
	policy.Enabled = input.Enabled
	policy.RollbackAllowed = input.RollbackAllowed
	policy.HealthWindowSec = input.HealthWindowSec
	policy.UpdatedByUserID = &input.ActorUserID
	policy.UpdatedAt = time.Now().UTC()

	if f.nodeUpdatePolicies == nil {
		f.nodeUpdatePolicies = map[string]NodeUpdatePolicy{}
	}
	f.nodeUpdatePolicies[input.NodeID+"|"+input.Product] = policy
	return policy, nil
}
// GetNodeUpdatePolicy looks up the policy stored under "<node ID>|<product>"
// and returns pgx.ErrNoRows when none exists. The cluster ID is ignored.
func (f *fakeRepository) GetNodeUpdatePolicy(_ context.Context, _ string, nodeID, product string) (NodeUpdatePolicy, error) {
	// Reading from a nil map yields ok == false, so no separate nil check is needed.
	if policy, ok := f.nodeUpdatePolicies[nodeID+"|"+product]; ok {
		return policy, nil
	}
	return NodeUpdatePolicy{}, pgx.ErrNoRows
}
// ReportNodeUpdateStatus converts input into a status with the fixed ID
// "status-1", appends it to updateStatuses and returns it.
func (f *fakeRepository) ReportNodeUpdateStatus(_ context.Context, input ReportNodeUpdateStatusInput) (NodeUpdateStatus, error) {
	status := NodeUpdateStatus{ID: "status-1"}
	status.ClusterID = input.ClusterID
	status.NodeID = input.NodeID
	status.Product = input.Product
	status.CurrentVersion = input.CurrentVersion
	status.TargetVersion = input.TargetVersion
	status.Phase = input.Phase
	status.Status = input.Status
	status.AttemptID = input.AttemptID
	status.ErrorMessage = input.ErrorMessage
	status.RollbackVersion = input.RollbackVersion
	status.Payload = input.Payload
	status.ObservedAt = input.ObservedAt

	f.updateStatuses = append(f.updateStatuses, status)
	return status, nil
}
// ListNodeUpdateStatuses returns the statuses recorded for the given cluster
// and node in the order they were reported, cut to the first limit entries when
// limit is positive. The result is never nil.
func (f *fakeRepository) ListNodeUpdateStatuses(_ context.Context, clusterID, nodeID string, limit int) ([]NodeUpdateStatus, error) {
	matches := make([]NodeUpdateStatus, 0)
	for i := range f.updateStatuses {
		if status := f.updateStatuses[i]; status.ClusterID == clusterID && status.NodeID == nodeID {
			matches = append(matches, status)
		}
	}
	if limit > 0 && limit < len(matches) {
		matches = matches[:limit]
	}
	return matches, nil
}
// RevokeNodeIdentity is a no-op that always succeeds.
func (f *fakeRepository) RevokeNodeIdentity(_ context.Context, _ RevokeNodeIdentityInput) error {
	var err error
	return err
}
// DisableClusterMembership is a no-op that always succeeds.
func (f *fakeRepository) DisableClusterMembership(_ context.Context, _ DisableMembershipInput) error {
	var err error
	return err
}
// DeleteClusterNode is a no-op that always succeeds.
func (f *fakeRepository) DeleteClusterNode(_ context.Context, _ DeleteClusterNodeInput) error {
	var err error
	return err
}
// UpsertFabricTestingFlag echoes input back as a FabricTestingFlag without
// storing anything.
func (f *fakeRepository) UpsertFabricTestingFlag(_ context.Context, input UpsertFabricTestingFlagInput) (FabricTestingFlag, error) {
	var flag FabricTestingFlag
	flag.ScopeType = input.ScopeType
	flag.ScopeID = input.ScopeID
	flag.ClusterID = input.ClusterID
	flag.Enabled = input.Enabled
	flag.TelemetryEnabled = input.TelemetryEnabled
	flag.SyntheticLinksEnabled = input.SyntheticLinksEnabled
	flag.HistoryRetentionHours = input.HistoryRetentionHours
	flag.Metadata = input.Metadata
	return flag, nil
}
// ListFabricTestingFlags is a stub: it always yields a nil slice and no error.
func (f *fakeRepository) ListFabricTestingFlags(_ context.Context) ([]FabricTestingFlag, error) {
	var none []FabricTestingFlag
	return none, nil
}
// GetEffectiveNodeTestingFlags returns the configured testingFlags for any
// cluster and node.
func (f *fakeRepository) GetEffectiveNodeTestingFlags(_ context.Context, _ string, _ string) (EffectiveNodeTestingFlags, error) {
	flags := f.testingFlags
	return flags, nil
}
// RecordNodeTelemetry stores an observation built from input at the front of
// the node's telemetry history. A zero input.ObservedAt is replaced by the
// current UTC time.
func (f *fakeRepository) RecordNodeTelemetry(_ context.Context, input RecordNodeTelemetryInput) (NodeTelemetryObservation, error) {
	observedAt := input.ObservedAt
	if observedAt.IsZero() {
		observedAt = time.Now().UTC()
	}
	sample := NodeTelemetryObservation{
		ClusterID:  input.ClusterID,
		NodeID:     input.NodeID,
		Payload:    input.Payload,
		ObservedAt: observedAt,
	}

	if f.nodeTelemetry == nil {
		f.nodeTelemetry = map[string][]NodeTelemetryObservation{}
	}
	history := f.nodeTelemetry[input.NodeID]
	f.nodeTelemetry[input.NodeID] = append([]NodeTelemetryObservation{sample}, history...)
	return sample, nil
}
// ListNodeTelemetry returns a copy of the telemetry history stored for nodeID,
// cut to the first limit entries when limit is positive. The result is never
// nil and the cluster ID is ignored.
func (f *fakeRepository) ListNodeTelemetry(_ context.Context, _ string, nodeID string, limit int) ([]NodeTelemetryObservation, error) {
	stored := f.nodeTelemetry[nodeID]
	snapshot := make([]NodeTelemetryObservation, len(stored))
	copy(snapshot, stored)
	if limit > 0 && limit < len(snapshot) {
		snapshot = snapshot[:limit]
	}
	return snapshot, nil
}
// SetDesiredWorkload echoes input back as a desired state without storing it.
func (f *fakeRepository) SetDesiredWorkload(_ context.Context, input SetDesiredWorkloadInput) (NodeWorkloadDesiredState, error) {
	var desired NodeWorkloadDesiredState
	desired.ClusterID = input.ClusterID
	desired.NodeID = input.NodeID
	desired.ServiceType = input.ServiceType
	desired.DesiredState = input.DesiredState
	desired.RuntimeMode = input.RuntimeMode
	desired.Config = input.Config
	desired.Environment = input.Environment
	return desired, nil
}
// ListDesiredWorkloads returns the configured desiredWorkloads that belong to
// the given cluster and node. The result is never nil.
func (f *fakeRepository) ListDesiredWorkloads(_ context.Context, clusterID, nodeID string) ([]NodeWorkloadDesiredState, error) {
	matches := make([]NodeWorkloadDesiredState, 0)
	for i := range f.desiredWorkloads {
		workload := f.desiredWorkloads[i]
		if workload.ClusterID != clusterID || workload.NodeID != nodeID {
			continue
		}
		matches = append(matches, workload)
	}
	return matches, nil
}
// ReportWorkloadStatus echoes input back as a workload status without storing it.
func (f *fakeRepository) ReportWorkloadStatus(_ context.Context, input ReportWorkloadStatusInput) (NodeWorkloadStatus, error) {
	var status NodeWorkloadStatus
	status.ClusterID = input.ClusterID
	status.NodeID = input.NodeID
	status.ServiceType = input.ServiceType
	status.ReportedState = input.ReportedState
	status.RuntimeMode = input.RuntimeMode
	status.StatusPayload = input.StatusPayload
	return status, nil
}
// ListLatestWorkloadStatuses is a stub: it always yields a nil slice and no error.
func (f *fakeRepository) ListLatestWorkloadStatuses(_ context.Context, _ string, _ string) ([]NodeWorkloadStatus, error) {
	var none []NodeWorkloadStatus
	return none, nil
}
// ReportMeshLink echoes input back as a link observation without storing it.
func (f *fakeRepository) ReportMeshLink(_ context.Context, input ReportMeshLinkInput) (MeshLinkObservation, error) {
	var link MeshLinkObservation
	link.ClusterID = input.ClusterID
	link.SourceNodeID = input.SourceNodeID
	link.TargetNodeID = input.TargetNodeID
	link.LinkStatus = input.LinkStatus
	link.Metadata = input.Metadata
	return link, nil
}
// ListMeshLinks returns the configured meshLinks for any cluster ID.
func (f *fakeRepository) ListMeshLinks(_ context.Context, _ string) ([]MeshLinkObservation, error) {
	links := f.meshLinks
	return links, nil
}
// CreateRouteIntent records input in createdRouteIntents and appends a new
// active intent to routeIntents. The intent ID is "route-intent-<n>", where n
// is the number of intents created through this fake so far.
func (f *fakeRepository) CreateRouteIntent(_ context.Context, input CreateRouteIntentInput) (MeshRouteIntent, error) {
	f.createdRouteIntents = append(f.createdRouteIntents, input)
	sequence := len(f.createdRouteIntents)

	intent := MeshRouteIntent{
		ID:        "route-intent-" + strconv.Itoa(sequence),
		Status:    "active",
		UpdatedAt: time.Now().UTC(),
	}
	intent.ClusterID = input.ClusterID
	intent.SourceSelector = input.SourceSelector
	intent.DestinationSelector = input.DestinationSelector
	intent.ServiceClass = input.ServiceClass
	intent.Priority = input.Priority
	intent.Policy = input.Policy

	f.routeIntents = append(f.routeIntents, intent)
	return intent, nil
}
// ListRouteIntents returns all stored routeIntents for any cluster ID.
func (f *fakeRepository) ListRouteIntents(_ context.Context, _ string) ([]MeshRouteIntent, error) {
	intents := f.routeIntents
	return intents, nil
}
// ExpireRouteIntent finds the intent matching input's cluster and intent ID,
// writes "expires_at" and an "operator_expire" record (timestamp and reason)
// into its policy, sets UpdatedAt to expiresAt in UTC, stores and returns it.
// It returns pgx.ErrNoRows when no intent matches.
func (f *fakeRepository) ExpireRouteIntent(_ context.Context, input RouteIntentLifecycleInput, expiresAt time.Time) (MeshRouteIntent, error) {
	for idx := range f.routeIntents {
		intent := f.routeIntents[idx]
		if intent.ClusterID != input.ClusterID || intent.ID != input.RouteIntentID {
			continue
		}

		// Best effort: a policy that cannot be decoded is treated as empty.
		var policy map[string]any
		_ = json.Unmarshal(intent.Policy, &policy)
		if policy == nil {
			policy = map[string]any{}
		}
		expiry := expiresAt.UTC()
		policy["expires_at"] = expiry.Format(time.RFC3339Nano)
		policy["operator_expire"] = map[string]any{
			"expired_at": expiry.Format(time.RFC3339Nano),
			"reason":     input.Reason,
		}

		intent.Policy = mustJSONRaw(policy)
		intent.UpdatedAt = expiry
		f.routeIntents[idx] = intent
		return intent, nil
	}
	return MeshRouteIntent{}, pgx.ErrNoRows
}
// DisableRouteIntent finds the intent matching input's cluster and intent ID,
// marks it "disabled", records the reason under "operator_disable" in its
// policy, refreshes UpdatedAt, stores and returns it. It returns pgx.ErrNoRows
// when no intent matches.
func (f *fakeRepository) DisableRouteIntent(_ context.Context, input RouteIntentLifecycleInput) (MeshRouteIntent, error) {
	for idx := range f.routeIntents {
		intent := f.routeIntents[idx]
		if intent.ClusterID != input.ClusterID || intent.ID != input.RouteIntentID {
			continue
		}

		// Best effort: a policy that cannot be decoded is treated as empty.
		var policy map[string]any
		_ = json.Unmarshal(intent.Policy, &policy)
		if policy == nil {
			policy = map[string]any{}
		}
		policy["operator_disable"] = map[string]any{"reason": input.Reason}

		intent.Status = "disabled"
		intent.Policy = mustJSONRaw(policy)
		intent.UpdatedAt = time.Now().UTC()
		f.routeIntents[idx] = intent
		return intent, nil
	}
	return MeshRouteIntent{}, pgx.ErrNoRows
}
// RecordFabricServiceChannelRouteFeedback appends a feedback observation built
// from input and returns it. A zero ObservedAt becomes the current UTC time.
//
// For feedback that is not "healthy", the first stored observation from the
// same cluster, reporter and route whose RetryCooldownUntil lies after the
// observation time causes input to be rewritten by
// fabricServiceChannelFeedbackSuppressedByOperatorCooldown before it is stored.
func (f *fakeRepository) RecordFabricServiceChannelRouteFeedback(_ context.Context, input RecordFabricServiceChannelRouteFeedbackInput) (FabricServiceChannelRouteFeedbackObservation, error) {
	observedAt := input.ObservedAt.UTC()
	if observedAt.IsZero() {
		observedAt = time.Now().UTC()
	}

	if input.FeedbackStatus != "healthy" {
		for idx := range f.fabricRouteFeedback {
			prior := f.fabricRouteFeedback[idx]
			sameRoute := prior.ClusterID == input.ClusterID &&
				prior.ReporterNodeID == input.ReporterNodeID &&
				prior.RouteID == input.RouteID
			coolingDown := prior.RetryCooldownUntil != nil && prior.RetryCooldownUntil.After(observedAt)
			if sameRoute && coolingDown {
				input = fabricServiceChannelFeedbackSuppressedByOperatorCooldown(input, *prior.RetryCooldownUntil, observedAt)
				break
			}
		}
	}

	sequence := len(f.fabricRouteFeedback) + 1
	observation := FabricServiceChannelRouteFeedbackObservation{
		ID:                  "fsc-feedback-" + strconv.Itoa(sequence),
		ClusterID:           input.ClusterID,
		ReporterNodeID:      input.ReporterNodeID,
		RouteID:             input.RouteID,
		ServiceClass:        input.ServiceClass,
		FeedbackStatus:      input.FeedbackStatus,
		ScoreAdjustment:     input.ScoreAdjustment,
		Reasons:             append([]string{}, input.Reasons...),
		LastError:           input.LastError,
		ConsecutiveFailures: input.ConsecutiveFailures,
		StallCount:          input.StallCount,
		LastSendDurationMs:  input.LastSendDurationMs,
		Payload:             input.Payload,
		ObservedAt:          observedAt,
		ExpiresAt:           input.ExpiresAt,
		RetryCooldownUntil:  fabricServiceChannelRetryCooldownUntil(input.Payload),
	}
	f.fabricRouteFeedback = append(f.fabricRouteFeedback, observation)
	return observation, nil
}
// ListFabricServiceChannelRouteFeedback returns the stored observations of
// input.ClusterID in insertion order. Non-empty reporter, route, service class
// and feedback status values act as exact-match filters. Observations whose
// ExpiresAt is set and not after input.Now are left out unless
// input.IncludeExpired is set. The result is never nil.
//
// input.Now is used as given (converted to UTC); there is no fallback to the
// current time when it is zero.
func (f *fakeRepository) ListFabricServiceChannelRouteFeedback(_ context.Context, input ListFabricServiceChannelRouteFeedbackInput) ([]FabricServiceChannelRouteFeedbackObservation, error) {
	now := input.Now.UTC()
	matches := make([]FabricServiceChannelRouteFeedbackObservation, 0)
	for _, observation := range f.fabricRouteFeedback {
		// Each non-default case names one reason to skip the observation.
		switch {
		case observation.ClusterID != input.ClusterID:
		case input.ReporterNodeID != "" && observation.ReporterNodeID != input.ReporterNodeID:
		case input.RouteID != "" && observation.RouteID != input.RouteID:
		case input.ServiceClass != "" && observation.ServiceClass != input.ServiceClass:
		case input.FeedbackStatus != "" && observation.FeedbackStatus != input.FeedbackStatus:
		case !input.IncludeExpired && !observation.ExpiresAt.IsZero() && !observation.ExpiresAt.After(now):
		default:
			matches = append(matches, observation)
		}
	}
	return matches, nil
}
// ExpireFabricServiceChannelRouteFeedback expires every stored observation of
// the given cluster and route (optionally narrowed by reporter and service
// class) that has not already expired: ExpiresAt is set to now and
// RetryCooldownUntil to now plus fabricServiceChannelOperatorExpireCooldown.
// A zero input.Now becomes the current UTC time. The result reports how many
// observations were touched.
func (f *fakeRepository) ExpireFabricServiceChannelRouteFeedback(_ context.Context, input ExpireFabricServiceChannelRouteFeedbackInput) (ExpireFabricServiceChannelRouteFeedbackResult, error) {
	now := input.Now.UTC()
	if now.IsZero() {
		now = time.Now().UTC()
	}
	cooldownUntil := now.Add(fabricServiceChannelOperatorExpireCooldown)

	expiredCount := 0
	for idx := range f.fabricRouteFeedback {
		entry := &f.fabricRouteFeedback[idx]
		switch {
		case entry.ClusterID != input.ClusterID, entry.RouteID != input.RouteID:
			continue
		case input.ReporterNodeID != "" && entry.ReporterNodeID != input.ReporterNodeID:
			continue
		case input.ServiceClass != "" && entry.ServiceClass != input.ServiceClass:
			continue
		case !entry.ExpiresAt.IsZero() && !entry.ExpiresAt.After(now):
			// Already expired; leave it untouched.
			continue
		}
		entry.ExpiresAt = now
		entry.RetryCooldownUntil = &cooldownUntil
		expiredCount++
	}

	result := ExpireFabricServiceChannelRouteFeedbackResult{
		ClusterID:      input.ClusterID,
		ReporterNodeID: input.ReporterNodeID,
		RouteID:        input.RouteID,
		ServiceClass:   input.ServiceClass,
		ExpiredCount:   expiredCount,
		ExpiredAt:      now,
		CooldownUntil:  cooldownUntil,
	}
	return result, nil
}
// StoreFabricServiceChannelLease builds a lease record from input and stores it
// in fabricLeases under the cache key for the lease's cluster and channel,
// replacing any previous record. The record keeps input.TokenHash, and the
// lease embedded in the record has Token.Token cleared. CreatedAt and UpdatedAt
// are both taken from the lease's IssuedAt.
func (f *fakeRepository) StoreFabricServiceChannelLease(_ context.Context, input StoreFabricServiceChannelLeaseInput) (FabricServiceChannelLeaseRecord, error) {
	lease := input.Lease
	redacted := lease
	redacted.Token.Token = ""

	var record FabricServiceChannelLeaseRecord
	record.ClusterID = lease.ClusterID
	record.ChannelID = lease.ChannelID
	record.TokenHash = input.TokenHash
	record.ResourceID = lease.ResourceID
	record.ServiceClass = lease.ServiceClass
	record.SelectedEntryNodeID = lease.SelectedEntryNodeID
	record.ExpiresAt = lease.ExpiresAt
	record.Lease = redacted
	record.CreatedAt = lease.IssuedAt
	record.UpdatedAt = lease.IssuedAt

	if f.fabricLeases == nil {
		f.fabricLeases = map[string]FabricServiceChannelLeaseRecord{}
	}
	key := fabricServiceChannelLeaseCacheKey(lease.ClusterID, lease.ChannelID)
	f.fabricLeases[key] = record
	return record, nil
}
// GetFabricServiceChannelLease looks up the stored lease record for the given
// cluster and channel. It returns pgx.ErrNoRows when no lease map exists yet
// or when the key is not present.
func (f *fakeRepository) GetFabricServiceChannelLease(_ context.Context, clusterID, channelID string) (FabricServiceChannelLeaseRecord, error) {
	if f.fabricLeases != nil {
		if record, found := f.fabricLeases[fabricServiceChannelLeaseCacheKey(clusterID, channelID)]; found {
			return record, nil
		}
	}
	return FabricServiceChannelLeaseRecord{}, pgx.ErrNoRows
}
// ListFabricServiceChannelLeases returns the stored lease records of
// input.ClusterID, optionally narrowed by service class, entry node and
// resource ID (each filter applies only when non-empty). Leases that are
// expired at the effective time are dropped unless input.IncludeExpired is
// set; a zero input.Now means "use the current wall-clock time".
//
// Results are ordered by ExpiresAt, latest first. Leases are held in a map,
// whose iteration order is random, and sort.Slice is not stable, so ties on
// ExpiresAt are broken by ChannelID to keep both the ordering and the
// Limit truncation deterministic across runs.
func (f *fakeRepository) ListFabricServiceChannelLeases(_ context.Context, input ListFabricServiceChannelLeasesInput) ([]FabricServiceChannelLeaseRecord, error) {
	now := input.Now
	if now.IsZero() {
		now = time.Now().UTC()
	}
	out := []FabricServiceChannelLeaseRecord{}
	for _, item := range f.fabricLeases {
		if item.ClusterID != input.ClusterID {
			continue
		}
		if input.ServiceClass != "" && item.ServiceClass != input.ServiceClass {
			continue
		}
		if input.EntryNodeID != "" && item.SelectedEntryNodeID != input.EntryNodeID {
			continue
		}
		if input.ResourceID != "" && item.ResourceID != input.ResourceID {
			continue
		}
		if !input.IncludeExpired && !item.ExpiresAt.IsZero() && !item.ExpiresAt.After(now) {
			continue
		}
		out = append(out, item)
	}
	sort.Slice(out, func(i, j int) bool {
		if !out[i].ExpiresAt.Equal(out[j].ExpiresAt) {
			return out[i].ExpiresAt.After(out[j].ExpiresAt)
		}
		// Equal expiry: fall back to a stable, data-derived order.
		return out[i].ChannelID < out[j].ChannelID
	})
	if input.Limit > 0 && len(out) > input.Limit {
		out = out[:input.Limit]
	}
	return out, nil
}
// CleanupExpiredFabricServiceChannelLeases deletes up to limit stored leases
// of clusterID whose non-zero ExpiresAt is not after now, and returns how many
// were removed. A zero now falls back to the current wall-clock time and a
// non-positive limit falls back to 100. When more leases are expired than the
// limit allows, which ones are removed follows map iteration order.
func (f *fakeRepository) CleanupExpiredFabricServiceChannelLeases(_ context.Context, clusterID string, now time.Time, limit int) (int, error) {
	if f.fabricLeases == nil {
		return 0, nil
	}
	if now.IsZero() {
		now = time.Now().UTC()
	}
	if limit <= 0 {
		limit = 100
	}

	removed := 0
	for key, record := range f.fabricLeases {
		if removed >= limit {
			break
		}
		if record.ClusterID != clusterID {
			continue
		}
		if record.ExpiresAt.IsZero() || record.ExpiresAt.After(now) {
			// Never expires, or still live.
			continue
		}
		delete(f.fabricLeases, key)
		removed++
	}
	return removed, nil
}
// RecordFabricServiceChannelRouteRebuildAttempt upserts a route rebuild
// attempt built from input. An existing attempt with the same cluster,
// reporter node, service class, route and rebuild request ID is replaced in
// place, keeping its ID and CreatedAt; otherwise the attempt is appended with
// an ID derived from the current number of stored attempts.
//
// The clock is read once so that a freshly inserted attempt has identical
// CreatedAt and UpdatedAt values.
func (f *fakeRepository) RecordFabricServiceChannelRouteRebuildAttempt(_ context.Context, input RecordFabricServiceChannelRouteRebuildAttemptInput) (FabricServiceChannelRouteRebuildAttempt, error) {
	now := time.Now().UTC()
	item := FabricServiceChannelRouteRebuildAttempt{
		ID:                               "fsc-rebuild-" + strconv.Itoa(len(f.fabricRebuildAttempts)+1),
		ClusterID:                        input.ClusterID,
		ReporterNodeID:                   input.ReporterNodeID,
		ServiceClass:                     input.ServiceClass,
		RouteID:                          input.RouteID,
		ReplacementRouteID:               input.ReplacementRouteID,
		RebuildRequestID:                 input.RebuildRequestID,
		RebuildStatus:                    input.RebuildStatus,
		RebuildReason:                    input.RebuildReason,
		RebuildAttempt:                   input.RebuildAttempt,
		DecisionSource:                   input.DecisionSource,
		Outcome:                          input.Outcome,
		Generation:                       input.Generation,
		PolicyFingerprint:                input.PolicyFingerprint,
		ObservedPolicyFingerprint:        input.ObservedPolicyFingerprint,
		ObservedRouteGeneration:          input.ObservedRouteGeneration,
		EffectiveRouteGeneration:         input.EffectiveRouteGeneration,
		FeedbackStatus:                   input.FeedbackStatus,
		FeedbackObservationID:            input.FeedbackObservationID,
		FeedbackSource:                   input.FeedbackSource,
		FeedbackObservedAt:               input.FeedbackObservedAt,
		FeedbackExpiresAt:                input.FeedbackExpiresAt,
		FeedbackChannelID:                input.FeedbackChannelID,
		FeedbackResourceID:               input.FeedbackResourceID,
		FeedbackViolationStatus:          input.FeedbackViolationStatus,
		FeedbackViolationReason:          input.FeedbackViolationReason,
		FeedbackScoreAdjustment:          input.FeedbackScoreAdjustment,
		FeedbackEffectiveScoreAdjustment: input.FeedbackEffectiveScoreAdjustment,
		// Slices are copied so later changes to the caller's input do not
		// leak into the stored attempt.
		FeedbackReasons:           append([]string{}, input.FeedbackReasons...),
		LastError:                 input.LastError,
		ConsecutiveFailures:       input.ConsecutiveFailures,
		StallCount:                input.StallCount,
		LastSendDurationMs:        input.LastSendDurationMs,
		QualityWindowSampleCount:  input.QualityWindowSampleCount,
		QualityWindowFailureCount: input.QualityWindowFailureCount,
		QualityWindowDropCount:    input.QualityWindowDropCount,
		QualityWindowSlowCount:    input.QualityWindowSlowCount,
		OldHops:                   append([]string{}, input.OldHops...),
		ReplacementHops:           append([]string{}, input.ReplacementHops...),
		Payload:                   input.Payload,
		CreatedAt:                 now,
		UpdatedAt:                 now,
	}
	for idx, current := range f.fabricRebuildAttempts {
		if current.ClusterID == item.ClusterID &&
			current.ReporterNodeID == item.ReporterNodeID &&
			current.ServiceClass == item.ServiceClass &&
			current.RouteID == item.RouteID &&
			current.RebuildRequestID == item.RebuildRequestID {
			// Update path: identity and creation time belong to the
			// original row.
			item.ID = current.ID
			item.CreatedAt = current.CreatedAt
			f.fabricRebuildAttempts[idx] = item
			return item, nil
		}
	}
	f.fabricRebuildAttempts = append(f.fabricRebuildAttempts, item)
	return item, nil
}
// ListFabricServiceChannelRouteRebuildAttempts returns the stored rebuild
// attempts of input.ClusterID in insertion order. Every other filter in input
// applies only when it is non-empty. The feedback source, channel ID and
// violation status filters compare against the attempt's own field, falling
// back to the like-named key in its JSON payload when that field is empty.
func (f *fakeRepository) ListFabricServiceChannelRouteRebuildAttempts(_ context.Context, input ListFabricServiceChannelRouteRebuildAttemptsInput) ([]FabricServiceChannelRouteRebuildAttempt, error) {
	matched := []FabricServiceChannelRouteRebuildAttempt{}
	for _, attempt := range f.fabricRebuildAttempts {
		// Filters on the attempt's own columns.
		switch {
		case attempt.ClusterID != input.ClusterID:
			continue
		case input.ReporterNodeID != "" && attempt.ReporterNodeID != input.ReporterNodeID:
			continue
		case input.RouteID != "" && attempt.RouteID != input.RouteID:
			continue
		case input.ReplacementRouteID != "" && attempt.ReplacementRouteID != input.ReplacementRouteID:
			continue
		case input.ServiceClass != "" && attempt.ServiceClass != input.ServiceClass:
			continue
		case input.RebuildStatus != "" && attempt.RebuildStatus != input.RebuildStatus:
			continue
		case input.RebuildRequestID != "" && attempt.RebuildRequestID != input.RebuildRequestID:
			continue
		}

		// Feedback filters may need to consult the payload.
		payload := jsonObject(attempt.Payload)
		switch {
		case input.FeedbackSource != "" && firstNonEmptyString(attempt.FeedbackSource, jsonString(payload, "feedback_source")) != input.FeedbackSource:
			continue
		case input.FeedbackChannelID != "" && firstNonEmptyString(attempt.FeedbackChannelID, jsonString(payload, "feedback_channel_id")) != input.FeedbackChannelID:
			continue
		case input.FeedbackViolationStatus != "" && firstNonEmptyString(attempt.FeedbackViolationStatus, jsonString(payload, "feedback_violation_status")) != input.FeedbackViolationStatus:
			continue
		}
		matched = append(matched, attempt)
	}
	return matched, nil
}
// UpdateFabricServiceChannelRouteRebuildCorrelationSnapshot copies the node
// transition, route generation, post-rebuild traffic, guard and timeline
// fields from input onto the stored rebuild attempt whose ID equals input.ID,
// and stamps it with input.CorrelationSnapshotAt. Only the first matching
// attempt is updated. An unknown ID is not an error: the call returns nil
// without changing anything.
func (f *fakeRepository) UpdateFabricServiceChannelRouteRebuildCorrelationSnapshot(_ context.Context, input UpdateFabricServiceChannelRouteRebuildCorrelationSnapshotInput) error {
	for idx := range f.fabricRebuildAttempts {
		attempt := &f.fabricRebuildAttempts[idx]
		if attempt.ID != input.ID {
			continue
		}

		// Node transition correlation.
		attempt.NodeTransitionStatus = input.NodeTransitionStatus
		attempt.NodeTransitionGeneration = input.NodeTransitionGeneration
		attempt.NodeTransitionObservedAt = input.NodeTransitionObservedAt
		attempt.NodeTransitionMatched = input.NodeTransitionMatched

		// Node route generation correlation.
		attempt.NodeRouteGenerationStatus = input.NodeRouteGenerationStatus
		attempt.NodeRouteGenerationAppliedAt = input.NodeRouteGenerationAppliedAt
		attempt.NodeRouteGenerationWithdrawnAt = input.NodeRouteGenerationWithdrawnAt
		attempt.NodeRouteGenerationMatched = input.NodeRouteGenerationMatched

		// Traffic observed after the rebuild.
		attempt.PostRebuildSelectedRouteID = input.PostRebuildSelectedRouteID
		attempt.PostRebuildSendPackets = input.PostRebuildSendPackets
		attempt.PostRebuildSendFailures = input.PostRebuildSendFailures
		attempt.PostRebuildSendFlowPackets = input.PostRebuildSendFlowPackets
		attempt.PostRebuildSendFlowDropped = input.PostRebuildSendFlowDropped

		// Guard verdict.
		attempt.GuardStatus = input.GuardStatus
		attempt.GuardSeverity = input.GuardSeverity
		attempt.GuardReason = input.GuardReason
		attempt.GuardTransitionDeadlineSeconds = input.GuardTransitionDeadlineSeconds
		attempt.GuardTrafficDeadlineSeconds = input.GuardTrafficDeadlineSeconds

		// The timeline is copied so the stored attempt does not alias the
		// caller's slice.
		attempt.Timeline = append([]FabricServiceChannelRouteRebuildTimelineEvent{}, input.Timeline...)

		capturedAt := input.CorrelationSnapshotAt
		attempt.CorrelationSnapshotAt = &capturedAt
		return nil
	}
	return nil
}
// GetFabricServiceChannelSchemaStatus always reports a ready schema for
// input.ClusterID: a single required check on the rebuild-attempts relation,
// marked present, observed at the current wall-clock time.
func (f *fakeRepository) GetFabricServiceChannelSchemaStatus(_ context.Context, input GetFabricServiceChannelSchemaStatusInput) (FabricServiceChannelSchemaStatus, error) {
	const (
		requiredMigration = "000028_fabric_service_channel_rebuild_correlation_snapshot"
		attemptsRelation  = "fabric_service_channel_route_rebuild_attempts"
	)

	check := FabricServiceChannelSchemaCheck{
		CheckID:      attemptsRelation,
		RelationName: attemptsRelation,
		Status:       "present",
		RequiredBy:   requiredMigration,
	}
	status := FabricServiceChannelSchemaStatus{
		ClusterID:          input.ClusterID,
		ObservedAt:         time.Now().UTC(),
		Status:             "ready",
		Reason:             "schema_ready",
		RequiredMigration:  requiredMigration,
		RequiredCheckCount: 1,
		PassedCheckCount:   1,
		RequiredChecks:     []FabricServiceChannelSchemaCheck{check},
	}
	return status, nil
}
// UpsertFabricServiceChannelRouteRebuildAlertSilence creates or replaces an
// alert silence built from input, expiring at expiresAt. A zero input.Now
// falls back to the current wall-clock time for CreatedAt.
//
// When input.RouteID parses as an access-decision silence route ID, the
// incident source defaults to "access_decision", the channel ID defaults to
// the parsed one, and DisplayRouteID is set to the parsed route ID.
//
// A silence is identified by cluster, reporter node, route ID, guard status
// and generation. Replacing an existing silence keeps its ID, so the ID a
// caller already holds stays valid and a later insert cannot be handed the
// same generated ID.
func (f *fakeRepository) UpsertFabricServiceChannelRouteRebuildAlertSilence(_ context.Context, input SilenceFabricServiceChannelRouteRebuildAlertInput, expiresAt time.Time) (FabricServiceChannelRouteRebuildAlertSilence, error) {
	createdAt := input.Now
	if createdAt.IsZero() {
		createdAt = time.Now().UTC()
	}
	item := FabricServiceChannelRouteRebuildAlertSilence{
		ID:              "fsc-rebuild-silence-" + strconv.Itoa(len(f.fabricRebuildSilences)+1),
		ClusterID:       input.ClusterID,
		IncidentSource:  input.IncidentSource,
		ChannelID:       input.ChannelID,
		ReporterNodeID:  input.ReporterNodeID,
		RouteID:         input.RouteID,
		DisplayRouteID:  input.RouteID,
		GuardStatus:     input.GuardStatus,
		Generation:      input.Generation,
		Reason:          input.Reason,
		CreatedByUserID: &input.ActorUserID,
		CreatedAt:       createdAt,
		ExpiresAt:       expiresAt,
		Payload: mustJSONRaw(map[string]any{
			"schema_version":  "rap.fabric_service_channel_rebuild_alert_silence.v1",
			"reason":          input.Reason,
			"incident_source": input.IncidentSource,
			"channel_id":      input.ChannelID,
		}),
	}
	if channelID, routeID, ok := fabricServiceChannelParseAccessDecisionSilenceRouteID(input.RouteID); ok {
		item.IncidentSource = firstNonEmptyString(item.IncidentSource, "access_decision")
		item.ChannelID = firstNonEmptyString(item.ChannelID, channelID)
		item.DisplayRouteID = routeID
	}
	for idx, current := range f.fabricRebuildSilences {
		if current.ClusterID == item.ClusterID && current.ReporterNodeID == item.ReporterNodeID && current.RouteID == item.RouteID && current.GuardStatus == item.GuardStatus && current.Generation == item.Generation {
			// Update path: the freshly generated len+1 ID must not replace
			// the existing one.
			item.ID = current.ID
			f.fabricRebuildSilences[idx] = item
			return item, nil
		}
	}
	f.fabricRebuildSilences = append(f.fabricRebuildSilences, item)
	return item, nil
}
// ListFabricServiceChannelRouteRebuildAlertSilences returns, in insertion
// order, the stored silences of clusterID that expire strictly after now.
func (f *fakeRepository) ListFabricServiceChannelRouteRebuildAlertSilences(_ context.Context, clusterID string, now time.Time) ([]FabricServiceChannelRouteRebuildAlertSilence, error) {
	active := []FabricServiceChannelRouteRebuildAlertSilence{}
	for _, silence := range f.fabricRebuildSilences {
		if silence.ClusterID != clusterID {
			continue
		}
		if !silence.ExpiresAt.After(now) {
			continue
		}
		active = append(active, silence)
	}
	return active, nil
}
// DeleteFabricServiceChannelRouteRebuildAlertSilence removes the first stored
// silence whose cluster and ID match input and returns it. It returns
// pgx.ErrNoRows when no silence matches.
func (f *fakeRepository) DeleteFabricServiceChannelRouteRebuildAlertSilence(_ context.Context, input UnsilenceFabricServiceChannelRouteRebuildAlertInput) (FabricServiceChannelRouteRebuildAlertSilence, error) {
	for idx := range f.fabricRebuildSilences {
		removed := f.fabricRebuildSilences[idx]
		if removed.ClusterID != input.ClusterID || removed.ID != input.SilenceID {
			continue
		}
		// Close the gap in place; removed was copied out before the shift.
		f.fabricRebuildSilences = append(f.fabricRebuildSilences[:idx], f.fabricRebuildSilences[idx+1:]...)
		return removed, nil
	}
	return FabricServiceChannelRouteRebuildAlertSilence{}, pgx.ErrNoRows
}
// ListQoSPolicies is a stub: it returns a nil slice and no error.
func (f *fakeRepository) ListQoSPolicies(context.Context, string) ([]MeshQoSPolicy, error) {
	var policies []MeshQoSPolicy
	return policies, nil
}
// ListFabricEntryPoints is a stub: it returns a nil slice and no error.
func (f *fakeRepository) ListFabricEntryPoints(context.Context, string) ([]FabricEntryPoint, error) {
	var entryPoints []FabricEntryPoint
	return entryPoints, nil
}
// CreateFabricEntryPoint remembers input in lastEntryPointInput and returns
// an entry point with the fixed ID "entry-1" that echoes the input fields.
func (f *fakeRepository) CreateFabricEntryPoint(_ context.Context, input CreateFabricEntryPointInput) (FabricEntryPoint, error) {
	f.lastEntryPointInput = input

	var created FabricEntryPoint
	created.ID = "entry-1"
	created.ClusterID = input.ClusterID
	created.Name = input.Name
	created.Status = input.Status
	created.EndpointType = input.EndpointType
	created.PublicEndpoint = input.PublicEndpoint
	created.Policy = input.Policy
	created.Metadata = input.Metadata
	return created, nil
}
// SetFabricEntryPointNode stores nothing; it returns an entry point node
// that echoes the fields of input.
func (f *fakeRepository) SetFabricEntryPointNode(_ context.Context, input SetFabricEntryPointNodeInput) (FabricEntryPointNode, error) {
	var node FabricEntryPointNode
	node.EntryPointID = input.EntryPointID
	node.ClusterID = input.ClusterID
	node.NodeID = input.NodeID
	node.Status = input.Status
	node.Priority = input.Priority
	node.Metadata = input.Metadata
	return node, nil
}
// ListFabricEntryPointNodes is a stub: it returns an empty, non-nil slice
// and no error.
func (f *fakeRepository) ListFabricEntryPointNodes(context.Context, string, string) ([]FabricEntryPointNode, error) {
	nodes := make([]FabricEntryPointNode, 0)
	return nodes, nil
}
// ListFabricEgressPools is a stub: it returns a nil slice and no error.
func (f *fakeRepository) ListFabricEgressPools(context.Context, string) ([]FabricEgressPool, error) {
	var pools []FabricEgressPool
	return pools, nil
}
// CreateFabricEgressPool remembers input in lastEgressPoolInput and returns
// an egress pool with the fixed ID "egress-1" that echoes the input fields.
func (f *fakeRepository) CreateFabricEgressPool(_ context.Context, input CreateFabricEgressPoolInput) (FabricEgressPool, error) {
	f.lastEgressPoolInput = input

	var created FabricEgressPool
	created.ID = "egress-1"
	created.ClusterID = input.ClusterID
	created.Name = input.Name
	created.Status = input.Status
	created.Description = input.Description
	created.RouteScope = input.RouteScope
	created.Policy = input.Policy
	created.Metadata = input.Metadata
	return created, nil
}
// SetFabricEgressPoolNode stores nothing; it returns an egress pool node
// that echoes the fields of input.
func (f *fakeRepository) SetFabricEgressPoolNode(_ context.Context, input SetFabricEgressPoolNodeInput) (FabricEgressPoolNode, error) {
	var node FabricEgressPoolNode
	node.EgressPoolID = input.EgressPoolID
	node.ClusterID = input.ClusterID
	node.NodeID = input.NodeID
	node.Status = input.Status
	node.Priority = input.Priority
	node.Metadata = input.Metadata
	return node, nil
}
// ListFabricEgressPoolNodes is a stub: it returns an empty, non-nil slice
// and no error.
func (f *fakeRepository) ListFabricEgressPoolNodes(context.Context, string, string) ([]FabricEgressPoolNode, error) {
	nodes := make([]FabricEgressPoolNode, 0)
	return nodes, nil
}
// GetClusterAuthorityState returns the authority state configured on the
// fake when it has a cluster ID. Otherwise it returns a default state for
// "cluster-1" that is authoritative with normal mutation mode. The requested
// cluster ID is ignored.
func (f *fakeRepository) GetClusterAuthorityState(context.Context, string) (ClusterAuthorityState, error) {
	if f.authorityState.ClusterID != "" {
		return f.authorityState, nil
	}
	fallback := ClusterAuthorityState{
		ClusterID:      "cluster-1",
		AuthorityState: "authoritative",
		MutationMode:   "normal",
	}
	return fallback, nil
}
// UpdateClusterAuthorityState stores nothing; it returns an authority state
// that echoes the fields of input.
func (f *fakeRepository) UpdateClusterAuthorityState(_ context.Context, input UpdateClusterAuthorityInput) (ClusterAuthorityState, error) {
	var state ClusterAuthorityState
	state.ClusterID = input.ClusterID
	state.AuthorityState = input.AuthorityState
	state.MutationMode = input.MutationMode
	state.Notes = input.Notes
	return state, nil
}
// ListClusterAdminSummaries is a stub: it returns a nil slice and no error.
func (f *fakeRepository) ListClusterAdminSummaries(context.Context) ([]ClusterAdminSummary, error) {
	var summaries []ClusterAdminSummary
	return summaries, nil
}
// CreateVPNConnection remembers input in lastVPNConnectionInput and returns
// a connection with the fixed ID "vpn-1" that echoes the input fields. The
// returned status is always VPNConnectionStatusDisabled.
func (f *fakeRepository) CreateVPNConnection(_ context.Context, input CreateVPNConnectionInput) (VPNConnection, error) {
	f.lastVPNConnectionInput = input

	var created VPNConnection
	created.ID = "vpn-1"
	created.ClusterID = input.ClusterID
	created.OrganizationID = input.OrganizationID
	created.Name = input.Name
	created.TargetEndpoint = input.TargetEndpoint
	created.ProtocolFamily = input.ProtocolFamily
	created.CredentialRef = input.CredentialRef
	created.Mode = input.Mode
	created.DesiredState = input.DesiredState
	created.AllowedNodePolicy = input.AllowedNodePolicy
	created.RoutingUsage = input.RoutingUsage
	created.RoutePolicy = input.RoutePolicy
	created.QoSPolicy = input.QoSPolicy
	created.PlacementPolicy = input.PlacementPolicy
	created.Status = VPNConnectionStatusDisabled
	created.Metadata = input.Metadata
	return created, nil
}
// ListVPNConnections is a stub: it returns a nil slice and no error.
func (f *fakeRepository) ListVPNConnections(context.Context, string) ([]VPNConnection, error) {
	var connections []VPNConnection
	return connections, nil
}
// GetVPNConnection returns the connection configured on the fake when it has
// an ID. Otherwise it returns a default single-active, enabled connection
// "vpn-1" in "cluster-1". The requested identifiers are ignored.
func (f *fakeRepository) GetVPNConnection(context.Context, string, string) (VPNConnection, error) {
	if f.vpnConnection.ID == "" {
		fallback := VPNConnection{
			ID:           "vpn-1",
			ClusterID:    "cluster-1",
			Mode:         VPNConnectionModeSingleActive,
			DesiredState: VPNConnectionDesiredEnabled,
		}
		return fallback, nil
	}
	return f.vpnConnection, nil
}
// UpdateVPNConnectionDesiredState stores nothing; it returns a connection
// carrying only the ID, cluster ID and desired state from input.
func (f *fakeRepository) UpdateVPNConnectionDesiredState(_ context.Context, input UpdateVPNConnectionDesiredStateInput) (VPNConnection, error) {
	updated := VPNConnection{
		ID:           input.VPNConnectionID,
		ClusterID:    input.ClusterID,
		DesiredState: input.DesiredState,
	}
	return updated, nil
}
// UpsertVPNConnectionRoutePolicy stores nothing; it returns a route policy
// with the fixed ID "route-policy-1" that echoes the fields of input.
func (f *fakeRepository) UpsertVPNConnectionRoutePolicy(_ context.Context, input UpsertVPNConnectionRoutePolicyInput) (VPNConnectionRoutePolicy, error) {
	var policy VPNConnectionRoutePolicy
	policy.ID = "route-policy-1"
	policy.VPNConnectionID = input.VPNConnectionID
	policy.ClusterID = input.ClusterID
	policy.RouteType = input.RouteType
	policy.Destination = input.Destination
	policy.Action = input.Action
	policy.ServiceType = input.ServiceType
	policy.Priority = input.Priority
	policy.Policy = input.Policy
	policy.Status = input.Status
	return policy, nil
}
// ListVPNConnectionRoutePolicies is a stub: it returns a nil slice and no
// error.
func (f *fakeRepository) ListVPNConnectionRoutePolicies(context.Context, string, string) ([]VPNConnectionRoutePolicy, error) {
	var policies []VPNConnectionRoutePolicy
	return policies, nil
}
// SetVPNConnectionAllowedNodes remembers input in lastAllowedNodesInput and
// returns one allowed-node entry per ID in input.NodeIDs, in the same order.
// Every entry has status "active" and shares the connection, cluster, role
// preference and metadata from input.
func (f *fakeRepository) SetVPNConnectionAllowedNodes(_ context.Context, input SetVPNConnectionAllowedNodesInput) ([]VPNConnectionAllowedNode, error) {
	f.lastAllowedNodesInput = input

	allowed := make([]VPNConnectionAllowedNode, len(input.NodeIDs))
	for idx, nodeID := range input.NodeIDs {
		allowed[idx] = VPNConnectionAllowedNode{
			VPNConnectionID: input.VPNConnectionID,
			ClusterID:       input.ClusterID,
			NodeID:          nodeID,
			RolePreference:  input.RolePreference,
			Status:          "active",
			Metadata:        input.Metadata,
		}
	}
	return allowed, nil
}
// ListVPNConnectionAllowedNodes is a stub: it returns a nil slice and no
// error.
func (f *fakeRepository) ListVPNConnectionAllowedNodes(context.Context, string, string) ([]VPNConnectionAllowedNode, error) {
	var allowed []VPNConnectionAllowedNode
	return allowed, nil
}
// AcquireVPNConnectionLease fails with acquireVPNLeaseErr when that is set.
// Otherwise it returns an active lease "lease-1" at generation 1 for the
// connection, cluster and owner in input, carrying the given fencing token
// and expiry.
func (f *fakeRepository) AcquireVPNConnectionLease(_ context.Context, input AcquireVPNConnectionLeaseInput, expiresAt time.Time, fencingToken string) (VPNConnectionLease, error) {
	if err := f.acquireVPNLeaseErr; err != nil {
		return VPNConnectionLease{}, err
	}

	var lease VPNConnectionLease
	lease.ID = "lease-1"
	lease.VPNConnectionID = input.VPNConnectionID
	lease.ClusterID = input.ClusterID
	lease.OwnerNodeID = input.OwnerNodeID
	lease.LeaseGeneration = 1
	lease.FencingToken = fencingToken
	lease.Status = VPNLeaseStatusActive
	lease.ExpiresAt = expiresAt
	lease.Metadata = input.Metadata
	return lease, nil
}
// RenewVPNConnectionLease fails with renewVPNLeaseErr when that is set.
// Otherwise it returns an active lease that echoes the identifiers and
// fencing token from input with the new expiry.
func (f *fakeRepository) RenewVPNConnectionLease(_ context.Context, input RenewVPNConnectionLeaseInput, expiresAt time.Time) (VPNConnectionLease, error) {
	if err := f.renewVPNLeaseErr; err != nil {
		return VPNConnectionLease{}, err
	}
	renewed := VPNConnectionLease{
		ID:              input.LeaseID,
		VPNConnectionID: input.VPNConnectionID,
		ClusterID:       input.ClusterID,
		OwnerNodeID:     input.OwnerNodeID,
		FencingToken:    input.FencingToken,
		Status:          VPNLeaseStatusActive,
		ExpiresAt:       expiresAt,
	}
	return renewed, nil
}
// RenewNodeVPNAssignmentLease fails with renewVPNLeaseErr when that is set.
// Otherwise it returns an active lease that echoes the identifiers from input
// with the new expiry. No fencing token is set on the result.
func (f *fakeRepository) RenewNodeVPNAssignmentLease(_ context.Context, input RenewNodeVPNAssignmentLeaseInput, expiresAt time.Time) (VPNConnectionLease, error) {
	if err := f.renewVPNLeaseErr; err != nil {
		return VPNConnectionLease{}, err
	}
	renewed := VPNConnectionLease{
		ID:              input.LeaseID,
		VPNConnectionID: input.VPNConnectionID,
		ClusterID:       input.ClusterID,
		OwnerNodeID:     input.OwnerNodeID,
		Status:          VPNLeaseStatusActive,
		ExpiresAt:       expiresAt,
	}
	return renewed, nil
}
// ReleaseVPNConnectionLease stores nothing; it returns a lease that echoes
// the identifiers and fencing token from input with status
// VPNLeaseStatusReleased.
func (f *fakeRepository) ReleaseVPNConnectionLease(_ context.Context, input ReleaseVPNConnectionLeaseInput) (VPNConnectionLease, error) {
	released := VPNConnectionLease{
		ID:              input.LeaseID,
		VPNConnectionID: input.VPNConnectionID,
		ClusterID:       input.ClusterID,
		OwnerNodeID:     input.OwnerNodeID,
		FencingToken:    input.FencingToken,
		Status:          VPNLeaseStatusReleased,
	}
	return released, nil
}
// FenceVPNConnectionLease stores nothing; it returns a lease that echoes the
// lease, connection and cluster IDs from input with status
// VPNLeaseStatusFenced.
func (f *fakeRepository) FenceVPNConnectionLease(_ context.Context, input FenceVPNConnectionLeaseInput) (VPNConnectionLease, error) {
	fenced := VPNConnectionLease{
		ID:              input.LeaseID,
		VPNConnectionID: input.VPNConnectionID,
		ClusterID:       input.ClusterID,
		Status:          VPNLeaseStatusFenced,
	}
	return fenced, nil
}
// GetActiveVPNConnectionLease always returns an active lease with ID
// "lease-1", regardless of the identifiers passed in.
func (f *fakeRepository) GetActiveVPNConnectionLease(context.Context, string, string) (VPNConnectionLease, error) {
	var lease VPNConnectionLease
	lease.ID = "lease-1"
	lease.Status = VPNLeaseStatusActive
	return lease, nil
}
// CheckVPNLeaseOwnerEligibility fails with ownerEligibilityErr when that is
// set, and otherwise returns the eligibility configured on the fake when it
// has a VPN connection ID. With neither configured it returns a default
// eligibility for node "node-1" that is active, allowed by policy and has an
// authorized role. The requested identifiers are ignored.
func (f *fakeRepository) CheckVPNLeaseOwnerEligibility(context.Context, string, string, string) (VPNLeaseOwnerEligibility, error) {
	switch {
	case f.ownerEligibilityErr != nil:
		return VPNLeaseOwnerEligibility{}, f.ownerEligibilityErr
	case f.ownerEligibility.VPNConnectionID != "":
		return f.ownerEligibility, nil
	}

	fallback := VPNLeaseOwnerEligibility{
		VPNConnectionID:        "vpn-1",
		ClusterID:              "cluster-1",
		OrganizationID:         "org-1",
		OwnerNodeID:            "node-1",
		MembershipStatus:       "active",
		NodeRegistrationStatus: NodeRegistrationActive,
		AllowedByPolicy:        true,
		HasAuthorizedRole:      true,
	}
	return fallback, nil
}
// ExpireStaleVPNConnectionLeases returns the expiredVPNLeases slice
// configured on the fake, ignoring the cluster ID and time passed in.
func (f *fakeRepository) ExpireStaleVPNConnectionLeases(context.Context, string, time.Time) ([]VPNConnectionLease, error) {
	expired := f.expiredVPNLeases
	return expired, nil
}
// ListNodeVPNAssignments returns the nodeVPNAssignments slice configured on
// the fake, ignoring the identifiers passed in.
func (f *fakeRepository) ListNodeVPNAssignments(context.Context, string, string) ([]NodeVPNAssignment, error) {
	assignments := f.nodeVPNAssignments
	return assignments, nil
}
// ReportNodeVPNAssignmentStatus stores nothing; it returns a status record
// with the fixed ID "status-1" that echoes the fields of input.
func (f *fakeRepository) ReportNodeVPNAssignmentStatus(_ context.Context, input ReportNodeVPNAssignmentStatusInput) (NodeVPNAssignmentStatus, error) {
	var status NodeVPNAssignmentStatus
	status.ID = "status-1"
	status.VPNConnectionID = input.VPNConnectionID
	status.ClusterID = input.ClusterID
	status.NodeID = input.NodeID
	status.ObservedStatus = input.ObservedStatus
	status.StatusPayload = input.StatusPayload
	status.ObservedAt = input.ObservedAt
	return status, nil
}
// GetVPNClientProfile records the preferred entry and exit node IDs on the
// fake and returns a client profile for the given cluster, organization and
// user, stamped with generatedAt. The profile configured on the fake is used
// as the base when it has a schema version; otherwise the base is an empty
// profile with schema version "rap.vpn_client_profile.v1".
func (f *fakeRepository) GetVPNClientProfile(
	_ context.Context,
	clusterID, organizationID, userID, preferredEntryNodeID, preferredExitNodeID string,
	generatedAt time.Time,
) (VPNClientProfile, error) {
	f.lastPreferredEntryNodeID = preferredEntryNodeID
	f.lastPreferredExitNodeID = preferredExitNodeID

	profile := VPNClientProfile{SchemaVersion: "rap.vpn_client_profile.v1"}
	if f.vpnClientProfile.SchemaVersion != "" {
		profile = f.vpnClientProfile
	}
	// Request-scoped fields always come from the call, not the fixture.
	profile.ClusterID = clusterID
	profile.OrganizationID = organizationID
	profile.UserID = userID
	profile.GeneratedAt = generatedAt
	return profile, nil
}
// RecordAudit appends event to the in-memory audit log and never fails.
func (f *fakeRepository) RecordAudit(_ context.Context, event ClusterAuditEvent) error {
	recorded := append(f.auditEvents, event)
	f.auditEvents = recorded
	return nil
}
// ListAuditEvents returns recorded audit events in insertion order, up to a
// limit. input.Limit is honored when it lies in 1..200; any other value
// means 100. An event is skipped when it carries a cluster ID that differs
// from a non-empty input.ClusterID, or when its event type or target type is
// not in the respective non-empty filter list (after trimStringSlice).
// Events without a cluster ID are not excluded by the cluster filter.
func (f *fakeRepository) ListAuditEvents(_ context.Context, input ListAuditEventsInput) ([]ClusterAuditEvent, error) {
	limit := 100
	if input.Limit > 0 && input.Limit <= 200 {
		limit = input.Limit
	}

	wantedEventTypes := map[string]bool{}
	for _, name := range trimStringSlice(input.EventTypes) {
		wantedEventTypes[name] = true
	}
	wantedTargetTypes := map[string]bool{}
	for _, name := range trimStringSlice(input.TargetTypes) {
		wantedTargetTypes[name] = true
	}

	selected := []ClusterAuditEvent{}
	for _, event := range f.auditEvents {
		switch {
		case event.ClusterID != nil && input.ClusterID != "" && *event.ClusterID != input.ClusterID:
			continue
		case len(wantedEventTypes) > 0 && !wantedEventTypes[event.EventType]:
			continue
		case len(wantedTargetTypes) > 0 && !wantedTargetTypes[event.TargetType]:
			continue
		}
		selected = append(selected, event)
		if len(selected) >= limit {
			break
		}
	}
	return selected, nil
}
// stringPtr returns a pointer to a fresh copy of value.
func stringPtr(value string) *string {
	ptr := new(string)
	*ptr = value
	return ptr
}