package cluster

import (
	"context"
	"crypto/rand"
	"encoding/hex"
	"encoding/json"
	"errors"
	"sort"
	"strings"
	"time"

	"github.com/jackc/pgx/v5"

	"github.com/example/remote-access-platform/backend/internal/platform/clusterauth"
)

// Sentinel errors returned by Service. Compare with errors.Is.
var (
	ErrAccessDenied              = errors.New("platform admin role is required")
	ErrInvalidPayload            = errors.New("invalid cluster payload")
	ErrInvalidJoinToken          = errors.New("invalid or expired join token")
	ErrInvalidNodeRole           = errors.New("invalid node role")
	ErrInvalidCluster            = errors.New("cluster not found")
	ErrInvalidJoinRequest        = errors.New("join request not found")
	ErrClusterReadOnly           = errors.New("cluster is not authoritative for policy mutation")
	ErrInvalidVPNConnection      = errors.New("vpn connection not found")
	ErrInvalidVPNLease           = errors.New("vpn connection lease not found")
	ErrVPNLeaseAlreadyActive     = errors.New("vpn connection already has an active lease")
	ErrVPNLeaseOwnerNotAllowed   = errors.New("vpn lease owner is not allowed")
	ErrVPNLeaseOwnerRoleRequired = errors.New("vpn lease owner requires active vpn-exit or vpn-connector role")
)

// Service implements the cluster operations on top of a Repository.
type Service struct {
	store Repository
	// now is the clock used for every timestamp the service produces.
	now func() time.Time
}

// NewService returns a Service backed by store whose clock reports the
// current time in UTC.
func NewService(store Repository) *Service {
	return &Service{store: store, now: func() time.Time { return time.Now().UTC() }}
}

// Schema identifiers embedded in the payloads signed with the cluster
// authority key.
const (
	clusterJoinTokenAuthoritySchema    = "rap.cluster.join_token.v1"
	clusterNodeApprovalAuthoritySchema = "rap.cluster.node_approval.v1"
	clusterMeshConfigAuthoritySchema   = "rap.cluster.mesh_config_snapshot.v1"
)

// clusterJoinTokenAuthorityPayload is the document signed by signJoinToken
// when a join token is issued.
type clusterJoinTokenAuthorityPayload struct {
	SchemaVersion        string          `json:"schema_version"`
	ClusterID            string          `json:"cluster_id"`
	TokenID              string          `json:"token_id"`
	Scope                json.RawMessage `json:"scope"`
	ExpiresAt            time.Time       `json:"expires_at"`
	MaxUses              int             `json:"max_uses"`
	CreatedByUserID      *string         `json:"created_by_user_id,omitempty"`
	IssuedAt             time.Time       `json:"issued_at"`
	ControlPlaneOnly     bool            `json:"control_plane_only"`
	ProductionForwarding bool            `json:"production_forwarding"`
}

// clusterNodeApprovalAuthorityPayload is the document signed when a join
// request is approved (signApprovedJoinRequest) or when an approved request
// without a stored signature is bootstrapped (bootstrapForApprovedJoinRequest).
type clusterNodeApprovalAuthorityPayload struct {
	SchemaVersion        string    `json:"schema_version"`
	ClusterID            string    `json:"cluster_id"`
	JoinRequestID        string    `json:"join_request_id"`
	NodeID               string    `json:"node_id"`
	NodeFingerprint      string    `json:"node_fingerprint"`
	IdentityStatus       string    `json:"identity_status"`
	HeartbeatEndpoint    string    `json:"heartbeat_endpoint"`
	ApprovedByUserID     string    `json:"approved_by_user_id"`
	IssuedAt             time.Time `json:"issued_at"`
	ControlPlaneOnly     bool      `json:"control_plane_only"`
	ProductionForwarding bool      `json:"production_forwarding"`
}

// clusterMeshConfigAuthorityPayload is the document signed by
// signSyntheticMeshConfig; it carries the hash of the config it covers.
type clusterMeshConfigAuthorityPayload struct {
	SchemaVersion        string    `json:"schema_version"`
	ClusterID            string    `json:"cluster_id"`
	LocalNodeID          string    `json:"local_node_id"`
	ConfigVersion        string    `json:"config_version"`
	ConfigSHA256         string    `json:"config_sha256"`
	IssuedAt             time.Time `json:"issued_at"`
	ExpiresAt            time.Time `json:"expires_at"`
	ControlPlaneOnly     bool      `json:"control_plane_only"`
	ProductionForwarding bool      `json:"production_forwarding"`
}

// ListClusters returns the clusters held by the store once
// ensurePlatformAdmin accepts actorUserID.
func (s *Service) ListClusters(ctx context.Context, actorUserID string) ([]Cluster, error) {
	if err := s.ensurePlatformAdmin(ctx, actorUserID); err != nil {
		return nil, err
	}
	return s.store.ListClusters(ctx)
}

// GetCluster returns the cluster identified by clusterID. A missing row is
// reported as ErrInvalidCluster.
func (s *Service) GetCluster(ctx context.Context, actorUserID, clusterID string) (Cluster, error) {
	if err := s.ensurePlatformAdmin(ctx, actorUserID); err != nil {
		return Cluster{}, err
	}
	item, err := s.store.GetCluster(ctx, clusterID)
	if errors.Is(err, pgx.ErrNoRows) {
		return Cluster{}, ErrInvalidCluster
	}
	return item, err
}

// CreateCluster validates input, stores the cluster, tries to provision its
// signing authority and records a "cluster.created" audit event.
//
// Slug and Name are trimmed and must be non-empty (ErrInvalidPayload).
// Metadata defaults to `{}` and must be valid JSON.
func (s *Service) CreateCluster(ctx context.Context, input CreateClusterInput) (Cluster, error) {
	if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil {
		return Cluster{}, err
	}
	input.Slug = strings.TrimSpace(input.Slug)
	input.Name = strings.TrimSpace(input.Name)
	if input.Slug == "" || input.Name == "" {
		return Cluster{}, ErrInvalidPayload
	}
	input.Metadata = defaultJSON(input.Metadata, `{}`)
	if !json.Valid(input.Metadata) {
		return Cluster{}, errors.New("metadata must be valid json")
	}
	item, err := s.store.CreateCluster(ctx, input)
	if err != nil {
		return Cluster{}, err
	}

	// The authority is provisioned on a best-effort basis here: a failure
	// does not fail cluster creation, it only leaves the audit payload empty.
	// Later signing paths call ensureClusterAuthority again.
	auditPayload := json.RawMessage(`{}`)
	if authorityKey, err := s.ensureClusterAuthority(ctx, item.ID, &input.ActorUserID); err == nil {
		raw, err := json.Marshal(map[string]any{
			"cluster_authority": map[string]any{
				"key_algorithm":          authorityKey.KeyAlgorithm,
				"public_key_fingerprint": authorityKey.PublicKeyFingerprint,
			},
		})
		// Keep the `{}` default rather than storing a nil payload if
		// encoding fails.
		if err == nil {
			auditPayload = raw
		}
	}
	// The audit error is discarded, so a failed audit write does not fail
	// the call.
	_ = s.store.RecordAudit(ctx, ClusterAuditEvent{
		ClusterID:   &item.ID,
		ActorUserID: &input.ActorUserID,
		EventType:   "cluster.created",
		TargetType:  "cluster",
		TargetID:    &item.ID,
		Payload:     auditPayload,
		CreatedAt:   s.now(),
	})
	return item, nil
}

// ensureClusterAuthority returns the stored authority key for clusterID and
// asks the store to create one when none exists yet.
func (s *Service) ensureClusterAuthority(ctx context.Context, clusterID string, actorUserID *string) (ClusterAuthorityKey, error) {
	authorityKey, err := s.store.GetClusterAuthority(ctx, clusterID)
	if errors.Is(err, pgx.ErrNoRows) {
		return s.store.EnsureClusterAuthority(ctx, clusterID, actorUserID)
	}
	return authorityKey, err
}

// authorityDescriptor returns a copy of the key's descriptor, filling in
// clusterauth.AuthoritySchemaVersion when the schema version is empty.
func authorityDescriptor(authorityKey ClusterAuthorityKey) *ClusterAuthorityDescriptor {
	descriptor := authorityKey.ClusterAuthorityDescriptor
	if descriptor.SchemaVersion == "" {
		descriptor.SchemaVersion = clusterauth.AuthoritySchemaVersion
	}
	return &descriptor
}

// UpdateCluster changes a cluster's name, status and metadata and records a
// "cluster.updated" audit event.
//
// An empty ClusterID or a missing row yields ErrInvalidCluster. Name is
// trimmed and must be non-empty; Status defaults to ClusterStatusActive and
// must be ClusterStatusActive or ClusterStatusDisabled (ErrInvalidPayload).
func (s *Service) UpdateCluster(ctx context.Context, input UpdateClusterInput) (Cluster, error) {
	if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil {
		return Cluster{}, err
	}
	if input.ClusterID == "" {
		return Cluster{}, ErrInvalidCluster
	}
	if err := s.ensureClusterMutable(ctx, input.ActorUserID, input.ClusterID); err != nil {
		return Cluster{}, err
	}
	input.Name = strings.TrimSpace(input.Name)
	input.Status = strings.TrimSpace(input.Status)
	if input.Name == "" {
		return Cluster{}, ErrInvalidPayload
	}
	if input.Status == "" {
		input.Status = ClusterStatusActive
	}
	if input.Status != ClusterStatusActive && input.Status != ClusterStatusDisabled {
		return Cluster{}, ErrInvalidPayload
	}
	input.Metadata = defaultJSON(input.Metadata, `{}`)
	if !json.Valid(input.Metadata) {
		return Cluster{}, errors.New("metadata must be valid json")
	}
	item, err := s.store.UpdateCluster(ctx, input)
	if errors.Is(err, pgx.ErrNoRows) {
		return Cluster{}, ErrInvalidCluster
	}
	if err != nil {
		return Cluster{}, err
	}
	payload, _ := json.Marshal(map[string]any{
		"name":   item.Name,
		"status": item.Status,
		"region": item.Region,
	})
	// The audit error is discarded, so a failed audit write does not fail
	// the call.
	_ = s.store.RecordAudit(ctx, ClusterAuditEvent{
		ClusterID:   &item.ID,
		ActorUserID: &input.ActorUserID,
		EventType:   "cluster.updated",
		TargetType:  "cluster",
		TargetID:    &item.ID,
		Payload:     payload,
		CreatedAt:   s.now(),
	})
	return item, nil
}

// ListClusterNodes returns the nodes the store reports for clusterID.
func (s *Service) ListClusterNodes(ctx context.Context, actorUserID, clusterID string) ([]ClusterNode, error) {
	if err := s.ensurePlatformAdmin(ctx, actorUserID); err != nil {
		return nil, err
	}
	return s.store.ListClusterNodes(ctx, clusterID)
}

// ListNodeGroups returns the node groups the store reports for clusterID.
func (s *Service) ListNodeGroups(ctx context.Context, actorUserID, clusterID string) ([]ClusterNodeGroup, error) {
	if err := s.ensurePlatformAdmin(ctx, actorUserID); err != nil {
		return nil, err
	}
	return s.store.ListNodeGroups(ctx, clusterID)
}

// CreateNodeGroup creates a node group in a mutable cluster.
//
// ClusterID and the trimmed Name must be non-empty (ErrInvalidPayload).
// Description is trimmed when present; Metadata defaults to `{}` and must be
// valid JSON. pgx.ErrNoRows from the store is reported as ErrInvalidPayload.
func (s *Service) CreateNodeGroup(ctx context.Context, input CreateNodeGroupInput) (ClusterNodeGroup, error) {
	if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil {
		return ClusterNodeGroup{}, err
	}
	// Reject a missing cluster ID before ensureClusterMutable is asked about
	// it, matching UpdateCluster.
	if input.ClusterID == "" {
		return ClusterNodeGroup{}, ErrInvalidPayload
	}
	if err := s.ensureClusterMutable(ctx, input.ActorUserID, input.ClusterID); err != nil {
		return ClusterNodeGroup{}, err
	}
	input.Name = strings.TrimSpace(input.Name)
	if input.Name == "" {
		return ClusterNodeGroup{}, ErrInvalidPayload
	}
	if input.Description != nil {
		trimmed := strings.TrimSpace(*input.Description)
		input.Description = &trimmed
	}
	input.Metadata = defaultJSON(input.Metadata, `{}`)
	if !json.Valid(input.Metadata) {
		return ClusterNodeGroup{}, errors.New("node group metadata must be valid json")
	}
	item, err := s.store.CreateNodeGroup(ctx, input)
	if errors.Is(err, pgx.ErrNoRows) {
		return ClusterNodeGroup{}, ErrInvalidPayload
	}
	return item, err
}

// CreateJoinToken issues a join token for a cluster, signs it with the
// cluster authority and records a "node_join_token.created" audit event.
//
// Scope defaults to `{}` and must be valid JSON. A zero ExpiresAt is replaced
// by defaultJoinTokenExpiry; ExpiresAt must be strictly after the current
// time. MaxUses defaults to 1 when not positive. Only the hash of the raw
// token is passed to the store; the raw token is returned in the result.
func (s *Service) CreateJoinToken(ctx context.Context, input CreateJoinTokenInput) (CreatedJoinToken, error) {
	if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil {
		return CreatedJoinToken{}, err
	}
	if input.ClusterID == "" {
		return CreatedJoinToken{}, ErrInvalidCluster
	}
	input.Scope = defaultJSON(input.Scope, `{}`)
	if !json.Valid(input.Scope) {
		return CreatedJoinToken{}, errors.New("scope must be valid json")
	}
	// Use one clock reading for both the default and the check so they
	// cannot disagree.
	now := s.now()
	if input.ExpiresAt.IsZero() {
		input.ExpiresAt = defaultJoinTokenExpiry(now)
	}
	// "In the future" excludes the current instant, hence !After rather
	// than Before.
	if !input.ExpiresAt.After(now) {
		return CreatedJoinToken{}, errors.New("expires_at must be in the future")
	}
	if input.MaxUses <= 0 {
		input.MaxUses = 1
	}
	rawToken, err := generateJoinToken()
	if err != nil {
		return CreatedJoinToken{}, err
	}
	tokenHash, err := hashJoinToken(rawToken)
	if err != nil {
		return CreatedJoinToken{}, err
	}
	item, err := s.store.CreateJoinToken(ctx, input, tokenHash)
	if err != nil {
		return CreatedJoinToken{}, err
	}
	item, err = s.signJoinToken(ctx, input, item)
	if err != nil {
		return CreatedJoinToken{}, err
	}
	// The audit error is discarded, so a failed audit write does not fail
	// the call.
	_ = s.store.RecordAudit(ctx, ClusterAuditEvent{
		ClusterID:   &input.ClusterID,
		ActorUserID: &input.ActorUserID,
		EventType:   "node_join_token.created",
		TargetType:  "node_join_token",
		TargetID:    &item.ID,
		Payload:     json.RawMessage(`{"raw_token_returned_once":true}`),
		CreatedAt:   s.now(),
	})
	return CreatedJoinToken{NodeJoinToken: item, Token: rawToken}, nil
}

// signJoinToken signs the stored token's attributes with the cluster
// authority key and persists the payload and signature on the token.
func (s *Service) signJoinToken(ctx context.Context, input CreateJoinTokenInput, item NodeJoinToken) (NodeJoinToken, error) {
	authorityKey, err := s.ensureClusterAuthority(ctx, input.ClusterID, &input.ActorUserID)
	if err != nil {
		return NodeJoinToken{}, err
	}
	payload := clusterJoinTokenAuthorityPayload{
		SchemaVersion:        clusterJoinTokenAuthoritySchema,
		ClusterID:            input.ClusterID,
		TokenID:              item.ID,
		Scope:                item.Scope,
		ExpiresAt:            item.ExpiresAt,
		MaxUses:              item.MaxUses,
		CreatedByUserID:      item.CreatedByUserID,
		IssuedAt:             item.CreatedAt,
		ControlPlaneOnly:     true,
		ProductionForwarding: false,
	}
	rawPayload, signature, err := clusterauth.SignPayload(authorityKey.PrivateKey, payload, s.now())
	if err != nil {
		return NodeJoinToken{}, err
	}
	return s.store.SetJoinTokenAuthority(ctx, input.ClusterID, item.ID, rawPayload, signature)
}

// CreateJoinRequest registers a join request presented with a join token.
//
// It performs no platform-admin check; the request is authorised by the
// join token, whose hash must match a valid token for the cluster
// (ErrInvalidJoinToken otherwise). NodeName, NodeFingerprint and PublicKey
// are trimmed and must be non-empty (ErrInvalidPayload). The reported JSON
// fields default to `{}` / `[]` and must be valid JSON.
func (s *Service) CreateJoinRequest(ctx context.Context, input CreateJoinRequestInput) (NodeJoinRequest, error) {
	if input.ClusterID == "" {
		return NodeJoinRequest{}, ErrInvalidCluster
	}
	if err := s.store.ExpireJoinTokens(ctx, input.ClusterID); err != nil {
		return NodeJoinRequest{}, err
	}
	input.NodeName = strings.TrimSpace(input.NodeName)
	input.NodeFingerprint = strings.TrimSpace(input.NodeFingerprint)
	input.PublicKey = strings.TrimSpace(input.PublicKey)
	if input.NodeName == "" || input.NodeFingerprint == "" || input.PublicKey == "" {
		return NodeJoinRequest{}, ErrInvalidPayload
	}
	input.ReportedCapabilities = defaultJSON(input.ReportedCapabilities, `{}`)
	input.ReportedFacts = defaultJSON(input.ReportedFacts, `{}`)
	input.RequestedRoles = defaultJSON(input.RequestedRoles, `[]`)
	if !json.Valid(input.ReportedCapabilities) || !json.Valid(input.ReportedFacts) || !json.Valid(input.RequestedRoles) {
		return NodeJoinRequest{}, errors.New("reported_capabilities, reported_facts, and requested_roles must be valid json")
	}
	tokenHash, err := hashJoinToken(input.JoinToken)
	if err != nil {
		return NodeJoinRequest{}, ErrInvalidJoinToken
	}
	token, err := s.store.GetValidJoinTokenByHash(ctx, input.ClusterID, tokenHash)
	if err != nil {
		if errors.Is(err, pgx.ErrNoRows) {
			return NodeJoinRequest{}, ErrInvalidJoinToken
		}
		return NodeJoinRequest{}, err
	}
	item, err := s.store.CreateJoinRequest(ctx, input, token.ID)
	if err != nil {
		if errors.Is(err, pgx.ErrNoRows) {
			return NodeJoinRequest{}, ErrInvalidJoinToken
		}
		return NodeJoinRequest{}, err
	}
	// The audit error is discarded, so a failed audit write does not fail
	// the call.
	_ = s.store.RecordAudit(ctx, ClusterAuditEvent{
		ClusterID:  &input.ClusterID,
		EventType:  "node_join_request.created",
		TargetType: "node_join_request",
		TargetID:   &item.ID,
		Payload:    json.RawMessage(`{"source":"node_agent"}`),
		CreatedAt:  s.now(),
	})
	return item, nil
}

// ListJoinRequests returns the join requests the store reports for clusterID.
func (s *Service) ListJoinRequests(ctx context.Context, actorUserID, clusterID string) ([]NodeJoinRequest, error) {
	if err := s.ensurePlatformAdmin(ctx, actorUserID); err != nil {
		return nil, err
	}
	return s.store.ListJoinRequests(ctx, clusterID)
}

// GetJoinRequestBootstrap reports the status of a join request and, once the
// request is approved, the signed bootstrap material for the node.
//
// All four identifying fields are trimmed and must be non-empty; a missing
// field or a missing row yields ErrInvalidJoinRequest. For a request that is
// not approved the result carries only the status and the request.
func (s *Service) GetJoinRequestBootstrap(ctx context.Context, input GetJoinRequestBootstrapInput) (JoinRequestBootstrapResult, error) {
	input.ClusterID = strings.TrimSpace(input.ClusterID)
	input.JoinRequestID = strings.TrimSpace(input.JoinRequestID)
	input.NodeFingerprint = strings.TrimSpace(input.NodeFingerprint)
	input.PublicKey = strings.TrimSpace(input.PublicKey)
	if input.ClusterID == "" || input.JoinRequestID == "" || input.NodeFingerprint == "" || input.PublicKey == "" {
		return JoinRequestBootstrapResult{}, ErrInvalidJoinRequest
	}
	item, err := s.store.GetJoinRequestForBootstrap(ctx, input)
	if errors.Is(err, pgx.ErrNoRows) {
		return JoinRequestBootstrapResult{}, ErrInvalidJoinRequest
	}
	if err != nil {
		return JoinRequestBootstrapResult{}, err
	}
	result := JoinRequestBootstrapResult{Status: item.Status, JoinRequest: item}
	if item.Status != JoinRequestStatusApproved {
		return result, nil
	}
	bootstrap, updated, err := s.bootstrapForApprovedJoinRequest(ctx, item)
	if err != nil {
		return JoinRequestBootstrapResult{}, err
	}
	result.JoinRequest = updated
	result.Bootstrap = &bootstrap
	return result, nil
}

// RevokeJoinToken revokes a join token and records a
// "node_join_token.revoked" audit event. A missing row yields
// ErrInvalidJoinToken.
func (s *Service) RevokeJoinToken(ctx context.Context, input RevokeJoinTokenInput) (NodeJoinToken, error) {
	if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil {
		return NodeJoinToken{}, err
	}
	item, err := s.store.RevokeJoinToken(ctx, input)
	if errors.Is(err, pgx.ErrNoRows) {
		return NodeJoinToken{}, ErrInvalidJoinToken
	}
	if err != nil {
		return NodeJoinToken{}, err
	}
	// The audit error is discarded, so a failed audit write does not fail
	// the call.
	_ = s.store.RecordAudit(ctx, ClusterAuditEvent{
		ClusterID:   &input.ClusterID,
		ActorUserID: &input.ActorUserID,
		EventType:   "node_join_token.revoked",
		TargetType:  "node_join_token",
		TargetID:    &input.TokenID,
		Payload:     json.RawMessage(`{}`),
		CreatedAt:   s.now(),
	})
	return item, nil
}

// ApproveJoinRequest approves a join request in a mutable cluster and signs
// the approval with the cluster authority.
//
// An empty ClusterID or JoinRequestID, or a missing row, yields
// ErrInvalidJoinRequest.
func (s *Service) ApproveJoinRequest(ctx context.Context, input ApproveJoinRequestInput) (ApprovedJoinRequest, error) {
	if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil {
		return ApprovedJoinRequest{}, err
	}
	// Reject missing identifiers before ensureClusterMutable is asked about
	// an empty cluster ID.
	if input.ClusterID == "" || input.JoinRequestID == "" {
		return ApprovedJoinRequest{}, ErrInvalidJoinRequest
	}
	if err := s.ensureClusterMutable(ctx, input.ActorUserID, input.ClusterID); err != nil {
		return ApprovedJoinRequest{}, err
	}
	item, err := s.store.ApproveJoinRequest(ctx, input)
	if errors.Is(err, pgx.ErrNoRows) {
		return ApprovedJoinRequest{}, ErrInvalidJoinRequest
	}
	if err != nil {
		return ApprovedJoinRequest{}, err
	}
	item, err = s.signApprovedJoinRequest(ctx, input, item)
	if err != nil {
		return ApprovedJoinRequest{}, err
	}
	return item, nil
}

// signApprovedJoinRequest signs the approval, stores the payload and
// signature on the join request, and attaches the authority descriptor,
// payload and signature to the returned bootstrap. An empty heartbeat
// endpoint is filled from nodeHeartbeatEndpoint before signing.
func (s *Service) signApprovedJoinRequest(ctx context.Context, input ApproveJoinRequestInput, item ApprovedJoinRequest) (ApprovedJoinRequest, error) {
	authorityKey, err := s.ensureClusterAuthority(ctx, input.ClusterID, &input.ActorUserID)
	if err != nil {
		return ApprovedJoinRequest{}, err
	}
	if item.Bootstrap.HeartbeatEndpoint == "" {
		item.Bootstrap.HeartbeatEndpoint = nodeHeartbeatEndpoint(input.ClusterID, item.Bootstrap.NodeID)
	}
	payload := clusterNodeApprovalAuthorityPayload{
		SchemaVersion:        clusterNodeApprovalAuthoritySchema,
		ClusterID:            input.ClusterID,
		JoinRequestID:        item.JoinRequest.ID,
		NodeID:               item.Bootstrap.NodeID,
		NodeFingerprint:      item.JoinRequest.NodeFingerprint,
		IdentityStatus:       item.Bootstrap.IdentityStatus,
		HeartbeatEndpoint:    item.Bootstrap.HeartbeatEndpoint,
		ApprovedByUserID:     input.ActorUserID,
		IssuedAt:             s.now(),
		ControlPlaneOnly:     true,
		ProductionForwarding: false,
	}
	rawPayload, signature, err := clusterauth.SignPayload(authorityKey.PrivateKey, payload, s.now())
	if err != nil {
		return ApprovedJoinRequest{}, err
	}
	updated, err := s.store.SetJoinRequestApprovalAuthority(ctx, input.ClusterID, item.JoinRequest.ID, rawPayload, signature)
	if err != nil {
		return ApprovedJoinRequest{}, err
	}
	item.JoinRequest = updated
	item.Bootstrap.ClusterAuthority = authorityDescriptor(authorityKey)
	item.Bootstrap.AuthorityPayload = rawPayload
	item.Bootstrap.AuthoritySignature = &signature
	return item, nil
}

// bootstrapForApprovedJoinRequest builds the bootstrap document for an
// approved join request and returns it with the (possibly updated) request.
//
// The request must be approved and carry a non-blank ApprovedNodeID
// (ErrInvalidJoinRequest otherwise). When the stored approval payload or
// signature is empty, a new approval is signed and stored, attributed to the
// reviewer or to "system" when no reviewer is recorded. Otherwise the stored
// signature is verified against the authority's public key.
func (s *Service) bootstrapForApprovedJoinRequest(ctx context.Context, item NodeJoinRequest) (NodeBootstrap, NodeJoinRequest, error) {
	if item.Status != JoinRequestStatusApproved || item.ApprovedNodeID == nil || strings.TrimSpace(*item.ApprovedNodeID) == "" {
		return NodeBootstrap{}, NodeJoinRequest{}, ErrInvalidJoinRequest
	}
	authorityKey, err := s.ensureClusterAuthority(ctx, item.ClusterID, item.ReviewedByUserID)
	if err != nil {
		return NodeBootstrap{}, NodeJoinRequest{}, err
	}
	heartbeatEndpoint := nodeHeartbeatEndpoint(item.ClusterID, *item.ApprovedNodeID)
	identityStatus := NodeRegistrationActive

	needsSigning := rawMessageEmpty(item.ApprovalPayload) || rawMessageEmpty(item.ApprovalSignature)
	if needsSigning {
		approvedBy := "system"
		if item.ReviewedByUserID != nil && strings.TrimSpace(*item.ReviewedByUserID) != "" {
			approvedBy = strings.TrimSpace(*item.ReviewedByUserID)
		}
		payload := clusterNodeApprovalAuthorityPayload{
			SchemaVersion:        clusterNodeApprovalAuthoritySchema,
			ClusterID:            item.ClusterID,
			JoinRequestID:        item.ID,
			NodeID:               *item.ApprovedNodeID,
			NodeFingerprint:      item.NodeFingerprint,
			IdentityStatus:       identityStatus,
			HeartbeatEndpoint:    heartbeatEndpoint,
			ApprovedByUserID:     approvedBy,
			IssuedAt:             s.now(),
			ControlPlaneOnly:     true,
			ProductionForwarding: false,
		}
		rawPayload, signed, err := clusterauth.SignPayload(authorityKey.PrivateKey, payload, s.now())
		if err != nil {
			return NodeBootstrap{}, NodeJoinRequest{}, err
		}
		// From here on item is the stored record, so the bootstrap below
		// reflects exactly what was persisted.
		item, err = s.store.SetJoinRequestApprovalAuthority(ctx, item.ClusterID, item.ID, rawPayload, signed)
		if err != nil {
			return NodeBootstrap{}, NodeJoinRequest{}, err
		}
	}

	// Decode the stored signature once; both paths need it for the result.
	var signature ClusterSignature
	if err := json.Unmarshal(item.ApprovalSignature, &signature); err != nil {
		return NodeBootstrap{}, NodeJoinRequest{}, err
	}
	// A signature that was already stored is verified before it is handed
	// out; one produced just above is not re-verified.
	if !needsSigning {
		if err := clusterauth.VerifyRaw(authorityKey.PublicKey, item.ApprovalPayload, signature); err != nil {
			return NodeBootstrap{}, NodeJoinRequest{}, err
		}
	}
	bootstrap := NodeBootstrap{
		NodeID:         *item.ApprovedNodeID,
		ClusterID:      item.ClusterID,
		IdentityStatus: identityStatus,
		Certificate: map[string]any{
			"status": "pending_issuer_integration",
		},
		HeartbeatEndpoint:  heartbeatEndpoint,
		ClusterAuthority:   authorityDescriptor(authorityKey),
		AuthorityPayload:   item.ApprovalPayload,
		AuthoritySignature: &signature,
	}
	return bootstrap, item, nil
}

// nodeHeartbeatEndpoint returns the API path a node posts heartbeats to.
func nodeHeartbeatEndpoint(clusterID, nodeID string) string {
	return "/api/v1/clusters/" + clusterID + "/nodes/" + nodeID + "/heartbeats"
}

// rawMessageEmpty reports whether raw, ignoring surrounding whitespace, is
// empty, the empty object `{}` or `null`.
func rawMessageEmpty(raw json.RawMessage) bool {
	value := strings.TrimSpace(string(raw))
	return value == "" || value == "{}" || value == "null"
}

// RejectJoinRequest rejects a join request. A blank reason is replaced by a
// default message; a missing row yields ErrInvalidJoinRequest.
func (s *Service) RejectJoinRequest(ctx context.Context, input RejectJoinRequestInput) (NodeJoinRequest, error) {
	if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil {
		return NodeJoinRequest{}, err
	}
	input.Reason = strings.TrimSpace(input.Reason)
	if input.Reason == "" {
		input.Reason = "Rejected by platform administrator."
	}
	item, err := s.store.RejectJoinRequest(ctx, input)
	if errors.Is(err, pgx.ErrNoRows) {
		return NodeJoinRequest{}, ErrInvalidJoinRequest
	}
	return item, err
}

// AssignNodeRole assigns a role to a node in a mutable cluster and records a
// "node_role.<status>" audit event.
//
// The role must pass isAllowedNodeRole (ErrInvalidNodeRole). Status defaults
// to "active" and must be "active", "disabled" or "revoked"
// (ErrInvalidPayload). Policy defaults to `{}` and must be valid JSON.
func (s *Service) AssignNodeRole(ctx context.Context, input AssignNodeRoleInput) (NodeRoleAssignment, error) {
	if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil {
		return NodeRoleAssignment{}, err
	}
	if err := s.ensureClusterMutable(ctx, input.ActorUserID, input.ClusterID); err != nil {
		return NodeRoleAssignment{}, err
	}
	if !isAllowedNodeRole(input.Role) {
		return NodeRoleAssignment{}, ErrInvalidNodeRole
	}
	if input.Status == "" {
		input.Status = "active"
	}
	if input.Status != "active" && input.Status != "disabled" && input.Status != "revoked" {
		return NodeRoleAssignment{}, ErrInvalidPayload
	}
	input.Policy = defaultJSON(input.Policy, `{}`)
	if !json.Valid(input.Policy) {
		return NodeRoleAssignment{}, errors.New("policy must be valid json")
	}
	item, err := s.store.AssignNodeRole(ctx, input)
	if err != nil {
		return NodeRoleAssignment{}, err
	}
	// The audit error is discarded, so a failed audit write does not fail
	// the call.
	_ = s.store.RecordAudit(ctx, ClusterAuditEvent{
		ClusterID:   &input.ClusterID,
		ActorUserID: &input.ActorUserID,
		EventType:   "node_role." + input.Status,
		TargetType:  "node",
		TargetID:    &input.NodeID,
		Payload:     json.RawMessage(`{"capability_is_not_permission":true}`),
		CreatedAt:   s.now(),
	})
	return item, nil
}

// ListNodeRoleAssignments returns the role assignments the store reports for
// the node.
func (s *Service) ListNodeRoleAssignments(ctx context.Context, actorUserID, clusterID, nodeID string) ([]NodeRoleAssignment, error) {
	if err := s.ensurePlatformAdmin(ctx, actorUserID); err != nil {
		return nil, err
	}
	return s.store.ListNodeRoleAssignments(ctx, clusterID, nodeID)
}

// AttachExistingNodeToCluster attaches an existing node to a mutable cluster.
//
// ClusterID and NodeID must be non-empty (ErrInvalidPayload) and every entry
// of Roles must pass isAllowedNodeRole (ErrInvalidNodeRole). pgx.ErrNoRows
// from the store is reported as ErrInvalidPayload.
func (s *Service) AttachExistingNodeToCluster(ctx context.Context, input AttachExistingNodeInput) (ClusterNode, error) {
	if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil {
		return ClusterNode{}, err
	}
	// Reject missing identifiers before ensureClusterMutable is asked about
	// an empty cluster ID.
	if input.ClusterID == "" || input.NodeID == "" {
		return ClusterNode{}, ErrInvalidPayload
	}
	if err := s.ensureClusterMutable(ctx, input.ActorUserID, input.ClusterID); err != nil {
		return ClusterNode{}, err
	}
	for _, role := range input.Roles {
		if !isAllowedNodeRole(role) {
			return ClusterNode{}, ErrInvalidNodeRole
		}
	}
	item, err := s.store.AttachExistingNodeToCluster(ctx, input)
	if errors.Is(err, pgx.ErrNoRows) {
		return ClusterNode{}, ErrInvalidPayload
	}
	return item, err
}

// AssignNodeToGroup sets a node's group in a mutable cluster.
//
// ClusterID and NodeID must be non-empty (ErrInvalidPayload). GroupID is
// trimmed; a blank GroupID is passed to the store as nil. pgx.ErrNoRows from
// the store is reported as ErrInvalidPayload.
func (s *Service) AssignNodeToGroup(ctx context.Context, input AssignNodeGroupInput) (ClusterNode, error) {
	if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil {
		return ClusterNode{}, err
	}
	// Reject missing identifiers before ensureClusterMutable is asked about
	// an empty cluster ID.
	if input.ClusterID == "" || input.NodeID == "" {
		return ClusterNode{}, ErrInvalidPayload
	}
	if err := s.ensureClusterMutable(ctx, input.ActorUserID, input.ClusterID); err != nil {
		return ClusterNode{}, err
	}
	if input.GroupID != nil {
		trimmed := strings.TrimSpace(*input.GroupID)
		if trimmed == "" {
			input.GroupID = nil
		} else {
			input.GroupID = &trimmed
		}
	}
	item, err := s.store.AssignNodeToGroup(ctx, input)
	if errors.Is(err, pgx.ErrNoRows) {
		return ClusterNode{}, ErrInvalidPayload
	}
	return item, err
}

// RevokeNodeIdentity revokes a node's identity. A blank reason is replaced by
// a default; pgx.ErrNoRows from the store is reported as ErrInvalidPayload.
func (s *Service) RevokeNodeIdentity(ctx context.Context, input RevokeNodeIdentityInput) error {
	if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil {
		return err
	}
	input.Reason = strings.TrimSpace(input.Reason)
	if input.Reason == "" {
		input.Reason = "revoked by platform administrator"
	}
	if err := s.store.RevokeNodeIdentity(ctx, input); err != nil {
		if errors.Is(err, pgx.ErrNoRows) {
			return ErrInvalidPayload
		}
		return err
	}
	return nil
}

// DisableClusterMembership disables a cluster membership. A blank reason is
// replaced by a default; pgx.ErrNoRows from the store is reported as
// ErrInvalidPayload.
func (s *Service) DisableClusterMembership(ctx context.Context, input DisableMembershipInput) error {
	if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil {
		return err
	}
	input.Reason = strings.TrimSpace(input.Reason)
	if input.Reason == "" {
		input.Reason = "disabled by platform administrator"
	}
	if err := s.store.DisableClusterMembership(ctx, input); err != nil {
		if errors.Is(err, pgx.ErrNoRows) {
			return ErrInvalidPayload
		}
		return err
	}
	return nil
}

// RecordHeartbeat stores a node heartbeat. It performs no platform-admin
// check.
//
// ClusterID and NodeID must be non-empty (ErrInvalidPayload). HealthStatus
// defaults to "unknown". Capabilities, ServiceStates and Metadata default to
// `{}` and must be valid JSON.
func (s *Service) RecordHeartbeat(ctx context.Context, input RecordHeartbeatInput) (NodeHeartbeat, error) {
	if input.ClusterID == "" || input.NodeID == "" {
		return NodeHeartbeat{}, ErrInvalidPayload
	}
	if input.HealthStatus == "" {
		input.HealthStatus = "unknown"
	}
	input.Capabilities = defaultJSON(input.Capabilities, `{}`)
	input.ServiceStates = defaultJSON(input.ServiceStates, `{}`)
	input.Metadata = defaultJSON(input.Metadata, `{}`)
	// Validate here, like the other ingest methods, instead of handing
	// malformed JSON to the store.
	if !json.Valid(input.Capabilities) || !json.Valid(input.ServiceStates) || !json.Valid(input.Metadata) {
		return NodeHeartbeat{}, errors.New("capabilities, service_states, and metadata must be valid json")
	}
	return s.store.RecordHeartbeat(ctx, input)
}

// ListNodeHeartbeats returns heartbeats for the node; limit is passed to the
// store unchanged.
func (s *Service) ListNodeHeartbeats(ctx context.Context, actorUserID, clusterID, nodeID string, limit int) ([]NodeHeartbeat, error) {
	if err := s.ensurePlatformAdmin(ctx, actorUserID); err != nil {
		return nil, err
	}
	return s.store.ListNodeHeartbeats(ctx, clusterID, nodeID, limit)
}

// UpsertFabricTestingFlag creates or updates a testing flag and records a
// "fabric.testing_flag.updated" audit event.
//
// ScopeType is trimmed and must be "platform", "organization" or "node"
// (ErrInvalidPayload). ScopeID is cleared for "platform" and must be
// non-blank for the other scopes. HistoryRetentionHours defaults to 24 when
// not positive. Metadata defaults to `{}` and must be valid JSON.
func (s *Service) UpsertFabricTestingFlag(ctx context.Context, input UpsertFabricTestingFlagInput) (FabricTestingFlag, error) {
	if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil {
		return FabricTestingFlag{}, err
	}
	input.ScopeType = strings.TrimSpace(input.ScopeType)
	if input.ScopeType == "" {
		return FabricTestingFlag{}, ErrInvalidPayload
	}
	switch input.ScopeType {
	case "platform":
		input.ScopeID = nil
	case "organization", "node":
		if input.ScopeID == nil || strings.TrimSpace(*input.ScopeID) == "" {
			return FabricTestingFlag{}, ErrInvalidPayload
		}
	default:
		return FabricTestingFlag{}, ErrInvalidPayload
	}
	if input.HistoryRetentionHours <= 0 {
		input.HistoryRetentionHours = 24
	}
	input.Metadata = defaultJSON(input.Metadata, `{}`)
	if !json.Valid(input.Metadata) {
		return FabricTestingFlag{}, errors.New("testing flag metadata must be valid json")
	}
	item, err := s.store.UpsertFabricTestingFlag(ctx, input)
	if err != nil {
		return FabricTestingFlag{}, err
	}
	// The audit error is discarded, so a failed audit write does not fail
	// the call.
	_ = s.store.RecordAudit(ctx, ClusterAuditEvent{
		ClusterID:   input.ClusterID,
		ActorUserID: &input.ActorUserID,
		EventType:   "fabric.testing_flag.updated",
		TargetType:  input.ScopeType,
		TargetID:    input.ScopeID,
		Payload:     json.RawMessage(`{"runtime_mesh_enabled":false}`),
		CreatedAt:   s.now(),
	})
	return item, nil
}

// ListFabricTestingFlags returns the testing flags held by the store.
func (s *Service) ListFabricTestingFlags(ctx context.Context, actorUserID string) ([]FabricTestingFlag, error) {
	if err := s.ensurePlatformAdmin(ctx, actorUserID); err != nil {
		return nil, err
	}
	return s.store.ListFabricTestingFlags(ctx)
}

// GetEffectiveNodeTestingFlags returns the effective testing flags for a
// node. It performs no platform-admin check; both IDs must be non-empty
// (ErrInvalidPayload).
func (s *Service) GetEffectiveNodeTestingFlags(ctx context.Context, clusterID, nodeID string) (EffectiveNodeTestingFlags, error) {
	if clusterID == "" || nodeID == "" {
		return EffectiveNodeTestingFlags{}, ErrInvalidPayload
	}
	return s.store.GetEffectiveNodeTestingFlags(ctx, clusterID, nodeID)
}

// GetNodeSyntheticMeshConfig builds and signs the synthetic mesh
// configuration for one node.
//
// ClusterID and NodeID are trimmed and must be non-empty (ErrInvalidPayload).
// When the node's effective testing flags do not have both Enabled and
// SyntheticLinksEnabled set, a signed config with Enabled=false and all
// versions set to "disabled" is returned. Otherwise every route intent of the
// cluster that syntheticRouteFromIntent accepts contributes a route, peer
// endpoints, endpoint candidates, recovery seeds and rendezvous leases.
// ProductionForwarding is always false.
func (s *Service) GetNodeSyntheticMeshConfig(ctx context.Context, input GetNodeSyntheticMeshConfigInput) (NodeSyntheticMeshConfig, error) {
	input.ClusterID = strings.TrimSpace(input.ClusterID)
	input.NodeID = strings.TrimSpace(input.NodeID)
	if input.ClusterID == "" || input.NodeID == "" {
		return NodeSyntheticMeshConfig{}, ErrInvalidPayload
	}
	// Collections start as empty, non-nil values so they encode as {} / []
	// rather than null in the JSON that gets hashed and signed.
	cfg := NodeSyntheticMeshConfig{
		Enabled:                false,
		SchemaVersion:          "c17z18.synthetic.v1",
		ClusterID:              input.ClusterID,
		LocalNodeID:            input.NodeID,
		AuthorityRequired:      true,
		ConfigVersion:          "disabled",
		PeerDirectoryVersion:   "disabled",
		PolicyVersion:          "disabled",
		PeerEndpoints:          map[string]string{},
		PeerEndpointCandidates: map[string][]PeerEndpointCandidate{},
		PeerDirectory:          []PeerDirectoryEntry{},
		RecoverySeeds:          []PeerRecoverySeed{},
		RendezvousLeases:       []PeerRendezvousLease{},
		Routes:                 []SyntheticMeshRouteConfig{},
		ProductionForwarding:   false,
	}
	flags, err := s.store.GetEffectiveNodeTestingFlags(ctx, input.ClusterID, input.NodeID)
	if err != nil {
		return NodeSyntheticMeshConfig{}, err
	}
	if !flags.Enabled || !flags.SyntheticLinksEnabled {
		return s.signSyntheticMeshConfig(ctx, cfg)
	}
	intents, err := s.store.ListRouteIntents(ctx, input.ClusterID)
	if err != nil {
		return NodeSyntheticMeshConfig{}, err
	}
	cfg.Enabled = true
	// The config version is a second-resolution UTC timestamp, shared by the
	// peer directory and policy versions.
	cfg.ConfigVersion = "c17z18-" + s.now().UTC().Format("20060102T150405Z")
	cfg.PeerDirectoryVersion = cfg.ConfigVersion
	cfg.PolicyVersion = cfg.ConfigVersion
	meshLinks, err := s.store.ListMeshLinks(ctx, input.ClusterID)
	if err != nil {
		return NodeSyntheticMeshConfig{}, err
	}
	relayPolicy := newRendezvousRelayPolicy(input.NodeID, meshLinks, s.now())
	peerDirectory := map[string]*PeerDirectoryEntry{}
	recoverySeeds := map[string]PeerRecoverySeed{}
	rendezvousLeases := map[string]PeerRendezvousLease{}
	routePathDecisions := []RoutePathDecision{}
	for _, intent := range intents {
		route, peers, candidates, seeds, policyLeases, ok := s.syntheticRouteFromIntent(input, intent)
		if !ok {
			continue
		}
		reportedPeers, reportedCandidates, err := s.reportedEndpointConfig(ctx, input.ClusterID, input.NodeID, route.Hops)
		if err != nil {
			return NodeSyntheticMeshConfig{}, err
		}
		feedback, err := s.rendezvousRelayFeedback(ctx, input.ClusterID, route.Hops, s.now())
		if err != nil {
			return NodeSyntheticMeshConfig{}, err
		}
		relayPolicy.addFeedback(feedback)
		replacementHints, err := s.rendezvousRelayReplacementHints(ctx, input.ClusterID, route.Hops, s.now())
		if err != nil {
			return NodeSyntheticMeshConfig{}, err
		}
		relayPolicy.addReplacementHints(replacementHints)
		relayPolicy.addFeedback(replacementHintFeedback(replacementHints, s.now()))
		relayPolicy.addFeedback(rendezvousRelayRouteHealthFeedback(input.NodeID, route, meshLinks, s.now()))
		// Reported endpoints replace the intent-derived endpoint for the
		// same node; reported candidates are appended to the derived ones.
		for nodeID, endpoint := range reportedPeers {
			peers[nodeID] = endpoint
		}
		for nodeID, items := range reportedCandidates {
			candidates[nodeID] = append(candidates[nodeID], items...)
		}
		routeLeases := scopedRendezvousLeases(policyLeases, route, input.NodeID, relayPolicy, s.now())
		routeLeases = append(routeLeases, derivedRendezvousLeases(route, peers, candidates, input.NodeID, relayPolicy, s.now())...)
		cfg.Routes = append(cfg.Routes, route)
		routePathDecisions = append(routePathDecisions, routePathDecisionForRoute(route, input.NodeID, routeLeases, relayPolicy, cfg.ConfigVersion))
		mergePeerDirectoryRoute(peerDirectory, route, input.NodeID)
		for nodeID, endpoint := range peers {
			if strings.TrimSpace(nodeID) != "" && strings.TrimSpace(endpoint) != "" {
				cfg.PeerEndpoints[nodeID] = endpoint
				peerDirectoryEntry(peerDirectory, nodeID).EndpointCount++
			}
		}
		for nodeID, nodeCandidates := range candidates {
			if strings.TrimSpace(nodeID) == "" || len(nodeCandidates) == 0 {
				continue
			}
			cfg.PeerEndpointCandidates[nodeID] = append(cfg.PeerEndpointCandidates[nodeID], nodeCandidates...)
			mergePeerDirectoryCandidates(peerDirectory, nodeID, nodeCandidates)
		}
		mergeRecoverySeeds(recoverySeeds, seeds)
		mergeRendezvousLeases(rendezvousLeases, routeLeases)
	}
	cfg.RecoverySeeds = sortedRecoverySeeds(recoverySeeds, maxScopedRecoverySeeds)
	cfg.RendezvousLeases = sortedRendezvousLeases(rendezvousLeases, maxScopedRendezvousLeases)
	cfg.RendezvousRelayPolicy = relayPolicy.report()
	cfg.RoutePathDecisions = routePathDecisionReport(cfg.ConfigVersion, routePathDecisions)
	markPeerDirectoryRecoverySeeds(peerDirectory, cfg.RecoverySeeds)
	markPeerDirectoryRendezvousLeases(peerDirectory, cfg.RendezvousLeases, input.NodeID)
	cfg.PeerDirectory = sortedPeerDirectory(peerDirectory)
	return s.signSyntheticMeshConfig(ctx, cfg)
}

// signSyntheticMeshConfig attaches the cluster authority descriptor to cfg
// and signs a payload containing the hash of cfg.
//
// The hash is computed over the JSON encoding of cfg with AuthorityPayload
// and AuthoritySignature cleared. The signed payload expires five minutes
// after it is issued.
func (s *Service) signSyntheticMeshConfig(ctx context.Context, cfg NodeSyntheticMeshConfig) (NodeSyntheticMeshConfig, error) {
	authorityKey, err := s.ensureClusterAuthority(ctx, cfg.ClusterID, nil)
	if err != nil {
		return NodeSyntheticMeshConfig{}, err
	}
	cfg.AuthorityRequired = true
	cfg.ClusterAuthority = authorityDescriptor(authorityKey)
	// Hash a copy without the authority fields: they are filled in from the
	// result of signing and so cannot be part of what is signed.
	unsigned := cfg
	unsigned.AuthorityPayload = nil
	unsigned.AuthoritySignature = nil
	rawConfig, err := json.Marshal(unsigned)
	if err != nil {
		return NodeSyntheticMeshConfig{}, err
	}
	configHash, err := clusterauth.HashRaw(rawConfig)
	if err != nil {
		return NodeSyntheticMeshConfig{}, err
	}
	issuedAt := s.now().UTC()
	payload := clusterMeshConfigAuthorityPayload{
		SchemaVersion:        clusterMeshConfigAuthoritySchema,
		ClusterID:            cfg.ClusterID,
		LocalNodeID:          cfg.LocalNodeID,
		ConfigVersion:        cfg.ConfigVersion,
		ConfigSHA256:         configHash,
		IssuedAt:             issuedAt,
		ExpiresAt:            issuedAt.Add(5 * time.Minute),
		ControlPlaneOnly:     true,
		ProductionForwarding: false,
	}
	rawPayload, signature, err := clusterauth.SignPayload(authorityKey.PrivateKey, payload, issuedAt)
	if err != nil {
		return NodeSyntheticMeshConfig{}, err
	}
	cfg.AuthorityPayload = rawPayload
	cfg.AuthoritySignature = &signature
	return cfg, nil
}

// RecordNodeTelemetry stores a telemetry observation. It performs no
// platform-admin check.
//
// ClusterID and NodeID must be non-empty (ErrInvalidPayload). Payload
// defaults to `{}` and must be valid JSON. A zero ObservedAt is replaced by
// the current time.
func (s *Service) RecordNodeTelemetry(ctx context.Context, input RecordNodeTelemetryInput) (NodeTelemetryObservation, error) {
	if input.ClusterID == "" || input.NodeID == "" {
		return NodeTelemetryObservation{}, ErrInvalidPayload
	}
	input.Payload = defaultJSON(input.Payload, `{}`)
	if !json.Valid(input.Payload) {
		return NodeTelemetryObservation{}, errors.New("telemetry payload must be valid json")
	}
	if input.ObservedAt.IsZero() {
		input.ObservedAt = s.now()
	}
	return s.store.RecordNodeTelemetry(ctx, input)
}

// ListNodeTelemetry returns telemetry observations for the node; limit is
// passed to the store unchanged.
func (s *Service) ListNodeTelemetry(ctx context.Context, actorUserID, clusterID, nodeID string, limit int) ([]NodeTelemetryObservation, error) {
	if err := s.ensurePlatformAdmin(ctx, actorUserID); err != nil {
		return nil, err
	}
	return s.store.ListNodeTelemetry(ctx, clusterID, nodeID, limit)
}

// SetDesiredWorkload stores the desired state of a workload on a node in a
// mutable cluster and records a "node_workload.desired_state_set" audit
// event.
//
// ClusterID, NodeID and the trimmed ServiceType must be non-empty
// (ErrInvalidPayload). DesiredState defaults to "disabled" and RuntimeMode to
// "container". Config and Environment default to `{}` and must be valid JSON.
func (s *Service) SetDesiredWorkload(ctx context.Context, input SetDesiredWorkloadInput) (NodeWorkloadDesiredState, error) {
	if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil {
		return NodeWorkloadDesiredState{}, err
	}
	// Reject missing identifiers before ensureClusterMutable is asked about
	// an empty cluster ID.
	if input.ClusterID == "" || input.NodeID == "" {
		return NodeWorkloadDesiredState{}, ErrInvalidPayload
	}
	if err := s.ensureClusterMutable(ctx, input.ActorUserID, input.ClusterID); err != nil {
		return NodeWorkloadDesiredState{}, err
	}
	input.ServiceType = strings.TrimSpace(input.ServiceType)
	if input.ServiceType == "" {
		return NodeWorkloadDesiredState{}, ErrInvalidPayload
	}
	if input.DesiredState == "" {
		input.DesiredState = "disabled"
	}
	if input.RuntimeMode == "" {
		input.RuntimeMode = "container"
	}
	input.Config = defaultJSON(input.Config, `{}`)
	input.Environment = defaultJSON(input.Environment, `{}`)
	if !json.Valid(input.Config) || !json.Valid(input.Environment) {
		return NodeWorkloadDesiredState{}, errors.New("config and environment must be valid json")
	}
	item, err := s.store.SetDesiredWorkload(ctx, input)
	if err != nil {
		return NodeWorkloadDesiredState{}, err
	}
	// The audit error is discarded, so a failed audit write does not fail
	// the call.
	_ = s.store.RecordAudit(ctx, ClusterAuditEvent{
		ClusterID:   &input.ClusterID,
		ActorUserID: &input.ActorUserID,
		EventType:   "node_workload.desired_state_set",
		TargetType:  "node",
		TargetID:    &input.NodeID,
		Payload:     json.RawMessage(`{"supervision_runtime":"stub_c5"}`),
		CreatedAt:   s.now(),
	})
	return item, nil
}

// ListDesiredWorkloads returns the desired workload states for a node. Both
// IDs must be non-empty (ErrInvalidPayload).
func (s *Service) ListDesiredWorkloads(ctx context.Context, actorUserID, clusterID, nodeID string) ([]NodeWorkloadDesiredState, error) {
	if err := s.ensurePlatformAdmin(ctx, actorUserID); err != nil {
		return nil, err
	}
	if clusterID == "" || nodeID == "" {
		return nil, ErrInvalidPayload
	}
	return s.store.ListDesiredWorkloads(ctx, clusterID, nodeID)
}

// ReportWorkloadStatus stores a workload status report. It performs no
// platform-admin check.
//
// ClusterID, NodeID and the trimmed ServiceType must be non-empty
// (ErrInvalidPayload). ReportedState defaults to "unknown" and RuntimeMode to
// "container". StatusPayload defaults to `{}` and must be valid JSON.
func (s *Service) ReportWorkloadStatus(ctx context.Context, input ReportWorkloadStatusInput) (NodeWorkloadStatus, error) {
	input.ServiceType = strings.TrimSpace(input.ServiceType)
	if input.ClusterID == "" || input.NodeID == "" || input.ServiceType == "" {
		return NodeWorkloadStatus{}, ErrInvalidPayload
	}
	if input.ReportedState == "" {
		input.ReportedState = "unknown"
	}
	if input.RuntimeMode == "" {
		input.RuntimeMode = "container"
	}
	input.StatusPayload = defaultJSON(input.StatusPayload, `{}`)
	if !json.Valid(input.StatusPayload) {
		return NodeWorkloadStatus{}, errors.New("status_payload must be valid json")
	}
	return s.store.ReportWorkloadStatus(ctx, input)
}

// ListLatestWorkloadStatuses returns the latest workload statuses the store
// reports for the node.
func (s *Service) ListLatestWorkloadStatuses(ctx context.Context, actorUserID, clusterID, nodeID string) ([]NodeWorkloadStatus, error) {
	if err := s.ensurePlatformAdmin(ctx, actorUserID); err != nil {
		return nil, err
	}
	return s.store.ListLatestWorkloadStatuses(ctx, clusterID, nodeID)
}

// ReportMeshLink stores a mesh link observation. It performs no
// platform-admin check.
//
// ClusterID, SourceNodeID and TargetNodeID must be non-empty
// (ErrInvalidPayload). LinkStatus defaults to "unknown". Metadata defaults to
// `{}` and must be valid JSON.
func (s *Service) ReportMeshLink(ctx context.Context, input ReportMeshLinkInput) (MeshLinkObservation, error) {
	if input.ClusterID == "" || input.SourceNodeID == "" || input.TargetNodeID == "" {
		return MeshLinkObservation{}, ErrInvalidPayload
	}
	if input.LinkStatus == "" {
		input.LinkStatus = "unknown"
	}
	input.Metadata = defaultJSON(input.Metadata, `{}`)
	if !json.Valid(input.Metadata) {
		return MeshLinkObservation{}, errors.New("metadata must be valid json")
	}
	return s.store.ReportMeshLink(ctx, input)
}

func (s *Service) ListMeshLinks(ctx context.Context, actorUserID, clusterID 
string) ([]MeshLinkObservation, error) { if err := s.ensurePlatformAdmin(ctx, actorUserID); err != nil { return nil, err } return s.store.ListMeshLinks(ctx, clusterID) } func (s *Service) CreateRouteIntent(ctx context.Context, input CreateRouteIntentInput) (MeshRouteIntent, error) { if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil { return MeshRouteIntent{}, err } if err := s.ensureClusterMutable(ctx, input.ActorUserID, input.ClusterID); err != nil { return MeshRouteIntent{}, err } if input.ClusterID == "" || input.ServiceClass == "" { return MeshRouteIntent{}, ErrInvalidPayload } if input.Priority == 0 { input.Priority = 100 } input.SourceSelector = defaultJSON(input.SourceSelector, `{}`) input.DestinationSelector = defaultJSON(input.DestinationSelector, `{}`) input.Policy = defaultJSON(input.Policy, `{}`) if !json.Valid(input.SourceSelector) || !json.Valid(input.DestinationSelector) || !json.Valid(input.Policy) { return MeshRouteIntent{}, errors.New("source_selector, destination_selector, and policy must be valid json") } item, err := s.store.CreateRouteIntent(ctx, input) if err != nil { return MeshRouteIntent{}, err } _ = s.store.RecordAudit(ctx, ClusterAuditEvent{ ClusterID: &input.ClusterID, ActorUserID: &input.ActorUserID, EventType: "mesh.route_intent.created", TargetType: "mesh_route_intent", TargetID: &item.ID, Payload: json.RawMessage(`{"traffic_forwarding_enabled":false}`), CreatedAt: s.now(), }) return item, nil } func (s *Service) ListRouteIntents(ctx context.Context, actorUserID, clusterID string) ([]MeshRouteIntent, error) { if err := s.ensurePlatformAdmin(ctx, actorUserID); err != nil { return nil, err } return s.store.ListRouteIntents(ctx, clusterID) } func (s *Service) ListQoSPolicies(ctx context.Context, actorUserID, clusterID string) ([]MeshQoSPolicy, error) { if err := s.ensurePlatformAdmin(ctx, actorUserID); err != nil { return nil, err } return s.store.ListQoSPolicies(ctx, clusterID) } func (s *Service) 
ListFabricEntryPoints(ctx context.Context, actorUserID, clusterID string) ([]FabricEntryPoint, error) { if err := s.ensurePlatformAdmin(ctx, actorUserID); err != nil { return nil, err } return s.store.ListFabricEntryPoints(ctx, clusterID) } func (s *Service) CreateFabricEntryPoint(ctx context.Context, input CreateFabricEntryPointInput) (FabricEntryPoint, error) { if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil { return FabricEntryPoint{}, err } if err := s.ensureClusterMutable(ctx, input.ActorUserID, input.ClusterID); err != nil { return FabricEntryPoint{}, err } input.Name = strings.TrimSpace(input.Name) input.Status = strings.TrimSpace(input.Status) input.EndpointType = strings.TrimSpace(input.EndpointType) if input.Status == "" { input.Status = "active" } if input.EndpointType == "" { input.EndpointType = "client_access" } if input.ClusterID == "" || input.Name == "" || !isFabricEndpointStatus(input.Status) || !isFabricEntryPointType(input.EndpointType) { return FabricEntryPoint{}, ErrInvalidPayload } if input.PublicEndpoint != nil { trimmed := strings.TrimSpace(*input.PublicEndpoint) if trimmed == "" { input.PublicEndpoint = nil } else { input.PublicEndpoint = &trimmed } } input.Policy = defaultJSON(input.Policy, `{}`) input.Metadata = defaultJSON(input.Metadata, `{}`) if !json.Valid(input.Policy) || !json.Valid(input.Metadata) { return FabricEntryPoint{}, errors.New("entry point policy and metadata must be valid json") } item, err := s.store.CreateFabricEntryPoint(ctx, input) if err != nil { return FabricEntryPoint{}, err } _ = s.store.RecordAudit(ctx, ClusterAuditEvent{ ClusterID: &input.ClusterID, ActorUserID: &input.ActorUserID, EventType: "fabric.entry_point.created", TargetType: "fabric_entry_point", TargetID: &item.ID, Payload: json.RawMessage(`{"runtime_routing_enabled":false}`), CreatedAt: s.now(), }) return item, nil } func (s *Service) SetFabricEntryPointNode(ctx context.Context, input SetFabricEntryPointNodeInput) 
(FabricEntryPointNode, error) { if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil { return FabricEntryPointNode{}, err } if err := s.ensureClusterMutable(ctx, input.ActorUserID, input.ClusterID); err != nil { return FabricEntryPointNode{}, err } input.Status = strings.TrimSpace(input.Status) if input.Status == "" { input.Status = "active" } if input.Priority <= 0 { input.Priority = 100 } if input.ClusterID == "" || input.EntryPointID == "" || input.NodeID == "" || !isFabricEndpointStatus(input.Status) { return FabricEntryPointNode{}, ErrInvalidPayload } input.Metadata = defaultJSON(input.Metadata, `{}`) if !json.Valid(input.Metadata) { return FabricEntryPointNode{}, errors.New("entry point node metadata must be valid json") } return s.store.SetFabricEntryPointNode(ctx, input) } func (s *Service) ListFabricEntryPointNodes(ctx context.Context, actorUserID, clusterID, entryPointID string) ([]FabricEntryPointNode, error) { if err := s.ensurePlatformAdmin(ctx, actorUserID); err != nil { return nil, err } if clusterID == "" || entryPointID == "" { return nil, ErrInvalidPayload } return s.store.ListFabricEntryPointNodes(ctx, clusterID, entryPointID) } func (s *Service) ListFabricEgressPools(ctx context.Context, actorUserID, clusterID string) ([]FabricEgressPool, error) { if err := s.ensurePlatformAdmin(ctx, actorUserID); err != nil { return nil, err } return s.store.ListFabricEgressPools(ctx, clusterID) } func (s *Service) CreateFabricEgressPool(ctx context.Context, input CreateFabricEgressPoolInput) (FabricEgressPool, error) { if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil { return FabricEgressPool{}, err } if err := s.ensureClusterMutable(ctx, input.ActorUserID, input.ClusterID); err != nil { return FabricEgressPool{}, err } input.Name = strings.TrimSpace(input.Name) input.Status = strings.TrimSpace(input.Status) if input.Status == "" { input.Status = "active" } if input.ClusterID == "" || input.Name == "" || 
!isFabricEndpointStatus(input.Status) { return FabricEgressPool{}, ErrInvalidPayload } if input.Description != nil { trimmed := strings.TrimSpace(*input.Description) if trimmed == "" { input.Description = nil } else { input.Description = &trimmed } } input.RouteScope = defaultJSON(input.RouteScope, `{}`) input.Policy = defaultJSON(input.Policy, `{}`) input.Metadata = defaultJSON(input.Metadata, `{}`) if !json.Valid(input.RouteScope) || !json.Valid(input.Policy) || !json.Valid(input.Metadata) { return FabricEgressPool{}, errors.New("egress pool route_scope, policy, and metadata must be valid json") } item, err := s.store.CreateFabricEgressPool(ctx, input) if err != nil { return FabricEgressPool{}, err } _ = s.store.RecordAudit(ctx, ClusterAuditEvent{ ClusterID: &input.ClusterID, ActorUserID: &input.ActorUserID, EventType: "fabric.egress_pool.created", TargetType: "fabric_egress_pool", TargetID: &item.ID, Payload: json.RawMessage(`{"runtime_routing_enabled":false}`), CreatedAt: s.now(), }) return item, nil } func (s *Service) SetFabricEgressPoolNode(ctx context.Context, input SetFabricEgressPoolNodeInput) (FabricEgressPoolNode, error) { if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil { return FabricEgressPoolNode{}, err } if err := s.ensureClusterMutable(ctx, input.ActorUserID, input.ClusterID); err != nil { return FabricEgressPoolNode{}, err } input.Status = strings.TrimSpace(input.Status) if input.Status == "" { input.Status = "active" } if input.Priority <= 0 { input.Priority = 100 } if input.ClusterID == "" || input.EgressPoolID == "" || input.NodeID == "" || !isFabricEndpointStatus(input.Status) { return FabricEgressPoolNode{}, ErrInvalidPayload } input.Metadata = defaultJSON(input.Metadata, `{}`) if !json.Valid(input.Metadata) { return FabricEgressPoolNode{}, errors.New("egress pool node metadata must be valid json") } return s.store.SetFabricEgressPoolNode(ctx, input) } func (s *Service) ListFabricEgressPoolNodes(ctx context.Context, 
actorUserID, clusterID, egressPoolID string) ([]FabricEgressPoolNode, error) { if err := s.ensurePlatformAdmin(ctx, actorUserID); err != nil { return nil, err } if clusterID == "" || egressPoolID == "" { return nil, ErrInvalidPayload } return s.store.ListFabricEgressPoolNodes(ctx, clusterID, egressPoolID) } func (s *Service) GetClusterAuthorityState(ctx context.Context, actorUserID, clusterID string) (ClusterAuthorityState, error) { if err := s.ensurePlatformAdmin(ctx, actorUserID); err != nil { return ClusterAuthorityState{}, err } return s.store.GetClusterAuthorityState(ctx, clusterID) } func (s *Service) UpdateClusterAuthorityState(ctx context.Context, input UpdateClusterAuthorityInput) (ClusterAuthorityState, error) { role, err := s.store.GetPlatformRole(ctx, strings.TrimSpace(input.ActorUserID)) if err != nil { return ClusterAuthorityState{}, err } if !isPlatformAdminRole(role) { return ClusterAuthorityState{}, ErrAccessDenied } if input.MutationMode == "recovery_override" && role != PlatformRoleRecoveryAdmin { return ClusterAuthorityState{}, ErrAccessDenied } if input.AuthorityState == "" { input.AuthorityState = "authoritative" } if input.MutationMode == "" { input.MutationMode = "normal" } item, err := s.store.UpdateClusterAuthorityState(ctx, input) if err != nil { return ClusterAuthorityState{}, err } _ = s.store.RecordAudit(ctx, ClusterAuditEvent{ ClusterID: &input.ClusterID, ActorUserID: &input.ActorUserID, EventType: "cluster_authority.updated", TargetType: "cluster", TargetID: &input.ClusterID, Payload: json.RawMessage(`{"split_brain_guard":true}`), CreatedAt: s.now(), }) return item, nil } func (s *Service) ListClusterAdminSummaries(ctx context.Context, actorUserID string) ([]ClusterAdminSummary, error) { if err := s.ensurePlatformAdmin(ctx, actorUserID); err != nil { return nil, err } return s.store.ListClusterAdminSummaries(ctx) } func (s *Service) CreateVPNConnection(ctx context.Context, input CreateVPNConnectionInput) (VPNConnection, error) { if 
err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil { return VPNConnection{}, err } if err := s.ensureClusterMutable(ctx, input.ActorUserID, input.ClusterID); err != nil { return VPNConnection{}, err } input.Name = strings.TrimSpace(input.Name) input.ProtocolFamily = strings.TrimSpace(input.ProtocolFamily) if input.ProtocolFamily == "" { input.ProtocolFamily = "generic" } input.Mode = strings.TrimSpace(input.Mode) if input.Mode == "" { input.Mode = VPNConnectionModeSingleActive } input.DesiredState = strings.TrimSpace(input.DesiredState) if input.DesiredState == "" { input.DesiredState = VPNConnectionDesiredDisabled } if input.ClusterID == "" || input.OrganizationID == "" || input.Name == "" { return VPNConnection{}, ErrInvalidPayload } if input.Mode != VPNConnectionModeSingleActive { return VPNConnection{}, errors.New("vpn connection mode must be single_active") } if !isAllowedVPNDesiredState(input.DesiredState) { return VPNConnection{}, errors.New("vpn connection desired_state must be enabled or disabled") } input.TargetEndpoint = defaultJSON(input.TargetEndpoint, `{}`) input.AllowedNodePolicy = defaultJSON(input.AllowedNodePolicy, `{"mode":"explicit","node_ids":[]}`) input.RoutingUsage = defaultJSON(input.RoutingUsage, `[]`) input.RoutePolicy = defaultJSON(input.RoutePolicy, `{}`) input.QoSPolicy = defaultJSON(input.QoSPolicy, `{}`) input.PlacementPolicy = defaultJSON(input.PlacementPolicy, `{}`) input.Metadata = defaultJSON(input.Metadata, `{}`) if !json.Valid(input.TargetEndpoint) || !json.Valid(input.AllowedNodePolicy) || !json.Valid(input.RoutingUsage) || !json.Valid(input.RoutePolicy) || !json.Valid(input.QoSPolicy) || !json.Valid(input.PlacementPolicy) || !json.Valid(input.Metadata) { return VPNConnection{}, errors.New("vpn connection json fields must be valid json") } item, err := s.store.CreateVPNConnection(ctx, input) if err != nil { return VPNConnection{}, err } _ = s.store.RecordAudit(ctx, ClusterAuditEvent{ ClusterID: &input.ClusterID, 
ActorUserID: &input.ActorUserID, EventType: "vpn_connection.created", TargetType: "vpn_connection", TargetID: &item.ID, Payload: json.RawMessage(`{"runtime_created":false}`), CreatedAt: s.now(), }) return item, nil } func (s *Service) ListVPNConnections(ctx context.Context, actorUserID, clusterID string) ([]VPNConnection, error) { if err := s.ensurePlatformAdmin(ctx, actorUserID); err != nil { return nil, err } return s.store.ListVPNConnections(ctx, clusterID) } func (s *Service) GetVPNConnection(ctx context.Context, actorUserID, clusterID, vpnConnectionID string) (VPNConnection, error) { if err := s.ensurePlatformAdmin(ctx, actorUserID); err != nil { return VPNConnection{}, err } item, err := s.store.GetVPNConnection(ctx, clusterID, vpnConnectionID) if errors.Is(err, pgx.ErrNoRows) { return VPNConnection{}, ErrInvalidVPNConnection } return item, err } func (s *Service) UpdateVPNConnectionDesiredState(ctx context.Context, input UpdateVPNConnectionDesiredStateInput) (VPNConnection, error) { if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil { return VPNConnection{}, err } if err := s.ensureClusterMutable(ctx, input.ActorUserID, input.ClusterID); err != nil { return VPNConnection{}, err } input.DesiredState = strings.TrimSpace(input.DesiredState) if !isAllowedVPNDesiredState(input.DesiredState) { return VPNConnection{}, errors.New("vpn connection desired_state must be enabled or disabled") } item, err := s.store.UpdateVPNConnectionDesiredState(ctx, input) if errors.Is(err, pgx.ErrNoRows) { return VPNConnection{}, ErrInvalidVPNConnection } if err != nil { return VPNConnection{}, err } _ = s.store.RecordAudit(ctx, ClusterAuditEvent{ ClusterID: &input.ClusterID, ActorUserID: &input.ActorUserID, EventType: "vpn_connection.desired_state_changed", TargetType: "vpn_connection", TargetID: &input.VPNConnectionID, Payload: json.RawMessage(`{"runtime_executed":false}`), CreatedAt: s.now(), }) return item, nil } func (s *Service) 
UpsertVPNConnectionRoutePolicy(ctx context.Context, input UpsertVPNConnectionRoutePolicyInput) (VPNConnectionRoutePolicy, error) { if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil { return VPNConnectionRoutePolicy{}, err } if err := s.ensureClusterMutable(ctx, input.ActorUserID, input.ClusterID); err != nil { return VPNConnectionRoutePolicy{}, err } input.RouteType = strings.TrimSpace(input.RouteType) input.Destination = strings.TrimSpace(input.Destination) input.Action = strings.TrimSpace(input.Action) input.Status = strings.TrimSpace(input.Status) if input.Action == "" { input.Action = "allow" } if input.Status == "" { input.Status = "active" } if input.Priority == 0 { input.Priority = 100 } if input.ClusterID == "" || input.VPNConnectionID == "" || input.RouteType == "" || input.Destination == "" { return VPNConnectionRoutePolicy{}, ErrInvalidPayload } if !isAllowedVPNRouteType(input.RouteType) || !isAllowedVPNRouteAction(input.Action) || !isAllowedVPNPolicyStatus(input.Status) { return VPNConnectionRoutePolicy{}, ErrInvalidPayload } input.Policy = defaultJSON(input.Policy, `{}`) if !json.Valid(input.Policy) { return VPNConnectionRoutePolicy{}, errors.New("vpn route policy json must be valid json") } item, err := s.store.UpsertVPNConnectionRoutePolicy(ctx, input) if errors.Is(err, pgx.ErrNoRows) { return VPNConnectionRoutePolicy{}, ErrInvalidVPNConnection } if err != nil { return VPNConnectionRoutePolicy{}, err } _ = s.store.RecordAudit(ctx, ClusterAuditEvent{ ClusterID: &input.ClusterID, ActorUserID: &input.ActorUserID, EventType: "vpn_connection.route_policy_changed", TargetType: "vpn_connection", TargetID: &input.VPNConnectionID, Payload: json.RawMessage(`{"routing_runtime_changed":false}`), CreatedAt: s.now(), }) return item, nil } func (s *Service) ListVPNConnectionRoutePolicies(ctx context.Context, actorUserID, clusterID, vpnConnectionID string) ([]VPNConnectionRoutePolicy, error) { if err := s.ensurePlatformAdmin(ctx, actorUserID); err 
!= nil { return nil, err } return s.store.ListVPNConnectionRoutePolicies(ctx, clusterID, vpnConnectionID) } func (s *Service) SetVPNConnectionAllowedNodes(ctx context.Context, input SetVPNConnectionAllowedNodesInput) ([]VPNConnectionAllowedNode, error) { if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil { return nil, err } if err := s.ensureClusterMutable(ctx, input.ActorUserID, input.ClusterID); err != nil { return nil, err } input.RolePreference = strings.TrimSpace(input.RolePreference) if input.RolePreference == "" { input.RolePreference = "candidate" } if input.ClusterID == "" || input.VPNConnectionID == "" { return nil, ErrInvalidPayload } if !isAllowedVPNNodePreference(input.RolePreference) { return nil, ErrInvalidPayload } input.Metadata = defaultJSON(input.Metadata, `{}`) if !json.Valid(input.Metadata) { return nil, errors.New("allowed node metadata must be valid json") } nodes := make([]string, 0, len(input.NodeIDs)) seen := map[string]struct{}{} for _, nodeID := range input.NodeIDs { nodeID = strings.TrimSpace(nodeID) if nodeID == "" { continue } if _, ok := seen[nodeID]; ok { continue } seen[nodeID] = struct{}{} nodes = append(nodes, nodeID) } input.NodeIDs = nodes items, err := s.store.SetVPNConnectionAllowedNodes(ctx, input) if errors.Is(err, pgx.ErrNoRows) { return nil, ErrInvalidVPNConnection } if err != nil { return nil, err } _ = s.store.RecordAudit(ctx, ClusterAuditEvent{ ClusterID: &input.ClusterID, ActorUserID: &input.ActorUserID, EventType: "vpn_connection.allowed_nodes_changed", TargetType: "vpn_connection", TargetID: &input.VPNConnectionID, Payload: json.RawMessage(`{"node_runtime_changed":false}`), CreatedAt: s.now(), }) return items, nil } func (s *Service) ListVPNConnectionAllowedNodes(ctx context.Context, actorUserID, clusterID, vpnConnectionID string) ([]VPNConnectionAllowedNode, error) { if err := s.ensurePlatformAdmin(ctx, actorUserID); err != nil { return nil, err } return s.store.ListVPNConnectionAllowedNodes(ctx, 
clusterID, vpnConnectionID) } func (s *Service) AcquireVPNConnectionLease(ctx context.Context, input AcquireVPNConnectionLeaseInput) (VPNConnectionLease, error) { if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil { return VPNConnectionLease{}, err } if input.ClusterID == "" || input.VPNConnectionID == "" || input.OwnerNodeID == "" { return VPNConnectionLease{}, ErrInvalidPayload } conn, err := s.store.GetVPNConnection(ctx, input.ClusterID, input.VPNConnectionID) if errors.Is(err, pgx.ErrNoRows) { return VPNConnectionLease{}, ErrInvalidVPNConnection } if err != nil { return VPNConnectionLease{}, err } if conn.Mode != VPNConnectionModeSingleActive || conn.DesiredState != VPNConnectionDesiredEnabled { return VPNConnectionLease{}, errors.New("vpn connection must be enabled single_active before lease acquisition") } if err := s.ensureVPNLeaseOwnerEligible(ctx, input.ClusterID, input.VPNConnectionID, input.OwnerNodeID); err != nil { return VPNConnectionLease{}, err } if input.TTL <= 0 { input.TTL = 30 * time.Second } input.Metadata = defaultJSON(input.Metadata, `{}`) if !json.Valid(input.Metadata) { return VPNConnectionLease{}, errors.New("lease metadata must be valid json") } token, err := generateFencingToken() if err != nil { return VPNConnectionLease{}, err } item, err := s.store.AcquireVPNConnectionLease(ctx, input, s.now().Add(input.TTL), token) if errors.Is(err, pgx.ErrNoRows) { return VPNConnectionLease{}, ErrInvalidVPNLease } if errors.Is(err, ErrVPNLeaseAlreadyActive) { return VPNConnectionLease{}, ErrVPNLeaseAlreadyActive } if err != nil { return VPNConnectionLease{}, err } _ = s.store.RecordAudit(ctx, ClusterAuditEvent{ ClusterID: &input.ClusterID, ActorUserID: &input.ActorUserID, EventType: "vpn_connection.lease_acquired", TargetType: "vpn_connection", TargetID: &input.VPNConnectionID, Payload: json.RawMessage(`{"vpn_runtime_started":false}`), CreatedAt: s.now(), }) return item, nil } func (s *Service) RenewVPNConnectionLease(ctx 
context.Context, input RenewVPNConnectionLeaseInput) (VPNConnectionLease, error) {
	var none VPNConnectionLease

	if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil {
		return none, err
	}
	switch {
	case input.ClusterID == "", input.VPNConnectionID == "", input.LeaseID == "",
		input.OwnerNodeID == "", input.FencingToken == "":
		return none, ErrInvalidPayload
	}

	if input.TTL <= 0 {
		input.TTL = 30 * time.Second
	}
	if err := s.ensureVPNLeaseOwnerEligible(ctx, input.ClusterID, input.VPNConnectionID, input.OwnerNodeID); err != nil {
		return none, err
	}

	expiresAt := s.now().Add(input.TTL)
	renewed, err := s.store.RenewVPNConnectionLease(ctx, input, expiresAt)
	switch {
	case errors.Is(err, pgx.ErrNoRows):
		return none, ErrInvalidVPNLease
	case err != nil:
		return none, err
	}

	event := ClusterAuditEvent{
		ClusterID:   &input.ClusterID,
		ActorUserID: &input.ActorUserID,
		EventType:   "vpn_connection.lease_renewed",
		TargetType:  "vpn_connection",
		TargetID:    &input.VPNConnectionID,
		Payload:     json.RawMessage(`{"vpn_runtime_changed":false}`),
		CreatedAt:   s.now(),
	}
	// The audit error is discarded: a failed audit write does not fail the call.
	_ = s.store.RecordAudit(ctx, event)
	// err is always nil at this point.
	return renewed, nil
}

// ReleaseVPNConnectionLease releases a VPN connection lease.
//
// The actor must be a platform admin. ClusterID, VPNConnectionID, LeaseID,
// OwnerNodeID and FencingToken are required (ErrInvalidPayload). pgx.ErrNoRows
// from the store maps to ErrInvalidVPNLease. After a successful store write an
// audit event is recorded; the audit call's error is discarded.
func (s *Service) ReleaseVPNConnectionLease(ctx context.Context, input ReleaseVPNConnectionLeaseInput) (VPNConnectionLease, error) {
	var none VPNConnectionLease

	if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil {
		return none, err
	}
	switch {
	case input.ClusterID == "", input.VPNConnectionID == "", input.LeaseID == "",
		input.OwnerNodeID == "", input.FencingToken == "":
		return none, ErrInvalidPayload
	}

	released, err := s.store.ReleaseVPNConnectionLease(ctx, input)
	switch {
	case errors.Is(err, pgx.ErrNoRows):
		return none, ErrInvalidVPNLease
	case err != nil:
		return none, err
	}

	event := ClusterAuditEvent{
		ClusterID:   &input.ClusterID,
		ActorUserID: &input.ActorUserID,
		EventType:   "vpn_connection.lease_released",
		TargetType:  "vpn_connection",
		TargetID:    &input.VPNConnectionID,
		Payload:     json.RawMessage(`{"vpn_runtime_stopped":false}`),
		CreatedAt:   s.now(),
	}
	// The audit error is discarded: a failed audit write does not fail the call.
	_ = s.store.RecordAudit(ctx, event)
	return released, nil
}

// FenceVPNConnectionLease fences a VPN connection lease.
//
// Unlike the other lease operations this requires PlatformRoleRecoveryAdmin
// (ensurePlatformRecoveryAdmin). Reason is trimmed and defaults to "fenced by
// platform recovery administrator". ClusterID, VPNConnectionID and LeaseID are
// required (ErrInvalidPayload). pgx.ErrNoRows from the store maps to
// ErrInvalidVPNLease. After a successful store write an audit event is
// recorded; the audit call's error is discarded.
func (s *Service) FenceVPNConnectionLease(ctx context.Context, input FenceVPNConnectionLeaseInput) (VPNConnectionLease, error) {
	var none VPNConnectionLease

	if err := s.ensurePlatformRecoveryAdmin(ctx, input.ActorUserID); err != nil {
		return none, err
	}

	input.Reason = strings.TrimSpace(input.Reason)
	if input.Reason == "" {
		input.Reason = "fenced by platform recovery administrator"
	}
	switch {
	case input.ClusterID == "", input.VPNConnectionID == "", input.LeaseID == "":
		return none, ErrInvalidPayload
	}

	fenced, err := s.store.FenceVPNConnectionLease(ctx, input)
	switch {
	case errors.Is(err, pgx.ErrNoRows):
		return none, ErrInvalidVPNLease
	case err != nil:
		return none, err
	}

	event := ClusterAuditEvent{
		ClusterID:   &input.ClusterID,
		ActorUserID: &input.ActorUserID,
		EventType:   "vpn_connection.owner_fenced",
		TargetType:  "vpn_connection",
		TargetID:    &input.VPNConnectionID,
		Payload:     json.RawMessage(`{"split_brain_guard":true}`),
		CreatedAt:   s.now(),
	}
	// The audit error is discarded: a failed audit write does not fail the call.
	_ = s.store.RecordAudit(ctx, event)
	return fenced, nil
}

// GetActiveVPNConnectionLease returns the active lease of a VPN connection. The
// actor must be a platform admin. pgx.ErrNoRows from the store maps to
// ErrInvalidVPNLease; any other store error is returned unchanged.
func (s *Service) GetActiveVPNConnectionLease(ctx context.Context, actorUserID, clusterID, vpnConnectionID string) (VPNConnectionLease, error) {
	if err := s.ensurePlatformAdmin(ctx, actorUserID); err != nil {
		return VPNConnectionLease{}, err
	}
	lease, err := s.store.GetActiveVPNConnectionLease(ctx, clusterID, vpnConnectionID)
	if errors.Is(err, pgx.ErrNoRows) {
		return VPNConnectionLease{}, ErrInvalidVPNLease
	}
	return lease, err
}

// ExpireStaleVPNConnectionLeases asks the store to expire stale leases of a
// cluster as of the service clock's current time and returns the leases the
// store reports.
//
// The actor must be a platform admin and ClusterID is required. One audit event
// is recorded per returned lease; the audit calls' errors are discarded.
func (s *Service) ExpireStaleVPNConnectionLeases(ctx context.Context, input ExpireStaleVPNConnectionLeasesInput) ([]VPNConnectionLease, error) {
	if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil {
		return nil, err
	}
	if input.ClusterID == "" {
		return nil, ErrInvalidPayload
	}

	expired, err := s.store.ExpireStaleVPNConnectionLeases(ctx, input.ClusterID, s.now())
	if err != nil {
		return nil, err
	}

	for i := range expired {
		// Copy the id so each event points at its own value.
		connectionID := expired[i].VPNConnectionID
		_ = s.store.RecordAudit(ctx, ClusterAuditEvent{
			ClusterID:   &input.ClusterID,
			ActorUserID: &input.ActorUserID,
			EventType:   "vpn_connection.lease_expired",
			TargetType:  "vpn_connection",
			TargetID:    &connectionID,
			Payload:     json.RawMessage(`{"stale_reclamation":true,"vpn_runtime_changed":false}`),
			CreatedAt:   s.now(),
		})
	}
	return expired, nil
}

// ListNodeVPNAssignments returns the VPN assignments the store reports for a
// node. Both ids are trimmed and must be non-empty (ErrInvalidPayload). No
// platform-admin check is performed here.
func (s *Service) ListNodeVPNAssignments(ctx context.Context, clusterID, nodeID string) ([]NodeVPNAssignment, error) {
	clusterID, nodeID = strings.TrimSpace(clusterID), strings.TrimSpace(nodeID)
	if clusterID == "" || nodeID == "" {
		return nil, ErrInvalidPayload
	}
	return s.store.ListNodeVPNAssignments(ctx, clusterID, nodeID)
}

// ReportNodeVPNAssignmentStatus validates and stores a node's status report for
// one of its VPN assignments.
//
// ClusterID, NodeID, VPNConnectionID and ObservedStatus are trimmed; the three
// ids are required. ObservedStatus defaults to VPNAssignmentStatusUnknown and
// must be accepted by isAllowedVPNAssignmentStatus. StatusPayload defaults to
// `{}` and must be valid JSON; a zero ObservedAt is replaced by the service
// clock. The report is rejected with ErrVPNLeaseOwnerNotAllowed unless the
// connection appears in the node's assignments as listed by the store. The
// audit event carries no actor; the audit call's error is discarded.
func (s *Service) ReportNodeVPNAssignmentStatus(ctx context.Context, input ReportNodeVPNAssignmentStatusInput) (NodeVPNAssignmentStatus, error) {
	var none NodeVPNAssignmentStatus

	input.ClusterID = strings.TrimSpace(input.ClusterID)
	input.NodeID = strings.TrimSpace(input.NodeID)
	input.VPNConnectionID = strings.TrimSpace(input.VPNConnectionID)
	input.ObservedStatus = strings.TrimSpace(input.ObservedStatus)

	switch {
	case input.ClusterID == "", input.NodeID == "", input.VPNConnectionID == "":
		return none, ErrInvalidPayload
	}
	if input.ObservedStatus == "" {
		input.ObservedStatus = VPNAssignmentStatusUnknown
	}
	if !isAllowedVPNAssignmentStatus(input.ObservedStatus) {
		return none, ErrInvalidPayload
	}

	input.StatusPayload = defaultJSON(input.StatusPayload, `{}`)
	if ok := json.Valid(input.StatusPayload); !ok {
		return none, errors.New("status_payload must be valid json")
	}
	if input.ObservedAt.IsZero() {
		input.ObservedAt = s.now()
	}

	assignments, err := s.store.ListNodeVPNAssignments(ctx, input.ClusterID, input.NodeID)
	if err != nil {
		return none, err
	}
	assigned := false
	for i := 0; i < len(assignments) && !assigned; i++ {
		assigned = assignments[i].VPNConnectionID == input.VPNConnectionID
	}
	if !assigned {
		return none, ErrVPNLeaseOwnerNotAllowed
	}

	status, err := s.store.ReportNodeVPNAssignmentStatus(ctx, input)
	if err != nil {
		return none, err
	}

	event := ClusterAuditEvent{
		ClusterID:  &input.ClusterID,
		EventType:  "vpn_connection.assignment_status_reported",
		TargetType: "vpn_connection",
		TargetID:   &input.VPNConnectionID,
		Payload:    json.RawMessage(`{"node_agent_runtime_executed":false}`),
		CreatedAt:  s.now(),
	}
	// The audit error is discarded: a failed audit write does not fail the call.
	_ = s.store.RecordAudit(ctx, event)
	return status, nil
}

// ListAuditEvents returns audit events of a cluster; limit is passed through to
// the store unchanged. The actor must be a platform admin.
func (s *Service) ListAuditEvents(ctx context.Context, actorUserID, clusterID string, limit int) ([]ClusterAuditEvent, error) {
	authErr := s.ensurePlatformAdmin(ctx, actorUserID)
	if authErr != nil {
		return nil, authErr
	}
	return s.store.ListAuditEvents(ctx, clusterID, limit)
}

// ensurePlatformAdmin returns nil when the trimmed userID is non-empty and its
// stored platform role satisfies isPlatformAdminRole. A blank id or any other
// role yields ErrAccessDenied; store errors are returned unchanged.
func (s *Service) ensurePlatformAdmin(ctx context.Context, userID string) error {
	id := strings.TrimSpace(userID)
	if id == "" {
		return ErrAccessDenied
	}
	role, err := s.store.GetPlatformRole(ctx, id)
	switch {
	case err != nil:
		return err
	case isPlatformAdminRole(role):
		return nil
	default:
		return ErrAccessDenied
	}
}

// ensurePlatformRecoveryAdmin returns nil only when the trimmed userID is
// non-empty and its stored platform role equals PlatformRoleRecoveryAdmin. A
// blank id or any other role yields ErrAccessDenied; store errors are returned
// unchanged.
func (s *Service) ensurePlatformRecoveryAdmin(ctx context.Context, userID string) error {
	id := strings.TrimSpace(userID)
	if id == "" {
		return ErrAccessDenied
	}
	role, err := s.store.GetPlatformRole(ctx, id)
	switch {
	case err != nil:
		return err
	case role == PlatformRoleRecoveryAdmin:
		return nil
	default:
		return ErrAccessDenied
	}
}

// ensureClusterMutable reports whether the actor may mutate policy on the
// cluster.
//
// PlatformRoleRecoveryAdmin always passes. For every other role the cluster's
// authority state must be "authoritative" with mutation mode "normal",
// otherwise ErrClusterReadOnly is returned. A cluster with no authority state
// row (pgx.ErrNoRows) is treated as mutable.
func (s *Service) ensureClusterMutable(ctx context.Context, actorUserID, clusterID string) error {
	role, err := s.store.GetPlatformRole(ctx, strings.TrimSpace(actorUserID))
	if err != nil {
		return err
	}
	if role == PlatformRoleRecoveryAdmin {
		return nil
	}

	authority, err := s.store.GetClusterAuthorityState(ctx, clusterID)
	switch {
	case err == nil:
		// Fall through to the state check below.
	case errors.Is(err, pgx.ErrNoRows):
		return nil
	default:
		return err
	}

	if authority.AuthorityState == "authoritative" && authority.MutationMode == "normal" {
		return nil
	}
	return ErrClusterReadOnly
}

// ensureVPNLeaseOwnerEligible checks that ownerNodeID may hold a lease on the
// VPN connection, using the store's eligibility record.
//
// pgx.ErrNoRows maps to ErrInvalidVPNConnection. The node's membership status
// must be "active", its registration status NodeRegistrationActive, and it must
// be allowed by policy (ErrVPNLeaseOwnerNotAllowed otherwise); it must also
// hold an authorized role (ErrVPNLeaseOwnerRoleRequired otherwise).
func (s *Service) ensureVPNLeaseOwnerEligible(ctx context.Context, clusterID, vpnConnectionID, ownerNodeID string) error {
	check, err := s.store.CheckVPNLeaseOwnerEligibility(ctx, clusterID, vpnConnectionID, ownerNodeID)
	switch {
	case errors.Is(err, pgx.ErrNoRows):
		return ErrInvalidVPNConnection
	case err != nil:
		return err
	case check.MembershipStatus != "active", check.NodeRegistrationStatus != NodeRegistrationActive:
		return ErrVPNLeaseOwnerNotAllowed
	case !check.AllowedByPolicy:
		return ErrVPNLeaseOwnerNotAllowed
	case !check.HasAuthorizedRole:
		return ErrVPNLeaseOwnerRoleRequired
	}
	return nil
}

// defaultJSON returns raw unchanged when it has at least one byte, and
// otherwise the fallback document.
func defaultJSON(raw json.RawMessage, fallback string) json.RawMessage {
	if len(raw) > 0 {
		return raw
	}
	return json.RawMessage(fallback)
}

// isAllowedVPNDesiredState reports whether state is one of the two VPN
// connection desired-state constants.
func isAllowedVPNDesiredState(state string) bool {
	switch state {
	case VPNConnectionDesiredEnabled, VPNConnectionDesiredDisabled:
		return true
	}
	return false
}

// isAllowedVPNRouteType reports whether routeType is "cidr", "dns_suffix",
// "service" or "resource".
func isAllowedVPNRouteType(routeType string) bool {
	return routeType == "cidr" ||
		routeType == "dns_suffix" ||
		routeType == "service" ||
		routeType == "resource"
}

// isAllowedVPNRouteAction reports whether action is "allow" or "deny".
func isAllowedVPNRouteAction(action string) bool {
	switch action {
	case "allow", "deny":
		return true
	}
	return false
}

// isAllowedVPNPolicyStatus reports whether status is "active" or "disabled".
func isAllowedVPNPolicyStatus(status string) bool {
	switch status {
	case "active", "disabled":
		return true
	}
	return false
}

// isFabricEndpointStatus reports whether status is "active", "disabled" or
// "maintenance".
func isFabricEndpointStatus(status string) bool {
	return status == "active" || status == "disabled" || status == "maintenance"
}

// isFabricEntryPointType reports whether endpointType is "client_access",
// "admin", "api" or "other".
func isFabricEntryPointType(endpointType string) bool {
	return endpointType == "client_access" ||
		endpointType == "admin" ||
		endpointType == "api" ||
		endpointType == "other"
}

// isAllowedVPNNodePreference reports whether preference is "candidate",
// "standby" or "preferred".
func isAllowedVPNNodePreference(preference string) bool {
	return preference == "candidate" || preference == "standby" || preference == "preferred"
}

// isAllowedVPNAssignmentStatus reports whether status is one of the
// VPNAssignmentStatus* constants accepted for node reports.
func isAllowedVPNAssignmentStatus(status string) bool {
	return status == VPNAssignmentStatusNotStarted ||
		status == VPNAssignmentStatusAssigned ||
		status == VPNAssignmentStatusLeaseRequired ||
		status == VPNAssignmentStatusBlocked ||
		status == VPNAssignmentStatusUnknown
}

// syntheticRoutePolicy is the JSON shape that syntheticRouteFromIntent decodes
// from a mesh route intent's Policy document.
type syntheticRoutePolicy struct {
	SyntheticEnabled bool              `json:"synthetic_enabled"`
	PeerEndpoints    map[string]string `json:"peer_endpoints"`
PeerEndpointCandidates map[string][]PeerEndpointCandidate `json:"peer_endpoint_candidates"`
	RecoverySeeds []PeerRecoverySeed `json:"recovery_seeds"`
	RendezvousLeases []PeerRendezvousLease `json:"rendezvous_leases"`
	Hops []string `json:"hops"`
	AllowedChannels []string `json:"allowed_channels"`
	MaxTTL int `json:"max_ttl"`
	MaxHops int `json:"max_hops"`
	ExpiresAt *time.Time `json:"expires_at"`
	RouteVersion string `json:"route_version"`
	PolicyVersion string `json:"policy_version"`
	PeerDirectoryVersion string `json:"peer_directory_version"`
}

// heartbeatMeshEndpointReport is the JSON shape read from the
// "mesh_endpoint_report" key of a node heartbeat's metadata (see
// endpointReportFromHeartbeat). The report-level Transport, ConnectivityMode,
// NATType and Region are used as defaults for candidates that leave those
// fields empty.
type heartbeatMeshEndpointReport struct {
	SchemaVersion string `json:"schema_version"`
	ClusterID string `json:"cluster_id"`
	NodeID string `json:"node_id"`
	PeerEndpoint string `json:"peer_endpoint"`
	Transport string `json:"transport"`
	ConnectivityMode string `json:"connectivity_mode"`
	NATType string `json:"nat_type"`
	Region string `json:"region"`
	EndpointCandidates []PeerEndpointCandidate `json:"endpoint_candidates"`
	ObservedAt *time.Time `json:"observed_at"`
}

// heartbeatRendezvousLeaseReport is the JSON shape read from the
// "mesh_rendezvous_lease_report" key of a node heartbeat's metadata (see
// rendezvousRelayFeedbackFromHeartbeat).
type heartbeatRendezvousLeaseReport struct {
	SchemaVersion string `json:"schema_version"`
	ClusterID string `json:"cluster_id"`
	NodeID string `json:"node_id"`
	// ObservedAt is carried as a raw string; the code in this file uses the
	// heartbeat's own ObservedAt for freshness checks instead.
	ObservedAt string `json:"observed_at"`
	Leases []heartbeatRendezvousLeaseDetails `json:"leases"`
}

// heartbeatRendezvousLeaseDetails describes one lease inside a
// heartbeatRendezvousLeaseReport. Only entries with at least one of
// StaleRelay, WithdrawalNeeded or ReselectionNeeded set, and with non-blank
// peer and relay node IDs, are converted into relay feedback.
type heartbeatRendezvousLeaseDetails struct {
	LeaseID string `json:"lease_id"`
	PeerNodeID string `json:"peer_node_id"`
	RelayNodeID string `json:"relay_node_id"`
	RouteIDs []string `json:"route_ids"`
	StaleRelay bool `json:"stale_relay"`
	WithdrawalNeeded bool `json:"withdrawal_needed"`
	ReselectionNeeded bool `json:"reselection_needed"`
	ConnectionState string `json:"connection_state"`
	Reason string `json:"reason"`
}

// meshRouteHealthObservationMetadata is the JSON shape decoded from a
// MeshLinkObservation's Metadata by routeHealthMetadataFromLink.
// rendezvousRelayRouteHealthFeedbackFromLink only accepts observations whose
// ObservationType is "synthetic_route_health", that have
// RoutePathDecisionApplied set, and that have none of the four
// production/service forwarding flags set.
type meshRouteHealthObservationMetadata struct {
	ObservationType string `json:"observation_type"`
	RouteID string `json:"route_id"`
	RoutePathDecisionApplied bool `json:"route_path_decision_applied"`
	RoutePathDecisionSelectedRelayID string `json:"route_path_decision_selected_relay_id"`
	RoutePathDecisionStaleRelayNodeID string `json:"route_path_decision_stale_relay_node_id"`
	RoutePathDecisionRendezvousPeerNodeID string `json:"route_path_decision_rendezvous_peer_node_id"`
	RoutePathDecisionRendezvousLeaseID string `json:"route_path_decision_rendezvous_lease_id"`
	RoutePathDecisionRendezvousLeaseReason string `json:"route_path_decision_rendezvous_lease_reason"`
	RoutePathDecisionSource string `json:"route_path_decision_source"`
	ExpectedEffectiveHops []string `json:"expected_effective_hops"`
	ObservedAckPath []string `json:"observed_ack_path"`
	RoutePathDriftDetected bool `json:"route_path_drift_detected"`
	FailureReason string `json:"failure_reason"`
	ControlPlaneOnly bool `json:"control_plane_only"`
	ProductionForwarding bool `json:"production_forwarding"`
	ProductionPayloadForwarding bool `json:"production_payload_forwarding"`
	RouteHealthProductionPayloadForwarding bool `json:"route_health_production_payload_forwarding"`
	RouteHealthServicePayloadForwarding bool `json:"route_health_service_payload_forwarding"`
}

// rendezvousRelayFeedbackEntry is the in-memory record of one report that a
// relay is stale for a peer. Entries are produced from heartbeat lease
// reports, replacement hints and synthetic route health observations, and are
// consumed by rendezvousRelayPolicy.
type rendezvousRelayFeedbackEntry struct {
	ReporterNodeID string
	// RouteIDs limits the entry to specific routes; an empty list applies to
	// every route (see rendezvousFeedbackAppliesToRoute).
	RouteIDs []string
	LeaseID string
	PeerNodeID string
	RelayNodeID string
	ConnectionState string
	Reason string
	WithdrawalNeeded bool
	ReselectionNeeded bool
	ObservedAt time.Time
}

// rendezvousRelaySelection is a scored relay choice for a peer.
type rendezvousRelaySelection struct {
	RelayNodeID string
	Endpoint string
	Score int
	Reasons []string
}

// rendezvousRelayPolicy accumulates stale-relay feedback and the withdrawal
// and replacement decisions derived from it. Its methods tolerate a nil
// receiver.
type rendezvousRelayPolicy struct {
	localNodeID string
	now time.Time
	links []MeshLinkObservation
	feedback []rendezvousRelayFeedbackEntry
	// withdrawn is keyed by route ID, lease ID and relay node ID joined with
	// NUL bytes (see recordWithdrawal).
	withdrawn map[string]RendezvousRelayPolicyDecision
	// replacements is keyed by rendezvousRelayReplacementKey.
	replacements map[string]RendezvousRelayPolicyDecision
}

const (
	// maxScopedRecoverySeeds is the largest seed list accepted by
	// validatePeerRecoverySeeds.
	maxScopedRecoverySeeds = 20
	// maxScopedRendezvousLeases is the largest lease list accepted by
	// validatePeerRendezvousLeases.
	maxScopedRendezvousLeases = 20
	// rendezvousRelayFeedbackMaxAge is the maximum age of a heartbeat or link
	// observation that may still be used as relay feedback.
	rendezvousRelayFeedbackMaxAge = 2 * time.Minute
)

// nodeSelector is the JSON shape decoded from a route intent's source and
// destination selectors.
type nodeSelector struct {
	NodeID string `json:"node_id"`
	NodeIDs []string `json:"node_ids"`
}

// syntheticRouteFromIntent converts an active route intent whose policy has
// synthetic routing enabled into a SyntheticMeshRouteConfig, together with the
// peer endpoints, peer endpoint candidates, recovery seeds and normalized
// rendezvous leases from the policy. The final bool is false (and all other
// results are zero) when the intent is not usable for input.NodeID.
func (s *Service) syntheticRouteFromIntent(input GetNodeSyntheticMeshConfigInput, intent MeshRouteIntent) (SyntheticMeshRouteConfig, map[string]string, map[string][]PeerEndpointCandidate,
[]PeerRecoverySeed, []PeerRendezvousLease, bool) {
	if intent.Status != "active" {
		return SyntheticMeshRouteConfig{}, nil, nil, nil, nil, false
	}
	var policy syntheticRoutePolicy
	if err := json.Unmarshal(intent.Policy, &policy); err != nil {
		return SyntheticMeshRouteConfig{}, nil, nil, nil, nil, false
	}
	if !policy.SyntheticEnabled {
		return SyntheticMeshRouteConfig{}, nil, nil, nil, nil, false
	}
	var source nodeSelector
	var destination nodeSelector
	// Selector decode errors are ignored on purpose: an undecodable selector
	// leaves the selector zero-valued, and the endpoints are then taken from
	// the first and last hop further down.
	_ = json.Unmarshal(intent.SourceSelector, &source)
	_ = json.Unmarshal(intent.DestinationSelector, &destination)
	sourceNodeID := firstNodeID(source)
	destinationNodeID := firstNodeID(destination)
	hops := append([]string{}, policy.Hops...)
	if len(hops) == 0 && sourceNodeID != "" && destinationNodeID != "" {
		hops = []string{sourceNodeID, destinationNodeID}
	}
	// The route is only relevant to the requesting node when that node is one
	// of at least two hops.
	if len(hops) < 2 || !containsString(hops, input.NodeID) {
		return SyntheticMeshRouteConfig{}, nil, nil, nil, nil, false
	}
	if err := validatePeerEndpointCandidates(policy.PeerEndpointCandidates, hops); err != nil {
		return SyntheticMeshRouteConfig{}, nil, nil, nil, nil, false
	}
	if err := validatePeerRecoverySeeds(policy.RecoverySeeds); err != nil {
		return SyntheticMeshRouteConfig{}, nil, nil, nil, nil, false
	}
	if err := validatePeerRendezvousLeases(policy.RendezvousLeases, hops, s.now()); err != nil {
		return SyntheticMeshRouteConfig{}, nil, nil, nil, nil, false
	}
	if sourceNodeID == "" {
		sourceNodeID = hops[0]
	}
	if destinationNodeID == "" {
		destinationNodeID = hops[len(hops)-1]
	}
	// Defaults applied when the policy leaves a field unset: five-minute
	// expiry, control channels only, TTL and hop limits of 8, and versions
	// derived from the intent's update time.
	expiresAt := s.now().UTC().Add(5 * time.Minute)
	if policy.ExpiresAt != nil {
		expiresAt = policy.ExpiresAt.UTC()
	}
	allowedChannels := policy.AllowedChannels
	if len(allowedChannels) == 0 {
		allowedChannels = []string{"fabric_control", "route_control"}
	}
	maxTTL := policy.MaxTTL
	if maxTTL <= 0 {
		maxTTL = 8
	}
	maxHops := policy.MaxHops
	if maxHops <= 0 {
		maxHops = 8
	}
	routeVersion := policy.RouteVersion
	if routeVersion == "" {
		routeVersion = intent.UpdatedAt.UTC().Format(time.RFC3339)
	}
	policyVersion := policy.PolicyVersion
	if policyVersion == "" {
		policyVersion = routeVersion
	}
	peerDirectoryVersion := policy.PeerDirectoryVersion
	if peerDirectoryVersion == "" {
		peerDirectoryVersion = routeVersion
	}
	route := SyntheticMeshRouteConfig{
		RouteID: intent.ID,
		ClusterID: input.ClusterID,
		SourceNodeID: sourceNodeID,
		DestinationNodeID: destinationNodeID,
		Hops: hops,
		AllowedChannels: allowedChannels,
		ExpiresAt: expiresAt,
		MaxTTL: maxTTL,
		MaxHops: maxHops,
		RouteVersion: routeVersion,
		PolicyVersion: policyVersion,
		PeerDirectoryVersion: peerDirectoryVersion,
	}
	return route, scopedPeerEndpoints(policy.PeerEndpoints, hops), scopedPeerEndpointCandidates(policy.PeerEndpointCandidates, hops), policy.RecoverySeeds, normalizeRendezvousLeases(policy.RendezvousLeases, route, s.now()), true
}

// reportedEndpointConfig collects the endpoint and endpoint candidates that
// each remote node on routePath reported in its most recent heartbeat.
//
// Blank node IDs and localNodeID are skipped. Each distinct node is looked up
// once, so a node that appears several times in routePath contributes its
// candidates only once. Nodes without a heartbeat, or whose heartbeat carries
// no usable endpoint report, are omitted from both maps. A store error aborts
// the whole lookup and is returned as-is.
func (s *Service) reportedEndpointConfig(ctx context.Context, clusterID string, localNodeID string, routePath []string) (map[string]string, map[string][]PeerEndpointCandidate, error) {
	peers := map[string]string{}
	candidates := map[string][]PeerEndpointCandidate{}
	seenNodes := map[string]struct{}{}
	for _, nodeID := range routePath {
		nodeID = strings.TrimSpace(nodeID)
		if nodeID == "" || nodeID == localNodeID {
			continue
		}
		// A repeated hop would otherwise be queried again and have its
		// candidates appended a second time.
		if _, duplicate := seenNodes[nodeID]; duplicate {
			continue
		}
		seenNodes[nodeID] = struct{}{}
		heartbeats, err := s.store.ListNodeHeartbeats(ctx, clusterID, nodeID, 1)
		if err != nil {
			return nil, nil, err
		}
		if len(heartbeats) == 0 {
			continue
		}
		peerEndpoint, nodeCandidates, ok := endpointReportFromHeartbeat(heartbeats[0])
		if !ok {
			continue
		}
		if peerEndpoint != "" {
			peers[nodeID] = peerEndpoint
		}
		if len(nodeCandidates) > 0 {
			candidates[nodeID] = append(candidates[nodeID], nodeCandidates...)
		}
	}
	return peers, candidates, nil
}

// endpointReportFromHeartbeat extracts the "mesh_endpoint_report" entry from a
// heartbeat's metadata. It returns the trimmed peer endpoint, the reported
// endpoint candidates with empty fields filled from the report and the
// heartbeat, and whether anything usable was found.
//
// The result is ("", nil, false) when the metadata is empty or invalid JSON,
// when the report names a different node or cluster than the heartbeat, when
// any candidate belongs to a different node, or when the candidates fail
// validatePeerEndpointCandidates.
func endpointReportFromHeartbeat(heartbeat NodeHeartbeat) (string, []PeerEndpointCandidate, bool) {
	var metadata struct {
		MeshEndpointReport heartbeatMeshEndpointReport `json:"mesh_endpoint_report"`
	}
	if len(heartbeat.Metadata) == 0 || !json.Valid(heartbeat.Metadata) {
		return "", nil, false
	}
	if err := json.Unmarshal(heartbeat.Metadata, &metadata); err != nil {
		return "", nil, false
	}
	report := metadata.MeshEndpointReport
	if report.NodeID != "" && report.NodeID != heartbeat.NodeID {
		return "", nil, false
	}
	if report.ClusterID != "" && report.ClusterID != heartbeat.ClusterID {
		return "", nil, false
	}
	nodeID := heartbeat.NodeID
	peerEndpoint := strings.TrimSpace(report.PeerEndpoint)
	out := make([]PeerEndpointCandidate, 0, len(report.EndpointCandidates))
	for _, candidate := range report.EndpointCandidates {
		if candidate.NodeID == "" {
			candidate.NodeID = nodeID
		}
		if candidate.EndpointID == "" {
			candidate.EndpointID = nodeID + "-reported"
		}
		if candidate.Address == "" {
			candidate.Address = peerEndpoint
		}
		if candidate.Transport == "" {
			candidate.Transport = report.Transport
		}
		if candidate.ConnectivityMode == "" {
			candidate.ConnectivityMode = report.ConnectivityMode
		}
		if candidate.NATType == "" {
			candidate.NATType = report.NATType
		}
		if candidate.Region == "" {
			candidate.Region = report.Region
		}
		if candidate.Reachability == "" {
			candidate.Reachability = reachabilityFromConnectivityMode(candidate.ConnectivityMode)
		}
		if candidate.Metadata == nil {
			candidate.Metadata = json.RawMessage(`{}`)
		}
		// A node may only report candidates for itself; one foreign
		// candidate invalidates the whole report.
		if candidate.NodeID != nodeID {
			return "", nil, false
		}
		out = append(out, candidate)
	}
	if len(out) > 0 {
		if err := validatePeerEndpointCandidates(map[string][]PeerEndpointCandidate{nodeID: out}, []string{nodeID}); err != nil {
			return "", nil, false
		}
	}
	return peerEndpoint, out, peerEndpoint != "" || len(out) > 0
}

// rendezvousRelayFeedback gathers stale-relay feedback from the most recent
// heartbeat of every distinct, non-blank node on routePath. A store error
// aborts the lookup and is returned as-is.
func (s *Service) rendezvousRelayFeedback(ctx context.Context, clusterID string, routePath []string, now time.Time)
([]rendezvousRelayFeedbackEntry, error) {
	out := []rendezvousRelayFeedbackEntry{}
	seenNodes := map[string]struct{}{}
	for _, nodeID := range routePath {
		nodeID = strings.TrimSpace(nodeID)
		if nodeID == "" {
			continue
		}
		// Query each node once even if it appears several times on the path.
		if _, duplicate := seenNodes[nodeID]; duplicate {
			continue
		}
		seenNodes[nodeID] = struct{}{}
		heartbeats, err := s.store.ListNodeHeartbeats(ctx, clusterID, nodeID, 1)
		if err != nil {
			return nil, err
		}
		if len(heartbeats) == 0 {
			continue
		}
		out = append(out, rendezvousRelayFeedbackFromHeartbeat(heartbeats[0], now)...)
	}
	return out, nil
}

// rendezvousRelayFeedbackFromHeartbeat converts the
// "mesh_rendezvous_lease_report" entry of a heartbeat's metadata into feedback
// entries.
//
// It returns nil when the metadata is empty or invalid, when the heartbeat has
// no observation time, is dated more than a minute after now, or is older than
// rendezvousRelayFeedbackMaxAge, or when the report names a different node or
// cluster than the heartbeat. A zero now is replaced by the current UTC time.
// Only leases flagged stale, withdrawal-needed or reselection-needed, and that
// name both a peer and a relay, produce an entry.
func rendezvousRelayFeedbackFromHeartbeat(heartbeat NodeHeartbeat, now time.Time) []rendezvousRelayFeedbackEntry {
	if len(heartbeat.Metadata) == 0 || !json.Valid(heartbeat.Metadata) {
		return nil
	}
	if now.IsZero() {
		now = time.Now().UTC()
	} else {
		now = now.UTC()
	}
	// Reject heartbeats without a timestamp, more than one minute in the
	// future, or older than the feedback window.
	if heartbeat.ObservedAt.IsZero() || heartbeat.ObservedAt.After(now.Add(time.Minute)) || now.Sub(heartbeat.ObservedAt.UTC()) > rendezvousRelayFeedbackMaxAge {
		return nil
	}
	var metadata struct {
		MeshRendezvousLeaseReport heartbeatRendezvousLeaseReport `json:"mesh_rendezvous_lease_report"`
	}
	if err := json.Unmarshal(heartbeat.Metadata, &metadata); err != nil {
		return nil
	}
	report := metadata.MeshRendezvousLeaseReport
	if report.NodeID != "" && report.NodeID != heartbeat.NodeID {
		return nil
	}
	if report.ClusterID != "" && report.ClusterID != heartbeat.ClusterID {
		return nil
	}
	out := []rendezvousRelayFeedbackEntry{}
	for _, lease := range report.Leases {
		if !lease.StaleRelay && !lease.WithdrawalNeeded && !lease.ReselectionNeeded {
			continue
		}
		if strings.TrimSpace(lease.PeerNodeID) == "" || strings.TrimSpace(lease.RelayNodeID) == "" {
			continue
		}
		out = append(out, rendezvousRelayFeedbackEntry{
			ReporterNodeID: heartbeat.NodeID,
			RouteIDs: append([]string{}, lease.RouteIDs...),
			LeaseID: strings.TrimSpace(lease.LeaseID),
			PeerNodeID: strings.TrimSpace(lease.PeerNodeID),
			RelayNodeID: strings.TrimSpace(lease.RelayNodeID),
			ConnectionState: strings.TrimSpace(lease.ConnectionState),
			Reason: strings.TrimSpace(lease.Reason),
			WithdrawalNeeded: lease.WithdrawalNeeded,
			ReselectionNeeded: lease.ReselectionNeeded,
			ObservedAt: heartbeat.ObservedAt.UTC(),
		})
	}
	return out
}

// rendezvousRelayReplacementHints gathers relay replacement hints from the
// most recent heartbeat of every distinct, non-blank node on routePath. A
// store error aborts the lookup and is returned as-is.
func (s *Service) rendezvousRelayReplacementHints(ctx context.Context, clusterID string, routePath []string, now time.Time) ([]RendezvousRelayPolicyDecision, error) {
	out := []RendezvousRelayPolicyDecision{}
	seenNodes := map[string]struct{}{}
	for _, nodeID := range routePath {
		nodeID = strings.TrimSpace(nodeID)
		if nodeID == "" {
			continue
		}
		if _, duplicate := seenNodes[nodeID]; duplicate {
			continue
		}
		seenNodes[nodeID] = struct{}{}
		heartbeats, err := s.store.ListNodeHeartbeats(ctx, clusterID, nodeID, 1)
		if err != nil {
			return nil, err
		}
		if len(heartbeats) == 0 {
			continue
		}
		out = append(out, rendezvousRelayReplacementHintsFromHeartbeat(heartbeats[0], now)...)
	}
	return out, nil
}

// rendezvousRelayReplacementHintsFromHeartbeat converts the
// "mesh_route_path_decision_report" entry of a heartbeat's metadata into
// replacement hints.
//
// The heartbeat is subject to the same validity, freshness and node/cluster
// checks as rendezvousRelayFeedbackFromHeartbeat. A decision becomes a hint
// only when it has a route ID, its source is "stale_relay_replacement", it
// names both a selected and a stale relay, it is control-plane only without
// production forwarding, it has not expired, and a peer node can be
// determined for it.
func rendezvousRelayReplacementHintsFromHeartbeat(heartbeat NodeHeartbeat, now time.Time) []RendezvousRelayPolicyDecision {
	if len(heartbeat.Metadata) == 0 || !json.Valid(heartbeat.Metadata) {
		return nil
	}
	if now.IsZero() {
		now = time.Now().UTC()
	} else {
		now = now.UTC()
	}
	if heartbeat.ObservedAt.IsZero() || heartbeat.ObservedAt.After(now.Add(time.Minute)) || now.Sub(heartbeat.ObservedAt.UTC()) > rendezvousRelayFeedbackMaxAge {
		return nil
	}
	var metadata struct {
		MeshRoutePathDecisionReport struct {
			ClusterID string `json:"cluster_id"`
			NodeID string `json:"node_id"`
			Decisions []RoutePathDecision `json:"decisions"`
		} `json:"mesh_route_path_decision_report"`
	}
	if err := json.Unmarshal(heartbeat.Metadata, &metadata); err != nil {
		return nil
	}
	report := metadata.MeshRoutePathDecisionReport
	if report.NodeID != "" && report.NodeID != heartbeat.NodeID {
		return nil
	}
	if report.ClusterID != "" && report.ClusterID != heartbeat.ClusterID {
		return nil
	}
	out := []RendezvousRelayPolicyDecision{}
	for _, decision := range report.Decisions {
		if strings.TrimSpace(decision.RouteID) == "" || decision.DecisionSource != "stale_relay_replacement" || strings.TrimSpace(decision.SelectedRelayID) == "" || strings.TrimSpace(decision.StaleRelayNodeID) == "" || decision.ProductionForwarding || !decision.ControlPlaneOnly || (!decision.ExpiresAt.IsZero() && !decision.ExpiresAt.After(now)) {
			continue
		}
		// Prefer the explicitly reported peer; otherwise derive it from the
		// decision's effective path.
		peerNodeID := strings.TrimSpace(decision.RendezvousPeerNodeID)
		if peerNodeID == "" {
			peerNodeID = replacementPeerNodeIDFromDecision(decision)
		}
		if peerNodeID == "" {
			continue
		}
		out = append(out, RendezvousRelayPolicyDecision{
			RouteID: strings.TrimSpace(decision.RouteID),
			PeerNodeID: peerNodeID,
			StaleRelayNodeID: strings.TrimSpace(decision.StaleRelayNodeID),
			SelectedRelayID: strings.TrimSpace(decision.SelectedRelayID),
			SelectedEndpoint: strings.TrimRight(strings.TrimSpace(decision.SelectedRelayEndpoint), "/"),
			Score: decision.PathScore,
			Reason: "stale_relay_replacement",
			ScoreReasons: append([]string{}, decision.ScoreReasons...),
			ReporterNodeID: heartbeat.NodeID,
		})
	}
	return out
}

// replacementPeerNodeIDFromDecision returns the node that follows the selected
// relay on the decision's effective hops, or the trimmed destination node ID
// when the relay is not on the path or is its last hop.
func replacementPeerNodeIDFromDecision(decision RoutePathDecision) string {
	effectiveHops := cleanRouteNodePath(decision.EffectiveHops)
	selectedRelayID := strings.TrimSpace(decision.SelectedRelayID)
	for index, nodeID := range effectiveHops {
		if nodeID == selectedRelayID && index+1 < len(effectiveHops) {
			return effectiveHops[index+1]
		}
	}
	return strings.TrimSpace(decision.DestinationNodeID)
}

// replacementHintFeedback turns replacement hints into feedback entries that
// mark each hint's stale relay as needing withdrawal and reselection. Hints
// missing a route, peer, stale relay or selected relay are skipped. Entries
// are stamped with now (the current UTC time when now is zero). It returns
// nil for an empty hint list.
func replacementHintFeedback(hints []RendezvousRelayPolicyDecision, now time.Time) []rendezvousRelayFeedbackEntry {
	if len(hints) == 0 {
		return nil
	}
	if now.IsZero() {
		now = time.Now().UTC()
	} else {
		now = now.UTC()
	}
	out := make([]rendezvousRelayFeedbackEntry, 0, len(hints))
	for _, hint := range hints {
		if strings.TrimSpace(hint.RouteID) == "" || strings.TrimSpace(hint.PeerNodeID) == "" || strings.TrimSpace(hint.StaleRelayNodeID) == "" || strings.TrimSpace(hint.SelectedRelayID) == "" {
			continue
		}
		out = append(out, rendezvousRelayFeedbackEntry{
			ReporterNodeID: strings.TrimSpace(hint.ReporterNodeID),
			RouteIDs:
[]string{strings.TrimSpace(hint.RouteID)},
			PeerNodeID: strings.TrimSpace(hint.PeerNodeID),
			// The feedback targets the relay being replaced, not the new one.
			RelayNodeID: strings.TrimSpace(hint.StaleRelayNodeID),
			ConnectionState: "replacement_hint",
			Reason: "stale_relay_replacement_hint",
			WithdrawalNeeded: true,
			ReselectionNeeded: true,
			ObservedAt: now,
		})
	}
	return out
}

// rendezvousRelayRouteHealthFeedback returns one feedback entry for every link
// observation that rendezvousRelayRouteHealthFeedbackFromLink accepts for the
// given local node and route.
func rendezvousRelayRouteHealthFeedback(localNodeID string, route SyntheticMeshRouteConfig, links []MeshLinkObservation, now time.Time) []rendezvousRelayFeedbackEntry {
	out := []rendezvousRelayFeedbackEntry{}
	for _, link := range links {
		item, ok := rendezvousRelayRouteHealthFeedbackFromLink(localNodeID, route, link, now)
		if ok {
			out = append(out, item)
		}
	}
	return out
}

// rendezvousRelayRouteHealthFeedbackFromLink derives stale-relay feedback from
// a single synthetic route health observation.
//
// It returns false unless the link was observed by localNodeID, is fresh, and
// its metadata is a "synthetic_route_health" observation for route.RouteID
// with an applied route path decision, no forwarding flags set and a selected
// relay. The observation must also indicate a problem: path drift, an
// "unreachable" link status, or a non-blank failure reason (checked in that
// order to pick the reason). A peer node must be resolvable as well.
func rendezvousRelayRouteHealthFeedbackFromLink(localNodeID string, route SyntheticMeshRouteConfig, link MeshLinkObservation, now time.Time) (rendezvousRelayFeedbackEntry, bool) {
	localNodeID = strings.TrimSpace(localNodeID)
	if localNodeID == "" || link.SourceNodeID != localNodeID || strings.TrimSpace(route.RouteID) == "" {
		return rendezvousRelayFeedbackEntry{}, false
	}
	if !meshLinkObservationFresh(link, now) {
		return rendezvousRelayFeedbackEntry{}, false
	}
	metadata, ok := routeHealthMetadataFromLink(link)
	if !ok || metadata.ObservationType != "synthetic_route_health" || strings.TrimSpace(metadata.RouteID) != route.RouteID || !metadata.RoutePathDecisionApplied || metadata.ProductionForwarding || metadata.ProductionPayloadForwarding || metadata.RouteHealthProductionPayloadForwarding || metadata.RouteHealthServicePayloadForwarding {
		return rendezvousRelayFeedbackEntry{}, false
	}
	selectedRelayID := strings.TrimSpace(metadata.RoutePathDecisionSelectedRelayID)
	if selectedRelayID == "" {
		return rendezvousRelayFeedbackEntry{}, false
	}
	reason := ""
	switch {
	case metadata.RoutePathDriftDetected:
		reason = "synthetic_route_health_drift"
	case link.LinkStatus == "unreachable":
		reason = "synthetic_route_health_unreachable"
	case strings.TrimSpace(metadata.FailureReason) != "":
		reason = "synthetic_route_health_failure"
	default:
		// A healthy observation yields no feedback.
		return rendezvousRelayFeedbackEntry{}, false
	}
	peerNodeID := routeHealthPeerNodeID(metadata, route, link.TargetNodeID)
	if peerNodeID == "" {
		return rendezvousRelayFeedbackEntry{}, false
	}
	return rendezvousRelayFeedbackEntry{
		ReporterNodeID: link.SourceNodeID,
		RouteIDs: []string{route.RouteID},
		LeaseID: strings.TrimSpace(metadata.RoutePathDecisionRendezvousLeaseID),
		PeerNodeID: peerNodeID,
		RelayNodeID: selectedRelayID,
		ConnectionState: reason,
		Reason: reason,
		WithdrawalNeeded: true,
		ReselectionNeeded: true,
		ObservedAt: link.ObservedAt.UTC(),
	}, true
}

// routeHealthMetadataFromLink decodes a link observation's metadata. It
// returns false when the metadata is empty, invalid JSON, or cannot be
// unmarshalled into meshRouteHealthObservationMetadata.
func routeHealthMetadataFromLink(link MeshLinkObservation) (meshRouteHealthObservationMetadata, bool) {
	if len(link.Metadata) == 0 || !json.Valid(link.Metadata) {
		return meshRouteHealthObservationMetadata{}, false
	}
	var metadata meshRouteHealthObservationMetadata
	if err := json.Unmarshal(link.Metadata, &metadata); err != nil {
		return meshRouteHealthObservationMetadata{}, false
	}
	return metadata, true
}

// meshLinkObservationFresh reports whether the observation has a timestamp
// that is at most one minute after now and no older than
// rendezvousRelayFeedbackMaxAge. A zero now is replaced by the current UTC
// time.
func meshLinkObservationFresh(link MeshLinkObservation, now time.Time) bool {
	if now.IsZero() {
		now = time.Now().UTC()
	} else {
		now = now.UTC()
	}
	return !link.ObservedAt.IsZero() && !link.ObservedAt.After(now.Add(time.Minute)) && now.Sub(link.ObservedAt.UTC()) <= rendezvousRelayFeedbackMaxAge
}

// routeHealthPeerNodeID resolves the peer a route health observation refers
// to. It tries, in order: the peer named in the metadata, the node after the
// selected relay on the expected effective hops, the node after the selected
// relay on the route's hops, the link's target node, and finally the route's
// destination node.
func routeHealthPeerNodeID(metadata meshRouteHealthObservationMetadata, route SyntheticMeshRouteConfig, targetNodeID string) string {
	if peerNodeID := strings.TrimSpace(metadata.RoutePathDecisionRendezvousPeerNodeID); peerNodeID != "" {
		return peerNodeID
	}
	selectedRelayID := strings.TrimSpace(metadata.RoutePathDecisionSelectedRelayID)
	if peerNodeID := nodeAfterInPath(cleanRouteNodePath(metadata.ExpectedEffectiveHops), selectedRelayID); peerNodeID != "" {
		return peerNodeID
	}
	if peerNodeID := nodeAfterInPath(cleanRouteNodePath(route.Hops), selectedRelayID); peerNodeID != "" {
		return peerNodeID
	}
	if targetNodeID = strings.TrimSpace(targetNodeID); targetNodeID != "" {
		return targetNodeID
	}
return strings.TrimSpace(route.DestinationNodeID)
}

// nodeAfterInPath returns the element that follows the first occurrence of
// nodeID in path that is not the last element. It returns "" when nodeID is
// blank or no such occurrence exists.
func nodeAfterInPath(path []string, nodeID string) string {
	nodeID = strings.TrimSpace(nodeID)
	if nodeID == "" {
		return ""
	}
	for index, item := range path {
		if item == nodeID && index+1 < len(path) {
			return path[index+1]
		}
	}
	return ""
}

// newRendezvousRelayPolicy creates an empty policy for localNodeID holding a
// copy of links. A zero now is replaced by the current UTC time.
func newRendezvousRelayPolicy(localNodeID string, links []MeshLinkObservation, now time.Time) *rendezvousRelayPolicy {
	if now.IsZero() {
		now = time.Now().UTC()
	} else {
		now = now.UTC()
	}
	return &rendezvousRelayPolicy{
		localNodeID: strings.TrimSpace(localNodeID),
		now: now,
		links: append([]MeshLinkObservation{}, links...),
		withdrawn: map[string]RendezvousRelayPolicyDecision{},
		replacements: map[string]RendezvousRelayPolicyDecision{},
	}
}

// addFeedback appends feedback entries to the policy. It is a no-op on a nil
// receiver.
func (p *rendezvousRelayPolicy) addFeedback(items []rendezvousRelayFeedbackEntry) {
	if p == nil {
		return
	}
	p.feedback = append(p.feedback, items...)
}

// staleForLease returns the first feedback entry applicable to routeID that
// matches the lease, either by an equal non-empty lease ID or by the same
// peer and relay pair.
func (p *rendezvousRelayPolicy) staleForLease(routeID string, lease PeerRendezvousLease) (rendezvousRelayFeedbackEntry, bool) {
	if p == nil {
		return rendezvousRelayFeedbackEntry{}, false
	}
	for _, item := range p.feedback {
		if !rendezvousFeedbackAppliesToRoute(item, routeID) {
			continue
		}
		if item.LeaseID != "" && lease.LeaseID != "" && item.LeaseID == lease.LeaseID {
			return item, true
		}
		if item.PeerNodeID == lease.PeerNodeID && item.RelayNodeID == lease.RelayNodeID {
			return item, true
		}
	}
	return rendezvousRelayFeedbackEntry{}, false
}

// relayStale returns the first feedback entry applicable to routeID that names
// the given peer and relay pair.
func (p *rendezvousRelayPolicy) relayStale(routeID string, peerNodeID string, relayNodeID string) (rendezvousRelayFeedbackEntry, bool) {
	if p == nil {
		return rendezvousRelayFeedbackEntry{}, false
	}
	for _, item := range p.feedback {
		if item.PeerNodeID == peerNodeID && item.RelayNodeID == relayNodeID && rendezvousFeedbackAppliesToRoute(item, routeID) {
			return item, true
		}
	}
	return rendezvousRelayFeedbackEntry{}, false
}

// hasStalePeer returns the first feedback entry applicable to routeID that
// names the given peer, regardless of relay.
func (p *rendezvousRelayPolicy) hasStalePeer(routeID string, peerNodeID string) (rendezvousRelayFeedbackEntry, bool) {
	if p == nil {
		return rendezvousRelayFeedbackEntry{}, false
	}
	for _, item := range p.feedback {
		if item.PeerNodeID == peerNodeID && rendezvousFeedbackAppliesToRoute(item, routeID) {
			return item, true
		}
	}
	return rendezvousRelayFeedbackEntry{}, false
}

// recordWithdrawal records that the lease was withdrawn for the route because
// of the given feedback. A later call for the same route, lease ID and relay
// overwrites the earlier record.
func (p *rendezvousRelayPolicy) recordWithdrawal(route SyntheticMeshRouteConfig, lease PeerRendezvousLease, feedback rendezvousRelayFeedbackEntry) {
	if p == nil {
		return
	}
	key := route.RouteID + "\x00" + lease.LeaseID + "\x00" + lease.RelayNodeID
	p.withdrawn[key] = RendezvousRelayPolicyDecision{
		RouteID: route.RouteID,
		PeerNodeID: lease.PeerNodeID,
		WithdrawnLeaseID: lease.LeaseID,
		StaleRelayNodeID: lease.RelayNodeID,
		Reason: "stale_relay_withdrawn",
		ReporterNodeID: feedback.ReporterNodeID,
	}
}

// recordReplacement records that selection replaces the relay named in
// feedback for the peer on this route. It does nothing on a nil receiver or
// when the selection has no relay node ID.
func (p *rendezvousRelayPolicy) recordReplacement(route SyntheticMeshRouteConfig, peerNodeID string, feedback rendezvousRelayFeedbackEntry, selection rendezvousRelaySelection) {
	if p == nil || selection.RelayNodeID == "" {
		return
	}
	key := rendezvousRelayReplacementKey(route.RouteID, peerNodeID, feedback.RelayNodeID, selection.RelayNodeID)
	p.replacements[key] = RendezvousRelayPolicyDecision{
		RouteID: route.RouteID,
		PeerNodeID: peerNodeID,
		StaleRelayNodeID: feedback.RelayNodeID,
		SelectedRelayID: selection.RelayNodeID,
		SelectedEndpoint: selection.Endpoint,
		Score: selection.Score,
		Reason: "stale_relay_replacement",
		ScoreReasons: append([]string{}, selection.Reasons...),
		ReporterNodeID: feedback.ReporterNodeID,
	}
}

// addReplacementHints merges externally reported replacement hints into the
// policy. Each hint's identifiers are trimmed (and trailing slashes removed
// from the endpoint); hints missing a route, peer, stale relay or selected
// relay are dropped. A hint replaces an existing entry with the same key only
// when its score is strictly higher.
func (p *rendezvousRelayPolicy) addReplacementHints(hints []RendezvousRelayPolicyDecision) {
	if p == nil {
		return
	}
	for _, hint := range hints {
		hint.RouteID = strings.TrimSpace(hint.RouteID)
		hint.PeerNodeID = strings.TrimSpace(hint.PeerNodeID)
		hint.StaleRelayNodeID = strings.TrimSpace(hint.StaleRelayNodeID)
		hint.SelectedRelayID = strings.TrimSpace(hint.SelectedRelayID)
		hint.SelectedEndpoint = strings.TrimRight(strings.TrimSpace(hint.SelectedEndpoint), "/")
		if hint.RouteID == "" || hint.PeerNodeID == "" || hint.StaleRelayNodeID == "" ||
hint.SelectedRelayID == "" {
			continue
		}
		if hint.Reason == "" {
			hint.Reason = "stale_relay_replacement"
		}
		if len(hint.ScoreReasons) == 0 {
			hint.ScoreReasons = []string{"route_path_decision_hint"}
		}
		key := rendezvousRelayReplacementKey(hint.RouteID, hint.PeerNodeID, hint.StaleRelayNodeID, hint.SelectedRelayID)
		existing, exists := p.replacements[key]
		if !exists || hint.Score > existing.Score {
			p.replacements[key] = hint
		}
	}
}

// report summarizes the policy state as a RendezvousRelayPolicyReport, or
// returns nil when the receiver is nil or holds no feedback, withdrawals or
// replacements.
//
// Decisions are ordered by route ID, peer node ID, reason, selected relay ID,
// stale relay node ID and withdrawn lease ID. The last two keys make the order
// independent of map iteration: withdrawals have no selected relay, and
// several replacements can share one selected relay.
func (p *rendezvousRelayPolicy) report() *RendezvousRelayPolicyReport {
	if p == nil || (len(p.feedback) == 0 && len(p.withdrawn) == 0 && len(p.replacements) == 0) {
		return nil
	}
	decisions := make([]RendezvousRelayPolicyDecision, 0, len(p.withdrawn)+len(p.replacements))
	for _, decision := range p.withdrawn {
		decisions = append(decisions, decision)
	}
	for _, decision := range p.replacements {
		decisions = append(decisions, decision)
	}
	sort.SliceStable(decisions, func(i, j int) bool {
		if decisions[i].RouteID != decisions[j].RouteID {
			return decisions[i].RouteID < decisions[j].RouteID
		}
		if decisions[i].PeerNodeID != decisions[j].PeerNodeID {
			return decisions[i].PeerNodeID < decisions[j].PeerNodeID
		}
		if decisions[i].Reason != decisions[j].Reason {
			return decisions[i].Reason < decisions[j].Reason
		}
		if decisions[i].SelectedRelayID != decisions[j].SelectedRelayID {
			return decisions[i].SelectedRelayID < decisions[j].SelectedRelayID
		}
		if decisions[i].StaleRelayNodeID != decisions[j].StaleRelayNodeID {
			return decisions[i].StaleRelayNodeID < decisions[j].StaleRelayNodeID
		}
		return decisions[i].WithdrawnLeaseID < decisions[j].WithdrawnLeaseID
	})
	return &RendezvousRelayPolicyReport{
		SchemaVersion: "c17z15.rendezvous_relay_policy.v1",
		ScoringMode: "route_adjacency_endpoint_priority_mesh_link_health_synthetic_route_health_feedback",
		FeedbackMaxAgeSeconds: int(rendezvousRelayFeedbackMaxAge / time.Second),
		StaleRelayCount: len(p.feedback),
		WithdrawnLeaseCount: len(p.withdrawn),
		ReplacementLeaseCount: len(p.replacements),
		Decisions: decisions,
	}
}

// replacementDecision looks up the recorded replacement for the given route,
// peer and selected relay.
//
// Several entries can match, because replacements are keyed by stale relay as
// well. The entry with the highest score wins; equal scores are resolved by
// the smaller stale relay node ID, so the result does not depend on map
// iteration order. It returns false on a nil receiver or when nothing matches.
func (p *rendezvousRelayPolicy) replacementDecision(routeID string, peerNodeID string, selectedRelayID string) (RendezvousRelayPolicyDecision, bool) {
	if p == nil {
		return RendezvousRelayPolicyDecision{}, false
	}
	var best RendezvousRelayPolicyDecision
	found := false
	for _, decision := range p.replacements {
		if decision.RouteID != routeID || decision.PeerNodeID != peerNodeID || decision.SelectedRelayID != selectedRelayID {
			continue
		}
		if !found || decision.Score > best.Score || (decision.Score == best.Score && decision.StaleRelayNodeID < best.StaleRelayNodeID) {
			best = decision
			found = true
		}
	}
	return best, found
}

// rendezvousRelayReplacementKey builds the map key for a replacement decision
// from the trimmed route, peer, stale relay and selected relay IDs, joined
// with NUL bytes.
func rendezvousRelayReplacementKey(routeID string, peerNodeID string, staleRelayNodeID string, selectedRelayID string) string {
	return strings.TrimSpace(routeID) + "\x00" + strings.TrimSpace(peerNodeID) + "\x00" + strings.TrimSpace(staleRelayNodeID) + "\x00" + strings.TrimSpace(selectedRelayID)
}

// routePathDecisionReport wraps a copy of decisions, sorted by route ID and
// then decision ID, in a RoutePathDecisionReport for the given generation. It
// also counts the decisions whose source is "stale_relay_replacement". It
// returns nil when there are no decisions.
func routePathDecisionReport(generation string, decisions []RoutePathDecision) *RoutePathDecisionReport {
	if len(decisions) == 0 {
		return nil
	}
	out := append([]RoutePathDecision{}, decisions...)
	sort.SliceStable(out, func(i, j int) bool {
		if out[i].RouteID != out[j].RouteID {
			return out[i].RouteID < out[j].RouteID
		}
		return out[i].DecisionID < out[j].DecisionID
	})
	replacements := 0
	for _, decision := range out {
		if decision.DecisionSource == "stale_relay_replacement" {
			replacements++
		}
	}
	return &RoutePathDecisionReport{
		SchemaVersion: "c17z18.route_path_decisions.v1",
		DecisionMode: "control_plane_effective_path_from_relay_policy",
		Generation: generation,
		DecisionCount: len(out),
		ReplacementDecisionCount: replacements,
		ControlPlaneOnly: true,
		ProductionForwarding: false,
		Decisions: out,
	}
}

// routePathDecisionForRoute computes the effective path decision for the local
// node on a route. The decision starts as the route intent's own hops. If one
// of the leases for this route is a stale-relay replacement, the
// highest-scoring one rewrites the effective hops and relay fields.
func routePathDecisionForRoute(route SyntheticMeshRouteConfig, localNodeID string, leases []PeerRendezvousLease, relayPolicy *rendezvousRelayPolicy, generation string) RoutePathDecision {
	decision := RoutePathDecision{
		DecisionID: route.RouteID + "-path-" + localNodeID,
		RouteID: route.RouteID,
		ClusterID: route.ClusterID,
		LocalNodeID: localNodeID,
		SourceNodeID: route.SourceNodeID,
		DestinationNodeID: route.DestinationNodeID,
		OriginalHops: append([]string{}, route.Hops...),
		EffectiveHops: append([]string{}, route.Hops...),
		DecisionSource: "route_intent",
		Generation: generation,
		PathScore: 1000,
		ScoreReasons: []string{"route_intent_hops"},
		ControlPlaneOnly: true,
		ProductionForwarding: false,
		ExpiresAt: route.ExpiresAt.UTC(),
	}
	var
replacementLease PeerRendezvousLease
	var replacementDecision RendezvousRelayPolicyDecision
	replacementFound := false
	for _, lease := range leases {
		if !containsString(lease.RouteIDs, route.RouteID) {
			continue
		}
		relayDecision, ok := relayPolicy.replacementDecision(route.RouteID, lease.PeerNodeID, lease.RelayNodeID)
		if !ok && lease.Reason != "stale_relay_replacement" {
			continue
		}
		if !ok {
			// The lease is marked as a replacement but the policy holds no
			// matching record: synthesize a decision without a score or
			// stale relay.
			relayDecision = RendezvousRelayPolicyDecision{
				RouteID: route.RouteID,
				PeerNodeID: lease.PeerNodeID,
				SelectedRelayID: lease.RelayNodeID,
				SelectedEndpoint: lease.RelayEndpoint,
				Reason: "stale_relay_replacement",
			}
		}
		// Keep the first candidate, then only strictly better scores.
		if !replacementFound || relayDecision.Score > replacementDecision.Score {
			replacementFound = true
			replacementLease = lease
			replacementDecision = relayDecision
		}
	}
	if replacementFound {
		decision.DecisionID = route.RouteID + "-path-" + localNodeID + "-via-" + replacementLease.RelayNodeID
		decision.EffectiveHops = effectiveRoutePathWithReplacement(route.Hops, replacementLease.PeerNodeID, replacementDecision.StaleRelayNodeID, replacementLease.RelayNodeID)
		decision.SelectedRelayID = replacementLease.RelayNodeID
		decision.SelectedRelayEndpoint = replacementLease.RelayEndpoint
		decision.StaleRelayNodeID = replacementDecision.StaleRelayNodeID
		decision.RendezvousPeerNodeID = replacementLease.PeerNodeID
		decision.RendezvousLeaseID = replacementLease.LeaseID
		decision.RendezvousLeaseReason = replacementLease.Reason
		decision.DecisionSource = "stale_relay_replacement"
		decision.PathScore = replacementDecision.Score
		// A zero score falls back to the default path score.
		if decision.PathScore == 0 {
			decision.PathScore = 1000
		}
		decision.ScoreReasons = append([]string{}, replacementDecision.ScoreReasons...)
		if len(decision.ScoreReasons) == 0 {
			decision.ScoreReasons = []string{"relay_replacement_policy"}
		}
	}
	decision.PreviousHopID, decision.NextHopID, decision.LocalRole = routePathLocalPosition(decision.EffectiveHops, localNodeID, decision.SelectedRelayID, decision.StaleRelayNodeID)
	return decision
}

// effectiveRoutePathWithReplacement returns the original path with blank
// entries and the stale relay removed, and with the selected relay inserted
// immediately before the peer. If the selected relay is empty or already on
// the path, nothing is inserted; if the peer is not on the path, the relay is
// appended at the end.
func effectiveRoutePathWithReplacement(original []string, peerNodeID string, staleRelayNodeID string, selectedRelayID string) []string {
	out := make([]string, 0, len(original)+1)
	for _, nodeID := range original {
		nodeID = strings.TrimSpace(nodeID)
		if nodeID == "" || (staleRelayNodeID != "" && nodeID == staleRelayNodeID) {
			continue
		}
		out = append(out, nodeID)
	}
	if selectedRelayID == "" || containsString(out, selectedRelayID) {
		return out
	}
	peerIndex := -1
	for index, nodeID := range out {
		if nodeID == peerNodeID {
			peerIndex = index
			break
		}
	}
	if peerIndex < 0 {
		return append(out, selectedRelayID)
	}
	// Grow by one, shift the peer and everything after it right, and place
	// the relay in the freed slot.
	out = append(out, "")
	copy(out[peerIndex+1:], out[peerIndex:])
	out[peerIndex] = selectedRelayID
	return out
}

// routePathLocalPosition locates localNodeID on path and returns the previous
// hop, the next hop and the local role.
//
// When the node is not on the path, both hops are empty and the role is
// "withdrawn_relay" if the node is the stale relay, otherwise
// "not_on_effective_path". On the path, the role is "entry" for the first
// hop, "exit" for the last, "selected_relay" for the selected relay, and
// "transit" otherwise; the entry and exit checks take precedence.
func routePathLocalPosition(path []string, localNodeID string, selectedRelayID string, staleRelayNodeID string) (string, string, string) {
	localIndex := -1
	for index, nodeID := range path {
		if nodeID == localNodeID {
			localIndex = index
			break
		}
	}
	if localIndex < 0 {
		if staleRelayNodeID != "" && localNodeID == staleRelayNodeID {
			return "", "", "withdrawn_relay"
		}
		return "", "", "not_on_effective_path"
	}
	previous := ""
	next := ""
	if localIndex > 0 {
		previous = path[localIndex-1]
	}
	if localIndex < len(path)-1 {
		next = path[localIndex+1]
	}
	role := "transit"
	switch {
	case localIndex == 0:
		role = "entry"
	case localIndex == len(path)-1:
		role = "exit"
	case selectedRelayID != "" && localNodeID == selectedRelayID:
		role = "selected_relay"
	}
	return previous, next, role
}

// rendezvousFeedbackAppliesToRoute reports whether a feedback entry is
// relevant to routeID. Entries without route IDs, and lookups with a blank
// route ID, always match.
func rendezvousFeedbackAppliesToRoute(item rendezvousRelayFeedbackEntry, routeID string) bool {
	if strings.TrimSpace(routeID) == "" || len(item.RouteIDs) == 0 {
		return true
	}
	return containsString(item.RouteIDs, routeID)
}

// reachabilityFromConnectivityMode maps a connectivity mode to the
// reachability value used for endpoint candidates. Unrecognized modes map to
// "unknown".
func reachabilityFromConnectivityMode(connectivityMode string) string {
	switch connectivityMode {
	case "outbound_only":
		return "outbound_only"
	case "relay_required":
		return "relay"
	case "direct":
		return "public"
	default:
		return "unknown"
	}
}

// validatePeerRecoverySeeds returns ErrInvalidPayload when there are more than
// maxScopedRecoverySeeds seeds, when a seed has a blank node ID or endpoint,
// an unsupported transport, an unsupported non-empty connectivity mode or
// invalid JSON metadata, or when two seeds share the same trimmed node ID and
// endpoint.
func validatePeerRecoverySeeds(seeds []PeerRecoverySeed) error {
	if len(seeds) > maxScopedRecoverySeeds {
		return ErrInvalidPayload
	}
	seen := map[string]struct{}{}
	for _, seed := range seeds {
		key := strings.TrimSpace(seed.NodeID) + "\x00" + strings.TrimSpace(seed.Endpoint)
		if strings.TrimSpace(seed.NodeID) == "" || strings.TrimSpace(seed.Endpoint) == "" || !isPeerEndpointTransport(seed.Transport) || (seed.ConnectivityMode != "" && !isPeerEndpointConnectivityMode(seed.ConnectivityMode)) || (len(seed.Metadata) > 0 && !json.Valid(seed.Metadata)) {
			return ErrInvalidPayload
		}
		if _, duplicate := seen[key]; duplicate {
			return ErrInvalidPayload
		}
		seen[key] = struct{}{}
	}
	return nil
}

// validatePeerRendezvousLeases returns ErrInvalidPayload when there are more
// than maxScopedRendezvousLeases leases, or when any lease is malformed: a
// blank peer, relay or relay endpoint; peer equal to relay; peer or relay not
// on routePath; an unsupported non-empty transport; an expiry that is not
// after now; or invalid JSON metadata. Lease IDs are compared after trimming,
// and a repeated non-blank ID is also rejected.
func validatePeerRendezvousLeases(leases []PeerRendezvousLease, routePath []string, now time.Time) error {
	if len(leases) > maxScopedRendezvousLeases {
		return ErrInvalidPayload
	}
	now = now.UTC()
	seen := map[string]struct{}{}
	for _, lease := range leases {
		peerNodeID := strings.TrimSpace(lease.PeerNodeID)
		relayNodeID := strings.TrimSpace(lease.RelayNodeID)
		relayEndpoint := strings.TrimSpace(lease.RelayEndpoint)
		transport := strings.TrimSpace(lease.Transport)
		if peerNodeID == "" || relayNodeID == "" || relayEndpoint == "" || peerNodeID == relayNodeID || !containsString(routePath, peerNodeID) || !containsString(routePath, relayNodeID) || (transport != "" && !isPeerRendezvousTransport(transport)) || (!lease.ExpiresAt.IsZero() && !lease.ExpiresAt.After(now)) || (len(lease.Metadata) > 0 && !json.Valid(lease.Metadata)) {
			return ErrInvalidPayload
		}
		// De-duplicate on the trimmed ID so that IDs differing only in
		// surrounding whitespace are treated as the same lease.
		leaseID := strings.TrimSpace(lease.LeaseID)
		if leaseID == "" {
			continue
		}
		if _, duplicate := seen[leaseID]; duplicate {
			return ErrInvalidPayload
		}
		seen[leaseID] = struct{}{}
	}
	return nil
}

// normalizeRendezvousLeases returns copies of the leases with identifiers
// trimmed and defaults filled in for the given route, dropping any lease that
// is not still valid after now.
//
// Defaults: a blank lease ID is derived from the route, peer and relay;
// transport is "relay_control"; connectivity mode is "relay_required";
// priority is 100; the route ID is added to RouteIDs; allowed channels fall
// back to the route's and then to the two control channels; IssuedAt is now;
// and ExpiresAt is capped at the route's expiry. ControlPlaneOnly is always
// forced to true.
func normalizeRendezvousLeases(leases []PeerRendezvousLease, route SyntheticMeshRouteConfig, now time.Time) []PeerRendezvousLease {
	out := make([]PeerRendezvousLease, 0, len(leases))
	now = now.UTC()
	for _, lease := range leases {
		// Trim the ID as well: validation treats a whitespace-only ID as
		// absent, so it must receive a generated ID here too.
		lease.LeaseID = strings.TrimSpace(lease.LeaseID)
		lease.PeerNodeID = strings.TrimSpace(lease.PeerNodeID)
		lease.RelayNodeID = strings.TrimSpace(lease.RelayNodeID)
		lease.RelayEndpoint = strings.TrimRight(strings.TrimSpace(lease.RelayEndpoint), "/")
		if lease.LeaseID == "" {
			lease.LeaseID = route.RouteID + "-rv-" + lease.PeerNodeID + "-via-" + lease.RelayNodeID
		}
		if lease.Transport == "" {
			lease.Transport = "relay_control"
		}
		if lease.ConnectivityMode == "" {
			lease.ConnectivityMode = "relay_required"
		}
		if lease.Priority <= 0 {
			lease.Priority = 100
		}
		if len(lease.RouteIDs) == 0 {
			lease.RouteIDs = []string{route.RouteID}
		} else if !containsString(lease.RouteIDs, route.RouteID) {
			// Copy before appending so the caller's slice is not modified.
			lease.RouteIDs = append(append([]string{}, lease.RouteIDs...), route.RouteID)
		}
		lease.AllowedChannels = controlPlaneAllowedChannels(firstNonEmptyStringSlice(lease.AllowedChannels, route.AllowedChannels))
		if len(lease.AllowedChannels) == 0 {
			lease.AllowedChannels = []string{"fabric_control", "route_control"}
		}
		lease.ControlPlaneOnly = true
		if lease.IssuedAt.IsZero() {
			lease.IssuedAt = now
		} else {
			lease.IssuedAt = lease.IssuedAt.UTC()
		}
		// A lease never outlives its route.
		if lease.ExpiresAt.IsZero() || (!route.ExpiresAt.IsZero() && lease.ExpiresAt.After(route.ExpiresAt)) {
			lease.ExpiresAt = route.ExpiresAt.UTC()
		} else {
			lease.ExpiresAt = lease.ExpiresAt.UTC()
		}
		if lease.Reason == "" {
			lease.Reason = "policy_rendezvous_lease"
		}
		if lease.Metadata == nil {
			lease.Metadata = json.RawMessage(`{}`)
		}
		if !lease.ExpiresAt.IsZero() && lease.ExpiresAt.After(now) {
			out = append(out, lease)
		}
	}
	return out
}

// scopedRendezvousLeases returns the normalized policy leases that are usable
// by localNodeID on the route: leases reported stale are recorded as withdrawn
// on relayPolicy and omitted, and the rest are kept only when both peer and
// relay are route hops. It returns nil when localNodeID is not a hop.
func scopedRendezvousLeases(leases []PeerRendezvousLease, route SyntheticMeshRouteConfig, localNodeID string, relayPolicy *rendezvousRelayPolicy, now time.Time) []PeerRendezvousLease {
	if !containsString(route.Hops, localNodeID) {
		return nil
	}
	normalized :=
normalizeRendezvousLeases(leases, route, now) out := make([]PeerRendezvousLease, 0, len(normalized)) for _, lease := range normalized { if feedback, stale := relayPolicy.staleForLease(route.RouteID, lease); stale { relayPolicy.recordWithdrawal(route, lease, feedback) continue } if containsString(route.Hops, lease.PeerNodeID) && containsString(route.Hops, lease.RelayNodeID) { out = append(out, lease) } } return out } func derivedRendezvousLeases(route SyntheticMeshRouteConfig, peers map[string]string, candidates map[string][]PeerEndpointCandidate, localNodeID string, relayPolicy *rendezvousRelayPolicy, now time.Time) []PeerRendezvousLease { if !containsString(route.Hops, localNodeID) { return nil } out := []PeerRendezvousLease{} for peerNodeID, items := range candidates { peerNodeID = strings.TrimSpace(peerNodeID) if peerNodeID == "" || !containsString(route.Hops, peerNodeID) || !peerEndpointCandidatesRequireRendezvous(items) { continue } selection := selectRendezvousRelay(route, peerNodeID, localNodeID, peers, candidates, relayPolicy) if selection.RelayNodeID == "" || selection.Endpoint == "" { continue } _, replacement := relayPolicy.hasStalePeer(route.RouteID, peerNodeID) reason := rendezvousLeaseReason(items) if replacement { reason = "stale_relay_replacement" } lease := PeerRendezvousLease{ LeaseID: route.RouteID + "-rv-" + peerNodeID + "-via-" + selection.RelayNodeID, PeerNodeID: peerNodeID, RelayNodeID: selection.RelayNodeID, RelayEndpoint: selection.Endpoint, Transport: "relay_control", ConnectivityMode: "relay_required", RouteIDs: []string{route.RouteID}, AllowedChannels: controlPlaneAllowedChannels(route.AllowedChannels), Priority: rendezvousLeasePriority(items), ControlPlaneOnly: true, IssuedAt: now.UTC(), ExpiresAt: route.ExpiresAt.UTC(), Reason: reason, Metadata: rendezvousRelayLeaseMetadata(selection, replacement), } if len(lease.AllowedChannels) == 0 { lease.AllowedChannels = []string{"fabric_control", "route_control"} } if lease.Priority <= 0 { 
lease.Priority = 100 } if lease.ExpiresAt.After(now.UTC()) { out = append(out, lease) if feedback, ok := relayPolicy.hasStalePeer(route.RouteID, peerNodeID); ok && feedback.RelayNodeID != selection.RelayNodeID { relayPolicy.recordReplacement(route, peerNodeID, feedback, selection) } } } return out } func selectRendezvousRelay(route SyntheticMeshRouteConfig, peerNodeID string, localNodeID string, peers map[string]string, candidates map[string][]PeerEndpointCandidate, relayPolicy *rendezvousRelayPolicy) rendezvousRelaySelection { routePath := route.Hops peerIndex := -1 for index, nodeID := range routePath { if nodeID == peerNodeID { peerIndex = index break } } preferred := []string{} if peerIndex > 0 { preferred = append(preferred, routePath[peerIndex-1]) } if peerIndex >= 0 && peerIndex < len(routePath)-1 { preferred = append(preferred, routePath[peerIndex+1]) } preferred = append(preferred, routePath...) seen := map[string]struct{}{} relayCandidates := []rendezvousRelaySelection{} for _, relayNodeID := range preferred { relayNodeID = strings.TrimSpace(relayNodeID) if relayNodeID == "" || relayNodeID == peerNodeID { continue } if _, duplicate := seen[relayNodeID]; duplicate { continue } seen[relayNodeID] = struct{}{} if _, stale := relayPolicy.relayStale(route.RouteID, peerNodeID, relayNodeID); stale { continue } endpoint, endpointScore, endpointReasons := relayControlEndpointForNode(relayNodeID, peers, candidates) if endpoint == "" { continue } score, scoreReasons := rendezvousRelayCandidateScore(route.RouteID, routePath, peerIndex, relayNodeID, localNodeID, endpointScore, endpointReasons, relayPolicy) relayCandidates = append(relayCandidates, rendezvousRelaySelection{ RelayNodeID: relayNodeID, Endpoint: endpoint, Score: score, Reasons: scoreReasons, }) } if len(relayCandidates) == 0 { return rendezvousRelaySelection{} } sort.SliceStable(relayCandidates, func(i, j int) bool { if relayCandidates[i].Score != relayCandidates[j].Score { return relayCandidates[i].Score 
> relayCandidates[j].Score } return relayCandidates[i].RelayNodeID < relayCandidates[j].RelayNodeID }) return relayCandidates[0] } func relayControlEndpointForNode(nodeID string, peers map[string]string, candidates map[string][]PeerEndpointCandidate) (string, int, []string) { if endpoint := strings.TrimRight(strings.TrimSpace(peers[nodeID]), "/"); isHTTPControlEndpoint(endpoint) { return endpoint, 80, []string{"reported_peer_endpoint"} } items := append([]PeerEndpointCandidate{}, candidates[nodeID]...) sort.SliceStable(items, func(i, j int) bool { if items[i].Priority != items[j].Priority { return items[i].Priority < items[j].Priority } return items[i].EndpointID < items[j].EndpointID }) for _, candidate := range items { if endpointCandidateRequiresRendezvous(candidate) { continue } endpoint := strings.TrimRight(strings.TrimSpace(candidate.Address), "/") if isHTTPControlEndpoint(endpoint) { score := 70 reasons := []string{"endpoint_candidate"} if candidate.Priority > 0 { score += maxInt(0, 50-candidate.Priority) } if hasPolicyTag(candidate.PolicyTags, "fast-path") { score += 25 reasons = append(reasons, "fast_path") } if hasPolicyTag(candidate.PolicyTags, "same-site") || hasPolicyTag(candidate.PolicyTags, "corp-lan") || hasPolicyTag(candidate.PolicyTags, "private-lan") { score += 20 reasons = append(reasons, "same_site") } if strings.EqualFold(candidate.ConnectivityMode, "direct") { score += 10 reasons = append(reasons, "direct") } return endpoint, score, reasons } } return "", 0, nil } func rendezvousRelayCandidateScore(routeID string, routePath []string, peerIndex int, relayNodeID string, localNodeID string, endpointScore int, endpointReasons []string, relayPolicy *rendezvousRelayPolicy) (int, []string) { score := 500 + endpointScore reasons := append([]string{}, endpointReasons...) 
relayIndex := -1 for index, nodeID := range routePath { if nodeID == relayNodeID { relayIndex = index break } } if peerIndex >= 0 && relayIndex >= 0 { distance := absInt(peerIndex - relayIndex) switch { case distance == 1: score += 180 reasons = append(reasons, "adjacent_to_peer") case distance == 2: score += 120 reasons = append(reasons, "near_peer") default: score += maxInt(0, 80-distance*10) reasons = append(reasons, "route_path_candidate") } } if relayIndex == 0 && len(routePath) > 2 { score -= 120 reasons = append(reasons, "entry_relay_fallback") } if relayNodeID == localNodeID { score += 40 reasons = append(reasons, "local_entry_relay") } linkScore, linkReasons := rendezvousRelayLinkScore(relayNodeID, relayPolicy) score += linkScore reasons = append(reasons, linkReasons...) routeHealthScore, routeHealthReasons := rendezvousRelayRouteHealthScore(routeID, relayNodeID, relayPolicy) score += routeHealthScore reasons = append(reasons, routeHealthReasons...) return score, reasons } func rendezvousRelayLinkScore(relayNodeID string, relayPolicy *rendezvousRelayPolicy) (int, []string) { if relayPolicy == nil || relayPolicy.localNodeID == "" { return 0, nil } var latest *MeshLinkObservation for i := range relayPolicy.links { link := &relayPolicy.links[i] if link.SourceNodeID != relayPolicy.localNodeID || link.TargetNodeID != relayNodeID { continue } if !link.ObservedAt.IsZero() && relayPolicy.now.Sub(link.ObservedAt.UTC()) > rendezvousRelayFeedbackMaxAge { continue } if latest == nil || link.ObservedAt.After(latest.ObservedAt) { latest = link } } if latest == nil { return 0, nil } switch latest.LinkStatus { case "reachable": score := 60 reasons := []string{"mesh_link_reachable"} if latest.QualityScore != nil { score += *latest.QualityScore reasons = append(reasons, "mesh_link_quality") } if latest.LatencyMs != nil { score += maxInt(0, 80-*latest.LatencyMs) reasons = append(reasons, "mesh_link_latency") } return score, reasons case "unreachable": return -250, 
[]string{"mesh_link_unreachable"} default: return 0, nil } } func rendezvousRelayRouteHealthScore(routeID string, relayNodeID string, relayPolicy *rendezvousRelayPolicy) (int, []string) { if relayPolicy == nil || relayPolicy.localNodeID == "" { return 0, nil } routeID = strings.TrimSpace(routeID) relayNodeID = strings.TrimSpace(relayNodeID) if routeID == "" || relayNodeID == "" { return 0, nil } var latest *MeshLinkObservation var latestMetadata meshRouteHealthObservationMetadata for i := range relayPolicy.links { link := &relayPolicy.links[i] if link.SourceNodeID != relayPolicy.localNodeID || !meshLinkObservationFresh(*link, relayPolicy.now) { continue } metadata, ok := routeHealthMetadataFromLink(*link) if !ok || metadata.ObservationType != "synthetic_route_health" || strings.TrimSpace(metadata.RouteID) != routeID || strings.TrimSpace(metadata.RoutePathDecisionSelectedRelayID) != relayNodeID || metadata.ProductionForwarding || metadata.ProductionPayloadForwarding || metadata.RouteHealthProductionPayloadForwarding || metadata.RouteHealthServicePayloadForwarding { continue } if latest == nil || link.ObservedAt.After(latest.ObservedAt) { latest = link latestMetadata = metadata } } if latest == nil { return 0, nil } if latestMetadata.RoutePathDriftDetected { return -360, []string{"route_health_drift"} } if latest.LinkStatus == "unreachable" || strings.TrimSpace(latestMetadata.FailureReason) != "" { return -320, []string{"route_health_unreachable"} } if latest.LinkStatus != "reachable" { return 0, nil } score := 90 reasons := []string{"route_health_reachable", "route_health_no_drift"} if latest.QualityScore != nil { score += *latest.QualityScore reasons = append(reasons, "route_health_quality") } if latest.LatencyMs != nil { score += maxInt(0, 100-*latest.LatencyMs) reasons = append(reasons, "route_health_latency") } return score, reasons } func rendezvousRelayLeaseMetadata(selection rendezvousRelaySelection, replacement bool) json.RawMessage { payload := 
map[string]any{ "source": "control-plane", "derived_from": "endpoint_candidate", "lease_refresh_contract": "node_scoped_synthetic_config_get", "relay_replacement_contract": "stale_relay_feedback_policy", "relay_selection_score": selection.Score, "relay_selection_score_reasons": selection.Reasons, "production_payload_forwarding": false, } if replacement { payload["replacement_for_stale_relay"] = true } raw, err := json.Marshal(payload) if err != nil { return json.RawMessage(`{"source":"control-plane","derived_from":"endpoint_candidate","lease_refresh_contract":"node_scoped_synthetic_config_get","relay_replacement_contract":"stale_relay_feedback_policy","production_payload_forwarding":false}`) } return raw } func hasPolicyTag(tags []string, want string) bool { want = strings.ToLower(strings.TrimSpace(want)) for _, tag := range tags { if strings.ToLower(strings.TrimSpace(tag)) == want { return true } } return false } func maxInt(a int, b int) int { if a > b { return a } return b } func absInt(value int) int { if value < 0 { return -value } return value } func peerEndpointCandidatesRequireRendezvous(candidates []PeerEndpointCandidate) bool { for _, candidate := range candidates { if endpointCandidateRequiresRendezvous(candidate) { return true } } return false } func endpointCandidateRequiresRendezvous(candidate PeerEndpointCandidate) bool { transport := strings.ToLower(strings.TrimSpace(candidate.Transport)) reachability := strings.ToLower(strings.TrimSpace(candidate.Reachability)) connectivityMode := strings.ToLower(strings.TrimSpace(candidate.ConnectivityMode)) return strings.Contains(transport, "relay") || strings.Contains(transport, "outbound") || reachability == "relay" || reachability == "outbound_only" || connectivityMode == "relay_required" || connectivityMode == "outbound_only" } func rendezvousLeasePriority(candidates []PeerEndpointCandidate) int { priority := 0 for _, candidate := range candidates { if !endpointCandidateRequiresRendezvous(candidate) { 
continue } if priority == 0 || (candidate.Priority > 0 && candidate.Priority < priority) { priority = candidate.Priority } } return priority } func rendezvousLeaseReason(candidates []PeerEndpointCandidate) string { for _, candidate := range candidates { connectivityMode := strings.ToLower(strings.TrimSpace(candidate.ConnectivityMode)) reachability := strings.ToLower(strings.TrimSpace(candidate.Reachability)) if connectivityMode == "outbound_only" || reachability == "outbound_only" { return "auto_outbound_only" } if connectivityMode == "relay_required" || reachability == "relay" { return "auto_relay_required" } } return "auto_rendezvous_required" } func mergeRendezvousLeases(out map[string]PeerRendezvousLease, leases []PeerRendezvousLease) { for _, lease := range leases { if lease.Metadata == nil { lease.Metadata = json.RawMessage(`{}`) } key := strings.TrimSpace(lease.LeaseID) if key == "" { key = lease.PeerNodeID + "\x00" + lease.RelayNodeID + "\x00" + lease.RelayEndpoint } existing, ok := out[key] if !ok || lease.Priority < existing.Priority || existing.ExpiresAt.Before(lease.ExpiresAt) { out[key] = lease } } } func sortedRendezvousLeases(items map[string]PeerRendezvousLease, limit int) []PeerRendezvousLease { out := make([]PeerRendezvousLease, 0, len(items)) for _, item := range items { out = append(out, item) } sort.SliceStable(out, func(i, j int) bool { if out[i].Priority != out[j].Priority { return out[i].Priority < out[j].Priority } if out[i].PeerNodeID != out[j].PeerNodeID { return out[i].PeerNodeID < out[j].PeerNodeID } if out[i].RelayNodeID != out[j].RelayNodeID { return out[i].RelayNodeID < out[j].RelayNodeID } return out[i].LeaseID < out[j].LeaseID }) if len(out) > limit { out = out[:limit] } return out } func markPeerDirectoryRendezvousLeases(directory map[string]*PeerDirectoryEntry, leases []PeerRendezvousLease, localNodeID string) { for _, lease := range leases { if lease.PeerNodeID != "" && lease.PeerNodeID != localNodeID { entry := 
peerDirectoryEntry(directory, lease.PeerNodeID) entry.CandidateCount++ if !containsString(entry.ConnectivityModes, "relay_required") { entry.ConnectivityModes = append(entry.ConnectivityModes, "relay_required") } } if lease.RelayNodeID != "" && lease.RelayNodeID != localNodeID { entry := peerDirectoryEntry(directory, lease.RelayNodeID) entry.EndpointCount++ if !containsString(entry.ConnectivityModes, "relay_control") { entry.ConnectivityModes = append(entry.ConnectivityModes, "relay_control") } } } } func mergePeerDirectoryRoute(directory map[string]*PeerDirectoryEntry, route SyntheticMeshRouteConfig, localNodeID string) { for _, nodeID := range route.Hops { nodeID = strings.TrimSpace(nodeID) if nodeID == "" || nodeID == localNodeID { continue } entry := peerDirectoryEntry(directory, nodeID) if !containsString(entry.RouteIDs, route.RouteID) { entry.RouteIDs = append(entry.RouteIDs, route.RouteID) } } } func mergePeerDirectoryCandidates(directory map[string]*PeerDirectoryEntry, nodeID string, candidates []PeerEndpointCandidate) { entry := peerDirectoryEntry(directory, nodeID) entry.CandidateCount += len(candidates) for _, candidate := range candidates { if strings.TrimSpace(candidate.ConnectivityMode) != "" && !containsString(entry.ConnectivityModes, candidate.ConnectivityMode) { entry.ConnectivityModes = append(entry.ConnectivityModes, candidate.ConnectivityMode) } } } func peerDirectoryEntry(directory map[string]*PeerDirectoryEntry, nodeID string) *PeerDirectoryEntry { if entry, ok := directory[nodeID]; ok { return entry } entry := &PeerDirectoryEntry{NodeID: nodeID} directory[nodeID] = entry return entry } func mergeRecoverySeeds(out map[string]PeerRecoverySeed, seeds []PeerRecoverySeed) { for _, seed := range seeds { if seed.Metadata == nil { seed.Metadata = json.RawMessage(`{}`) } key := seed.NodeID + "\x00" + seed.Endpoint existing, ok := out[key] if !ok || seed.Priority < existing.Priority { out[key] = seed } } } func sortedRecoverySeeds(items 
map[string]PeerRecoverySeed, limit int) []PeerRecoverySeed { out := make([]PeerRecoverySeed, 0, len(items)) for _, item := range items { out = append(out, item) } sort.SliceStable(out, func(i, j int) bool { if out[i].Priority != out[j].Priority { return out[i].Priority < out[j].Priority } if out[i].NodeID != out[j].NodeID { return out[i].NodeID < out[j].NodeID } return out[i].Endpoint < out[j].Endpoint }) if len(out) > limit { out = out[:limit] } return out } func markPeerDirectoryRecoverySeeds(directory map[string]*PeerDirectoryEntry, seeds []PeerRecoverySeed) { for _, seed := range seeds { entry := peerDirectoryEntry(directory, seed.NodeID) entry.RecoverySeed = true if strings.TrimSpace(seed.ConnectivityMode) != "" && !containsString(entry.ConnectivityModes, seed.ConnectivityMode) { entry.ConnectivityModes = append(entry.ConnectivityModes, seed.ConnectivityMode) } } } func sortedPeerDirectory(items map[string]*PeerDirectoryEntry) []PeerDirectoryEntry { out := make([]PeerDirectoryEntry, 0, len(items)) for _, entry := range items { sort.Strings(entry.RouteIDs) sort.Strings(entry.ConnectivityModes) if entry.NodeID != "" { out = append(out, *entry) } } sort.SliceStable(out, func(i, j int) bool { return out[i].NodeID < out[j].NodeID }) return out } func validatePeerEndpointCandidates(candidates map[string][]PeerEndpointCandidate, routePath []string) error { if len(candidates) == 0 { return nil } for nodeID, items := range candidates { if strings.TrimSpace(nodeID) == "" || !containsString(routePath, nodeID) { return ErrInvalidPayload } for _, candidate := range items { if strings.TrimSpace(candidate.EndpointID) == "" || strings.TrimSpace(candidate.NodeID) == "" || candidate.NodeID != nodeID || strings.TrimSpace(candidate.Address) == "" || !isPeerEndpointTransport(candidate.Transport) || !isPeerEndpointReachability(candidate.Reachability) || !isPeerEndpointConnectivityMode(candidate.ConnectivityMode) || (candidate.NATType != "" && 
!isPeerEndpointNATType(candidate.NATType)) { return ErrInvalidPayload } if len(candidate.Metadata) > 0 && !json.Valid(candidate.Metadata) { return ErrInvalidPayload } } } return nil } func scopedPeerEndpoints(peers map[string]string, routePath []string) map[string]string { out := map[string]string{} for nodeID, endpoint := range peers { if containsString(routePath, nodeID) && strings.TrimSpace(endpoint) != "" { out[nodeID] = endpoint } } return out } func scopedPeerEndpointCandidates(candidates map[string][]PeerEndpointCandidate, routePath []string) map[string][]PeerEndpointCandidate { out := map[string][]PeerEndpointCandidate{} for nodeID, items := range candidates { if !containsString(routePath, nodeID) { continue } for _, candidate := range items { if candidate.Metadata == nil { candidate.Metadata = json.RawMessage(`{}`) } out[nodeID] = append(out[nodeID], candidate) } } return out } func isPeerEndpointTransport(value string) bool { switch value { case "direct_tcp_tls", "wss", "relay", "outbound_reverse": return true default: return false } } func isPeerRendezvousTransport(value string) bool { switch value { case "relay_control", "relay", "wss", "direct_tcp_tls": return true default: return false } } func isPeerEndpointReachability(value string) bool { switch value { case "public", "private", "relay", "outbound_only", "unknown": return true default: return false } } func isPeerEndpointConnectivityMode(value string) bool { switch value { case "direct", "relay_required", "outbound_only", "unknown": return true default: return false } } func isPeerEndpointNATType(value string) bool { switch value { case "unknown", "none", "full_cone", "restricted", "port_restricted", "symmetric", "blocked": return true default: return false } } func controlPlaneAllowedChannels(channels []string) []string { out := []string{} for _, channel := range channels { channel = strings.TrimSpace(channel) switch channel { case "fabric_control", "route_control": if !containsString(out, 
channel) { out = append(out, channel) } } } return out } func firstNonEmptyStringSlice(values ...[]string) []string { for _, value := range values { if len(value) > 0 { return value } } return nil } func isHTTPControlEndpoint(endpoint string) bool { endpoint = strings.ToLower(strings.TrimSpace(endpoint)) return strings.HasPrefix(endpoint, "http://") || strings.HasPrefix(endpoint, "https://") } func firstNodeID(selector nodeSelector) string { if strings.TrimSpace(selector.NodeID) != "" { return strings.TrimSpace(selector.NodeID) } for _, nodeID := range selector.NodeIDs { if strings.TrimSpace(nodeID) != "" { return strings.TrimSpace(nodeID) } } return "" } func cleanRouteNodePath(values []string) []string { out := make([]string, 0, len(values)) for _, value := range values { value = strings.TrimSpace(value) if value != "" { out = append(out, value) } } return out } func containsString(values []string, needle string) bool { needle = strings.TrimSpace(needle) if needle == "" { return false } for _, value := range values { if strings.TrimSpace(value) == needle { return true } } return false } func generateFencingToken() (string, error) { buf := make([]byte, 32) if _, err := rand.Read(buf); err != nil { return "", err } return "rap_vpn_fence_" + hex.EncodeToString(buf), nil }