Files
rdp-proxy/backend/internal/modules/cluster/service.go
T
2026-04-28 22:29:50 +03:00

3701 lines
127 KiB
Go

package cluster
import (
"context"
"crypto/rand"
"encoding/hex"
"encoding/json"
"errors"
"sort"
"strings"
"time"
"github.com/jackc/pgx/v5"
"github.com/example/remote-access-platform/backend/internal/platform/clusterauth"
)
// Sentinel errors returned by the cluster service. They are plain errors.New
// values, so callers can match them with errors.Is.
var (
// ErrAccessDenied reports that the platform admin role is required.
ErrAccessDenied = errors.New("platform admin role is required")
// ErrInvalidPayload is returned when required input fields are blank or hold
// unsupported values, and by several mutations when the store reports
// pgx.ErrNoRows.
ErrInvalidPayload = errors.New("invalid cluster payload")
// ErrInvalidJoinToken is returned when a join token cannot be hashed or the
// store finds no matching token row.
ErrInvalidJoinToken = errors.New("invalid or expired join token")
// ErrInvalidNodeRole is returned when isAllowedNodeRole rejects a role.
ErrInvalidNodeRole = errors.New("invalid node role")
// ErrInvalidCluster is returned for a blank cluster ID or when the store has
// no row for the cluster.
ErrInvalidCluster = errors.New("cluster not found")
// ErrInvalidJoinRequest is returned for blank join request identifiers, for a
// missing join request row, or for a request that cannot be bootstrapped.
ErrInvalidJoinRequest = errors.New("join request not found")
// ErrClusterReadOnly reports that the cluster is not authoritative for policy
// mutation.
ErrClusterReadOnly = errors.New("cluster is not authoritative for policy mutation")
// VPN connection and lease errors.
ErrInvalidVPNConnection = errors.New("vpn connection not found")
ErrInvalidVPNLease = errors.New("vpn connection lease not found")
ErrVPNLeaseAlreadyActive = errors.New("vpn connection already has an active lease")
ErrVPNLeaseOwnerNotAllowed = errors.New("vpn lease owner is not allowed")
ErrVPNLeaseOwnerRoleRequired = errors.New("vpn lease owner requires active vpn-exit or vpn-connector role")
)
// Service implements the cluster module's operations on top of a Repository.
type Service struct {
// store is the persistence backend the service methods delegate to.
store Repository
// now supplies the current time; NewService sets it to time.Now().UTC().
now func() time.Time
}
// NewService builds a Service backed by store. Its clock returns the current
// wall-clock time converted to UTC.
func NewService(store Repository) *Service {
	svc := &Service{store: store}
	svc.now = func() time.Time {
		return time.Now().UTC()
	}
	return svc
}
// Schema identifiers written into the SchemaVersion field of the authority
// payloads that this service signs.
const (
// clusterJoinTokenAuthoritySchema tags signed join token payloads.
clusterJoinTokenAuthoritySchema = "rap.cluster.join_token.v1"
// clusterNodeApprovalAuthoritySchema tags signed node approval payloads.
clusterNodeApprovalAuthoritySchema = "rap.cluster.node_approval.v1"
// clusterMeshConfigAuthoritySchema tags signed mesh config snapshot payloads.
clusterMeshConfigAuthoritySchema = "rap.cluster.mesh_config_snapshot.v1"
)
// clusterJoinTokenAuthorityPayload is the JSON document that signJoinToken
// signs with the cluster authority key to describe an issued join token.
type clusterJoinTokenAuthorityPayload struct {
// SchemaVersion is set to clusterJoinTokenAuthoritySchema by signJoinToken.
SchemaVersion string `json:"schema_version"`
ClusterID string `json:"cluster_id"`
TokenID string `json:"token_id"`
// Scope is carried as raw JSON, copied from the stored token.
Scope json.RawMessage `json:"scope"`
ExpiresAt time.Time `json:"expires_at"`
MaxUses int `json:"max_uses"`
// CreatedByUserID is omitted from the JSON when nil.
CreatedByUserID *string `json:"created_by_user_id,omitempty"`
// IssuedAt is filled from the stored token's CreatedAt by signJoinToken.
IssuedAt time.Time `json:"issued_at"`
// signJoinToken always sets ControlPlaneOnly to true and
// ProductionForwarding to false.
ControlPlaneOnly bool `json:"control_plane_only"`
ProductionForwarding bool `json:"production_forwarding"`
}
// clusterNodeApprovalAuthorityPayload is the JSON document signed with the
// cluster authority key when a node join request is approved. It is built by
// signApprovedJoinRequest and bootstrapForApprovedJoinRequest.
type clusterNodeApprovalAuthorityPayload struct {
// SchemaVersion is set to clusterNodeApprovalAuthoritySchema by both builders.
SchemaVersion string `json:"schema_version"`
ClusterID string `json:"cluster_id"`
JoinRequestID string `json:"join_request_id"`
NodeID string `json:"node_id"`
NodeFingerprint string `json:"node_fingerprint"`
IdentityStatus string `json:"identity_status"`
HeartbeatEndpoint string `json:"heartbeat_endpoint"`
// ApprovedByUserID is the approving actor; bootstrapForApprovedJoinRequest
// writes "system" when the join request has no reviewer recorded.
ApprovedByUserID string `json:"approved_by_user_id"`
IssuedAt time.Time `json:"issued_at"`
// Both builders set ControlPlaneOnly to true and ProductionForwarding to false.
ControlPlaneOnly bool `json:"control_plane_only"`
ProductionForwarding bool `json:"production_forwarding"`
}
// clusterMeshConfigAuthorityPayload is the JSON document that
// signSyntheticMeshConfig signs to bind a node's mesh config to the cluster
// authority.
type clusterMeshConfigAuthorityPayload struct {
// SchemaVersion is set to clusterMeshConfigAuthoritySchema.
SchemaVersion string `json:"schema_version"`
ClusterID string `json:"cluster_id"`
LocalNodeID string `json:"local_node_id"`
ConfigVersion string `json:"config_version"`
// ConfigSHA256 holds the clusterauth.HashRaw result for the marshalled
// config with its authority payload and signature cleared.
ConfigSHA256 string `json:"config_sha256"`
IssuedAt time.Time `json:"issued_at"`
// ExpiresAt is set to five minutes after IssuedAt by signSyntheticMeshConfig.
ExpiresAt time.Time `json:"expires_at"`
// signSyntheticMeshConfig sets ControlPlaneOnly to true and
// ProductionForwarding to false.
ControlPlaneOnly bool `json:"control_plane_only"`
ProductionForwarding bool `json:"production_forwarding"`
}
// ListClusters returns the clusters reported by the store. If the
// ensurePlatformAdmin check fails for actorUserID, its error is returned and
// the store is not queried.
func (s *Service) ListClusters(ctx context.Context, actorUserID string) ([]Cluster, error) {
	if denied := s.ensurePlatformAdmin(ctx, actorUserID); denied != nil {
		return nil, denied
	}
	clusters, err := s.store.ListClusters(ctx)
	return clusters, err
}
// GetCluster loads one cluster by ID after the ensurePlatformAdmin check
// passes. A pgx.ErrNoRows result from the store is translated to
// ErrInvalidCluster; any other store result is returned unchanged.
func (s *Service) GetCluster(ctx context.Context, actorUserID, clusterID string) (Cluster, error) {
	if denied := s.ensurePlatformAdmin(ctx, actorUserID); denied != nil {
		return Cluster{}, denied
	}
	found, err := s.store.GetCluster(ctx, clusterID)
	switch {
	case errors.Is(err, pgx.ErrNoRows):
		return Cluster{}, ErrInvalidCluster
	default:
		return found, err
	}
}
// CreateCluster validates input, stores a new cluster and records a
// "cluster.created" audit event.
//
// Slug and Name are trimmed and must be non-empty (ErrInvalidPayload
// otherwise). Metadata is passed through defaultJSON with `{}` as the fallback
// and must be valid JSON. After the cluster is stored, the cluster authority
// key is ensured; when that succeeds its algorithm and public key fingerprint
// are placed in the audit payload, otherwise the payload stays `{}`.
func (s *Service) CreateCluster(ctx context.Context, input CreateClusterInput) (Cluster, error) {
	if denied := s.ensurePlatformAdmin(ctx, input.ActorUserID); denied != nil {
		return Cluster{}, denied
	}

	input.Slug = strings.TrimSpace(input.Slug)
	input.Name = strings.TrimSpace(input.Name)
	if missing := input.Slug == "" || input.Name == ""; missing {
		return Cluster{}, ErrInvalidPayload
	}

	input.Metadata = defaultJSON(input.Metadata, `{}`)
	if ok := json.Valid(input.Metadata); !ok {
		return Cluster{}, errors.New("metadata must be valid json")
	}

	created, err := s.store.CreateCluster(ctx, input)
	if err != nil {
		return Cluster{}, err
	}

	event := ClusterAuditEvent{
		ClusterID:   &created.ID,
		ActorUserID: &input.ActorUserID,
		EventType:   "cluster.created",
		TargetType:  "cluster",
		TargetID:    &created.ID,
		Payload:     json.RawMessage(`{}`),
	}
	// A failure to ensure the authority key does not fail cluster creation;
	// the audit event is simply recorded without authority details.
	authorityKey, authorityErr := s.ensureClusterAuthority(ctx, created.ID, &input.ActorUserID)
	if authorityErr == nil {
		encoded, _ := json.Marshal(map[string]any{
			"cluster_authority": map[string]any{
				"key_algorithm":          authorityKey.KeyAlgorithm,
				"public_key_fingerprint": authorityKey.PublicKeyFingerprint,
			},
		})
		event.Payload = encoded
	}
	event.CreatedAt = s.now()

	// The audit write result is deliberately discarded.
	_ = s.store.RecordAudit(ctx, event)
	return created, nil
}
// ensureClusterAuthority returns the authority key stored for clusterID. Only
// when the lookup reports pgx.ErrNoRows does it ask the store to ensure one
// exists, passing actorUserID along; every other lookup result (including
// other errors) is returned as is.
func (s *Service) ensureClusterAuthority(ctx context.Context, clusterID string, actorUserID *string) (ClusterAuthorityKey, error) {
	existing, err := s.store.GetClusterAuthority(ctx, clusterID)
	if !errors.Is(err, pgx.ErrNoRows) {
		return existing, err
	}
	return s.store.EnsureClusterAuthority(ctx, clusterID, actorUserID)
}
// authorityDescriptor returns a pointer to a copy of the descriptor embedded
// in authorityKey. An empty SchemaVersion on the copy is replaced with
// clusterauth.AuthoritySchemaVersion; the key itself is not modified.
func authorityDescriptor(authorityKey ClusterAuthorityKey) *ClusterAuthorityDescriptor {
	out := new(ClusterAuthorityDescriptor)
	*out = authorityKey.ClusterAuthorityDescriptor
	if out.SchemaVersion != "" {
		return out
	}
	out.SchemaVersion = clusterauth.AuthoritySchemaVersion
	return out
}
// UpdateCluster validates input, updates the cluster in the store and records
// a "cluster.updated" audit event carrying the resulting name, status and
// region.
//
// Checks run in this order: ensurePlatformAdmin, non-empty ClusterID
// (ErrInvalidCluster), ensureClusterMutable, non-empty trimmed Name
// (ErrInvalidPayload), Status (defaults to ClusterStatusActive and must be
// ClusterStatusActive or ClusterStatusDisabled), and Metadata validity. A
// pgx.ErrNoRows result from the store maps to ErrInvalidCluster.
func (s *Service) UpdateCluster(ctx context.Context, input UpdateClusterInput) (Cluster, error) {
	if denied := s.ensurePlatformAdmin(ctx, input.ActorUserID); denied != nil {
		return Cluster{}, denied
	}
	if input.ClusterID == "" {
		return Cluster{}, ErrInvalidCluster
	}
	if readOnly := s.ensureClusterMutable(ctx, input.ActorUserID, input.ClusterID); readOnly != nil {
		return Cluster{}, readOnly
	}

	input.Name = strings.TrimSpace(input.Name)
	input.Status = strings.TrimSpace(input.Status)
	if input.Name == "" {
		return Cluster{}, ErrInvalidPayload
	}
	if input.Status == "" {
		input.Status = ClusterStatusActive
	}
	statusAllowed := input.Status == ClusterStatusActive || input.Status == ClusterStatusDisabled
	if !statusAllowed {
		return Cluster{}, ErrInvalidPayload
	}

	input.Metadata = defaultJSON(input.Metadata, `{}`)
	if ok := json.Valid(input.Metadata); !ok {
		return Cluster{}, errors.New("metadata must be valid json")
	}

	updated, err := s.store.UpdateCluster(ctx, input)
	switch {
	case errors.Is(err, pgx.ErrNoRows):
		return Cluster{}, ErrInvalidCluster
	case err != nil:
		return Cluster{}, err
	}

	summary, _ := json.Marshal(map[string]any{
		"name":   updated.Name,
		"status": updated.Status,
		"region": updated.Region,
	})
	// The audit write result is deliberately discarded.
	_ = s.store.RecordAudit(ctx, ClusterAuditEvent{
		ClusterID:   &updated.ID,
		ActorUserID: &input.ActorUserID,
		EventType:   "cluster.updated",
		TargetType:  "cluster",
		TargetID:    &updated.ID,
		Payload:     summary,
		CreatedAt:   s.now(),
	})
	return updated, nil
}
// ListClusterNodes returns the store's node list for clusterID once the
// ensurePlatformAdmin check has passed for actorUserID.
func (s *Service) ListClusterNodes(ctx context.Context, actorUserID, clusterID string) ([]ClusterNode, error) {
	denied := s.ensurePlatformAdmin(ctx, actorUserID)
	if denied != nil {
		return nil, denied
	}
	nodes, err := s.store.ListClusterNodes(ctx, clusterID)
	return nodes, err
}
// ListNodeGroups returns the store's node groups for clusterID once the
// ensurePlatformAdmin check has passed for actorUserID.
func (s *Service) ListNodeGroups(ctx context.Context, actorUserID, clusterID string) ([]ClusterNodeGroup, error) {
	denied := s.ensurePlatformAdmin(ctx, actorUserID)
	if denied != nil {
		return nil, denied
	}
	groups, err := s.store.ListNodeGroups(ctx, clusterID)
	return groups, err
}
// CreateNodeGroup validates input and stores a new node group.
//
// The actor must pass ensurePlatformAdmin. ClusterID and the trimmed Name must
// be non-empty (ErrInvalidPayload otherwise); this is checked before
// ensureClusterMutable so that a blank cluster ID is rejected without
// consulting the store, matching the ordering used by UpdateCluster. A non-nil
// Description is replaced with a trimmed copy. Metadata is passed through
// defaultJSON with `{}` as the fallback and must be valid JSON. A
// pgx.ErrNoRows result from the store maps to ErrInvalidPayload.
func (s *Service) CreateNodeGroup(ctx context.Context, input CreateNodeGroupInput) (ClusterNodeGroup, error) {
	if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil {
		return ClusterNodeGroup{}, err
	}
	input.Name = strings.TrimSpace(input.Name)
	if input.ClusterID == "" || input.Name == "" {
		return ClusterNodeGroup{}, ErrInvalidPayload
	}
	if err := s.ensureClusterMutable(ctx, input.ActorUserID, input.ClusterID); err != nil {
		return ClusterNodeGroup{}, err
	}
	if input.Description != nil {
		// Point at a fresh string so the caller's value is left untouched.
		trimmed := strings.TrimSpace(*input.Description)
		input.Description = &trimmed
	}
	input.Metadata = defaultJSON(input.Metadata, `{}`)
	if !json.Valid(input.Metadata) {
		return ClusterNodeGroup{}, errors.New("node group metadata must be valid json")
	}
	item, err := s.store.CreateNodeGroup(ctx, input)
	if errors.Is(err, pgx.ErrNoRows) {
		return ClusterNodeGroup{}, ErrInvalidPayload
	}
	return item, err
}
// CreateJoinToken issues a new node join token for a cluster and returns it
// together with the raw token string.
//
// The actor must pass ensurePlatformAdmin and ClusterID must be non-empty
// (ErrInvalidCluster). Scope is passed through defaultJSON with `{}` as the
// fallback and must be valid JSON. A zero ExpiresAt is replaced with
// defaultJoinTokenExpiry; the resulting expiry must be strictly after the
// current time, as the error message states. A non-positive MaxUses becomes 1.
// The store receives a hash of the token rather than the raw value, the stored
// record is then signed via signJoinToken, and a "node_join_token.created"
// audit event is recorded.
func (s *Service) CreateJoinToken(ctx context.Context, input CreateJoinTokenInput) (CreatedJoinToken, error) {
	if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil {
		return CreatedJoinToken{}, err
	}
	if input.ClusterID == "" {
		return CreatedJoinToken{}, ErrInvalidCluster
	}
	input.Scope = defaultJSON(input.Scope, `{}`)
	if !json.Valid(input.Scope) {
		return CreatedJoinToken{}, errors.New("scope must be valid json")
	}

	// Use one clock reading for both the default and the check so the two
	// cannot disagree, and reject an expiry equal to "now" as well as earlier.
	now := s.now()
	if input.ExpiresAt.IsZero() {
		input.ExpiresAt = defaultJoinTokenExpiry(now)
	}
	if !input.ExpiresAt.After(now) {
		return CreatedJoinToken{}, errors.New("expires_at must be in the future")
	}
	if input.MaxUses <= 0 {
		input.MaxUses = 1
	}

	rawToken, err := generateJoinToken()
	if err != nil {
		return CreatedJoinToken{}, err
	}
	tokenHash, err := hashJoinToken(rawToken)
	if err != nil {
		return CreatedJoinToken{}, err
	}
	item, err := s.store.CreateJoinToken(ctx, input, tokenHash)
	if err != nil {
		return CreatedJoinToken{}, err
	}
	item, err = s.signJoinToken(ctx, input, item)
	if err != nil {
		return CreatedJoinToken{}, err
	}

	// The audit write result is deliberately discarded.
	_ = s.store.RecordAudit(ctx, ClusterAuditEvent{
		ClusterID:   &input.ClusterID,
		ActorUserID: &input.ActorUserID,
		EventType:   "node_join_token.created",
		TargetType:  "node_join_token",
		TargetID:    &item.ID,
		Payload:     json.RawMessage(`{"raw_token_returned_once":true}`),
		CreatedAt:   s.now(),
	})
	return CreatedJoinToken{NodeJoinToken: item, Token: rawToken}, nil
}
// signJoinToken signs a stored join token with the cluster authority key and
// persists the signed payload and signature through
// SetJoinTokenAuthority, returning the record the store hands back.
//
// The payload is built from the stored token (ID, scope, expiry, max uses,
// creator and creation time) plus the cluster ID from input.
func (s *Service) signJoinToken(ctx context.Context, input CreateJoinTokenInput, item NodeJoinToken) (NodeJoinToken, error) {
	key, err := s.ensureClusterAuthority(ctx, input.ClusterID, &input.ActorUserID)
	if err != nil {
		return NodeJoinToken{}, err
	}

	var claims clusterJoinTokenAuthorityPayload
	claims.SchemaVersion = clusterJoinTokenAuthoritySchema
	claims.ClusterID = input.ClusterID
	claims.TokenID = item.ID
	claims.Scope = item.Scope
	claims.ExpiresAt = item.ExpiresAt
	claims.MaxUses = item.MaxUses
	claims.CreatedByUserID = item.CreatedByUserID
	claims.IssuedAt = item.CreatedAt
	claims.ControlPlaneOnly = true
	claims.ProductionForwarding = false

	signedPayload, sig, err := clusterauth.SignPayload(key.PrivateKey, claims, s.now())
	if err != nil {
		return NodeJoinToken{}, err
	}
	return s.store.SetJoinTokenAuthority(ctx, input.ClusterID, item.ID, signedPayload, sig)
}
// CreateJoinRequest registers a node's request to join a cluster using a join
// token. Unlike the admin operations in this file it performs no
// ensurePlatformAdmin check; the join token is what is validated.
//
// All input validation and token hashing now happen before any store call, so
// a malformed request no longer triggers the ExpireJoinTokens store call.
// ClusterID must be non-empty (ErrInvalidCluster). NodeName, NodeFingerprint
// and PublicKey are trimmed and must be non-empty (ErrInvalidPayload). The
// reported JSON fields are defaulted via defaultJSON and must be valid JSON. A
// token that cannot be hashed, or for which the store finds no valid row,
// yields ErrInvalidJoinToken. On success a "node_join_request.created" audit
// event is recorded.
func (s *Service) CreateJoinRequest(ctx context.Context, input CreateJoinRequestInput) (NodeJoinRequest, error) {
	if input.ClusterID == "" {
		return NodeJoinRequest{}, ErrInvalidCluster
	}
	input.NodeName = strings.TrimSpace(input.NodeName)
	input.NodeFingerprint = strings.TrimSpace(input.NodeFingerprint)
	input.PublicKey = strings.TrimSpace(input.PublicKey)
	if input.NodeName == "" || input.NodeFingerprint == "" || input.PublicKey == "" {
		return NodeJoinRequest{}, ErrInvalidPayload
	}
	input.ReportedCapabilities = defaultJSON(input.ReportedCapabilities, `{}`)
	input.ReportedFacts = defaultJSON(input.ReportedFacts, `{}`)
	input.RequestedRoles = defaultJSON(input.RequestedRoles, `[]`)
	if !json.Valid(input.ReportedCapabilities) || !json.Valid(input.ReportedFacts) || !json.Valid(input.RequestedRoles) {
		return NodeJoinRequest{}, errors.New("reported_capabilities, reported_facts, and requested_roles must be valid json")
	}
	tokenHash, err := hashJoinToken(input.JoinToken)
	if err != nil {
		return NodeJoinRequest{}, ErrInvalidJoinToken
	}

	// Expire stale tokens right before the lookup so an expired token cannot
	// be matched below.
	if err := s.store.ExpireJoinTokens(ctx, input.ClusterID); err != nil {
		return NodeJoinRequest{}, err
	}
	token, err := s.store.GetValidJoinTokenByHash(ctx, input.ClusterID, tokenHash)
	if err != nil {
		if errors.Is(err, pgx.ErrNoRows) {
			return NodeJoinRequest{}, ErrInvalidJoinToken
		}
		return NodeJoinRequest{}, err
	}
	item, err := s.store.CreateJoinRequest(ctx, input, token.ID)
	if err != nil {
		if errors.Is(err, pgx.ErrNoRows) {
			return NodeJoinRequest{}, ErrInvalidJoinToken
		}
		return NodeJoinRequest{}, err
	}

	// The audit write result is deliberately discarded; no actor is set
	// because the request originates from a node agent.
	_ = s.store.RecordAudit(ctx, ClusterAuditEvent{
		ClusterID:  &input.ClusterID,
		EventType:  "node_join_request.created",
		TargetType: "node_join_request",
		TargetID:   &item.ID,
		Payload:    json.RawMessage(`{"source":"node_agent"}`),
		CreatedAt:  s.now(),
	})
	return item, nil
}
// ListJoinRequests returns the store's join requests for clusterID once the
// ensurePlatformAdmin check has passed for actorUserID.
func (s *Service) ListJoinRequests(ctx context.Context, actorUserID, clusterID string) ([]NodeJoinRequest, error) {
	denied := s.ensurePlatformAdmin(ctx, actorUserID)
	if denied != nil {
		return nil, denied
	}
	requests, err := s.store.ListJoinRequests(ctx, clusterID)
	return requests, err
}
// GetJoinRequestBootstrap reports the status of a join request and, once the
// request is approved, the bootstrap material for the node.
//
// ClusterID, JoinRequestID, NodeFingerprint and PublicKey are trimmed and must
// all be non-empty; otherwise, or when the store reports pgx.ErrNoRows,
// ErrInvalidJoinRequest is returned. For any status other than
// JoinRequestStatusApproved the result carries only the status and the stored
// request. No ensurePlatformAdmin check is performed here.
func (s *Service) GetJoinRequestBootstrap(ctx context.Context, input GetJoinRequestBootstrapInput) (JoinRequestBootstrapResult, error) {
	var none JoinRequestBootstrapResult

	input.ClusterID = strings.TrimSpace(input.ClusterID)
	input.JoinRequestID = strings.TrimSpace(input.JoinRequestID)
	input.NodeFingerprint = strings.TrimSpace(input.NodeFingerprint)
	input.PublicKey = strings.TrimSpace(input.PublicKey)
	complete := input.ClusterID != "" && input.JoinRequestID != "" && input.NodeFingerprint != "" && input.PublicKey != ""
	if !complete {
		return none, ErrInvalidJoinRequest
	}

	request, err := s.store.GetJoinRequestForBootstrap(ctx, input)
	switch {
	case errors.Is(err, pgx.ErrNoRows):
		return none, ErrInvalidJoinRequest
	case err != nil:
		return none, err
	}

	if request.Status != JoinRequestStatusApproved {
		return JoinRequestBootstrapResult{Status: request.Status, JoinRequest: request}, nil
	}

	bootstrap, refreshed, err := s.bootstrapForApprovedJoinRequest(ctx, request)
	if err != nil {
		return none, err
	}
	return JoinRequestBootstrapResult{
		Status:      request.Status,
		JoinRequest: refreshed,
		Bootstrap:   &bootstrap,
	}, nil
}
// RevokeJoinToken revokes a join token through the store after the
// ensurePlatformAdmin check passes, then records a "node_join_token.revoked"
// audit event. A pgx.ErrNoRows result maps to ErrInvalidJoinToken.
func (s *Service) RevokeJoinToken(ctx context.Context, input RevokeJoinTokenInput) (NodeJoinToken, error) {
	if denied := s.ensurePlatformAdmin(ctx, input.ActorUserID); denied != nil {
		return NodeJoinToken{}, denied
	}

	revoked, err := s.store.RevokeJoinToken(ctx, input)
	switch {
	case errors.Is(err, pgx.ErrNoRows):
		return NodeJoinToken{}, ErrInvalidJoinToken
	case err != nil:
		return NodeJoinToken{}, err
	}

	event := ClusterAuditEvent{
		ClusterID:   &input.ClusterID,
		ActorUserID: &input.ActorUserID,
		EventType:   "node_join_token.revoked",
		TargetType:  "node_join_token",
		TargetID:    &input.TokenID,
		Payload:     json.RawMessage(`{}`),
		CreatedAt:   s.now(),
	}
	// The audit write result is deliberately discarded.
	_ = s.store.RecordAudit(ctx, event)
	return revoked, nil
}
// ApproveJoinRequest approves a pending join request in the store and signs
// the approval with the cluster authority key.
//
// The actor must pass ensurePlatformAdmin. ClusterID and JoinRequestID must be
// non-empty (ErrInvalidJoinRequest); this is checked before
// ensureClusterMutable so that blank identifiers are rejected without
// consulting the store, matching the ordering used by UpdateCluster. A
// pgx.ErrNoRows result from the store maps to ErrInvalidJoinRequest.
func (s *Service) ApproveJoinRequest(ctx context.Context, input ApproveJoinRequestInput) (ApprovedJoinRequest, error) {
	if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil {
		return ApprovedJoinRequest{}, err
	}
	if input.ClusterID == "" || input.JoinRequestID == "" {
		return ApprovedJoinRequest{}, ErrInvalidJoinRequest
	}
	if err := s.ensureClusterMutable(ctx, input.ActorUserID, input.ClusterID); err != nil {
		return ApprovedJoinRequest{}, err
	}
	item, err := s.store.ApproveJoinRequest(ctx, input)
	if errors.Is(err, pgx.ErrNoRows) {
		return ApprovedJoinRequest{}, ErrInvalidJoinRequest
	}
	if err != nil {
		return ApprovedJoinRequest{}, err
	}
	item, err = s.signApprovedJoinRequest(ctx, input, item)
	if err != nil {
		return ApprovedJoinRequest{}, err
	}
	return item, nil
}
// signApprovedJoinRequest signs the approval of a join request with the
// cluster authority key, stores the signed payload and signature on the join
// request, and returns item with the refreshed join request and the authority
// descriptor, payload and signature attached to its bootstrap.
//
// An empty bootstrap heartbeat endpoint is filled in with
// nodeHeartbeatEndpoint before signing.
func (s *Service) signApprovedJoinRequest(ctx context.Context, input ApproveJoinRequestInput, item ApprovedJoinRequest) (ApprovedJoinRequest, error) {
	key, err := s.ensureClusterAuthority(ctx, input.ClusterID, &input.ActorUserID)
	if err != nil {
		return ApprovedJoinRequest{}, err
	}

	if item.Bootstrap.HeartbeatEndpoint == "" {
		item.Bootstrap.HeartbeatEndpoint = nodeHeartbeatEndpoint(input.ClusterID, item.Bootstrap.NodeID)
	}

	var claims clusterNodeApprovalAuthorityPayload
	claims.SchemaVersion = clusterNodeApprovalAuthoritySchema
	claims.ClusterID = input.ClusterID
	claims.JoinRequestID = item.JoinRequest.ID
	claims.NodeID = item.Bootstrap.NodeID
	claims.NodeFingerprint = item.JoinRequest.NodeFingerprint
	claims.IdentityStatus = item.Bootstrap.IdentityStatus
	claims.HeartbeatEndpoint = item.Bootstrap.HeartbeatEndpoint
	claims.ApprovedByUserID = input.ActorUserID
	claims.IssuedAt = s.now()
	claims.ControlPlaneOnly = true
	claims.ProductionForwarding = false

	signedPayload, sig, err := clusterauth.SignPayload(key.PrivateKey, claims, s.now())
	if err != nil {
		return ApprovedJoinRequest{}, err
	}

	stored, err := s.store.SetJoinRequestApprovalAuthority(ctx, input.ClusterID, item.JoinRequest.ID, signedPayload, sig)
	if err != nil {
		return ApprovedJoinRequest{}, err
	}

	item.JoinRequest = stored
	item.Bootstrap.ClusterAuthority = authorityDescriptor(key)
	item.Bootstrap.AuthorityPayload = signedPayload
	item.Bootstrap.AuthoritySignature = &sig
	return item, nil
}
// bootstrapForApprovedJoinRequest builds the bootstrap material for an
// approved join request and returns it with the (possibly refreshed) request.
//
// The request must be in JoinRequestStatusApproved with a non-blank
// ApprovedNodeID, otherwise ErrInvalidJoinRequest is returned. If the request
// has no stored approval payload or signature (as judged by rawMessageEmpty),
// a fresh approval payload is signed with the cluster authority key and
// persisted. Otherwise the stored payload is verified against the authority's
// public key with the stored signature.
//
// The stored signature is decoded exactly once, and the approved node ID is
// captured before the request is replaced by the store result, so the
// bootstrap, the heartbeat endpoint and the signed payload all use the same
// node ID.
func (s *Service) bootstrapForApprovedJoinRequest(ctx context.Context, item NodeJoinRequest) (NodeBootstrap, NodeJoinRequest, error) {
	if item.Status != JoinRequestStatusApproved || item.ApprovedNodeID == nil || strings.TrimSpace(*item.ApprovedNodeID) == "" {
		return NodeBootstrap{}, NodeJoinRequest{}, ErrInvalidJoinRequest
	}
	authorityKey, err := s.ensureClusterAuthority(ctx, item.ClusterID, item.ReviewedByUserID)
	if err != nil {
		return NodeBootstrap{}, NodeJoinRequest{}, err
	}

	nodeID := *item.ApprovedNodeID
	heartbeatEndpoint := nodeHeartbeatEndpoint(item.ClusterID, nodeID)
	identityStatus := NodeRegistrationActive

	alreadySigned := !rawMessageEmpty(item.ApprovalPayload) && !rawMessageEmpty(item.ApprovalSignature)
	if !alreadySigned {
		approvedBy := "system"
		if item.ReviewedByUserID != nil && strings.TrimSpace(*item.ReviewedByUserID) != "" {
			approvedBy = strings.TrimSpace(*item.ReviewedByUserID)
		}
		payload := clusterNodeApprovalAuthorityPayload{
			SchemaVersion:        clusterNodeApprovalAuthoritySchema,
			ClusterID:            item.ClusterID,
			JoinRequestID:        item.ID,
			NodeID:               nodeID,
			NodeFingerprint:      item.NodeFingerprint,
			IdentityStatus:       identityStatus,
			HeartbeatEndpoint:    heartbeatEndpoint,
			ApprovedByUserID:     approvedBy,
			IssuedAt:             s.now(),
			ControlPlaneOnly:     true,
			ProductionForwarding: false,
		}
		rawPayload, freshSignature, err := clusterauth.SignPayload(authorityKey.PrivateKey, payload, s.now())
		if err != nil {
			return NodeBootstrap{}, NodeJoinRequest{}, err
		}
		item, err = s.store.SetJoinRequestApprovalAuthority(ctx, item.ClusterID, item.ID, rawPayload, freshSignature)
		if err != nil {
			return NodeBootstrap{}, NodeJoinRequest{}, err
		}
	}

	// Decode the signature as stored on the (possibly refreshed) request.
	var signature ClusterSignature
	if err := json.Unmarshal(item.ApprovalSignature, &signature); err != nil {
		return NodeBootstrap{}, NodeJoinRequest{}, err
	}
	// Only previously stored approvals are verified; a payload signed just
	// above is not re-checked.
	if alreadySigned {
		if err := clusterauth.VerifyRaw(authorityKey.PublicKey, item.ApprovalPayload, signature); err != nil {
			return NodeBootstrap{}, NodeJoinRequest{}, err
		}
	}

	bootstrap := NodeBootstrap{
		NodeID:         nodeID,
		ClusterID:      item.ClusterID,
		IdentityStatus: identityStatus,
		Certificate: map[string]any{
			"status": "pending_issuer_integration",
		},
		HeartbeatEndpoint:  heartbeatEndpoint,
		ClusterAuthority:   authorityDescriptor(authorityKey),
		AuthorityPayload:   item.ApprovalPayload,
		AuthoritySignature: &signature,
	}
	return bootstrap, item, nil
}
// nodeHeartbeatEndpoint returns the API path used for a node's heartbeats:
// "/api/v1/clusters/<clusterID>/nodes/<nodeID>/heartbeats". The identifiers
// are inserted verbatim, without escaping.
func nodeHeartbeatEndpoint(clusterID, nodeID string) string {
	const (
		clustersPrefix  = "/api/v1/clusters/"
		nodesSegment    = "/nodes/"
		heartbeatSuffix = "/heartbeats"
	)
	clusterPath := clustersPrefix + clusterID
	nodePath := nodesSegment + nodeID
	return clusterPath + nodePath + heartbeatSuffix
}
// rawMessageEmpty reports whether raw, after trimming surrounding whitespace,
// is empty or is exactly "{}" or "null".
func rawMessageEmpty(raw json.RawMessage) bool {
	switch strings.TrimSpace(string(raw)) {
	case "", "{}", "null":
		return true
	default:
		return false
	}
}
// RejectJoinRequest rejects a join request through the store after the
// ensurePlatformAdmin check passes. A blank reason is replaced with
// "Rejected by platform administrator."; a pgx.ErrNoRows result maps to
// ErrInvalidJoinRequest.
func (s *Service) RejectJoinRequest(ctx context.Context, input RejectJoinRequestInput) (NodeJoinRequest, error) {
	if denied := s.ensurePlatformAdmin(ctx, input.ActorUserID); denied != nil {
		return NodeJoinRequest{}, denied
	}

	if reason := strings.TrimSpace(input.Reason); reason != "" {
		input.Reason = reason
	} else {
		input.Reason = "Rejected by platform administrator."
	}

	rejected, err := s.store.RejectJoinRequest(ctx, input)
	if errors.Is(err, pgx.ErrNoRows) {
		return NodeJoinRequest{}, ErrInvalidJoinRequest
	}
	return rejected, err
}
// AssignNodeRole stores a role assignment for a node and records an audit
// event whose type is "node_role." followed by the assignment status.
//
// The actor must pass ensurePlatformAdmin and the cluster must pass
// ensureClusterMutable. The role must be accepted by isAllowedNodeRole
// (ErrInvalidNodeRole). Status defaults to "active" and must be "active",
// "disabled" or "revoked" (ErrInvalidPayload). Policy is passed through
// defaultJSON with `{}` as the fallback and must be valid JSON.
func (s *Service) AssignNodeRole(ctx context.Context, input AssignNodeRoleInput) (NodeRoleAssignment, error) {
	var none NodeRoleAssignment

	if denied := s.ensurePlatformAdmin(ctx, input.ActorUserID); denied != nil {
		return none, denied
	}
	if readOnly := s.ensureClusterMutable(ctx, input.ActorUserID, input.ClusterID); readOnly != nil {
		return none, readOnly
	}
	if allowed := isAllowedNodeRole(input.Role); !allowed {
		return none, ErrInvalidNodeRole
	}

	if input.Status == "" {
		input.Status = "active"
	}
	switch input.Status {
	case "active", "disabled", "revoked":
		// Supported status; continue.
	default:
		return none, ErrInvalidPayload
	}

	input.Policy = defaultJSON(input.Policy, `{}`)
	if ok := json.Valid(input.Policy); !ok {
		return none, errors.New("policy must be valid json")
	}

	assignment, err := s.store.AssignNodeRole(ctx, input)
	if err != nil {
		return none, err
	}

	event := ClusterAuditEvent{
		ClusterID:   &input.ClusterID,
		ActorUserID: &input.ActorUserID,
		EventType:   "node_role." + input.Status,
		TargetType:  "node",
		TargetID:    &input.NodeID,
		Payload:     json.RawMessage(`{"capability_is_not_permission":true}`),
		CreatedAt:   s.now(),
	}
	// The audit write result is deliberately discarded.
	_ = s.store.RecordAudit(ctx, event)
	return assignment, nil
}
// ListNodeRoleAssignments returns the store's role assignments for the given
// cluster and node once the ensurePlatformAdmin check has passed.
func (s *Service) ListNodeRoleAssignments(ctx context.Context, actorUserID, clusterID, nodeID string) ([]NodeRoleAssignment, error) {
	denied := s.ensurePlatformAdmin(ctx, actorUserID)
	if denied != nil {
		return nil, denied
	}
	assignments, err := s.store.ListNodeRoleAssignments(ctx, clusterID, nodeID)
	return assignments, err
}
// AttachExistingNodeToCluster attaches an already known node to a cluster
// with the requested roles.
//
// The actor must pass ensurePlatformAdmin. ClusterID and NodeID must be
// non-empty (ErrInvalidPayload) and every requested role must be accepted by
// isAllowedNodeRole (ErrInvalidNodeRole); both checks run before
// ensureClusterMutable so that invalid input is rejected without consulting
// the store, matching the ordering used by UpdateCluster. A pgx.ErrNoRows
// result from the store maps to ErrInvalidPayload.
func (s *Service) AttachExistingNodeToCluster(ctx context.Context, input AttachExistingNodeInput) (ClusterNode, error) {
	if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil {
		return ClusterNode{}, err
	}
	if input.ClusterID == "" || input.NodeID == "" {
		return ClusterNode{}, ErrInvalidPayload
	}
	for _, role := range input.Roles {
		if !isAllowedNodeRole(role) {
			return ClusterNode{}, ErrInvalidNodeRole
		}
	}
	if err := s.ensureClusterMutable(ctx, input.ActorUserID, input.ClusterID); err != nil {
		return ClusterNode{}, err
	}
	item, err := s.store.AttachExistingNodeToCluster(ctx, input)
	if errors.Is(err, pgx.ErrNoRows) {
		return ClusterNode{}, ErrInvalidPayload
	}
	return item, err
}
// AssignNodeToGroup assigns a node to a node group, or clears the assignment
// when GroupID is nil or blank.
//
// The actor must pass ensurePlatformAdmin. ClusterID and NodeID must be
// non-empty (ErrInvalidPayload); this is checked before ensureClusterMutable
// so that blank identifiers are rejected without consulting the store,
// matching the ordering used by UpdateCluster. A non-nil GroupID is trimmed:
// a blank value becomes nil, anything else is replaced with a pointer to the
// trimmed copy. A pgx.ErrNoRows result from the store maps to
// ErrInvalidPayload.
func (s *Service) AssignNodeToGroup(ctx context.Context, input AssignNodeGroupInput) (ClusterNode, error) {
	if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil {
		return ClusterNode{}, err
	}
	if input.ClusterID == "" || input.NodeID == "" {
		return ClusterNode{}, ErrInvalidPayload
	}
	if input.GroupID != nil {
		trimmed := strings.TrimSpace(*input.GroupID)
		if trimmed == "" {
			input.GroupID = nil
		} else {
			// Point at a fresh string so the caller's value is left untouched.
			input.GroupID = &trimmed
		}
	}
	if err := s.ensureClusterMutable(ctx, input.ActorUserID, input.ClusterID); err != nil {
		return ClusterNode{}, err
	}
	item, err := s.store.AssignNodeToGroup(ctx, input)
	if errors.Is(err, pgx.ErrNoRows) {
		return ClusterNode{}, ErrInvalidPayload
	}
	return item, err
}
// RevokeNodeIdentity revokes a node identity through the store after the
// ensurePlatformAdmin check passes. A blank reason is replaced with
// "revoked by platform administrator"; a pgx.ErrNoRows result maps to
// ErrInvalidPayload.
func (s *Service) RevokeNodeIdentity(ctx context.Context, input RevokeNodeIdentityInput) error {
	if denied := s.ensurePlatformAdmin(ctx, input.ActorUserID); denied != nil {
		return denied
	}

	if reason := strings.TrimSpace(input.Reason); reason != "" {
		input.Reason = reason
	} else {
		input.Reason = "revoked by platform administrator"
	}

	err := s.store.RevokeNodeIdentity(ctx, input)
	switch {
	case err == nil:
		return nil
	case errors.Is(err, pgx.ErrNoRows):
		return ErrInvalidPayload
	default:
		return err
	}
}
// DisableClusterMembership disables a cluster membership through the store
// after the ensurePlatformAdmin check passes. A blank reason is replaced with
// "disabled by platform administrator"; a pgx.ErrNoRows result maps to
// ErrInvalidPayload.
func (s *Service) DisableClusterMembership(ctx context.Context, input DisableMembershipInput) error {
	if denied := s.ensurePlatformAdmin(ctx, input.ActorUserID); denied != nil {
		return denied
	}

	reason := strings.TrimSpace(input.Reason)
	if reason == "" {
		reason = "disabled by platform administrator"
	}
	input.Reason = reason

	err := s.store.DisableClusterMembership(ctx, input)
	if errors.Is(err, pgx.ErrNoRows) {
		return ErrInvalidPayload
	}
	return err
}
// RecordHeartbeat stores a heartbeat reported for a node. It performs no
// ensurePlatformAdmin check.
//
// ClusterID and NodeID must be non-empty (ErrInvalidPayload). An empty
// HealthStatus becomes "unknown". Capabilities, ServiceStates and Metadata are
// passed through defaultJSON with `{}` as the fallback and, like the JSON
// inputs of the other write operations in this service, must be valid JSON
// before they are handed to the store.
func (s *Service) RecordHeartbeat(ctx context.Context, input RecordHeartbeatInput) (NodeHeartbeat, error) {
	if input.ClusterID == "" || input.NodeID == "" {
		return NodeHeartbeat{}, ErrInvalidPayload
	}
	if input.HealthStatus == "" {
		input.HealthStatus = "unknown"
	}
	input.Capabilities = defaultJSON(input.Capabilities, `{}`)
	input.ServiceStates = defaultJSON(input.ServiceStates, `{}`)
	input.Metadata = defaultJSON(input.Metadata, `{}`)
	if !json.Valid(input.Capabilities) || !json.Valid(input.ServiceStates) || !json.Valid(input.Metadata) {
		return NodeHeartbeat{}, errors.New("capabilities, service_states, and metadata must be valid json")
	}
	return s.store.RecordHeartbeat(ctx, input)
}
// ListNodeHeartbeats returns the store's heartbeats for the given cluster and
// node once the ensurePlatformAdmin check has passed. limit is forwarded to
// the store unchanged.
func (s *Service) ListNodeHeartbeats(ctx context.Context, actorUserID, clusterID, nodeID string, limit int) ([]NodeHeartbeat, error) {
	denied := s.ensurePlatformAdmin(ctx, actorUserID)
	if denied != nil {
		return nil, denied
	}
	heartbeats, err := s.store.ListNodeHeartbeats(ctx, clusterID, nodeID, limit)
	return heartbeats, err
}
// UpsertFabricTestingFlag creates or updates a fabric testing flag and records
// a "fabric.testing_flag.updated" audit event.
//
// The actor must pass ensurePlatformAdmin. ScopeType is trimmed and must be
// "platform", "organization" or "node" (ErrInvalidPayload otherwise). For
// "platform" the ScopeID is cleared. For "organization" and "node" a non-blank
// ScopeID is required and is normalised to its trimmed value, so the value
// that was validated is the value that is stored and audited. A non-positive
// HistoryRetentionHours becomes 24. Metadata is passed through defaultJSON
// with `{}` as the fallback and must be valid JSON.
func (s *Service) UpsertFabricTestingFlag(ctx context.Context, input UpsertFabricTestingFlagInput) (FabricTestingFlag, error) {
	if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil {
		return FabricTestingFlag{}, err
	}
	input.ScopeType = strings.TrimSpace(input.ScopeType)
	switch input.ScopeType {
	case "platform":
		input.ScopeID = nil
	case "organization", "node":
		if input.ScopeID == nil {
			return FabricTestingFlag{}, ErrInvalidPayload
		}
		scopeID := strings.TrimSpace(*input.ScopeID)
		if scopeID == "" {
			return FabricTestingFlag{}, ErrInvalidPayload
		}
		// Point at a fresh string so the caller's value is left untouched.
		input.ScopeID = &scopeID
	default:
		// Covers the empty scope type as well as unknown ones.
		return FabricTestingFlag{}, ErrInvalidPayload
	}
	if input.HistoryRetentionHours <= 0 {
		input.HistoryRetentionHours = 24
	}
	input.Metadata = defaultJSON(input.Metadata, `{}`)
	if !json.Valid(input.Metadata) {
		return FabricTestingFlag{}, errors.New("testing flag metadata must be valid json")
	}
	item, err := s.store.UpsertFabricTestingFlag(ctx, input)
	if err != nil {
		return FabricTestingFlag{}, err
	}
	// The audit write result is deliberately discarded.
	_ = s.store.RecordAudit(ctx, ClusterAuditEvent{
		ClusterID:   input.ClusterID,
		ActorUserID: &input.ActorUserID,
		EventType:   "fabric.testing_flag.updated",
		TargetType:  input.ScopeType,
		TargetID:    input.ScopeID,
		Payload:     json.RawMessage(`{"runtime_mesh_enabled":false}`),
		CreatedAt:   s.now(),
	})
	return item, nil
}
// ListFabricTestingFlags returns the store's fabric testing flags once the
// ensurePlatformAdmin check has passed for actorUserID.
func (s *Service) ListFabricTestingFlags(ctx context.Context, actorUserID string) ([]FabricTestingFlag, error) {
	denied := s.ensurePlatformAdmin(ctx, actorUserID)
	if denied != nil {
		return nil, denied
	}
	flags, err := s.store.ListFabricTestingFlags(ctx)
	return flags, err
}
// GetEffectiveNodeTestingFlags returns the store's effective testing flags
// for a node. Both identifiers must be non-empty, otherwise ErrInvalidPayload
// is returned. No ensurePlatformAdmin check is performed here.
func (s *Service) GetEffectiveNodeTestingFlags(ctx context.Context, clusterID, nodeID string) (EffectiveNodeTestingFlags, error) {
	if identified := clusterID != "" && nodeID != ""; !identified {
		return EffectiveNodeTestingFlags{}, ErrInvalidPayload
	}
	flags, err := s.store.GetEffectiveNodeTestingFlags(ctx, clusterID, nodeID)
	return flags, err
}
// GetNodeSyntheticMeshConfig builds the synthetic mesh config for one node and
// returns it signed by signSyntheticMeshConfig.
//
// ClusterID and NodeID are trimmed and must be non-empty (ErrInvalidPayload).
// When the node's effective testing flags are not enabled, or synthetic links
// are not enabled, a config with Enabled=false and all versions set to
// "disabled" is signed and returned. Otherwise the cluster's route intents are
// turned into routes and the peer endpoints, endpoint candidates, recovery
// seeds, rendezvous leases, relay policy report and route path decisions are
// aggregated across those routes. Any store or helper error aborts the build.
//
// NOTE(review): no ensurePlatformAdmin check is made here; presumably the
// caller authenticates the node agent — confirm.
func (s *Service) GetNodeSyntheticMeshConfig(ctx context.Context, input GetNodeSyntheticMeshConfigInput) (NodeSyntheticMeshConfig, error) {
input.ClusterID = strings.TrimSpace(input.ClusterID)
input.NodeID = strings.TrimSpace(input.NodeID)
if input.ClusterID == "" || input.NodeID == "" {
return NodeSyntheticMeshConfig{}, ErrInvalidPayload
}
// Start from a disabled config; collections are initialised to empty,
// non-nil values.
cfg := NodeSyntheticMeshConfig{
Enabled: false,
SchemaVersion: "c17z18.synthetic.v1",
ClusterID: input.ClusterID,
LocalNodeID: input.NodeID,
AuthorityRequired: true,
ConfigVersion: "disabled",
PeerDirectoryVersion: "disabled",
PolicyVersion: "disabled",
PeerEndpoints: map[string]string{},
PeerEndpointCandidates: map[string][]PeerEndpointCandidate{},
PeerDirectory: []PeerDirectoryEntry{},
RecoverySeeds: []PeerRecoverySeed{},
RendezvousLeases: []PeerRendezvousLease{},
Routes: []SyntheticMeshRouteConfig{},
ProductionForwarding: false,
}
flags, err := s.store.GetEffectiveNodeTestingFlags(ctx, input.ClusterID, input.NodeID)
if err != nil {
return NodeSyntheticMeshConfig{}, err
}
// Even the disabled config is signed before it is returned.
if !flags.Enabled || !flags.SyntheticLinksEnabled {
return s.signSyntheticMeshConfig(ctx, cfg)
}
intents, err := s.store.ListRouteIntents(ctx, input.ClusterID)
if err != nil {
return NodeSyntheticMeshConfig{}, err
}
cfg.Enabled = true
// The config version is derived from the current UTC time at one-second
// resolution and reused for the peer directory and policy versions.
cfg.ConfigVersion = "c17z18-" + s.now().UTC().Format("20060102T150405Z")
cfg.PeerDirectoryVersion = cfg.ConfigVersion
cfg.PolicyVersion = cfg.ConfigVersion
meshLinks, err := s.store.ListMeshLinks(ctx, input.ClusterID)
if err != nil {
return NodeSyntheticMeshConfig{}, err
}
// relayPolicy is shared across all routes and accumulates feedback and
// replacement hints as each route is processed.
relayPolicy := newRendezvousRelayPolicy(input.NodeID, meshLinks, s.now())
// Accumulators keyed by string; they are converted to sorted slices after
// the loop.
peerDirectory := map[string]*PeerDirectoryEntry{}
recoverySeeds := map[string]PeerRecoverySeed{}
rendezvousLeases := map[string]PeerRendezvousLease{}
routePathDecisions := []RoutePathDecision{}
for _, intent := range intents {
route, peers, candidates, seeds, policyLeases, ok := s.syntheticRouteFromIntent(input, intent)
// Intents for which syntheticRouteFromIntent reports !ok are skipped.
if !ok {
continue
}
reportedPeers, reportedCandidates, err := s.reportedEndpointConfig(ctx, input.ClusterID, input.NodeID, route.Hops)
if err != nil {
return NodeSyntheticMeshConfig{}, err
}
feedback, err := s.rendezvousRelayFeedback(ctx, input.ClusterID, route.Hops, s.now())
if err != nil {
return NodeSyntheticMeshConfig{}, err
}
relayPolicy.addFeedback(feedback)
replacementHints, err := s.rendezvousRelayReplacementHints(ctx, input.ClusterID, route.Hops, s.now())
if err != nil {
return NodeSyntheticMeshConfig{}, err
}
relayPolicy.addReplacementHints(replacementHints)
relayPolicy.addFeedback(replacementHintFeedback(replacementHints, s.now()))
relayPolicy.addFeedback(rendezvousRelayRouteHealthFeedback(input.NodeID, route, meshLinks, s.now()))
// Reported endpoints overwrite the intent-derived endpoint for the same
// node; reported candidates are appended to the intent-derived ones.
for nodeID, endpoint := range reportedPeers {
peers[nodeID] = endpoint
}
for nodeID, items := range reportedCandidates {
candidates[nodeID] = append(candidates[nodeID], items...)
}
// Leases for this route combine the scoped policy leases with derived ones;
// both are computed after the relay policy updates above.
routeLeases := scopedRendezvousLeases(policyLeases, route, input.NodeID, relayPolicy, s.now())
routeLeases = append(routeLeases, derivedRendezvousLeases(route, peers, candidates, input.NodeID, relayPolicy, s.now())...)
cfg.Routes = append(cfg.Routes, route)
routePathDecisions = append(routePathDecisions, routePathDecisionForRoute(route, input.NodeID, routeLeases, relayPolicy, cfg.ConfigVersion))
mergePeerDirectoryRoute(peerDirectory, route, input.NodeID)
// Only pairs with a non-blank node ID and endpoint are published; the
// directory entry's EndpointCount is incremented once per such pair per
// route.
for nodeID, endpoint := range peers {
if strings.TrimSpace(nodeID) != "" && strings.TrimSpace(endpoint) != "" {
cfg.PeerEndpoints[nodeID] = endpoint
peerDirectoryEntry(peerDirectory, nodeID).EndpointCount++
}
}
// Candidates are appended per route; nothing here de-duplicates candidates
// that appear for the same node in several routes.
for nodeID, nodeCandidates := range candidates {
if strings.TrimSpace(nodeID) == "" || len(nodeCandidates) == 0 {
continue
}
cfg.PeerEndpointCandidates[nodeID] = append(cfg.PeerEndpointCandidates[nodeID], nodeCandidates...)
mergePeerDirectoryCandidates(peerDirectory, nodeID, nodeCandidates)
}
mergeRecoverySeeds(recoverySeeds, seeds)
mergeRendezvousLeases(rendezvousLeases, routeLeases)
}
// maxScopedRecoverySeeds and maxScopedRendezvousLeases are passed to the
// sorting helpers (presumably as caps on the result size — confirm).
cfg.RecoverySeeds = sortedRecoverySeeds(recoverySeeds, maxScopedRecoverySeeds)
cfg.RendezvousLeases = sortedRendezvousLeases(rendezvousLeases, maxScopedRendezvousLeases)
cfg.RendezvousRelayPolicy = relayPolicy.report()
cfg.RoutePathDecisions = routePathDecisionReport(cfg.ConfigVersion, routePathDecisions)
// The peer directory is annotated from the final seed and lease lists before
// it is converted to its sorted slice form.
markPeerDirectoryRecoverySeeds(peerDirectory, cfg.RecoverySeeds)
markPeerDirectoryRendezvousLeases(peerDirectory, cfg.RendezvousLeases, input.NodeID)
cfg.PeerDirectory = sortedPeerDirectory(peerDirectory)
return s.signSyntheticMeshConfig(ctx, cfg)
}
// signSyntheticMeshConfig attaches the cluster authority descriptor to cfg and
// signs a payload that binds the config to the authority key.
//
// The hash in the payload is computed by clusterauth.HashRaw over the JSON
// encoding of cfg with AuthorityPayload and AuthoritySignature cleared (and
// with the authority descriptor already set). The payload is issued at the
// current UTC time and expires five minutes later. The returned config carries
// the signed payload and its signature.
func (s *Service) signSyntheticMeshConfig(ctx context.Context, cfg NodeSyntheticMeshConfig) (NodeSyntheticMeshConfig, error) {
	var none NodeSyntheticMeshConfig

	key, err := s.ensureClusterAuthority(ctx, cfg.ClusterID, nil)
	if err != nil {
		return none, err
	}
	cfg.AuthorityRequired = true
	cfg.ClusterAuthority = authorityDescriptor(key)

	// Hash a copy with the authority fields blanked so the digest does not
	// depend on the values produced below.
	hashInput := cfg
	hashInput.AuthorityPayload, hashInput.AuthoritySignature = nil, nil
	encoded, err := json.Marshal(hashInput)
	if err != nil {
		return none, err
	}
	digest, err := clusterauth.HashRaw(encoded)
	if err != nil {
		return none, err
	}

	issuedAt := s.now().UTC()
	var claims clusterMeshConfigAuthorityPayload
	claims.SchemaVersion = clusterMeshConfigAuthoritySchema
	claims.ClusterID = cfg.ClusterID
	claims.LocalNodeID = cfg.LocalNodeID
	claims.ConfigVersion = cfg.ConfigVersion
	claims.ConfigSHA256 = digest
	claims.IssuedAt = issuedAt
	claims.ExpiresAt = issuedAt.Add(5 * time.Minute)
	claims.ControlPlaneOnly = true
	claims.ProductionForwarding = false

	signedPayload, sig, err := clusterauth.SignPayload(key.PrivateKey, claims, issuedAt)
	if err != nil {
		return none, err
	}
	cfg.AuthorityPayload = signedPayload
	cfg.AuthoritySignature = &sig
	return cfg, nil
}
// RecordNodeTelemetry stores a telemetry observation for a node. It performs
// no ensurePlatformAdmin check.
//
// ClusterID and NodeID must be non-empty (ErrInvalidPayload). Payload is
// passed through defaultJSON with `{}` as the fallback and must be valid JSON.
// A zero ObservedAt is replaced with the service clock's current time.
func (s *Service) RecordNodeTelemetry(ctx context.Context, input RecordNodeTelemetryInput) (NodeTelemetryObservation, error) {
	if identified := input.ClusterID != "" && input.NodeID != ""; !identified {
		return NodeTelemetryObservation{}, ErrInvalidPayload
	}

	input.Payload = defaultJSON(input.Payload, `{}`)
	if ok := json.Valid(input.Payload); !ok {
		return NodeTelemetryObservation{}, errors.New("telemetry payload must be valid json")
	}

	if input.ObservedAt.IsZero() {
		input.ObservedAt = s.now()
	}

	observation, err := s.store.RecordNodeTelemetry(ctx, input)
	return observation, err
}
// ListNodeTelemetry returns the store's telemetry observations for a node.
// The actor must pass ensurePlatformAdmin; clusterID, nodeID and limit are
// forwarded to the store unchanged.
func (s *Service) ListNodeTelemetry(ctx context.Context, actorUserID, clusterID, nodeID string, limit int) ([]NodeTelemetryObservation, error) {
	authErr := s.ensurePlatformAdmin(ctx, actorUserID)
	if authErr != nil {
		return nil, authErr
	}
	observations, err := s.store.ListNodeTelemetry(ctx, clusterID, nodeID, limit)
	return observations, err
}
// SetDesiredWorkload stores the desired state of a workload on a node.
//
// The actor must be a platform admin and the cluster must pass
// ensureClusterMutable. ClusterID, NodeID and a non-blank ServiceType are
// required. Defaults: DesiredState "disabled", RuntimeMode "container",
// Config and Environment `{}`. After a successful store write an audit event
// is recorded; the audit error is discarded so it cannot fail the call.
func (s *Service) SetDesiredWorkload(ctx context.Context, input SetDesiredWorkloadInput) (NodeWorkloadDesiredState, error) {
	if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil {
		return NodeWorkloadDesiredState{}, err
	}
	if err := s.ensureClusterMutable(ctx, input.ActorUserID, input.ClusterID); err != nil {
		return NodeWorkloadDesiredState{}, err
	}

	input.ServiceType = strings.TrimSpace(input.ServiceType)
	complete := input.ClusterID != "" && input.NodeID != "" && input.ServiceType != ""
	if !complete {
		return NodeWorkloadDesiredState{}, ErrInvalidPayload
	}
	if input.DesiredState == "" {
		input.DesiredState = "disabled"
	}
	if input.RuntimeMode == "" {
		input.RuntimeMode = "container"
	}

	input.Config = defaultJSON(input.Config, `{}`)
	input.Environment = defaultJSON(input.Environment, `{}`)
	if !(json.Valid(input.Config) && json.Valid(input.Environment)) {
		return NodeWorkloadDesiredState{}, errors.New("config and environment must be valid json")
	}

	desired, err := s.store.SetDesiredWorkload(ctx, input)
	if err != nil {
		return NodeWorkloadDesiredState{}, err
	}

	// Best-effort audit trail: the result of RecordAudit is intentionally unused.
	event := ClusterAuditEvent{
		ClusterID:   &input.ClusterID,
		ActorUserID: &input.ActorUserID,
		EventType:   "node_workload.desired_state_set",
		TargetType:  "node",
		TargetID:    &input.NodeID,
		Payload:     json.RawMessage(`{"supervision_runtime":"stub_c5"}`),
		CreatedAt:   s.now(),
	}
	_ = s.store.RecordAudit(ctx, event)
	return desired, nil
}
// ListDesiredWorkloads returns the desired workload states stored for a node.
// The actor must pass ensurePlatformAdmin, and both clusterID and nodeID must
// be non-empty (ErrInvalidPayload otherwise).
func (s *Service) ListDesiredWorkloads(ctx context.Context, actorUserID, clusterID, nodeID string) ([]NodeWorkloadDesiredState, error) {
	authErr := s.ensurePlatformAdmin(ctx, actorUserID)
	if authErr != nil {
		return nil, authErr
	}
	if !(clusterID != "" && nodeID != "") {
		return nil, ErrInvalidPayload
	}
	workloads, err := s.store.ListDesiredWorkloads(ctx, clusterID, nodeID)
	return workloads, err
}
// ReportWorkloadStatus validates a workload status report and stores it.
//
// ClusterID, NodeID and a non-blank ServiceType are required. Defaults:
// ReportedState "unknown", RuntimeMode "container", StatusPayload `{}`.
// A StatusPayload that is not valid JSON is rejected. This method performs no
// actor authorization check of its own.
func (s *Service) ReportWorkloadStatus(ctx context.Context, input ReportWorkloadStatusInput) (NodeWorkloadStatus, error) {
	input.ServiceType = strings.TrimSpace(input.ServiceType)
	complete := input.ClusterID != "" && input.NodeID != "" && input.ServiceType != ""
	if !complete {
		return NodeWorkloadStatus{}, ErrInvalidPayload
	}

	if input.ReportedState == "" {
		input.ReportedState = "unknown"
	}
	if input.RuntimeMode == "" {
		input.RuntimeMode = "container"
	}

	input.StatusPayload = defaultJSON(input.StatusPayload, `{}`)
	if ok := json.Valid(input.StatusPayload); !ok {
		return NodeWorkloadStatus{}, errors.New("status_payload must be valid json")
	}
	status, err := s.store.ReportWorkloadStatus(ctx, input)
	return status, err
}
// ListLatestWorkloadStatuses returns the store's latest workload statuses for
// a node. The actor must pass ensurePlatformAdmin; the identifiers are
// forwarded to the store unchanged.
func (s *Service) ListLatestWorkloadStatuses(ctx context.Context, actorUserID, clusterID, nodeID string) ([]NodeWorkloadStatus, error) {
	authErr := s.ensurePlatformAdmin(ctx, actorUserID)
	if authErr != nil {
		return nil, authErr
	}
	statuses, err := s.store.ListLatestWorkloadStatuses(ctx, clusterID, nodeID)
	return statuses, err
}
// ReportMeshLink validates a mesh link observation and stores it.
//
// ClusterID, SourceNodeID and TargetNodeID are required. An empty LinkStatus
// becomes "unknown" and empty Metadata becomes `{}`; Metadata that is not
// valid JSON is rejected. No actor authorization is performed here.
func (s *Service) ReportMeshLink(ctx context.Context, input ReportMeshLinkInput) (MeshLinkObservation, error) {
	switch "" {
	case input.ClusterID, input.SourceNodeID, input.TargetNodeID:
		return MeshLinkObservation{}, ErrInvalidPayload
	}

	if input.LinkStatus == "" {
		input.LinkStatus = "unknown"
	}

	input.Metadata = defaultJSON(input.Metadata, `{}`)
	if ok := json.Valid(input.Metadata); !ok {
		return MeshLinkObservation{}, errors.New("metadata must be valid json")
	}
	observation, err := s.store.ReportMeshLink(ctx, input)
	return observation, err
}
// ListMeshLinks returns the store's mesh link observations for a cluster.
// The actor must pass ensurePlatformAdmin.
func (s *Service) ListMeshLinks(ctx context.Context, actorUserID, clusterID string) ([]MeshLinkObservation, error) {
	authErr := s.ensurePlatformAdmin(ctx, actorUserID)
	if authErr != nil {
		return nil, authErr
	}
	links, err := s.store.ListMeshLinks(ctx, clusterID)
	return links, err
}
// CreateRouteIntent stores a new mesh route intent.
//
// The actor must be a platform admin and the cluster must pass
// ensureClusterMutable. ClusterID and ServiceClass are required. A zero
// Priority becomes 100; empty selectors and policy become `{}` and must be
// valid JSON. After a successful write an audit event is recorded; the audit
// error is discarded so it cannot fail the call.
func (s *Service) CreateRouteIntent(ctx context.Context, input CreateRouteIntentInput) (MeshRouteIntent, error) {
	if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil {
		return MeshRouteIntent{}, err
	}
	if err := s.ensureClusterMutable(ctx, input.ActorUserID, input.ClusterID); err != nil {
		return MeshRouteIntent{}, err
	}
	if !(input.ClusterID != "" && input.ServiceClass != "") {
		return MeshRouteIntent{}, ErrInvalidPayload
	}
	if input.Priority == 0 {
		input.Priority = 100
	}

	input.SourceSelector = defaultJSON(input.SourceSelector, `{}`)
	input.DestinationSelector = defaultJSON(input.DestinationSelector, `{}`)
	input.Policy = defaultJSON(input.Policy, `{}`)
	for _, doc := range []json.RawMessage{input.SourceSelector, input.DestinationSelector, input.Policy} {
		if !json.Valid(doc) {
			return MeshRouteIntent{}, errors.New("source_selector, destination_selector, and policy must be valid json")
		}
	}

	intent, err := s.store.CreateRouteIntent(ctx, input)
	if err != nil {
		return MeshRouteIntent{}, err
	}

	// Best-effort audit trail: the result of RecordAudit is intentionally unused.
	event := ClusterAuditEvent{
		ClusterID:   &input.ClusterID,
		ActorUserID: &input.ActorUserID,
		EventType:   "mesh.route_intent.created",
		TargetType:  "mesh_route_intent",
		TargetID:    &intent.ID,
		Payload:     json.RawMessage(`{"traffic_forwarding_enabled":false}`),
		CreatedAt:   s.now(),
	}
	_ = s.store.RecordAudit(ctx, event)
	return intent, nil
}
// ListRouteIntents returns the store's mesh route intents for a cluster.
// The actor must pass ensurePlatformAdmin.
func (s *Service) ListRouteIntents(ctx context.Context, actorUserID, clusterID string) ([]MeshRouteIntent, error) {
	authErr := s.ensurePlatformAdmin(ctx, actorUserID)
	if authErr != nil {
		return nil, authErr
	}
	intents, err := s.store.ListRouteIntents(ctx, clusterID)
	return intents, err
}
// ListQoSPolicies returns the store's mesh QoS policies for a cluster.
// The actor must pass ensurePlatformAdmin.
func (s *Service) ListQoSPolicies(ctx context.Context, actorUserID, clusterID string) ([]MeshQoSPolicy, error) {
	authErr := s.ensurePlatformAdmin(ctx, actorUserID)
	if authErr != nil {
		return nil, authErr
	}
	policies, err := s.store.ListQoSPolicies(ctx, clusterID)
	return policies, err
}
// ListFabricEntryPoints returns the store's fabric entry points for a cluster.
// The actor must pass ensurePlatformAdmin.
func (s *Service) ListFabricEntryPoints(ctx context.Context, actorUserID, clusterID string) ([]FabricEntryPoint, error) {
	authErr := s.ensurePlatformAdmin(ctx, actorUserID)
	if authErr != nil {
		return nil, authErr
	}
	entryPoints, err := s.store.ListFabricEntryPoints(ctx, clusterID)
	return entryPoints, err
}
// CreateFabricEntryPoint stores a new fabric entry point.
//
// The actor must be a platform admin and the cluster must pass
// ensureClusterMutable. Name, Status and EndpointType are trimmed; Status
// defaults to "active" and EndpointType to "client_access". ClusterID and Name
// are required, and Status/EndpointType must be accepted by
// isFabricEndpointStatus / isFabricEntryPointType. A PublicEndpoint that is
// blank after trimming is stored as nil. Policy and Metadata default to `{}`
// and must be valid JSON. After a successful write an audit event is recorded;
// the audit error is discarded so it cannot fail the call.
func (s *Service) CreateFabricEntryPoint(ctx context.Context, input CreateFabricEntryPointInput) (FabricEntryPoint, error) {
	if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil {
		return FabricEntryPoint{}, err
	}
	if err := s.ensureClusterMutable(ctx, input.ActorUserID, input.ClusterID); err != nil {
		return FabricEntryPoint{}, err
	}

	input.Name = strings.TrimSpace(input.Name)
	if input.Status = strings.TrimSpace(input.Status); input.Status == "" {
		input.Status = "active"
	}
	if input.EndpointType = strings.TrimSpace(input.EndpointType); input.EndpointType == "" {
		input.EndpointType = "client_access"
	}

	switch {
	case input.ClusterID == "",
		input.Name == "",
		!isFabricEndpointStatus(input.Status),
		!isFabricEntryPointType(input.EndpointType):
		return FabricEntryPoint{}, ErrInvalidPayload
	}

	// Normalize the optional endpoint: blank means "not set".
	if endpoint := input.PublicEndpoint; endpoint != nil {
		if cleaned := strings.TrimSpace(*endpoint); cleaned != "" {
			input.PublicEndpoint = &cleaned
		} else {
			input.PublicEndpoint = nil
		}
	}

	input.Policy = defaultJSON(input.Policy, `{}`)
	input.Metadata = defaultJSON(input.Metadata, `{}`)
	if !(json.Valid(input.Policy) && json.Valid(input.Metadata)) {
		return FabricEntryPoint{}, errors.New("entry point policy and metadata must be valid json")
	}

	entryPoint, err := s.store.CreateFabricEntryPoint(ctx, input)
	if err != nil {
		return FabricEntryPoint{}, err
	}

	// Best-effort audit trail: the result of RecordAudit is intentionally unused.
	event := ClusterAuditEvent{
		ClusterID:   &input.ClusterID,
		ActorUserID: &input.ActorUserID,
		EventType:   "fabric.entry_point.created",
		TargetType:  "fabric_entry_point",
		TargetID:    &entryPoint.ID,
		Payload:     json.RawMessage(`{"runtime_routing_enabled":false}`),
		CreatedAt:   s.now(),
	}
	_ = s.store.RecordAudit(ctx, event)
	return entryPoint, nil
}
// SetFabricEntryPointNode stores a node's membership in a fabric entry point.
//
// The actor must be a platform admin and the cluster must pass
// ensureClusterMutable. Status is trimmed and defaults to "active"; a
// non-positive Priority becomes 100. ClusterID, EntryPointID and NodeID are
// required and Status must be accepted by isFabricEndpointStatus. Metadata
// defaults to `{}` and must be valid JSON. No audit event is recorded here.
func (s *Service) SetFabricEntryPointNode(ctx context.Context, input SetFabricEntryPointNodeInput) (FabricEntryPointNode, error) {
	if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil {
		return FabricEntryPointNode{}, err
	}
	if err := s.ensureClusterMutable(ctx, input.ActorUserID, input.ClusterID); err != nil {
		return FabricEntryPointNode{}, err
	}

	if input.Status = strings.TrimSpace(input.Status); input.Status == "" {
		input.Status = "active"
	}
	if input.Priority <= 0 {
		input.Priority = 100
	}

	switch {
	case input.ClusterID == "",
		input.EntryPointID == "",
		input.NodeID == "",
		!isFabricEndpointStatus(input.Status):
		return FabricEntryPointNode{}, ErrInvalidPayload
	}

	input.Metadata = defaultJSON(input.Metadata, `{}`)
	if ok := json.Valid(input.Metadata); !ok {
		return FabricEntryPointNode{}, errors.New("entry point node metadata must be valid json")
	}
	member, err := s.store.SetFabricEntryPointNode(ctx, input)
	return member, err
}
// ListFabricEntryPointNodes returns the nodes stored for a fabric entry point.
// The actor must pass ensurePlatformAdmin, and clusterID and entryPointID must
// both be non-empty (ErrInvalidPayload otherwise).
func (s *Service) ListFabricEntryPointNodes(ctx context.Context, actorUserID, clusterID, entryPointID string) ([]FabricEntryPointNode, error) {
	authErr := s.ensurePlatformAdmin(ctx, actorUserID)
	if authErr != nil {
		return nil, authErr
	}
	if !(clusterID != "" && entryPointID != "") {
		return nil, ErrInvalidPayload
	}
	members, err := s.store.ListFabricEntryPointNodes(ctx, clusterID, entryPointID)
	return members, err
}
// ListFabricEgressPools returns the store's fabric egress pools for a cluster.
// The actor must pass ensurePlatformAdmin.
func (s *Service) ListFabricEgressPools(ctx context.Context, actorUserID, clusterID string) ([]FabricEgressPool, error) {
	authErr := s.ensurePlatformAdmin(ctx, actorUserID)
	if authErr != nil {
		return nil, authErr
	}
	pools, err := s.store.ListFabricEgressPools(ctx, clusterID)
	return pools, err
}
// CreateFabricEgressPool stores a new fabric egress pool.
//
// The actor must be a platform admin and the cluster must pass
// ensureClusterMutable. Name and Status are trimmed; Status defaults to
// "active". ClusterID and Name are required and Status must be accepted by
// isFabricEndpointStatus. A Description that is blank after trimming is stored
// as nil. RouteScope, Policy and Metadata default to `{}` and must be valid
// JSON. After a successful write an audit event is recorded; the audit error
// is discarded so it cannot fail the call.
func (s *Service) CreateFabricEgressPool(ctx context.Context, input CreateFabricEgressPoolInput) (FabricEgressPool, error) {
	if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil {
		return FabricEgressPool{}, err
	}
	if err := s.ensureClusterMutable(ctx, input.ActorUserID, input.ClusterID); err != nil {
		return FabricEgressPool{}, err
	}

	input.Name = strings.TrimSpace(input.Name)
	if input.Status = strings.TrimSpace(input.Status); input.Status == "" {
		input.Status = "active"
	}

	switch {
	case input.ClusterID == "", input.Name == "", !isFabricEndpointStatus(input.Status):
		return FabricEgressPool{}, ErrInvalidPayload
	}

	// Normalize the optional description: blank means "not set".
	if description := input.Description; description != nil {
		if cleaned := strings.TrimSpace(*description); cleaned != "" {
			input.Description = &cleaned
		} else {
			input.Description = nil
		}
	}

	input.RouteScope = defaultJSON(input.RouteScope, `{}`)
	input.Policy = defaultJSON(input.Policy, `{}`)
	input.Metadata = defaultJSON(input.Metadata, `{}`)
	for _, doc := range []json.RawMessage{input.RouteScope, input.Policy, input.Metadata} {
		if !json.Valid(doc) {
			return FabricEgressPool{}, errors.New("egress pool route_scope, policy, and metadata must be valid json")
		}
	}

	pool, err := s.store.CreateFabricEgressPool(ctx, input)
	if err != nil {
		return FabricEgressPool{}, err
	}

	// Best-effort audit trail: the result of RecordAudit is intentionally unused.
	event := ClusterAuditEvent{
		ClusterID:   &input.ClusterID,
		ActorUserID: &input.ActorUserID,
		EventType:   "fabric.egress_pool.created",
		TargetType:  "fabric_egress_pool",
		TargetID:    &pool.ID,
		Payload:     json.RawMessage(`{"runtime_routing_enabled":false}`),
		CreatedAt:   s.now(),
	}
	_ = s.store.RecordAudit(ctx, event)
	return pool, nil
}
// SetFabricEgressPoolNode stores a node's membership in a fabric egress pool.
//
// The actor must be a platform admin and the cluster must pass
// ensureClusterMutable. Status is trimmed and defaults to "active"; a
// non-positive Priority becomes 100. ClusterID, EgressPoolID and NodeID are
// required and Status must be accepted by isFabricEndpointStatus. Metadata
// defaults to `{}` and must be valid JSON. No audit event is recorded here.
func (s *Service) SetFabricEgressPoolNode(ctx context.Context, input SetFabricEgressPoolNodeInput) (FabricEgressPoolNode, error) {
	if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil {
		return FabricEgressPoolNode{}, err
	}
	if err := s.ensureClusterMutable(ctx, input.ActorUserID, input.ClusterID); err != nil {
		return FabricEgressPoolNode{}, err
	}

	if input.Status = strings.TrimSpace(input.Status); input.Status == "" {
		input.Status = "active"
	}
	if input.Priority <= 0 {
		input.Priority = 100
	}

	switch {
	case input.ClusterID == "",
		input.EgressPoolID == "",
		input.NodeID == "",
		!isFabricEndpointStatus(input.Status):
		return FabricEgressPoolNode{}, ErrInvalidPayload
	}

	input.Metadata = defaultJSON(input.Metadata, `{}`)
	if ok := json.Valid(input.Metadata); !ok {
		return FabricEgressPoolNode{}, errors.New("egress pool node metadata must be valid json")
	}
	member, err := s.store.SetFabricEgressPoolNode(ctx, input)
	return member, err
}
// ListFabricEgressPoolNodes returns the nodes stored for a fabric egress pool.
// The actor must pass ensurePlatformAdmin, and clusterID and egressPoolID must
// both be non-empty (ErrInvalidPayload otherwise).
func (s *Service) ListFabricEgressPoolNodes(ctx context.Context, actorUserID, clusterID, egressPoolID string) ([]FabricEgressPoolNode, error) {
	authErr := s.ensurePlatformAdmin(ctx, actorUserID)
	if authErr != nil {
		return nil, authErr
	}
	if !(clusterID != "" && egressPoolID != "") {
		return nil, ErrInvalidPayload
	}
	members, err := s.store.ListFabricEgressPoolNodes(ctx, clusterID, egressPoolID)
	return members, err
}
// GetClusterAuthorityState returns the stored authority state of a cluster.
// The actor must pass ensurePlatformAdmin; store errors are returned as-is.
func (s *Service) GetClusterAuthorityState(ctx context.Context, actorUserID, clusterID string) (ClusterAuthorityState, error) {
	authErr := s.ensurePlatformAdmin(ctx, actorUserID)
	if authErr != nil {
		return ClusterAuthorityState{}, authErr
	}
	state, err := s.store.GetClusterAuthorityState(ctx, clusterID)
	return state, err
}
// UpdateClusterAuthorityState changes a cluster's authority state and mutation
// mode on behalf of a platform administrator.
//
// Access rules:
//   - a blank actor ID is rejected with ErrAccessDenied before any store call,
//     matching ensurePlatformAdmin;
//   - the actor's platform role must satisfy isPlatformAdminRole;
//   - the "recovery_override" mutation mode additionally requires
//     PlatformRoleRecoveryAdmin. MutationMode is trimmed before this check so
//     surrounding whitespace cannot sidestep the gate.
//
// An empty AuthorityState defaults to "authoritative" and an empty
// MutationMode to "normal". On success a "cluster_authority.updated" audit
// event is recorded; the audit error is discarded so it cannot fail the update.
// Store errors are returned unchanged together with a zero state.
func (s *Service) UpdateClusterAuthorityState(ctx context.Context, input UpdateClusterAuthorityInput) (ClusterAuthorityState, error) {
	actorUserID := strings.TrimSpace(input.ActorUserID)
	if actorUserID == "" {
		// Same policy as ensurePlatformAdmin: never look up a role for a blank ID.
		return ClusterAuthorityState{}, ErrAccessDenied
	}
	role, err := s.store.GetPlatformRole(ctx, actorUserID)
	if err != nil {
		return ClusterAuthorityState{}, err
	}
	if !isPlatformAdminRole(role) {
		return ClusterAuthorityState{}, ErrAccessDenied
	}

	// Normalize before the privilege check so the comparison sees the same
	// value that is later handed to the store.
	input.MutationMode = strings.TrimSpace(input.MutationMode)
	if input.MutationMode == "recovery_override" && role != PlatformRoleRecoveryAdmin {
		return ClusterAuthorityState{}, ErrAccessDenied
	}

	if input.AuthorityState == "" {
		input.AuthorityState = "authoritative"
	}
	if input.MutationMode == "" {
		input.MutationMode = "normal"
	}

	item, err := s.store.UpdateClusterAuthorityState(ctx, input)
	if err != nil {
		return ClusterAuthorityState{}, err
	}

	// Best-effort audit trail: the result of RecordAudit is intentionally unused.
	_ = s.store.RecordAudit(ctx, ClusterAuditEvent{
		ClusterID:   &input.ClusterID,
		ActorUserID: &input.ActorUserID,
		EventType:   "cluster_authority.updated",
		TargetType:  "cluster",
		TargetID:    &input.ClusterID,
		Payload:     json.RawMessage(`{"split_brain_guard":true}`),
		CreatedAt:   s.now(),
	})
	return item, nil
}
// ListClusterAdminSummaries returns the store's cluster admin summaries.
// The actor must pass ensurePlatformAdmin.
func (s *Service) ListClusterAdminSummaries(ctx context.Context, actorUserID string) ([]ClusterAdminSummary, error) {
	authErr := s.ensurePlatformAdmin(ctx, actorUserID)
	if authErr != nil {
		return nil, authErr
	}
	summaries, err := s.store.ListClusterAdminSummaries(ctx)
	return summaries, err
}
// CreateVPNConnection stores a new VPN connection definition.
//
// The actor must be a platform admin and the cluster must pass
// ensureClusterMutable. Name, ProtocolFamily, Mode and DesiredState are
// trimmed. Defaults: ProtocolFamily "generic", Mode
// VPNConnectionModeSingleActive, DesiredState VPNConnectionDesiredDisabled.
// ClusterID, OrganizationID and Name are required; Mode must be single-active
// and DesiredState must be accepted by isAllowedVPNDesiredState. Every JSON
// field gets a default document when empty and must be valid JSON. After a
// successful write an audit event is recorded; the audit error is discarded so
// it cannot fail the call.
func (s *Service) CreateVPNConnection(ctx context.Context, input CreateVPNConnectionInput) (VPNConnection, error) {
	if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil {
		return VPNConnection{}, err
	}
	if err := s.ensureClusterMutable(ctx, input.ActorUserID, input.ClusterID); err != nil {
		return VPNConnection{}, err
	}

	input.Name = strings.TrimSpace(input.Name)
	if input.ProtocolFamily = strings.TrimSpace(input.ProtocolFamily); input.ProtocolFamily == "" {
		input.ProtocolFamily = "generic"
	}
	if input.Mode = strings.TrimSpace(input.Mode); input.Mode == "" {
		input.Mode = VPNConnectionModeSingleActive
	}
	if input.DesiredState = strings.TrimSpace(input.DesiredState); input.DesiredState == "" {
		input.DesiredState = VPNConnectionDesiredDisabled
	}

	switch {
	case input.ClusterID == "", input.OrganizationID == "", input.Name == "":
		return VPNConnection{}, ErrInvalidPayload
	case input.Mode != VPNConnectionModeSingleActive:
		return VPNConnection{}, errors.New("vpn connection mode must be single_active")
	case !isAllowedVPNDesiredState(input.DesiredState):
		return VPNConnection{}, errors.New("vpn connection desired_state must be enabled or disabled")
	}

	input.TargetEndpoint = defaultJSON(input.TargetEndpoint, `{}`)
	input.AllowedNodePolicy = defaultJSON(input.AllowedNodePolicy, `{"mode":"explicit","node_ids":[]}`)
	input.RoutingUsage = defaultJSON(input.RoutingUsage, `[]`)
	input.RoutePolicy = defaultJSON(input.RoutePolicy, `{}`)
	input.QoSPolicy = defaultJSON(input.QoSPolicy, `{}`)
	input.PlacementPolicy = defaultJSON(input.PlacementPolicy, `{}`)
	input.Metadata = defaultJSON(input.Metadata, `{}`)
	documents := []json.RawMessage{
		input.TargetEndpoint,
		input.AllowedNodePolicy,
		input.RoutingUsage,
		input.RoutePolicy,
		input.QoSPolicy,
		input.PlacementPolicy,
		input.Metadata,
	}
	for _, doc := range documents {
		if !json.Valid(doc) {
			return VPNConnection{}, errors.New("vpn connection json fields must be valid json")
		}
	}

	connection, err := s.store.CreateVPNConnection(ctx, input)
	if err != nil {
		return VPNConnection{}, err
	}

	// Best-effort audit trail: the result of RecordAudit is intentionally unused.
	event := ClusterAuditEvent{
		ClusterID:   &input.ClusterID,
		ActorUserID: &input.ActorUserID,
		EventType:   "vpn_connection.created",
		TargetType:  "vpn_connection",
		TargetID:    &connection.ID,
		Payload:     json.RawMessage(`{"runtime_created":false}`),
		CreatedAt:   s.now(),
	}
	_ = s.store.RecordAudit(ctx, event)
	return connection, nil
}
// ListVPNConnections returns the store's VPN connections for a cluster.
// The actor must pass ensurePlatformAdmin.
func (s *Service) ListVPNConnections(ctx context.Context, actorUserID, clusterID string) ([]VPNConnection, error) {
	authErr := s.ensurePlatformAdmin(ctx, actorUserID)
	if authErr != nil {
		return nil, authErr
	}
	connections, err := s.store.ListVPNConnections(ctx, clusterID)
	return connections, err
}
// GetVPNConnection loads one VPN connection from the store.
// The actor must pass ensurePlatformAdmin. A pgx.ErrNoRows result is mapped to
// ErrInvalidVPNConnection; any other store result is returned unchanged.
func (s *Service) GetVPNConnection(ctx context.Context, actorUserID, clusterID, vpnConnectionID string) (VPNConnection, error) {
	authErr := s.ensurePlatformAdmin(ctx, actorUserID)
	if authErr != nil {
		return VPNConnection{}, authErr
	}
	connection, err := s.store.GetVPNConnection(ctx, clusterID, vpnConnectionID)
	switch {
	case errors.Is(err, pgx.ErrNoRows):
		return VPNConnection{}, ErrInvalidVPNConnection
	default:
		return connection, err
	}
}
// UpdateVPNConnectionDesiredState changes the desired state of a VPN connection.
//
// The actor must be a platform admin and the cluster must pass
// ensureClusterMutable. DesiredState is trimmed and must be accepted by
// isAllowedVPNDesiredState. pgx.ErrNoRows from the store is mapped to
// ErrInvalidVPNConnection. After a successful write an audit event is
// recorded; the audit error is discarded so it cannot fail the call.
func (s *Service) UpdateVPNConnectionDesiredState(ctx context.Context, input UpdateVPNConnectionDesiredStateInput) (VPNConnection, error) {
	if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil {
		return VPNConnection{}, err
	}
	if err := s.ensureClusterMutable(ctx, input.ActorUserID, input.ClusterID); err != nil {
		return VPNConnection{}, err
	}

	input.DesiredState = strings.TrimSpace(input.DesiredState)
	if allowed := isAllowedVPNDesiredState(input.DesiredState); !allowed {
		return VPNConnection{}, errors.New("vpn connection desired_state must be enabled or disabled")
	}

	connection, err := s.store.UpdateVPNConnectionDesiredState(ctx, input)
	switch {
	case errors.Is(err, pgx.ErrNoRows):
		return VPNConnection{}, ErrInvalidVPNConnection
	case err != nil:
		return VPNConnection{}, err
	}

	// Best-effort audit trail: the result of RecordAudit is intentionally unused.
	event := ClusterAuditEvent{
		ClusterID:   &input.ClusterID,
		ActorUserID: &input.ActorUserID,
		EventType:   "vpn_connection.desired_state_changed",
		TargetType:  "vpn_connection",
		TargetID:    &input.VPNConnectionID,
		Payload:     json.RawMessage(`{"runtime_executed":false}`),
		CreatedAt:   s.now(),
	}
	_ = s.store.RecordAudit(ctx, event)
	return connection, nil
}
// UpsertVPNConnectionRoutePolicy creates or updates a route policy entry of a
// VPN connection.
//
// The actor must be a platform admin and the cluster must pass
// ensureClusterMutable. RouteType, Destination, Action and Status are trimmed.
// Defaults: Action "allow", Status "active", Priority 100 (when zero).
// ClusterID, VPNConnectionID, RouteType and Destination are required, and
// RouteType/Action/Status must be accepted by their isAllowed* helpers.
// Policy defaults to `{}` and must be valid JSON. pgx.ErrNoRows from the store
// is mapped to ErrInvalidVPNConnection. After a successful write an audit
// event is recorded; the audit error is discarded so it cannot fail the call.
func (s *Service) UpsertVPNConnectionRoutePolicy(ctx context.Context, input UpsertVPNConnectionRoutePolicyInput) (VPNConnectionRoutePolicy, error) {
	if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil {
		return VPNConnectionRoutePolicy{}, err
	}
	if err := s.ensureClusterMutable(ctx, input.ActorUserID, input.ClusterID); err != nil {
		return VPNConnectionRoutePolicy{}, err
	}

	input.RouteType = strings.TrimSpace(input.RouteType)
	input.Destination = strings.TrimSpace(input.Destination)
	if input.Action = strings.TrimSpace(input.Action); input.Action == "" {
		input.Action = "allow"
	}
	if input.Status = strings.TrimSpace(input.Status); input.Status == "" {
		input.Status = "active"
	}
	if input.Priority == 0 {
		input.Priority = 100
	}

	switch {
	case input.ClusterID == "", input.VPNConnectionID == "", input.RouteType == "", input.Destination == "":
		return VPNConnectionRoutePolicy{}, ErrInvalidPayload
	case !isAllowedVPNRouteType(input.RouteType),
		!isAllowedVPNRouteAction(input.Action),
		!isAllowedVPNPolicyStatus(input.Status):
		return VPNConnectionRoutePolicy{}, ErrInvalidPayload
	}

	input.Policy = defaultJSON(input.Policy, `{}`)
	if ok := json.Valid(input.Policy); !ok {
		return VPNConnectionRoutePolicy{}, errors.New("vpn route policy json must be valid json")
	}

	policy, err := s.store.UpsertVPNConnectionRoutePolicy(ctx, input)
	switch {
	case errors.Is(err, pgx.ErrNoRows):
		return VPNConnectionRoutePolicy{}, ErrInvalidVPNConnection
	case err != nil:
		return VPNConnectionRoutePolicy{}, err
	}

	// Best-effort audit trail: the result of RecordAudit is intentionally unused.
	event := ClusterAuditEvent{
		ClusterID:   &input.ClusterID,
		ActorUserID: &input.ActorUserID,
		EventType:   "vpn_connection.route_policy_changed",
		TargetType:  "vpn_connection",
		TargetID:    &input.VPNConnectionID,
		Payload:     json.RawMessage(`{"routing_runtime_changed":false}`),
		CreatedAt:   s.now(),
	}
	_ = s.store.RecordAudit(ctx, event)
	return policy, nil
}
// ListVPNConnectionRoutePolicies returns the store's route policies for a VPN
// connection. The actor must pass ensurePlatformAdmin.
func (s *Service) ListVPNConnectionRoutePolicies(ctx context.Context, actorUserID, clusterID, vpnConnectionID string) ([]VPNConnectionRoutePolicy, error) {
	authErr := s.ensurePlatformAdmin(ctx, actorUserID)
	if authErr != nil {
		return nil, authErr
	}
	policies, err := s.store.ListVPNConnectionRoutePolicies(ctx, clusterID, vpnConnectionID)
	return policies, err
}
// SetVPNConnectionAllowedNodes stores the set of nodes allowed for a VPN
// connection.
//
// The actor must be a platform admin and the cluster must pass
// ensureClusterMutable. RolePreference is trimmed, defaults to "candidate" and
// must be accepted by isAllowedVPNNodePreference. ClusterID and
// VPNConnectionID are required. Metadata defaults to `{}` and must be valid
// JSON. NodeIDs are trimmed, blanks are dropped and duplicates removed while
// keeping first-seen order. pgx.ErrNoRows from the store is mapped to
// ErrInvalidVPNConnection. After a successful write an audit event is
// recorded; the audit error is discarded so it cannot fail the call.
func (s *Service) SetVPNConnectionAllowedNodes(ctx context.Context, input SetVPNConnectionAllowedNodesInput) ([]VPNConnectionAllowedNode, error) {
	if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil {
		return nil, err
	}
	if err := s.ensureClusterMutable(ctx, input.ActorUserID, input.ClusterID); err != nil {
		return nil, err
	}

	if input.RolePreference = strings.TrimSpace(input.RolePreference); input.RolePreference == "" {
		input.RolePreference = "candidate"
	}
	switch {
	case input.ClusterID == "", input.VPNConnectionID == "":
		return nil, ErrInvalidPayload
	case !isAllowedVPNNodePreference(input.RolePreference):
		return nil, ErrInvalidPayload
	}

	input.Metadata = defaultJSON(input.Metadata, `{}`)
	if ok := json.Valid(input.Metadata); !ok {
		return nil, errors.New("allowed node metadata must be valid json")
	}

	// Normalize the node list: trimmed, non-blank, unique, original order.
	unique := make([]string, 0, len(input.NodeIDs))
	known := make(map[string]struct{})
	for _, raw := range input.NodeIDs {
		id := strings.TrimSpace(raw)
		if _, dup := known[id]; id == "" || dup {
			continue
		}
		known[id] = struct{}{}
		unique = append(unique, id)
	}
	input.NodeIDs = unique

	allowed, err := s.store.SetVPNConnectionAllowedNodes(ctx, input)
	switch {
	case errors.Is(err, pgx.ErrNoRows):
		return nil, ErrInvalidVPNConnection
	case err != nil:
		return nil, err
	}

	// Best-effort audit trail: the result of RecordAudit is intentionally unused.
	event := ClusterAuditEvent{
		ClusterID:   &input.ClusterID,
		ActorUserID: &input.ActorUserID,
		EventType:   "vpn_connection.allowed_nodes_changed",
		TargetType:  "vpn_connection",
		TargetID:    &input.VPNConnectionID,
		Payload:     json.RawMessage(`{"node_runtime_changed":false}`),
		CreatedAt:   s.now(),
	}
	_ = s.store.RecordAudit(ctx, event)
	return allowed, nil
}
// ListVPNConnectionAllowedNodes returns the store's allowed nodes for a VPN
// connection. The actor must pass ensurePlatformAdmin.
func (s *Service) ListVPNConnectionAllowedNodes(ctx context.Context, actorUserID, clusterID, vpnConnectionID string) ([]VPNConnectionAllowedNode, error) {
	authErr := s.ensurePlatformAdmin(ctx, actorUserID)
	if authErr != nil {
		return nil, authErr
	}
	allowed, err := s.store.ListVPNConnectionAllowedNodes(ctx, clusterID, vpnConnectionID)
	return allowed, err
}
// AcquireVPNConnectionLease acquires an ownership lease on a VPN connection
// for OwnerNodeID.
//
// Steps, in order: platform-admin check; required-field check (ClusterID,
// VPNConnectionID, OwnerNodeID); load the connection (pgx.ErrNoRows maps to
// ErrInvalidVPNConnection) and require it to be single-active and enabled;
// owner eligibility via ensureVPNLeaseOwnerEligible; TTL defaults to 30s when
// non-positive; Metadata defaults to `{}` and must be valid JSON; a fencing
// token is generated; the store is asked to acquire the lease with an expiry
// of now+TTL. Store results: pgx.ErrNoRows maps to ErrInvalidVPNLease and an
// error matching ErrVPNLeaseAlreadyActive is returned as that sentinel. After
// success an audit event is recorded; the audit error is discarded.
func (s *Service) AcquireVPNConnectionLease(ctx context.Context, input AcquireVPNConnectionLeaseInput) (VPNConnectionLease, error) {
	if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil {
		return VPNConnectionLease{}, err
	}
	switch "" {
	case input.ClusterID, input.VPNConnectionID, input.OwnerNodeID:
		return VPNConnectionLease{}, ErrInvalidPayload
	}

	connection, err := s.store.GetVPNConnection(ctx, input.ClusterID, input.VPNConnectionID)
	switch {
	case errors.Is(err, pgx.ErrNoRows):
		return VPNConnectionLease{}, ErrInvalidVPNConnection
	case err != nil:
		return VPNConnectionLease{}, err
	}
	leasable := connection.Mode == VPNConnectionModeSingleActive && connection.DesiredState == VPNConnectionDesiredEnabled
	if !leasable {
		return VPNConnectionLease{}, errors.New("vpn connection must be enabled single_active before lease acquisition")
	}
	if err := s.ensureVPNLeaseOwnerEligible(ctx, input.ClusterID, input.VPNConnectionID, input.OwnerNodeID); err != nil {
		return VPNConnectionLease{}, err
	}

	if input.TTL <= 0 {
		input.TTL = 30 * time.Second
	}
	input.Metadata = defaultJSON(input.Metadata, `{}`)
	if ok := json.Valid(input.Metadata); !ok {
		return VPNConnectionLease{}, errors.New("lease metadata must be valid json")
	}

	fencingToken, err := generateFencingToken()
	if err != nil {
		return VPNConnectionLease{}, err
	}
	expiresAt := s.now().Add(input.TTL)
	lease, err := s.store.AcquireVPNConnectionLease(ctx, input, expiresAt, fencingToken)
	switch {
	case errors.Is(err, pgx.ErrNoRows):
		return VPNConnectionLease{}, ErrInvalidVPNLease
	case errors.Is(err, ErrVPNLeaseAlreadyActive):
		return VPNConnectionLease{}, ErrVPNLeaseAlreadyActive
	case err != nil:
		return VPNConnectionLease{}, err
	}

	// Best-effort audit trail: the result of RecordAudit is intentionally unused.
	event := ClusterAuditEvent{
		ClusterID:   &input.ClusterID,
		ActorUserID: &input.ActorUserID,
		EventType:   "vpn_connection.lease_acquired",
		TargetType:  "vpn_connection",
		TargetID:    &input.VPNConnectionID,
		Payload:     json.RawMessage(`{"vpn_runtime_started":false}`),
		CreatedAt:   s.now(),
	}
	_ = s.store.RecordAudit(ctx, event)
	return lease, nil
}
// RenewVPNConnectionLease extends an existing VPN connection lease.
//
// The actor must be a platform admin. ClusterID, VPNConnectionID, LeaseID,
// OwnerNodeID and FencingToken are required. A non-positive TTL becomes 30s.
// The owner must still pass ensureVPNLeaseOwnerEligible. The store is asked to
// renew with an expiry of now+TTL; pgx.ErrNoRows maps to ErrInvalidVPNLease.
// After success an audit event is recorded; the audit error is discarded.
func (s *Service) RenewVPNConnectionLease(ctx context.Context, input RenewVPNConnectionLeaseInput) (VPNConnectionLease, error) {
	if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil {
		return VPNConnectionLease{}, err
	}
	switch "" {
	case input.ClusterID, input.VPNConnectionID, input.LeaseID, input.OwnerNodeID, input.FencingToken:
		return VPNConnectionLease{}, ErrInvalidPayload
	}
	if input.TTL <= 0 {
		input.TTL = 30 * time.Second
	}
	if err := s.ensureVPNLeaseOwnerEligible(ctx, input.ClusterID, input.VPNConnectionID, input.OwnerNodeID); err != nil {
		return VPNConnectionLease{}, err
	}

	expiresAt := s.now().Add(input.TTL)
	lease, err := s.store.RenewVPNConnectionLease(ctx, input, expiresAt)
	switch {
	case errors.Is(err, pgx.ErrNoRows):
		return VPNConnectionLease{}, ErrInvalidVPNLease
	case err != nil:
		return VPNConnectionLease{}, err
	}

	// Best-effort audit trail: the result of RecordAudit is intentionally unused.
	event := ClusterAuditEvent{
		ClusterID:   &input.ClusterID,
		ActorUserID: &input.ActorUserID,
		EventType:   "vpn_connection.lease_renewed",
		TargetType:  "vpn_connection",
		TargetID:    &input.VPNConnectionID,
		Payload:     json.RawMessage(`{"vpn_runtime_changed":false}`),
		CreatedAt:   s.now(),
	}
	_ = s.store.RecordAudit(ctx, event)
	// err is necessarily nil here; both failure cases returned above.
	return lease, nil
}
// ReleaseVPNConnectionLease releases a VPN connection lease.
//
// The actor must be a platform admin. ClusterID, VPNConnectionID, LeaseID,
// OwnerNodeID and FencingToken are required. pgx.ErrNoRows from the store maps
// to ErrInvalidVPNLease. After success an audit event is recorded; the audit
// error is discarded so it cannot fail the call.
func (s *Service) ReleaseVPNConnectionLease(ctx context.Context, input ReleaseVPNConnectionLeaseInput) (VPNConnectionLease, error) {
	if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil {
		return VPNConnectionLease{}, err
	}
	switch "" {
	case input.ClusterID, input.VPNConnectionID, input.LeaseID, input.OwnerNodeID, input.FencingToken:
		return VPNConnectionLease{}, ErrInvalidPayload
	}

	lease, err := s.store.ReleaseVPNConnectionLease(ctx, input)
	switch {
	case errors.Is(err, pgx.ErrNoRows):
		return VPNConnectionLease{}, ErrInvalidVPNLease
	case err != nil:
		return VPNConnectionLease{}, err
	}

	// Best-effort audit trail: the result of RecordAudit is intentionally unused.
	event := ClusterAuditEvent{
		ClusterID:   &input.ClusterID,
		ActorUserID: &input.ActorUserID,
		EventType:   "vpn_connection.lease_released",
		TargetType:  "vpn_connection",
		TargetID:    &input.VPNConnectionID,
		Payload:     json.RawMessage(`{"vpn_runtime_stopped":false}`),
		CreatedAt:   s.now(),
	}
	_ = s.store.RecordAudit(ctx, event)
	return lease, nil
}
// FenceVPNConnectionLease fences a VPN connection lease.
//
// The actor must pass ensurePlatformRecoveryAdmin. Reason is trimmed and
// defaults to a fixed administrator message when blank. ClusterID,
// VPNConnectionID and LeaseID are required. pgx.ErrNoRows from the store maps
// to ErrInvalidVPNLease. After success an audit event is recorded; the audit
// error is discarded so it cannot fail the call.
func (s *Service) FenceVPNConnectionLease(ctx context.Context, input FenceVPNConnectionLeaseInput) (VPNConnectionLease, error) {
	if err := s.ensurePlatformRecoveryAdmin(ctx, input.ActorUserID); err != nil {
		return VPNConnectionLease{}, err
	}

	if input.Reason = strings.TrimSpace(input.Reason); input.Reason == "" {
		input.Reason = "fenced by platform recovery administrator"
	}
	switch "" {
	case input.ClusterID, input.VPNConnectionID, input.LeaseID:
		return VPNConnectionLease{}, ErrInvalidPayload
	}

	lease, err := s.store.FenceVPNConnectionLease(ctx, input)
	switch {
	case errors.Is(err, pgx.ErrNoRows):
		return VPNConnectionLease{}, ErrInvalidVPNLease
	case err != nil:
		return VPNConnectionLease{}, err
	}

	// Best-effort audit trail: the result of RecordAudit is intentionally unused.
	event := ClusterAuditEvent{
		ClusterID:   &input.ClusterID,
		ActorUserID: &input.ActorUserID,
		EventType:   "vpn_connection.owner_fenced",
		TargetType:  "vpn_connection",
		TargetID:    &input.VPNConnectionID,
		Payload:     json.RawMessage(`{"split_brain_guard":true}`),
		CreatedAt:   s.now(),
	}
	_ = s.store.RecordAudit(ctx, event)
	return lease, nil
}
// GetActiveVPNConnectionLease loads the active lease of a VPN connection.
// The actor must pass ensurePlatformAdmin. A pgx.ErrNoRows result is mapped to
// ErrInvalidVPNLease; any other store result is returned unchanged.
func (s *Service) GetActiveVPNConnectionLease(ctx context.Context, actorUserID, clusterID, vpnConnectionID string) (VPNConnectionLease, error) {
	authErr := s.ensurePlatformAdmin(ctx, actorUserID)
	if authErr != nil {
		return VPNConnectionLease{}, authErr
	}
	lease, err := s.store.GetActiveVPNConnectionLease(ctx, clusterID, vpnConnectionID)
	switch {
	case errors.Is(err, pgx.ErrNoRows):
		return VPNConnectionLease{}, ErrInvalidVPNLease
	default:
		return lease, err
	}
}
// ExpireStaleVPNConnectionLeases asks the store to expire stale leases of a
// cluster as of the service clock's current time and returns the affected
// leases.
//
// The actor must be a platform admin and ClusterID is required. One
// "vpn_connection.lease_expired" audit event is recorded per returned lease;
// audit errors are discarded so they cannot fail the call.
func (s *Service) ExpireStaleVPNConnectionLeases(ctx context.Context, input ExpireStaleVPNConnectionLeasesInput) ([]VPNConnectionLease, error) {
	if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil {
		return nil, err
	}
	if input.ClusterID == "" {
		return nil, ErrInvalidPayload
	}

	expired, err := s.store.ExpireStaleVPNConnectionLeases(ctx, input.ClusterID, s.now())
	if err != nil {
		return nil, err
	}

	for i := range expired {
		// Copy the ID so the audit event does not point into the result slice.
		connectionID := expired[i].VPNConnectionID
		event := ClusterAuditEvent{
			ClusterID:   &input.ClusterID,
			ActorUserID: &input.ActorUserID,
			EventType:   "vpn_connection.lease_expired",
			TargetType:  "vpn_connection",
			TargetID:    &connectionID,
			Payload:     json.RawMessage(`{"stale_reclamation":true,"vpn_runtime_changed":false}`),
			CreatedAt:   s.now(),
		}
		_ = s.store.RecordAudit(ctx, event)
	}
	return expired, nil
}
// ListNodeVPNAssignments returns the store's VPN assignments for a node.
// Both identifiers are trimmed and must be non-empty (ErrInvalidPayload
// otherwise). No actor authorization is performed here.
func (s *Service) ListNodeVPNAssignments(ctx context.Context, clusterID, nodeID string) ([]NodeVPNAssignment, error) {
	cluster, node := strings.TrimSpace(clusterID), strings.TrimSpace(nodeID)
	if !(cluster != "" && node != "") {
		return nil, ErrInvalidPayload
	}
	assignments, err := s.store.ListNodeVPNAssignments(ctx, cluster, node)
	return assignments, err
}
// ReportNodeVPNAssignmentStatus records a node's observed status for one of
// its VPN assignments.
//
// ClusterID, NodeID, VPNConnectionID and ObservedStatus are trimmed; the three
// identifiers are required. ObservedStatus defaults to
// VPNAssignmentStatusUnknown and must be accepted by
// isAllowedVPNAssignmentStatus. StatusPayload defaults to `{}` and must be
// valid JSON; a zero ObservedAt becomes the current service time. The report
// is accepted only if the connection appears among the node's assignments
// returned by the store; otherwise ErrVPNLeaseOwnerNotAllowed is returned.
// After a successful write an audit event without an actor is recorded; the
// audit error is discarded so it cannot fail the call.
func (s *Service) ReportNodeVPNAssignmentStatus(ctx context.Context, input ReportNodeVPNAssignmentStatusInput) (NodeVPNAssignmentStatus, error) {
	input.ClusterID = strings.TrimSpace(input.ClusterID)
	input.NodeID = strings.TrimSpace(input.NodeID)
	input.VPNConnectionID = strings.TrimSpace(input.VPNConnectionID)
	input.ObservedStatus = strings.TrimSpace(input.ObservedStatus)

	switch "" {
	case input.ClusterID, input.NodeID, input.VPNConnectionID:
		return NodeVPNAssignmentStatus{}, ErrInvalidPayload
	}
	if input.ObservedStatus == "" {
		input.ObservedStatus = VPNAssignmentStatusUnknown
	}
	if allowed := isAllowedVPNAssignmentStatus(input.ObservedStatus); !allowed {
		return NodeVPNAssignmentStatus{}, ErrInvalidPayload
	}

	input.StatusPayload = defaultJSON(input.StatusPayload, `{}`)
	if ok := json.Valid(input.StatusPayload); !ok {
		return NodeVPNAssignmentStatus{}, errors.New("status_payload must be valid json")
	}
	if input.ObservedAt.IsZero() {
		input.ObservedAt = s.now()
	}

	assignments, err := s.store.ListNodeVPNAssignments(ctx, input.ClusterID, input.NodeID)
	if err != nil {
		return NodeVPNAssignmentStatus{}, err
	}
	// A node may only report on connections that are listed among its assignments.
	assigned := false
	for i := 0; i < len(assignments) && !assigned; i++ {
		assigned = assignments[i].VPNConnectionID == input.VPNConnectionID
	}
	if !assigned {
		return NodeVPNAssignmentStatus{}, ErrVPNLeaseOwnerNotAllowed
	}

	status, err := s.store.ReportNodeVPNAssignmentStatus(ctx, input)
	if err != nil {
		return NodeVPNAssignmentStatus{}, err
	}

	// Best-effort audit trail: the result of RecordAudit is intentionally unused.
	event := ClusterAuditEvent{
		ClusterID:  &input.ClusterID,
		EventType:  "vpn_connection.assignment_status_reported",
		TargetType: "vpn_connection",
		TargetID:   &input.VPNConnectionID,
		Payload:    json.RawMessage(`{"node_agent_runtime_executed":false}`),
		CreatedAt:  s.now(),
	}
	_ = s.store.RecordAudit(ctx, event)
	return status, nil
}
// ListAuditEvents returns the store's audit events for a cluster.
// The actor must pass ensurePlatformAdmin; limit is forwarded to the store
// unchanged.
func (s *Service) ListAuditEvents(ctx context.Context, actorUserID, clusterID string, limit int) ([]ClusterAuditEvent, error) {
	authErr := s.ensurePlatformAdmin(ctx, actorUserID)
	if authErr != nil {
		return nil, authErr
	}
	events, err := s.store.ListAuditEvents(ctx, clusterID, limit)
	return events, err
}
// ensurePlatformAdmin returns nil when userID (after trimming) has a platform
// role accepted by isPlatformAdminRole. A blank ID or a non-admin role yields
// ErrAccessDenied; a failed role lookup returns the store error.
func (s *Service) ensurePlatformAdmin(ctx context.Context, userID string) error {
	actor := strings.TrimSpace(userID)
	if actor == "" {
		return ErrAccessDenied
	}
	role, err := s.store.GetPlatformRole(ctx, actor)
	switch {
	case err != nil:
		return err
	case isPlatformAdminRole(role):
		return nil
	default:
		return ErrAccessDenied
	}
}
// ensurePlatformRecoveryAdmin returns nil only when userID (after trimming)
// has exactly the PlatformRoleRecoveryAdmin role. A blank ID or any other role
// yields ErrAccessDenied; a failed role lookup returns the store error.
func (s *Service) ensurePlatformRecoveryAdmin(ctx context.Context, userID string) error {
	actor := strings.TrimSpace(userID)
	if actor == "" {
		return ErrAccessDenied
	}
	role, err := s.store.GetPlatformRole(ctx, actor)
	switch {
	case err != nil:
		return err
	case role == PlatformRoleRecoveryAdmin:
		return nil
	default:
		return ErrAccessDenied
	}
}
// ensureClusterMutable reports whether the actor may mutate policy in the
// given cluster.
//
// A recovery admin is always allowed. For other actors the cluster's authority
// state is loaded: a missing row (pgx.ErrNoRows) is treated as mutable, and an
// existing row must be "authoritative" with mutation mode "normal"; otherwise
// ErrClusterReadOnly is returned. Lookup errors are returned unchanged.
func (s *Service) ensureClusterMutable(ctx context.Context, actorUserID, clusterID string) error {
	role, err := s.store.GetPlatformRole(ctx, strings.TrimSpace(actorUserID))
	if err != nil {
		return err
	}
	if role == PlatformRoleRecoveryAdmin {
		return nil
	}

	authority, err := s.store.GetClusterAuthorityState(ctx, clusterID)
	switch {
	case err == nil:
		// Fall through to the state check below.
	case errors.Is(err, pgx.ErrNoRows):
		return nil
	default:
		return err
	}

	if authority.AuthorityState == "authoritative" && authority.MutationMode == "normal" {
		return nil
	}
	return ErrClusterReadOnly
}
// ensureVPNLeaseOwnerEligible checks whether ownerNodeID may hold a lease on
// the VPN connection, based on the store's eligibility record.
//
// pgx.ErrNoRows maps to ErrInvalidVPNConnection. The node must have membership
// status "active" and registration status NodeRegistrationActive and must be
// allowed by policy, else ErrVPNLeaseOwnerNotAllowed. A node without an
// authorized role yields ErrVPNLeaseOwnerRoleRequired.
func (s *Service) ensureVPNLeaseOwnerEligible(ctx context.Context, clusterID, vpnConnectionID, ownerNodeID string) error {
	check, err := s.store.CheckVPNLeaseOwnerEligibility(ctx, clusterID, vpnConnectionID, ownerNodeID)
	switch {
	case errors.Is(err, pgx.ErrNoRows):
		return ErrInvalidVPNConnection
	case err != nil:
		return err
	}

	nodeActive := check.MembershipStatus == "active" && check.NodeRegistrationStatus == NodeRegistrationActive
	switch {
	case !nodeActive, !check.AllowedByPolicy:
		return ErrVPNLeaseOwnerNotAllowed
	case !check.HasAuthorizedRole:
		return ErrVPNLeaseOwnerRoleRequired
	default:
		return nil
	}
}
// defaultJSON returns raw unless it is empty (nil or zero length), in which
// case the fallback document is returned instead. The content of raw is not
// inspected or validated.
func defaultJSON(raw json.RawMessage, fallback string) json.RawMessage {
	if len(raw) > 0 {
		return raw
	}
	return json.RawMessage(fallback)
}
// isAllowedVPNDesiredState reports whether state is one of the two supported
// desired states: VPNConnectionDesiredEnabled or VPNConnectionDesiredDisabled.
func isAllowedVPNDesiredState(state string) bool {
	switch state {
	case VPNConnectionDesiredEnabled, VPNConnectionDesiredDisabled:
		return true
	default:
		return false
	}
}
// isAllowedVPNRouteType reports whether routeType is exactly one of "cidr",
// "dns_suffix", "service" or "resource".
func isAllowedVPNRouteType(routeType string) bool {
	return routeType == "cidr" ||
		routeType == "dns_suffix" ||
		routeType == "service" ||
		routeType == "resource"
}
// isAllowedVPNRouteAction reports whether action is exactly "allow" or "deny".
func isAllowedVPNRouteAction(action string) bool {
	switch action {
	case "allow", "deny":
		return true
	default:
		return false
	}
}
// isAllowedVPNPolicyStatus reports whether status is exactly "active" or
// "disabled".
func isAllowedVPNPolicyStatus(status string) bool {
	switch status {
	case "active", "disabled":
		return true
	default:
		return false
	}
}
// isFabricEndpointStatus reports whether status is exactly one of "active",
// "disabled" or "maintenance".
func isFabricEndpointStatus(status string) bool {
	return status == "active" || status == "disabled" || status == "maintenance"
}
// isFabricEntryPointType reports whether endpointType is exactly one of
// "client_access", "admin", "api" or "other".
func isFabricEntryPointType(endpointType string) bool {
	return endpointType == "client_access" ||
		endpointType == "admin" ||
		endpointType == "api" ||
		endpointType == "other"
}
// isAllowedVPNNodePreference reports whether preference is exactly one of
// "candidate", "standby" or "preferred".
func isAllowedVPNNodePreference(preference string) bool {
	for _, allowed := range [...]string{"candidate", "standby", "preferred"} {
		if preference == allowed {
			return true
		}
	}
	return false
}
// isAllowedVPNAssignmentStatus reports whether status equals one of the
// VPNAssignmentStatus* constants: NotStarted, Assigned, LeaseRequired, Blocked
// or Unknown.
func isAllowedVPNAssignmentStatus(status string) bool {
	allowed := [...]string{
		VPNAssignmentStatusNotStarted,
		VPNAssignmentStatusAssigned,
		VPNAssignmentStatusLeaseRequired,
		VPNAssignmentStatusBlocked,
		VPNAssignmentStatusUnknown,
	}
	for _, candidate := range allowed {
		if status == candidate {
			return true
		}
	}
	return false
}
// syntheticRoutePolicy is the JSON shape that syntheticRouteFromIntent decodes
// from a MeshRouteIntent's Policy field. Several fields are optional and get
// defaults in syntheticRouteFromIntent; those defaults are noted per field.
type syntheticRoutePolicy struct {
// SyntheticEnabled must be true, otherwise the intent yields no route.
SyntheticEnabled bool `json:"synthetic_enabled"`
PeerEndpoints map[string]string `json:"peer_endpoints"`
PeerEndpointCandidates map[string][]PeerEndpointCandidate `json:"peer_endpoint_candidates"`
RecoverySeeds []PeerRecoverySeed `json:"recovery_seeds"`
RendezvousLeases []PeerRendezvousLease `json:"rendezvous_leases"`
// Hops is the route path; when empty it falls back to
// [source node, destination node] from the intent selectors.
Hops []string `json:"hops"`
// AllowedChannels defaults to {"fabric_control", "route_control"} when empty.
AllowedChannels []string `json:"allowed_channels"`
// MaxTTL and MaxHops default to 8 when they are <= 0.
MaxTTL int `json:"max_ttl"`
MaxHops int `json:"max_hops"`
// ExpiresAt defaults to five minutes after the service clock when nil.
ExpiresAt *time.Time `json:"expires_at"`
// RouteVersion defaults to the intent's UpdatedAt in RFC 3339 form;
// PolicyVersion and PeerDirectoryVersion default to the route version.
RouteVersion string `json:"route_version"`
PolicyVersion string `json:"policy_version"`
PeerDirectoryVersion string `json:"peer_directory_version"`
}
// heartbeatMeshEndpointReport is the payload stored under the
// "mesh_endpoint_report" key of a node heartbeat's metadata, as decoded by
// endpointReportFromHeartbeat. Its report-level Transport, ConnectivityMode,
// NATType and Region act as fallbacks for endpoint candidates that leave the
// corresponding field empty.
type heartbeatMeshEndpointReport struct {
SchemaVersion string `json:"schema_version"`
// ClusterID and NodeID, when set, must match the heartbeat they arrived in.
ClusterID string `json:"cluster_id"`
NodeID string `json:"node_id"`
PeerEndpoint string `json:"peer_endpoint"`
Transport string `json:"transport"`
ConnectivityMode string `json:"connectivity_mode"`
NATType string `json:"nat_type"`
Region string `json:"region"`
EndpointCandidates []PeerEndpointCandidate `json:"endpoint_candidates"`
ObservedAt *time.Time `json:"observed_at"`
}
// heartbeatRendezvousLeaseReport is the payload stored under the
// "mesh_rendezvous_lease_report" key of a node heartbeat's metadata, as decoded
// by rendezvousRelayFeedbackFromHeartbeat.
type heartbeatRendezvousLeaseReport struct {
SchemaVersion string `json:"schema_version"`
// ClusterID and NodeID, when set, must match the heartbeat they arrived in.
ClusterID string `json:"cluster_id"`
NodeID string `json:"node_id"`
// ObservedAt is carried as a plain string here; freshness checks in this
// file use the heartbeat's own ObservedAt instead.
ObservedAt string `json:"observed_at"`
Leases []heartbeatRendezvousLeaseDetails `json:"leases"`
}
// heartbeatRendezvousLeaseDetails describes one rendezvous lease inside a
// heartbeatRendezvousLeaseReport. rendezvousRelayFeedbackFromHeartbeat turns an
// entry into relay feedback only when at least one of StaleRelay,
// WithdrawalNeeded or ReselectionNeeded is set and both PeerNodeID and
// RelayNodeID are non-blank.
type heartbeatRendezvousLeaseDetails struct {
LeaseID string `json:"lease_id"`
PeerNodeID string `json:"peer_node_id"`
RelayNodeID string `json:"relay_node_id"`
RouteIDs []string `json:"route_ids"`
StaleRelay bool `json:"stale_relay"`
WithdrawalNeeded bool `json:"withdrawal_needed"`
ReselectionNeeded bool `json:"reselection_needed"`
ConnectionState string `json:"connection_state"`
Reason string `json:"reason"`
}
// meshRouteHealthObservationMetadata is the JSON shape decoded from a
// MeshLinkObservation's Metadata by routeHealthMetadataFromLink.
// rendezvousRelayRouteHealthFeedbackFromLink only acts on observations whose
// ObservationType is "synthetic_route_health", whose RoutePathDecisionApplied
// flag is set, and whose four forwarding flags are all false.
type meshRouteHealthObservationMetadata struct {
ObservationType string `json:"observation_type"`
RouteID string `json:"route_id"`
RoutePathDecisionApplied bool `json:"route_path_decision_applied"`
RoutePathDecisionSelectedRelayID string `json:"route_path_decision_selected_relay_id"`
RoutePathDecisionStaleRelayNodeID string `json:"route_path_decision_stale_relay_node_id"`
RoutePathDecisionRendezvousPeerNodeID string `json:"route_path_decision_rendezvous_peer_node_id"`
RoutePathDecisionRendezvousLeaseID string `json:"route_path_decision_rendezvous_lease_id"`
RoutePathDecisionRendezvousLeaseReason string `json:"route_path_decision_rendezvous_lease_reason"`
RoutePathDecisionSource string `json:"route_path_decision_source"`
// ExpectedEffectiveHops is consulted by routeHealthPeerNodeID to find the
// node that follows the selected relay.
ExpectedEffectiveHops []string `json:"expected_effective_hops"`
ObservedAckPath []string `json:"observed_ack_path"`
// RoutePathDriftDetected and FailureReason are two of the signals that make
// an observation count as relay feedback.
RoutePathDriftDetected bool `json:"route_path_drift_detected"`
FailureReason string `json:"failure_reason"`
ControlPlaneOnly bool `json:"control_plane_only"`
ProductionForwarding bool `json:"production_forwarding"`
ProductionPayloadForwarding bool `json:"production_payload_forwarding"`
RouteHealthProductionPayloadForwarding bool `json:"route_health_production_payload_forwarding"`
RouteHealthServicePayloadForwarding bool `json:"route_health_service_payload_forwarding"`
}
// rendezvousRelayFeedbackEntry is one in-memory record saying that a relay is
// considered stale for a peer. Entries are produced in this file from heartbeat
// lease reports, from replacement hints, and from synthetic route-health link
// observations, and are consumed by rendezvousRelayPolicy.
type rendezvousRelayFeedbackEntry struct {
ReporterNodeID string
// RouteIDs scopes the entry; an empty list applies to every route
// (see rendezvousFeedbackAppliesToRoute).
RouteIDs []string
LeaseID string
PeerNodeID string
RelayNodeID string
ConnectionState string
Reason string
WithdrawalNeeded bool
ReselectionNeeded bool
ObservedAt time.Time
}
// rendezvousRelaySelection describes a relay chosen as a replacement. Its
// fields are copied into a RendezvousRelayPolicyDecision by
// rendezvousRelayPolicy.recordReplacement.
type rendezvousRelaySelection struct {
RelayNodeID string
Endpoint string
Score int
// Reasons lists the scoring reasons attached to the selection.
Reasons []string
}
// rendezvousRelayPolicy accumulates stale-relay feedback together with the
// withdrawal and replacement decisions derived from it. All of its methods in
// this file tolerate a nil receiver.
type rendezvousRelayPolicy struct {
localNodeID string
// now is the reference time, normalized to UTC by newRendezvousRelayPolicy.
now time.Time
links []MeshLinkObservation
feedback []rendezvousRelayFeedbackEntry
// withdrawn is keyed by route ID, lease ID and relay node ID
// (see recordWithdrawal).
withdrawn map[string]RendezvousRelayPolicyDecision
// replacements is keyed by rendezvousRelayReplacementKey.
replacements map[string]RendezvousRelayPolicyDecision
}
const (
// maxScopedRecoverySeeds caps the number of recovery seeds accepted by
// validatePeerRecoverySeeds.
maxScopedRecoverySeeds = 20
// maxScopedRendezvousLeases caps the number of leases accepted by
// validatePeerRendezvousLeases.
maxScopedRendezvousLeases = 20
// rendezvousRelayFeedbackMaxAge is the oldest a heartbeat or link
// observation may be and still count as relay feedback.
rendezvousRelayFeedbackMaxAge = 2 * time.Minute
)
// nodeSelector is the JSON shape decoded from a route intent's source and
// destination selectors; syntheticRouteFromIntent passes it to firstNodeID to
// pick a single node.
type nodeSelector struct {
NodeID string `json:"node_id"`
NodeIDs []string `json:"node_ids"`
}
// syntheticRouteFromIntent converts an active, synthetic-enabled route intent
// into the route configuration served to input.NodeID.
//
// It returns the route, the peer endpoints and endpoint candidates scoped to the
// route's hops, the policy's recovery seeds, the normalized rendezvous leases,
// and true. The final result is false (with zero values for everything else)
// when the intent is not "active", its policy cannot be decoded or is not
// synthetic-enabled, the hop list has fewer than two nodes or does not contain
// input.NodeID, or any of the embedded endpoint candidates, recovery seeds or
// rendezvous leases fail validation.
func (s *Service) syntheticRouteFromIntent(input GetNodeSyntheticMeshConfigInput, intent MeshRouteIntent) (SyntheticMeshRouteConfig, map[string]string, map[string][]PeerEndpointCandidate, []PeerRecoverySeed, []PeerRendezvousLease, bool) {
	reject := func() (SyntheticMeshRouteConfig, map[string]string, map[string][]PeerEndpointCandidate, []PeerRecoverySeed, []PeerRendezvousLease, bool) {
		return SyntheticMeshRouteConfig{}, nil, nil, nil, nil, false
	}

	if intent.Status != "active" {
		return reject()
	}
	var policy syntheticRoutePolicy
	if err := json.Unmarshal(intent.Policy, &policy); err != nil || !policy.SyntheticEnabled {
		return reject()
	}

	// Selectors are decoded best-effort: an undecodable selector simply leaves
	// the corresponding node ID to be derived from the hop list below.
	var source, destination nodeSelector
	_ = json.Unmarshal(intent.SourceSelector, &source)
	_ = json.Unmarshal(intent.DestinationSelector, &destination)
	sourceNodeID := firstNodeID(source)
	destinationNodeID := firstNodeID(destination)

	hops := append([]string{}, policy.Hops...)
	if len(hops) == 0 && sourceNodeID != "" && destinationNodeID != "" {
		hops = []string{sourceNodeID, destinationNodeID}
	}
	if len(hops) < 2 || !containsString(hops, input.NodeID) {
		return reject()
	}

	if validatePeerEndpointCandidates(policy.PeerEndpointCandidates, hops) != nil {
		return reject()
	}
	if validatePeerRecoverySeeds(policy.RecoverySeeds) != nil {
		return reject()
	}
	if validatePeerRendezvousLeases(policy.RendezvousLeases, hops, s.now()) != nil {
		return reject()
	}

	if sourceNodeID == "" {
		sourceNodeID = hops[0]
	}
	if destinationNodeID == "" {
		destinationNodeID = hops[len(hops)-1]
	}

	expiresAt := s.now().UTC().Add(5 * time.Minute)
	if policy.ExpiresAt != nil {
		expiresAt = policy.ExpiresAt.UTC()
	}

	route := SyntheticMeshRouteConfig{
		RouteID:              intent.ID,
		ClusterID:            input.ClusterID,
		SourceNodeID:         sourceNodeID,
		DestinationNodeID:    destinationNodeID,
		Hops:                 hops,
		AllowedChannels:      policy.AllowedChannels,
		ExpiresAt:            expiresAt,
		MaxTTL:               policy.MaxTTL,
		MaxHops:              policy.MaxHops,
		RouteVersion:         policy.RouteVersion,
		PolicyVersion:        policy.PolicyVersion,
		PeerDirectoryVersion: policy.PeerDirectoryVersion,
	}
	if len(route.AllowedChannels) == 0 {
		route.AllowedChannels = []string{"fabric_control", "route_control"}
	}
	if route.MaxTTL <= 0 {
		route.MaxTTL = 8
	}
	if route.MaxHops <= 0 {
		route.MaxHops = 8
	}
	if route.RouteVersion == "" {
		route.RouteVersion = intent.UpdatedAt.UTC().Format(time.RFC3339)
	}
	// Policy and peer-directory versions inherit the (possibly defaulted)
	// route version when the policy does not pin them.
	if route.PolicyVersion == "" {
		route.PolicyVersion = route.RouteVersion
	}
	if route.PeerDirectoryVersion == "" {
		route.PeerDirectoryVersion = route.RouteVersion
	}

	endpoints := scopedPeerEndpoints(policy.PeerEndpoints, hops)
	candidates := scopedPeerEndpointCandidates(policy.PeerEndpointCandidates, hops)
	leases := normalizeRendezvousLeases(policy.RendezvousLeases, route, s.now())
	return route, endpoints, candidates, policy.RecoverySeeds, leases, true
}
// reportedEndpointConfig gathers the peer endpoint and endpoint candidates that
// each remote node on routePath reported in its most recent heartbeat.
//
// Blank node IDs and localNodeID are skipped. Each remaining node is looked up
// at most once, even if it occurs several times in routePath, so that a repeated
// hop neither triggers a redundant store query nor duplicates that node's
// candidates in the result (matching the de-duplication done by
// rendezvousRelayFeedback and rendezvousRelayReplacementHints). Nodes without a
// heartbeat, or whose heartbeat carries no usable endpoint report, are omitted.
//
// It returns a map of node ID to peer endpoint, a map of node ID to endpoint
// candidates, and the first store error encountered (in which case both maps
// are nil).
func (s *Service) reportedEndpointConfig(ctx context.Context, clusterID string, localNodeID string, routePath []string) (map[string]string, map[string][]PeerEndpointCandidate, error) {
	peers := map[string]string{}
	candidates := map[string][]PeerEndpointCandidate{}
	seenNodes := make(map[string]struct{}, len(routePath))
	for _, nodeID := range routePath {
		nodeID = strings.TrimSpace(nodeID)
		if nodeID == "" || nodeID == localNodeID {
			continue
		}
		if _, duplicate := seenNodes[nodeID]; duplicate {
			continue
		}
		seenNodes[nodeID] = struct{}{}

		heartbeats, err := s.store.ListNodeHeartbeats(ctx, clusterID, nodeID, 1)
		if err != nil {
			return nil, nil, err
		}
		if len(heartbeats) == 0 {
			continue
		}
		peerEndpoint, nodeCandidates, ok := endpointReportFromHeartbeat(heartbeats[0])
		if !ok {
			continue
		}
		if peerEndpoint != "" {
			peers[nodeID] = peerEndpoint
		}
		if len(nodeCandidates) > 0 {
			candidates[nodeID] = append(candidates[nodeID], nodeCandidates...)
		}
	}
	return peers, candidates, nil
}
// endpointReportFromHeartbeat extracts the "mesh_endpoint_report" section of a
// heartbeat's metadata.
//
// It returns the trimmed peer endpoint, the reported endpoint candidates with
// empty fields filled in from the report and heartbeat, and whether anything
// usable was found. The result is ("", nil, false) when the metadata is empty
// or not valid JSON, when the report names a different node or cluster than the
// heartbeat, when any candidate belongs to another node, or when the completed
// candidates fail validatePeerEndpointCandidates.
func endpointReportFromHeartbeat(heartbeat NodeHeartbeat) (string, []PeerEndpointCandidate, bool) {
	if len(heartbeat.Metadata) == 0 || !json.Valid(heartbeat.Metadata) {
		return "", nil, false
	}
	var envelope struct {
		MeshEndpointReport heartbeatMeshEndpointReport `json:"mesh_endpoint_report"`
	}
	if json.Unmarshal(heartbeat.Metadata, &envelope) != nil {
		return "", nil, false
	}
	report := envelope.MeshEndpointReport

	nodeMismatch := report.NodeID != "" && report.NodeID != heartbeat.NodeID
	clusterMismatch := report.ClusterID != "" && report.ClusterID != heartbeat.ClusterID
	if nodeMismatch || clusterMismatch {
		return "", nil, false
	}

	nodeID := heartbeat.NodeID
	peerEndpoint := strings.TrimSpace(report.PeerEndpoint)

	// fill assigns fallback to *field only when the field is still empty.
	fill := func(field *string, fallback string) {
		if *field == "" {
			*field = fallback
		}
	}

	completed := make([]PeerEndpointCandidate, 0, len(report.EndpointCandidates))
	for _, candidate := range report.EndpointCandidates {
		fill(&candidate.NodeID, nodeID)
		fill(&candidate.EndpointID, nodeID+"-reported")
		fill(&candidate.Address, peerEndpoint)
		fill(&candidate.Transport, report.Transport)
		fill(&candidate.ConnectivityMode, report.ConnectivityMode)
		fill(&candidate.NATType, report.NATType)
		fill(&candidate.Region, report.Region)
		// Reachability is derived from the connectivity mode after that mode
		// has received its own default.
		fill(&candidate.Reachability, reachabilityFromConnectivityMode(candidate.ConnectivityMode))
		if candidate.Metadata == nil {
			candidate.Metadata = json.RawMessage(`{}`)
		}
		if candidate.NodeID != nodeID {
			// A node may only report candidates for itself.
			return "", nil, false
		}
		completed = append(completed, candidate)
	}

	if len(completed) > 0 &&
		validatePeerEndpointCandidates(map[string][]PeerEndpointCandidate{nodeID: completed}, []string{nodeID}) != nil {
		return "", nil, false
	}
	usable := peerEndpoint != "" || len(completed) > 0
	return peerEndpoint, completed, usable
}
// rendezvousRelayFeedback loads the latest heartbeat of every distinct,
// non-blank node on routePath and returns the stale-relay feedback entries
// those heartbeats yield relative to now. Nodes without a heartbeat contribute
// nothing; a store error aborts the scan and is returned with a nil slice.
func (s *Service) rendezvousRelayFeedback(ctx context.Context, clusterID string, routePath []string, now time.Time) ([]rendezvousRelayFeedbackEntry, error) {
	entries := []rendezvousRelayFeedbackEntry{}
	visited := map[string]bool{}
	for _, rawID := range routePath {
		nodeID := strings.TrimSpace(rawID)
		if nodeID == "" || visited[nodeID] {
			continue
		}
		visited[nodeID] = true

		latest, err := s.store.ListNodeHeartbeats(ctx, clusterID, nodeID, 1)
		if err != nil {
			return nil, err
		}
		if len(latest) > 0 {
			entries = append(entries, rendezvousRelayFeedbackFromHeartbeat(latest[0], now)...)
		}
	}
	return entries, nil
}
// rendezvousRelayFeedbackFromHeartbeat converts the
// "mesh_rendezvous_lease_report" section of a heartbeat into stale-relay
// feedback entries.
//
// It returns nil when the metadata is empty or invalid JSON, when the heartbeat
// has no observation time, is more than one minute ahead of now, or is older
// than rendezvousRelayFeedbackMaxAge, or when the report names a different node
// or cluster than the heartbeat. A zero now means the current time. Only leases
// flagged stale, withdrawal-needed or reselection-needed, and carrying both a
// peer and a relay node ID, produce an entry; otherwise the result is a
// non-nil, possibly empty slice.
func rendezvousRelayFeedbackFromHeartbeat(heartbeat NodeHeartbeat, now time.Time) []rendezvousRelayFeedbackEntry {
	if len(heartbeat.Metadata) == 0 || !json.Valid(heartbeat.Metadata) {
		return nil
	}
	if now.IsZero() {
		now = time.Now()
	}
	now = now.UTC()

	observed := heartbeat.ObservedAt
	switch {
	case observed.IsZero(),
		observed.After(now.Add(time.Minute)),
		now.Sub(observed.UTC()) > rendezvousRelayFeedbackMaxAge:
		return nil
	}

	var envelope struct {
		MeshRendezvousLeaseReport heartbeatRendezvousLeaseReport `json:"mesh_rendezvous_lease_report"`
	}
	if json.Unmarshal(heartbeat.Metadata, &envelope) != nil {
		return nil
	}
	report := envelope.MeshRendezvousLeaseReport
	if report.NodeID != "" && report.NodeID != heartbeat.NodeID {
		return nil
	}
	if report.ClusterID != "" && report.ClusterID != heartbeat.ClusterID {
		return nil
	}

	entries := []rendezvousRelayFeedbackEntry{}
	for _, lease := range report.Leases {
		flagged := lease.StaleRelay || lease.WithdrawalNeeded || lease.ReselectionNeeded
		peerNodeID := strings.TrimSpace(lease.PeerNodeID)
		relayNodeID := strings.TrimSpace(lease.RelayNodeID)
		if !flagged || peerNodeID == "" || relayNodeID == "" {
			continue
		}
		entries = append(entries, rendezvousRelayFeedbackEntry{
			ReporterNodeID:    heartbeat.NodeID,
			RouteIDs:          append([]string{}, lease.RouteIDs...),
			LeaseID:           strings.TrimSpace(lease.LeaseID),
			PeerNodeID:        peerNodeID,
			RelayNodeID:       relayNodeID,
			ConnectionState:   strings.TrimSpace(lease.ConnectionState),
			Reason:            strings.TrimSpace(lease.Reason),
			WithdrawalNeeded:  lease.WithdrawalNeeded,
			ReselectionNeeded: lease.ReselectionNeeded,
			ObservedAt:        observed.UTC(),
		})
	}
	return entries
}
// rendezvousRelayReplacementHints loads the latest heartbeat of every distinct,
// non-blank node on routePath and returns the relay replacement hints those
// heartbeats yield relative to now. Nodes without a heartbeat contribute
// nothing; a store error aborts the scan and is returned with a nil slice.
func (s *Service) rendezvousRelayReplacementHints(ctx context.Context, clusterID string, routePath []string, now time.Time) ([]RendezvousRelayPolicyDecision, error) {
	hints := []RendezvousRelayPolicyDecision{}
	visited := map[string]bool{}
	for _, rawID := range routePath {
		nodeID := strings.TrimSpace(rawID)
		if nodeID == "" || visited[nodeID] {
			continue
		}
		visited[nodeID] = true

		latest, err := s.store.ListNodeHeartbeats(ctx, clusterID, nodeID, 1)
		if err != nil {
			return nil, err
		}
		if len(latest) > 0 {
			hints = append(hints, rendezvousRelayReplacementHintsFromHeartbeat(latest[0], now)...)
		}
	}
	return hints, nil
}
// rendezvousRelayReplacementHintsFromHeartbeat converts the
// "mesh_route_path_decision_report" section of a heartbeat into relay
// replacement hints.
//
// It returns nil when the metadata is empty or invalid JSON, when the heartbeat
// has no observation time, is more than one minute ahead of now, or is older
// than rendezvousRelayFeedbackMaxAge, or when the report names a different node
// or cluster than the heartbeat. A zero now means the current time. A reported
// decision becomes a hint only if it is a control-plane-only, non-forwarding,
// unexpired "stale_relay_replacement" decision with a route, a selected relay,
// a stale relay and a resolvable peer node.
func rendezvousRelayReplacementHintsFromHeartbeat(heartbeat NodeHeartbeat, now time.Time) []RendezvousRelayPolicyDecision {
	if len(heartbeat.Metadata) == 0 || !json.Valid(heartbeat.Metadata) {
		return nil
	}
	if now.IsZero() {
		now = time.Now()
	}
	now = now.UTC()

	observed := heartbeat.ObservedAt
	switch {
	case observed.IsZero(),
		observed.After(now.Add(time.Minute)),
		now.Sub(observed.UTC()) > rendezvousRelayFeedbackMaxAge:
		return nil
	}

	var envelope struct {
		MeshRoutePathDecisionReport struct {
			ClusterID string              `json:"cluster_id"`
			NodeID    string              `json:"node_id"`
			Decisions []RoutePathDecision `json:"decisions"`
		} `json:"mesh_route_path_decision_report"`
	}
	if json.Unmarshal(heartbeat.Metadata, &envelope) != nil {
		return nil
	}
	report := envelope.MeshRoutePathDecisionReport
	if report.NodeID != "" && report.NodeID != heartbeat.NodeID {
		return nil
	}
	if report.ClusterID != "" && report.ClusterID != heartbeat.ClusterID {
		return nil
	}

	hints := []RendezvousRelayPolicyDecision{}
	for _, decision := range report.Decisions {
		routeID := strings.TrimSpace(decision.RouteID)
		selectedRelayID := strings.TrimSpace(decision.SelectedRelayID)
		staleRelayNodeID := strings.TrimSpace(decision.StaleRelayNodeID)
		expired := !decision.ExpiresAt.IsZero() && !decision.ExpiresAt.After(now)
		switch {
		case routeID == "", selectedRelayID == "", staleRelayNodeID == "":
			continue
		case decision.DecisionSource != "stale_relay_replacement":
			continue
		case decision.ProductionForwarding, !decision.ControlPlaneOnly, expired:
			continue
		}

		peerNodeID := strings.TrimSpace(decision.RendezvousPeerNodeID)
		if peerNodeID == "" {
			// Fall back to the node that follows the selected relay.
			peerNodeID = replacementPeerNodeIDFromDecision(decision)
		}
		if peerNodeID == "" {
			continue
		}

		hints = append(hints, RendezvousRelayPolicyDecision{
			RouteID:          routeID,
			PeerNodeID:       peerNodeID,
			StaleRelayNodeID: staleRelayNodeID,
			SelectedRelayID:  selectedRelayID,
			SelectedEndpoint: strings.TrimRight(strings.TrimSpace(decision.SelectedRelayEndpoint), "/"),
			Score:            decision.PathScore,
			Reason:           "stale_relay_replacement",
			ScoreReasons:     append([]string{}, decision.ScoreReasons...),
			ReporterNodeID:   heartbeat.NodeID,
		})
	}
	return hints
}
// replacementPeerNodeIDFromDecision infers the peer served by a replacement
// relay: the hop that directly follows the selected relay in the decision's
// cleaned effective path, or the trimmed destination node ID when the relay is
// absent from the path or is its last hop.
func replacementPeerNodeIDFromDecision(decision RoutePathDecision) string {
	hops := cleanRouteNodePath(decision.EffectiveHops)
	relayID := strings.TrimSpace(decision.SelectedRelayID)
	for i := 0; i+1 < len(hops); i++ {
		if hops[i] == relayID {
			return hops[i+1]
		}
	}
	return strings.TrimSpace(decision.DestinationNodeID)
}
// replacementHintFeedback turns relay replacement hints into stale-relay
// feedback entries aimed at each hint's stale relay, stamped with now (the
// current time when now is zero). Hints missing a route, peer, stale relay or
// selected relay are dropped. It returns nil for an empty hint list.
func replacementHintFeedback(hints []RendezvousRelayPolicyDecision, now time.Time) []rendezvousRelayFeedbackEntry {
	if len(hints) == 0 {
		return nil
	}
	if now.IsZero() {
		now = time.Now()
	}
	now = now.UTC()

	entries := make([]rendezvousRelayFeedbackEntry, 0, len(hints))
	for _, hint := range hints {
		routeID := strings.TrimSpace(hint.RouteID)
		peerNodeID := strings.TrimSpace(hint.PeerNodeID)
		staleRelayNodeID := strings.TrimSpace(hint.StaleRelayNodeID)
		if routeID == "" || peerNodeID == "" || staleRelayNodeID == "" ||
			strings.TrimSpace(hint.SelectedRelayID) == "" {
			continue
		}
		entries = append(entries, rendezvousRelayFeedbackEntry{
			ReporterNodeID:    strings.TrimSpace(hint.ReporterNodeID),
			RouteIDs:          []string{routeID},
			PeerNodeID:        peerNodeID,
			RelayNodeID:       staleRelayNodeID,
			ConnectionState:   "replacement_hint",
			Reason:            "stale_relay_replacement_hint",
			WithdrawalNeeded:  true,
			ReselectionNeeded: true,
			ObservedAt:        now,
		})
	}
	return entries
}
// rendezvousRelayRouteHealthFeedback collects the stale-relay feedback entries
// that the given link observations yield for route, as judged by
// rendezvousRelayRouteHealthFeedbackFromLink. The result is never nil.
func rendezvousRelayRouteHealthFeedback(localNodeID string, route SyntheticMeshRouteConfig, links []MeshLinkObservation, now time.Time) []rendezvousRelayFeedbackEntry {
	entries := []rendezvousRelayFeedbackEntry{}
	for index := range links {
		if entry, ok := rendezvousRelayRouteHealthFeedbackFromLink(localNodeID, route, links[index], now); ok {
			entries = append(entries, entry)
		}
	}
	return entries
}
// rendezvousRelayRouteHealthFeedbackFromLink derives a stale-relay feedback
// entry from a single synthetic route-health link observation.
//
// An entry is produced only when the link originates at localNodeID, is fresh
// relative to now, carries "synthetic_route_health" metadata for route with an
// applied path decision and no forwarding flag set, names a selected relay, and
// signals trouble through path drift, an "unreachable" link status or a
// non-blank failure reason (checked in that order; the first match sets the
// reason). In every other case the second result is false.
func rendezvousRelayRouteHealthFeedbackFromLink(localNodeID string, route SyntheticMeshRouteConfig, link MeshLinkObservation, now time.Time) (rendezvousRelayFeedbackEntry, bool) {
	var none rendezvousRelayFeedbackEntry

	local := strings.TrimSpace(localNodeID)
	if local == "" || link.SourceNodeID != local || strings.TrimSpace(route.RouteID) == "" {
		return none, false
	}
	if !meshLinkObservationFresh(link, now) {
		return none, false
	}

	metadata, ok := routeHealthMetadataFromLink(link)
	if !ok {
		return none, false
	}
	forwardsPayload := metadata.ProductionForwarding ||
		metadata.ProductionPayloadForwarding ||
		metadata.RouteHealthProductionPayloadForwarding ||
		metadata.RouteHealthServicePayloadForwarding
	if metadata.ObservationType != "synthetic_route_health" ||
		strings.TrimSpace(metadata.RouteID) != route.RouteID ||
		!metadata.RoutePathDecisionApplied ||
		forwardsPayload {
		return none, false
	}

	relayID := strings.TrimSpace(metadata.RoutePathDecisionSelectedRelayID)
	if relayID == "" {
		return none, false
	}

	var reason string
	if metadata.RoutePathDriftDetected {
		reason = "synthetic_route_health_drift"
	} else if link.LinkStatus == "unreachable" {
		reason = "synthetic_route_health_unreachable"
	} else if strings.TrimSpace(metadata.FailureReason) != "" {
		reason = "synthetic_route_health_failure"
	} else {
		// A healthy observation is not feedback.
		return none, false
	}

	peerNodeID := routeHealthPeerNodeID(metadata, route, link.TargetNodeID)
	if peerNodeID == "" {
		return none, false
	}

	entry := rendezvousRelayFeedbackEntry{
		ReporterNodeID:    link.SourceNodeID,
		RouteIDs:          []string{route.RouteID},
		LeaseID:           strings.TrimSpace(metadata.RoutePathDecisionRendezvousLeaseID),
		PeerNodeID:        peerNodeID,
		RelayNodeID:       relayID,
		ConnectionState:   reason,
		Reason:            reason,
		WithdrawalNeeded:  true,
		ReselectionNeeded: true,
		ObservedAt:        link.ObservedAt.UTC(),
	}
	return entry, true
}
// routeHealthMetadataFromLink decodes a link observation's metadata into
// meshRouteHealthObservationMetadata. The second result is false when the
// metadata is empty, is not valid JSON, or cannot be decoded into the struct.
func routeHealthMetadataFromLink(link MeshLinkObservation) (meshRouteHealthObservationMetadata, bool) {
	var decoded meshRouteHealthObservationMetadata
	if len(link.Metadata) == 0 || !json.Valid(link.Metadata) {
		return meshRouteHealthObservationMetadata{}, false
	}
	if json.Unmarshal(link.Metadata, &decoded) != nil {
		return meshRouteHealthObservationMetadata{}, false
	}
	return decoded, true
}
// meshLinkObservationFresh reports whether link was observed recently enough to
// be trusted: it must have an observation time, lie no more than one minute
// ahead of now, and be no older than rendezvousRelayFeedbackMaxAge. A zero now
// means the current time.
func meshLinkObservationFresh(link MeshLinkObservation, now time.Time) bool {
	if now.IsZero() {
		now = time.Now()
	}
	now = now.UTC()

	observed := link.ObservedAt
	if observed.IsZero() || observed.After(now.Add(time.Minute)) {
		return false
	}
	return now.Sub(observed.UTC()) <= rendezvousRelayFeedbackMaxAge
}
// routeHealthPeerNodeID resolves which peer a route-health observation is
// about. Candidates are tried in order and the first non-blank one wins:
//  1. the rendezvous peer recorded in the metadata;
//  2. the hop following the selected relay in the expected effective hops;
//  3. the hop following the selected relay in the route's configured hops;
//  4. targetNodeID;
//  5. the route's destination node ID.
func routeHealthPeerNodeID(metadata meshRouteHealthObservationMetadata, route SyntheticMeshRouteConfig, targetNodeID string) string {
	if explicit := strings.TrimSpace(metadata.RoutePathDecisionRendezvousPeerNodeID); explicit != "" {
		return explicit
	}

	relayID := strings.TrimSpace(metadata.RoutePathDecisionSelectedRelayID)
	for _, path := range [][]string{metadata.ExpectedEffectiveHops, route.Hops} {
		if next := nodeAfterInPath(cleanRouteNodePath(path), relayID); next != "" {
			return next
		}
	}

	if target := strings.TrimSpace(targetNodeID); target != "" {
		return target
	}
	return strings.TrimSpace(route.DestinationNodeID)
}
// nodeAfterInPath returns the element of path that immediately follows the
// first occurrence of nodeID (compared after trimming nodeID). It returns ""
// when nodeID is blank, does not occur in path, or only occurs as the last
// element.
func nodeAfterInPath(path []string, nodeID string) string {
	wanted := strings.TrimSpace(nodeID)
	if wanted == "" {
		return ""
	}
	// Only positions that have a successor can produce a result.
	for i := 0; i+1 < len(path); i++ {
		if path[i] == wanted {
			return path[i+1]
		}
	}
	return ""
}
// newRendezvousRelayPolicy builds an empty relay policy for localNodeID
// (trimmed) with a private copy of links and a UTC reference time. A zero now
// means the current time.
func newRendezvousRelayPolicy(localNodeID string, links []MeshLinkObservation, now time.Time) *rendezvousRelayPolicy {
	if now.IsZero() {
		now = time.Now()
	}
	policy := &rendezvousRelayPolicy{
		localNodeID:  strings.TrimSpace(localNodeID),
		now:          now.UTC(),
		links:        append([]MeshLinkObservation{}, links...),
		withdrawn:    map[string]RendezvousRelayPolicyDecision{},
		replacements: map[string]RendezvousRelayPolicyDecision{},
	}
	return policy
}
// addFeedback appends items to the policy's stale-relay feedback. It is a no-op
// on a nil receiver.
func (p *rendezvousRelayPolicy) addFeedback(items []rendezvousRelayFeedbackEntry) {
	if p != nil {
		p.feedback = append(p.feedback, items...)
	}
}
// staleForLease returns the first feedback entry that applies to routeID and
// matches lease, either by a shared non-empty lease ID or by the same
// peer/relay pair. The second result is false when nothing matches or the
// receiver is nil.
func (p *rendezvousRelayPolicy) staleForLease(routeID string, lease PeerRendezvousLease) (rendezvousRelayFeedbackEntry, bool) {
	if p == nil {
		return rendezvousRelayFeedbackEntry{}, false
	}
	for index := range p.feedback {
		entry := p.feedback[index]
		if !rendezvousFeedbackAppliesToRoute(entry, routeID) {
			continue
		}
		sameLease := entry.LeaseID != "" && lease.LeaseID != "" && entry.LeaseID == lease.LeaseID
		samePair := entry.PeerNodeID == lease.PeerNodeID && entry.RelayNodeID == lease.RelayNodeID
		if sameLease || samePair {
			return entry, true
		}
	}
	return rendezvousRelayFeedbackEntry{}, false
}
// relayStale returns the first feedback entry that marks relayNodeID as stale
// for peerNodeID and applies to routeID. The second result is false when there
// is no such entry or the receiver is nil.
func (p *rendezvousRelayPolicy) relayStale(routeID string, peerNodeID string, relayNodeID string) (rendezvousRelayFeedbackEntry, bool) {
	if p == nil {
		return rendezvousRelayFeedbackEntry{}, false
	}
	for index := range p.feedback {
		entry := p.feedback[index]
		if entry.PeerNodeID != peerNodeID || entry.RelayNodeID != relayNodeID {
			continue
		}
		if rendezvousFeedbackAppliesToRoute(entry, routeID) {
			return entry, true
		}
	}
	return rendezvousRelayFeedbackEntry{}, false
}
// hasStalePeer returns the first feedback entry recorded for peerNodeID that
// applies to routeID, regardless of which relay it names. The second result is
// false when there is no such entry or the receiver is nil.
func (p *rendezvousRelayPolicy) hasStalePeer(routeID string, peerNodeID string) (rendezvousRelayFeedbackEntry, bool) {
	if p == nil {
		return rendezvousRelayFeedbackEntry{}, false
	}
	for index := range p.feedback {
		entry := p.feedback[index]
		if entry.PeerNodeID != peerNodeID {
			continue
		}
		if rendezvousFeedbackAppliesToRoute(entry, routeID) {
			return entry, true
		}
	}
	return rendezvousRelayFeedbackEntry{}, false
}
// recordWithdrawal stores a "stale_relay_withdrawn" decision for lease on
// route, attributed to the node that reported feedback. Decisions are keyed by
// route ID, lease ID and relay node ID, so recording the same combination again
// overwrites the earlier decision. It is a no-op on a nil receiver.
func (p *rendezvousRelayPolicy) recordWithdrawal(route SyntheticMeshRouteConfig, lease PeerRendezvousLease, feedback rendezvousRelayFeedbackEntry) {
	if p == nil {
		return
	}
	decision := RendezvousRelayPolicyDecision{
		RouteID:          route.RouteID,
		PeerNodeID:       lease.PeerNodeID,
		WithdrawnLeaseID: lease.LeaseID,
		StaleRelayNodeID: lease.RelayNodeID,
		Reason:           "stale_relay_withdrawn",
		ReporterNodeID:   feedback.ReporterNodeID,
	}
	key := strings.Join([]string{route.RouteID, lease.LeaseID, lease.RelayNodeID}, "\x00")
	p.withdrawn[key] = decision
}
// recordReplacement stores a "stale_relay_replacement" decision saying that
// selection replaces the relay named in feedback for peerNodeID on route. The
// decision is keyed by rendezvousRelayReplacementKey, so recording the same
// combination again overwrites the earlier decision. It is a no-op on a nil
// receiver or when selection names no relay.
func (p *rendezvousRelayPolicy) recordReplacement(route SyntheticMeshRouteConfig, peerNodeID string, feedback rendezvousRelayFeedbackEntry, selection rendezvousRelaySelection) {
	if p == nil || selection.RelayNodeID == "" {
		return
	}
	decision := RendezvousRelayPolicyDecision{
		RouteID:          route.RouteID,
		PeerNodeID:       peerNodeID,
		StaleRelayNodeID: feedback.RelayNodeID,
		SelectedRelayID:  selection.RelayNodeID,
		SelectedEndpoint: selection.Endpoint,
		Score:            selection.Score,
		Reason:           "stale_relay_replacement",
		ScoreReasons:     append([]string{}, selection.Reasons...),
		ReporterNodeID:   feedback.ReporterNodeID,
	}
	p.replacements[rendezvousRelayReplacementKey(route.RouteID, peerNodeID, feedback.RelayNodeID, selection.RelayNodeID)] = decision
}
// addReplacementHints merges externally reported replacement hints into the
// policy's replacement decisions.
//
// Each hint is normalized (identifiers trimmed, trailing "/" removed from the
// endpoint, default reason and score reasons filled in) and skipped if it lacks
// a route, peer, stale relay or selected relay. A hint is stored when no
// decision exists for its key yet, or when its score is strictly higher than
// the stored one. It is a no-op on a nil receiver.
func (p *rendezvousRelayPolicy) addReplacementHints(hints []RendezvousRelayPolicyDecision) {
	if p == nil {
		return
	}
	for _, normalized := range hints {
		normalized.RouteID = strings.TrimSpace(normalized.RouteID)
		normalized.PeerNodeID = strings.TrimSpace(normalized.PeerNodeID)
		normalized.StaleRelayNodeID = strings.TrimSpace(normalized.StaleRelayNodeID)
		normalized.SelectedRelayID = strings.TrimSpace(normalized.SelectedRelayID)
		normalized.SelectedEndpoint = strings.TrimRight(strings.TrimSpace(normalized.SelectedEndpoint), "/")

		incomplete := normalized.RouteID == "" ||
			normalized.PeerNodeID == "" ||
			normalized.StaleRelayNodeID == "" ||
			normalized.SelectedRelayID == ""
		if incomplete {
			continue
		}

		if normalized.Reason == "" {
			normalized.Reason = "stale_relay_replacement"
		}
		if len(normalized.ScoreReasons) == 0 {
			normalized.ScoreReasons = []string{"route_path_decision_hint"}
		}

		key := rendezvousRelayReplacementKey(normalized.RouteID, normalized.PeerNodeID, normalized.StaleRelayNodeID, normalized.SelectedRelayID)
		if current, known := p.replacements[key]; known && normalized.Score <= current.Score {
			// Keep the stored decision unless the hint scores strictly higher.
			continue
		}
		p.replacements[key] = normalized
	}
}
// report summarizes the policy as a RendezvousRelayPolicyReport, or returns nil
// when the receiver is nil or holds no feedback, withdrawals or replacements.
//
// Decisions come from two maps, whose iteration order is unspecified, so they
// are sorted by route ID, peer node ID, reason, selected relay ID, stale relay
// node ID and finally withdrawn lease ID. The last two keys break ties between
// decisions that share a route, peer, reason and selected relay (for example
// two withdrawals for the same peer, or two replacements that differ only in
// the stale relay), which keeps the emitted order stable across calls.
func (p *rendezvousRelayPolicy) report() *RendezvousRelayPolicyReport {
	if p == nil || (len(p.feedback) == 0 && len(p.withdrawn) == 0 && len(p.replacements) == 0) {
		return nil
	}
	decisions := make([]RendezvousRelayPolicyDecision, 0, len(p.withdrawn)+len(p.replacements))
	for _, decision := range p.withdrawn {
		decisions = append(decisions, decision)
	}
	for _, decision := range p.replacements {
		decisions = append(decisions, decision)
	}
	sort.SliceStable(decisions, func(i, j int) bool {
		a, b := decisions[i], decisions[j]
		switch {
		case a.RouteID != b.RouteID:
			return a.RouteID < b.RouteID
		case a.PeerNodeID != b.PeerNodeID:
			return a.PeerNodeID < b.PeerNodeID
		case a.Reason != b.Reason:
			return a.Reason < b.Reason
		case a.SelectedRelayID != b.SelectedRelayID:
			return a.SelectedRelayID < b.SelectedRelayID
		case a.StaleRelayNodeID != b.StaleRelayNodeID:
			return a.StaleRelayNodeID < b.StaleRelayNodeID
		default:
			return a.WithdrawnLeaseID < b.WithdrawnLeaseID
		}
	})
	return &RendezvousRelayPolicyReport{
		SchemaVersion:         "c17z15.rendezvous_relay_policy.v1",
		ScoringMode:           "route_adjacency_endpoint_priority_mesh_link_health_synthetic_route_health_feedback",
		FeedbackMaxAgeSeconds: int(rendezvousRelayFeedbackMaxAge / time.Second),
		StaleRelayCount:       len(p.feedback),
		WithdrawnLeaseCount:   len(p.withdrawn),
		ReplacementLeaseCount: len(p.replacements),
		Decisions:             decisions,
	}
}
// replacementDecision looks up the recorded replacement decision for the given
// route, peer and selected relay. The second result is false when none exists
// or the receiver is nil.
//
// Replacement decisions are keyed by stale relay as well, so several entries
// can match the same route, peer and selected relay. Because map iteration
// order is unspecified, returning the first match would make the result vary
// between calls; instead the match with the highest score is chosen, with ties
// broken by the lexicographically smallest stale relay node ID.
func (p *rendezvousRelayPolicy) replacementDecision(routeID string, peerNodeID string, selectedRelayID string) (RendezvousRelayPolicyDecision, bool) {
	if p == nil {
		return RendezvousRelayPolicyDecision{}, false
	}
	var best RendezvousRelayPolicyDecision
	found := false
	for _, decision := range p.replacements {
		if decision.RouteID != routeID ||
			decision.PeerNodeID != peerNodeID ||
			decision.SelectedRelayID != selectedRelayID {
			continue
		}
		better := !found ||
			decision.Score > best.Score ||
			(decision.Score == best.Score && decision.StaleRelayNodeID < best.StaleRelayNodeID)
		if better {
			best, found = decision, true
		}
	}
	return best, found
}
// rendezvousRelayReplacementKey builds the map key for a replacement decision:
// the four identifiers, each trimmed of surrounding whitespace, joined by NUL
// bytes in the order route, peer, stale relay, selected relay.
func rendezvousRelayReplacementKey(routeID string, peerNodeID string, staleRelayNodeID string, selectedRelayID string) string {
	parts := [...]string{
		strings.TrimSpace(routeID),
		strings.TrimSpace(peerNodeID),
		strings.TrimSpace(staleRelayNodeID),
		strings.TrimSpace(selectedRelayID),
	}
	return strings.Join(parts[:], "\x00")
}
// routePathDecisionReport wraps decisions in a RoutePathDecisionReport for the
// given generation, or returns nil when there are none. The decisions are
// copied and stably sorted by route ID and then decision ID; the report also
// counts how many of them have the "stale_relay_replacement" source.
func routePathDecisionReport(generation string, decisions []RoutePathDecision) *RoutePathDecisionReport {
	if len(decisions) == 0 {
		return nil
	}
	sorted := make([]RoutePathDecision, len(decisions))
	copy(sorted, decisions)
	sort.SliceStable(sorted, func(i, j int) bool {
		if sorted[i].RouteID == sorted[j].RouteID {
			return sorted[i].DecisionID < sorted[j].DecisionID
		}
		return sorted[i].RouteID < sorted[j].RouteID
	})

	replacementCount := 0
	for index := range sorted {
		if sorted[index].DecisionSource == "stale_relay_replacement" {
			replacementCount++
		}
	}

	return &RoutePathDecisionReport{
		SchemaVersion:            "c17z18.route_path_decisions.v1",
		DecisionMode:             "control_plane_effective_path_from_relay_policy",
		Generation:               generation,
		DecisionCount:            len(sorted),
		ReplacementDecisionCount: replacementCount,
		ControlPlaneOnly:         true,
		ProductionForwarding:     false,
		Decisions:                sorted,
	}
}
// routePathDecisionForRoute computes the effective-path decision for route as
// seen from localNodeID.
//
// The decision starts out as the route intent's own hops with score 1000. Among
// the leases that reference the route, a lease counts as a replacement when the
// relay policy holds a replacement decision for its peer/relay pair, or when
// the lease's own reason is "stale_relay_replacement". The replacement with the
// strictly highest score wins (the first one seen wins ties) and rewrites the
// effective hops, relay fields, source, score and score reasons. Finally the
// local node's previous hop, next hop and role on the effective path are filled
// in.
func routePathDecisionForRoute(route SyntheticMeshRouteConfig, localNodeID string, leases []PeerRendezvousLease, relayPolicy *rendezvousRelayPolicy, generation string) RoutePathDecision {
	decision := RoutePathDecision{
		DecisionID:           route.RouteID + "-path-" + localNodeID,
		RouteID:              route.RouteID,
		ClusterID:            route.ClusterID,
		LocalNodeID:          localNodeID,
		SourceNodeID:         route.SourceNodeID,
		DestinationNodeID:    route.DestinationNodeID,
		OriginalHops:         append([]string{}, route.Hops...),
		EffectiveHops:        append([]string{}, route.Hops...),
		DecisionSource:       "route_intent",
		Generation:           generation,
		PathScore:            1000,
		ScoreReasons:         []string{"route_intent_hops"},
		ControlPlaneOnly:     true,
		ProductionForwarding: false,
		ExpiresAt:            route.ExpiresAt.UTC(),
	}

	var (
		haveBest     bool
		bestLease    PeerRendezvousLease
		bestDecision RendezvousRelayPolicyDecision
	)
	for _, lease := range leases {
		if !containsString(lease.RouteIDs, route.RouteID) {
			continue
		}
		candidate, known := relayPolicy.replacementDecision(route.RouteID, lease.PeerNodeID, lease.RelayNodeID)
		if !known {
			if lease.Reason != "stale_relay_replacement" {
				continue
			}
			// The lease itself declares a replacement the policy has no
			// record of; synthesize a score-less decision for it.
			candidate = RendezvousRelayPolicyDecision{
				RouteID:          route.RouteID,
				PeerNodeID:       lease.PeerNodeID,
				SelectedRelayID:  lease.RelayNodeID,
				SelectedEndpoint: lease.RelayEndpoint,
				Reason:           "stale_relay_replacement",
			}
		}
		if haveBest && candidate.Score <= bestDecision.Score {
			continue
		}
		haveBest, bestLease, bestDecision = true, lease, candidate
	}

	if haveBest {
		decision.DecisionID = route.RouteID + "-path-" + localNodeID + "-via-" + bestLease.RelayNodeID
		decision.EffectiveHops = effectiveRoutePathWithReplacement(route.Hops, bestLease.PeerNodeID, bestDecision.StaleRelayNodeID, bestLease.RelayNodeID)
		decision.SelectedRelayID = bestLease.RelayNodeID
		decision.SelectedRelayEndpoint = bestLease.RelayEndpoint
		decision.StaleRelayNodeID = bestDecision.StaleRelayNodeID
		decision.RendezvousPeerNodeID = bestLease.PeerNodeID
		decision.RendezvousLeaseID = bestLease.LeaseID
		decision.RendezvousLeaseReason = bestLease.Reason
		decision.DecisionSource = "stale_relay_replacement"

		decision.PathScore = bestDecision.Score
		if decision.PathScore == 0 {
			decision.PathScore = 1000
		}
		decision.ScoreReasons = append([]string{}, bestDecision.ScoreReasons...)
		if len(decision.ScoreReasons) == 0 {
			decision.ScoreReasons = []string{"relay_replacement_policy"}
		}
	}

	decision.PreviousHopID, decision.NextHopID, decision.LocalRole = routePathLocalPosition(decision.EffectiveHops, localNodeID, decision.SelectedRelayID, decision.StaleRelayNodeID)
	return decision
}
// effectiveRoutePathWithReplacement rewrites a hop list for a relay
// replacement. Hops are trimmed; blank hops and the stale relay are dropped.
// If selectedRelayID is non-empty and not already on the path, it is inserted
// immediately before the first occurrence of peerNodeID, or appended at the end
// when the peer is not on the path. The input slice is not modified.
func effectiveRoutePathWithReplacement(original []string, peerNodeID string, staleRelayNodeID string, selectedRelayID string) []string {
	path := make([]string, 0, len(original)+1)
	for _, hop := range original {
		hop = strings.TrimSpace(hop)
		// A non-empty hop can only equal the stale relay ID when that ID is
		// itself non-empty, so no separate emptiness check on it is needed.
		if hop == "" || hop == staleRelayNodeID {
			continue
		}
		path = append(path, hop)
	}
	if selectedRelayID == "" || containsString(path, selectedRelayID) {
		return path
	}

	for index, hop := range path {
		if hop != peerNodeID {
			continue
		}
		// Open a slot at index and shift the peer (and everything after it)
		// one position to the right.
		path = append(path, "")
		copy(path[index+1:], path[index:])
		path[index] = selectedRelayID
		return path
	}
	return append(path, selectedRelayID)
}
// routePathLocalPosition locates localNodeID on path and returns its previous
// hop, its next hop and its role.
//
// When the node is not on the path both hops are "" and the role is
// "withdrawn_relay" if the node is the (non-empty) stale relay, otherwise
// "not_on_effective_path". When it is on the path the role is "entry" for the
// first position, "exit" for the last, "selected_relay" for an interior node
// equal to the non-empty selectedRelayID, and "transit" otherwise. Only the
// first occurrence of localNodeID is considered.
func routePathLocalPosition(path []string, localNodeID string, selectedRelayID string, staleRelayNodeID string) (string, string, string) {
	position := -1
	for i := range path {
		if path[i] == localNodeID {
			position = i
			break
		}
	}
	if position == -1 {
		role := "not_on_effective_path"
		if staleRelayNodeID != "" && localNodeID == staleRelayNodeID {
			role = "withdrawn_relay"
		}
		return "", "", role
	}

	last := len(path) - 1
	var previous, next string
	if position > 0 {
		previous = path[position-1]
	}
	if position < last {
		next = path[position+1]
	}

	switch {
	case position == 0:
		return previous, next, "entry"
	case position == last:
		return previous, next, "exit"
	case selectedRelayID != "" && localNodeID == selectedRelayID:
		return previous, next, "selected_relay"
	}
	return previous, next, "transit"
}
// rendezvousFeedbackAppliesToRoute reports whether a relay feedback entry is
// relevant to routeID. Feedback with no route IDs, or a blank routeID, matches
// everything; otherwise the entry must list routeID.
func rendezvousFeedbackAppliesToRoute(item rendezvousRelayFeedbackEntry, routeID string) bool {
	unscoped := len(item.RouteIDs) == 0 || strings.TrimSpace(routeID) == ""
	return unscoped || containsString(item.RouteIDs, routeID)
}
// reachabilityFromConnectivityMode maps a connectivity mode onto the
// reachability label used for endpoints. Unrecognised modes (matching is
// exact and case-sensitive) map to "unknown".
func reachabilityFromConnectivityMode(connectivityMode string) string {
	if connectivityMode == "outbound_only" {
		return "outbound_only"
	}
	if connectivityMode == "relay_required" {
		return "relay"
	}
	if connectivityMode == "direct" {
		return "public"
	}
	return "unknown"
}
// validatePeerRecoverySeeds checks a batch of recovery seeds.
//
// It returns ErrInvalidPayload when there are more than
// maxScopedRecoverySeeds entries, when a seed has a blank node ID or endpoint,
// an unsupported transport, a non-empty unsupported connectivity mode, or
// non-empty metadata that is not valid JSON, or when two seeds share the same
// trimmed (node ID, endpoint) pair. Otherwise it returns nil.
func validatePeerRecoverySeeds(seeds []PeerRecoverySeed) error {
	if len(seeds) > maxScopedRecoverySeeds {
		return ErrInvalidPayload
	}
	seen := make(map[string]struct{}, len(seeds))
	for _, seed := range seeds {
		nodeID := strings.TrimSpace(seed.NodeID)
		endpoint := strings.TrimSpace(seed.Endpoint)
		switch {
		case nodeID == "", endpoint == "":
			return ErrInvalidPayload
		case !isPeerEndpointTransport(seed.Transport):
			return ErrInvalidPayload
		case seed.ConnectivityMode != "" && !isPeerEndpointConnectivityMode(seed.ConnectivityMode):
			return ErrInvalidPayload
		case len(seed.Metadata) > 0 && !json.Valid(seed.Metadata):
			return ErrInvalidPayload
		}
		// NUL separates the two parts so distinct pairs cannot collide.
		key := nodeID + "\x00" + endpoint
		if _, dup := seen[key]; dup {
			return ErrInvalidPayload
		}
		seen[key] = struct{}{}
	}
	return nil
}
// validatePeerRendezvousLeases checks a batch of rendezvous leases against the
// route path they are scoped to.
//
// It returns ErrInvalidPayload when:
//   - there are more than maxScopedRendezvousLeases leases;
//   - a lease has a blank peer node ID, relay node ID or relay endpoint;
//   - the peer and relay are the same node;
//   - the peer or the relay is not on routePath;
//   - a non-empty transport is not a supported rendezvous transport;
//   - a non-zero ExpiresAt is not strictly after now;
//   - non-empty metadata is not valid JSON;
//   - two leases carry the same non-blank lease ID.
//
// Leases with a blank lease ID are exempt from the duplicate check. Otherwise
// it returns nil.
func validatePeerRendezvousLeases(leases []PeerRendezvousLease, routePath []string, now time.Time) error {
	if len(leases) > maxScopedRendezvousLeases {
		return ErrInvalidPayload
	}
	now = now.UTC()
	seen := make(map[string]struct{}, len(leases))
	for _, lease := range leases {
		peerNodeID := strings.TrimSpace(lease.PeerNodeID)
		relayNodeID := strings.TrimSpace(lease.RelayNodeID)
		relayEndpoint := strings.TrimSpace(lease.RelayEndpoint)
		transport := strings.TrimSpace(lease.Transport)
		if peerNodeID == "" ||
			relayNodeID == "" ||
			relayEndpoint == "" ||
			peerNodeID == relayNodeID ||
			!containsString(routePath, peerNodeID) ||
			!containsString(routePath, relayNodeID) ||
			(transport != "" && !isPeerRendezvousTransport(transport)) ||
			(!lease.ExpiresAt.IsZero() && !lease.ExpiresAt.After(now)) ||
			(len(lease.Metadata) > 0 && !json.Valid(lease.Metadata)) {
			return ErrInvalidPayload
		}
		// Deduplicate on the trimmed ID: the blank check is already
		// whitespace-insensitive and mergeRendezvousLeases keys on the trimmed
		// ID, so "a" and "a " must count as the same lease here too.
		leaseID := strings.TrimSpace(lease.LeaseID)
		if leaseID == "" {
			continue
		}
		if _, duplicate := seen[leaseID]; duplicate {
			return ErrInvalidPayload
		}
		seen[leaseID] = struct{}{}
	}
	return nil
}
// normalizeRendezvousLeases returns copies of leases with defaults filled in
// for route, dropping any lease that is not live at now.
//
// For each lease it trims the peer/relay IDs and the relay endpoint (also
// stripping trailing slashes), derives a lease ID when none is set, defaults
// transport, connectivity mode, priority, reason and metadata, ensures the
// route ID is listed, restricts allowed channels to control-plane channels,
// forces ControlPlaneOnly, converts timestamps to UTC, and caps the expiry at
// the route's expiry. Leases whose resulting expiry is zero or not after now
// are omitted.
func normalizeRendezvousLeases(leases []PeerRendezvousLease, route SyntheticMeshRouteConfig, now time.Time) []PeerRendezvousLease {
	now = now.UTC()
	normalized := make([]PeerRendezvousLease, 0, len(leases))
	for _, item := range leases {
		// Identity fields.
		item.PeerNodeID = strings.TrimSpace(item.PeerNodeID)
		item.RelayNodeID = strings.TrimSpace(item.RelayNodeID)
		item.RelayEndpoint = strings.TrimRight(strings.TrimSpace(item.RelayEndpoint), "/")
		if item.LeaseID == "" {
			item.LeaseID = route.RouteID + "-rv-" + item.PeerNodeID + "-via-" + item.RelayNodeID
		}

		// Simple scalar defaults.
		if item.Transport == "" {
			item.Transport = "relay_control"
		}
		if item.ConnectivityMode == "" {
			item.ConnectivityMode = "relay_required"
		}
		if item.Priority <= 0 {
			item.Priority = 100
		}
		if item.Reason == "" {
			item.Reason = "policy_rendezvous_lease"
		}
		if item.Metadata == nil {
			item.Metadata = json.RawMessage(`{}`)
		}
		item.ControlPlaneOnly = true

		// Route scoping: always include this route, copying before appending
		// so the caller's slice is never written to.
		switch {
		case len(item.RouteIDs) == 0:
			item.RouteIDs = []string{route.RouteID}
		case !containsString(item.RouteIDs, route.RouteID):
			item.RouteIDs = append(append([]string{}, item.RouteIDs...), route.RouteID)
		}

		// Channels: the lease's own list wins over the route's; either way
		// only control-plane channels survive.
		item.AllowedChannels = controlPlaneAllowedChannels(firstNonEmptyStringSlice(item.AllowedChannels, route.AllowedChannels))
		if len(item.AllowedChannels) == 0 {
			item.AllowedChannels = []string{"fabric_control", "route_control"}
		}

		// Validity window.
		if item.IssuedAt.IsZero() {
			item.IssuedAt = now
		} else {
			item.IssuedAt = item.IssuedAt.UTC()
		}
		outlivesRoute := !route.ExpiresAt.IsZero() && item.ExpiresAt.After(route.ExpiresAt)
		if item.ExpiresAt.IsZero() || outlivesRoute {
			item.ExpiresAt = route.ExpiresAt.UTC()
		} else {
			item.ExpiresAt = item.ExpiresAt.UTC()
		}

		if item.ExpiresAt.IsZero() || !item.ExpiresAt.After(now) {
			continue
		}
		normalized = append(normalized, item)
	}
	return normalized
}
// scopedRendezvousLeases returns the normalized leases that the local node may
// see for route. It returns nil when localNodeID is not one of the route's
// hops. Leases that relayPolicy reports as stale are recorded as withdrawals
// and omitted; the rest are kept only when both peer and relay are route hops.
func scopedRendezvousLeases(leases []PeerRendezvousLease, route SyntheticMeshRouteConfig, localNodeID string, relayPolicy *rendezvousRelayPolicy, now time.Time) []PeerRendezvousLease {
	if !containsString(route.Hops, localNodeID) {
		return nil
	}
	candidates := normalizeRendezvousLeases(leases, route, now)
	kept := make([]PeerRendezvousLease, 0, len(candidates))
	for _, lease := range candidates {
		feedback, stale := relayPolicy.staleForLease(route.RouteID, lease)
		if stale {
			relayPolicy.recordWithdrawal(route, lease, feedback)
			continue
		}
		onRoute := containsString(route.Hops, lease.PeerNodeID) && containsString(route.Hops, lease.RelayNodeID)
		if !onRoute {
			continue
		}
		kept = append(kept, lease)
	}
	return kept
}
// derivedRendezvousLeases synthesizes control-plane rendezvous leases for
// every peer on route whose endpoint candidates require rendezvous.
//
// It returns nil when localNodeID is not one of the route's hops. For each
// qualifying peer a relay is chosen with selectRendezvousRelay; peers for
// which no relay with an endpoint is found are skipped. A lease is emitted
// only when the route's expiry is after now. When relayPolicy reports a stale
// relay for the peer, the lease reason becomes "stale_relay_replacement" and,
// if the chosen relay differs from the stale one, the replacement is recorded
// on relayPolicy.
//
// Peers are visited in sorted key order so that the returned slice and the
// sequence of recordReplacement calls are deterministic; Go does not specify
// map iteration order.
func derivedRendezvousLeases(route SyntheticMeshRouteConfig, peers map[string]string, candidates map[string][]PeerEndpointCandidate, localNodeID string, relayPolicy *rendezvousRelayPolicy, now time.Time) []PeerRendezvousLease {
	if !containsString(route.Hops, localNodeID) {
		return nil
	}
	peerKeys := make([]string, 0, len(candidates))
	for key := range candidates {
		peerKeys = append(peerKeys, key)
	}
	sort.Strings(peerKeys)

	out := []PeerRendezvousLease{}
	for _, key := range peerKeys {
		items := candidates[key]
		peerNodeID := strings.TrimSpace(key)
		if peerNodeID == "" || !containsString(route.Hops, peerNodeID) || !peerEndpointCandidatesRequireRendezvous(items) {
			continue
		}
		selection := selectRendezvousRelay(route, peerNodeID, localNodeID, peers, candidates, relayPolicy)
		if selection.RelayNodeID == "" || selection.Endpoint == "" {
			continue
		}
		_, replacement := relayPolicy.hasStalePeer(route.RouteID, peerNodeID)
		reason := rendezvousLeaseReason(items)
		if replacement {
			reason = "stale_relay_replacement"
		}
		lease := PeerRendezvousLease{
			LeaseID:          route.RouteID + "-rv-" + peerNodeID + "-via-" + selection.RelayNodeID,
			PeerNodeID:       peerNodeID,
			RelayNodeID:      selection.RelayNodeID,
			RelayEndpoint:    selection.Endpoint,
			Transport:        "relay_control",
			ConnectivityMode: "relay_required",
			RouteIDs:         []string{route.RouteID},
			AllowedChannels:  controlPlaneAllowedChannels(route.AllowedChannels),
			Priority:         rendezvousLeasePriority(items),
			ControlPlaneOnly: true,
			IssuedAt:         now.UTC(),
			ExpiresAt:        route.ExpiresAt.UTC(),
			Reason:           reason,
			Metadata:         rendezvousRelayLeaseMetadata(selection, replacement),
		}
		if len(lease.AllowedChannels) == 0 {
			lease.AllowedChannels = []string{"fabric_control", "route_control"}
		}
		if lease.Priority <= 0 {
			lease.Priority = 100
		}
		if lease.ExpiresAt.After(now.UTC()) {
			out = append(out, lease)
			// Only record a replacement when the chosen relay actually
			// differs from the one that was reported stale.
			if feedback, ok := relayPolicy.hasStalePeer(route.RouteID, peerNodeID); ok && feedback.RelayNodeID != selection.RelayNodeID {
				relayPolicy.recordReplacement(route, peerNodeID, feedback, selection)
			}
		}
	}
	return out
}
// selectRendezvousRelay picks the best relay on route for reaching peerNodeID.
//
// Hops are considered neighbours-of-the-peer first, then the whole path, with
// duplicates and the peer itself skipped. A hop is a candidate only when
// relayPolicy does not mark it stale for this route/peer and
// relayControlEndpointForNode yields an endpoint for it. Candidates are ranked
// by descending score, ties broken by ascending relay node ID. The zero
// rendezvousRelaySelection is returned when there are no candidates.
func selectRendezvousRelay(route SyntheticMeshRouteConfig, peerNodeID string, localNodeID string, peers map[string]string, candidates map[string][]PeerEndpointCandidate, relayPolicy *rendezvousRelayPolicy) rendezvousRelaySelection {
	hops := route.Hops
	peerIndex := -1
	for i := range hops {
		if hops[i] == peerNodeID {
			peerIndex = i
			break
		}
	}

	// Visit order: previous neighbour, next neighbour, then every hop.
	order := make([]string, 0, len(hops)+2)
	if peerIndex > 0 {
		order = append(order, hops[peerIndex-1])
	}
	if peerIndex >= 0 && peerIndex < len(hops)-1 {
		order = append(order, hops[peerIndex+1])
	}
	order = append(order, hops...)

	visited := make(map[string]struct{}, len(order))
	var ranked []rendezvousRelaySelection
	for _, raw := range order {
		relayNodeID := strings.TrimSpace(raw)
		if relayNodeID == "" || relayNodeID == peerNodeID {
			continue
		}
		if _, done := visited[relayNodeID]; done {
			continue
		}
		visited[relayNodeID] = struct{}{}
		if _, stale := relayPolicy.relayStale(route.RouteID, peerNodeID, relayNodeID); stale {
			continue
		}
		endpoint, endpointScore, endpointReasons := relayControlEndpointForNode(relayNodeID, peers, candidates)
		if endpoint == "" {
			continue
		}
		score, reasons := rendezvousRelayCandidateScore(route.RouteID, hops, peerIndex, relayNodeID, localNodeID, endpointScore, endpointReasons, relayPolicy)
		ranked = append(ranked, rendezvousRelaySelection{
			RelayNodeID: relayNodeID,
			Endpoint:    endpoint,
			Score:       score,
			Reasons:     reasons,
		})
	}
	if len(ranked) == 0 {
		return rendezvousRelaySelection{}
	}
	sort.SliceStable(ranked, func(a, b int) bool {
		if ranked[a].Score == ranked[b].Score {
			return ranked[a].RelayNodeID < ranked[b].RelayNodeID
		}
		return ranked[a].Score > ranked[b].Score
	})
	return ranked[0]
}
// relayControlEndpointForNode resolves an HTTP(S) control endpoint for nodeID.
//
// It returns (endpoint, score, reasons). A reported peer endpoint from peers
// wins with a fixed score of 80. Otherwise the node's endpoint candidates are
// tried in ascending (priority, endpoint ID) order, skipping any that require
// rendezvous or whose address is not HTTP(S); the first usable one is scored
// from a base of 70 plus bonuses for priority, policy tags and direct
// connectivity. Endpoints are trimmed and stripped of trailing slashes.
// ("", 0, nil) means no endpoint was found.
func relayControlEndpointForNode(nodeID string, peers map[string]string, candidates map[string][]PeerEndpointCandidate) (string, int, []string) {
	reported := strings.TrimRight(strings.TrimSpace(peers[nodeID]), "/")
	if isHTTPControlEndpoint(reported) {
		return reported, 80, []string{"reported_peer_endpoint"}
	}

	// Sort a private copy so the caller's candidate slice keeps its order.
	ordered := make([]PeerEndpointCandidate, len(candidates[nodeID]))
	copy(ordered, candidates[nodeID])
	sort.SliceStable(ordered, func(a, b int) bool {
		if ordered[a].Priority == ordered[b].Priority {
			return ordered[a].EndpointID < ordered[b].EndpointID
		}
		return ordered[a].Priority < ordered[b].Priority
	})

	for _, candidate := range ordered {
		if endpointCandidateRequiresRendezvous(candidate) {
			continue
		}
		address := strings.TrimRight(strings.TrimSpace(candidate.Address), "/")
		if !isHTTPControlEndpoint(address) {
			continue
		}
		score, reasons := 70, []string{"endpoint_candidate"}
		if candidate.Priority > 0 {
			// Lower priority numbers earn a larger bonus, never a penalty.
			score += maxInt(0, 50-candidate.Priority)
		}
		if hasPolicyTag(candidate.PolicyTags, "fast-path") {
			score += 25
			reasons = append(reasons, "fast_path")
		}
		sameSite := hasPolicyTag(candidate.PolicyTags, "same-site") ||
			hasPolicyTag(candidate.PolicyTags, "corp-lan") ||
			hasPolicyTag(candidate.PolicyTags, "private-lan")
		if sameSite {
			score += 20
			reasons = append(reasons, "same_site")
		}
		if strings.EqualFold(candidate.ConnectivityMode, "direct") {
			score += 10
			reasons = append(reasons, "direct")
		}
		return address, score, reasons
	}
	return "", 0, nil
}
// rendezvousRelayCandidateScore scores relayNodeID as a rendezvous relay and
// returns the score with the reasons that contributed to it.
//
// The score starts at 500 plus endpointScore, then adds a proximity bonus
// based on the hop distance between the relay and the peer (when both are on
// routePath), subtracts 120 when the relay is the first hop of a path longer
// than two hops, adds 40 when the relay is the local node, and finally adds
// the mesh-link and route-health contributions derived from relayPolicy.
func rendezvousRelayCandidateScore(routeID string, routePath []string, peerIndex int, relayNodeID string, localNodeID string, endpointScore int, endpointReasons []string, relayPolicy *rendezvousRelayPolicy) (int, []string) {
	total := 500 + endpointScore
	why := make([]string, 0, len(endpointReasons)+4)
	why = append(why, endpointReasons...)

	relayIndex := -1
	for i, hop := range routePath {
		if hop == relayNodeID {
			relayIndex = i
			break
		}
	}

	if peerIndex >= 0 && relayIndex >= 0 {
		distance := absInt(peerIndex - relayIndex)
		if distance == 1 {
			total += 180
			why = append(why, "adjacent_to_peer")
		} else if distance == 2 {
			total += 120
			why = append(why, "near_peer")
		} else {
			total += maxInt(0, 80-distance*10)
			why = append(why, "route_path_candidate")
		}
	}
	if len(routePath) > 2 && relayIndex == 0 {
		total -= 120
		why = append(why, "entry_relay_fallback")
	}
	if relayNodeID == localNodeID {
		total += 40
		why = append(why, "local_entry_relay")
	}

	linkScore, linkReasons := rendezvousRelayLinkScore(relayNodeID, relayPolicy)
	total += linkScore
	why = append(why, linkReasons...)

	healthScore, healthReasons := rendezvousRelayRouteHealthScore(routeID, relayNodeID, relayPolicy)
	total += healthScore
	why = append(why, healthReasons...)

	return total, why
}
// rendezvousRelayLinkScore scores the local node's most recent mesh-link
// observation towards relayNodeID.
//
// Only links from relayPolicy.localNodeID to relayNodeID are considered, and
// links with a non-zero ObservedAt older than rendezvousRelayFeedbackMaxAge
// (relative to relayPolicy.now) are ignored. A "reachable" link scores 60 plus
// the quality score and a latency bonus of max(0, 80-latency) when present;
// an "unreachable" link scores -250; anything else, a nil policy, or a policy
// with no local node ID scores (0, nil).
func rendezvousRelayLinkScore(relayNodeID string, relayPolicy *rendezvousRelayPolicy) (int, []string) {
	if relayPolicy == nil || relayPolicy.localNodeID == "" {
		return 0, nil
	}

	var newest *MeshLinkObservation
	for i := range relayPolicy.links {
		observed := &relayPolicy.links[i]
		if observed.SourceNodeID != relayPolicy.localNodeID || observed.TargetNodeID != relayNodeID {
			continue
		}
		expired := !observed.ObservedAt.IsZero() &&
			relayPolicy.now.Sub(observed.ObservedAt.UTC()) > rendezvousRelayFeedbackMaxAge
		if expired {
			continue
		}
		if newest == nil || observed.ObservedAt.After(newest.ObservedAt) {
			newest = observed
		}
	}
	if newest == nil {
		return 0, nil
	}

	if newest.LinkStatus == "unreachable" {
		return -250, []string{"mesh_link_unreachable"}
	}
	if newest.LinkStatus != "reachable" {
		return 0, nil
	}

	total := 60
	why := []string{"mesh_link_reachable"}
	if newest.QualityScore != nil {
		total += *newest.QualityScore
		why = append(why, "mesh_link_quality")
	}
	if newest.LatencyMs != nil {
		total += maxInt(0, 80-*newest.LatencyMs)
		why = append(why, "mesh_link_latency")
	}
	return total, why
}
// rendezvousRelayRouteHealthScore scores relayNodeID from the local node's
// most recent synthetic route-health observation for routeID.
//
// An observation counts only when it originates from relayPolicy.localNodeID,
// is fresh per meshLinkObservationFresh, carries route-health metadata of type
// "synthetic_route_health" for this route with this relay as the selected
// relay, and has none of the production/service forwarding flags set. From the
// latest such observation: detected path drift scores -360; an unreachable
// link or a non-blank failure reason scores -320; a reachable link scores 90
// plus the quality score and a latency bonus of max(0, 100-latency) when
// present. Everything else scores (0, nil).
func rendezvousRelayRouteHealthScore(routeID string, relayNodeID string, relayPolicy *rendezvousRelayPolicy) (int, []string) {
	if relayPolicy == nil || relayPolicy.localNodeID == "" {
		return 0, nil
	}
	routeID, relayNodeID = strings.TrimSpace(routeID), strings.TrimSpace(relayNodeID)
	if routeID == "" || relayNodeID == "" {
		return 0, nil
	}

	var (
		newest     *MeshLinkObservation
		newestMeta meshRouteHealthObservationMetadata
	)
	for i := range relayPolicy.links {
		observed := &relayPolicy.links[i]
		if observed.SourceNodeID != relayPolicy.localNodeID || !meshLinkObservationFresh(*observed, relayPolicy.now) {
			continue
		}
		meta, ok := routeHealthMetadataFromLink(*observed)
		if !ok {
			continue
		}
		if meta.ObservationType != "synthetic_route_health" {
			continue
		}
		if strings.TrimSpace(meta.RouteID) != routeID || strings.TrimSpace(meta.RoutePathDecisionSelectedRelayID) != relayNodeID {
			continue
		}
		// Observations flagged with any production or service payload
		// forwarding are excluded from scoring.
		forwarding := meta.ProductionForwarding ||
			meta.ProductionPayloadForwarding ||
			meta.RouteHealthProductionPayloadForwarding ||
			meta.RouteHealthServicePayloadForwarding
		if forwarding {
			continue
		}
		if newest == nil || observed.ObservedAt.After(newest.ObservedAt) {
			newest, newestMeta = observed, meta
		}
	}

	switch {
	case newest == nil:
		return 0, nil
	case newestMeta.RoutePathDriftDetected:
		return -360, []string{"route_health_drift"}
	case newest.LinkStatus == "unreachable" || strings.TrimSpace(newestMeta.FailureReason) != "":
		return -320, []string{"route_health_unreachable"}
	case newest.LinkStatus != "reachable":
		return 0, nil
	}

	total := 90
	why := []string{"route_health_reachable", "route_health_no_drift"}
	if newest.QualityScore != nil {
		total += *newest.QualityScore
		why = append(why, "route_health_quality")
	}
	if newest.LatencyMs != nil {
		total += maxInt(0, 100-*newest.LatencyMs)
		why = append(why, "route_health_latency")
	}
	return total, why
}
// rendezvousRelayLeaseMetadata renders the JSON metadata attached to a derived
// rendezvous lease: fixed provenance/contract fields, the relay selection
// score and its reasons, and "replacement_for_stale_relay": true when
// replacement is set. If marshalling fails it falls back to a static document
// containing only the fixed fields.
func rendezvousRelayLeaseMetadata(selection rendezvousRelaySelection, replacement bool) json.RawMessage {
	fields := make(map[string]any, 8)
	fields["source"] = "control-plane"
	fields["derived_from"] = "endpoint_candidate"
	fields["lease_refresh_contract"] = "node_scoped_synthetic_config_get"
	fields["relay_replacement_contract"] = "stale_relay_feedback_policy"
	fields["relay_selection_score"] = selection.Score
	fields["relay_selection_score_reasons"] = selection.Reasons
	fields["production_payload_forwarding"] = false
	if replacement {
		fields["replacement_for_stale_relay"] = true
	}
	if encoded, err := json.Marshal(fields); err == nil {
		return encoded
	}
	return json.RawMessage(`{"source":"control-plane","derived_from":"endpoint_candidate","lease_refresh_contract":"node_scoped_synthetic_config_get","relay_replacement_contract":"stale_relay_feedback_policy","production_payload_forwarding":false}`)
}
// hasPolicyTag reports whether tags contains want, comparing after trimming
// surrounding whitespace and lower-casing both sides.
func hasPolicyTag(tags []string, want string) bool {
	target := strings.ToLower(strings.TrimSpace(want))
	for i := range tags {
		normalized := strings.ToLower(strings.TrimSpace(tags[i]))
		if normalized == target {
			return true
		}
	}
	return false
}
// maxInt returns the larger of a and b.
func maxInt(a int, b int) int {
	if b >= a {
		return b
	}
	return a
}
// absInt returns value with its sign removed (negative inputs are negated).
func absInt(value int) int {
	if value >= 0 {
		return value
	}
	return -value
}
// peerEndpointCandidatesRequireRendezvous reports whether at least one of the
// candidates requires rendezvous per endpointCandidateRequiresRendezvous.
func peerEndpointCandidatesRequireRendezvous(candidates []PeerEndpointCandidate) bool {
	for i := 0; i < len(candidates); i++ {
		if endpointCandidateRequiresRendezvous(candidates[i]) {
			return true
		}
	}
	return false
}
// endpointCandidateRequiresRendezvous reports whether candidate can only be
// reached through a rendezvous relay. After trimming and lower-casing, that is
// the case when the transport contains "relay" or "outbound", or when the
// reachability or connectivity mode is a relay/outbound-only value.
func endpointCandidateRequiresRendezvous(candidate PeerEndpointCandidate) bool {
	normalize := func(s string) string { return strings.ToLower(strings.TrimSpace(s)) }
	transport := normalize(candidate.Transport)
	reachability := normalize(candidate.Reachability)
	mode := normalize(candidate.ConnectivityMode)

	switch {
	case strings.Contains(transport, "relay"), strings.Contains(transport, "outbound"):
		return true
	case reachability == "relay", reachability == "outbound_only":
		return true
	case mode == "relay_required", mode == "outbound_only":
		return true
	}
	return false
}
// rendezvousLeasePriority returns the smallest positive priority among the
// candidates that require rendezvous, or 0 when no such candidate declares a
// positive priority.
//
// Non-positive priorities are treated as "unset" and ignored. Previously a
// negative priority on an earlier candidate was adopted and then never
// replaced by a later positive one, so the result depended on candidate order
// and could discard an explicitly configured priority.
func rendezvousLeasePriority(candidates []PeerEndpointCandidate) int {
	best := 0
	for _, candidate := range candidates {
		if candidate.Priority <= 0 || !endpointCandidateRequiresRendezvous(candidate) {
			continue
		}
		if best == 0 || candidate.Priority < best {
			best = candidate.Priority
		}
	}
	return best
}
// rendezvousLeaseReason derives the reason string for an automatically created
// rendezvous lease from the first candidate (in slice order) that is
// outbound-only or relay-bound, comparing trimmed, lower-cased values. It
// falls back to "auto_rendezvous_required" when no candidate matches.
func rendezvousLeaseReason(candidates []PeerEndpointCandidate) string {
	for i := range candidates {
		mode := strings.ToLower(strings.TrimSpace(candidates[i].ConnectivityMode))
		reach := strings.ToLower(strings.TrimSpace(candidates[i].Reachability))
		switch {
		case mode == "outbound_only", reach == "outbound_only":
			return "auto_outbound_only"
		case mode == "relay_required", reach == "relay":
			return "auto_relay_required"
		}
	}
	return "auto_rendezvous_required"
}
// mergeRendezvousLeases folds leases into out, keyed by trimmed lease ID or,
// when that is blank, by (peer, relay, relay endpoint). An incoming lease
// replaces the stored one when the key is new, when its priority number is
// lower, or when it expires later. Nil metadata is replaced with an empty JSON
// object before storing.
func mergeRendezvousLeases(out map[string]PeerRendezvousLease, leases []PeerRendezvousLease) {
	for _, incoming := range leases {
		if incoming.Metadata == nil {
			incoming.Metadata = json.RawMessage(`{}`)
		}
		key := strings.TrimSpace(incoming.LeaseID)
		if key == "" {
			key = incoming.PeerNodeID + "\x00" + incoming.RelayNodeID + "\x00" + incoming.RelayEndpoint
		}
		current, found := out[key]
		if found && incoming.Priority >= current.Priority && !current.ExpiresAt.Before(incoming.ExpiresAt) {
			// Stored lease is at least as preferred and lives at least as long.
			continue
		}
		out[key] = incoming
	}
}
// sortedRendezvousLeases flattens items into a slice ordered by ascending
// priority, then peer node ID, relay node ID and lease ID, and truncates the
// result to at most limit entries.
func sortedRendezvousLeases(items map[string]PeerRendezvousLease, limit int) []PeerRendezvousLease {
	ordered := make([]PeerRendezvousLease, 0, len(items))
	for key := range items {
		ordered = append(ordered, items[key])
	}
	sort.SliceStable(ordered, func(a, b int) bool {
		left, right := ordered[a], ordered[b]
		switch {
		case left.Priority != right.Priority:
			return left.Priority < right.Priority
		case left.PeerNodeID != right.PeerNodeID:
			return left.PeerNodeID < right.PeerNodeID
		case left.RelayNodeID != right.RelayNodeID:
			return left.RelayNodeID < right.RelayNodeID
		default:
			return left.LeaseID < right.LeaseID
		}
	})
	if len(ordered) > limit {
		return ordered[:limit]
	}
	return ordered
}
// markPeerDirectoryRendezvousLeases reflects leases in the peer directory.
// For every lease, the peer (if set and not the local node) gets its
// CandidateCount incremented and the "relay_required" mode added; the relay
// (if set and not the local node) gets its EndpointCount incremented and the
// "relay_control" mode added. Directory entries are created on demand.
func markPeerDirectoryRendezvousLeases(directory map[string]*PeerDirectoryEntry, leases []PeerRendezvousLease, localNodeID string) {
	// addMode appends mode to the entry's connectivity modes once.
	addMode := func(entry *PeerDirectoryEntry, mode string) {
		if containsString(entry.ConnectivityModes, mode) {
			return
		}
		entry.ConnectivityModes = append(entry.ConnectivityModes, mode)
	}
	for i := range leases {
		if peerID := leases[i].PeerNodeID; peerID != "" && peerID != localNodeID {
			peer := peerDirectoryEntry(directory, peerID)
			peer.CandidateCount++
			addMode(peer, "relay_required")
		}
		if relayID := leases[i].RelayNodeID; relayID != "" && relayID != localNodeID {
			relay := peerDirectoryEntry(directory, relayID)
			relay.EndpointCount++
			addMode(relay, "relay_control")
		}
	}
}
// mergePeerDirectoryRoute records route.RouteID on the directory entry of
// every non-blank hop other than the local node, creating entries as needed
// and avoiding duplicate route IDs.
func mergePeerDirectoryRoute(directory map[string]*PeerDirectoryEntry, route SyntheticMeshRouteConfig, localNodeID string) {
	for _, raw := range route.Hops {
		hop := strings.TrimSpace(raw)
		if hop == "" || hop == localNodeID {
			continue
		}
		entry := peerDirectoryEntry(directory, hop)
		if containsString(entry.RouteIDs, route.RouteID) {
			continue
		}
		entry.RouteIDs = append(entry.RouteIDs, route.RouteID)
	}
}
// mergePeerDirectoryCandidates adds len(candidates) to the node's
// CandidateCount and records each distinct non-blank connectivity mode on its
// directory entry. The entry is created even when candidates is empty.
func mergePeerDirectoryCandidates(directory map[string]*PeerDirectoryEntry, nodeID string, candidates []PeerEndpointCandidate) {
	entry := peerDirectoryEntry(directory, nodeID)
	entry.CandidateCount += len(candidates)
	for i := range candidates {
		mode := candidates[i].ConnectivityMode
		if strings.TrimSpace(mode) == "" || containsString(entry.ConnectivityModes, mode) {
			continue
		}
		entry.ConnectivityModes = append(entry.ConnectivityModes, mode)
	}
}
// peerDirectoryEntry returns the directory entry for nodeID, inserting a new
// entry carrying only that node ID when none exists yet.
func peerDirectoryEntry(directory map[string]*PeerDirectoryEntry, nodeID string) *PeerDirectoryEntry {
	existing, found := directory[nodeID]
	if !found {
		existing = &PeerDirectoryEntry{NodeID: nodeID}
		directory[nodeID] = existing
	}
	return existing
}
// mergeRecoverySeeds folds seeds into out keyed by (node ID, endpoint). An
// incoming seed is stored when its key is new or its priority number is
// strictly lower than the stored one. Nil metadata is replaced with an empty
// JSON object before storing.
func mergeRecoverySeeds(out map[string]PeerRecoverySeed, seeds []PeerRecoverySeed) {
	for _, incoming := range seeds {
		if incoming.Metadata == nil {
			incoming.Metadata = json.RawMessage(`{}`)
		}
		key := incoming.NodeID + "\x00" + incoming.Endpoint
		if current, found := out[key]; found && incoming.Priority >= current.Priority {
			continue
		}
		out[key] = incoming
	}
}
// sortedRecoverySeeds flattens items into a slice ordered by ascending
// priority, then node ID, then endpoint, and truncates the result to at most
// limit entries.
func sortedRecoverySeeds(items map[string]PeerRecoverySeed, limit int) []PeerRecoverySeed {
	ordered := make([]PeerRecoverySeed, 0, len(items))
	for key := range items {
		ordered = append(ordered, items[key])
	}
	sort.SliceStable(ordered, func(a, b int) bool {
		left, right := ordered[a], ordered[b]
		switch {
		case left.Priority != right.Priority:
			return left.Priority < right.Priority
		case left.NodeID != right.NodeID:
			return left.NodeID < right.NodeID
		default:
			return left.Endpoint < right.Endpoint
		}
	})
	if len(ordered) > limit {
		return ordered[:limit]
	}
	return ordered
}
// markPeerDirectoryRecoverySeeds flags the directory entry of every seed's
// node as a recovery seed and records the seed's connectivity mode on it when
// that mode is non-blank and not already listed. Entries are created on demand.
func markPeerDirectoryRecoverySeeds(directory map[string]*PeerDirectoryEntry, seeds []PeerRecoverySeed) {
	for i := range seeds {
		entry := peerDirectoryEntry(directory, seeds[i].NodeID)
		entry.RecoverySeed = true
		mode := seeds[i].ConnectivityMode
		if strings.TrimSpace(mode) == "" || containsString(entry.ConnectivityModes, mode) {
			continue
		}
		entry.ConnectivityModes = append(entry.ConnectivityModes, mode)
	}
}
// sortedPeerDirectory returns value copies of the directory entries that have
// a non-empty node ID, ordered by node ID. As a side effect every entry in
// items (including skipped ones) has its RouteIDs and ConnectivityModes sorted
// in place.
func sortedPeerDirectory(items map[string]*PeerDirectoryEntry) []PeerDirectoryEntry {
	listing := make([]PeerDirectoryEntry, 0, len(items))
	for _, entry := range items {
		sort.Strings(entry.RouteIDs)
		sort.Strings(entry.ConnectivityModes)
		if entry.NodeID == "" {
			continue
		}
		listing = append(listing, *entry)
	}
	byNodeID := func(a, b int) bool { return listing[a].NodeID < listing[b].NodeID }
	sort.SliceStable(listing, byNodeID)
	return listing
}
// validatePeerEndpointCandidates checks endpoint candidates grouped by node.
//
// It returns ErrInvalidPayload when a map key is blank or not on routePath, or
// when any candidate has a blank endpoint ID, node ID or address, a node ID
// different from its map key, an unsupported transport, reachability or
// connectivity mode, a non-empty unsupported NAT type, or non-empty metadata
// that is not valid JSON. An empty map is valid.
func validatePeerEndpointCandidates(candidates map[string][]PeerEndpointCandidate, routePath []string) error {
	for nodeID, items := range candidates {
		if strings.TrimSpace(nodeID) == "" || !containsString(routePath, nodeID) {
			return ErrInvalidPayload
		}
		for _, candidate := range items {
			switch {
			case strings.TrimSpace(candidate.EndpointID) == "",
				strings.TrimSpace(candidate.NodeID) == "",
				candidate.NodeID != nodeID,
				strings.TrimSpace(candidate.Address) == "":
				return ErrInvalidPayload
			case !isPeerEndpointTransport(candidate.Transport),
				!isPeerEndpointReachability(candidate.Reachability),
				!isPeerEndpointConnectivityMode(candidate.ConnectivityMode):
				return ErrInvalidPayload
			case candidate.NATType != "" && !isPeerEndpointNATType(candidate.NATType):
				return ErrInvalidPayload
			case len(candidate.Metadata) > 0 && !json.Valid(candidate.Metadata):
				return ErrInvalidPayload
			}
		}
	}
	return nil
}
// scopedPeerEndpoints returns the subset of peers whose node is on routePath
// and whose endpoint is non-blank. The result is always a non-nil map and
// endpoint values are copied unchanged.
func scopedPeerEndpoints(peers map[string]string, routePath []string) map[string]string {
	scoped := make(map[string]string)
	for nodeID, endpoint := range peers {
		if strings.TrimSpace(endpoint) == "" || !containsString(routePath, nodeID) {
			continue
		}
		scoped[nodeID] = endpoint
	}
	return scoped
}
// scopedPeerEndpointCandidates returns the candidates of nodes that are on
// routePath. Candidates are copied, with nil metadata replaced by an empty
// JSON object; the input is not modified. Nodes with no candidates get no key
// in the (always non-nil) result.
func scopedPeerEndpointCandidates(candidates map[string][]PeerEndpointCandidate, routePath []string) map[string][]PeerEndpointCandidate {
	scoped := make(map[string][]PeerEndpointCandidate)
	for nodeID, items := range candidates {
		if !containsString(routePath, nodeID) {
			continue
		}
		for i := range items {
			entry := items[i] // value copy; the caller's element stays untouched
			if entry.Metadata == nil {
				entry.Metadata = json.RawMessage(`{}`)
			}
			scoped[nodeID] = append(scoped[nodeID], entry)
		}
	}
	return scoped
}
// isPeerEndpointTransport reports whether value is one of the transports
// accepted for peer endpoints. Matching is exact and case-sensitive.
func isPeerEndpointTransport(value string) bool {
	return value == "direct_tcp_tls" ||
		value == "wss" ||
		value == "relay" ||
		value == "outbound_reverse"
}
// isPeerRendezvousTransport reports whether value is one of the transports
// accepted for rendezvous leases. Matching is exact and case-sensitive.
func isPeerRendezvousTransport(value string) bool {
	return value == "relay_control" ||
		value == "relay" ||
		value == "wss" ||
		value == "direct_tcp_tls"
}
// isPeerEndpointReachability reports whether value is one of the accepted
// reachability labels. Matching is exact and case-sensitive.
func isPeerEndpointReachability(value string) bool {
	return value == "public" ||
		value == "private" ||
		value == "relay" ||
		value == "outbound_only" ||
		value == "unknown"
}
// isPeerEndpointConnectivityMode reports whether value is one of the accepted
// connectivity modes. Matching is exact and case-sensitive.
func isPeerEndpointConnectivityMode(value string) bool {
	return value == "direct" ||
		value == "relay_required" ||
		value == "outbound_only" ||
		value == "unknown"
}
// isPeerEndpointNATType reports whether value is one of the accepted NAT type
// labels. Matching is exact and case-sensitive.
func isPeerEndpointNATType(value string) bool {
	return value == "unknown" ||
		value == "none" ||
		value == "full_cone" ||
		value == "restricted" ||
		value == "port_restricted" ||
		value == "symmetric" ||
		value == "blocked"
}
// controlPlaneAllowedChannels filters channels down to the control-plane
// channels "fabric_control" and "route_control", trimming whitespace, dropping
// duplicates and preserving first-seen order. The result is never nil.
func controlPlaneAllowedChannels(channels []string) []string {
	allowed := []string{}
	for _, raw := range channels {
		name := strings.TrimSpace(raw)
		if name != "fabric_control" && name != "route_control" {
			continue
		}
		if containsString(allowed, name) {
			continue
		}
		allowed = append(allowed, name)
	}
	return allowed
}
// firstNonEmptyStringSlice returns the first argument that has at least one
// element, or nil when every argument is empty. The slice is returned as-is,
// not copied.
func firstNonEmptyStringSlice(values ...[]string) []string {
	for i := range values {
		if len(values[i]) == 0 {
			continue
		}
		return values[i]
	}
	return nil
}
// isHTTPControlEndpoint reports whether endpoint, after trimming whitespace
// and lower-casing, starts with "http://" or "https://".
func isHTTPControlEndpoint(endpoint string) bool {
	normalized := strings.ToLower(strings.TrimSpace(endpoint))
	for _, scheme := range [...]string{"http://", "https://"} {
		if strings.HasPrefix(normalized, scheme) {
			return true
		}
	}
	return false
}
// firstNodeID returns the selector's trimmed NodeID when it is non-blank,
// otherwise the first non-blank (trimmed) entry of NodeIDs, otherwise "".
func firstNodeID(selector nodeSelector) string {
	if primary := strings.TrimSpace(selector.NodeID); primary != "" {
		return primary
	}
	for _, raw := range selector.NodeIDs {
		if id := strings.TrimSpace(raw); id != "" {
			return id
		}
	}
	return ""
}
// cleanRouteNodePath returns a new, non-nil slice holding the whitespace-
// trimmed entries of values in order, with blank entries removed.
func cleanRouteNodePath(values []string) []string {
	cleaned := make([]string, 0, len(values))
	for i := range values {
		hop := strings.TrimSpace(values[i])
		if hop == "" {
			continue
		}
		cleaned = append(cleaned, hop)
	}
	return cleaned
}
// containsString reports whether values contains needle, comparing with
// surrounding whitespace trimmed on both sides. A blank needle never matches.
func containsString(values []string, needle string) bool {
	target := strings.TrimSpace(needle)
	if target == "" {
		return false
	}
	for i := range values {
		if strings.TrimSpace(values[i]) == target {
			return true
		}
	}
	return false
}
// generateFencingToken returns a fresh token of the form
// "rap_vpn_fence_" + 64 hex characters, built from 32 bytes read from
// crypto/rand. It returns an empty string and the read error when the random
// source fails.
func generateFencingToken() (string, error) {
	var entropy [32]byte
	_, err := rand.Read(entropy[:])
	if err != nil {
		return "", err
	}
	token := "rap_vpn_fence_" + hex.EncodeToString(entropy[:])
	return token, nil
}