Files
rdp-proxy/backend/internal/modules/cluster/postgres_store.go
T

5579 lines
198 KiB
Go

package cluster
import (
"context"
"encoding/json"
"errors"
"fmt"
"net"
"net/url"
"strings"
"time"
"github.com/google/uuid"
"github.com/jackc/pgx/v5"
"github.com/jackc/pgx/v5/pgconn"
"github.com/jackc/pgx/v5/pgxpool"
"github.com/example/remote-access-platform/backend/internal/platform/authority"
"github.com/example/remote-access-platform/backend/internal/platform/clusterauth"
"github.com/example/remote-access-platform/backend/internal/platform/secrets"
)
// PostgresStore is the cluster module's store backed by a pgx connection pool.
type PostgresStore struct {
// db is the connection pool the store's queries run against.
db *pgxpool.Pool
// authority is the optional verifier handed to authority.EffectivePlatformRole;
// it is nil when NewPostgresStore was called without a verifier.
authority *authority.Verifier
// clusterKeyEncryptor, when non-nil, encrypts cluster authority private keys
// before they are stored and decrypts them when they are read back.
clusterKeyEncryptor *secrets.Encryptor
}
// Storage-format and SQL interval constants shared by the store.
const (
	// encryptedClusterAuthorityKeyPrefix marks a stored cluster authority
	// private key as an encrypted JSON payload rather than plaintext.
	encryptedClusterAuthorityKeyPrefix = "enc:v1:"

	// nodeHeartbeatStaleIntervalSQL is bound as a Postgres interval; an active
	// node not seen within it is reported as 'offline' by the node queries.
	nodeHeartbeatStaleIntervalSQL = "1 minute"

	// meshLinkStaleIntervalSQL is an interval literal for mesh links.
	// NOTE(review): its use is outside this part of the file — presumably the
	// analogous staleness cutoff; confirm at the call sites.
	meshLinkStaleIntervalSQL = "2 minutes"
)
// NewPostgresStore builds a store on top of db. Only the first verifier (if
// any) is used; additional ones are ignored. With no verifier the store's
// authority field stays nil.
func NewPostgresStore(db *pgxpool.Pool, verifiers ...*authority.Verifier) *PostgresStore {
	store := &PostgresStore{db: db}
	if len(verifiers) != 0 {
		store.authority = verifiers[0]
	}
	return store
}
// WithClusterKeyEncryptor installs the encryptor used for cluster authority
// private keys and returns the receiver so calls can be chained. It is a
// no-op on a nil receiver.
func (s *PostgresStore) WithClusterKeyEncryptor(encryptor *secrets.Encryptor) *PostgresStore {
	if s == nil {
		return nil
	}
	s.clusterKeyEncryptor = encryptor
	return s
}
// encodeClusterAuthorityPrivateKey returns the representation of privateKey
// that is written to the database. Without an encryptor the key is returned
// unchanged; otherwise it is encrypted with the cluster-specific AAD,
// JSON-encoded, and prefixed with encryptedClusterAuthorityKeyPrefix.
func (s *PostgresStore) encodeClusterAuthorityPrivateKey(clusterID, privateKey string) (string, error) {
	if s == nil || s.clusterKeyEncryptor == nil {
		return privateKey, nil
	}

	aad := clusterAuthorityPrivateKeyAAD(clusterID)
	sealed, err := s.clusterKeyEncryptor.Encrypt([]byte(privateKey), aad)
	if err != nil {
		return "", err
	}

	envelope, err := json.Marshal(sealed)
	if err != nil {
		return "", err
	}
	return encryptedClusterAuthorityKeyPrefix + string(envelope), nil
}
// decodeClusterAuthorityPrivateKey is the inverse of
// encodeClusterAuthorityPrivateKey.
//
//   - unprefixed value, no encryptor: returned as-is (plaintext storage);
//   - unprefixed value, encryptor configured: rejected, plaintext keys are not
//     accepted once encryption is enabled;
//   - prefixed value, no encryptor: secrets.ErrSecretEncryptionKeyMissing;
//   - prefixed value, encryptor configured: the JSON payload after the prefix
//     is decrypted with the cluster-specific AAD.
func (s *PostgresStore) decodeClusterAuthorityPrivateKey(clusterID, stored string) (string, error) {
	hasEncryptor := s != nil && s.clusterKeyEncryptor != nil
	isEncrypted := strings.HasPrefix(stored, encryptedClusterAuthorityKeyPrefix)

	switch {
	case !isEncrypted && hasEncryptor:
		return "", fmt.Errorf("cluster authority private key is not encrypted")
	case !isEncrypted:
		return stored, nil
	case !hasEncryptor:
		return "", secrets.ErrSecretEncryptionKeyMissing
	}

	rawEnvelope := strings.TrimPrefix(stored, encryptedClusterAuthorityKeyPrefix)
	var envelope secrets.EncryptedPayload
	if err := json.Unmarshal([]byte(rawEnvelope), &envelope); err != nil {
		return "", fmt.Errorf("decode encrypted cluster authority private key: %w", err)
	}

	opened, err := s.clusterKeyEncryptor.Decrypt(envelope, clusterAuthorityPrivateKeyAAD(clusterID))
	if err != nil {
		return "", err
	}
	return string(opened), nil
}
// clusterAuthorityPrivateKeyAAD builds the additional authenticated data that
// is passed to the encryptor alongside a cluster's authority private key: a
// fixed domain tag followed by the whitespace-trimmed cluster ID.
func clusterAuthorityPrivateKeyAAD(clusterID string) []byte {
	const domainTag = "rap-cluster-authority-v1|"
	aad := []byte(domainTag)
	return append(aad, strings.TrimSpace(clusterID)...)
}
// stringPtrValue dereferences value and trims surrounding whitespace; a nil
// pointer yields the empty string.
func stringPtrValue(value *string) string {
	if value != nil {
		return strings.TrimSpace(*value)
	}
	return ""
}
// GetPlatformRole returns the result of authority.EffectivePlatformRole for
// userID, evaluated against the store's pool and (possibly nil) verifier.
func (s *PostgresStore) GetPlatformRole(ctx context.Context, userID string) (string, error) {
	role, err := authority.EffectivePlatformRole(ctx, s.db, s.authority, userID)
	return role, err
}
// ListClusters returns every row of the clusters table ordered by created_at,
// newest first. The slice is nil when the query yields no rows.
func (s *PostgresStore) ListClusters(ctx context.Context) ([]Cluster, error) {
	const query = `
SELECT id::text, slug, name, status, region, metadata, created_at, updated_at
FROM clusters
ORDER BY created_at DESC
`
	rows, err := s.db.Query(ctx, query)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var clusters []Cluster
	for rows.Next() {
		cluster, scanErr := scanCluster(rows)
		if scanErr != nil {
			return nil, scanErr
		}
		clusters = append(clusters, cluster)
	}
	return clusters, rows.Err()
}
// GetCluster loads the cluster whose id equals clusterID (cast to uuid in
// SQL). The error, including the no-row case, is whatever scanCluster returns.
func (s *PostgresStore) GetCluster(ctx context.Context, clusterID string) (Cluster, error) {
	const query = `
SELECT id::text, slug, name, status, region, metadata, created_at, updated_at
FROM clusters
WHERE id = $1::uuid
`
	return scanCluster(s.db.QueryRow(ctx, query, clusterID))
}
// CreateCluster creates a cluster together with its authority state and a
// freshly generated ed25519 authority key pair, all inside one transaction.
//
// The private key is passed through encodeClusterAuthorityPrivateKey before
// it is stored, so it is encrypted whenever an encryptor is configured. The
// new cluster is always inserted with status 'active'. Any failure rolls the
// whole transaction back and returns a zero Cluster.
func (s *PostgresStore) CreateCluster(ctx context.Context, input CreateClusterInput) (Cluster, error) {
	const insertCluster = `
INSERT INTO clusters (id, slug, name, status, region, metadata, created_at, updated_at)
VALUES ($1::uuid, $2, $3, 'active', $4, $5::jsonb, $6, $6)
RETURNING id::text, slug, name, status, region, metadata, created_at, updated_at
`
	const insertAuthorityState = `
INSERT INTO cluster_authority_states (cluster_id, authority_state, mutation_mode, term, notes, updated_by_user_id, updated_at)
VALUES ($1::uuid, 'authoritative', 'normal', 1, 'cluster created with authority key', $2::uuid, $3)
ON CONFLICT (cluster_id) DO NOTHING
`
	const insertAuthority = `
INSERT INTO cluster_authorities (
cluster_id, authority_state, key_algorithm, public_key, public_key_fingerprint,
private_key, created_by_user_id, created_at, updated_at, metadata
) VALUES ($1::uuid, 'active', 'ed25519', $2, $3, $4, $5::uuid, $6, $6, $7::jsonb)
`
	clusterID := uuid.NewString()
	createdAt := time.Now().UTC()

	// Generate and encode the key before opening the transaction so no
	// connection is held while doing crypto work.
	keyPair, err := clusterauth.GenerateKeyPair()
	if err != nil {
		return Cluster{}, err
	}
	encodedKey, err := s.encodeClusterAuthorityPrivateKey(clusterID, keyPair.PrivateKeyB64)
	if err != nil {
		return Cluster{}, err
	}

	tx, err := s.db.Begin(ctx)
	if err != nil {
		return Cluster{}, err
	}
	// Rollback after a successful Commit is harmless; its error is ignored.
	defer func() { _ = tx.Rollback(ctx) }()

	created, err := scanCluster(tx.QueryRow(ctx, insertCluster,
		clusterID, input.Slug, input.Name, input.Region, []byte(input.Metadata), createdAt))
	if err != nil {
		return Cluster{}, err
	}

	_, err = tx.Exec(ctx, insertAuthorityState, clusterID, input.ActorUserID, createdAt)
	if err != nil {
		return Cluster{}, err
	}

	authorityMetadata := []byte(`{"storage":"database_signer","production_target":"external_cluster_signer_or_hsm"}`)
	_, err = tx.Exec(ctx, insertAuthority,
		clusterID, keyPair.PublicKeyB64, keyPair.Fingerprint, encodedKey, input.ActorUserID, createdAt, authorityMetadata)
	if err != nil {
		return Cluster{}, err
	}

	if err := tx.Commit(ctx); err != nil {
		return Cluster{}, err
	}
	return created, nil
}
// UpdateCluster overwrites name, status, region and metadata of the cluster
// identified by input.ClusterID, stamps updated_at with the current UTC time,
// and returns the updated row as scanned by scanCluster.
func (s *PostgresStore) UpdateCluster(ctx context.Context, input UpdateClusterInput) (Cluster, error) {
	const query = `
UPDATE clusters
SET name = $2,
status = $3,
region = $4,
metadata = $5::jsonb,
updated_at = $6
WHERE id = $1::uuid
RETURNING id::text, slug, name, status, region, metadata, created_at, updated_at
`
	updatedAt := time.Now().UTC()
	return scanCluster(s.db.QueryRow(ctx, query,
		input.ClusterID, input.Name, input.Status, input.Region, []byte(input.Metadata), updatedAt))
}
// GetClusterAuthority loads the authority key record of a cluster and
// replaces its stored private key with the decoded (decrypted, when
// applicable) value. A zero ClusterAuthorityKey is returned on any error.
func (s *PostgresStore) GetClusterAuthority(ctx context.Context, clusterID string) (ClusterAuthorityKey, error) {
	const query = `
SELECT cluster_id::text, authority_state, key_algorithm, public_key,
public_key_fingerprint, private_key, created_at, updated_at, metadata
FROM cluster_authorities
WHERE cluster_id = $1::uuid
`
	key, err := scanClusterAuthority(s.db.QueryRow(ctx, query, clusterID))
	if err != nil {
		return ClusterAuthorityKey{}, err
	}

	// The AAD is derived from the cluster ID read back from the row.
	decoded, err := s.decodeClusterAuthorityPrivateKey(key.ClusterID, key.PrivateKey)
	if err != nil {
		return ClusterAuthorityKey{}, err
	}
	key.PrivateKey = decoded
	return key, nil
}
// EnsureClusterAuthority makes sure the cluster has an authority key and
// returns the key that ends up stored.
//
// A new ed25519 key pair is generated on every call and inserted only if the
// cluster exists and has no authority row yet (ON CONFLICT DO NOTHING), so an
// existing key is never replaced. The final read goes through
// GetClusterAuthority and therefore returns the decoded private key.
func (s *PostgresStore) EnsureClusterAuthority(ctx context.Context, clusterID string, actorUserID *string) (ClusterAuthorityKey, error) {
	const insertAuthority = `
INSERT INTO cluster_authorities (
cluster_id, authority_state, key_algorithm, public_key, public_key_fingerprint,
private_key, created_by_user_id, created_at, updated_at, metadata
)
SELECT c.id, 'active', 'ed25519', $2, $3, $4, $5::uuid, $6, $6,
'{"storage":"database_signer","created_by":"ensure_cluster_authority"}'::jsonb
FROM clusters c
WHERE c.id = $1::uuid
ON CONFLICT (cluster_id) DO NOTHING
`
	const insertAuthorityState = `
INSERT INTO cluster_authority_states (cluster_id, authority_state, mutation_mode, term, notes, updated_by_user_id, updated_at)
VALUES ($1::uuid, 'authoritative', 'normal', 1, 'authority key ensured', $2::uuid, $3)
ON CONFLICT (cluster_id) DO NOTHING
`
	keyPair, err := clusterauth.GenerateKeyPair()
	if err != nil {
		return ClusterAuthorityKey{}, err
	}
	encodedKey, err := s.encodeClusterAuthorityPrivateKey(clusterID, keyPair.PrivateKeyB64)
	if err != nil {
		return ClusterAuthorityKey{}, err
	}

	ensuredAt := time.Now().UTC()
	if _, err := s.db.Exec(ctx, insertAuthority,
		clusterID, keyPair.PublicKeyB64, keyPair.Fingerprint, encodedKey, actorUserID, ensuredAt); err != nil {
		return ClusterAuthorityKey{}, err
	}

	// Best effort: a failure to seed the authority state is deliberately
	// ignored and does not fail the call.
	// NOTE(review): CreateCluster treats the same insert as fatal — confirm
	// that ignoring it here is intended.
	_, _ = s.db.Exec(ctx, insertAuthorityState, clusterID, actorUserID, ensuredAt)

	return s.GetClusterAuthority(ctx, clusterID)
}
// ListClusterNodes returns the nodes that have a membership in the cluster,
// newest node first, with two derived columns computed in SQL:
//
//   - health_status becomes 'offline' for an active node whose last_seen_at
//     (falling back to updated_at, then created_at) is older than
//     nodeHeartbeatStaleIntervalSQL; otherwise the stored value is used.
//   - version_state is derived from the most recently updated enabled
//     'rap-node-agent' update policy and the most recently observed status
//     report for the node ('current', 'failed', 'rollback', 'updating',
//     'outdated'), falling back to the stored value.
//
// The node's group id and name are included when it has a group membership.
// The slice is nil when the cluster has no members.
func (s *PostgresStore) ListClusterNodes(ctx context.Context, clusterID string) ([]ClusterNode, error) {
	const query = `
SELECT n.id::text, n.owner_organization_id::text, n.node_key, n.name, n.ownership_type,
n.registration_status,
CASE
WHEN n.registration_status = 'active'
AND COALESCE(n.last_seen_at, n.updated_at, n.created_at) < NOW() - $2::interval THEN 'offline'
ELSE n.health_status
END AS health_status,
CASE
WHEN update_policy.enabled
AND update_policy.target_version IS NOT NULL
AND n.reported_version = update_policy.target_version THEN 'current'
WHEN update_status.status IN ('failed', 'error') THEN 'failed'
WHEN update_status.phase = 'rollback' OR update_status.status = 'rolled_back' THEN 'rollback'
WHEN update_policy.enabled
AND update_policy.target_version IS NOT NULL
AND n.reported_version IS DISTINCT FROM update_policy.target_version
AND update_status.target_version = update_policy.target_version
AND update_status.phase IN ('planned', 'download', 'apply', 'health_check')
AND update_status.status IN ('accepted', 'started', 'staged', 'running') THEN 'updating'
WHEN update_policy.enabled
AND update_policy.target_version IS NOT NULL
AND n.reported_version IS DISTINCT FROM update_policy.target_version THEN 'outdated'
ELSE n.version_state
END AS version_state,
n.partition_state,
n.reported_version, n.last_seen_at, cm.membership_status, cm.metadata,
ng.id::text, ng.name,
n.created_at, n.updated_at
FROM cluster_memberships cm
JOIN nodes n ON n.id = cm.node_id
LEFT JOIN cluster_node_group_memberships ngm ON ngm.cluster_id = cm.cluster_id AND ngm.node_id = cm.node_id
LEFT JOIN cluster_node_groups ng ON ng.cluster_id = ngm.cluster_id AND ng.id = ngm.group_id
LEFT JOIN LATERAL (
SELECT p.enabled, p.target_version
FROM node_update_desired_policies p
WHERE p.cluster_id = cm.cluster_id
AND p.node_id = cm.node_id
AND p.product = 'rap-node-agent'
AND p.enabled
ORDER BY p.updated_at DESC
LIMIT 1
) update_policy ON true
LEFT JOIN LATERAL (
SELECT s.target_version, s.phase, s.status
FROM node_update_status_reports s
WHERE s.cluster_id = cm.cluster_id
AND s.node_id = cm.node_id
AND s.product = 'rap-node-agent'
ORDER BY s.observed_at DESC
LIMIT 1
) update_status ON true
WHERE cm.cluster_id = $1::uuid
ORDER BY n.created_at DESC
`
	rows, err := s.db.Query(ctx, query, clusterID, nodeHeartbeatStaleIntervalSQL)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var nodes []ClusterNode
	for rows.Next() {
		node, scanErr := scanClusterNode(rows)
		if scanErr != nil {
			return nil, scanErr
		}
		nodes = append(nodes, node)
	}
	return nodes, rows.Err()
}
// ListNodeGroups returns the node groups of a cluster ordered by sort_order
// and then name. When no scan error occurs the returned slice is never nil
// (an empty cluster yields an empty slice).
func (s *PostgresStore) ListNodeGroups(ctx context.Context, clusterID string) ([]ClusterNodeGroup, error) {
	const query = `
SELECT id::text, cluster_id::text, parent_group_id::text, name, description,
sort_order, metadata, created_by_user_id::text, created_at, updated_at
FROM cluster_node_groups
WHERE cluster_id = $1::uuid
ORDER BY sort_order, name
`
	rows, err := s.db.Query(ctx, query, clusterID)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	// Start from an empty, non-nil slice so "no groups" is not reported as nil.
	groups := []ClusterNodeGroup{}
	for rows.Next() {
		group, scanErr := scanNodeGroup(rows)
		if scanErr != nil {
			return nil, scanErr
		}
		groups = append(groups, group)
	}
	return groups, rows.Err()
}
// CreateNodeGroup inserts a node group into a cluster and records a
// "cluster_node_group.created" audit event.
//
// The insert only happens when input.ParentGroupID is NULL or refers to a
// group in the same cluster; otherwise no row is produced and the error is
// whatever scanNodeGroup reports for an empty result. The audit write is
// best effort: its error is discarded.
func (s *PostgresStore) CreateNodeGroup(ctx context.Context, input CreateNodeGroupInput) (ClusterNodeGroup, error) {
	const query = `
WITH parent_ok AS (
SELECT $3::uuid AS parent_group_id
WHERE $3::uuid IS NULL
OR EXISTS (
SELECT 1
FROM cluster_node_groups parent
WHERE parent.cluster_id = $2::uuid
AND parent.id = $3::uuid
)
)
INSERT INTO cluster_node_groups (
id, cluster_id, parent_group_id, name, description, sort_order, metadata, created_by_user_id, created_at, updated_at
)
SELECT $1::uuid, $2::uuid, parent_group_id, $4, $5, $6, $7::jsonb, $8::uuid, NOW(), NOW()
FROM parent_ok
RETURNING id::text, cluster_id::text, parent_group_id::text, name, description,
sort_order, metadata, created_by_user_id::text, created_at, updated_at
`
	groupID := uuid.NewString()
	group, err := scanNodeGroup(s.db.QueryRow(ctx, query,
		groupID, input.ClusterID, input.ParentGroupID, input.Name, input.Description,
		input.SortOrder, []byte(input.Metadata), input.ActorUserID))
	if err != nil {
		return ClusterNodeGroup{}, err
	}

	event := ClusterAuditEvent{
		ClusterID:   &input.ClusterID,
		ActorUserID: &input.ActorUserID,
		EventType:   "cluster_node_group.created",
		TargetType:  "cluster_node_group",
		TargetID:    &group.ID,
		Payload:     json.RawMessage(`{"hierarchical":true}`),
		CreatedAt:   time.Now().UTC(),
	}
	// Deliberately ignored: the group already exists at this point.
	_ = s.RecordAudit(ctx, event)

	return group, nil
}
// CreateJoinToken stores a new join token for a cluster. Only tokenHash is
// persisted, never a raw token; the row starts as 'active' with used_count 0.
func (s *PostgresStore) CreateJoinToken(ctx context.Context, input CreateJoinTokenInput, tokenHash string) (NodeJoinToken, error) {
	const query = `
INSERT INTO node_join_tokens (
id, cluster_id, token_hash, scope, expires_at, max_uses, used_count, status, created_by_user_id, created_at
) VALUES ($1::uuid, $2::uuid, $3, $4::jsonb, $5, $6, 0, 'active', $7::uuid, NOW())
RETURNING id::text, cluster_id::text, scope, expires_at, max_uses, used_count, status,
created_by_user_id::text, created_at, revoked_at, authority_payload, authority_signature
`
	tokenID := uuid.NewString()
	inserted := s.db.QueryRow(ctx, query,
		tokenID, input.ClusterID, tokenHash, []byte(input.Scope), input.ExpiresAt, input.MaxUses, input.ActorUserID)
	return scanJoinToken(inserted)
}
// ListJoinTokens returns all join tokens of a cluster, newest first. When no
// scan error occurs the returned slice is never nil.
func (s *PostgresStore) ListJoinTokens(ctx context.Context, clusterID string) ([]NodeJoinToken, error) {
	const query = `
SELECT id::text, cluster_id::text, scope, expires_at, max_uses, used_count, status,
created_by_user_id::text, created_at, revoked_at, authority_payload, authority_signature
FROM node_join_tokens
WHERE cluster_id = $1::uuid
ORDER BY created_at DESC
`
	rows, err := s.db.Query(ctx, query, clusterID)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	// Start from an empty, non-nil slice so "no tokens" is not reported as nil.
	tokens := []NodeJoinToken{}
	for rows.Next() {
		token, scanErr := scanJoinToken(rows)
		if scanErr != nil {
			return nil, scanErr
		}
		tokens = append(tokens, token)
	}
	return tokens, rows.Err()
}
// SetJoinTokenAuthority stores the authority payload and the JSON-encoded
// signature on the join token identified by (clusterID, tokenID) and returns
// the updated token.
func (s *PostgresStore) SetJoinTokenAuthority(ctx context.Context, clusterID, tokenID string, payload json.RawMessage, signature ClusterSignature) (NodeJoinToken, error) {
	const query = `
UPDATE node_join_tokens
SET authority_payload = $3::jsonb,
authority_signature = $4::jsonb
WHERE cluster_id = $1::uuid
AND id = $2::uuid
RETURNING id::text, cluster_id::text, scope, expires_at, max_uses, used_count, status,
created_by_user_id::text, created_at, revoked_at, authority_payload, authority_signature
`
	encodedSignature, err := json.Marshal(signature)
	if err != nil {
		return NodeJoinToken{}, err
	}
	updated := s.db.QueryRow(ctx, query, clusterID, tokenID, []byte(payload), encodedSignature)
	return scanJoinToken(updated)
}
// GetValidJoinTokenByHash looks up a join token by its hash within a cluster,
// matching only tokens that are 'active', not yet expired, and still have
// uses left (used_count < max_uses).
func (s *PostgresStore) GetValidJoinTokenByHash(ctx context.Context, clusterID, tokenHash string) (NodeJoinToken, error) {
	const query = `
SELECT id::text, cluster_id::text, scope, expires_at, max_uses, used_count, status,
created_by_user_id::text, created_at, revoked_at, authority_payload, authority_signature
FROM node_join_tokens
WHERE cluster_id = $1::uuid
AND token_hash = $2
AND status = 'active'
AND expires_at > NOW()
AND used_count < max_uses
`
	return scanJoinToken(s.db.QueryRow(ctx, query, clusterID, tokenHash))
}
// RevokeJoinToken marks an active join token as 'revoked' and stamps
// revoked_at. Tokens that are not currently 'active' are not matched.
func (s *PostgresStore) RevokeJoinToken(ctx context.Context, input RevokeJoinTokenInput) (NodeJoinToken, error) {
	const query = `
UPDATE node_join_tokens
SET status = 'revoked',
revoked_at = NOW()
WHERE id = $1::uuid
AND cluster_id = $2::uuid
AND status = 'active'
RETURNING id::text, cluster_id::text, scope, expires_at, max_uses, used_count, status,
created_by_user_id::text, created_at, revoked_at, authority_payload, authority_signature
`
	return scanJoinToken(s.db.QueryRow(ctx, query, input.TokenID, input.ClusterID))
}
// ExpireJoinTokens flips every 'active' join token of the cluster whose
// expires_at has passed to status 'expired'.
func (s *PostgresStore) ExpireJoinTokens(ctx context.Context, clusterID string) error {
	const query = `
UPDATE node_join_tokens
SET status = 'expired'
WHERE cluster_id = $1::uuid
AND status = 'active'
AND expires_at <= NOW()
`
	if _, err := s.db.Exec(ctx, query, clusterID); err != nil {
		return err
	}
	return nil
}
// CreateJoinRequest consumes one use of a join token and records a pending
// join request, atomically.
//
// The token's used_count is incremented only if the token is still active,
// unexpired and below max_uses; if that update does not touch exactly one
// row, pgx.ErrNoRows is returned and nothing is written.
func (s *PostgresStore) CreateJoinRequest(ctx context.Context, input CreateJoinRequestInput, joinTokenID string) (NodeJoinRequest, error) {
	const consumeToken = `
UPDATE node_join_tokens
SET used_count = used_count + 1
WHERE id = $1::uuid
AND cluster_id = $2::uuid
AND status = 'active'
AND expires_at > NOW()
AND used_count < max_uses
`
	const insertRequest = `
INSERT INTO node_join_requests (
id, cluster_id, join_token_id, node_name, node_fingerprint, public_key,
reported_capabilities, reported_facts, requested_roles, status, created_at, updated_at
) VALUES ($1::uuid, $2::uuid, $3::uuid, $4, $5, $6, $7::jsonb, $8::jsonb, $9::jsonb, 'pending', NOW(), NOW())
RETURNING id::text, cluster_id::text, join_token_id::text, node_name, node_fingerprint, public_key,
reported_capabilities, reported_facts, requested_roles, status, reviewed_by_user_id::text,
reviewed_at, approved_node_id::text, rejection_reason, created_at, updated_at, approval_payload, approval_signature
`
	tx, err := s.db.Begin(ctx)
	if err != nil {
		return NodeJoinRequest{}, err
	}
	// Rollback after a successful Commit is harmless; its error is ignored.
	defer func() { _ = tx.Rollback(ctx) }()

	consumed, err := tx.Exec(ctx, consumeToken, joinTokenID, input.ClusterID)
	if err != nil {
		return NodeJoinRequest{}, err
	}
	if consumed.RowsAffected() != 1 {
		return NodeJoinRequest{}, pgx.ErrNoRows
	}

	requestID := uuid.NewString()
	request, err := scanJoinRequest(tx.QueryRow(ctx, insertRequest,
		requestID, input.ClusterID, joinTokenID, input.NodeName, input.NodeFingerprint, input.PublicKey,
		[]byte(input.ReportedCapabilities), []byte(input.ReportedFacts), []byte(input.RequestedRoles)))
	if err != nil {
		return NodeJoinRequest{}, err
	}

	if err := tx.Commit(ctx); err != nil {
		return NodeJoinRequest{}, err
	}
	return request, nil
}
// GetJoinRequestForBootstrap loads a join request by cluster and request ID,
// but only if the supplied node fingerprint and public key both match the
// values stored on the request.
func (s *PostgresStore) GetJoinRequestForBootstrap(ctx context.Context, input GetJoinRequestBootstrapInput) (NodeJoinRequest, error) {
	const query = `
SELECT id::text, cluster_id::text, join_token_id::text, node_name, node_fingerprint, public_key,
reported_capabilities, reported_facts, requested_roles, status, reviewed_by_user_id::text,
reviewed_at, approved_node_id::text, rejection_reason, created_at, updated_at, approval_payload, approval_signature
FROM node_join_requests
WHERE cluster_id = $1::uuid
AND id = $2::uuid
AND node_fingerprint = $3
AND public_key = $4
`
	match := s.db.QueryRow(ctx, query,
		input.ClusterID, input.JoinRequestID, input.NodeFingerprint, input.PublicKey)
	return scanJoinRequest(match)
}
// ListJoinRequests returns all join requests of a cluster, newest first. The
// slice is nil when the cluster has none.
func (s *PostgresStore) ListJoinRequests(ctx context.Context, clusterID string) ([]NodeJoinRequest, error) {
	const query = `
SELECT id::text, cluster_id::text, join_token_id::text, node_name, node_fingerprint, public_key,
reported_capabilities, reported_facts, requested_roles, status, reviewed_by_user_id::text,
reviewed_at, approved_node_id::text, rejection_reason, created_at, updated_at, approval_payload, approval_signature
FROM node_join_requests
WHERE cluster_id = $1::uuid
ORDER BY created_at DESC
`
	rows, err := s.db.Query(ctx, query, clusterID)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var requests []NodeJoinRequest
	for rows.Next() {
		request, scanErr := scanJoinRequest(rows)
		if scanErr != nil {
			return nil, scanErr
		}
		requests = append(requests, request)
	}
	return requests, rows.Err()
}
// ApproveJoinRequest turns a pending join request into a registered node.
//
// Everything happens in one transaction: the request is loaded through
// getJoinRequestForUpdate, a node row, cluster membership, optional node-group
// membership, role assignments taken from the join token scope, and a node
// identity are inserted, the request is marked 'approved', and an audit event
// is written. Any failure rolls the whole approval back.
//
// Errors: "join request is not pending" when the request is in another
// state; ErrInvalidPayload when the chosen node group does not exist in the
// cluster or a token-scope role is not in allowedNodeRoles; otherwise the
// underlying database error.
func (s *PostgresStore) ApproveJoinRequest(ctx context.Context, input ApproveJoinRequestInput) (ApprovedJoinRequest, error) {
tx, err := s.db.Begin(ctx)
if err != nil {
return ApprovedJoinRequest{}, err
}
// Rollback after a successful Commit is harmless; its result is discarded.
defer tx.Rollback(ctx)
// NOTE(review): the helper name suggests the row is locked (SELECT ... FOR
// UPDATE) so concurrent approvals serialize — confirm in its definition.
req, err := getJoinRequestForUpdate(ctx, tx, input.ClusterID, input.JoinRequestID)
if err != nil {
return ApprovedJoinRequest{}, err
}
if req.Status != JoinRequestStatusPending {
return ApprovedJoinRequest{}, errors.New("join request is not pending")
}
now := time.Now().UTC()
nodeID := uuid.NewString()
// The node key defaults to the fingerprint the node reported in its request.
nodeKey := input.NodeKey
if nodeKey == "" {
nodeKey = req.NodeFingerprint
}
ownershipType := input.OwnershipType
if ownershipType == "" {
ownershipType = "platform_managed"
}
// An explicit group on the approval wins; otherwise fall back to the
// node_group_id stored in the join token's scope (may be empty).
// stringPtrValue already trims, so the outer TrimSpace is redundant but harmless.
nodeGroupID := strings.TrimSpace(stringPtrValue(input.NodeGroupID))
if nodeGroupID == "" {
nodeGroupID, err = s.joinRequestTokenNodeGroupID(ctx, tx, req)
if err != nil {
return ApprovedJoinRequest{}, err
}
}
// Register the node itself; health and version start as 'unknown'.
if _, err := tx.Exec(ctx, `
INSERT INTO nodes (
id, owner_organization_id, node_key, name, ownership_type, registration_status, health_status,
version_state, partition_state, metadata, created_at, updated_at
) VALUES ($1::uuid, $2::uuid, $3, $4, $5, 'active', 'unknown', 'unknown', 'healthy', $6::jsonb, $7, $7)
`, nodeID, input.OwnerOrganizationID, nodeKey, req.NodeName, ownershipType, []byte(`{"created_from_join_request":true}`), now); err != nil {
return ApprovedJoinRequest{}, err
}
// Attach the node to the cluster.
if _, err := tx.Exec(ctx, `
INSERT INTO cluster_memberships (cluster_id, node_id, membership_status, joined_at, metadata)
VALUES ($1::uuid, $2::uuid, 'active', $3, $4::jsonb)
`, input.ClusterID, nodeID, now, []byte(`{"created_from_join_request":true}`)); err != nil {
return ApprovedJoinRequest{}, err
}
if nodeGroupID != "" {
// INSERT ... SELECT only produces a row when the group exists in this
// cluster, so a row count other than 1 means the group ID is invalid.
// NOTE(review): the metadata source is always "join_token_scope", even
// when the group came from input.NodeGroupID — confirm this is intended.
tag, err := tx.Exec(ctx, `
INSERT INTO cluster_node_group_memberships (cluster_id, node_id, group_id, assigned_by_user_id, assigned_at, metadata)
SELECT $1::uuid, $2::uuid, id, $4::uuid, $5, $6::jsonb
FROM cluster_node_groups
WHERE cluster_id = $1::uuid
AND id = $3::uuid
`, input.ClusterID, nodeID, nodeGroupID, input.ActorUserID, now, []byte(`{"source":"join_token_scope"}`))
if err != nil {
return ApprovedJoinRequest{}, err
}
if tag.RowsAffected() != 1 {
return ApprovedJoinRequest{}, ErrInvalidPayload
}
}
// Roles come from the join token's scope, not from the request's
// requested_roles; each must be a known role.
roles, err := s.joinRequestTokenRoles(ctx, tx, req)
if err != nil {
return ApprovedJoinRequest{}, err
}
for _, role := range roles {
if _, ok := allowedNodeRoles[role]; !ok {
return ApprovedJoinRequest{}, ErrInvalidPayload
}
if _, err := tx.Exec(ctx, `
INSERT INTO node_role_assignments (id, cluster_id, node_id, role, status, policy, assigned_by_user_id, assigned_at)
VALUES ($1::uuid, $2::uuid, $3::uuid, $4, 'active', $5::jsonb, $6::uuid, $7)
ON CONFLICT DO NOTHING
`, uuid.NewString(), input.ClusterID, nodeID, role, []byte(`{"source":"join_token_scope"}`), input.ActorUserID, now); err != nil {
return ApprovedJoinRequest{}, err
}
}
// Bind the public key from the join request to the new node.
if _, err := tx.Exec(ctx, `
INSERT INTO node_identities (node_id, public_key, identity_status, metadata, created_at, updated_at)
VALUES ($1::uuid, $2, 'active', $3::jsonb, $4, $4)
`, nodeID, req.PublicKey, []byte(`{"source":"join_request"}`), now); err != nil {
return ApprovedJoinRequest{}, err
}
// Mark the request approved and link it to the node that was created.
row := tx.QueryRow(ctx, `
UPDATE node_join_requests
SET status = 'approved',
reviewed_by_user_id = $3::uuid,
reviewed_at = $4,
approved_node_id = $5::uuid,
updated_at = $4
WHERE cluster_id = $1::uuid
AND id = $2::uuid
RETURNING id::text, cluster_id::text, join_token_id::text, node_name, node_fingerprint, public_key,
reported_capabilities, reported_facts, requested_roles, status, reviewed_by_user_id::text,
reviewed_at, approved_node_id::text, rejection_reason, created_at, updated_at, approval_payload, approval_signature
`, input.ClusterID, input.JoinRequestID, input.ActorUserID, now, nodeID)
updated, err := scanJoinRequest(row)
if err != nil {
return ApprovedJoinRequest{}, err
}
// The audit event is part of the transaction: if it cannot be written the
// approval is not committed.
if _, err := tx.Exec(ctx, `
INSERT INTO cluster_audit_events (cluster_id, actor_user_id, event_type, target_type, target_id, payload, created_at)
VALUES ($1::uuid, $2::uuid, 'node_join_request.approved', 'node_join_request', $3, $4::jsonb, $5)
`, input.ClusterID, input.ActorUserID, input.JoinRequestID, []byte(fmt.Sprintf(`{"node_id":%q}`, nodeID)), now); err != nil {
return ApprovedJoinRequest{}, err
}
if err := tx.Commit(ctx); err != nil {
return ApprovedJoinRequest{}, err
}
// The bootstrap data carries no certificate yet, only a placeholder status.
return ApprovedJoinRequest{
JoinRequest: updated,
Bootstrap: NodeBootstrap{
NodeID: nodeID,
ClusterID: input.ClusterID,
IdentityStatus: "active",
Certificate: map[string]any{
"status": "pending_issuer_integration",
},
HeartbeatEndpoint: fmt.Sprintf("/api/v1/clusters/%s/nodes/%s/heartbeats", input.ClusterID, nodeID),
},
}, nil
}
// joinRequestTokenNodeGroupID returns the trimmed "node_group_id" from the
// scope of the join token the request was created with.
//
// It returns "" with a nil error when the request has no token ID, the token
// row is not found, or the stored scope is empty or not valid JSON. A scope
// that is valid JSON but cannot be decoded into the expected object yields
// the json.Unmarshal error.
func (s *PostgresStore) joinRequestTokenNodeGroupID(ctx context.Context, tx pgx.Tx, req NodeJoinRequest) (string, error) {
	const query = `
SELECT scope
FROM node_join_tokens
WHERE cluster_id = $1::uuid
AND id = $2::uuid
`
	if req.JoinTokenID == nil {
		return "", nil
	}
	tokenID := *req.JoinTokenID
	if strings.TrimSpace(tokenID) == "" {
		return "", nil
	}

	var rawScope []byte
	err := tx.QueryRow(ctx, query, req.ClusterID, tokenID).Scan(&rawScope)
	switch {
	case errors.Is(err, pgx.ErrNoRows):
		return "", nil
	case err != nil:
		return "", err
	}

	if len(rawScope) == 0 || !json.Valid(rawScope) {
		return "", nil
	}
	var decoded struct {
		NodeGroupID string `json:"node_group_id"`
	}
	if err := json.Unmarshal(rawScope, &decoded); err != nil {
		return "", err
	}
	return strings.TrimSpace(decoded.NodeGroupID), nil
}
// joinRequestTokenRoles returns the "roles" listed in the scope of the join
// token the request was created with, trimmed, with blanks and duplicates
// removed and first-seen order preserved.
//
// It returns nil with a nil error when the request has no token ID, the token
// row is not found, or the stored scope is empty or not valid JSON. When the
// scope decodes but lists no usable roles the result is an empty, non-nil
// slice.
func (s *PostgresStore) joinRequestTokenRoles(ctx context.Context, tx pgx.Tx, req NodeJoinRequest) ([]string, error) {
	const query = `
SELECT scope
FROM node_join_tokens
WHERE cluster_id = $1::uuid
AND id = $2::uuid
`
	if req.JoinTokenID == nil {
		return nil, nil
	}
	tokenID := *req.JoinTokenID
	if strings.TrimSpace(tokenID) == "" {
		return nil, nil
	}

	var rawScope []byte
	err := tx.QueryRow(ctx, query, req.ClusterID, tokenID).Scan(&rawScope)
	switch {
	case errors.Is(err, pgx.ErrNoRows):
		return nil, nil
	case err != nil:
		return nil, err
	}

	if len(rawScope) == 0 || !json.Valid(rawScope) {
		return nil, nil
	}
	var decoded struct {
		Roles []string `json:"roles"`
	}
	if err := json.Unmarshal(rawScope, &decoded); err != nil {
		return nil, err
	}

	roles := make([]string, 0, len(decoded.Roles))
	alreadyAdded := map[string]struct{}{}
	for _, candidate := range decoded.Roles {
		name := strings.TrimSpace(candidate)
		if name == "" {
			continue
		}
		if _, dup := alreadyAdded[name]; dup {
			continue
		}
		alreadyAdded[name] = struct{}{}
		roles = append(roles, name)
	}
	return roles, nil
}
// SetJoinRequestApprovalAuthority stores the approval payload and the
// JSON-encoded signature on a join request, bumps updated_at, and returns the
// updated request.
func (s *PostgresStore) SetJoinRequestApprovalAuthority(ctx context.Context, clusterID, joinRequestID string, payload json.RawMessage, signature ClusterSignature) (NodeJoinRequest, error) {
	const query = `
UPDATE node_join_requests
SET approval_payload = $3::jsonb,
approval_signature = $4::jsonb,
updated_at = NOW()
WHERE cluster_id = $1::uuid
AND id = $2::uuid
RETURNING id::text, cluster_id::text, join_token_id::text, node_name, node_fingerprint, public_key,
reported_capabilities, reported_facts, requested_roles, status, reviewed_by_user_id::text,
reviewed_at, approved_node_id::text, rejection_reason, created_at, updated_at, approval_payload, approval_signature
`
	encodedSignature, err := json.Marshal(signature)
	if err != nil {
		return NodeJoinRequest{}, err
	}
	updated := s.db.QueryRow(ctx, query, clusterID, joinRequestID, []byte(payload), encodedSignature)
	return scanJoinRequest(updated)
}
// RejectJoinRequest marks a pending join request as 'rejected', recording the
// reviewer, the review time (current UTC time) and the rejection reason.
// Requests that are not 'pending' are not matched.
func (s *PostgresStore) RejectJoinRequest(ctx context.Context, input RejectJoinRequestInput) (NodeJoinRequest, error) {
	const query = `
UPDATE node_join_requests
SET status = 'rejected',
reviewed_by_user_id = $3::uuid,
reviewed_at = $4,
rejection_reason = $5,
updated_at = $4
WHERE cluster_id = $1::uuid
AND id = $2::uuid
AND status = 'pending'
RETURNING id::text, cluster_id::text, join_token_id::text, node_name, node_fingerprint, public_key,
reported_capabilities, reported_facts, requested_roles, status, reviewed_by_user_id::text,
reviewed_at, approved_node_id::text, rejection_reason, created_at, updated_at, approval_payload, approval_signature
`
	reviewedAt := time.Now().UTC()
	rejected := s.db.QueryRow(ctx, query,
		input.ClusterID, input.JoinRequestID, input.ActorUserID, reviewedAt, input.Reason)
	return scanJoinRequest(rejected)
}
// AssignNodeRole activates a node's role assignment or changes the status of
// its currently active one.
//
// With an empty or "active" input.Status a new active assignment is inserted;
// if an active assignment already exists for the same (cluster, node, role,
// organization) the conflict clause refreshes its policy and assigner
// instead. A NULL organization is matched via the all-zero UUID sentinel.
//
// With any other status the currently active assignment is updated to that
// status (revoked_at is stamped when the status is 'revoked'). If no active
// assignment matches, the error is whatever scanRoleAssignment reports for an
// empty result.
func (s *PostgresStore) AssignNodeRole(ctx context.Context, input AssignNodeRoleInput) (NodeRoleAssignment, error) {
	status := input.Status
	if status == "" {
		status = "active"
	}

	if status != "active" {
		// The placeholders are numbered contiguously from $1. The statement
		// previously bound a generated id as $1 without referencing it, which
		// PostgreSQL rejects at prepare time ("could not determine data type
		// of parameter $1"), so this path could not succeed under pgx's
		// default prepared-statement execution.
		row := s.db.QueryRow(ctx, `
UPDATE node_role_assignments
SET status = $5,
revoked_at = CASE WHEN $5 = 'revoked' THEN NOW() ELSE revoked_at END,
policy = $6::jsonb,
assigned_by_user_id = $7::uuid
WHERE cluster_id = $1::uuid
AND node_id = $2::uuid
AND role = $4
AND COALESCE(organization_id, '00000000-0000-0000-0000-000000000000'::uuid) =
COALESCE($3::uuid, '00000000-0000-0000-0000-000000000000'::uuid)
AND status = 'active'
RETURNING id::text, cluster_id::text, node_id::text, organization_id::text, role, status,
policy, assigned_by_user_id::text, assigned_at, revoked_at
`, input.ClusterID, input.NodeID, input.OrganizationID, input.Role, status, []byte(input.Policy), input.ActorUserID)
		return scanRoleAssignment(row)
	}

	// A fresh id is only needed when a row may actually be inserted.
	id := uuid.NewString()
	row := s.db.QueryRow(ctx, `
INSERT INTO node_role_assignments (
id, cluster_id, node_id, organization_id, role, status, policy, assigned_by_user_id, assigned_at
) VALUES ($1::uuid, $2::uuid, $3::uuid, $4::uuid, $5, $6, $7::jsonb, $8::uuid, NOW())
ON CONFLICT (cluster_id, node_id, role, COALESCE(organization_id, '00000000-0000-0000-0000-000000000000'::uuid))
WHERE status = 'active'
DO UPDATE SET
policy = EXCLUDED.policy,
assigned_by_user_id = EXCLUDED.assigned_by_user_id
RETURNING id::text, cluster_id::text, node_id::text, organization_id::text, role, status,
policy, assigned_by_user_id::text, assigned_at, revoked_at
`, id, input.ClusterID, input.NodeID, input.OrganizationID, input.Role, status, []byte(input.Policy), input.ActorUserID)
	return scanRoleAssignment(row)
}
// ListNodeRoleAssignments returns every role assignment of a node within a
// cluster, regardless of status, most recently assigned first. The slice is
// nil when the node has none.
func (s *PostgresStore) ListNodeRoleAssignments(ctx context.Context, clusterID, nodeID string) ([]NodeRoleAssignment, error) {
	const query = `
SELECT id::text, cluster_id::text, node_id::text, organization_id::text, role, status,
policy, assigned_by_user_id::text, assigned_at, revoked_at
FROM node_role_assignments
WHERE cluster_id = $1::uuid
AND node_id = $2::uuid
ORDER BY assigned_at DESC
`
	rows, err := s.db.Query(ctx, query, clusterID, nodeID)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var assignments []NodeRoleAssignment
	for rows.Next() {
		assignment, scanErr := scanRoleAssignment(rows)
		if scanErr != nil {
			return nil, scanErr
		}
		assignments = append(assignments, assignment)
	}
	return assignments, rows.Err()
}
// AttachExistingNodeToCluster adds an already registered node to a cluster.
//
// In one transaction it upserts the cluster membership, inserts an active
// role assignment for each entry of input.Roles, writes a
// "cluster_membership.attached_existing_node" audit event, and reads the
// node back in the same shape ListClusterNodes produces.
//
// pgx.ErrNoRows is returned when the membership statement does not affect
// exactly one row, i.e. the node does not exist, is not registration-active,
// or already has a 'revoked' membership in this cluster.
func (s *PostgresStore) AttachExistingNodeToCluster(ctx context.Context, input AttachExistingNodeInput) (ClusterNode, error) {
tx, err := s.db.Begin(ctx)
if err != nil {
return ClusterNode{}, err
}
// Rollback after a successful Commit is harmless; its result is discarded.
defer tx.Rollback(ctx)
now := time.Now().UTC()
membershipMetadata, err := json.Marshal(map[string]any{
"attached_from_existing_node": true,
"attached_at": now.Format(time.RFC3339Nano),
})
if err != nil {
return ClusterNode{}, err
}
// Only registration-active nodes are eligible. An existing non-revoked
// membership is re-activated and its metadata merged with the new keys; a
// revoked membership is left untouched, which makes the row count 0.
tag, err := tx.Exec(ctx, `
WITH eligible_node AS (
SELECT id
FROM nodes
WHERE id = $2::uuid
AND registration_status = 'active'
)
INSERT INTO cluster_memberships (cluster_id, node_id, membership_status, joined_at, metadata)
SELECT $1::uuid, id, 'active', $3, $4::jsonb
FROM eligible_node
ON CONFLICT (cluster_id, node_id) DO UPDATE SET
membership_status = 'active',
metadata = cluster_memberships.metadata || EXCLUDED.metadata
WHERE cluster_memberships.membership_status <> 'revoked'
`, input.ClusterID, input.NodeID, now, membershipMetadata)
if err != nil {
return ClusterNode{}, err
}
if tag.RowsAffected() != 1 {
return ClusterNode{}, pgx.ErrNoRows
}
// Roles are inserted as given with an empty policy; conflicting rows are
// skipped.
// NOTE(review): unlike ApproveJoinRequest, roles are not checked against
// allowedNodeRoles here — presumably validated by the caller; confirm.
for _, role := range input.Roles {
_, err := tx.Exec(ctx, `
INSERT INTO node_role_assignments (
id, cluster_id, node_id, role, status, policy, assigned_by_user_id, assigned_at
) VALUES ($1::uuid, $2::uuid, $3::uuid, $4, 'active', '{}'::jsonb, $5::uuid, $6)
ON CONFLICT DO NOTHING
`, uuid.NewString(), input.ClusterID, input.NodeID, role, input.ActorUserID, now)
if err != nil {
return ClusterNode{}, err
}
}
auditPayload, err := json.Marshal(map[string]any{
"attached_from_existing_node": true,
"roles": input.Roles,
})
if err != nil {
return ClusterNode{}, err
}
// The audit event is part of the transaction: if it cannot be written the
// attachment is not committed.
if _, err := tx.Exec(ctx, `
INSERT INTO cluster_audit_events (cluster_id, actor_user_id, event_type, target_type, target_id, payload, created_at)
VALUES ($1::uuid, $2::uuid, 'cluster_membership.attached_existing_node', 'node', $3, $4::jsonb, $5)
`, input.ClusterID, input.ActorUserID, input.NodeID, auditPayload, now); err != nil {
return ClusterNode{}, err
}
// Read the node back inside the transaction. The projection mirrors the
// ListClusterNodes query (derived health_status and version_state), with
// the stale interval bound as $3 and a filter on the single node.
row := tx.QueryRow(ctx, `
SELECT n.id::text, n.owner_organization_id::text, n.node_key, n.name, n.ownership_type,
n.registration_status,
CASE
WHEN n.registration_status = 'active'
AND COALESCE(n.last_seen_at, n.updated_at, n.created_at) < NOW() - $3::interval THEN 'offline'
ELSE n.health_status
END AS health_status,
CASE
WHEN update_policy.enabled
AND update_policy.target_version IS NOT NULL
AND n.reported_version = update_policy.target_version THEN 'current'
WHEN update_status.status IN ('failed', 'error') THEN 'failed'
WHEN update_status.phase = 'rollback' OR update_status.status = 'rolled_back' THEN 'rollback'
WHEN update_policy.enabled
AND update_policy.target_version IS NOT NULL
AND n.reported_version IS DISTINCT FROM update_policy.target_version
AND update_status.target_version = update_policy.target_version
AND update_status.phase IN ('planned', 'download', 'apply', 'health_check')
AND update_status.status IN ('accepted', 'started', 'staged', 'running') THEN 'updating'
WHEN update_policy.enabled
AND update_policy.target_version IS NOT NULL
AND n.reported_version IS DISTINCT FROM update_policy.target_version THEN 'outdated'
ELSE n.version_state
END AS version_state,
n.partition_state,
n.reported_version, n.last_seen_at, cm.membership_status, cm.metadata,
ng.id::text, ng.name,
n.created_at, n.updated_at
FROM cluster_memberships cm
JOIN nodes n ON n.id = cm.node_id
LEFT JOIN cluster_node_group_memberships ngm ON ngm.cluster_id = cm.cluster_id AND ngm.node_id = cm.node_id
LEFT JOIN cluster_node_groups ng ON ng.cluster_id = ngm.cluster_id AND ng.id = ngm.group_id
LEFT JOIN LATERAL (
SELECT p.enabled, p.target_version
FROM node_update_desired_policies p
WHERE p.cluster_id = cm.cluster_id
AND p.node_id = cm.node_id
AND p.product = 'rap-node-agent'
AND p.enabled
ORDER BY p.updated_at DESC
LIMIT 1
) update_policy ON true
LEFT JOIN LATERAL (
SELECT s.target_version, s.phase, s.status
FROM node_update_status_reports s
WHERE s.cluster_id = cm.cluster_id
AND s.node_id = cm.node_id
AND s.product = 'rap-node-agent'
ORDER BY s.observed_at DESC
LIMIT 1
) update_status ON true
WHERE cm.cluster_id = $1::uuid
AND cm.node_id = $2::uuid
`, input.ClusterID, input.NodeID, nodeHeartbeatStaleIntervalSQL)
item, err := scanClusterNode(row)
if err != nil {
return ClusterNode{}, err
}
if err := tx.Commit(ctx); err != nil {
return ClusterNode{}, err
}
return item, nil
}
// AssignNodeToGroup places a cluster node into a node group, or clears its
// group when input.GroupID is nil, inside a single transaction.
//
// The change only applies while the node has a non-revoked membership in the
// cluster and, for an assignment, while the group belongs to that cluster;
// otherwise pgx.ErrNoRows is returned and nothing is committed. Clearing the
// group of a node that has no group row is not an error. A
// 'cluster_node_group.assigned' audit event is written in the same
// transaction and the node is re-read so the result reflects the new group.
func (s *PostgresStore) AssignNodeToGroup(ctx context.Context, input AssignNodeGroupInput) (ClusterNode, error) {
	const clearGroupSQL = `
DELETE FROM cluster_node_group_memberships
WHERE cluster_id = $1::uuid
AND node_id = $2::uuid
AND EXISTS (
SELECT 1
FROM cluster_memberships cm
WHERE cm.cluster_id = $1::uuid
AND cm.node_id = $2::uuid
AND cm.membership_status <> 'revoked'
)
`
	const activeMembershipSQL = `
SELECT EXISTS (
SELECT 1
FROM cluster_memberships
WHERE cluster_id = $1::uuid
AND node_id = $2::uuid
AND membership_status <> 'revoked'
)
`
	const upsertGroupSQL = `
INSERT INTO cluster_node_group_memberships (
cluster_id, node_id, group_id, assigned_by_user_id, assigned_at, metadata
)
SELECT $1::uuid, $2::uuid, $3::uuid, $4::uuid, $5, '{}'::jsonb
WHERE EXISTS (
SELECT 1
FROM cluster_memberships cm
WHERE cm.cluster_id = $1::uuid
AND cm.node_id = $2::uuid
AND cm.membership_status <> 'revoked'
)
AND EXISTS (
SELECT 1
FROM cluster_node_groups ng
WHERE ng.cluster_id = $1::uuid
AND ng.id = $3::uuid
)
ON CONFLICT (cluster_id, node_id) DO UPDATE SET
group_id = EXCLUDED.group_id,
assigned_by_user_id = EXCLUDED.assigned_by_user_id,
assigned_at = EXCLUDED.assigned_at
`
	const auditSQL = `
INSERT INTO cluster_audit_events (cluster_id, actor_user_id, event_type, target_type, target_id, payload, created_at)
VALUES ($1::uuid, $2::uuid, 'cluster_node_group.assigned', 'node', $3, $4::jsonb, $5)
`
	const reloadNodeSQL = `
SELECT n.id::text, n.owner_organization_id::text, n.node_key, n.name, n.ownership_type,
n.registration_status,
CASE
WHEN n.registration_status = 'active'
AND COALESCE(n.last_seen_at, n.updated_at, n.created_at) < NOW() - $3::interval THEN 'offline'
ELSE n.health_status
END AS health_status,
CASE
WHEN update_policy.enabled
AND update_policy.target_version IS NOT NULL
AND n.reported_version = update_policy.target_version THEN 'current'
WHEN update_status.status IN ('failed', 'error') THEN 'failed'
WHEN update_status.phase = 'rollback' OR update_status.status = 'rolled_back' THEN 'rollback'
WHEN update_policy.enabled
AND update_policy.target_version IS NOT NULL
AND n.reported_version IS DISTINCT FROM update_policy.target_version
AND update_status.target_version = update_policy.target_version
AND update_status.phase IN ('planned', 'download', 'apply', 'health_check')
AND update_status.status IN ('accepted', 'started', 'staged', 'running') THEN 'updating'
WHEN update_policy.enabled
AND update_policy.target_version IS NOT NULL
AND n.reported_version IS DISTINCT FROM update_policy.target_version THEN 'outdated'
ELSE n.version_state
END AS version_state,
n.partition_state,
n.reported_version, n.last_seen_at, cm.membership_status, cm.metadata,
ng.id::text, ng.name,
n.created_at, n.updated_at
FROM cluster_memberships cm
JOIN nodes n ON n.id = cm.node_id
LEFT JOIN cluster_node_group_memberships ngm ON ngm.cluster_id = cm.cluster_id AND ngm.node_id = cm.node_id
LEFT JOIN cluster_node_groups ng ON ng.cluster_id = ngm.cluster_id AND ng.id = ngm.group_id
LEFT JOIN LATERAL (
SELECT p.enabled, p.target_version
FROM node_update_desired_policies p
WHERE p.cluster_id = cm.cluster_id
AND p.node_id = cm.node_id
AND p.product = 'rap-node-agent'
AND p.enabled
ORDER BY p.updated_at DESC
LIMIT 1
) update_policy ON true
LEFT JOIN LATERAL (
SELECT s.target_version, s.phase, s.status
FROM node_update_status_reports s
WHERE s.cluster_id = cm.cluster_id
AND s.node_id = cm.node_id
AND s.product = 'rap-node-agent'
ORDER BY s.observed_at DESC
LIMIT 1
) update_status ON true
WHERE cm.cluster_id = $1::uuid
AND cm.node_id = $2::uuid
`

	tx, err := s.db.Begin(ctx)
	if err != nil {
		return ClusterNode{}, err
	}
	// Rollback is a no-op once the transaction has been committed.
	defer tx.Rollback(ctx)

	assignedAt := time.Now().UTC()
	var payload json.RawMessage

	switch {
	case input.GroupID == nil:
		cleared, execErr := tx.Exec(ctx, clearGroupSQL, input.ClusterID, input.NodeID)
		if execErr != nil {
			return ClusterNode{}, execErr
		}
		if cleared.RowsAffected() == 0 {
			// Nothing was deleted: tell "node had no group" apart from
			// "node is not an eligible member of this cluster".
			var eligible bool
			scanErr := tx.QueryRow(ctx, activeMembershipSQL, input.ClusterID, input.NodeID).Scan(&eligible)
			if scanErr != nil {
				return ClusterNode{}, scanErr
			}
			if !eligible {
				return ClusterNode{}, pgx.ErrNoRows
			}
		}
		payload = json.RawMessage(`{"group_id":null}`)
	default:
		assigned, execErr := tx.Exec(ctx, upsertGroupSQL, input.ClusterID, input.NodeID, input.GroupID, input.ActorUserID, assignedAt)
		if execErr != nil {
			return ClusterNode{}, execErr
		}
		// Zero rows means either the membership or the group guard failed.
		if assigned.RowsAffected() != 1 {
			return ClusterNode{}, pgx.ErrNoRows
		}
		payload = json.RawMessage(fmt.Sprintf(`{"group_id":%q}`, *input.GroupID))
	}

	_, err = tx.Exec(ctx, auditSQL, input.ClusterID, input.ActorUserID, input.NodeID, payload, assignedAt)
	if err != nil {
		return ClusterNode{}, err
	}

	node, err := scanClusterNode(tx.QueryRow(ctx, reloadNodeSQL, input.ClusterID, input.NodeID, nodeHeartbeatStaleIntervalSQL))
	if err != nil {
		return ClusterNode{}, err
	}
	if err = tx.Commit(ctx); err != nil {
		return ClusterNode{}, err
	}
	return node, nil
}
// RecordHeartbeat stores one node heartbeat and propagates it, all in one
// transaction: it appends to node_heartbeats, upserts the per-node row in
// node_latest_heartbeats, refreshes the node's health/version/last-seen
// fields, and stamps last_seen_at on the cluster membership. The stored
// heartbeat (as returned by the INSERT) is the result.
func (s *PostgresStore) RecordHeartbeat(ctx context.Context, input RecordHeartbeatInput) (NodeHeartbeat, error) {
	const insertHeartbeatSQL = `
INSERT INTO node_heartbeats (
id, cluster_id, node_id, health_status, reported_version, capabilities, service_states, metadata, observed_at
) VALUES ($1::uuid, $2::uuid, $3::uuid, $4, $5, $6::jsonb, $7::jsonb, $8::jsonb, $9)
RETURNING id::text, cluster_id::text, node_id::text, health_status, reported_version,
capabilities, service_states, metadata, observed_at
`
	const upsertLatestSQL = `
INSERT INTO node_latest_heartbeats (
cluster_id, node_id, heartbeat_id, health_status, reported_version, capabilities, service_states, metadata, observed_at
) VALUES ($1::uuid, $2::uuid, $3::uuid, $4, $5, $6::jsonb, $7::jsonb, $8::jsonb, $9)
ON CONFLICT (cluster_id, node_id) DO UPDATE SET
heartbeat_id = EXCLUDED.heartbeat_id,
health_status = EXCLUDED.health_status,
reported_version = EXCLUDED.reported_version,
capabilities = EXCLUDED.capabilities,
service_states = EXCLUDED.service_states,
metadata = EXCLUDED.metadata,
observed_at = EXCLUDED.observed_at
`
	const touchNodeSQL = `
UPDATE nodes
SET health_status = $2,
reported_version = COALESCE($3, reported_version),
last_seen_at = $4,
updated_at = $4
WHERE id = $1::uuid
`
	const touchMembershipSQL = `
UPDATE cluster_memberships
SET last_seen_at = $3
WHERE cluster_id = $1::uuid
AND node_id = $2::uuid
`

	tx, err := s.db.Begin(ctx)
	if err != nil {
		return NodeHeartbeat{}, err
	}
	defer tx.Rollback(ctx)

	observedAt := time.Now().UTC()
	recorded, err := scanHeartbeat(tx.QueryRow(ctx, insertHeartbeatSQL,
		uuid.NewString(), input.ClusterID, input.NodeID, input.HealthStatus, input.ReportedVersion,
		[]byte(input.Capabilities), []byte(input.ServiceStates), []byte(input.Metadata), observedAt))
	if err != nil {
		return NodeHeartbeat{}, err
	}

	// Follow-up writes run in this order and use the values the database
	// returned for the inserted heartbeat.
	followUps := []struct {
		query string
		args  []any
	}{
		{upsertLatestSQL, []any{
			input.ClusterID, input.NodeID, recorded.ID, recorded.HealthStatus, recorded.ReportedVersion,
			[]byte(recorded.Capabilities), []byte(recorded.ServiceStates), []byte(recorded.Metadata), recorded.ObservedAt,
		}},
		{touchNodeSQL, []any{input.NodeID, input.HealthStatus, input.ReportedVersion, recorded.ObservedAt}},
		{touchMembershipSQL, []any{input.ClusterID, input.NodeID, recorded.ObservedAt}},
	}
	for _, step := range followUps {
		if _, err = tx.Exec(ctx, step.query, step.args...); err != nil {
			return NodeHeartbeat{}, err
		}
	}

	if err = tx.Commit(ctx); err != nil {
		return NodeHeartbeat{}, err
	}
	return recorded, nil
}
// ListNodeHeartbeats returns the heartbeat history of one node in a cluster,
// newest first. A limit outside 1..500 falls back to 100 rows.
func (s *PostgresStore) ListNodeHeartbeats(ctx context.Context, clusterID, nodeID string, limit int) ([]NodeHeartbeat, error) {
	const (
		fallbackLimit = 100
		maxLimit      = 500
		query         = `
SELECT id::text, cluster_id::text, node_id::text, health_status, reported_version,
capabilities, service_states, metadata, observed_at
FROM node_heartbeats
WHERE cluster_id = $1::uuid
AND node_id = $2::uuid
ORDER BY observed_at DESC
LIMIT $3
`
	)
	if limit < 1 || limit > maxLimit {
		limit = fallbackLimit
	}

	rows, err := s.db.Query(ctx, query, clusterID, nodeID, limit)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var heartbeats []NodeHeartbeat
	for rows.Next() {
		heartbeat, scanErr := scanHeartbeat(rows)
		if scanErr != nil {
			return nil, scanErr
		}
		heartbeats = append(heartbeats, heartbeat)
	}
	return heartbeats, rows.Err()
}
// CreateReleaseVersion inserts a release version together with all of its
// artifacts in one transaction and returns the stored release with the stored
// artifacts attached in input order. Any failure rolls the whole release back.
func (s *PostgresStore) CreateReleaseVersion(ctx context.Context, input CreateReleaseVersionInput) (ReleaseVersion, error) {
	const insertReleaseSQL = `
INSERT INTO release_versions (
id, cluster_id, product, version, channel, status, compatibility, changelog, created_by_user_id, created_at
) VALUES ($1::uuid, $2::uuid, $3, $4, $5, $6, $7::jsonb, $8, $9::uuid, NOW())
RETURNING id::text, cluster_id::text, product, version, channel, status, compatibility,
changelog, created_by_user_id::text, created_at, authority_payload, authority_signature
`
	const insertArtifactSQL = `
INSERT INTO release_artifacts (
id, release_id, cluster_id, product, version, os, arch, install_type, kind,
url, sha256, size_bytes, signature, metadata, created_at
) VALUES (
$1::uuid, $2::uuid, $3::uuid, $4, $5, $6, $7, $8, $9,
$10, $11, $12, $13, $14::jsonb, NOW()
)
RETURNING id::text, release_id::text, cluster_id::text, product, version, os, arch,
install_type, kind, url, sha256, size_bytes, signature, metadata, created_at
`

	tx, err := s.db.Begin(ctx)
	if err != nil {
		return ReleaseVersion{}, err
	}
	defer tx.Rollback(ctx)

	releaseID := uuid.NewString()
	release, err := scanReleaseVersion(tx.QueryRow(ctx, insertReleaseSQL,
		releaseID, input.ClusterID, input.Product, input.Version, input.Channel, input.Status,
		[]byte(input.Compatibility), input.Changelog, input.ActorUserID))
	if err != nil {
		return ReleaseVersion{}, err
	}

	// Artifacts inherit cluster, product and version from the release input.
	for _, spec := range input.Artifacts {
		stored, scanErr := scanReleaseArtifact(tx.QueryRow(ctx, insertArtifactSQL,
			uuid.NewString(), releaseID, input.ClusterID, input.Product, input.Version,
			spec.OS, spec.Arch, spec.InstallType, spec.Kind, spec.URL, spec.SHA256,
			spec.SizeBytes, spec.Signature, []byte(spec.Metadata)))
		if scanErr != nil {
			return ReleaseVersion{}, scanErr
		}
		release.Artifacts = append(release.Artifacts, stored)
	}

	if err = tx.Commit(ctx); err != nil {
		return ReleaseVersion{}, err
	}
	return release, nil
}
// ListReleaseVersions returns the release versions of a cluster, newest first
// (ties broken by version, descending), each with its artifacts attached.
// An empty product or channel disables that filter.
//
// The release rows are read to completion and the cursor is closed before the
// per-release artifact queries run. A pool query keeps its connection until
// its rows are closed, so issuing the artifact query while the outer cursor
// was still open required two pooled connections per call and could exhaust
// the pool when several callers listed releases concurrently.
func (s *PostgresStore) ListReleaseVersions(ctx context.Context, clusterID, product, channel string) ([]ReleaseVersion, error) {
	query := `
SELECT id::text, cluster_id::text, product, version, channel, status, compatibility,
changelog, created_by_user_id::text, created_at, authority_payload, authority_signature
FROM release_versions
WHERE cluster_id = $1::uuid
AND ($2 = '' OR product = $2)
AND ($3 = '' OR channel = $3)
ORDER BY created_at DESC, version DESC
`
	rows, err := s.db.Query(ctx, query, clusterID, product, channel)
	if err != nil {
		return nil, err
	}
	// Safety net for early returns; Close is safe to call more than once.
	defer rows.Close()

	var out []ReleaseVersion
	for rows.Next() {
		item, err := scanReleaseVersion(rows)
		if err != nil {
			return nil, err
		}
		out = append(out, item)
	}
	// Release the connection before running the nested artifact queries.
	rows.Close()
	if err := rows.Err(); err != nil {
		return nil, err
	}

	for i := range out {
		artifacts, err := s.listReleaseArtifacts(ctx, out[i].ID)
		if err != nil {
			return nil, err
		}
		out[i].Artifacts = artifacts
	}
	return out, nil
}
// listReleaseArtifacts loads every artifact of one release, ordered by
// os, arch, install type and kind.
func (s *PostgresStore) listReleaseArtifacts(ctx context.Context, releaseID string) ([]ReleaseArtifact, error) {
	const query = `
SELECT id::text, release_id::text, cluster_id::text, product, version, os, arch,
install_type, kind, url, sha256, size_bytes, signature, metadata, created_at
FROM release_artifacts
WHERE release_id = $1::uuid
ORDER BY os, arch, install_type, kind
`
	rows, err := s.db.Query(ctx, query, releaseID)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var artifacts []ReleaseArtifact
	for rows.Next() {
		artifact, scanErr := scanReleaseArtifact(rows)
		if scanErr != nil {
			return nil, scanErr
		}
		artifacts = append(artifacts, artifact)
	}
	return artifacts, rows.Err()
}
// ListNodeUpdateServiceCandidates lists the nodes of a cluster that can serve
// updates: an active 'update-cache' role assignment, an active membership, an
// active registration, a 'healthy' status, and a last-seen time within
// nodeHeartbeatStaleIntervalSQL. Endpoint and region are taken from the
// mesh_endpoint_report section of the latest heartbeat metadata (empty when
// absent). Nodes that report a peer endpoint come first, then the most
// recently seen, then by name.
func (s *PostgresStore) ListNodeUpdateServiceCandidates(ctx context.Context, clusterID string) ([]NodeUpdateServiceCandidate, error) {
	const query = `
SELECT n.id::text,
n.name,
COALESCE(lh.metadata #>> '{mesh_endpoint_report,peer_endpoint}', '') AS endpoint,
COALESCE(lh.metadata #>> '{mesh_endpoint_report,region}', '') AS region,
n.last_seen_at
FROM node_role_assignments r
JOIN nodes n ON n.id = r.node_id
JOIN cluster_memberships cm ON cm.cluster_id = r.cluster_id AND cm.node_id = r.node_id
LEFT JOIN node_latest_heartbeats lh ON lh.cluster_id = r.cluster_id AND lh.node_id = r.node_id
WHERE r.cluster_id = $1::uuid
AND r.role = 'update-cache'
AND r.status = 'active'
AND cm.membership_status = 'active'
AND n.registration_status = 'active'
AND n.health_status = 'healthy'
AND COALESCE(n.last_seen_at, n.updated_at, n.created_at) >= NOW() - $2::interval
ORDER BY
CASE WHEN COALESCE(lh.metadata #>> '{mesh_endpoint_report,peer_endpoint}', '') = '' THEN 1 ELSE 0 END,
n.last_seen_at DESC NULLS LAST,
n.name ASC
`
	rows, err := s.db.Query(ctx, query, clusterID, nodeHeartbeatStaleIntervalSQL)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var candidates []NodeUpdateServiceCandidate
	for rows.Next() {
		var candidate NodeUpdateServiceCandidate
		scanErr := rows.Scan(&candidate.NodeID, &candidate.NodeName, &candidate.Endpoint, &candidate.Region, &candidate.LastSeenAt)
		if scanErr != nil {
			return nil, scanErr
		}
		candidates = append(candidates, candidate)
	}
	return candidates, rows.Err()
}
// UpsertNodeUpdatePolicy creates or replaces the desired update policy for a
// (cluster, node, product) triple and returns the stored row. On conflict all
// policy fields are overwritten and updated_at is set to NOW().
func (s *PostgresStore) UpsertNodeUpdatePolicy(ctx context.Context, input UpsertNodeUpdatePolicyInput) (NodeUpdatePolicy, error) {
	const query = `
INSERT INTO node_update_desired_policies (
cluster_id, node_id, product, channel, target_version, strategy, enabled,
rollback_allowed, health_window_seconds, updated_by_user_id, updated_at
) VALUES (
$1::uuid, $2::uuid, $3, $4, $5, $6, $7, $8, $9, $10::uuid, NOW()
)
ON CONFLICT (cluster_id, node_id, product) DO UPDATE SET
channel = EXCLUDED.channel,
target_version = EXCLUDED.target_version,
strategy = EXCLUDED.strategy,
enabled = EXCLUDED.enabled,
rollback_allowed = EXCLUDED.rollback_allowed,
health_window_seconds = EXCLUDED.health_window_seconds,
updated_by_user_id = EXCLUDED.updated_by_user_id,
updated_at = NOW()
RETURNING cluster_id::text, node_id::text, product, channel, target_version,
strategy, enabled, rollback_allowed, health_window_seconds,
updated_by_user_id::text, updated_at
`
	stored := s.db.QueryRow(ctx, query,
		input.ClusterID,
		input.NodeID,
		input.Product,
		input.Channel,
		input.TargetVersion,
		input.Strategy,
		input.Enabled,
		input.RollbackAllowed,
		input.HealthWindowSec,
		input.ActorUserID,
	)
	return scanNodeUpdatePolicy(stored)
}
// GetNodeUpdatePolicy fetches the desired update policy stored for the given
// cluster, node and product. The scan error is returned unchanged when no
// such row exists.
func (s *PostgresStore) GetNodeUpdatePolicy(ctx context.Context, clusterID, nodeID, product string) (NodeUpdatePolicy, error) {
	const query = `
SELECT cluster_id::text, node_id::text, product, channel, target_version,
strategy, enabled, rollback_allowed, health_window_seconds,
updated_by_user_id::text, updated_at
FROM node_update_desired_policies
WHERE cluster_id = $1::uuid
AND node_id = $2::uuid
AND product = $3
`
	return scanNodeUpdatePolicy(s.db.QueryRow(ctx, query, clusterID, nodeID, product))
}
// ReportNodeUpdateStatus appends one update status report for a node under a
// freshly generated report ID and returns the stored row. The observation
// time is taken from input.ObservedAt as provided.
func (s *PostgresStore) ReportNodeUpdateStatus(ctx context.Context, input ReportNodeUpdateStatusInput) (NodeUpdateStatus, error) {
	const query = `
INSERT INTO node_update_status_reports (
id, cluster_id, node_id, product, current_version, target_version, phase, status,
attempt_id, error_message, rollback_version, payload, observed_at
) VALUES ($1::uuid, $2::uuid, $3::uuid, $4, $5, $6, $7, $8, $9, $10, $11, $12::jsonb, $13)
RETURNING id::text, cluster_id::text, node_id::text, product, current_version,
target_version, phase, status, attempt_id, error_message, rollback_version,
payload, observed_at
`
	reportID := uuid.NewString()
	stored := s.db.QueryRow(ctx, query,
		reportID,
		input.ClusterID,
		input.NodeID,
		input.Product,
		input.CurrentVersion,
		input.TargetVersion,
		input.Phase,
		input.Status,
		input.AttemptID,
		input.ErrorMessage,
		input.RollbackVersion,
		[]byte(input.Payload),
		input.ObservedAt,
	)
	return scanNodeUpdateStatus(stored)
}
// ListNodeUpdateStatuses returns the update status reports of one node,
// newest first. A limit outside 1..200 falls back to 50 rows. The result is a
// non-nil (possibly empty) slice on success.
func (s *PostgresStore) ListNodeUpdateStatuses(ctx context.Context, clusterID, nodeID string, limit int) ([]NodeUpdateStatus, error) {
	const (
		fallbackLimit = 50
		maxLimit      = 200
		query         = `
SELECT id::text, cluster_id::text, node_id::text, product, current_version, target_version,
phase, status, attempt_id, error_message, rollback_version, payload, observed_at
FROM node_update_status_reports
WHERE cluster_id = $1::uuid AND node_id = $2::uuid
ORDER BY observed_at DESC
LIMIT $3
`
	)
	if limit < 1 || limit > maxLimit {
		limit = fallbackLimit
	}

	rows, err := s.db.Query(ctx, query, clusterID, nodeID, limit)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	// Deliberately non-nil so an empty history is an empty list, not nil.
	statuses := make([]NodeUpdateStatus, 0)
	for rows.Next() {
		status, scanErr := scanNodeUpdateStatus(rows)
		if scanErr != nil {
			return nil, scanErr
		}
		statuses = append(statuses, status)
	}
	return statuses, rows.Err()
}
// RevokeNodeIdentity marks a node's identity as revoked, sets the node's
// registration status to 'revoked', and writes a 'node_identity.revoked'
// audit event, all in one transaction.
//
// The identity is only touched when the node has a membership row in
// input.ClusterID; unless exactly one identity row is updated the function
// returns pgx.ErrNoRows and nothing is committed.
//
// The JSON documents carrying input.Reason are produced with json.Marshal.
// Formatting them with the %q verb yields Go string syntax, which is not
// valid JSON for some inputs (for example \a, \v, \x7f or invalid UTF-8) and
// would make the ::jsonb casts fail for such reasons.
func (s *PostgresStore) RevokeNodeIdentity(ctx context.Context, input RevokeNodeIdentityInput) error {
	identityMetadata, err := json.Marshal(map[string]any{"revocation_reason": input.Reason})
	if err != nil {
		return err
	}
	auditPayload, err := json.Marshal(map[string]any{"reason": input.Reason})
	if err != nil {
		return err
	}

	tx, err := s.db.Begin(ctx)
	if err != nil {
		return err
	}
	defer tx.Rollback(ctx)
	now := time.Now().UTC()
	tag, err := tx.Exec(ctx, `
UPDATE node_identities
SET identity_status = 'revoked',
revoked_at = $3,
updated_at = $3,
metadata = metadata || $4::jsonb
WHERE node_id = $1::uuid
AND EXISTS (
SELECT 1 FROM cluster_memberships cm
WHERE cm.cluster_id = $2::uuid
AND cm.node_id = node_identities.node_id
)
`, input.NodeID, input.ClusterID, now, identityMetadata)
	if err != nil {
		return err
	}
	if tag.RowsAffected() != 1 {
		return pgx.ErrNoRows
	}
	if _, err := tx.Exec(ctx, `
UPDATE nodes
SET registration_status = 'revoked',
updated_at = $2
WHERE id = $1::uuid
`, input.NodeID, now); err != nil {
		return err
	}
	if _, err := tx.Exec(ctx, `
INSERT INTO cluster_audit_events (cluster_id, actor_user_id, event_type, target_type, target_id, payload, created_at)
VALUES ($1::uuid, $2::uuid, 'node_identity.revoked', 'node', $3, $4::jsonb, $5)
`, input.ClusterID, input.ActorUserID, input.NodeID, auditPayload, now); err != nil {
		return err
	}
	return tx.Commit(ctx)
}
// DisableClusterMembership sets a node's membership in a cluster to
// 'disabled', merges the reason and timestamp into the membership metadata,
// and then records a 'cluster_membership.disabled' audit event through
// RecordAudit.
//
// Revoked memberships are left untouched; unless exactly one row is updated
// the function returns pgx.ErrNoRows and no audit event is recorded. The
// update and the audit write are separate statements, not one transaction.
//
// Two fixes relative to the previous version:
//   - the JSON documents are produced with json.Marshal; the %q verb emits Go
//     string syntax, which is not valid JSON for some reasons (for example
//     \a, \v, \x7f or invalid UTF-8) and breaks the ::jsonb cast;
//   - the statement no longer passes an argument for a placeholder it never
//     references ($3), which a prepared statement rejects because the
//     parameter type cannot be determined.
func (s *PostgresStore) DisableClusterMembership(ctx context.Context, input DisableMembershipInput) error {
	now := time.Now().UTC()
	metadataPatch, err := json.Marshal(map[string]any{
		"disabled_reason": input.Reason,
		"disabled_at":     now.Format(time.RFC3339Nano),
	})
	if err != nil {
		return err
	}
	auditPayload, err := json.Marshal(map[string]any{"reason": input.Reason})
	if err != nil {
		return err
	}

	tag, err := s.db.Exec(ctx, `
UPDATE cluster_memberships
SET membership_status = 'disabled',
metadata = metadata || $3::jsonb
WHERE cluster_id = $1::uuid
AND node_id = $2::uuid
AND membership_status <> 'revoked'
`, input.ClusterID, input.NodeID, metadataPatch)
	if err != nil {
		return err
	}
	if tag.RowsAffected() != 1 {
		return pgx.ErrNoRows
	}
	return s.RecordAudit(ctx, ClusterAuditEvent{
		ClusterID:   &input.ClusterID,
		ActorUserID: &input.ActorUserID,
		EventType:   "cluster_membership.disabled",
		TargetType:  "node",
		TargetID:    &input.NodeID,
		Payload:     json.RawMessage(auditPayload),
		CreatedAt:   now,
	})
}
// DeleteClusterNode removes a node from a cluster in one transaction.
//
// Steps, in order: look up the node name through its membership (the scan
// error is returned if the node is not a member), write a
// 'cluster_node.deleted' audit event, mark the node's identities revoked,
// delete the node's group membership and cluster membership, and finally
// delete the node row itself when it has no memberships left in any cluster.
// pgx.ErrNoRows is returned when the membership delete does not affect
// exactly one row.
//
// The identity metadata patch is produced with json.Marshal, matching the
// audit payload. Formatting it with the %q verb yields Go string syntax,
// which is not valid JSON for some reasons (for example \a, \v, \x7f or
// invalid UTF-8) and would make the ::jsonb cast fail.
func (s *PostgresStore) DeleteClusterNode(ctx context.Context, input DeleteClusterNodeInput) error {
	tx, err := s.db.Begin(ctx)
	if err != nil {
		return err
	}
	defer tx.Rollback(ctx)
	now := time.Now().UTC()
	var nodeName string
	if err := tx.QueryRow(ctx, `
SELECT n.name
FROM cluster_memberships cm
JOIN nodes n ON n.id = cm.node_id
WHERE cm.cluster_id = $1::uuid
AND cm.node_id = $2::uuid
`, input.ClusterID, input.NodeID).Scan(&nodeName); err != nil {
		return err
	}
	auditPayload, err := json.Marshal(map[string]any{
		"reason":     input.Reason,
		"node_name":  nodeName,
		"deleted_at": now.Format(time.RFC3339Nano),
	})
	if err != nil {
		return err
	}
	identityMetadata, err := json.Marshal(map[string]any{
		"revocation_reason": input.Reason,
		"revoked_by_delete": true,
	})
	if err != nil {
		return err
	}
	if _, err := tx.Exec(ctx, `
INSERT INTO cluster_audit_events (cluster_id, actor_user_id, event_type, target_type, target_id, payload, created_at)
VALUES ($1::uuid, $2::uuid, 'cluster_node.deleted', 'node', $3, $4::jsonb, $5)
`, input.ClusterID, input.ActorUserID, input.NodeID, auditPayload, now); err != nil {
		return err
	}
	// COALESCE keeps the original revocation time of an already revoked identity.
	if _, err := tx.Exec(ctx, `
UPDATE node_identities
SET identity_status = 'revoked',
revoked_at = COALESCE(revoked_at, $2),
updated_at = $2,
metadata = metadata || $3::jsonb
WHERE node_id = $1::uuid
`, input.NodeID, now, identityMetadata); err != nil {
		return err
	}
	if _, err := tx.Exec(ctx, `
DELETE FROM cluster_node_group_memberships
WHERE cluster_id = $1::uuid
AND node_id = $2::uuid
`, input.ClusterID, input.NodeID); err != nil {
		return err
	}
	tag, err := tx.Exec(ctx, `
DELETE FROM cluster_memberships
WHERE cluster_id = $1::uuid
AND node_id = $2::uuid
`, input.ClusterID, input.NodeID)
	if err != nil {
		return err
	}
	if tag.RowsAffected() != 1 {
		return pgx.ErrNoRows
	}
	// The node row survives while any other cluster still references it.
	if _, err := tx.Exec(ctx, `
DELETE FROM nodes n
WHERE n.id = $1::uuid
AND NOT EXISTS (
SELECT 1
FROM cluster_memberships cm
WHERE cm.node_id = n.id
)
`, input.NodeID); err != nil {
		return err
	}
	return tx.Commit(ctx)
}
// UpsertFabricTestingFlag updates the testing flag that matches the given
// scope type, scope ID and cluster ID (NULLs compare equal to each other via
// an all-zero UUID placeholder) and, when no row matches, inserts a new flag
// with a fresh ID. A non-positive retention defaults to 24 hours and empty
// metadata defaults to `{}`. The update and the insert are two separate
// statements; they are not wrapped in a transaction.
func (s *PostgresStore) UpsertFabricTestingFlag(ctx context.Context, input UpsertFabricTestingFlagInput) (FabricTestingFlag, error) {
	const updateSQL = `
UPDATE fabric_testing_flags
SET enabled = $4,
telemetry_enabled = $5,
synthetic_links_enabled = $6,
history_retention_hours = $7,
metadata = $8::jsonb,
updated_by_user_id = $9::uuid,
updated_at = NOW()
WHERE scope_type = $1
AND COALESCE(scope_id, '00000000-0000-0000-0000-000000000000'::uuid) = COALESCE($2::uuid, '00000000-0000-0000-0000-000000000000'::uuid)
AND COALESCE(cluster_id, '00000000-0000-0000-0000-000000000000'::uuid) = COALESCE($3::uuid, '00000000-0000-0000-0000-000000000000'::uuid)
RETURNING id::text, scope_type, scope_id::text, cluster_id::text, enabled, telemetry_enabled,
synthetic_links_enabled, history_retention_hours, metadata, updated_by_user_id::text, updated_at
`
	const insertSQL = `
INSERT INTO fabric_testing_flags (
id, scope_type, scope_id, cluster_id, enabled, telemetry_enabled, synthetic_links_enabled,
history_retention_hours, metadata, updated_by_user_id, updated_at
) VALUES ($1::uuid, $2, $3::uuid, $4::uuid, $5, $6, $7, $8, $9::jsonb, $10::uuid, NOW())
RETURNING id::text, scope_type, scope_id::text, cluster_id::text, enabled, telemetry_enabled,
synthetic_links_enabled, history_retention_hours, metadata, updated_by_user_id::text, updated_at
`

	if input.HistoryRetentionHours <= 0 {
		input.HistoryRetentionHours = 24
	}
	if len(input.Metadata) == 0 {
		input.Metadata = json.RawMessage(`{}`)
	}

	updated, err := scanFabricTestingFlag(s.db.QueryRow(ctx, updateSQL,
		input.ScopeType, input.ScopeID, input.ClusterID, input.Enabled, input.TelemetryEnabled,
		input.SyntheticLinksEnabled, input.HistoryRetentionHours, []byte(input.Metadata), input.ActorUserID))
	switch {
	case err == nil:
		return updated, nil
	case !errors.Is(err, pgx.ErrNoRows):
		return FabricTestingFlag{}, err
	}

	// No existing flag for this scope: create it.
	return scanFabricTestingFlag(s.db.QueryRow(ctx, insertSQL,
		uuid.NewString(), input.ScopeType, input.ScopeID, input.ClusterID, input.Enabled, input.TelemetryEnabled,
		input.SyntheticLinksEnabled, input.HistoryRetentionHours, []byte(input.Metadata), input.ActorUserID))
}
// ListFabricTestingFlags returns every stored fabric testing flag, ordered by
// scope type and then by most recent update.
func (s *PostgresStore) ListFabricTestingFlags(ctx context.Context) ([]FabricTestingFlag, error) {
	const query = `
SELECT id::text, scope_type, scope_id::text, cluster_id::text, enabled, telemetry_enabled,
synthetic_links_enabled, history_retention_hours, metadata, updated_by_user_id::text, updated_at
FROM fabric_testing_flags
ORDER BY scope_type, updated_at DESC
`
	rows, err := s.db.Query(ctx, query)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var flags []FabricTestingFlag
	for rows.Next() {
		flag, scanErr := scanFabricTestingFlag(rows)
		if scanErr != nil {
			return nil, scanErr
		}
		flags = append(flags, flag)
	}
	return flags, rows.Err()
}
// GetEffectiveNodeTestingFlags folds the platform-, organization- and
// node-scoped testing flags that apply to a node into one effective setting.
//
// Only flags with no cluster or with the given cluster are considered, and
// rows are visited in platform, organization, node order. Each boolean switch
// is on if any visited row has it on; a positive retention and non-empty
// metadata overwrite the value from earlier rows. With no matching rows the
// result keeps the defaults (24 hours of retention, `{}` metadata).
func (s *PostgresStore) GetEffectiveNodeTestingFlags(ctx context.Context, clusterID, nodeID string) (EffectiveNodeTestingFlags, error) {
	const query = `
WITH node_scope AS (
SELECT n.owner_organization_id
FROM nodes n
JOIN cluster_memberships cm ON cm.node_id = n.id AND cm.cluster_id = $1::uuid
WHERE n.id = $2::uuid
)
SELECT f.scope_type, f.enabled, f.telemetry_enabled, f.synthetic_links_enabled,
f.history_retention_hours, f.metadata
FROM fabric_testing_flags f
LEFT JOIN node_scope ns ON TRUE
WHERE (
f.scope_type = 'platform'
OR (f.scope_type = 'organization' AND f.scope_id = ns.owner_organization_id)
OR (f.scope_type = 'node' AND f.scope_id = $2::uuid)
)
AND (f.cluster_id IS NULL OR f.cluster_id = $1::uuid)
ORDER BY CASE f.scope_type
WHEN 'platform' THEN 1
WHEN 'organization' THEN 2
WHEN 'node' THEN 3
ELSE 4
END
`
	rows, err := s.db.Query(ctx, query, clusterID, nodeID)
	if err != nil {
		return EffectiveNodeTestingFlags{}, err
	}
	defer rows.Close()

	effective := EffectiveNodeTestingFlags{HistoryRetentionHours: 24, Metadata: json.RawMessage(`{}`)}
	for rows.Next() {
		var (
			scopeType                    string
			flagOn, telemetryOn, linksOn bool
			retentionHours               int
			scopeMetadata                json.RawMessage
		)
		scanErr := rows.Scan(&scopeType, &flagOn, &telemetryOn, &linksOn, &retentionHours, &scopeMetadata)
		if scanErr != nil {
			return EffectiveNodeTestingFlags{}, scanErr
		}

		// Switches are sticky: once any scope turns one on it stays on.
		if flagOn {
			effective.Enabled = true
		}
		if telemetryOn {
			effective.TelemetryEnabled = true
		}
		if linksOn {
			effective.SyntheticLinksEnabled = true
		}
		if retentionHours > 0 {
			effective.HistoryRetentionHours = retentionHours
		}
		effective.AppliedScopes = append(effective.AppliedScopes, scopeType)

		// Empty or `{}` metadata never replaces what an earlier row supplied.
		if len(scopeMetadata) == 0 || string(scopeMetadata) == "{}" {
			continue
		}
		effective.Metadata = scopeMetadata
	}
	return effective, rows.Err()
}
// RecordNodeTelemetry stores one telemetry observation for a node under a
// freshly generated ID and returns the stored row. A zero ObservedAt defaults
// to the current UTC time and an empty payload defaults to `{}`.
func (s *PostgresStore) RecordNodeTelemetry(ctx context.Context, input RecordNodeTelemetryInput) (NodeTelemetryObservation, error) {
	const query = `
INSERT INTO node_telemetry_observations (
id, cluster_id, node_id, cpu_percent, memory_used_bytes, memory_total_bytes,
disk_used_bytes, disk_total_bytes, network_rx_bytes, network_tx_bytes,
process_count, payload, observed_at
) VALUES ($1::uuid, $2::uuid, $3::uuid, $4, $5, $6, $7, $8, $9, $10, $11, $12::jsonb, $13)
RETURNING id::text, cluster_id::text, node_id::text, cpu_percent, memory_used_bytes,
memory_total_bytes, disk_used_bytes, disk_total_bytes, network_rx_bytes,
network_tx_bytes, process_count, payload, observed_at
`
	observedAt := input.ObservedAt
	if observedAt.IsZero() {
		observedAt = time.Now().UTC()
	}
	payload := input.Payload
	if len(payload) == 0 {
		payload = json.RawMessage(`{}`)
	}

	stored := s.db.QueryRow(ctx, query,
		uuid.NewString(),
		input.ClusterID,
		input.NodeID,
		input.CPUPercent,
		input.MemoryUsedBytes,
		input.MemoryTotalBytes,
		input.DiskUsedBytes,
		input.DiskTotalBytes,
		input.NetworkRxBytes,
		input.NetworkTxBytes,
		input.ProcessCount,
		[]byte(payload),
		observedAt,
	)
	return scanNodeTelemetry(stored)
}
// ListNodeTelemetry returns the telemetry observations of one node in a
// cluster, newest first. A limit outside 1..1000 falls back to 240 rows.
func (s *PostgresStore) ListNodeTelemetry(ctx context.Context, clusterID, nodeID string, limit int) ([]NodeTelemetryObservation, error) {
	const (
		fallbackLimit = 240
		maxLimit      = 1000
		query         = `
SELECT id::text, cluster_id::text, node_id::text, cpu_percent, memory_used_bytes,
memory_total_bytes, disk_used_bytes, disk_total_bytes, network_rx_bytes,
network_tx_bytes, process_count, payload, observed_at
FROM node_telemetry_observations
WHERE cluster_id = $1::uuid
AND node_id = $2::uuid
ORDER BY observed_at DESC
LIMIT $3
`
	)
	if limit < 1 || limit > maxLimit {
		limit = fallbackLimit
	}

	rows, err := s.db.Query(ctx, query, clusterID, nodeID, limit)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var observations []NodeTelemetryObservation
	for rows.Next() {
		observation, scanErr := scanNodeTelemetry(rows)
		if scanErr != nil {
			return nil, scanErr
		}
		observations = append(observations, observation)
	}
	return observations, rows.Err()
}
// SetDesiredWorkload creates or replaces the desired state of one service
// type on a node and returns the stored row. On conflict for the same
// (cluster, node, service type) every desired-state field is overwritten.
func (s *PostgresStore) SetDesiredWorkload(ctx context.Context, input SetDesiredWorkloadInput) (NodeWorkloadDesiredState, error) {
	const query = `
INSERT INTO node_workload_desired_states (
cluster_id, node_id, service_type, desired_state, version, runtime_mode,
artifact_ref, config, environment, updated_by_user_id, updated_at
) VALUES ($1::uuid, $2::uuid, $3, $4, $5, $6, $7, $8::jsonb, $9::jsonb, $10::uuid, NOW())
ON CONFLICT (cluster_id, node_id, service_type) DO UPDATE SET
desired_state = EXCLUDED.desired_state,
version = EXCLUDED.version,
runtime_mode = EXCLUDED.runtime_mode,
artifact_ref = EXCLUDED.artifact_ref,
config = EXCLUDED.config,
environment = EXCLUDED.environment,
updated_by_user_id = EXCLUDED.updated_by_user_id,
updated_at = EXCLUDED.updated_at
RETURNING cluster_id::text, node_id::text, service_type, desired_state, version, runtime_mode,
artifact_ref, config, environment, updated_by_user_id::text, updated_at
`
	stored := s.db.QueryRow(ctx, query,
		input.ClusterID,
		input.NodeID,
		input.ServiceType,
		input.DesiredState,
		input.Version,
		input.RuntimeMode,
		input.ArtifactRef,
		[]byte(input.Config),
		[]byte(input.Environment),
		input.ActorUserID,
	)
	return scanDesiredWorkload(stored)
}
// ListDesiredWorkloads returns the desired workload states configured for one
// node in a cluster, ordered by service type.
func (s *PostgresStore) ListDesiredWorkloads(ctx context.Context, clusterID, nodeID string) ([]NodeWorkloadDesiredState, error) {
	const query = `
SELECT cluster_id::text, node_id::text, service_type, desired_state, version, runtime_mode,
artifact_ref, config, environment, updated_by_user_id::text, updated_at
FROM node_workload_desired_states
WHERE cluster_id = $1::uuid
AND node_id = $2::uuid
ORDER BY service_type
`
	rows, err := s.db.Query(ctx, query, clusterID, nodeID)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var workloads []NodeWorkloadDesiredState
	for rows.Next() {
		workload, scanErr := scanDesiredWorkload(rows)
		if scanErr != nil {
			return nil, scanErr
		}
		workloads = append(workloads, workload)
	}
	return workloads, rows.Err()
}
// ReportWorkloadStatus records a workload status report from a node in one
// transaction: it appends the report (observed_at = NOW()), upserts the
// latest status for the (cluster, node, service type), and upserts the
// node_services row. A newly created node_services row starts disabled; an
// existing one only has its reported state, report time, metadata and
// updated_at refreshed. The stored report is returned.
func (s *PostgresStore) ReportWorkloadStatus(ctx context.Context, input ReportWorkloadStatusInput) (NodeWorkloadStatus, error) {
	const insertReportSQL = `
INSERT INTO node_workload_status_reports (
id, cluster_id, node_id, service_type, reported_state, runtime_mode, version, status_payload, observed_at
) VALUES ($1::uuid, $2::uuid, $3::uuid, $4, $5, $6, $7, $8::jsonb, NOW())
RETURNING id::text, cluster_id::text, node_id::text, service_type, reported_state,
runtime_mode, version, status_payload, observed_at
`
	const upsertLatestSQL = `
INSERT INTO node_workload_latest_statuses (
cluster_id, node_id, service_type, status_report_id, reported_state, runtime_mode, version, status_payload, observed_at
) VALUES ($1::uuid, $2::uuid, $3, $4::uuid, $5, $6, $7, $8::jsonb, $9)
ON CONFLICT (cluster_id, node_id, service_type) DO UPDATE SET
status_report_id = EXCLUDED.status_report_id,
reported_state = EXCLUDED.reported_state,
runtime_mode = EXCLUDED.runtime_mode,
version = EXCLUDED.version,
status_payload = EXCLUDED.status_payload,
observed_at = EXCLUDED.observed_at
`
	const upsertServiceSQL = `
INSERT INTO node_services (node_id, service_type, enabled, desired_state, reported_state, last_reported_at, metadata, updated_at)
VALUES ($1::uuid, $2, FALSE, 'disabled', $3, $4, $5::jsonb, $4)
ON CONFLICT (node_id, service_type) DO UPDATE SET
reported_state = EXCLUDED.reported_state,
last_reported_at = EXCLUDED.last_reported_at,
metadata = EXCLUDED.metadata,
updated_at = EXCLUDED.updated_at
`

	tx, err := s.db.Begin(ctx)
	if err != nil {
		return NodeWorkloadStatus{}, err
	}
	defer tx.Rollback(ctx)

	report, err := scanWorkloadStatus(tx.QueryRow(ctx, insertReportSQL,
		uuid.NewString(), input.ClusterID, input.NodeID, input.ServiceType, input.ReportedState,
		input.RuntimeMode, input.Version, []byte(input.StatusPayload)))
	if err != nil {
		return NodeWorkloadStatus{}, err
	}

	// The follow-up writes use the values returned for the inserted report.
	_, err = tx.Exec(ctx, upsertLatestSQL,
		report.ClusterID, report.NodeID, report.ServiceType, report.ID, report.ReportedState,
		report.RuntimeMode, report.Version, []byte(report.StatusPayload), report.ObservedAt)
	if err != nil {
		return NodeWorkloadStatus{}, err
	}

	_, err = tx.Exec(ctx, upsertServiceSQL,
		report.NodeID, report.ServiceType, report.ReportedState, report.ObservedAt, []byte(report.StatusPayload))
	if err != nil {
		return NodeWorkloadStatus{}, err
	}

	if err = tx.Commit(ctx); err != nil {
		return NodeWorkloadStatus{}, err
	}
	return report, nil
}
// ListLatestWorkloadStatuses returns the latest reported status per service
// type for one node in a cluster, ordered by service type. A row without a
// status report ID is returned with the all-zero UUID as its ID.
func (s *PostgresStore) ListLatestWorkloadStatuses(ctx context.Context, clusterID, nodeID string) ([]NodeWorkloadStatus, error) {
	const query = `
SELECT COALESCE(status_report_id::text, '00000000-0000-0000-0000-000000000000'), cluster_id::text, node_id::text,
service_type, reported_state, runtime_mode, version, status_payload, observed_at
FROM node_workload_latest_statuses
WHERE cluster_id = $1::uuid
AND node_id = $2::uuid
ORDER BY service_type
`
	rows, err := s.db.Query(ctx, query, clusterID, nodeID)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var statuses []NodeWorkloadStatus
	for rows.Next() {
		status, scanErr := scanWorkloadStatus(rows)
		if scanErr != nil {
			return nil, scanErr
		}
		statuses = append(statuses, status)
	}
	return statuses, rows.Err()
}
// ReportMeshLink records one mesh link observation between two nodes in a
// single transaction: it appends the observation (observed_at = NOW()) and
// upserts the latest-link row. The latest-link row is keyed by cluster,
// source node, target node and the observation key derived from the stored
// metadata by meshLatestObservationKey. The stored observation is returned.
func (s *PostgresStore) ReportMeshLink(ctx context.Context, input ReportMeshLinkInput) (MeshLinkObservation, error) {
	const insertObservationSQL = `
INSERT INTO mesh_link_observations (
id, cluster_id, source_node_id, target_node_id, link_status, latency_ms, quality_score, metadata, observed_at
) VALUES ($1::uuid, $2::uuid, $3::uuid, $4::uuid, $5, $6, $7, $8::jsonb, NOW())
RETURNING id::text, cluster_id::text, source_node_id::text, target_node_id::text, link_status,
latency_ms, quality_score, metadata, observed_at
`
	const upsertLatestSQL = `
INSERT INTO mesh_latest_links (
cluster_id, source_node_id, target_node_id, observation_id, link_status, latency_ms, quality_score, metadata, observed_at, observation_key
) VALUES ($1::uuid, $2::uuid, $3::uuid, $4::uuid, $5, $6, $7, $8::jsonb, $9, $10)
ON CONFLICT (cluster_id, source_node_id, target_node_id, observation_key) DO UPDATE SET
observation_id = EXCLUDED.observation_id,
link_status = EXCLUDED.link_status,
latency_ms = EXCLUDED.latency_ms,
quality_score = EXCLUDED.quality_score,
metadata = EXCLUDED.metadata,
observed_at = EXCLUDED.observed_at
`

	tx, err := s.db.Begin(ctx)
	if err != nil {
		return MeshLinkObservation{}, err
	}
	defer tx.Rollback(ctx)

	link, err := scanMeshLink(tx.QueryRow(ctx, insertObservationSQL,
		uuid.NewString(), input.ClusterID, input.SourceNodeID, input.TargetNodeID, input.LinkStatus,
		input.LatencyMs, input.QualityScore, []byte(input.Metadata)))
	if err != nil {
		return MeshLinkObservation{}, err
	}

	latestKey := meshLatestObservationKey(link.Metadata)
	_, err = tx.Exec(ctx, upsertLatestSQL,
		link.ClusterID, link.SourceNodeID, link.TargetNodeID, link.ID, link.LinkStatus,
		link.LatencyMs, link.QualityScore, []byte(link.Metadata), link.ObservedAt, latestKey)
	if err != nil {
		return MeshLinkObservation{}, err
	}

	if err = tx.Commit(ctx); err != nil {
		return MeshLinkObservation{}, err
	}
	return link, nil
}
// meshLatestObservationKey derives the observation_key used for
// mesh_latest_links rows from an observation's metadata document.
//
// The key is "default" when the metadata does not decode into a JSON object or
// has no string "observation_type". Otherwise it is the observation type,
// extended as follows:
//   - "synthetic_route_health": ":<route_id>" is appended when route_id is a
//     non-empty string.
//   - "peer_connection_manager": ":<transport_mode>:<relay_node_id>" is
//     appended when at least one of the two is a non-empty string (the missing
//     one contributes an empty segment).
func meshLatestObservationKey(metadata json.RawMessage) string {
	const fallbackKey = "default"

	var fields map[string]any
	if json.Unmarshal(metadata, &fields) != nil {
		return fallbackKey
	}

	kind := meshMetadataString(fields, "observation_type")
	if kind == "" {
		return fallbackKey
	}

	if kind == "synthetic_route_health" {
		routeID := meshMetadataString(fields, "route_id")
		if routeID == "" {
			return kind
		}
		return kind + ":" + routeID
	}

	if kind == "peer_connection_manager" {
		transport := meshMetadataString(fields, "transport_mode")
		relay := meshMetadataString(fields, "relay_node_id")
		if transport == "" && relay == "" {
			return kind
		}
		return kind + ":" + transport + ":" + relay
	}

	return kind
}
// meshMetadataString returns values[key] when it holds a string, and the empty
// string when the key is absent or holds any other type. A nil map is treated
// like an empty one.
func meshMetadataString(values map[string]any, key string) string {
	if text, isString := values[key].(string); isString {
		return text
	}
	return ""
}
// ListMeshLinks returns the rows of mesh_latest_links for a cluster, newest
// observation first, with staleness derived at query time.
//
// A link is reported with link_status 'stale' (and derived_link_* keys merged
// into its metadata) when its observation is older than
// meshLinkStaleIntervalSQL, or when its source or target node has
// registration_status 'active' but a last_seen_at (falling back to updated_at,
// then created_at) older than nodeHeartbeatStaleIntervalSQL. Rows without an
// observation_id are returned with the all-zero UUID. Links whose source or
// target node has no row in nodes are dropped by the inner joins.
//
// The returned slice is nil when no rows match.
func (s *PostgresStore) ListMeshLinks(ctx context.Context, clusterID string) ([]MeshLinkObservation, error) {
	const listSQL = `
SELECT COALESCE(observation_id::text, '00000000-0000-0000-0000-000000000000'),
cluster_id::text, source_node_id::text, target_node_id::text,
CASE WHEN stale THEN 'stale' ELSE link_status END AS link_status,
latency_ms, quality_score,
CASE
WHEN stale THEN metadata || jsonb_build_object(
'derived_link_status', 'stale',
'derived_link_stale', true,
'derived_link_stale_reason',
CASE
WHEN observation_stale THEN 'observation_expired'
WHEN source_stale THEN 'source_node_offline'
WHEN target_stale THEN 'target_node_offline'
ELSE 'endpoint_unavailable'
END
)
ELSE metadata
END AS metadata,
observed_at
FROM (
SELECT ml.*,
ml.observed_at < NOW() - $2::interval AS observation_stale,
sn.registration_status = 'active'
AND COALESCE(sn.last_seen_at, sn.updated_at, sn.created_at) < NOW() - $3::interval AS source_stale,
tn.registration_status = 'active'
AND COALESCE(tn.last_seen_at, tn.updated_at, tn.created_at) < NOW() - $3::interval AS target_stale,
ml.observed_at < NOW() - $2::interval
OR (sn.registration_status = 'active'
AND COALESCE(sn.last_seen_at, sn.updated_at, sn.created_at) < NOW() - $3::interval)
OR (tn.registration_status = 'active'
AND COALESCE(tn.last_seen_at, tn.updated_at, tn.created_at) < NOW() - $3::interval) AS stale
FROM mesh_latest_links ml
JOIN nodes sn ON sn.id = ml.source_node_id
JOIN nodes tn ON tn.id = ml.target_node_id
WHERE ml.cluster_id = $1::uuid
) latest
ORDER BY observed_at DESC
`
	rows, err := s.db.Query(ctx, listSQL, clusterID, meshLinkStaleIntervalSQL, nodeHeartbeatStaleIntervalSQL)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var links []MeshLinkObservation
	for rows.Next() {
		link, scanErr := scanMeshLink(rows)
		if scanErr != nil {
			return nil, scanErr
		}
		links = append(links, link)
	}
	return links, rows.Err()
}
// CreateRouteIntent inserts a new row into mesh_route_intents with a generated
// ID and status 'active', and returns the row as stored.
//
// An empty input.ActorUserID is stored as a NULL created_by_user_id.
func (s *PostgresStore) CreateRouteIntent(ctx context.Context, input CreateRouteIntentInput) (MeshRouteIntent, error) {
	const insertSQL = `
INSERT INTO mesh_route_intents (
id, cluster_id, source_selector, destination_selector, service_class,
priority, status, policy, created_by_user_id, created_at, updated_at
) VALUES ($1::uuid, $2::uuid, $3::jsonb, $4::jsonb, $5, $6, 'active', $7::jsonb, NULLIF($8, '')::uuid, NOW(), NOW())
RETURNING id::text, cluster_id::text, source_selector, destination_selector, service_class,
priority, status, policy, created_by_user_id::text, created_at, updated_at
`
	intentID := uuid.NewString()
	inserted := s.db.QueryRow(ctx, insertSQL,
		intentID,
		input.ClusterID,
		[]byte(input.SourceSelector),
		[]byte(input.DestinationSelector),
		input.ServiceClass,
		input.Priority,
		[]byte(input.Policy),
		input.ActorUserID,
	)
	return scanRouteIntent(inserted)
}
// ListRouteIntents returns every route intent of a cluster ordered by
// ascending priority, with the most recently created first within a priority.
//
// The returned slice is nil when the cluster has no route intents.
func (s *PostgresStore) ListRouteIntents(ctx context.Context, clusterID string) ([]MeshRouteIntent, error) {
	const listSQL = `
SELECT id::text, cluster_id::text, source_selector, destination_selector, service_class,
priority, status, policy, created_by_user_id::text, created_at, updated_at
FROM mesh_route_intents
WHERE cluster_id = $1::uuid
ORDER BY priority ASC, created_at DESC
`
	rows, err := s.db.Query(ctx, listSQL, clusterID)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var intents []MeshRouteIntent
	for rows.Next() {
		intent, scanErr := scanRouteIntent(rows)
		if scanErr != nil {
			return nil, scanErr
		}
		intents = append(intents, intent)
	}
	return intents, rows.Err()
}
// ExpireRouteIntent stamps an expiry onto a route intent's policy document and
// returns the updated row.
//
// policy.expires_at is set to expiresAt (UTC, RFC 3339 with nanoseconds) and
// policy.operator_expire is set to {expired_at, reason}, where reason is
// input.Reason with surrounding whitespace removed. updated_at is bumped; the
// status column is left untouched. The row is matched on both cluster ID and
// intent ID; the outcome when nothing matches is whatever scanRouteIntent
// reports for an empty result.
func (s *PostgresStore) ExpireRouteIntent(ctx context.Context, input RouteIntentLifecycleInput, expiresAt time.Time) (MeshRouteIntent, error) {
	const expireSQL = `
UPDATE mesh_route_intents
SET policy = jsonb_set(
jsonb_set(COALESCE(policy, '{}'::jsonb), '{expires_at}', to_jsonb($3::text), true),
'{operator_expire}',
jsonb_build_object('expired_at', $3::text, 'reason', $4::text),
true
),
updated_at = NOW()
WHERE cluster_id = $1::uuid
AND id = $2::uuid
RETURNING id::text, cluster_id::text, source_selector, destination_selector, service_class,
priority, status, policy, created_by_user_id::text, created_at, updated_at
`
	updated := s.db.QueryRow(ctx, expireSQL,
		input.ClusterID,
		input.RouteIntentID,
		expiresAt.UTC().Format(time.RFC3339Nano),
		strings.TrimSpace(input.Reason),
	)
	return scanRouteIntent(updated)
}
// DisableRouteIntent sets a route intent's status to 'disabled', records the
// operator action in its policy document, and returns the updated row.
//
// policy.operator_disable is set to {disabled_at, reason}: disabled_at is the
// current UTC time (RFC 3339 with nanoseconds) taken in this process, and
// reason is input.Reason with surrounding whitespace removed. The row is
// matched on both cluster ID and intent ID.
func (s *PostgresStore) DisableRouteIntent(ctx context.Context, input RouteIntentLifecycleInput) (MeshRouteIntent, error) {
	const disableSQL = `
UPDATE mesh_route_intents
SET status = 'disabled',
policy = jsonb_set(
COALESCE(policy, '{}'::jsonb),
'{operator_disable}',
jsonb_build_object('disabled_at', $3::text, 'reason', $4::text),
true
),
updated_at = NOW()
WHERE cluster_id = $1::uuid
AND id = $2::uuid
RETURNING id::text, cluster_id::text, source_selector, destination_selector, service_class,
priority, status, policy, created_by_user_id::text, created_at, updated_at
`
	var (
		trimmedReason = strings.TrimSpace(input.Reason)
		disabledAtRaw = time.Now().UTC().Format(time.RFC3339Nano)
	)
	updated := s.db.QueryRow(ctx, disableSQL, input.ClusterID, input.RouteIntentID, disabledAtRaw, trimmedReason)
	return scanRouteIntent(updated)
}
// RecordFabricServiceChannelRouteFeedback appends a route feedback observation
// and conditionally refreshes the per-(cluster, reporter, route) "latest" row,
// all inside one transaction.
//
// The observation is always inserted into
// fabric_service_channel_route_feedback_observations and returned. The row in
// fabric_service_channel_route_feedback_latest is only overwritten when the
// guard in the upsert's WHERE clause allows it (see the comment on that
// statement), so the returned observation is not necessarily the one that
// ends up as "latest".
//
// input.ObservedAt defaults to the current UTC time when zero, and
// input.ExpiresAt defaults to observedAt + fabricServiceChannelFeedbackMaxAge
// when zero.
func (s *PostgresStore) RecordFabricServiceChannelRouteFeedback(ctx context.Context, input RecordFabricServiceChannelRouteFeedbackInput) (FabricServiceChannelRouteFeedbackObservation, error) {
tx, err := s.db.Begin(ctx)
if err != nil {
return FabricServiceChannelRouteFeedbackObservation{}, err
}
// Deferred so that every early return below abandons the transaction.
defer tx.Rollback(ctx)
id := uuid.NewString()
observedAt := input.ObservedAt.UTC()
if observedAt.IsZero() {
observedAt = time.Now().UTC()
}
// Non-healthy feedback is checked against a retry cooldown stored in the
// payload of the current latest row. When that cooldown is still in the
// future relative to observedAt, the input is replaced by the result of
// fabricServiceChannelFeedbackSuppressedByOperatorCooldown before being
// stored.
// NOTE(review): both cooldown helpers are defined elsewhere; presumably the
// cooldown is the operator_retry_cooldown_until value written by
// ExpireFabricServiceChannelRouteFeedback — confirm.
if input.FeedbackStatus != "healthy" {
var currentPayload json.RawMessage
err := tx.QueryRow(ctx, `
SELECT payload
FROM fabric_service_channel_route_feedback_latest
WHERE cluster_id = $1::uuid
AND reporter_node_id = $2::uuid
AND route_id = $3
`, input.ClusterID, input.ReporterNodeID, input.RouteID).Scan(&currentPayload)
// A missing latest row is not an error: currentPayload simply stays nil.
if err != nil && !errors.Is(err, pgx.ErrNoRows) {
return FabricServiceChannelRouteFeedbackObservation{}, err
}
if cooldownUntil := fabricServiceChannelRetryCooldownUntil(currentPayload); cooldownUntil != nil && cooldownUntil.After(observedAt) {
input = fabricServiceChannelFeedbackSuppressedByOperatorCooldown(input, *cooldownUntil, observedAt)
}
}
// Read after the cooldown step so that a replaced input supplies the expiry.
expiresAt := input.ExpiresAt.UTC()
if expiresAt.IsZero() {
expiresAt = observedAt.Add(fabricServiceChannelFeedbackMaxAge)
}
row := tx.QueryRow(ctx, `
INSERT INTO fabric_service_channel_route_feedback_observations (
id, cluster_id, reporter_node_id, route_id, service_class, feedback_status,
score_adjustment, reasons, last_error, consecutive_failures, stall_count,
last_send_duration_ms, payload, observed_at, expires_at
) VALUES ($1::uuid, $2::uuid, $3::uuid, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13::jsonb, $14, $15)
RETURNING id::text, cluster_id::text, reporter_node_id::text, route_id, service_class,
feedback_status, score_adjustment, reasons, last_error, consecutive_failures,
stall_count, last_send_duration_ms, payload, observed_at, expires_at
`, id, input.ClusterID, input.ReporterNodeID, input.RouteID, input.ServiceClass, input.FeedbackStatus, input.ScoreAdjustment,
input.Reasons, input.LastError, input.ConsecutiveFailures, input.StallCount, input.LastSendDurationMs, []byte(input.Payload), observedAt, expiresAt)
item, err := scanFabricServiceChannelRouteFeedback(row)
if err != nil {
return FabricServiceChannelRouteFeedbackObservation{}, err
}
// Upsert the latest row from the observation as stored. An existing row is
// only replaced when it is not newer than the incoming observation, and a
// 'healthy' report does not replace an unexpired 'degraded' or 'fenced' row
// unless it is fully clean: zero consecutive failures, zero stalls, and zero
// quality-window failure/drop/slow counts in its payload.
if _, err := tx.Exec(ctx, `
INSERT INTO fabric_service_channel_route_feedback_latest (
cluster_id, reporter_node_id, route_id, observation_id, service_class, feedback_status,
score_adjustment, reasons, last_error, consecutive_failures, stall_count,
last_send_duration_ms, payload, observed_at, expires_at
) VALUES ($1::uuid, $2::uuid, $3, $4::uuid, $5, $6, $7, $8, $9, $10, $11, $12, $13::jsonb, $14, $15)
ON CONFLICT (cluster_id, reporter_node_id, route_id) DO UPDATE SET
observation_id = EXCLUDED.observation_id,
service_class = EXCLUDED.service_class,
feedback_status = EXCLUDED.feedback_status,
score_adjustment = EXCLUDED.score_adjustment,
reasons = EXCLUDED.reasons,
last_error = EXCLUDED.last_error,
consecutive_failures = EXCLUDED.consecutive_failures,
stall_count = EXCLUDED.stall_count,
last_send_duration_ms = EXCLUDED.last_send_duration_ms,
payload = EXCLUDED.payload,
observed_at = EXCLUDED.observed_at,
expires_at = EXCLUDED.expires_at
WHERE fabric_service_channel_route_feedback_latest.observed_at <= EXCLUDED.observed_at
AND NOT (
EXCLUDED.feedback_status = 'healthy'
AND fabric_service_channel_route_feedback_latest.feedback_status IN ('degraded', 'fenced')
AND fabric_service_channel_route_feedback_latest.expires_at > EXCLUDED.observed_at
AND NOT (
EXCLUDED.consecutive_failures = 0
AND EXCLUDED.stall_count = 0
AND COALESCE(NULLIF(EXCLUDED.payload->>'quality_window_failure_count', '')::int, 0) = 0
AND COALESCE(NULLIF(EXCLUDED.payload->>'quality_window_drop_count', '')::int, 0) = 0
AND COALESCE(NULLIF(EXCLUDED.payload->>'quality_window_slow_count', '')::int, 0) = 0
)
)
`, item.ClusterID, item.ReporterNodeID, item.RouteID, item.ID, item.ServiceClass, item.FeedbackStatus, item.ScoreAdjustment,
item.Reasons, item.LastError, item.ConsecutiveFailures, item.StallCount, item.LastSendDurationMs, []byte(item.Payload), item.ObservedAt, item.ExpiresAt); err != nil {
return FabricServiceChannelRouteFeedbackObservation{}, err
}
if err := tx.Commit(ctx); err != nil {
return FabricServiceChannelRouteFeedbackObservation{}, err
}
return item, nil
}
// ListFabricServiceChannelRouteFeedback returns the latest route feedback rows
// of a cluster, most recently observed first.
//
// ReporterNodeID, RouteID, ServiceClass and FeedbackStatus are optional
// filters; an empty string disables the corresponding filter. Rows whose
// expires_at is not after input.Now are excluded unless input.IncludeExpired
// is set. A zero input.Now is replaced by the current UTC time.
//
// The returned slice is non-nil even when no rows match.
func (s *PostgresStore) ListFabricServiceChannelRouteFeedback(ctx context.Context, input ListFabricServiceChannelRouteFeedbackInput) ([]FabricServiceChannelRouteFeedbackObservation, error) {
	const listSQL = `
SELECT observation_id::text, cluster_id::text, reporter_node_id::text, route_id, service_class,
feedback_status, score_adjustment, reasons, last_error, consecutive_failures,
stall_count, last_send_duration_ms, payload, observed_at, expires_at
FROM fabric_service_channel_route_feedback_latest
WHERE cluster_id = $1::uuid
AND (NULLIF($2, '') IS NULL OR reporter_node_id = NULLIF($2, '')::uuid)
AND ($3 = '' OR route_id = $3)
AND ($4 = '' OR service_class = $4)
AND ($5 = '' OR feedback_status = $5)
AND ($6::boolean OR expires_at > $7)
ORDER BY observed_at DESC
`
	asOf := input.Now.UTC()
	if asOf.IsZero() {
		asOf = time.Now().UTC()
	}

	rows, err := s.db.Query(ctx, listSQL,
		input.ClusterID,
		input.ReporterNodeID,
		input.RouteID,
		input.ServiceClass,
		input.FeedbackStatus,
		input.IncludeExpired,
		asOf,
	)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	feedback := make([]FabricServiceChannelRouteFeedbackObservation, 0)
	for rows.Next() {
		observation, scanErr := scanFabricServiceChannelRouteFeedback(rows)
		if scanErr != nil {
			return nil, scanErr
		}
		feedback = append(feedback, observation)
	}
	return feedback, rows.Err()
}
// StoreFabricServiceChannelLease inserts or replaces the lease record for a
// (cluster, channel) pair and returns the stored record.
//
// The lease is serialised to JSON with its Token.Token field blanked, so only
// input.TokenHash is stored as token material. ErrInvalidPayload is returned
// when the lease's ClusterID or ChannelID, or input.TokenHash, is empty. On
// conflict every stored column except created_at is overwritten and
// updated_at is set to NOW().
func (s *PostgresStore) StoreFabricServiceChannelLease(ctx context.Context, input StoreFabricServiceChannelLeaseInput) (FabricServiceChannelLeaseRecord, error) {
	const upsertSQL = `
INSERT INTO fabric_service_channel_leases (
cluster_id, channel_id, token_hash, resource_id, service_class,
selected_entry_node_id, expires_at, lease, created_at, updated_at
) VALUES (
$1::uuid, $2::uuid, $3, $4, $5, $6::uuid, $7, $8::jsonb, NOW(), NOW()
)
ON CONFLICT (cluster_id, channel_id) DO UPDATE SET
token_hash = EXCLUDED.token_hash,
resource_id = EXCLUDED.resource_id,
service_class = EXCLUDED.service_class,
selected_entry_node_id = EXCLUDED.selected_entry_node_id,
expires_at = EXCLUDED.expires_at,
lease = EXCLUDED.lease,
updated_at = NOW()
RETURNING cluster_id::text, channel_id::text, token_hash, resource_id, service_class,
selected_entry_node_id::text, expires_at, lease, created_at, updated_at
`
	lease := input.Lease
	switch {
	case lease.ClusterID == "", lease.ChannelID == "", input.TokenHash == "":
		return FabricServiceChannelLeaseRecord{}, ErrInvalidPayload
	}

	// Serialise a copy with the token value blanked.
	redacted := lease
	redacted.Token.Token = ""
	leaseJSON, err := json.Marshal(redacted)
	if err != nil {
		return FabricServiceChannelLeaseRecord{}, err
	}

	stored := s.db.QueryRow(ctx, upsertSQL,
		lease.ClusterID,
		lease.ChannelID,
		input.TokenHash,
		lease.ResourceID,
		lease.ServiceClass,
		lease.SelectedEntryNodeID,
		lease.ExpiresAt,
		leaseJSON,
	)
	return scanFabricServiceChannelLeaseRecord(stored)
}
// GetFabricServiceChannelLease loads the lease record stored for the given
// cluster and channel. Expiry is not checked here. The outcome when no row
// matches is whatever scanFabricServiceChannelLeaseRecord reports for an
// empty result.
func (s *PostgresStore) GetFabricServiceChannelLease(ctx context.Context, clusterID, channelID string) (FabricServiceChannelLeaseRecord, error) {
	const selectSQL = `
SELECT cluster_id::text, channel_id::text, token_hash, resource_id, service_class,
selected_entry_node_id::text, expires_at, lease, created_at, updated_at
FROM fabric_service_channel_leases
WHERE cluster_id = $1::uuid
AND channel_id = $2::uuid
`
	found := s.db.QueryRow(ctx, selectSQL, clusterID, channelID)
	return scanFabricServiceChannelLeaseRecord(found)
}
// ListFabricServiceChannelLeases returns lease records of a cluster ordered by
// expiry, latest-expiring first.
//
// ServiceClass, EntryNodeID and ResourceID are optional filters; an empty
// string disables the corresponding filter. Leases whose expires_at is not
// after input.Now are excluded unless input.IncludeExpired is set. A zero
// input.Now is replaced by the current UTC time, and a Limit outside 1..500 is
// replaced by 100.
//
// The returned slice is non-nil even when no rows match.
func (s *PostgresStore) ListFabricServiceChannelLeases(ctx context.Context, input ListFabricServiceChannelLeasesInput) ([]FabricServiceChannelLeaseRecord, error) {
	const listSQL = `
SELECT cluster_id::text, channel_id::text, token_hash, resource_id, service_class,
selected_entry_node_id::text, expires_at, lease, created_at, updated_at
FROM fabric_service_channel_leases
WHERE cluster_id = $1::uuid
AND ($2 = '' OR service_class = $2)
AND (NULLIF($3, '') IS NULL OR selected_entry_node_id = NULLIF($3, '')::uuid)
AND ($4 = '' OR resource_id = $4)
AND ($5::boolean OR expires_at > $6)
ORDER BY expires_at DESC
LIMIT $7
`
	asOf := input.Now.UTC()
	if asOf.IsZero() {
		asOf = time.Now().UTC()
	}
	pageSize := input.Limit
	if pageSize <= 0 || pageSize > 500 {
		pageSize = 100
	}

	rows, err := s.db.Query(ctx, listSQL,
		input.ClusterID,
		input.ServiceClass,
		input.EntryNodeID,
		input.ResourceID,
		input.IncludeExpired,
		asOf,
		pageSize,
	)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	leases := make([]FabricServiceChannelLeaseRecord, 0)
	for rows.Next() {
		record, scanErr := scanFabricServiceChannelLeaseRecord(rows)
		if scanErr != nil {
			return nil, scanErr
		}
		leases = append(leases, record)
	}
	return leases, rows.Err()
}
// CleanupExpiredFabricServiceChannelLeases deletes a batch of expired leases
// from one cluster and returns how many rows were removed.
//
// A lease counts as expired when expires_at <= now. The batch is chosen
// oldest-expiry first and is capped at limit rows. A zero now is replaced by
// the current time, and a limit outside 1..1000 is replaced by 100.
func (s *PostgresStore) CleanupExpiredFabricServiceChannelLeases(ctx context.Context, clusterID string, now time.Time, limit int) (int, error) {
	const deleteSQL = `
DELETE FROM fabric_service_channel_leases
WHERE (cluster_id, channel_id) IN (
SELECT cluster_id, channel_id
FROM fabric_service_channel_leases
WHERE cluster_id = $1::uuid
AND expires_at <= $2
ORDER BY expires_at ASC
LIMIT $3
)
`
	cutoff := now
	if cutoff.IsZero() {
		cutoff = time.Now().UTC()
	}
	batchSize := limit
	if batchSize <= 0 || batchSize > 1000 {
		batchSize = 100
	}

	result, err := s.db.Exec(ctx, deleteSQL, clusterID, cutoff.UTC(), batchSize)
	if err != nil {
		return 0, err
	}
	return int(result.RowsAffected()), nil
}
// RecordFabricServiceChannelRouteRebuildAttempt inserts or updates one row in
// fabric_service_channel_route_rebuild_attempts and returns the stored row.
//
// Rows are unique per (cluster_id, reporter_node_id, service_class, route_id,
// rebuild_request_id). On conflict every column supplied by the input is
// overwritten and updated_at is set to NOW(); the generated id is only used
// when a new row is inserted. The node_transition_*, node_route_generation_*,
// post_rebuild_*, guard_* and correlation_* columns are not written by this
// statement; they are only read back through RETURNING.
func (s *PostgresStore) RecordFabricServiceChannelRouteRebuildAttempt(ctx context.Context, input RecordFabricServiceChannelRouteRebuildAttemptInput) (FabricServiceChannelRouteRebuildAttempt, error) {
id := uuid.NewString()
// NOTE(review): defaultJSON is defined elsewhere; presumably it substitutes
// `{}` when input.Payload is empty — confirm.
payload := defaultJSON(input.Payload, `{}`)
// The 32 positional arguments below must stay in the same order as the
// column list of the INSERT.
row := s.db.QueryRow(ctx, `
INSERT INTO fabric_service_channel_route_rebuild_attempts (
id, cluster_id, reporter_node_id, service_class, route_id, replacement_route_id,
rebuild_request_id, rebuild_status, rebuild_reason, rebuild_attempt, decision_source,
outcome, generation, policy_fingerprint, observed_policy_fingerprint,
observed_route_generation, effective_route_generation, feedback_status,
feedback_score_adjustment, feedback_effective_score_adjustment, feedback_reasons,
last_error, consecutive_failures, stall_count, last_send_duration_ms,
quality_window_sample_count, quality_window_failure_count, quality_window_drop_count,
quality_window_slow_count, old_hops, replacement_hops, payload
) VALUES (
$1::uuid, $2::uuid, $3::uuid, $4, $5, $6,
$7, $8, $9, $10, $11,
$12, $13, $14, $15,
$16, $17, $18,
$19, $20, $21,
$22, $23, $24, $25,
$26, $27, $28,
$29, $30, $31, $32::jsonb
)
ON CONFLICT (cluster_id, reporter_node_id, service_class, route_id, rebuild_request_id) DO UPDATE SET
replacement_route_id = EXCLUDED.replacement_route_id,
rebuild_status = EXCLUDED.rebuild_status,
rebuild_reason = EXCLUDED.rebuild_reason,
rebuild_attempt = EXCLUDED.rebuild_attempt,
decision_source = EXCLUDED.decision_source,
outcome = EXCLUDED.outcome,
generation = EXCLUDED.generation,
policy_fingerprint = EXCLUDED.policy_fingerprint,
observed_policy_fingerprint = EXCLUDED.observed_policy_fingerprint,
observed_route_generation = EXCLUDED.observed_route_generation,
effective_route_generation = EXCLUDED.effective_route_generation,
feedback_status = EXCLUDED.feedback_status,
feedback_score_adjustment = EXCLUDED.feedback_score_adjustment,
feedback_effective_score_adjustment = EXCLUDED.feedback_effective_score_adjustment,
feedback_reasons = EXCLUDED.feedback_reasons,
last_error = EXCLUDED.last_error,
consecutive_failures = EXCLUDED.consecutive_failures,
stall_count = EXCLUDED.stall_count,
last_send_duration_ms = EXCLUDED.last_send_duration_ms,
quality_window_sample_count = EXCLUDED.quality_window_sample_count,
quality_window_failure_count = EXCLUDED.quality_window_failure_count,
quality_window_drop_count = EXCLUDED.quality_window_drop_count,
quality_window_slow_count = EXCLUDED.quality_window_slow_count,
old_hops = EXCLUDED.old_hops,
replacement_hops = EXCLUDED.replacement_hops,
payload = EXCLUDED.payload,
updated_at = NOW()
RETURNING id::text, cluster_id::text, reporter_node_id::text, service_class, route_id,
replacement_route_id, rebuild_request_id, rebuild_status, rebuild_reason,
rebuild_attempt, decision_source, outcome, generation, policy_fingerprint,
observed_policy_fingerprint, observed_route_generation, effective_route_generation,
feedback_status, feedback_score_adjustment, feedback_effective_score_adjustment,
feedback_reasons, last_error, consecutive_failures, stall_count,
last_send_duration_ms, quality_window_sample_count, quality_window_failure_count,
quality_window_drop_count, quality_window_slow_count, old_hops, replacement_hops,
node_transition_status, node_transition_generation, node_transition_observed_at,
node_transition_matched, node_route_generation_status, node_route_generation_applied_at,
node_route_generation_withdrawn_at, node_route_generation_matched,
post_rebuild_selected_route_id, post_rebuild_send_packets, post_rebuild_send_failures,
post_rebuild_send_flow_packets, post_rebuild_send_flow_dropped,
guard_status, guard_severity, guard_reason, guard_transition_deadline_seconds,
guard_traffic_deadline_seconds, correlation_timeline, correlation_snapshot_at,
payload, created_at, updated_at
`, id, input.ClusterID, input.ReporterNodeID, input.ServiceClass, input.RouteID, input.ReplacementRouteID,
input.RebuildRequestID, input.RebuildStatus, input.RebuildReason, input.RebuildAttempt, input.DecisionSource,
input.Outcome, input.Generation, input.PolicyFingerprint, input.ObservedPolicyFingerprint,
input.ObservedRouteGeneration, input.EffectiveRouteGeneration, input.FeedbackStatus,
input.FeedbackScoreAdjustment, input.FeedbackEffectiveScoreAdjustment, input.FeedbackReasons,
input.LastError, input.ConsecutiveFailures, input.StallCount, input.LastSendDurationMs,
input.QualityWindowSampleCount, input.QualityWindowFailureCount, input.QualityWindowDropCount,
input.QualityWindowSlowCount, input.OldHops, input.ReplacementHops, []byte(payload))
return scanFabricServiceChannelRouteRebuildAttempt(row)
}
// ListFabricServiceChannelRouteRebuildAttempts returns one page of rebuild
// attempts for a cluster, most recently updated first.
//
// All filter fields are optional; an empty string disables the corresponding
// filter. FeedbackSource, FeedbackChannelID and FeedbackViolationStatus are
// matched against the feedback_source, feedback_channel_id and
// feedback_violation_status keys of the payload column. A Limit outside
// 1..200 is replaced by 100 and a negative Offset by 0.
//
// The returned slice is non-nil even when no rows match.
func (s *PostgresStore) ListFabricServiceChannelRouteRebuildAttempts(ctx context.Context, input ListFabricServiceChannelRouteRebuildAttemptsInput) ([]FabricServiceChannelRouteRebuildAttempt, error) {
	const listSQL = `
SELECT id::text, cluster_id::text, reporter_node_id::text, service_class, route_id,
replacement_route_id, rebuild_request_id, rebuild_status, rebuild_reason,
rebuild_attempt, decision_source, outcome, generation, policy_fingerprint,
observed_policy_fingerprint, observed_route_generation, effective_route_generation,
feedback_status, feedback_score_adjustment, feedback_effective_score_adjustment,
feedback_reasons, last_error, consecutive_failures, stall_count,
last_send_duration_ms, quality_window_sample_count, quality_window_failure_count,
quality_window_drop_count, quality_window_slow_count, old_hops, replacement_hops,
node_transition_status, node_transition_generation, node_transition_observed_at,
node_transition_matched, node_route_generation_status, node_route_generation_applied_at,
node_route_generation_withdrawn_at, node_route_generation_matched,
post_rebuild_selected_route_id, post_rebuild_send_packets, post_rebuild_send_failures,
post_rebuild_send_flow_packets, post_rebuild_send_flow_dropped,
guard_status, guard_severity, guard_reason, guard_transition_deadline_seconds,
guard_traffic_deadline_seconds, correlation_timeline, correlation_snapshot_at,
payload, created_at, updated_at
FROM fabric_service_channel_route_rebuild_attempts
WHERE cluster_id = $1::uuid
AND (NULLIF($2, '') IS NULL OR reporter_node_id = NULLIF($2, '')::uuid)
AND ($3 = '' OR route_id = $3)
AND ($4 = '' OR replacement_route_id = $4)
AND ($5 = '' OR service_class = $5)
AND ($6 = '' OR rebuild_status = $6)
AND ($7 = '' OR rebuild_request_id = $7)
AND ($8 = '' OR generation = $8)
AND ($9 = '' OR payload->>'feedback_source' = $9)
AND ($10 = '' OR payload->>'feedback_channel_id' = $10)
AND ($11 = '' OR payload->>'feedback_violation_status' = $11)
ORDER BY updated_at DESC
LIMIT $12 OFFSET $13
`
	pageSize := input.Limit
	if pageSize <= 0 || pageSize > 200 {
		pageSize = 100
	}
	skip := input.Offset
	if skip < 0 {
		skip = 0
	}

	rows, err := s.db.Query(ctx, listSQL,
		input.ClusterID,
		input.ReporterNodeID,
		input.RouteID,
		input.ReplacementRouteID,
		input.ServiceClass,
		input.RebuildStatus,
		input.RebuildRequestID,
		input.Generation,
		input.FeedbackSource,
		input.FeedbackChannelID,
		input.FeedbackViolationStatus,
		pageSize,
		skip,
	)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	attempts := make([]FabricServiceChannelRouteRebuildAttempt, 0)
	for rows.Next() {
		attempt, scanErr := scanFabricServiceChannelRouteRebuildAttempt(rows)
		if scanErr != nil {
			return nil, scanErr
		}
		attempts = append(attempts, attempt)
	}
	return attempts, rows.Err()
}
// UpdateFabricServiceChannelRouteRebuildCorrelationSnapshot overwrites the
// node_transition_*, node_route_generation_*, post_rebuild_*, guard_* and
// correlation_* columns of the rebuild attempt identified by input.ID.
//
// The row is matched on id alone; no cluster filter is applied. Only the
// error from Exec is returned and the affected-row count is not inspected, so
// an ID that matches no row is not distinguished from a successful update.
func (s *PostgresStore) UpdateFabricServiceChannelRouteRebuildCorrelationSnapshot(ctx context.Context, input UpdateFabricServiceChannelRouteRebuildCorrelationSnapshotInput) error {
// NOTE(review): mustJSONRaw is defined elsewhere; presumably it marshals the
// timeline to JSON — confirm how it behaves on marshal failure.
timeline := mustJSONRaw(input.Timeline)
// The 21 positional arguments below must stay in the same order as the
// $1..$21 placeholders. The four post-rebuild counters are converted to
// int64 before binding.
_, err := s.db.Exec(ctx, `
UPDATE fabric_service_channel_route_rebuild_attempts
SET node_transition_status = $2,
node_transition_generation = $3,
node_transition_observed_at = $4,
node_transition_matched = $5,
node_route_generation_status = $6,
node_route_generation_applied_at = $7,
node_route_generation_withdrawn_at = $8,
node_route_generation_matched = $9,
post_rebuild_selected_route_id = $10,
post_rebuild_send_packets = $11,
post_rebuild_send_failures = $12,
post_rebuild_send_flow_packets = $13,
post_rebuild_send_flow_dropped = $14,
guard_status = $15,
guard_severity = $16,
guard_reason = $17,
guard_transition_deadline_seconds = $18,
guard_traffic_deadline_seconds = $19,
correlation_timeline = $20::jsonb,
correlation_snapshot_at = $21
WHERE id = $1::uuid
`, input.ID, input.NodeTransitionStatus, input.NodeTransitionGeneration, input.NodeTransitionObservedAt,
input.NodeTransitionMatched, input.NodeRouteGenerationStatus, input.NodeRouteGenerationAppliedAt,
input.NodeRouteGenerationWithdrawnAt, input.NodeRouteGenerationMatched, input.PostRebuildSelectedRouteID,
int64(input.PostRebuildSendPackets), int64(input.PostRebuildSendFailures), int64(input.PostRebuildSendFlowPackets),
int64(input.PostRebuildSendFlowDropped), input.GuardStatus, input.GuardSeverity, input.GuardReason,
input.GuardTransitionDeadlineSeconds, input.GuardTrafficDeadlineSeconds, []byte(timeline), input.CorrelationSnapshotAt)
return err
}
// GetFabricServiceChannelSchemaStatus reports whether the database has every
// table and column listed by fabricServiceChannelRequiredSchemaChecks.
//
// Each required check is probed and recorded in RequiredChecks with Status
// "present" or "missing"; missing ones are also collected in MissingChecks.
// The overall Status is "ready" when nothing is missing, otherwise "blocked"
// with Reason "schema_migration_required" and a recommended operator action.
// A failing probe query aborts the report and returns the error with a zero
// status value.
func (s *PostgresStore) GetFabricServiceChannelSchemaStatus(ctx context.Context, input GetFabricServiceChannelSchemaStatusInput) (FabricServiceChannelSchemaStatus, error) {
	required := fabricServiceChannelRequiredSchemaChecks()
	report := FabricServiceChannelSchemaStatus{
		ClusterID:         input.ClusterID,
		ObservedAt:        time.Now().UTC(),
		Status:            "ready",
		Reason:            "schema_ready",
		RequiredMigration: "000028_fabric_service_channel_rebuild_correlation_snapshot",
		RequiredChecks:    make([]FabricServiceChannelSchemaCheck, 0, len(required)),
	}

	for i := range required {
		check := required[i]
		present, err := s.fabricServiceChannelSchemaCheckExists(ctx, check)
		if err != nil {
			return FabricServiceChannelSchemaStatus{}, err
		}
		if present {
			check.Status = "present"
		} else {
			check.Status = "missing"
			report.MissingChecks = append(report.MissingChecks, check)
		}
		report.RequiredChecks = append(report.RequiredChecks, check)
	}

	report.RequiredCheckCount = len(report.RequiredChecks)
	report.MissingCheckCount = len(report.MissingChecks)
	report.PassedCheckCount = report.RequiredCheckCount - report.MissingCheckCount
	if report.MissingCheckCount == 0 {
		return report, nil
	}

	report.Status = "blocked"
	report.Reason = "schema_migration_required"
	report.RecommendedOperatorAction = "Apply backend migration 000028 before swapping or using this backend build."
	return report, nil
}
// fabricServiceChannelSchemaCheckExists probes information_schema for one
// schema check, looking only at the 'public' schema.
//
// When check.ColumnName is empty the check is for the table
// check.RelationName itself; otherwise it is for that column on that table.
func (s *PostgresStore) fabricServiceChannelSchemaCheckExists(ctx context.Context, check FabricServiceChannelSchemaCheck) (bool, error) {
	// Default to the column probe and narrow to the table probe when no
	// column was requested.
	probeSQL := `
SELECT EXISTS (
SELECT 1
FROM information_schema.columns
WHERE table_schema = 'public'
AND table_name = $1
AND column_name = $2
)
`
	args := []any{check.RelationName, check.ColumnName}
	if check.ColumnName == "" {
		probeSQL = `
SELECT EXISTS (
SELECT 1
FROM information_schema.tables
WHERE table_schema = 'public'
AND table_name = $1
)
`
		args = args[:1]
	}

	var found bool
	err := s.db.QueryRow(ctx, probeSQL, args...).Scan(&found)
	return found, err
}
// fabricServiceChannelRequiredSchemaChecks lists the schema checks tied to
// migration 000028: one check for the
// fabric_service_channel_route_rebuild_attempts table itself, followed by one
// check per correlation-snapshot column on that table. Column checks use
// "<table>.<column>" as their CheckID. Status is left unset on every entry.
func fabricServiceChannelRequiredSchemaChecks() []FabricServiceChannelSchemaCheck {
	const (
		requiredBy = "000028_fabric_service_channel_rebuild_correlation_snapshot"
		relation   = "fabric_service_channel_route_rebuild_attempts"
	)
	requiredColumns := [...]string{
		"node_transition_status",
		"node_transition_generation",
		"node_transition_observed_at",
		"node_transition_matched",
		"node_route_generation_status",
		"node_route_generation_applied_at",
		"node_route_generation_withdrawn_at",
		"node_route_generation_matched",
		"post_rebuild_selected_route_id",
		"post_rebuild_send_packets",
		"post_rebuild_send_failures",
		"post_rebuild_send_flow_packets",
		"post_rebuild_send_flow_dropped",
		"guard_status",
		"guard_severity",
		"guard_reason",
		"guard_transition_deadline_seconds",
		"guard_traffic_deadline_seconds",
		"correlation_timeline",
		"correlation_snapshot_at",
	}

	// The table-level check comes first, then the columns in declaration order.
	result := make([]FabricServiceChannelSchemaCheck, 0, len(requiredColumns)+1)
	result = append(result, FabricServiceChannelSchemaCheck{
		CheckID:      relation,
		RelationName: relation,
		RequiredBy:   requiredBy,
	})
	for i := 0; i < len(requiredColumns); i++ {
		name := requiredColumns[i]
		result = append(result, FabricServiceChannelSchemaCheck{
			CheckID:      relation + "." + name,
			RelationName: relation,
			ColumnName:   name,
			RequiredBy:   requiredBy,
		})
	}
	return result
}
// UpsertFabricServiceChannelRouteRebuildAlertSilence creates or replaces the
// alert silence for a (cluster, reporter node, route, guard status,
// generation) tuple and returns the stored row.
//
// The payload column receives a small document carrying a schema_version tag
// plus the reason, incident source and channel ID from the input. When a
// silence for the tuple already exists, its reason, creator, expiry and
// payload are overwritten and created_at is reset to NOW(). An empty
// input.ActorUserID is stored as a NULL created_by_user_id.
func (s *PostgresStore) UpsertFabricServiceChannelRouteRebuildAlertSilence(ctx context.Context, input SilenceFabricServiceChannelRouteRebuildAlertInput, expiresAt time.Time) (FabricServiceChannelRouteRebuildAlertSilence, error) {
	const upsertSQL = `
INSERT INTO fabric_service_channel_rebuild_alert_silences (
cluster_id, reporter_node_id, route_id, guard_status, generation,
reason, created_by_user_id, expires_at, payload
)
VALUES ($1::uuid, $2::uuid, $3, $4, $5, $6, NULLIF($7, '')::uuid, $8, $9::jsonb)
ON CONFLICT (cluster_id, reporter_node_id, route_id, guard_status, generation)
DO UPDATE SET reason = EXCLUDED.reason,
created_by_user_id = EXCLUDED.created_by_user_id,
created_at = NOW(),
expires_at = EXCLUDED.expires_at,
payload = EXCLUDED.payload
RETURNING id::text, cluster_id::text, reporter_node_id::text, route_id,
guard_status, generation, reason, created_by_user_id::text,
created_at, expires_at, payload
`
	silenceDoc := map[string]any{
		"schema_version":  "rap.fabric_service_channel_rebuild_alert_silence.v1",
		"reason":          input.Reason,
		"incident_source": input.IncidentSource,
		"channel_id":      input.ChannelID,
	}
	encoded := mustJSONRaw(silenceDoc)

	stored := s.db.QueryRow(ctx, upsertSQL,
		input.ClusterID,
		input.ReporterNodeID,
		input.RouteID,
		input.GuardStatus,
		input.Generation,
		input.Reason,
		input.ActorUserID,
		expiresAt,
		[]byte(encoded),
	)
	return scanFabricServiceChannelRouteRebuildAlertSilence(stored)
}
// ListFabricServiceChannelRouteRebuildAlertSilences returns the alert silences
// of a cluster that are still in force at now (expires_at > now), most
// recently created first.
//
// A zero now is replaced by the current time, matching the other time-based
// list and cleanup methods of this store. Without that default a zero time
// would precede every expires_at and expired silences would be returned as if
// they were still active.
//
// The returned slice is non-nil even when no rows match.
func (s *PostgresStore) ListFabricServiceChannelRouteRebuildAlertSilences(ctx context.Context, clusterID string, now time.Time) ([]FabricServiceChannelRouteRebuildAlertSilence, error) {
	if now.IsZero() {
		now = time.Now()
	}
	rows, err := s.db.Query(ctx, `
SELECT id::text, cluster_id::text, reporter_node_id::text, route_id,
guard_status, generation, reason, created_by_user_id::text,
created_at, expires_at, payload
FROM fabric_service_channel_rebuild_alert_silences
WHERE cluster_id = $1::uuid
AND expires_at > $2
ORDER BY created_at DESC
`, clusterID, now.UTC())
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	out := []FabricServiceChannelRouteRebuildAlertSilence{}
	for rows.Next() {
		item, err := scanFabricServiceChannelRouteRebuildAlertSilence(rows)
		if err != nil {
			return nil, err
		}
		out = append(out, item)
	}
	return out, rows.Err()
}
// DeleteFabricServiceChannelRouteRebuildAlertSilence removes one alert silence,
// matched on both cluster ID and silence ID, and returns the deleted row. The
// outcome when no row matches is whatever
// scanFabricServiceChannelRouteRebuildAlertSilence reports for an empty
// result.
func (s *PostgresStore) DeleteFabricServiceChannelRouteRebuildAlertSilence(ctx context.Context, input UnsilenceFabricServiceChannelRouteRebuildAlertInput) (FabricServiceChannelRouteRebuildAlertSilence, error) {
	const deleteSQL = `
DELETE FROM fabric_service_channel_rebuild_alert_silences
WHERE cluster_id = $1::uuid
AND id = $2::uuid
RETURNING id::text, cluster_id::text, reporter_node_id::text, route_id,
guard_status, generation, reason, created_by_user_id::text,
created_at, expires_at, payload
`
	removed := s.db.QueryRow(ctx, deleteSQL, input.ClusterID, input.SilenceID)
	return scanFabricServiceChannelRouteRebuildAlertSilence(removed)
}
// ExpireFabricServiceChannelRouteFeedback force-expires the unexpired latest
// feedback rows of a route, and the observations they point at, inside one
// transaction.
//
// Matching rows in fabric_service_channel_route_feedback_latest (same cluster
// and route, optionally narrowed by reporter node and service class, with
// expires_at still after now) get expires_at = now and four payload keys:
// operator_expired, operator_expire_reason, operator_expired_at and
// operator_retry_cooldown_until. The cooldown is now +
// fabricServiceChannelOperatorExpireCooldown. The same changes are then
// applied to each referenced row in
// fabric_service_channel_route_feedback_observations.
//
// A zero input.Now is replaced by the current UTC time. The result echoes the
// filters and reports how many latest rows were expired.
func (s *PostgresStore) ExpireFabricServiceChannelRouteFeedback(ctx context.Context, input ExpireFabricServiceChannelRouteFeedbackInput) (ExpireFabricServiceChannelRouteFeedbackResult, error) {
	const expireLatestSQL = `
UPDATE fabric_service_channel_route_feedback_latest
SET expires_at = $6,
payload = jsonb_set(
jsonb_set(
jsonb_set(
jsonb_set(payload, '{operator_expired}', 'true'::jsonb, true),
'{operator_expire_reason}', to_jsonb($5::text), true
),
'{operator_expired_at}', to_jsonb($8::text), true
),
'{operator_retry_cooldown_until}', to_jsonb($7::text), true
)
WHERE cluster_id = $1::uuid
AND route_id = $2
AND (NULLIF($3, '') IS NULL OR reporter_node_id = NULLIF($3, '')::uuid)
AND ($4 = '' OR service_class = $4)
AND expires_at > $6
RETURNING observation_id::text
`
	const expireObservationSQL = `
UPDATE fabric_service_channel_route_feedback_observations
SET expires_at = $2,
payload = jsonb_set(
jsonb_set(
jsonb_set(
jsonb_set(payload, '{operator_expired}', 'true'::jsonb, true),
'{operator_expire_reason}', to_jsonb($3::text), true
),
'{operator_expired_at}', to_jsonb($5::text), true
),
'{operator_retry_cooldown_until}', to_jsonb($4::text), true
)
WHERE id = $1::uuid
`
	var failed ExpireFabricServiceChannelRouteFeedbackResult

	now := input.Now.UTC()
	if now.IsZero() {
		now = time.Now().UTC()
	}
	cooldownUntil := now.Add(fabricServiceChannelOperatorExpireCooldown)
	expiredAtText := now.Format(time.RFC3339Nano)
	cooldownUntilText := cooldownUntil.Format(time.RFC3339Nano)

	tx, err := s.db.Begin(ctx)
	if err != nil {
		return failed, err
	}
	// Deferred so that every early return below abandons the transaction.
	defer tx.Rollback(ctx)

	rows, err := tx.Query(ctx, expireLatestSQL,
		input.ClusterID,
		input.RouteID,
		input.ReporterNodeID,
		input.ServiceClass,
		input.Reason,
		now,
		cooldownUntilText,
		expiredAtText,
	)
	if err != nil {
		return failed, err
	}

	// Drain and close the result set before issuing further statements on the
	// same transaction.
	observationIDs, err := func() ([]string, error) {
		defer rows.Close()
		var ids []string
		for rows.Next() {
			var id string
			if scanErr := rows.Scan(&id); scanErr != nil {
				return nil, scanErr
			}
			ids = append(ids, id)
		}
		return ids, rows.Err()
	}()
	if err != nil {
		return failed, err
	}

	for _, observationID := range observationIDs {
		_, err = tx.Exec(ctx, expireObservationSQL, observationID, now, input.Reason, cooldownUntilText, expiredAtText)
		if err != nil {
			return failed, err
		}
	}

	if err = tx.Commit(ctx); err != nil {
		return failed, err
	}

	return ExpireFabricServiceChannelRouteFeedbackResult{
		ClusterID:      input.ClusterID,
		ReporterNodeID: input.ReporterNodeID,
		RouteID:        input.RouteID,
		ServiceClass:   input.ServiceClass,
		ExpiredCount:   len(observationIDs),
		ExpiredAt:      now,
		CooldownUntil:  cooldownUntil,
	}, nil
}
// ListQoSPolicies returns the mesh QoS policies stored for clusterID, ordered
// by ascending priority.
//
// On success the result is never nil: a cluster without policies yields an
// empty slice, as the fabric list methods of this store do, so JSON encoders
// emit [] rather than null. On a query, scan, or iteration error the slice is
// nil and the error is returned unchanged.
func (s *PostgresStore) ListQoSPolicies(ctx context.Context, clusterID string) ([]MeshQoSPolicy, error) {
	rows, err := s.db.Query(ctx, `
SELECT id::text, cluster_id::text, service_class, priority, reliability_mode,
drop_policy, bandwidth_policy, metadata, created_at, updated_at
FROM mesh_qos_policies
WHERE cluster_id = $1::uuid
ORDER BY priority ASC
`, clusterID)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	out := []MeshQoSPolicy{}
	for rows.Next() {
		item, err := scanQoSPolicy(rows)
		if err != nil {
			return nil, err
		}
		out = append(out, item)
	}
	// Do not hand back a partially filled slice when iteration failed.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return out, nil
}
// ListFabricEntryPoints returns every fabric entry point of clusterID ordered
// by name. When all rows scan successfully the returned slice is non-nil, even
// if it is empty; a scan failure returns nil with the scan error.
func (s *PostgresStore) ListFabricEntryPoints(ctx context.Context, clusterID string) ([]FabricEntryPoint, error) {
	const listSQL = `
SELECT id::text, cluster_id::text, name, status, endpoint_type, public_endpoint,
policy, metadata, created_by_user_id::text, created_at, updated_at
FROM fabric_entry_points
WHERE cluster_id = $1::uuid
ORDER BY name
`
	rows, err := s.db.Query(ctx, listSQL, clusterID)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	// Start from an empty (non-nil) slice so no rows still yields [].
	entryPoints := []FabricEntryPoint{}
	for rows.Next() {
		entryPoint, scanErr := scanFabricEntryPoint(rows)
		if scanErr != nil {
			return nil, scanErr
		}
		entryPoints = append(entryPoints, entryPoint)
	}
	return entryPoints, rows.Err()
}
// CreateFabricEntryPoint inserts a fabric entry point under a freshly
// generated UUID and returns the row reported by the INSERT's RETURNING
// clause. created_at and updated_at are both set to the database NOW().
func (s *PostgresStore) CreateFabricEntryPoint(ctx context.Context, input CreateFabricEntryPointInput) (FabricEntryPoint, error) {
	const insertSQL = `
INSERT INTO fabric_entry_points (
id, cluster_id, name, status, endpoint_type, public_endpoint,
policy, metadata, created_by_user_id, created_at, updated_at
) VALUES ($1::uuid, $2::uuid, $3, $4, $5, $6, $7::jsonb, $8::jsonb, $9::uuid, NOW(), NOW())
RETURNING id::text, cluster_id::text, name, status, endpoint_type, public_endpoint,
policy, metadata, created_by_user_id::text, created_at, updated_at
`
	entryPointID := uuid.NewString()
	return scanFabricEntryPoint(s.db.QueryRow(
		ctx, insertSQL,
		entryPointID,
		input.ClusterID,
		input.Name,
		input.Status,
		input.EndpointType,
		input.PublicEndpoint,
		[]byte(input.Policy),
		[]byte(input.Metadata),
		input.ActorUserID,
	))
}
// SetFabricEntryPointNode attaches a node to a fabric entry point, or updates
// the existing attachment.
//
// The INSERT selects from two CTEs, so a row is only produced when the entry
// point belongs to the cluster and the node has an 'active' membership in that
// cluster; otherwise nothing is written and the scan sees no row. On a
// conflict on (entry_point_id, node_id) only status, priority and metadata are
// overwritten; added_by_user_id and added_at keep their stored values.
func (s *PostgresStore) SetFabricEntryPointNode(ctx context.Context, input SetFabricEntryPointNodeInput) (FabricEntryPointNode, error) {
	const upsertSQL = `
WITH endpoint_ok AS (
SELECT id
FROM fabric_entry_points
WHERE id = $2::uuid
AND cluster_id = $1::uuid
), membership_ok AS (
SELECT node_id
FROM cluster_memberships
WHERE cluster_id = $1::uuid
AND node_id = $3::uuid
AND membership_status = 'active'
)
INSERT INTO fabric_entry_point_nodes (
entry_point_id, cluster_id, node_id, status, priority, metadata, added_by_user_id, added_at
)
SELECT endpoint_ok.id, $1::uuid, membership_ok.node_id, $4, $5, $6::jsonb, $7::uuid, NOW()
FROM endpoint_ok
CROSS JOIN membership_ok
ON CONFLICT (entry_point_id, node_id) DO UPDATE SET
status = EXCLUDED.status,
priority = EXCLUDED.priority,
metadata = EXCLUDED.metadata
RETURNING entry_point_id::text, cluster_id::text, node_id::text, status, priority,
metadata, added_by_user_id::text, added_at
`
	return scanFabricEntryPointNode(s.db.QueryRow(
		ctx, upsertSQL,
		input.ClusterID,
		input.EntryPointID,
		input.NodeID,
		input.Status,
		input.Priority,
		[]byte(input.Metadata),
		input.ActorUserID,
	))
}
// ListFabricEntryPointNodes returns the nodes attached to entryPointID within
// clusterID, ordered by priority and then by added_at. When all rows scan
// successfully the slice is non-nil, even if it is empty.
func (s *PostgresStore) ListFabricEntryPointNodes(ctx context.Context, clusterID, entryPointID string) ([]FabricEntryPointNode, error) {
	const listSQL = `
SELECT entry_point_id::text, cluster_id::text, node_id::text, status, priority,
metadata, added_by_user_id::text, added_at
FROM fabric_entry_point_nodes
WHERE cluster_id = $1::uuid
AND entry_point_id = $2::uuid
ORDER BY priority, added_at
`
	rows, err := s.db.Query(ctx, listSQL, clusterID, entryPointID)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	// Start from an empty (non-nil) slice so no rows still yields [].
	nodes := []FabricEntryPointNode{}
	for rows.Next() {
		node, scanErr := scanFabricEntryPointNode(rows)
		if scanErr != nil {
			return nil, scanErr
		}
		nodes = append(nodes, node)
	}
	return nodes, rows.Err()
}
// ListFabricEgressPools returns every fabric egress pool of clusterID ordered
// by name. When all rows scan successfully the slice is non-nil, even if it is
// empty; a scan failure returns nil with the scan error.
func (s *PostgresStore) ListFabricEgressPools(ctx context.Context, clusterID string) ([]FabricEgressPool, error) {
	const listSQL = `
SELECT id::text, cluster_id::text, name, status, description, route_scope,
policy, metadata, created_by_user_id::text, created_at, updated_at
FROM fabric_egress_pools
WHERE cluster_id = $1::uuid
ORDER BY name
`
	rows, err := s.db.Query(ctx, listSQL, clusterID)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	// Start from an empty (non-nil) slice so no rows still yields [].
	pools := []FabricEgressPool{}
	for rows.Next() {
		pool, scanErr := scanFabricEgressPool(rows)
		if scanErr != nil {
			return nil, scanErr
		}
		pools = append(pools, pool)
	}
	return pools, rows.Err()
}
// CreateFabricEgressPool inserts a fabric egress pool under a freshly
// generated UUID and returns the row reported by the INSERT's RETURNING
// clause. route_scope, policy and metadata are sent as raw bytes and cast to
// jsonb by the statement; created_at and updated_at are set to NOW().
func (s *PostgresStore) CreateFabricEgressPool(ctx context.Context, input CreateFabricEgressPoolInput) (FabricEgressPool, error) {
	const insertSQL = `
INSERT INTO fabric_egress_pools (
id, cluster_id, name, status, description, route_scope,
policy, metadata, created_by_user_id, created_at, updated_at
) VALUES ($1::uuid, $2::uuid, $3, $4, $5, $6::jsonb, $7::jsonb, $8::jsonb, $9::uuid, NOW(), NOW())
RETURNING id::text, cluster_id::text, name, status, description, route_scope,
policy, metadata, created_by_user_id::text, created_at, updated_at
`
	poolID := uuid.NewString()
	return scanFabricEgressPool(s.db.QueryRow(
		ctx, insertSQL,
		poolID,
		input.ClusterID,
		input.Name,
		input.Status,
		input.Description,
		[]byte(input.RouteScope),
		[]byte(input.Policy),
		[]byte(input.Metadata),
		input.ActorUserID,
	))
}
// SetFabricEgressPoolNode attaches a node to a fabric egress pool, or updates
// the existing attachment.
//
// The INSERT selects from two CTEs, so a row is only produced when the pool
// belongs to the cluster and the node has an 'active' membership in that
// cluster; otherwise nothing is written and the scan sees no row. On a
// conflict on (egress_pool_id, node_id) only status, priority and metadata are
// overwritten; added_by_user_id and added_at keep their stored values.
func (s *PostgresStore) SetFabricEgressPoolNode(ctx context.Context, input SetFabricEgressPoolNodeInput) (FabricEgressPoolNode, error) {
	const upsertSQL = `
WITH pool_ok AS (
SELECT id
FROM fabric_egress_pools
WHERE id = $2::uuid
AND cluster_id = $1::uuid
), membership_ok AS (
SELECT node_id
FROM cluster_memberships
WHERE cluster_id = $1::uuid
AND node_id = $3::uuid
AND membership_status = 'active'
)
INSERT INTO fabric_egress_pool_nodes (
egress_pool_id, cluster_id, node_id, status, priority, metadata, added_by_user_id, added_at
)
SELECT pool_ok.id, $1::uuid, membership_ok.node_id, $4, $5, $6::jsonb, $7::uuid, NOW()
FROM pool_ok
CROSS JOIN membership_ok
ON CONFLICT (egress_pool_id, node_id) DO UPDATE SET
status = EXCLUDED.status,
priority = EXCLUDED.priority,
metadata = EXCLUDED.metadata
RETURNING egress_pool_id::text, cluster_id::text, node_id::text, status, priority,
metadata, added_by_user_id::text, added_at
`
	return scanFabricEgressPoolNode(s.db.QueryRow(
		ctx, upsertSQL,
		input.ClusterID,
		input.EgressPoolID,
		input.NodeID,
		input.Status,
		input.Priority,
		[]byte(input.Metadata),
		input.ActorUserID,
	))
}
// ListFabricEgressPoolNodes returns the nodes attached to egressPoolID within
// clusterID, ordered by priority and then by added_at. When all rows scan
// successfully the slice is non-nil, even if it is empty.
func (s *PostgresStore) ListFabricEgressPoolNodes(ctx context.Context, clusterID, egressPoolID string) ([]FabricEgressPoolNode, error) {
	const listSQL = `
SELECT egress_pool_id::text, cluster_id::text, node_id::text, status, priority,
metadata, added_by_user_id::text, added_at
FROM fabric_egress_pool_nodes
WHERE cluster_id = $1::uuid
AND egress_pool_id = $2::uuid
ORDER BY priority, added_at
`
	rows, err := s.db.Query(ctx, listSQL, clusterID, egressPoolID)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	// Start from an empty (non-nil) slice so no rows still yields [].
	nodes := []FabricEgressPoolNode{}
	for rows.Next() {
		node, scanErr := scanFabricEgressPoolNode(rows)
		if scanErr != nil {
			return nil, scanErr
		}
		nodes = append(nodes, node)
	}
	return nodes, rows.Err()
}
// GetClusterAuthorityState loads the cluster_authority_states row for
// clusterID and decodes it with scanAuthorityState.
func (s *PostgresStore) GetClusterAuthorityState(ctx context.Context, clusterID string) (ClusterAuthorityState, error) {
	const selectSQL = `
SELECT cluster_id::text, authority_state, mutation_mode, term, notes, updated_by_user_id::text, updated_at
FROM cluster_authority_states
WHERE cluster_id = $1::uuid
`
	return scanAuthorityState(s.db.QueryRow(ctx, selectSQL, clusterID))
}
// UpdateClusterAuthorityState upserts the authority state of a cluster and
// returns the stored row.
//
// A first write creates the row with term 1. Every later write replaces
// authority_state, mutation_mode, notes, updated_by_user_id and updated_at,
// and increments the stored term by one.
func (s *PostgresStore) UpdateClusterAuthorityState(ctx context.Context, input UpdateClusterAuthorityInput) (ClusterAuthorityState, error) {
	const upsertSQL = `
INSERT INTO cluster_authority_states (
cluster_id, authority_state, mutation_mode, term, notes, updated_by_user_id, updated_at
) VALUES ($1::uuid, $2, $3, 1, $4, $5::uuid, NOW())
ON CONFLICT (cluster_id) DO UPDATE SET
authority_state = EXCLUDED.authority_state,
mutation_mode = EXCLUDED.mutation_mode,
term = cluster_authority_states.term + 1,
notes = EXCLUDED.notes,
updated_by_user_id = EXCLUDED.updated_by_user_id,
updated_at = EXCLUDED.updated_at
RETURNING cluster_id::text, authority_state, mutation_mode, term, notes, updated_by_user_id::text, updated_at
`
	return scanAuthorityState(s.db.QueryRow(
		ctx, upsertSQL,
		input.ClusterID,
		input.AuthorityState,
		input.MutationMode,
		input.Notes,
		input.ActorUserID,
	))
}
// ListClusterAdminSummaries returns one summary row per cluster from
// cluster_admin_summaries, ordered by name.
//
// On success the result is never nil: when there are no rows an empty slice is
// returned, as the fabric list methods of this store do, so JSON encoders emit
// [] rather than null. On a query, scan, or iteration error the slice is nil
// and the error is returned unchanged.
func (s *PostgresStore) ListClusterAdminSummaries(ctx context.Context) ([]ClusterAdminSummary, error) {
	rows, err := s.db.Query(ctx, `
SELECT cluster_id::text, slug, name, status, region, authority_state, mutation_mode,
cluster_key_algorithm, cluster_key_fingerprint,
node_count, healthy_node_count, pending_join_count, active_role_assignment_count, last_node_seen_at
FROM cluster_admin_summaries
ORDER BY name
`)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	out := []ClusterAdminSummary{}
	for rows.Next() {
		var item ClusterAdminSummary
		// Scan targets follow the SELECT column order exactly.
		if err := rows.Scan(
			&item.ClusterID,
			&item.Slug,
			&item.Name,
			&item.Status,
			&item.Region,
			&item.AuthorityState,
			&item.MutationMode,
			&item.ClusterKeyAlgorithm,
			&item.ClusterKeyFingerprint,
			&item.NodeCount,
			&item.HealthyNodeCount,
			&item.PendingJoinCount,
			&item.ActiveRoleAssignmentCount,
			&item.LastNodeSeenAt,
		); err != nil {
			return nil, err
		}
		out = append(out, item)
	}
	// Do not hand back a partially filled slice when iteration failed.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return out, nil
}
// CreateVPNConnection inserts a VPN connection under a freshly generated UUID
// and returns the stored row.
//
// The initial status is derived from the requested desired state: enabled
// connections start as VPNConnectionStatusEnabled, everything else as
// VPNConnectionStatusDisabled. The acting user is recorded as both creator and
// last updater ($17 is bound to both columns).
func (s *PostgresStore) CreateVPNConnection(ctx context.Context, input CreateVPNConnectionInput) (VPNConnection, error) {
	const insertSQL = `
INSERT INTO vpn_connections (
id, cluster_id, organization_id, name, target_endpoint, protocol_family,
credential_ref, mode, desired_state, allowed_node_policy, routing_usage,
route_policy, qos_policy, placement_policy, status, metadata,
created_by_user_id, updated_by_user_id, created_at, updated_at
) VALUES (
$1::uuid, $2::uuid, $3::uuid, $4, $5::jsonb, $6,
$7, $8, $9, $10::jsonb, $11::jsonb,
$12::jsonb, $13::jsonb, $14::jsonb, $15, $16::jsonb,
$17::uuid, $17::uuid, NOW(), NOW()
)
RETURNING id::text, cluster_id::text, organization_id::text, name, target_endpoint,
protocol_family, credential_ref, mode, desired_state, allowed_node_policy,
routing_usage, route_policy, qos_policy, placement_policy, status, metadata,
created_by_user_id::text, updated_by_user_id::text, created_at, updated_at
`
	connectionID := uuid.NewString()

	initialStatus := VPNConnectionStatusDisabled
	if input.DesiredState == VPNConnectionDesiredEnabled {
		initialStatus = VPNConnectionStatusEnabled
	}

	return scanVPNConnection(s.db.QueryRow(
		ctx, insertSQL,
		connectionID,
		input.ClusterID,
		input.OrganizationID,
		input.Name,
		[]byte(input.TargetEndpoint),
		input.ProtocolFamily,
		input.CredentialRef,
		input.Mode,
		input.DesiredState,
		[]byte(input.AllowedNodePolicy),
		[]byte(input.RoutingUsage),
		[]byte(input.RoutePolicy),
		[]byte(input.QoSPolicy),
		[]byte(input.PlacementPolicy),
		initialStatus,
		[]byte(input.Metadata),
		input.ActorUserID,
	))
}
// ListVPNConnections returns the VPN connections of clusterID, newest first
// (ORDER BY created_at DESC), decoded by scanVPNConnections.
//
// The metadata column is computed by the query rather than read verbatim:
//   - Each connection is LEFT JOINed to its active, unexpired lease (l) and to
//     the latest assignment status reported by that lease's owner node
//     (gateway_status).
//   - Without such a lease the stored metadata is returned unchanged.
//   - With a lease, the top-level "client_config" key is replaced by the stored
//     client_config merged with: a fixed vpn_address, a 0.0.0.0/0 route when
//     route_policy.full_tunnel is true, a dns_servers list, and runtime fields
//     (runtime_status, gateway_node_id, gateway_* details, packet_forwarding,
//     runtime_available, runtime_observed_at) with NULL values stripped.
//   - dns_servers takes the first non-empty array among the gateway's
//     exit_dns_servers, route_policy.dns_servers and
//     target_endpoint.dns_servers.
//
// NOTE(review): vpn_address is hard-coded to '10.77.0.2/24' in the SQL; confirm
// this is intended for every connection.
// NOTE(review): this SELECT appears to mirror the one in GetVPNConnection; keep
// the two in sync when editing.
func (s *PostgresStore) ListVPNConnections(ctx context.Context, clusterID string) ([]VPNConnection, error) {
rows, err := s.db.Query(ctx, `
SELECT vpn_connections.id::text, vpn_connections.cluster_id::text, organization_id::text, name, target_endpoint,
protocol_family, credential_ref, mode, desired_state, allowed_node_policy,
routing_usage, route_policy, qos_policy, placement_policy, vpn_connections.status,
vpn_connections.metadata || CASE WHEN l.id IS NULL THEN '{}'::jsonb ELSE jsonb_build_object(
'client_config',
COALESCE(vpn_connections.metadata->'client_config', '{}'::jsonb) ||
jsonb_build_object('vpn_address', '10.77.0.2/24') ||
CASE
WHEN COALESCE((vpn_connections.route_policy->>'full_tunnel')::boolean, false)
THEN jsonb_build_object('routes', jsonb_build_array('0.0.0.0/0'))
ELSE '{}'::jsonb
END ||
CASE
WHEN jsonb_typeof(gateway_status.status_payload->'exit_dns_servers') = 'array'
AND jsonb_array_length(gateway_status.status_payload->'exit_dns_servers') > 0
THEN jsonb_build_object('dns_servers', gateway_status.status_payload->'exit_dns_servers')
WHEN jsonb_typeof(vpn_connections.route_policy->'dns_servers') = 'array'
AND jsonb_array_length(vpn_connections.route_policy->'dns_servers') > 0
THEN jsonb_build_object('dns_servers', vpn_connections.route_policy->'dns_servers')
WHEN jsonb_typeof(vpn_connections.target_endpoint->'dns_servers') = 'array'
AND jsonb_array_length(vpn_connections.target_endpoint->'dns_servers') > 0
THEN jsonb_build_object('dns_servers', vpn_connections.target_endpoint->'dns_servers')
ELSE '{}'::jsonb
END ||
jsonb_strip_nulls(jsonb_build_object(
'runtime_status', CASE
WHEN COALESCE((gateway_status.status_payload->>'packet_forwarding')::boolean, false) THEN 'packet_forwarding_active'
WHEN COALESCE((gateway_status.status_payload->>'runtime_available')::boolean, false) THEN 'runtime_available'
WHEN gateway_status.observed_status IS NOT NULL THEN gateway_status.observed_status
ELSE 'lease_active'
END,
'gateway_node_id', l.owner_node_id::text,
'gateway_assignment_status', gateway_status.observed_status,
'gateway_interface', gateway_status.status_payload->>'gateway_interface',
'gateway_vpn_cidr', gateway_status.status_payload->>'gateway_vpn_cidr',
'relay_transport', gateway_status.status_payload->>'relay_transport',
'packet_forwarding', COALESCE((gateway_status.status_payload->>'packet_forwarding')::boolean, false),
'runtime_available', COALESCE((gateway_status.status_payload->>'runtime_available')::boolean, false),
'runtime_observed_at', gateway_status.observed_at
))
) END AS metadata,
created_by_user_id::text, updated_by_user_id::text, vpn_connections.created_at, vpn_connections.updated_at
FROM vpn_connections
LEFT JOIN vpn_connection_leases l
ON l.cluster_id = vpn_connections.cluster_id
AND l.vpn_connection_id = vpn_connections.id
AND l.status = 'active'
AND l.expires_at > NOW()
LEFT JOIN vpn_connection_assignment_latest_statuses gateway_status
ON gateway_status.cluster_id = vpn_connections.cluster_id
AND gateway_status.vpn_connection_id = vpn_connections.id
AND gateway_status.node_id = l.owner_node_id
WHERE vpn_connections.cluster_id = $1::uuid
ORDER BY vpn_connections.created_at DESC
`, clusterID)
if err != nil {
return nil, err
}
defer rows.Close()
return scanVPNConnections(rows)
}
// GetVPNConnection loads a single VPN connection identified by
// vpnConnectionID within clusterID and decodes it with scanVPNConnection.
//
// The metadata column is computed by the query rather than read verbatim:
//   - The connection is LEFT JOINed to its active, unexpired lease (l) and to
//     the latest assignment status reported by that lease's owner node
//     (gateway_status).
//   - Without such a lease the stored metadata is returned unchanged.
//   - With a lease, the top-level "client_config" key is replaced by the stored
//     client_config merged with: a fixed vpn_address, a 0.0.0.0/0 route when
//     route_policy.full_tunnel is true, a dns_servers list, and runtime fields
//     (runtime_status, gateway_node_id, gateway_* details, packet_forwarding,
//     runtime_available, runtime_observed_at) with NULL values stripped.
//   - dns_servers takes the first non-empty array among the gateway's
//     exit_dns_servers, route_policy.dns_servers and
//     target_endpoint.dns_servers.
//
// NOTE(review): vpn_address is hard-coded to '10.77.0.2/24' in the SQL; confirm
// this is intended for every connection.
// NOTE(review): this SELECT appears to mirror the one in ListVPNConnections;
// keep the two in sync when editing.
func (s *PostgresStore) GetVPNConnection(ctx context.Context, clusterID, vpnConnectionID string) (VPNConnection, error) {
row := s.db.QueryRow(ctx, `
SELECT vpn_connections.id::text, vpn_connections.cluster_id::text, organization_id::text, name, target_endpoint,
protocol_family, credential_ref, mode, desired_state, allowed_node_policy,
routing_usage, route_policy, qos_policy, placement_policy, vpn_connections.status,
vpn_connections.metadata || CASE WHEN l.id IS NULL THEN '{}'::jsonb ELSE jsonb_build_object(
'client_config',
COALESCE(vpn_connections.metadata->'client_config', '{}'::jsonb) ||
jsonb_build_object('vpn_address', '10.77.0.2/24') ||
CASE
WHEN COALESCE((vpn_connections.route_policy->>'full_tunnel')::boolean, false)
THEN jsonb_build_object('routes', jsonb_build_array('0.0.0.0/0'))
ELSE '{}'::jsonb
END ||
CASE
WHEN jsonb_typeof(gateway_status.status_payload->'exit_dns_servers') = 'array'
AND jsonb_array_length(gateway_status.status_payload->'exit_dns_servers') > 0
THEN jsonb_build_object('dns_servers', gateway_status.status_payload->'exit_dns_servers')
WHEN jsonb_typeof(vpn_connections.route_policy->'dns_servers') = 'array'
AND jsonb_array_length(vpn_connections.route_policy->'dns_servers') > 0
THEN jsonb_build_object('dns_servers', vpn_connections.route_policy->'dns_servers')
WHEN jsonb_typeof(vpn_connections.target_endpoint->'dns_servers') = 'array'
AND jsonb_array_length(vpn_connections.target_endpoint->'dns_servers') > 0
THEN jsonb_build_object('dns_servers', vpn_connections.target_endpoint->'dns_servers')
ELSE '{}'::jsonb
END ||
jsonb_strip_nulls(jsonb_build_object(
'runtime_status', CASE
WHEN COALESCE((gateway_status.status_payload->>'packet_forwarding')::boolean, false) THEN 'packet_forwarding_active'
WHEN COALESCE((gateway_status.status_payload->>'runtime_available')::boolean, false) THEN 'runtime_available'
WHEN gateway_status.observed_status IS NOT NULL THEN gateway_status.observed_status
ELSE 'lease_active'
END,
'gateway_node_id', l.owner_node_id::text,
'gateway_assignment_status', gateway_status.observed_status,
'gateway_interface', gateway_status.status_payload->>'gateway_interface',
'gateway_vpn_cidr', gateway_status.status_payload->>'gateway_vpn_cidr',
'relay_transport', gateway_status.status_payload->>'relay_transport',
'packet_forwarding', COALESCE((gateway_status.status_payload->>'packet_forwarding')::boolean, false),
'runtime_available', COALESCE((gateway_status.status_payload->>'runtime_available')::boolean, false),
'runtime_observed_at', gateway_status.observed_at
))
) END AS metadata,
created_by_user_id::text, updated_by_user_id::text, vpn_connections.created_at, vpn_connections.updated_at
FROM vpn_connections
LEFT JOIN vpn_connection_leases l
ON l.cluster_id = vpn_connections.cluster_id
AND l.vpn_connection_id = vpn_connections.id
AND l.status = 'active'
AND l.expires_at > NOW()
LEFT JOIN vpn_connection_assignment_latest_statuses gateway_status
ON gateway_status.cluster_id = vpn_connections.cluster_id
AND gateway_status.vpn_connection_id = vpn_connections.id
AND gateway_status.node_id = l.owner_node_id
WHERE vpn_connections.cluster_id = $1::uuid
AND vpn_connections.id = $2::uuid
`, clusterID, vpnConnectionID)
return scanVPNConnection(row)
}
// UpdateVPNConnectionDesiredState stores a new desired state for a VPN
// connection and returns the updated row.
//
// status is rewritten together with desired_state: enabled maps to
// VPNConnectionStatusEnabled, anything else to VPNConnectionStatusDisabled.
// The acting user and NOW() are recorded as the last update. The returned
// metadata is the stored column as-is.
func (s *PostgresStore) UpdateVPNConnectionDesiredState(ctx context.Context, input UpdateVPNConnectionDesiredStateInput) (VPNConnection, error) {
	const updateSQL = `
UPDATE vpn_connections
SET desired_state = $3,
status = $4,
updated_by_user_id = $5::uuid,
updated_at = NOW()
WHERE cluster_id = $1::uuid
AND id = $2::uuid
RETURNING id::text, cluster_id::text, organization_id::text, name, target_endpoint,
protocol_family, credential_ref, mode, desired_state, allowed_node_policy,
routing_usage, route_policy, qos_policy, placement_policy, status, metadata,
created_by_user_id::text, updated_by_user_id::text, created_at, updated_at
`
	nextStatus := VPNConnectionStatusDisabled
	if input.DesiredState == VPNConnectionDesiredEnabled {
		nextStatus = VPNConnectionStatusEnabled
	}

	return scanVPNConnection(s.db.QueryRow(
		ctx, updateSQL,
		input.ClusterID,
		input.VPNConnectionID,
		input.DesiredState,
		nextStatus,
		input.ActorUserID,
	))
}
// UpsertVPNConnectionRoutePolicy adds a route policy to a VPN connection and
// returns the stored row.
//
// cluster_id and organization_id are copied from the parent vpn_connections
// row, so nothing is inserted (and the scan sees no row) when the connection
// does not exist in the given cluster.
//
// NOTE(review): despite the name, the statement is a plain INSERT under a new
// UUID with no ON CONFLICT clause; confirm whether update-on-conflict is
// expected here.
func (s *PostgresStore) UpsertVPNConnectionRoutePolicy(ctx context.Context, input UpsertVPNConnectionRoutePolicyInput) (VPNConnectionRoutePolicy, error) {
	const insertSQL = `
INSERT INTO vpn_connection_route_policies (
id, vpn_connection_id, cluster_id, organization_id, route_type, destination,
action, service_type, priority, policy, status, created_by_user_id, created_at, updated_at
)
SELECT $1::uuid, vc.id, vc.cluster_id, vc.organization_id, $4, $5,
$6, $7, $8, $9::jsonb, $10, $11::uuid, NOW(), NOW()
FROM vpn_connections vc
WHERE vc.cluster_id = $2::uuid
AND vc.id = $3::uuid
RETURNING id::text, vpn_connection_id::text, cluster_id::text, organization_id::text,
route_type, destination, action, service_type, priority, policy, status,
created_by_user_id::text, created_at, updated_at
`
	policyID := uuid.NewString()
	return scanVPNRoutePolicy(s.db.QueryRow(
		ctx, insertSQL,
		policyID,
		input.ClusterID,
		input.VPNConnectionID,
		input.RouteType,
		input.Destination,
		input.Action,
		input.ServiceType,
		input.Priority,
		[]byte(input.Policy),
		input.Status,
		input.ActorUserID,
	))
}
// ListVPNConnectionRoutePolicies returns the route policies of a VPN
// connection, ordered by ascending priority and then newest first. Row
// decoding is delegated to scanVPNRoutePolicies.
func (s *PostgresStore) ListVPNConnectionRoutePolicies(ctx context.Context, clusterID, vpnConnectionID string) ([]VPNConnectionRoutePolicy, error) {
	const listSQL = `
SELECT id::text, vpn_connection_id::text, cluster_id::text, organization_id::text,
route_type, destination, action, service_type, priority, policy, status,
created_by_user_id::text, created_at, updated_at
FROM vpn_connection_route_policies
WHERE cluster_id = $1::uuid
AND vpn_connection_id = $2::uuid
ORDER BY priority ASC, created_at DESC
`
	rows, queryErr := s.db.Query(ctx, listSQL, clusterID, vpnConnectionID)
	if queryErr != nil {
		return nil, queryErr
	}
	defer rows.Close()

	return scanVPNRoutePolicies(rows)
}
// SetVPNConnectionAllowedNodes replaces the allowed-node set of a VPN
// connection inside one transaction and returns the resulting rows.
//
// Steps: lock the parent connection row (FOR UPDATE), delete every existing
// allowed-node row for it, insert one 'active' row per entry of
// input.NodeIDs with the shared role preference and metadata, then read the
// set back through the same transaction before committing. Any failure leaves
// the deferred rollback to undo the partial work.
func (s *PostgresStore) SetVPNConnectionAllowedNodes(ctx context.Context, input SetVPNConnectionAllowedNodesInput) ([]VPNConnectionAllowedNode, error) {
	const (
		lockConnectionSQL = `
SELECT id::text
FROM vpn_connections
WHERE cluster_id = $1::uuid
AND id = $2::uuid
FOR UPDATE
`
		clearAllowedSQL = `
DELETE FROM vpn_connection_allowed_nodes
WHERE cluster_id = $1::uuid
AND vpn_connection_id = $2::uuid
`
		insertAllowedSQL = `
INSERT INTO vpn_connection_allowed_nodes (
vpn_connection_id, cluster_id, node_id, role_preference, status,
metadata, created_by_user_id, created_at
) VALUES ($1::uuid, $2::uuid, $3::uuid, $4, 'active', $5::jsonb, $6::uuid, NOW())
`
	)

	tx, err := s.db.Begin(ctx)
	if err != nil {
		return nil, err
	}
	defer tx.Rollback(ctx)

	// The scanned id is unused; the query exists to verify the connection and
	// take the row lock.
	var lockedID string
	err = tx.QueryRow(ctx, lockConnectionSQL, input.ClusterID, input.VPNConnectionID).Scan(&lockedID)
	if err != nil {
		return nil, err
	}

	_, err = tx.Exec(ctx, clearAllowedSQL, input.ClusterID, input.VPNConnectionID)
	if err != nil {
		return nil, err
	}

	for _, allowedNodeID := range input.NodeIDs {
		_, err = tx.Exec(
			ctx, insertAllowedSQL,
			input.VPNConnectionID,
			input.ClusterID,
			allowedNodeID,
			input.RolePreference,
			[]byte(input.Metadata),
			input.ActorUserID,
		)
		if err != nil {
			return nil, err
		}
	}

	allowed, err := listVPNConnectionAllowedNodes(ctx, tx, input.ClusterID, input.VPNConnectionID)
	if err != nil {
		return nil, err
	}
	if err = tx.Commit(ctx); err != nil {
		return nil, err
	}
	return allowed, nil
}
// ListVPNConnectionAllowedNodes returns the allowed-node rows of a VPN
// connection by delegating to listVPNConnectionAllowedNodes with the store's
// connection pool as the querier.
func (s *PostgresStore) ListVPNConnectionAllowedNodes(ctx context.Context, clusterID, vpnConnectionID string) ([]VPNConnectionAllowedNode, error) {
return listVPNConnectionAllowedNodes(ctx, s.db, clusterID, vpnConnectionID)
}
// AcquireVPNConnectionLease tries to make input.OwnerNodeID the lease owner of
// a VPN connection, all inside one transaction.
//
// Behaviour:
//   - Active leases of the connection whose expires_at has passed are first
//     marked 'expired'.
//   - If an active, unexpired lease remains and is owned by the same node, it
//     is returned unchanged (no renewal happens here).
//   - If it is owned by a different node, ErrVPNLeaseAlreadyActive is returned
//     and the transaction is rolled back by the deferred Rollback.
//   - Otherwise a new 'active' lease is inserted with the supplied fencingToken
//     and expiresAt, and lease_generation set to the connection's previous
//     maximum plus one (1 for the first lease).
//
// A unique-constraint violation on insert is mapped to
// ErrVPNLeaseAlreadyActive. Any other error, including the no-row result when
// the connection does not exist in the cluster, is returned as reported by
// scanVPNLease.
func (s *PostgresStore) AcquireVPNConnectionLease(ctx context.Context, input AcquireVPNConnectionLeaseInput, expiresAt time.Time, fencingToken string) (VPNConnectionLease, error) {
tx, err := s.db.Begin(ctx)
if err != nil {
return VPNConnectionLease{}, err
}
// Rolls back on every early return below.
defer tx.Rollback(ctx)
// Step 1: retire leases that are still flagged active but already past expiry.
if _, err := tx.Exec(ctx, `
UPDATE vpn_connection_leases
SET status = 'expired'
WHERE vpn_connection_id = $1::uuid
AND cluster_id = $2::uuid
AND status = 'active'
AND expires_at <= NOW()
`, input.VPNConnectionID, input.ClusterID); err != nil {
return VPNConnectionLease{}, err
}
// Step 2: look for a live lease and lock it for the rest of the transaction.
existingRow := tx.QueryRow(ctx, `
SELECT id::text, vpn_connection_id::text, cluster_id::text, owner_node_id::text,
lease_generation, fencing_token, status, acquired_at, renewed_at, expires_at,
released_at, fenced_at, metadata
FROM vpn_connection_leases
WHERE vpn_connection_id = $1::uuid
AND cluster_id = $2::uuid
AND status = 'active'
AND expires_at > NOW()
FOR UPDATE
`, input.VPNConnectionID, input.ClusterID)
existing, err := scanVPNLease(existingRow)
if err == nil {
// Re-acquisition by the current owner is idempotent: hand back the same lease.
if existing.OwnerNodeID == input.OwnerNodeID {
if err := tx.Commit(ctx); err != nil {
return VPNConnectionLease{}, err
}
return existing, nil
}
return VPNConnectionLease{}, ErrVPNLeaseAlreadyActive
}
// Only "no live lease" lets us continue; anything else is a real failure.
if !errors.Is(err, pgx.ErrNoRows) {
return VPNConnectionLease{}, err
}
// Step 3: insert a fresh lease. The generation counter is computed per
// vpn_connection_id, and the SELECT from vpn_connections ensures the
// connection exists in the given cluster.
id := uuid.NewString()
row := tx.QueryRow(ctx, `
WITH next_generation AS (
SELECT COALESCE(MAX(lease_generation), 0) + 1 AS value
FROM vpn_connection_leases
WHERE vpn_connection_id = $2::uuid
)
INSERT INTO vpn_connection_leases (
id, vpn_connection_id, cluster_id, owner_node_id, lease_generation,
fencing_token, status, acquired_at, renewed_at, expires_at, metadata
)
SELECT $1::uuid, vc.id, vc.cluster_id, $4::uuid, next_generation.value,
$5, 'active', NOW(), NOW(), $6, $7::jsonb
FROM vpn_connections vc, next_generation
WHERE vc.cluster_id = $3::uuid
AND vc.id = $2::uuid
RETURNING id::text, vpn_connection_id::text, cluster_id::text, owner_node_id::text,
lease_generation, fencing_token, status, acquired_at, renewed_at, expires_at,
released_at, fenced_at, metadata
`, id, input.VPNConnectionID, input.ClusterID, input.OwnerNodeID, fencingToken, expiresAt, []byte(input.Metadata))
item, err := scanVPNLease(row)
if err != nil {
// NOTE(review): presumably a concurrent acquirer won the race against a
// unique index on the leases table; confirm which constraint this covers.
if isUniqueViolation(err) {
return VPNConnectionLease{}, ErrVPNLeaseAlreadyActive
}
return VPNConnectionLease{}, err
}
if err := tx.Commit(ctx); err != nil {
return VPNConnectionLease{}, err
}
return item, nil
}
// RenewVPNConnectionLease extends an active, unexpired lease to expiresAt and
// stamps renewed_at with NOW().
//
// The update only matches when lease id, connection, cluster, owner node and
// fencing token all agree with the stored row; otherwise no row is returned
// and the error from scanVPNLease is passed through.
func (s *PostgresStore) RenewVPNConnectionLease(ctx context.Context, input RenewVPNConnectionLeaseInput, expiresAt time.Time) (VPNConnectionLease, error) {
	const renewSQL = `
UPDATE vpn_connection_leases
SET renewed_at = NOW(),
expires_at = $6
WHERE id = $1::uuid
AND vpn_connection_id = $2::uuid
AND cluster_id = $3::uuid
AND owner_node_id = $4::uuid
AND fencing_token = $5
AND status = 'active'
AND expires_at > NOW()
RETURNING id::text, vpn_connection_id::text, cluster_id::text, owner_node_id::text,
lease_generation, fencing_token, status, acquired_at, renewed_at, expires_at,
released_at, fenced_at, metadata
`
	return scanVPNLease(s.db.QueryRow(
		ctx, renewSQL,
		input.LeaseID,
		input.VPNConnectionID,
		input.ClusterID,
		input.OwnerNodeID,
		input.FencingToken,
		expiresAt,
	))
}
// RenewNodeVPNAssignmentLease extends an active, unexpired lease to expiresAt
// and stamps renewed_at with NOW().
//
// The row is matched on lease id, connection, cluster and owner node; this
// statement does not compare the fencing token. When nothing matches, the
// error from scanVPNLease is passed through.
func (s *PostgresStore) RenewNodeVPNAssignmentLease(ctx context.Context, input RenewNodeVPNAssignmentLeaseInput, expiresAt time.Time) (VPNConnectionLease, error) {
	const renewSQL = `
UPDATE vpn_connection_leases
SET renewed_at = NOW(),
expires_at = $5
WHERE id = $1::uuid
AND vpn_connection_id = $2::uuid
AND cluster_id = $3::uuid
AND owner_node_id = $4::uuid
AND status = 'active'
AND expires_at > NOW()
RETURNING id::text, vpn_connection_id::text, cluster_id::text, owner_node_id::text,
lease_generation, fencing_token, status, acquired_at, renewed_at, expires_at,
released_at, fenced_at, metadata
`
	return scanVPNLease(s.db.QueryRow(
		ctx, renewSQL,
		input.LeaseID,
		input.VPNConnectionID,
		input.ClusterID,
		input.OwnerNodeID,
		expiresAt,
	))
}
// ReleaseVPNConnectionLease marks an active lease as 'released' on behalf of
// its owner and returns the resulting row.
//
// The lease is matched on id, connection, cluster, owner node and fencing
// token. The call is idempotent: when no active lease matches, the same key is
// looked up with status 'released' and that row is returned instead. If
// neither query finds a row, the error from scanVPNLease is returned.
func (s *PostgresStore) ReleaseVPNConnectionLease(ctx context.Context, input ReleaseVPNConnectionLeaseInput) (VPNConnectionLease, error) {
	const (
		releaseSQL = `
UPDATE vpn_connection_leases
SET status = 'released',
released_at = NOW()
WHERE id = $1::uuid
AND vpn_connection_id = $2::uuid
AND cluster_id = $3::uuid
AND owner_node_id = $4::uuid
AND fencing_token = $5
AND status = 'active'
RETURNING id::text, vpn_connection_id::text, cluster_id::text, owner_node_id::text,
lease_generation, fencing_token, status, acquired_at, renewed_at, expires_at,
released_at, fenced_at, metadata
`
		alreadyReleasedSQL = `
SELECT id::text, vpn_connection_id::text, cluster_id::text, owner_node_id::text,
lease_generation, fencing_token, status, acquired_at, renewed_at, expires_at,
released_at, fenced_at, metadata
FROM vpn_connection_leases
WHERE id = $1::uuid
AND vpn_connection_id = $2::uuid
AND cluster_id = $3::uuid
AND owner_node_id = $4::uuid
AND fencing_token = $5
AND status = 'released'
`
	)

	tx, err := s.db.Begin(ctx)
	if err != nil {
		return VPNConnectionLease{}, err
	}
	defer tx.Rollback(ctx)

	lease, err := scanVPNLease(tx.QueryRow(
		ctx, releaseSQL,
		input.LeaseID, input.VPNConnectionID, input.ClusterID, input.OwnerNodeID, input.FencingToken,
	))
	switch {
	case err == nil:
		// Released just now; fall through to the commit.
	case errors.Is(err, pgx.ErrNoRows):
		// Nothing active matched: accept a lease that was released earlier.
		lease, err = scanVPNLease(tx.QueryRow(
			ctx, alreadyReleasedSQL,
			input.LeaseID, input.VPNConnectionID, input.ClusterID, input.OwnerNodeID, input.FencingToken,
		))
		if err != nil {
			return VPNConnectionLease{}, err
		}
	default:
		return VPNConnectionLease{}, err
	}

	if err := tx.Commit(ctx); err != nil {
		return VPNConnectionLease{}, err
	}
	return lease, nil
}
// FenceVPNConnectionLease marks an active lease as 'fenced', records the
// reason under the "fence_reason" metadata key, and returns the resulting row.
//
// The lease is matched on id, connection and cluster (no owner or fencing
// token check). The call is idempotent: when no active lease matches, the same
// key is looked up with status 'fenced' and that row is returned; its stored
// metadata is left untouched. If neither query finds a row, the error from
// scanVPNLease is returned.
func (s *PostgresStore) FenceVPNConnectionLease(ctx context.Context, input FenceVPNConnectionLeaseInput) (VPNConnectionLease, error) {
	// Encode the metadata patch with encoding/json. Go's %q verb emits Go
	// string syntax (\x07, \a, \v, \U0001F600, ...) which is not valid JSON,
	// so reasons containing such characters made the ::jsonb cast fail.
	fencePatch, err := json.Marshal(map[string]any{"fence_reason": input.Reason})
	if err != nil {
		return VPNConnectionLease{}, err
	}

	tx, err := s.db.Begin(ctx)
	if err != nil {
		return VPNConnectionLease{}, err
	}
	// Rolls back on every early return below.
	defer tx.Rollback(ctx)

	row := tx.QueryRow(ctx, `
UPDATE vpn_connection_leases
SET status = 'fenced',
fenced_at = NOW(),
metadata = metadata || $4::jsonb
WHERE id = $1::uuid
AND vpn_connection_id = $2::uuid
AND cluster_id = $3::uuid
AND status = 'active'
RETURNING id::text, vpn_connection_id::text, cluster_id::text, owner_node_id::text,
lease_generation, fencing_token, status, acquired_at, renewed_at, expires_at,
released_at, fenced_at, metadata
`, input.LeaseID, input.VPNConnectionID, input.ClusterID, fencePatch)
	item, err := scanVPNLease(row)
	if err == nil {
		if err := tx.Commit(ctx); err != nil {
			return VPNConnectionLease{}, err
		}
		return item, nil
	}
	if !errors.Is(err, pgx.ErrNoRows) {
		return VPNConnectionLease{}, err
	}

	// Nothing active matched: accept a lease that was fenced earlier.
	row = tx.QueryRow(ctx, `
SELECT id::text, vpn_connection_id::text, cluster_id::text, owner_node_id::text,
lease_generation, fencing_token, status, acquired_at, renewed_at, expires_at,
released_at, fenced_at, metadata
FROM vpn_connection_leases
WHERE id = $1::uuid
AND vpn_connection_id = $2::uuid
AND cluster_id = $3::uuid
AND status = 'fenced'
`, input.LeaseID, input.VPNConnectionID, input.ClusterID)
	item, err = scanVPNLease(row)
	if err != nil {
		return VPNConnectionLease{}, err
	}
	if err := tx.Commit(ctx); err != nil {
		return VPNConnectionLease{}, err
	}
	return item, nil
}
// GetActiveVPNConnectionLease returns the lease of a VPN connection that is
// both flagged 'active' and not yet past expires_at. When no such lease
// exists, the error from scanVPNLease is passed through.
func (s *PostgresStore) GetActiveVPNConnectionLease(ctx context.Context, clusterID, vpnConnectionID string) (VPNConnectionLease, error) {
	const selectSQL = `
SELECT id::text, vpn_connection_id::text, cluster_id::text, owner_node_id::text,
lease_generation, fencing_token, status, acquired_at, renewed_at, expires_at,
released_at, fenced_at, metadata
FROM vpn_connection_leases
WHERE cluster_id = $1::uuid
AND vpn_connection_id = $2::uuid
AND status = 'active'
AND expires_at > NOW()
`
	return scanVPNLease(s.db.QueryRow(ctx, selectSQL, clusterID, vpnConnectionID))
}
// CheckVPNLeaseOwnerEligibility gathers the facts needed to decide whether
// ownerNodeID may hold the lease of a VPN connection. It reports facts only;
// the decision is left to the caller.
//
// The result carries:
//   - MembershipStatus and NodeRegistrationStatus, each "" when the node has no
//     membership row in the cluster or no nodes row.
//   - AllowedByPolicy: true when allowed_node_policy.mode is 'any_capable', or
//     an 'active' vpn_connection_allowed_nodes row exists for the node, or the
//     mode is 'explicit' (the default when unset) and allowed_node_policy.node_ids
//     contains the node id.
//   - HasAuthorizedRole: true when the node holds an 'active' role assignment
//     among 'vpn-exit', 'vpn-connector' and 'ipv4-egress' that is either not
//     bound to an organization or bound to the connection's organization.
//
// When the connection does not exist in the cluster the Scan error is returned
// with a zero-value result.
func (s *PostgresStore) CheckVPNLeaseOwnerEligibility(ctx context.Context, clusterID, vpnConnectionID, ownerNodeID string) (VPNLeaseOwnerEligibility, error) {
	const eligibilitySQL = `
SELECT vc.id::text,
vc.cluster_id::text,
vc.organization_id::text,
$3::text AS owner_node_id,
COALESCE(cm.membership_status, '') AS membership_status,
COALESCE(n.registration_status, '') AS node_registration_status,
(
COALESCE(vc.allowed_node_policy->>'mode', 'explicit') = 'any_capable'
OR EXISTS (
SELECT 1
FROM vpn_connection_allowed_nodes van
WHERE van.vpn_connection_id = vc.id
AND van.cluster_id = vc.cluster_id
AND van.node_id = $3::uuid
AND van.status = 'active'
)
OR (
COALESCE(vc.allowed_node_policy->>'mode', 'explicit') = 'explicit'
AND COALESCE(vc.allowed_node_policy->'node_ids', '[]'::jsonb) ? $3::text
)
) AS allowed_by_policy,
EXISTS (
SELECT 1
FROM node_role_assignments nra
WHERE nra.cluster_id = vc.cluster_id
AND nra.node_id = $3::uuid
AND nra.status = 'active'
AND nra.role IN ('vpn-exit', 'vpn-connector', 'ipv4-egress')
AND (nra.organization_id IS NULL OR nra.organization_id = vc.organization_id)
) AS has_authorized_role
FROM vpn_connections vc
LEFT JOIN cluster_memberships cm ON cm.cluster_id = vc.cluster_id AND cm.node_id = $3::uuid
LEFT JOIN nodes n ON n.id = $3::uuid
WHERE vc.cluster_id = $1::uuid
AND vc.id = $2::uuid
`
	var eligibility VPNLeaseOwnerEligibility
	// Scan targets follow the SELECT column order exactly.
	scanErr := s.db.QueryRow(ctx, eligibilitySQL, clusterID, vpnConnectionID, ownerNodeID).Scan(
		&eligibility.VPNConnectionID,
		&eligibility.ClusterID,
		&eligibility.OrganizationID,
		&eligibility.OwnerNodeID,
		&eligibility.MembershipStatus,
		&eligibility.NodeRegistrationStatus,
		&eligibility.AllowedByPolicy,
		&eligibility.HasAuthorizedRole,
	)
	if scanErr != nil {
		return VPNLeaseOwnerEligibility{}, scanErr
	}
	return eligibility, nil
}
// ExpireStaleVPNConnectionLeases marks every 'active' lease of clusterID whose
// expires_at is at or before now as 'expired' and returns the affected rows.
// The cutoff is the caller-supplied now, not the database clock. The returned
// slice is nil when nothing was expired.
func (s *PostgresStore) ExpireStaleVPNConnectionLeases(ctx context.Context, clusterID string, now time.Time) ([]VPNConnectionLease, error) {
	const expireSQL = `
UPDATE vpn_connection_leases
SET status = 'expired'
WHERE cluster_id = $1::uuid
AND status = 'active'
AND expires_at <= $2
RETURNING id::text, vpn_connection_id::text, cluster_id::text, owner_node_id::text,
lease_generation, fencing_token, status, acquired_at, renewed_at, expires_at,
released_at, fenced_at, metadata
`
	rows, err := s.db.Query(ctx, expireSQL, clusterID, now)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var expired []VPNConnectionLease
	for rows.Next() {
		lease, scanErr := scanVPNLease(rows)
		if scanErr != nil {
			return nil, scanErr
		}
		expired = append(expired, lease)
	}
	return expired, rows.Err()
}
// ListNodeVPNAssignments returns the enabled VPN connections of clusterID that
// nodeID should see. The query yields rows only when the node has an active
// cluster membership and an active registration (active_node CTE). A
// connection is listed when the node is both allowed by policy and holds an
// authorized role, or when it currently owns an unexpired active lease.
// Results are ordered by connection name, then id.
func (s *PostgresStore) ListNodeVPNAssignments(ctx context.Context, clusterID, nodeID string) ([]NodeVPNAssignment, error) {
	const assignmentsSQL = `
WITH active_node AS (
SELECT 1
FROM cluster_memberships cm
JOIN nodes n ON n.id = cm.node_id
WHERE cm.cluster_id = $1::uuid
AND cm.node_id = $2::uuid
AND cm.membership_status = 'active'
AND n.registration_status = 'active'
),
visible AS (
SELECT vc.*,
(
COALESCE(vc.allowed_node_policy->>'mode', 'explicit') = 'any_capable'
OR EXISTS (
SELECT 1
FROM vpn_connection_allowed_nodes van
WHERE van.vpn_connection_id = vc.id
AND van.cluster_id = vc.cluster_id
AND van.node_id = $2::uuid
AND van.status = 'active'
)
OR (
COALESCE(vc.allowed_node_policy->>'mode', 'explicit') = 'explicit'
AND COALESCE(vc.allowed_node_policy->'node_ids', '[]'::jsonb) ? $2::text
)
) AS allowed_by_policy,
EXISTS (
SELECT 1
FROM node_role_assignments nra
WHERE nra.cluster_id = vc.cluster_id
AND nra.node_id = $2::uuid
AND nra.status = 'active'
AND nra.role IN ('vpn-exit', 'vpn-connector', 'ipv4-egress')
AND (nra.organization_id IS NULL OR nra.organization_id = vc.organization_id)
) AS has_authorized_role,
EXISTS (
SELECT 1
FROM vpn_connection_leases active_owner
WHERE active_owner.cluster_id = vc.cluster_id
AND active_owner.vpn_connection_id = vc.id
AND active_owner.owner_node_id = $2::uuid
AND active_owner.status = 'active'
AND active_owner.expires_at > NOW()
) AS is_active_owner
FROM vpn_connections vc
WHERE vc.cluster_id = $1::uuid
AND vc.desired_state = 'enabled'
)
SELECT v.id::text,
v.cluster_id::text,
v.organization_id::text,
v.name,
v.target_endpoint,
v.protocol_family,
v.mode,
v.desired_state,
v.routing_usage,
v.route_policy,
v.qos_policy,
v.placement_policy,
v.status,
(v.credential_ref IS NOT NULL) AS has_credential_ref,
CASE WHEN v.is_active_owner THEN 'active_owner' ELSE 'eligible_candidate' END AS assignment_reason,
CASE WHEN l.id IS NULL THEN NULL ELSE jsonb_build_object(
'lease_id', l.id::text,
'owner_node_id', l.owner_node_id::text,
'lease_generation', l.lease_generation,
'status', l.status,
'renewed_at', l.renewed_at,
'expires_at', l.expires_at
) END AS active_lease,
v.updated_at
FROM visible v
JOIN active_node an ON TRUE
LEFT JOIN vpn_connection_leases l
ON l.cluster_id = v.cluster_id
AND l.vpn_connection_id = v.id
AND l.status = 'active'
AND l.expires_at > NOW()
WHERE (v.allowed_by_policy AND v.has_authorized_role)
OR v.is_active_owner
ORDER BY v.name ASC, v.id ASC
`
	rows, err := s.db.Query(ctx, assignmentsSQL, clusterID, nodeID)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var assignments []NodeVPNAssignment
	for rows.Next() {
		assignment, scanErr := scanNodeVPNAssignment(rows)
		if scanErr != nil {
			return nil, scanErr
		}
		assignments = append(assignments, assignment)
	}
	return assignments, rows.Err()
}
// ReportNodeVPNAssignmentStatus stores a node's status report for a VPN
// connection. Inside one transaction it appends a row to
// vpn_connection_assignment_status_reports and then upserts the per
// (vpn_connection_id, node_id) row in vpn_connection_assignment_latest_statuses
// so that it points at the new report. The inserted report is returned.
func (s *PostgresStore) ReportNodeVPNAssignmentStatus(ctx context.Context, input ReportNodeVPNAssignmentStatusInput) (NodeVPNAssignmentStatus, error) {
	const insertReportSQL = `
INSERT INTO vpn_connection_assignment_status_reports (
id, vpn_connection_id, cluster_id, node_id, observed_status, status_payload, observed_at
) VALUES ($1::uuid, $2::uuid, $3::uuid, $4::uuid, $5, $6::jsonb, $7)
RETURNING id::text, vpn_connection_id::text, cluster_id::text, node_id::text,
observed_status, status_payload, observed_at
`
	const upsertLatestSQL = `
INSERT INTO vpn_connection_assignment_latest_statuses (
vpn_connection_id, cluster_id, node_id, report_id, observed_status, status_payload, observed_at, updated_at
) VALUES ($1::uuid, $2::uuid, $3::uuid, $4::uuid, $5, $6::jsonb, $7, NOW())
ON CONFLICT (vpn_connection_id, node_id) DO UPDATE SET
report_id = EXCLUDED.report_id,
observed_status = EXCLUDED.observed_status,
status_payload = EXCLUDED.status_payload,
observed_at = EXCLUDED.observed_at,
updated_at = NOW()
`
	var none NodeVPNAssignmentStatus

	tx, err := s.db.Begin(ctx)
	if err != nil {
		return none, err
	}
	// Rollback after a successful Commit is expected to fail; its error is ignored.
	defer func() { _ = tx.Rollback(ctx) }()

	reportID := uuid.NewString()
	report, err := scanNodeVPNAssignmentStatus(tx.QueryRow(ctx, insertReportSQL,
		reportID,
		input.VPNConnectionID,
		input.ClusterID,
		input.NodeID,
		input.ObservedStatus,
		[]byte(input.StatusPayload),
		input.ObservedAt,
	))
	if err != nil {
		return none, err
	}

	_, err = tx.Exec(ctx, upsertLatestSQL,
		input.VPNConnectionID,
		input.ClusterID,
		input.NodeID,
		report.ID,
		input.ObservedStatus,
		[]byte(input.StatusPayload),
		input.ObservedAt,
	)
	if err != nil {
		return none, err
	}

	if err = tx.Commit(ctx); err != nil {
		return none, err
	}
	return report, nil
}
// RecordAudit inserts event into cluster_audit_events under a freshly
// generated id. An empty payload is stored as `{}`; created_at is
// COALESCE(event.CreatedAt, NOW()) as evaluated by the database.
func (s *PostgresStore) RecordAudit(ctx context.Context, event ClusterAuditEvent) error {
	const insertSQL = `
INSERT INTO cluster_audit_events (id, cluster_id, actor_user_id, event_type, target_type, target_id, payload, created_at)
VALUES ($1::uuid, $2::uuid, $3::uuid, $4, $5, $6, $7::jsonb, COALESCE($8, NOW()))
`
	payload := event.Payload
	if len(payload) == 0 {
		payload = json.RawMessage(`{}`)
	}
	_, err := s.db.Exec(ctx, insertSQL,
		uuid.NewString(),
		event.ClusterID,
		event.ActorUserID,
		event.EventType,
		event.TargetType,
		event.TargetID,
		[]byte(payload),
		event.CreatedAt,
	)
	return err
}
// ListAuditEvents returns audit events of a cluster, newest first.
//
// The cluster id is trimmed and both filter lists are normalized with
// compactStringSlice; an empty filter list matches every event/target type.
// A limit that is not in the range 1..200 falls back to 100.
func (s *PostgresStore) ListAuditEvents(ctx context.Context, input ListAuditEventsInput) ([]ClusterAuditEvent, error) {
	const listSQL = `
SELECT id::text, cluster_id::text, actor_user_id::text, event_type, target_type, target_id, payload, created_at
FROM cluster_audit_events
WHERE cluster_id = $1::uuid
AND (cardinality($2::text[]) = 0 OR event_type = ANY($2::text[]))
AND (cardinality($3::text[]) = 0 OR target_type = ANY($3::text[]))
ORDER BY created_at DESC
LIMIT $4
`
	clusterID := strings.TrimSpace(input.ClusterID)
	eventTypes := compactStringSlice(input.EventTypes)
	targetTypes := compactStringSlice(input.TargetTypes)
	limit := input.Limit
	if limit <= 0 || limit > 200 {
		limit = 100
	}

	rows, err := s.db.Query(ctx, listSQL, clusterID, eventTypes, targetTypes, limit)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var events []ClusterAuditEvent
	for rows.Next() {
		event, scanErr := scanAuditEvent(rows)
		if scanErr != nil {
			return nil, scanErr
		}
		events = append(events, event)
	}
	return events, rows.Err()
}
// compactStringSlice trims every value, drops blanks and duplicates, and
// returns the survivors in first-seen order.
//
// The result is always a non-nil (possibly empty) slice. ListAuditEvents binds
// it to a text[] parameter and tests cardinality(...) = 0, so callers rely on
// "no filter" being an empty array; NOTE(review): a nil slice would presumably
// be sent as NULL instead - keep the result non-nil.
//
// Duplicates are tracked in a set so the pass is linear in len(values) rather
// than rescanning the output for every element.
func compactStringSlice(values []string) []string {
	out := make([]string, 0, len(values))
	seen := make(map[string]struct{}, len(values))
	for _, value := range values {
		trimmed := strings.TrimSpace(value)
		if trimmed == "" {
			continue
		}
		if _, dup := seen[trimmed]; dup {
			continue
		}
		seen[trimmed] = struct{}{}
		out = append(out, trimmed)
	}
	return out
}
// scanner is the single-method view of a result row used by the scan* helpers
// in this file, so the same helper can decode any value that offers
// Scan(dest ...any) error (the helpers are called with both single rows and
// row iterators).
type scanner interface {
// Scan copies the current row's columns, in order, into dest.
Scan(dest ...any) error
}
// rowQuerier abstracts anything that can run a multi-row query and return
// pgx.Rows. NOTE(review): presumably satisfied by both the pool and a
// transaction so helpers can run inside or outside a tx - confirm at call sites.
type rowQuerier interface {
Query(ctx context.Context, sql string, args ...any) (pgx.Rows, error)
}
// scanCluster decodes one row into a Cluster. Columns must arrive in the
// order of the destinations below; Metadata is passed through ensureRaw with
// `{}` as its default.
func scanCluster(row scanner) (Cluster, error) {
	var cluster Cluster
	err := row.Scan(
		&cluster.ID,
		&cluster.Slug,
		&cluster.Name,
		&cluster.Status,
		&cluster.Region,
		&cluster.Metadata,
		&cluster.CreatedAt,
		&cluster.UpdatedAt,
	)
	if err != nil {
		return Cluster{}, err
	}
	ensureRaw(&cluster.Metadata, `{}`)
	return cluster, nil
}
// scanClusterAuthority decodes one row into a ClusterAuthorityKey. After the
// scan it stamps SchemaVersion with clusterauth.AuthoritySchemaVersion,
// defaults Metadata via ensureRaw, and derives QuorumDescriptor from the
// metadata document.
func scanClusterAuthority(row scanner) (ClusterAuthorityKey, error) {
	var key ClusterAuthorityKey
	err := row.Scan(
		&key.ClusterID,
		&key.AuthorityState,
		&key.KeyAlgorithm,
		&key.PublicKey,
		&key.PublicKeyFingerprint,
		&key.PrivateKey,
		&key.CreatedAt,
		&key.UpdatedAt,
		&key.Metadata,
	)
	if err != nil {
		return ClusterAuthorityKey{}, err
	}
	key.SchemaVersion = clusterauth.AuthoritySchemaVersion
	ensureRaw(&key.Metadata, `{}`)
	key.QuorumDescriptor = clusterAuthorityQuorumDescriptorFromMetadata(key.Metadata)
	return key, nil
}
// clusterAuthorityQuorumDescriptorFromMetadata extracts a quorum descriptor
// from an authority metadata document. The "quorum_descriptor" key wins over
// "quorum". Empty, invalid, or undecodable metadata yields nil, as does a
// document containing neither key.
func clusterAuthorityQuorumDescriptorFromMetadata(metadata json.RawMessage) *QuorumDescriptor {
	if len(metadata) == 0 {
		return nil
	}
	if !json.Valid(metadata) {
		return nil
	}
	var doc struct {
		QuorumDescriptor *QuorumDescriptor `json:"quorum_descriptor"`
		Quorum           *QuorumDescriptor `json:"quorum"`
	}
	if json.Unmarshal(metadata, &doc) != nil {
		return nil
	}
	if doc.QuorumDescriptor == nil {
		return doc.Quorum
	}
	return doc.QuorumDescriptor
}
// scanNodeGroup decodes one row into a ClusterNodeGroup; column order must
// match the destinations below. Metadata is defaulted to `{}` via ensureRaw.
func scanNodeGroup(row scanner) (ClusterNodeGroup, error) {
	var group ClusterNodeGroup
	err := row.Scan(
		&group.ID,
		&group.ClusterID,
		&group.ParentGroupID,
		&group.Name,
		&group.Description,
		&group.SortOrder,
		&group.Metadata,
		&group.CreatedByUserID,
		&group.CreatedAt,
		&group.UpdatedAt,
	)
	if err != nil {
		return ClusterNodeGroup{}, err
	}
	ensureRaw(&group.Metadata, `{}`)
	return group, nil
}
// scanClusterNode decodes one joined node/membership row into a ClusterNode;
// column order must match the destinations below. MembershipMetadata is
// defaulted to `{}` via ensureRaw.
func scanClusterNode(row scanner) (ClusterNode, error) {
	var node ClusterNode
	err := row.Scan(
		&node.ID,
		&node.OwnerOrganizationID,
		&node.NodeKey,
		&node.Name,
		&node.OwnershipType,
		&node.RegistrationStatus,
		&node.HealthStatus,
		&node.VersionState,
		&node.PartitionState,
		&node.ReportedVersion,
		&node.LastSeenAt,
		&node.MembershipStatus,
		&node.MembershipMetadata,
		&node.NodeGroupID,
		&node.NodeGroupName,
		&node.CreatedAt,
		&node.UpdatedAt,
	)
	if err != nil {
		return ClusterNode{}, err
	}
	ensureRaw(&node.MembershipMetadata, `{}`)
	return node, nil
}
// scanJoinToken decodes one row into a NodeJoinToken. Scope and
// AuthorityPayload are defaulted to `{}` via ensureRaw. The trailing signature
// column is decoded into AuthoritySignature only when it is non-empty and not
// the literal `{}`; a decode failure is returned as an error.
func scanJoinToken(row scanner) (NodeJoinToken, error) {
	var (
		token     NodeJoinToken
		rawSigned json.RawMessage
	)
	err := row.Scan(
		&token.ID,
		&token.ClusterID,
		&token.Scope,
		&token.ExpiresAt,
		&token.MaxUses,
		&token.UsedCount,
		&token.Status,
		&token.CreatedByUserID,
		&token.CreatedAt,
		&token.RevokedAt,
		&token.AuthorityPayload,
		&rawSigned,
	)
	if err != nil {
		return NodeJoinToken{}, err
	}
	ensureRaw(&token.Scope, `{}`)
	ensureRaw(&token.AuthorityPayload, `{}`)

	if len(rawSigned) == 0 || string(rawSigned) == "{}" {
		return token, nil
	}
	var signature ClusterSignature
	if err := json.Unmarshal(rawSigned, &signature); err != nil {
		return NodeJoinToken{}, err
	}
	token.AuthoritySignature = &signature
	return token, nil
}
// scanJoinRequest decodes one row into a NodeJoinRequest; column order must
// match the destinations below. The raw JSON fields are defaulted via
// ensureRaw: `[]` for RequestedRoles and `{}` for the others.
func scanJoinRequest(row scanner) (NodeJoinRequest, error) {
	var request NodeJoinRequest
	err := row.Scan(
		&request.ID,
		&request.ClusterID,
		&request.JoinTokenID,
		&request.NodeName,
		&request.NodeFingerprint,
		&request.PublicKey,
		&request.ReportedCapabilities,
		&request.ReportedFacts,
		&request.RequestedRoles,
		&request.Status,
		&request.ReviewedByUserID,
		&request.ReviewedAt,
		&request.ApprovedNodeID,
		&request.RejectionReason,
		&request.CreatedAt,
		&request.UpdatedAt,
		&request.ApprovalPayload,
		&request.ApprovalSignature,
	)
	if err != nil {
		return NodeJoinRequest{}, err
	}
	ensureRaw(&request.ReportedCapabilities, `{}`)
	ensureRaw(&request.ReportedFacts, `{}`)
	ensureRaw(&request.RequestedRoles, `[]`)
	ensureRaw(&request.ApprovalPayload, `{}`)
	ensureRaw(&request.ApprovalSignature, `{}`)
	return request, nil
}
// scanRoleAssignment decodes one row into a NodeRoleAssignment; column order
// must match the destinations below. Policy is defaulted to `{}` via ensureRaw.
func scanRoleAssignment(row scanner) (NodeRoleAssignment, error) {
	var assignment NodeRoleAssignment
	err := row.Scan(
		&assignment.ID,
		&assignment.ClusterID,
		&assignment.NodeID,
		&assignment.OrganizationID,
		&assignment.Role,
		&assignment.Status,
		&assignment.Policy,
		&assignment.AssignedByUserID,
		&assignment.AssignedAt,
		&assignment.RevokedAt,
	)
	if err != nil {
		return NodeRoleAssignment{}, err
	}
	ensureRaw(&assignment.Policy, `{}`)
	return assignment, nil
}
// scanHeartbeat decodes one row into a NodeHeartbeat; column order must match
// the destinations below. Capabilities, ServiceStates and Metadata are each
// defaulted to `{}` via ensureRaw.
func scanHeartbeat(row scanner) (NodeHeartbeat, error) {
	var beat NodeHeartbeat
	err := row.Scan(
		&beat.ID,
		&beat.ClusterID,
		&beat.NodeID,
		&beat.HealthStatus,
		&beat.ReportedVersion,
		&beat.Capabilities,
		&beat.ServiceStates,
		&beat.Metadata,
		&beat.ObservedAt,
	)
	if err != nil {
		return NodeHeartbeat{}, err
	}
	ensureRaw(&beat.Capabilities, `{}`)
	ensureRaw(&beat.ServiceStates, `{}`)
	ensureRaw(&beat.Metadata, `{}`)
	return beat, nil
}
// scanReleaseVersion decodes one row into a ReleaseVersion. Compatibility and
// AuthorityPayload are defaulted to `{}` via ensureRaw. The trailing signature
// column is decoded into AuthoritySignature only when it is non-empty and not
// the literal `{}`; a decode failure is returned as an error.
func scanReleaseVersion(row scanner) (ReleaseVersion, error) {
	var (
		release   ReleaseVersion
		rawSigned json.RawMessage
	)
	err := row.Scan(
		&release.ID,
		&release.ClusterID,
		&release.Product,
		&release.Version,
		&release.Channel,
		&release.Status,
		&release.Compatibility,
		&release.Changelog,
		&release.CreatedByUserID,
		&release.CreatedAt,
		&release.AuthorityPayload,
		&rawSigned,
	)
	if err != nil {
		return ReleaseVersion{}, err
	}
	ensureRaw(&release.Compatibility, `{}`)
	ensureRaw(&release.AuthorityPayload, `{}`)

	if len(rawSigned) == 0 || string(rawSigned) == "{}" {
		return release, nil
	}
	var signature ClusterSignature
	if err := json.Unmarshal(rawSigned, &signature); err != nil {
		return ReleaseVersion{}, err
	}
	release.AuthoritySignature = &signature
	return release, nil
}
// scanReleaseArtifact decodes one row into a ReleaseArtifact; column order
// must match the destinations below. Metadata is defaulted to `{}` via
// ensureRaw and URLs is then derived from it with
// releaseArtifactURLsFromMetadata.
func scanReleaseArtifact(row scanner) (ReleaseArtifact, error) {
	var artifact ReleaseArtifact
	err := row.Scan(
		&artifact.ID,
		&artifact.ReleaseID,
		&artifact.ClusterID,
		&artifact.Product,
		&artifact.Version,
		&artifact.OS,
		&artifact.Arch,
		&artifact.InstallType,
		&artifact.Kind,
		&artifact.URL,
		&artifact.SHA256,
		&artifact.SizeBytes,
		&artifact.Signature,
		&artifact.Metadata,
		&artifact.CreatedAt,
	)
	if err != nil {
		return ReleaseArtifact{}, err
	}
	ensureRaw(&artifact.Metadata, `{}`)
	artifact.URLs = releaseArtifactURLsFromMetadata(artifact.Metadata)
	return artifact, nil
}
// releaseArtifactURLsFromMetadata pulls a list of artifact URLs out of a
// metadata JSON object. It looks at "artifact_urls" first and "urls" second
// and uses the first key that is present and decodes as an array of strings.
// Entries are trimmed; blanks and repeats are dropped, first-seen order kept.
//
// It returns nil when the metadata is empty, is not a JSON object, or holds
// no usable key. A usable key whose entries are all filtered out yields an
// empty, non-nil slice (the second key is not consulted in that case).
func releaseArtifactURLsFromMetadata(metadata json.RawMessage) []string {
	if len(metadata) == 0 {
		return nil
	}
	var fields map[string]json.RawMessage
	if err := json.Unmarshal(metadata, &fields); err != nil {
		return nil
	}
	for _, key := range [...]string{"artifact_urls", "urls"} {
		raw, present := fields[key]
		if !present {
			continue
		}
		var candidates []string
		if err := json.Unmarshal(raw, &candidates); err != nil {
			continue
		}
		cleaned := make([]string, 0, len(candidates))
		seen := make(map[string]struct{}, len(candidates))
		for _, candidate := range candidates {
			candidate = strings.TrimSpace(candidate)
			if candidate == "" {
				continue
			}
			if _, dup := seen[candidate]; dup {
				continue
			}
			seen[candidate] = struct{}{}
			cleaned = append(cleaned, candidate)
		}
		return cleaned
	}
	return nil
}
// scanNodeUpdatePolicy decodes one row into a NodeUpdatePolicy; column order
// must match the destinations below.
func scanNodeUpdatePolicy(row scanner) (NodeUpdatePolicy, error) {
	var policy NodeUpdatePolicy
	err := row.Scan(
		&policy.ClusterID,
		&policy.NodeID,
		&policy.Product,
		&policy.Channel,
		&policy.TargetVersion,
		&policy.Strategy,
		&policy.Enabled,
		&policy.RollbackAllowed,
		&policy.HealthWindowSec,
		&policy.UpdatedByUserID,
		&policy.UpdatedAt,
	)
	if err != nil {
		return NodeUpdatePolicy{}, err
	}
	return policy, nil
}
// scanNodeUpdateStatus decodes one row into a NodeUpdateStatus; column order
// must match the destinations below. Payload is defaulted to `{}` via ensureRaw.
func scanNodeUpdateStatus(row scanner) (NodeUpdateStatus, error) {
	var status NodeUpdateStatus
	err := row.Scan(
		&status.ID,
		&status.ClusterID,
		&status.NodeID,
		&status.Product,
		&status.CurrentVersion,
		&status.TargetVersion,
		&status.Phase,
		&status.Status,
		&status.AttemptID,
		&status.ErrorMessage,
		&status.RollbackVersion,
		&status.Payload,
		&status.ObservedAt,
	)
	if err != nil {
		return NodeUpdateStatus{}, err
	}
	ensureRaw(&status.Payload, `{}`)
	return status, nil
}
// scanAuditEvent decodes one row into a ClusterAuditEvent; column order must
// match the destinations below. Payload is defaulted to `{}` via ensureRaw.
func scanAuditEvent(row scanner) (ClusterAuditEvent, error) {
	var event ClusterAuditEvent
	err := row.Scan(
		&event.ID,
		&event.ClusterID,
		&event.ActorUserID,
		&event.EventType,
		&event.TargetType,
		&event.TargetID,
		&event.Payload,
		&event.CreatedAt,
	)
	if err != nil {
		return ClusterAuditEvent{}, err
	}
	ensureRaw(&event.Payload, `{}`)
	return event, nil
}
// scanFabricTestingFlag decodes one row into a FabricTestingFlag; column
// order must match the destinations below. Metadata is defaulted to `{}` via
// ensureRaw.
func scanFabricTestingFlag(row scanner) (FabricTestingFlag, error) {
	var flag FabricTestingFlag
	err := row.Scan(
		&flag.ID,
		&flag.ScopeType,
		&flag.ScopeID,
		&flag.ClusterID,
		&flag.Enabled,
		&flag.TelemetryEnabled,
		&flag.SyntheticLinksEnabled,
		&flag.HistoryRetentionHours,
		&flag.Metadata,
		&flag.UpdatedByUserID,
		&flag.UpdatedAt,
	)
	if err != nil {
		return FabricTestingFlag{}, err
	}
	ensureRaw(&flag.Metadata, `{}`)
	return flag, nil
}
// scanNodeTelemetry decodes one row into a NodeTelemetryObservation; column
// order must match the destinations below. Payload is defaulted to `{}` via
// ensureRaw.
func scanNodeTelemetry(row scanner) (NodeTelemetryObservation, error) {
	var sample NodeTelemetryObservation
	err := row.Scan(
		&sample.ID,
		&sample.ClusterID,
		&sample.NodeID,
		&sample.CPUPercent,
		&sample.MemoryUsedBytes,
		&sample.MemoryTotalBytes,
		&sample.DiskUsedBytes,
		&sample.DiskTotalBytes,
		&sample.NetworkRxBytes,
		&sample.NetworkTxBytes,
		&sample.ProcessCount,
		&sample.Payload,
		&sample.ObservedAt,
	)
	if err != nil {
		return NodeTelemetryObservation{}, err
	}
	ensureRaw(&sample.Payload, `{}`)
	return sample, nil
}
// scanDesiredWorkload decodes one row into a NodeWorkloadDesiredState; column
// order must match the destinations below. Config and Environment are
// defaulted to `{}` via ensureRaw.
func scanDesiredWorkload(row scanner) (NodeWorkloadDesiredState, error) {
	var desired NodeWorkloadDesiredState
	err := row.Scan(
		&desired.ClusterID,
		&desired.NodeID,
		&desired.ServiceType,
		&desired.DesiredState,
		&desired.Version,
		&desired.RuntimeMode,
		&desired.ArtifactRef,
		&desired.Config,
		&desired.Environment,
		&desired.UpdatedByUserID,
		&desired.UpdatedAt,
	)
	if err != nil {
		return NodeWorkloadDesiredState{}, err
	}
	ensureRaw(&desired.Config, `{}`)
	ensureRaw(&desired.Environment, `{}`)
	return desired, nil
}
// scanWorkloadStatus decodes one row into a NodeWorkloadStatus; column order
// must match the destinations below. StatusPayload is defaulted to `{}` via
// ensureRaw.
func scanWorkloadStatus(row scanner) (NodeWorkloadStatus, error) {
	var status NodeWorkloadStatus
	err := row.Scan(
		&status.ID,
		&status.ClusterID,
		&status.NodeID,
		&status.ServiceType,
		&status.ReportedState,
		&status.RuntimeMode,
		&status.Version,
		&status.StatusPayload,
		&status.ObservedAt,
	)
	if err != nil {
		return NodeWorkloadStatus{}, err
	}
	ensureRaw(&status.StatusPayload, `{}`)
	return status, nil
}
// scanMeshLink decodes one row into a MeshLinkObservation; column order must
// match the destinations below. Metadata is defaulted to `{}` via ensureRaw.
func scanMeshLink(row scanner) (MeshLinkObservation, error) {
	var link MeshLinkObservation
	err := row.Scan(
		&link.ID,
		&link.ClusterID,
		&link.SourceNodeID,
		&link.TargetNodeID,
		&link.LinkStatus,
		&link.LatencyMs,
		&link.QualityScore,
		&link.Metadata,
		&link.ObservedAt,
	)
	if err != nil {
		return MeshLinkObservation{}, err
	}
	ensureRaw(&link.Metadata, `{}`)
	return link, nil
}
// scanRouteIntent decodes one row into a MeshRouteIntent; column order must
// match the destinations below. SourceSelector, DestinationSelector and
// Policy are defaulted to `{}` via ensureRaw.
func scanRouteIntent(row scanner) (MeshRouteIntent, error) {
	var intent MeshRouteIntent
	err := row.Scan(
		&intent.ID,
		&intent.ClusterID,
		&intent.SourceSelector,
		&intent.DestinationSelector,
		&intent.ServiceClass,
		&intent.Priority,
		&intent.Status,
		&intent.Policy,
		&intent.CreatedByUserID,
		&intent.CreatedAt,
		&intent.UpdatedAt,
	)
	if err != nil {
		return MeshRouteIntent{}, err
	}
	ensureRaw(&intent.SourceSelector, `{}`)
	ensureRaw(&intent.DestinationSelector, `{}`)
	ensureRaw(&intent.Policy, `{}`)
	return intent, nil
}
// scanFabricServiceChannelRouteFeedback decodes one row into a
// FabricServiceChannelRouteFeedbackObservation; column order must match the
// destinations below. Payload is defaulted to `{}` via ensureRaw, and
// RetryCooldownUntil is then derived from the payload by
// fabricServiceChannelRetryCooldownUntil.
func scanFabricServiceChannelRouteFeedback(row scanner) (FabricServiceChannelRouteFeedbackObservation, error) {
	var feedback FabricServiceChannelRouteFeedbackObservation
	err := row.Scan(
		&feedback.ID,
		&feedback.ClusterID,
		&feedback.ReporterNodeID,
		&feedback.RouteID,
		&feedback.ServiceClass,
		&feedback.FeedbackStatus,
		&feedback.ScoreAdjustment,
		&feedback.Reasons,
		&feedback.LastError,
		&feedback.ConsecutiveFailures,
		&feedback.StallCount,
		&feedback.LastSendDurationMs,
		&feedback.Payload,
		&feedback.ObservedAt,
		&feedback.ExpiresAt,
	)
	if err != nil {
		return FabricServiceChannelRouteFeedbackObservation{}, err
	}
	ensureRaw(&feedback.Payload, `{}`)
	feedback.RetryCooldownUntil = fabricServiceChannelRetryCooldownUntil(feedback.Payload)
	return feedback, nil
}
// scanFabricServiceChannelLeaseRecord decodes one row into a
// FabricServiceChannelLeaseRecord. The lease column is read as raw JSON and,
// when non-empty, unmarshalled into Lease; a decode failure is returned as an
// error.
func scanFabricServiceChannelLeaseRecord(row scanner) (FabricServiceChannelLeaseRecord, error) {
	var (
		record    FabricServiceChannelLeaseRecord
		leaseJSON json.RawMessage
	)
	err := row.Scan(
		&record.ClusterID,
		&record.ChannelID,
		&record.TokenHash,
		&record.ResourceID,
		&record.ServiceClass,
		&record.SelectedEntryNodeID,
		&record.ExpiresAt,
		&leaseJSON,
		&record.CreatedAt,
		&record.UpdatedAt,
	)
	if err != nil {
		return FabricServiceChannelLeaseRecord{}, err
	}
	if len(leaseJSON) == 0 {
		return record, nil
	}
	if err := json.Unmarshal(leaseJSON, &record.Lease); err != nil {
		return FabricServiceChannelLeaseRecord{}, err
	}
	return record, nil
}
// scanFabricServiceChannelRouteRebuildAttempt decodes one row into a
// FabricServiceChannelRouteRebuildAttempt.
//
// The destinations below are positional: the SELECT feeding this helper must
// produce its columns in exactly this order. After the scan the payload is
// defaulted to `{}` via ensureRaw, the timeline column is decoded into
// Timeline, and feedback-correlation fields still unset are filled from the
// payload.
func scanFabricServiceChannelRouteRebuildAttempt(row scanner) (FabricServiceChannelRouteRebuildAttempt, error) {
var item FabricServiceChannelRouteRebuildAttempt
// timeline is scanned as raw JSON and decoded separately below.
var timeline json.RawMessage
if err := row.Scan(
&item.ID,
&item.ClusterID,
&item.ReporterNodeID,
&item.ServiceClass,
&item.RouteID,
&item.ReplacementRouteID,
&item.RebuildRequestID,
&item.RebuildStatus,
&item.RebuildReason,
&item.RebuildAttempt,
&item.DecisionSource,
&item.Outcome,
&item.Generation,
&item.PolicyFingerprint,
&item.ObservedPolicyFingerprint,
&item.ObservedRouteGeneration,
&item.EffectiveRouteGeneration,
&item.FeedbackStatus,
&item.FeedbackScoreAdjustment,
&item.FeedbackEffectiveScoreAdjustment,
&item.FeedbackReasons,
&item.LastError,
&item.ConsecutiveFailures,
&item.StallCount,
&item.LastSendDurationMs,
&item.QualityWindowSampleCount,
&item.QualityWindowFailureCount,
&item.QualityWindowDropCount,
&item.QualityWindowSlowCount,
&item.OldHops,
&item.ReplacementHops,
&item.NodeTransitionStatus,
&item.NodeTransitionGeneration,
&item.NodeTransitionObservedAt,
&item.NodeTransitionMatched,
&item.NodeRouteGenerationStatus,
&item.NodeRouteGenerationAppliedAt,
&item.NodeRouteGenerationWithdrawnAt,
&item.NodeRouteGenerationMatched,
&item.PostRebuildSelectedRouteID,
&item.PostRebuildSendPackets,
&item.PostRebuildSendFailures,
&item.PostRebuildSendFlowPackets,
&item.PostRebuildSendFlowDropped,
&item.GuardStatus,
&item.GuardSeverity,
&item.GuardReason,
&item.GuardTransitionDeadlineSeconds,
&item.GuardTrafficDeadlineSeconds,
&timeline,
&item.CorrelationSnapshotAt,
&item.Payload,
&item.CreatedAt,
&item.UpdatedAt,
); err != nil {
return FabricServiceChannelRouteRebuildAttempt{}, err
}
ensureRaw(&item.Payload, `{}`)
// An empty column or a JSON null leaves Timeline at its zero value. A decode
// error is discarded rather than failing the whole row.
if len(timeline) > 0 && string(timeline) != "null" {
_ = json.Unmarshal(timeline, &item.Timeline)
}
// Fill feedback-correlation fields from the payload when not already set.
enrichFabricServiceChannelRouteRebuildAttemptFeedbackCorrelation(&item)
return item, nil
}
// enrichFabricServiceChannelRouteRebuildAttemptFeedbackCorrelation back-fills
// the feedback correlation fields of item from same-named keys in its JSON
// payload. Each string field is resolved with firstNonEmptyString(current,
// payload value); the two timestamps are parsed from the payload only while
// still nil. It does nothing for a nil item or an empty/invalid payload.
func enrichFabricServiceChannelRouteRebuildAttemptFeedbackCorrelation(item *FabricServiceChannelRouteRebuildAttempt) {
	if item == nil {
		return
	}
	if len(item.Payload) == 0 || !json.Valid(item.Payload) {
		return
	}
	payload := jsonObject(item.Payload)

	stringFields := []struct {
		dst *string
		key string
	}{
		{&item.FeedbackObservationID, "feedback_observation_id"},
		{&item.FeedbackSource, "feedback_source"},
		{&item.FeedbackChannelID, "feedback_channel_id"},
		{&item.FeedbackResourceID, "feedback_resource_id"},
		{&item.FeedbackViolationStatus, "feedback_violation_status"},
		{&item.FeedbackViolationReason, "feedback_violation_reason"},
	}
	for _, field := range stringFields {
		*field.dst = firstNonEmptyString(*field.dst, jsonString(payload, field.key))
	}

	if item.FeedbackObservedAt == nil {
		item.FeedbackObservedAt = parseOptionalPayloadTime(jsonString(payload, "feedback_observed_at"))
	}
	if item.FeedbackExpiresAt == nil {
		item.FeedbackExpiresAt = parseOptionalPayloadTime(jsonString(payload, "feedback_expires_at"))
	}
}
// parseOptionalPayloadTime converts a timestamp string taken from a JSON
// payload into a UTC time. The value is trimmed and parsed with the
// time.RFC3339Nano layout; a blank or unparseable value yields nil.
func parseOptionalPayloadTime(value string) *time.Time {
	trimmed := strings.TrimSpace(value)
	if trimmed == "" {
		return nil
	}
	if stamp, err := time.Parse(time.RFC3339Nano, trimmed); err == nil {
		utc := stamp.UTC()
		return &utc
	}
	return nil
}
// scanFabricServiceChannelRouteRebuildAlertSilence decodes one row into a
// FabricServiceChannelRouteRebuildAlertSilence and derives its display fields.
//
// DisplayRouteID starts as RouteID. IncidentSource and ChannelID are taken
// (trimmed) from the string payload keys "incident_source" and "channel_id"
// when present. If RouteID parses as an access-decision silence id,
// IncidentSource falls back to "access_decision", ChannelID falls back to the
// parsed channel id, and DisplayRouteID becomes the parsed route id.
func scanFabricServiceChannelRouteRebuildAlertSilence(row scanner) (FabricServiceChannelRouteRebuildAlertSilence, error) {
	var silence FabricServiceChannelRouteRebuildAlertSilence
	err := row.Scan(
		&silence.ID,
		&silence.ClusterID,
		&silence.ReporterNodeID,
		&silence.RouteID,
		&silence.GuardStatus,
		&silence.Generation,
		&silence.Reason,
		&silence.CreatedByUserID,
		&silence.CreatedAt,
		&silence.ExpiresAt,
		&silence.Payload,
	)
	if err != nil {
		return FabricServiceChannelRouteRebuildAlertSilence{}, err
	}
	ensureRaw(&silence.Payload, `{}`)
	silence.DisplayRouteID = silence.RouteID

	// Payload hints are optional; an undecodable payload is simply skipped.
	var fields map[string]any
	if json.Unmarshal(silence.Payload, &fields) == nil && fields != nil {
		if source, isString := fields["incident_source"].(string); isString {
			silence.IncidentSource = strings.TrimSpace(source)
		}
		if channel, isString := fields["channel_id"].(string); isString {
			silence.ChannelID = strings.TrimSpace(channel)
		}
	}

	parsedChannelID, parsedRouteID, matched := fabricServiceChannelParseAccessDecisionSilenceRouteID(silence.RouteID)
	if matched {
		silence.IncidentSource = firstNonEmptyString(silence.IncidentSource, "access_decision")
		silence.ChannelID = firstNonEmptyString(silence.ChannelID, parsedChannelID)
		silence.DisplayRouteID = parsedRouteID
	}
	return silence, nil
}
// scanQoSPolicy decodes one row into a MeshQoSPolicy; column order must match
// the destinations below. BandwidthPolicy and Metadata are defaulted to `{}`
// via ensureRaw.
func scanQoSPolicy(row scanner) (MeshQoSPolicy, error) {
	var policy MeshQoSPolicy
	err := row.Scan(
		&policy.ID,
		&policy.ClusterID,
		&policy.ServiceClass,
		&policy.Priority,
		&policy.ReliabilityMode,
		&policy.DropPolicy,
		&policy.BandwidthPolicy,
		&policy.Metadata,
		&policy.CreatedAt,
		&policy.UpdatedAt,
	)
	if err != nil {
		return MeshQoSPolicy{}, err
	}
	ensureRaw(&policy.BandwidthPolicy, `{}`)
	ensureRaw(&policy.Metadata, `{}`)
	return policy, nil
}
// scanFabricEntryPoint decodes one row into a FabricEntryPoint; column order
// must match the destinations below. Policy and Metadata are defaulted to
// `{}` via ensureRaw.
func scanFabricEntryPoint(row scanner) (FabricEntryPoint, error) {
	var entry FabricEntryPoint
	err := row.Scan(
		&entry.ID,
		&entry.ClusterID,
		&entry.Name,
		&entry.Status,
		&entry.EndpointType,
		&entry.PublicEndpoint,
		&entry.Policy,
		&entry.Metadata,
		&entry.CreatedByUserID,
		&entry.CreatedAt,
		&entry.UpdatedAt,
	)
	if err != nil {
		return FabricEntryPoint{}, err
	}
	ensureRaw(&entry.Policy, `{}`)
	ensureRaw(&entry.Metadata, `{}`)
	return entry, nil
}
// scanFabricEntryPointNode decodes one row into a FabricEntryPointNode;
// column order must match the destinations below. Metadata is defaulted to
// `{}` via ensureRaw.
func scanFabricEntryPointNode(row scanner) (FabricEntryPointNode, error) {
	var member FabricEntryPointNode
	err := row.Scan(
		&member.EntryPointID,
		&member.ClusterID,
		&member.NodeID,
		&member.Status,
		&member.Priority,
		&member.Metadata,
		&member.AddedByUserID,
		&member.AddedAt,
	)
	if err != nil {
		return FabricEntryPointNode{}, err
	}
	ensureRaw(&member.Metadata, `{}`)
	return member, nil
}
// scanFabricEgressPool decodes one row into a FabricEgressPool; column order
// must match the destinations below. RouteScope, Policy and Metadata are
// defaulted to `{}` via ensureRaw.
func scanFabricEgressPool(row scanner) (FabricEgressPool, error) {
	var pool FabricEgressPool
	err := row.Scan(
		&pool.ID,
		&pool.ClusterID,
		&pool.Name,
		&pool.Status,
		&pool.Description,
		&pool.RouteScope,
		&pool.Policy,
		&pool.Metadata,
		&pool.CreatedByUserID,
		&pool.CreatedAt,
		&pool.UpdatedAt,
	)
	if err != nil {
		return FabricEgressPool{}, err
	}
	ensureRaw(&pool.RouteScope, `{}`)
	ensureRaw(&pool.Policy, `{}`)
	ensureRaw(&pool.Metadata, `{}`)
	return pool, nil
}
// scanFabricEgressPoolNode decodes one row into a FabricEgressPoolNode;
// column order must match the destinations below. Metadata is defaulted to
// `{}` via ensureRaw.
func scanFabricEgressPoolNode(row scanner) (FabricEgressPoolNode, error) {
	var member FabricEgressPoolNode
	err := row.Scan(
		&member.EgressPoolID,
		&member.ClusterID,
		&member.NodeID,
		&member.Status,
		&member.Priority,
		&member.Metadata,
		&member.AddedByUserID,
		&member.AddedAt,
	)
	if err != nil {
		return FabricEgressPoolNode{}, err
	}
	ensureRaw(&member.Metadata, `{}`)
	return member, nil
}
// scanAuthorityState decodes one row into a ClusterAuthorityState; column
// order must match the destinations below.
func scanAuthorityState(row scanner) (ClusterAuthorityState, error) {
	var state ClusterAuthorityState
	err := row.Scan(
		&state.ClusterID,
		&state.AuthorityState,
		&state.MutationMode,
		&state.Term,
		&state.Notes,
		&state.UpdatedByUserID,
		&state.UpdatedAt,
	)
	if err != nil {
		return ClusterAuthorityState{}, err
	}
	return state, nil
}
// GetVPNClientProfile builds the VPN client profile for userID within
// organizationID on clusterID.
//
// Steps:
//  1. Verify the user has an active organization membership; otherwise
//     ErrVPNLeaseOwnerNotAllowed is returned.
//  2. Load every enabled VPN connection of the cluster/organization, ordered
//     by name then id, together with its allowed nodes, resolved exit pool,
//     current unexpired active lease, active route policies and a
//     client_config document assembled in SQL.
//  3. Per connection, decode the JSON side columns, default the raw JSON
//     fields, attach the exit pool to the client config and apply
//     enrichVPNClientFabricRoute with the preferred entry/exit node ids.
//  4. Look up endpoint candidates for all entry and exit node ids referenced by
//     the profile and merge them into each connection's client config.
//
// generatedAt is copied into the profile unchanged. The two queries run
// directly on the pool, not inside a shared transaction.
func (s *PostgresStore) GetVPNClientProfile(
ctx context.Context,
clusterID, organizationID, userID, preferredEntryNodeID, preferredExitNodeID string,
generatedAt time.Time,
) (VPNClientProfile, error) {
// Step 1: membership gate.
var allowed bool
if err := s.db.QueryRow(ctx, `
SELECT EXISTS (
SELECT 1
FROM organization_memberships
WHERE organization_id = $1::uuid
AND user_id = $2::uuid
AND status = 'active'
)
`, organizationID, userID).Scan(&allowed); err != nil {
return VPNClientProfile{}, err
}
if !allowed {
return VPNClientProfile{}, ErrVPNLeaseOwnerNotAllowed
}
// Step 2: one row per enabled connection. The client_config column is built
// by jsonb concatenation (||), so later operands override earlier keys:
// metadata client_config, then a fixed vpn_address of 10.77.0.2/24, then
// full-tunnel routes, then dns_servers (gateway status first, route_policy
// second, target_endpoint third), then runtime fields when a lease exists.
rows, err := s.db.Query(ctx, `
SELECT vc.id::text,
vc.name,
vc.protocol_family,
vc.mode,
vc.desired_state,
vc.status,
vc.target_endpoint,
vc.routing_usage,
vc.route_policy,
vc.qos_policy,
vc.placement_policy,
COALESCE((
SELECT jsonb_agg(van.node_id::text ORDER BY van.created_at, van.node_id::text)
FROM vpn_connection_allowed_nodes van
WHERE van.vpn_connection_id = vc.id
AND van.status = 'active'
), '[]'::jsonb) AS allowed_node_ids,
COALESCE(vc.placement_policy->'entry_node_ids', '[]'::jsonb) AS entry_node_ids,
COALESCE(vc.placement_policy->>'exit_node_id', '') AS exit_node_id,
COALESCE(pool.id::text, '') AS exit_pool_id,
COALESCE(pool.name, vc.name) AS exit_pool_name,
CASE WHEN l.id IS NULL THEN NULL ELSE jsonb_build_object(
'lease_id', l.id::text,
'owner_node_id', l.owner_node_id::text,
'lease_generation', l.lease_generation,
'status', l.status,
'renewed_at', l.renewed_at,
'expires_at', l.expires_at
) END AS active_lease,
COALESCE((
SELECT jsonb_agg(jsonb_build_object(
'id', rp.id::text,
'route_type', rp.route_type,
'destination', rp.destination,
'action', rp.action,
'service_type', rp.service_type,
'priority', rp.priority,
'policy', rp.policy,
'status', rp.status
) ORDER BY rp.priority, rp.destination)
FROM vpn_connection_route_policies rp
WHERE rp.vpn_connection_id = vc.id
AND rp.status = 'active'
), '[]'::jsonb) AS route_policies,
COALESCE(vc.metadata->'client_config', '{}'::jsonb) ||
jsonb_build_object('vpn_address', '10.77.0.2/24') ||
CASE
WHEN COALESCE((vc.route_policy->>'full_tunnel')::boolean, false)
THEN jsonb_build_object('routes', jsonb_build_array('0.0.0.0/0'))
ELSE '{}'::jsonb
END ||
CASE
WHEN jsonb_typeof(gateway_status.status_payload->'exit_dns_servers') = 'array'
AND jsonb_array_length(gateway_status.status_payload->'exit_dns_servers') > 0
THEN jsonb_build_object('dns_servers', gateway_status.status_payload->'exit_dns_servers')
WHEN jsonb_typeof(vc.route_policy->'dns_servers') = 'array'
AND jsonb_array_length(vc.route_policy->'dns_servers') > 0
THEN jsonb_build_object('dns_servers', vc.route_policy->'dns_servers')
WHEN jsonb_typeof(vc.target_endpoint->'dns_servers') = 'array'
AND jsonb_array_length(vc.target_endpoint->'dns_servers') > 0
THEN jsonb_build_object('dns_servers', vc.target_endpoint->'dns_servers')
ELSE '{}'::jsonb
END ||
CASE WHEN l.id IS NULL THEN '{}'::jsonb ELSE jsonb_strip_nulls(jsonb_build_object(
'runtime_status', CASE
WHEN COALESCE((gateway_status.status_payload->>'packet_forwarding')::boolean, false) THEN 'packet_forwarding_active'
WHEN COALESCE((gateway_status.status_payload->>'runtime_available')::boolean, false) THEN 'runtime_available'
WHEN gateway_status.observed_status IS NOT NULL THEN gateway_status.observed_status
ELSE 'lease_active'
END,
'gateway_node_id', l.owner_node_id::text,
'gateway_assignment_status', gateway_status.observed_status,
'gateway_interface', gateway_status.status_payload->>'gateway_interface',
'gateway_vpn_cidr', gateway_status.status_payload->>'gateway_vpn_cidr',
'relay_transport', gateway_status.status_payload->>'relay_transport',
'packet_forwarding', COALESCE((gateway_status.status_payload->>'packet_forwarding')::boolean, false),
'runtime_available', COALESCE((gateway_status.status_payload->>'runtime_available')::boolean, false),
'runtime_observed_at', gateway_status.observed_at
)) END AS client_config
FROM vpn_connections vc
LEFT JOIN LATERAL (
SELECT ep.id, ep.name
FROM fabric_egress_pools ep
WHERE ep.cluster_id = vc.cluster_id
AND ep.status = 'active'
AND (
ep.id::text = COALESCE(vc.placement_policy->>'exit_pool_id', '')
OR ep.name = COALESCE(vc.placement_policy->>'exit_pool_name', '')
OR EXISTS (
SELECT 1
FROM fabric_egress_pool_nodes epn
WHERE epn.egress_pool_id = ep.id
AND epn.cluster_id = vc.cluster_id
AND epn.status = 'active'
AND epn.node_id::text = ANY (
SELECT jsonb_array_elements_text(COALESCE(vc.placement_policy->'exit_node_ids', '[]'::jsonb))
)
)
)
ORDER BY
CASE
WHEN ep.id::text = COALESCE(vc.placement_policy->>'exit_pool_id', '') THEN 0
WHEN ep.name = COALESCE(vc.placement_policy->>'exit_pool_name', '') THEN 1
ELSE 2
END,
ep.name
LIMIT 1
) pool ON TRUE
LEFT JOIN vpn_connection_leases l
ON l.cluster_id = vc.cluster_id
AND l.vpn_connection_id = vc.id
AND l.status = 'active'
AND l.expires_at > NOW()
LEFT JOIN vpn_connection_assignment_latest_statuses gateway_status
ON gateway_status.cluster_id = vc.cluster_id
AND gateway_status.vpn_connection_id = vc.id
AND gateway_status.node_id = l.owner_node_id
WHERE vc.cluster_id = $1::uuid
AND vc.organization_id = $2::uuid
AND vc.desired_state = 'enabled'
ORDER BY vc.name ASC, vc.id ASC
`, clusterID, organizationID)
if err != nil {
return VPNClientProfile{}, err
}
defer rows.Close()
profile := VPNClientProfile{
SchemaVersion: "rap.vpn_client_profile.v1",
ClusterID: clusterID,
OrganizationID: organizationID,
UserID: userID,
GeneratedAt: generatedAt,
}
// Step 3: decode each connection row.
for rows.Next() {
var item VPNClientConnection
// JSON side columns scanned as raw bytes and decoded after the scan.
var allowedRaw, entryRaw []byte
var activeLeaseRaw []byte
// Destination order mirrors the SELECT list above.
if err := rows.Scan(
&item.ID,
&item.Name,
&item.ProtocolFamily,
&item.Mode,
&item.DesiredState,
&item.Status,
&item.TargetEndpoint,
&item.RoutingUsage,
&item.RoutePolicy,
&item.QoSPolicy,
&item.PlacementPolicy,
&allowedRaw,
&entryRaw,
&item.ExitNodeID,
&item.ExitPoolID,
&item.ExitPoolName,
&activeLeaseRaw,
&item.RoutePolicies,
&item.ClientConfig,
); err != nil {
return VPNClientProfile{}, err
}
// Decode errors for the id lists are discarded; the lists then keep
// whatever Unmarshal managed to populate.
_ = json.Unmarshal(allowedRaw, &item.AllowedNodeIDs)
_ = json.Unmarshal(entryRaw, &item.EntryNodeIDs)
// ActiveLease stays nil when there is no lease (SQL NULL / JSON null) or
// when the lease document fails to decode.
if len(activeLeaseRaw) > 0 && string(activeLeaseRaw) != "null" {
var lease NodeVPNAssignmentLease
if err := json.Unmarshal(activeLeaseRaw, &lease); err == nil {
item.ActiveLease = &lease
}
}
ensureRaw(&item.TargetEndpoint, `{}`)
ensureRaw(&item.RoutingUsage, `[]`)
ensureRaw(&item.RoutePolicy, `{}`)
ensureRaw(&item.QoSPolicy, `{}`)
ensureRaw(&item.PlacementPolicy, `{}`)
ensureRaw(&item.RoutePolicies, `[]`)
ensureRaw(&item.ClientConfig, `{}`)
// Expose the resolved exit pool to the client. The SQL falls back to the
// connection name for exit_pool_name, so this branch is taken whenever the
// connection has a non-empty name.
if item.ExitPoolName != "" || item.ExitPoolID != "" {
item.ClientConfig = mergeJSONObjects(item.ClientConfig, map[string]any{
"exit_pool": map[string]any{
"id": item.ExitPoolID,
"name": firstNonEmptyMetadataString(item.ExitPoolName, item.Name),
"kind": "virtual_pool",
},
})
}
item.ClientConfig = enrichVPNClientFabricRoute(item, preferredEntryNodeID, preferredExitNodeID)
profile.Connections = append(profile.Connections, item)
}
if err := rows.Err(); err != nil {
return VPNClientProfile{}, err
}
// Step 4: resolve endpoint candidates. The same lookup helper is used for
// both the entry and the exit node id sets.
entryEndpoints, err := s.vpnEntryEndpointCandidates(ctx, clusterID, vpnProfileEntryNodeIDs(profile))
if err != nil {
return VPNClientProfile{}, err
}
exitEndpoints, err := s.vpnEntryEndpointCandidates(ctx, clusterID, vpnProfileExitNodeIDs(profile))
if err != nil {
return VPNClientProfile{}, err
}
for i := range profile.Connections {
profile.Connections[i].ClientConfig = enrichVPNClientEntryEndpointCandidates(profile.Connections[i], entryEndpoints)
profile.Connections[i].ClientConfig = enrichVPNClientExitEndpointCandidates(profile.Connections[i], exitEndpoints)
}
return profile, nil
}
// scanVPNConnection decodes one row into a VPNConnection; column order must
// match the destinations below. Raw JSON fields are defaulted via ensureRaw:
// AllowedNodePolicy to an explicit policy with no node ids, RoutingUsage to
// `[]`, and the remaining ones to `{}`.
func scanVPNConnection(row scanner) (VPNConnection, error) {
	var conn VPNConnection
	err := row.Scan(
		&conn.ID,
		&conn.ClusterID,
		&conn.OrganizationID,
		&conn.Name,
		&conn.TargetEndpoint,
		&conn.ProtocolFamily,
		&conn.CredentialRef,
		&conn.Mode,
		&conn.DesiredState,
		&conn.AllowedNodePolicy,
		&conn.RoutingUsage,
		&conn.RoutePolicy,
		&conn.QoSPolicy,
		&conn.PlacementPolicy,
		&conn.Status,
		&conn.Metadata,
		&conn.CreatedByUserID,
		&conn.UpdatedByUserID,
		&conn.CreatedAt,
		&conn.UpdatedAt,
	)
	if err != nil {
		return VPNConnection{}, err
	}
	ensureRaw(&conn.TargetEndpoint, `{}`)
	ensureRaw(&conn.AllowedNodePolicy, `{"mode":"explicit","node_ids":[]}`)
	ensureRaw(&conn.RoutingUsage, `[]`)
	ensureRaw(&conn.RoutePolicy, `{}`)
	ensureRaw(&conn.QoSPolicy, `{}`)
	ensureRaw(&conn.PlacementPolicy, `{}`)
	ensureRaw(&conn.Metadata, `{}`)
	return conn, nil
}
// scanVPNConnections drains rows, decoding each one with scanVPNConnection.
//
// The first scan error aborts the loop and returns a nil slice. Otherwise the
// collected connections are returned together with rows.Err().
func scanVPNConnections(rows pgx.Rows) ([]VPNConnection, error) {
	var connections []VPNConnection
	for rows.Next() {
		connection, scanErr := scanVPNConnection(rows)
		if scanErr != nil {
			return nil, scanErr
		}
		connections = append(connections, connection)
	}
	return connections, rows.Err()
}
// scanVPNAllowedNode decodes a single row into a VPNConnectionAllowedNode.
//
// A scan failure returns the zero value and the error. On success an empty
// Metadata field is replaced with `{}`.
func scanVPNAllowedNode(row scanner) (VPNConnectionAllowedNode, error) {
	var node VPNConnectionAllowedNode
	dest := []any{
		&node.VPNConnectionID,
		&node.ClusterID,
		&node.NodeID,
		&node.RolePreference,
		&node.Status,
		&node.Metadata,
		&node.CreatedByUserID,
		&node.CreatedAt,
	}
	if err := row.Scan(dest...); err != nil {
		return VPNConnectionAllowedNode{}, err
	}
	ensureRaw(&node.Metadata, `{}`)
	return node, nil
}
// vpnProfileEntryNodeIDs collects the entry node IDs referenced by every
// connection of the profile: the selected entry node from the connection's
// fabric route followed by the connection's EntryNodeIDs. The result is passed
// through dedupeStrings.
func vpnProfileEntryNodeIDs(profile VPNClientProfile) []string {
	var ids []string
	for i := range profile.Connections {
		conn := profile.Connections[i]
		fabricRoute := vpnFabricRouteFromClientConfig(conn.ClientConfig)
		ids = append(append(ids, fabricRoute.SelectedEntryNodeID), conn.EntryNodeIDs...)
	}
	return dedupeStrings(ids)
}
// vpnProfileExitNodeIDs collects the exit node IDs referenced by every
// connection of the profile. Per connection the order is: the fabric route's
// selected exit node, the route's exit pool, the connection's ExitNodeID, then
// its AllowedNodeIDs. The result is passed through dedupeStrings.
func vpnProfileExitNodeIDs(profile VPNClientProfile) []string {
	var ids []string
	for i := range profile.Connections {
		conn := profile.Connections[i]
		fabricRoute := vpnFabricRouteFromClientConfig(conn.ClientConfig)
		ids = append(append(ids, fabricRoute.SelectedExitNodeID), fabricRoute.ExitPoolNodeIDs...)
		ids = append(append(ids, conn.ExitNodeID), conn.AllowedNodeIDs...)
	}
	return dedupeStrings(ids)
}
// vpnEntryEndpointCandidates loads the latest heartbeat of each requested node
// in the cluster and derives endpoint candidates from it.
//
// The returned map is keyed by node ID; nodes whose heartbeat yields no
// candidates are omitted. With no (non-empty) node IDs the query is skipped
// and an empty map is returned.
func (s *PostgresStore) vpnEntryEndpointCandidates(ctx context.Context, clusterID string, entryNodeIDs []string) (map[string][]map[string]any, error) {
	nodeIDs := dedupeStrings(entryNodeIDs)
	byNode := make(map[string][]map[string]any, len(nodeIDs))
	if len(nodeIDs) == 0 {
		return byNode, nil
	}
	const heartbeatQuery = `
SELECT node_id::text, capabilities, metadata
FROM node_latest_heartbeats
WHERE cluster_id = $1::uuid
AND node_id::text = ANY($2::text[])
`
	rows, err := s.db.Query(ctx, heartbeatQuery, clusterID, nodeIDs)
	if err != nil {
		return nil, err
	}
	defer rows.Close()
	for rows.Next() {
		var (
			nodeID       string
			capabilities json.RawMessage
			metadata     json.RawMessage
		)
		if scanErr := rows.Scan(&nodeID, &capabilities, &metadata); scanErr != nil {
			return nil, scanErr
		}
		if found := vpnEntryEndpointCandidatesFromHeartbeat(nodeID, capabilities, metadata); len(found) > 0 {
			byNode[nodeID] = found
		}
	}
	return byNode, rows.Err()
}
// vpnEntryEndpointCandidatesFromHeartbeat converts the mesh_endpoint_report
// section of a node's heartbeat metadata into client-facing endpoint candidate
// maps.
//
// Each reported endpoint candidate inherits transport, connectivity mode, NAT
// type and region from the report when its own value is blank. Candidates that
// fail usableVPNFabricPeerEndpoint are dropped, and duplicates are removed by
// (node, lower-cased transport, lower-cased address). When no candidate
// survives, the report's peer_endpoint is used as a single fallback candidate
// if it is usable. Empty or undecodable metadata yields nil.
//
// The capabilities argument is accepted but not consulted.
func vpnEntryEndpointCandidatesFromHeartbeat(nodeID string, capabilities json.RawMessage, metadata json.RawMessage) []map[string]any {
	var envelope struct {
		MeshEndpointReport struct {
			PeerEndpoint       string                  `json:"peer_endpoint"`
			Transport          string                  `json:"transport"`
			ConnectivityMode   string                  `json:"connectivity_mode"`
			NATType            string                  `json:"nat_type"`
			Region             string                  `json:"region"`
			EndpointCandidates []PeerEndpointCandidate `json:"endpoint_candidates"`
		} `json:"mesh_endpoint_report"`
	}
	if len(metadata) == 0 {
		return nil
	}
	if err := json.Unmarshal(metadata, &envelope); err != nil {
		return nil
	}
	certs := endpointCandidateCertsFromHeartbeatMetadata(metadata)
	report := envelope.MeshEndpointReport

	var results []map[string]any
	emitted := map[string]struct{}{}
	for _, candidate := range report.EndpointCandidates {
		address := strings.TrimSpace(candidate.Address)
		// Candidate-level value wins; the report-level value is the fallback.
		transport := firstNonEmptyMetadataString(candidate.Transport, report.Transport)
		if !usableVPNFabricPeerEndpoint(address, transport) {
			continue
		}
		ownerID := strings.TrimSpace(candidate.NodeID)
		if ownerID == "" {
			ownerID = nodeID
		}
		endpointID := strings.TrimSpace(candidate.EndpointID)
		if endpointID == "" {
			endpointID = "mesh-" + ownerID
		}
		dedupeKey := ownerID + "\x00" + strings.ToLower(transport) + "\x00" + strings.ToLower(address)
		if _, duplicate := emitted[dedupeKey]; duplicate {
			continue
		}
		emitted[dedupeKey] = struct{}{}

		entry := map[string]any{
			"node_id":           ownerID,
			"endpoint_id":       endpointID,
			"transport":         transport,
			"address":           address,
			"reachability":      firstNonEmptyMetadataString(candidate.Reachability, "unverified"),
			"connectivity_mode": firstNonEmptyMetadataString(candidate.ConnectivityMode, report.ConnectivityMode),
			"nat_type":          firstNonEmptyMetadataString(candidate.NATType, report.NATType),
			"region":            firstNonEmptyMetadataString(candidate.Region, report.Region),
			"priority":          candidate.Priority,
			"status":            "reported",
			"source":            "node_latest_heartbeat.mesh_endpoint_report.endpoint_candidates",
		}
		// Certificate pin: the candidate's own metadata first, then the
		// lookup table built from the raw heartbeat document.
		pin := firstNonEmptyMetadataString(
			endpointCandidateMetadataString(candidate.Metadata, "tls_cert_sha256", "peer_cert_sha256"),
			certs[endpointID],
			certs[address],
			certs[ownerID+"\x00"+address],
		)
		if pin != "" {
			entry["tls_cert_sha256"] = pin
			entry["peer_cert_sha256"] = pin
		}
		if base := vpnEntryAPIBaseURL(address); base != "" {
			entry["api_base_url"] = base
		}
		results = append(results, entry)
	}
	if len(results) > 0 {
		return results
	}

	// No usable candidate was reported: fall back to the report's single
	// peer endpoint, if that one is usable.
	peerAddress := strings.TrimSpace(report.PeerEndpoint)
	peerTransport := strings.TrimSpace(report.Transport)
	if !usableVPNFabricPeerEndpoint(peerAddress, peerTransport) {
		return results
	}
	fallback := map[string]any{
		"node_id":           nodeID,
		"endpoint_id":       "mesh-peer-endpoint-" + nodeID,
		"transport":         peerTransport,
		"address":           peerAddress,
		"reachability":      "unverified",
		"connectivity_mode": strings.TrimSpace(report.ConnectivityMode),
		"nat_type":          strings.TrimSpace(report.NATType),
		"region":            strings.TrimSpace(report.Region),
		"priority":          100,
		"status":            "reported",
		"source":            "node_latest_heartbeat.mesh_endpoint_report.peer_endpoint",
	}
	if base := vpnEntryAPIBaseURL(peerAddress); base != "" {
		fallback["api_base_url"] = base
	}
	return append(results, fallback)
}
// endpointCandidateCertsFromHeartbeatMetadata builds a lookup table of
// certificate fingerprints from mesh_endpoint_report.endpoint_candidates in
// the heartbeat metadata.
//
// For every candidate whose metadata carries a non-blank tls_cert_sha256 (or,
// failing that, peer_cert_sha256) the fingerprint is stored under the
// candidate's endpoint_id, its address, and "node_id\x00address" when those
// values are present. Empty or undecodable metadata yields an empty map.
func endpointCandidateCertsFromHeartbeatMetadata(metadata json.RawMessage) map[string]string {
	certs := map[string]string{}
	if len(metadata) == 0 {
		return certs
	}
	var doc map[string]any
	if err := json.Unmarshal(metadata, &doc); err != nil {
		return certs
	}
	// field reads a trimmed string value; non-strings and missing keys give "".
	field := func(obj map[string]any, key string) string {
		return strings.TrimSpace(metadataAnyString(obj[key]))
	}
	report, _ := doc["mesh_endpoint_report"].(map[string]any)
	entries, _ := report["endpoint_candidates"].([]any)
	for _, entry := range entries {
		candidate, ok := entry.(map[string]any)
		if !ok || candidate == nil {
			continue
		}
		meta, _ := candidate["metadata"].(map[string]any)
		cert := field(meta, "tls_cert_sha256")
		if cert == "" {
			cert = field(meta, "peer_cert_sha256")
		}
		if cert == "" {
			continue
		}
		if endpointID := field(candidate, "endpoint_id"); endpointID != "" {
			certs[endpointID] = cert
		}
		address := field(candidate, "address")
		if address == "" {
			continue
		}
		certs[address] = cert
		if nodeID := field(candidate, "node_id"); nodeID != "" {
			certs[nodeID+"\x00"+address] = cert
		}
	}
	return certs
}
// metadataAnyString returns value unchanged when its dynamic type is string,
// and "" for every other type (including nil).
func metadataAnyString(value any) string {
	if text, isString := value.(string); isString {
		return text
	}
	return ""
}
// firstNonEmptyMetadataString returns the first argument that is not blank
// after trimming surrounding whitespace, in trimmed form. It returns "" when
// every argument is blank or no arguments are given.
func firstNonEmptyMetadataString(values ...string) string {
	for i := range values {
		if trimmed := strings.TrimSpace(values[i]); trimmed != "" {
			return trimmed
		}
	}
	return ""
}
// usableVPNFabricPeerEndpoint reports whether address can be advertised as a
// fabric peer endpoint for the given transport.
//
// It requires all of the following:
//   - a non-blank address,
//   - a transport containing "quic" (case-insensitive),
//   - an address that parses as a URL with scheme "quic" and a non-empty host,
//   - a host that, when it is an IP literal, is neither unspecified nor
//     loopback. Non-IP hosts (names) are accepted as-is.
func usableVPNFabricPeerEndpoint(address string, transport string) bool {
	endpoint := strings.TrimSpace(address)
	if endpoint == "" {
		return false
	}
	if !strings.Contains(strings.ToLower(strings.TrimSpace(transport)), "quic") {
		return false
	}
	target, err := url.Parse(endpoint)
	if err != nil || strings.ToLower(target.Scheme) != "quic" {
		return false
	}
	hostname := target.Hostname()
	if hostname == "" {
		return false
	}
	if literal := net.ParseIP(hostname); literal != nil {
		return !literal.IsUnspecified() && !literal.IsLoopback()
	}
	return true
}
// endpointCandidateMetadataString decodes metadata as a JSON object and
// returns the trimmed value of the first key in keys whose value is a
// non-blank string. It returns "" when metadata is empty, is not a JSON
// object, or none of the keys qualifies.
func endpointCandidateMetadataString(metadata json.RawMessage, keys ...string) string {
	if len(metadata) == 0 {
		return ""
	}
	var fields map[string]any
	if err := json.Unmarshal(metadata, &fields); err != nil {
		return ""
	}
	for i := range keys {
		text, isString := fields[keys[i]].(string)
		if !isString {
			continue
		}
		if trimmed := strings.TrimSpace(text); trimmed != "" {
			return trimmed
		}
	}
	return ""
}
// heartbeatCapabilityEnabled reports whether the capability called name is
// switched on in the capabilities JSON object.
//
// A boolean value is returned as-is. A string value counts as enabled when,
// after trimming, it equals "true" or "enabled" ignoring case. A missing key,
// any other value type, and empty or undecodable input all yield false.
func heartbeatCapabilityEnabled(capabilities json.RawMessage, name string) bool {
	if len(capabilities) == 0 {
		return false
	}
	var flags map[string]any
	if err := json.Unmarshal(capabilities, &flags); err != nil {
		return false
	}
	switch setting := flags[name].(type) {
	case bool:
		return setting
	case string:
		text := strings.TrimSpace(setting)
		return strings.EqualFold(text, "true") || strings.EqualFold(text, "enabled")
	}
	return false
}
// vpnEntryAPIBaseURL derives an API base URL from address by trimming
// surrounding whitespace and trailing slashes and appending "/api/v1".
// Only addresses starting with "http://" or "https://" qualify; anything else
// (including a blank address) yields "".
func vpnEntryAPIBaseURL(address string) string {
	base := strings.TrimRight(strings.TrimSpace(address), "/")
	switch {
	case strings.HasPrefix(base, "http://"), strings.HasPrefix(base, "https://"):
		return base + "/api/v1"
	default:
		return ""
	}
}
// enrichVPNClientEntryEndpointCandidates returns connection.ClientConfig with
// two keys added: "vpn_entry_endpoint_candidates" and
// "vpn_entry_endpoint_candidate_count".
//
// Candidates are taken from endpoints for the fabric route's selected entry
// node followed by connection.EntryNodeIDs, de-duplicated by
// (node, endpoint_id, address). Each candidate map is copied before being
// annotated with "selected_entry", so the shared endpoints map is never
// mutated. An undecodable or null ClientConfig is treated as an empty object;
// if the final marshal fails the original ClientConfig is returned unchanged.
//
// The candidate list is always emitted as a JSON array: when nothing matches
// it is `[]`, not `null`, in line with the other list fields of the config.
func enrichVPNClientEntryEndpointCandidates(connection VPNClientConnection, endpoints map[string][]map[string]any) json.RawMessage {
	var cfg map[string]any
	if err := json.Unmarshal(connection.ClientConfig, &cfg); err != nil || cfg == nil {
		cfg = map[string]any{}
	}
	route := vpnFabricRouteFromClientConfig(connection.ClientConfig)
	entryIDs := dedupeStrings(append([]string{route.SelectedEntryNodeID}, connection.EntryNodeIDs...))
	// Deliberately non-nil: a nil slice would marshal as null.
	candidates := []map[string]any{}
	seen := map[string]struct{}{}
	for _, nodeID := range entryIDs {
		for _, candidate := range endpoints[nodeID] {
			address, _ := candidate["address"].(string)
			endpointID, _ := candidate["endpoint_id"].(string)
			key := nodeID + "\x00" + endpointID + "\x00" + address
			if _, ok := seen[key]; ok {
				continue
			}
			seen[key] = struct{}{}
			// Copy so the per-connection flag does not leak into the
			// endpoints map shared between connections.
			enriched := make(map[string]any, len(candidate)+1)
			for k, v := range candidate {
				enriched[k] = v
			}
			enriched["selected_entry"] = nodeID != "" && nodeID == route.SelectedEntryNodeID
			candidates = append(candidates, enriched)
		}
	}
	cfg["vpn_entry_endpoint_candidates"] = candidates
	cfg["vpn_entry_endpoint_candidate_count"] = len(candidates)
	out, err := json.Marshal(cfg)
	if err != nil {
		return connection.ClientConfig
	}
	return out
}
// enrichVPNClientExitEndpointCandidates returns connection.ClientConfig with
// two keys added: "vpn_exit_endpoint_candidates" and
// "vpn_exit_endpoint_candidate_count".
//
// Exit nodes are considered in this order: the fabric route's selected exit
// node, the route's exit pool, connection.ExitNodeID, then
// connection.AllowedNodeIDs. Candidates are de-duplicated by
// (node, endpoint_id, address). Each candidate map is copied before being
// annotated with "selected_exit" and "exit_pool_member", so the shared
// endpoints map is never mutated. An undecodable or null ClientConfig is
// treated as an empty object; if the final marshal fails the original
// ClientConfig is returned unchanged.
//
// The candidate list is always emitted as a JSON array: when nothing matches
// it is `[]`, not `null`, in line with the other list fields of the config.
func enrichVPNClientExitEndpointCandidates(connection VPNClientConnection, endpoints map[string][]map[string]any) json.RawMessage {
	var cfg map[string]any
	if err := json.Unmarshal(connection.ClientConfig, &cfg); err != nil || cfg == nil {
		cfg = map[string]any{}
	}
	route := vpnFabricRouteFromClientConfig(connection.ClientConfig)
	// Build the ordered ID list once; dedupeStrings keeps first occurrences,
	// so a single pass gives the same order as de-duplicating after each step.
	ordered := make([]string, 0, 2+len(route.ExitPoolNodeIDs)+len(connection.AllowedNodeIDs))
	ordered = append(ordered, route.SelectedExitNodeID)
	ordered = append(ordered, route.ExitPoolNodeIDs...)
	ordered = append(ordered, connection.ExitNodeID)
	ordered = append(ordered, connection.AllowedNodeIDs...)
	exitIDs := dedupeStrings(ordered)
	// Deliberately non-nil: a nil slice would marshal as null.
	candidates := []map[string]any{}
	seen := map[string]struct{}{}
	for _, nodeID := range exitIDs {
		for _, candidate := range endpoints[nodeID] {
			address, _ := candidate["address"].(string)
			endpointID, _ := candidate["endpoint_id"].(string)
			key := nodeID + "\x00" + endpointID + "\x00" + address
			if _, ok := seen[key]; ok {
				continue
			}
			seen[key] = struct{}{}
			// Copy so the per-connection flags do not leak into the
			// endpoints map shared between connections.
			enriched := make(map[string]any, len(candidate)+2)
			for k, v := range candidate {
				enriched[k] = v
			}
			enriched["selected_exit"] = nodeID != "" && nodeID == route.SelectedExitNodeID
			enriched["exit_pool_member"] = true
			candidates = append(candidates, enriched)
		}
	}
	cfg["vpn_exit_endpoint_candidates"] = candidates
	cfg["vpn_exit_endpoint_candidate_count"] = len(candidates)
	out, err := json.Marshal(cfg)
	if err != nil {
		return connection.ClientConfig
	}
	return out
}
// listVPNConnectionAllowedNodes returns the allowed-node rows of one VPN
// connection in a cluster, ordered by role_preference and then by created_at
// descending. Each row is decoded with scanVPNAllowedNode; the first scan
// error aborts and returns a nil slice.
func listVPNConnectionAllowedNodes(ctx context.Context, q rowQuerier, clusterID, vpnConnectionID string) ([]VPNConnectionAllowedNode, error) {
	const allowedNodesQuery = `
SELECT vpn_connection_id::text, cluster_id::text, node_id::text, role_preference,
status, metadata, created_by_user_id::text, created_at
FROM vpn_connection_allowed_nodes
WHERE cluster_id = $1::uuid
AND vpn_connection_id = $2::uuid
ORDER BY role_preference, created_at DESC
`
	rows, err := q.Query(ctx, allowedNodesQuery, clusterID, vpnConnectionID)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var nodes []VPNConnectionAllowedNode
	for rows.Next() {
		node, scanErr := scanVPNAllowedNode(rows)
		if scanErr != nil {
			return nil, scanErr
		}
		nodes = append(nodes, node)
	}
	return nodes, rows.Err()
}
// scanVPNRoutePolicy decodes a single row into a VPNConnectionRoutePolicy.
//
// A scan failure returns the zero value and the error. On success an empty
// Policy field is replaced with `{}`.
func scanVPNRoutePolicy(row scanner) (VPNConnectionRoutePolicy, error) {
	var policy VPNConnectionRoutePolicy
	dest := []any{
		&policy.ID,
		&policy.VPNConnectionID,
		&policy.ClusterID,
		&policy.OrganizationID,
		&policy.RouteType,
		&policy.Destination,
		&policy.Action,
		&policy.ServiceType,
		&policy.Priority,
		&policy.Policy,
		&policy.Status,
		&policy.CreatedByUserID,
		&policy.CreatedAt,
		&policy.UpdatedAt,
	}
	if err := row.Scan(dest...); err != nil {
		return VPNConnectionRoutePolicy{}, err
	}
	ensureRaw(&policy.Policy, `{}`)
	return policy, nil
}
// scanVPNRoutePolicies drains rows, decoding each one with
// scanVPNRoutePolicy. The first scan error aborts the loop and returns a nil
// slice. Otherwise the collected policies are returned with rows.Err().
func scanVPNRoutePolicies(rows pgx.Rows) ([]VPNConnectionRoutePolicy, error) {
	var policies []VPNConnectionRoutePolicy
	for rows.Next() {
		policy, scanErr := scanVPNRoutePolicy(rows)
		if scanErr != nil {
			return nil, scanErr
		}
		policies = append(policies, policy)
	}
	return policies, rows.Err()
}
// scanVPNLease decodes a single row into a VPNConnectionLease.
//
// A scan failure returns the zero value and the error. On success an empty
// Metadata field is replaced with `{}`.
func scanVPNLease(row scanner) (VPNConnectionLease, error) {
	var lease VPNConnectionLease
	dest := []any{
		&lease.ID,
		&lease.VPNConnectionID,
		&lease.ClusterID,
		&lease.OwnerNodeID,
		&lease.LeaseGeneration,
		&lease.FencingToken,
		&lease.Status,
		&lease.AcquiredAt,
		&lease.RenewedAt,
		&lease.ExpiresAt,
		&lease.ReleasedAt,
		&lease.FencedAt,
		&lease.Metadata,
	}
	if err := row.Scan(dest...); err != nil {
		return VPNConnectionLease{}, err
	}
	ensureRaw(&lease.Metadata, `{}`)
	return lease, nil
}
// scanNodeVPNAssignment decodes a single row into a NodeVPNAssignment.
//
// The active-lease column is scanned as raw JSON. When it is non-empty and
// not the literal `null` it is decoded into ActiveLease, and a decode failure
// is returned as an error. Empty raw-JSON fields are replaced by their
// fallback documents. Any failure returns the zero NodeVPNAssignment.
func scanNodeVPNAssignment(row scanner) (NodeVPNAssignment, error) {
	var (
		assignment NodeVPNAssignment
		leaseJSON  json.RawMessage
	)
	dest := []any{
		&assignment.VPNConnectionID,
		&assignment.ClusterID,
		&assignment.OrganizationID,
		&assignment.Name,
		&assignment.TargetEndpoint,
		&assignment.ProtocolFamily,
		&assignment.Mode,
		&assignment.DesiredState,
		&assignment.RoutingUsage,
		&assignment.RoutePolicy,
		&assignment.QoSPolicy,
		&assignment.PlacementPolicy,
		&assignment.Status,
		&assignment.HasCredentialRef,
		&assignment.AssignmentReason,
		&leaseJSON,
		&assignment.UpdatedAt,
	}
	if err := row.Scan(dest...); err != nil {
		return NodeVPNAssignment{}, err
	}
	fallbacks := []struct {
		raw      *json.RawMessage
		fallback string
	}{
		{&assignment.TargetEndpoint, `{}`},
		{&assignment.RoutingUsage, `[]`},
		{&assignment.RoutePolicy, `{}`},
		{&assignment.QoSPolicy, `{}`},
		{&assignment.PlacementPolicy, `{}`},
	}
	for _, f := range fallbacks {
		ensureRaw(f.raw, f.fallback)
	}
	if len(leaseJSON) == 0 || string(leaseJSON) == "null" {
		return assignment, nil
	}
	var lease NodeVPNAssignmentLease
	if err := json.Unmarshal(leaseJSON, &lease); err != nil {
		return NodeVPNAssignment{}, err
	}
	assignment.ActiveLease = &lease
	return assignment, nil
}
// scanNodeVPNAssignmentStatus decodes a single row into a
// NodeVPNAssignmentStatus.
//
// A scan failure returns the zero value and the error. On success an empty
// StatusPayload field is replaced with `{}`.
func scanNodeVPNAssignmentStatus(row scanner) (NodeVPNAssignmentStatus, error) {
	var status NodeVPNAssignmentStatus
	dest := []any{
		&status.ID,
		&status.VPNConnectionID,
		&status.ClusterID,
		&status.NodeID,
		&status.ObservedStatus,
		&status.StatusPayload,
		&status.ObservedAt,
	}
	if err := row.Scan(dest...); err != nil {
		return NodeVPNAssignmentStatus{}, err
	}
	ensureRaw(&status.StatusPayload, `{}`)
	return status, nil
}
// isUniqueViolation reports whether err wraps a *pgconn.PgError whose Code is
// "23505".
func isUniqueViolation(err error) bool {
	var pgErr *pgconn.PgError
	return errors.As(err, &pgErr) && pgErr.Code == "23505"
}
// getJoinRequestForUpdate loads one node join request of the cluster inside
// tx using SELECT ... FOR UPDATE and decodes it with scanJoinRequest.
func getJoinRequestForUpdate(ctx context.Context, tx pgx.Tx, clusterID, joinRequestID string) (NodeJoinRequest, error) {
	const lockQuery = `
SELECT id::text, cluster_id::text, join_token_id::text, node_name, node_fingerprint, public_key,
reported_capabilities, reported_facts, requested_roles, status, reviewed_by_user_id::text,
reviewed_at, approved_node_id::text, rejection_reason, created_at, updated_at, approval_payload, approval_signature
FROM node_join_requests
WHERE cluster_id = $1::uuid
AND id = $2::uuid
FOR UPDATE
`
	return scanJoinRequest(tx.QueryRow(ctx, lockQuery, clusterID, joinRequestID))
}
// ensureRaw overwrites *raw with fallback when *raw is nil or zero-length.
// A non-empty *raw is left untouched.
func ensureRaw(raw *json.RawMessage, fallback string) {
	if len(*raw) > 0 {
		return
	}
	*raw = json.RawMessage(fallback)
}
// mergeJSONObjects decodes raw as a JSON object, overlays every entry of
// values on top of it (values win on key clashes) and returns the re-encoded
// object.
//
// Decoding is best-effort: raw that is empty, null, or not a JSON object is
// treated as an empty object. If the merged object cannot be marshalled, raw
// is returned unchanged.
func mergeJSONObjects(raw json.RawMessage, values map[string]any) json.RawMessage {
	var merged map[string]any
	if err := json.Unmarshal(raw, &merged); err != nil || merged == nil {
		merged = make(map[string]any, len(values))
	}
	for key := range values {
		merged[key] = values[key]
	}
	encoded, err := json.Marshal(merged)
	if err != nil {
		return raw
	}
	return encoded
}
// enrichVPNClientFabricRoute returns item.ClientConfig extended with the
// "vpn_fabric_route" and "vpn_dataplane_contract" sections.
//
// Entry pool: item.EntryNodeIDs, or item.AllowedNodeIDs when that is empty and
// the connection does not use a client-node entry. A client-node entry is
// signalled by placement policy entry_selector == "client_node"
// (case-insensitive) or android_node_agent_target == true; in that case no
// entry node is selected.
//
// Exit pool: item.ExitNodeID, the active lease owner, then
// item.AllowedNodeIDs, de-duplicated in that order.
//
// preferredEntryNodeID / preferredExitNodeID are honoured when they are
// members of the respective pool; otherwise the first pool member is chosen
// (see selectPreferredNode).
//
// Route status:
//   - "planned"                    entry side and exit side are both ready,
//   - "waiting_for_entry_and_exit" neither side is ready,
//   - "waiting_for_entry"          only the entry side is missing,
//   - "waiting_for_exit"           only the exit side is missing.
//
// The entry side counts as ready when an entry node was selected or the
// connection uses a client-node entry. An undecodable or null ClientConfig is
// treated as an empty object; if the final marshal fails the original
// ClientConfig is returned unchanged.
func enrichVPNClientFabricRoute(item VPNClientConnection, preferredEntryNodeID, preferredExitNodeID string) json.RawMessage {
	var cfg map[string]any
	if err := json.Unmarshal(item.ClientConfig, &cfg); err != nil || cfg == nil {
		cfg = map[string]any{}
	}
	entryPool := dedupeStrings(append([]string{}, item.EntryNodeIDs...))
	placementPolicy := jsonObjectFromRaw(item.PlacementPolicy)
	entrySelector, _ := placementPolicy["entry_selector"].(string)
	clientNodeEntry := strings.EqualFold(strings.TrimSpace(entrySelector), "client_node") || placementPolicy["android_node_agent_target"] == true
	if len(entryPool) == 0 && !clientNodeEntry {
		entryPool = dedupeStrings(append([]string{}, item.AllowedNodeIDs...))
	}
	// Non-nil on purpose so an empty pool marshals as [] rather than null.
	exitPool := []string{}
	if item.ExitNodeID != "" {
		exitPool = append(exitPool, item.ExitNodeID)
	}
	if item.ActiveLease != nil && item.ActiveLease.OwnerNodeID != "" {
		exitPool = append(exitPool, item.ActiveLease.OwnerNodeID)
	}
	exitPool = append(exitPool, item.AllowedNodeIDs...)
	exitPool = dedupeStrings(exitPool)
	preferredEntryNodeID = strings.TrimSpace(preferredEntryNodeID)
	selectedEntry := ""
	if !clientNodeEntry {
		selectedEntry = selectPreferredNode(entryPool, preferredEntryNodeID)
	}
	selectedExit := selectPreferredNode(exitPool, preferredExitNodeID)
	if selectedExit == "" && item.ActiveLease != nil && item.ActiveLease.OwnerNodeID != "" {
		selectedExit = item.ActiveLease.OwnerNodeID
	}
	// With a client-node entry the client itself is the entry, so the entry
	// side is ready even though no entry node is selected.
	entryReady := clientNodeEntry || selectedEntry != ""
	exitReady := selectedExit != ""
	var status string
	switch {
	case entryReady && exitReady:
		status = "planned"
	case !entryReady && !exitReady:
		status = "waiting_for_entry_and_exit"
	case !entryReady:
		status = "waiting_for_entry"
	default:
		status = "waiting_for_exit"
	}
	routeCandidates := vpnFabricRouteCandidates(entryPool, exitPool, selectedEntry, selectedExit)
	cfg["vpn_fabric_route"] = map[string]any{
		"schema_version":          "rap.vpn_fabric_route.v1",
		"status":                  status,
		"preferred_data_plane":    "fabric_service_channel",
		"fallback_data_plane":     "none",
		"backend_relay_fallback":  false,
		"selection_mode":          "farm_authoritative_client_node_to_exit_pool",
		"route_authority":         "fabric_farm",
		"entry_selector":          firstNonEmptyString(entrySelector, "entry-node"),
		"client_node_entry":       clientNodeEntry,
		"vpn_builds_routes":       false,
		"vpn_builds_tunnels":      false,
		"farm_builds_routes":      true,
		"farm_builds_tunnels":     true,
		"entry_pool_node_ids":     entryPool,
		"exit_pool_node_ids":      exitPool,
		"selected_entry_node_id":  selectedEntry,
		"selected_exit_node_id":   selectedExit,
		"active_lease_owner_node": selectedExit,
		"route_candidates":        routeCandidates,
		"route_candidate_count":   len(routeCandidates),
		"route_policy":            "full_tunnel_or_connection_policy",
	}
	cfg["vpn_dataplane_contract"] = map[string]any{
		"schema_version":                  "rap.vpn_packet_dataplane.v1",
		"tunnel_type":                     "universal_ip_packet",
		"application_protocol_agnostic":   true,
		"packet_forwarding_channel":       "vpn_packet",
		"control_plane_packet_relay_mode": "fabric_service_channel_only",
		"route_authority":                 "fabric_farm",
		"backend_relay_allowed":           false,
		"requires_fabric_service_channel": true,
		"vpn_builds_routes":               false,
		"vpn_builds_tunnels":              false,
		"farm_builds_routes":              true,
		"farm_builds_tunnels":             true,
		"traffic_contract": map[string]any{
			"all_ip_traffic":                      true,
			"protocol_specific_routing":           false,
			"diagnostics_only_protocol_summaries": true,
		},
		"route_selection": map[string]any{
			"mode":                   "farm_authoritative_lowest_latency_healthy_route_to_exit_pool",
			"entry_selector":         firstNonEmptyString(entrySelector, "entry-node"),
			"client_node_entry":      clientNodeEntry,
			"selected_entry_node_id": selectedEntry,
			"selected_exit_node_id":  selectedExit,
			"route_candidates":       routeCandidates,
		},
		"failover": map[string]any{
			"enabled":                    true,
			"owner":                      "fabric_farm",
			"client_topology_hidden":     true,
			"preserve_vpn_connection_id": true,
			"alternate_route_count":      alternateVPNRouteCount(routeCandidates, selectedEntry, selectedExit),
			"reroute_triggers": []string{
				"client_node_mesh_path_unhealthy",
				"exit_unhealthy",
				"mesh_route_latency_regression",
				"mesh_route_loss_regression",
				"queue_backpressure",
				"lease_owner_replaced",
			},
		},
		"backpressure": map[string]any{
			"queue_policy":       "bounded_queue_then_route_failover",
			"drop_policy":        "drop_only_when_all_routes_unavailable_or_queue_full",
			"bulk_and_realtime":  "same_packet_path",
			"flow_isolation":     "hash_by_ip_protocol_and_ports",
			"target_dataplane":   "fabric_farm_entry_to_exit_service_channel",
			"temporary_fallback": "none",
		},
	}
	out, err := json.Marshal(cfg)
	if err != nil {
		return item.ClientConfig
	}
	return out
}
// jsonObjectFromRaw decodes raw as a JSON object. It always returns a non-nil
// map: empty, null, undecodable, or non-object input yields an empty map.
func jsonObjectFromRaw(raw json.RawMessage) map[string]any {
	if len(raw) > 0 {
		var decoded map[string]any
		if err := json.Unmarshal(raw, &decoded); err == nil && decoded != nil {
			return decoded
		}
	}
	return map[string]any{}
}
// vpnFabricRouteCandidates enumerates the (entry, exit) route candidates for
// a VPN connection.
//
// Candidate order:
//  1. with an empty entry pool: the selected exit alone, then every exit of
//     the pool alone (entry-less routes);
//  2. the selected entry/exit pair, when both are set;
//  3. every entry × exit combination in pool order.
//
// Blank IDs are skipped and repeated pairs are emitted once. A candidate is
// "preferred" with priority 0 when its exit equals selectedExit and its entry
// equals selectedEntry (or selectedEntry is blank). Every other candidate is
// "alternate", with priority equal to its 1-based position in the output.
// "entry_node_id" is only present for candidates that have an entry. The
// result is never nil.
func vpnFabricRouteCandidates(entryPool, exitPool []string, selectedEntry, selectedExit string) []map[string]any {
	type hop struct {
		entry string
		exit  string
	}
	ordered := make([]hop, 0, len(entryPool)*len(exitPool)+1)
	if len(entryPool) == 0 {
		if selectedExit != "" {
			ordered = append(ordered, hop{exit: selectedExit})
		}
		for _, exitID := range exitPool {
			if exitID == "" {
				continue
			}
			ordered = append(ordered, hop{exit: exitID})
		}
	}
	if selectedEntry != "" && selectedExit != "" {
		ordered = append(ordered, hop{entry: selectedEntry, exit: selectedExit})
	}
	for _, entryID := range entryPool {
		if entryID == "" {
			continue
		}
		for _, exitID := range exitPool {
			if exitID == "" {
				continue
			}
			ordered = append(ordered, hop{entry: entryID, exit: exitID})
		}
	}

	candidates := make([]map[string]any, 0, len(ordered))
	emitted := map[string]struct{}{}
	for _, h := range ordered {
		if h.exit == "" {
			continue
		}
		dedupeKey := h.entry + "\x00" + h.exit
		if _, duplicate := emitted[dedupeKey]; duplicate {
			continue
		}
		emitted[dedupeKey] = struct{}{}

		role, priority := "alternate", len(candidates)+1
		if h.exit == selectedExit && (selectedEntry == "" || h.entry == selectedEntry) {
			role, priority = "preferred", 0
		}
		candidate := map[string]any{
			"exit_node_id": h.exit,
			"role":         role,
			"priority":     priority,
			"status":       "candidate",
			"source_role":  "vpn-client",
			"route_scope":  "client_node_to_exit_pool",
		}
		if h.entry != "" {
			candidate["entry_node_id"] = h.entry
		}
		candidates = append(candidates, candidate)
	}
	return candidates
}
// alternateVPNRouteCount counts the candidates that carry both a string
// "entry_node_id" and a string "exit_node_id" and are not the
// (selectedEntry, selectedExit) pair. Candidates missing either ID are
// ignored.
func alternateVPNRouteCount(candidates []map[string]any, selectedEntry, selectedExit string) int {
	alternates := 0
	for i := range candidates {
		entryID, _ := candidates[i]["entry_node_id"].(string)
		exitID, _ := candidates[i]["exit_node_id"].(string)
		switch {
		case entryID == "", exitID == "":
			// Incomplete route: not counted.
		case entryID == selectedEntry && exitID == selectedExit:
			// The selected route itself is not an alternate.
		default:
			alternates++
		}
	}
	return alternates
}
// selectPreferredNode returns preferred (trimmed) when it is a member of
// pool. Otherwise it returns the first pool element, or "" for an empty pool.
func selectPreferredNode(pool []string, preferred string) string {
	if wanted := strings.TrimSpace(preferred); wanted != "" {
		for i := range pool {
			if pool[i] == wanted {
				return pool[i]
			}
		}
	}
	if len(pool) == 0 {
		return ""
	}
	return pool[0]
}
// dedupeStrings returns the non-empty elements of values with duplicates
// removed, keeping the first occurrence of each and preserving order. The
// result is always a fresh, non-nil slice; values is not modified.
func dedupeStrings(values []string) []string {
	unique := make([]string, 0, len(values))
	known := make(map[string]struct{}, len(values))
	for i := range values {
		value := values[i]
		if value == "" {
			continue
		}
		if _, duplicate := known[value]; !duplicate {
			known[value] = struct{}{}
			unique = append(unique, value)
		}
	}
	return unique
}