Files
rdp-proxy/scripts/fabric/c17z12-rendezvous-relay-smoke-ssh.ps1
T
2026-04-28 22:29:50 +03:00

918 lines
43 KiB
PowerShell

<#
.SYNOPSIS
Runs the C17Z18 rendezvous relay replacement smoke test against a remote Docker host reached over SSH.

.PARAMETER DockerSshAlias
SSH host alias used for every remote docker / shell command.

.PARAMETER BackendImageTag
Tag given to the backend image built on the remote host.

.PARAMETER NodeAgentImageTag
Tag given to the node-agent image built on the remote host.

.PARAMETER AdminEmail
E-mail of the platform owner that is seeded into PostgreSQL and used to log in.

.PARAMETER AdminPassword
Password sent to /auth/login. NOTE(review): the script seeds a fixed password hash,
so presumably only the default value can log in - confirm before overriding.

.PARAMETER ApiPort
Port the backend listens on (used for both the public and the loopback base URL).

.PARAMETER PostgresPort
Host port published for the PostgreSQL container.

.PARAMETER RedisPort
Host port published for the Redis container.

.PARAMETER MeshBasePort
First mesh listen port; node agents use MeshBasePort .. MeshBasePort + 4.

.PARAMETER KeepRunning
Not consumed in the visible part of the script; presumably skips teardown - confirm.

.PARAMETER DockerHostAddress
Address under which the remote Docker host is reachable from the machine running
this script. Defaults to the previously hard-coded value.
#>
param(
    [string]$DockerSshAlias = "test-docker",
    [string]$BackendImageTag = "rap-backend:c17z18-rendezvous-smoke",
    [string]$NodeAgentImageTag = "rap-node-agent:c17z18-rendezvous-smoke",
    [string]$AdminEmail = "fabric-owner-c17z18@example.local",
    [string]$AdminPassword = "SmokePass!123",
    [int]$ApiPort = 18120,
    [int]$PostgresPort = 15442,
    [int]$RedisPort = 16442,
    [int]$MeshBasePort = 19120,
    [switch]$KeepRunning,
    # Appended last so existing positional invocations keep binding the same way.
    [string]$DockerHostAddress = "192.168.200.61"
)

# Fail fast: undefined variables/properties and non-terminating errors both abort the run.
Set-StrictMode -Version Latest
$ErrorActionPreference = "Stop"

# Repository root is two directories above this script; it is the tar source for the build context.
$repoRoot = (Resolve-Path (Join-Path $PSScriptRoot "..\..")).ProviderPath

# Base URL used from this machine. ${...} is required because "$DockerHostAddress:" would
# otherwise be parsed as a scope/drive-qualified variable name.
$backendPublicBaseUrl = "http://${DockerHostAddress}:$ApiPort/api/v1"
# Base URL handed to the node-agent containers (they run with --network host).
$backendContainerBaseUrl = "http://127.0.0.1:$ApiPort/api/v1"

# Per-run identifier: stamped into metadata, route versions and remote temp paths.
$runId = "c17z18-" + (Get-Date -Format "yyyyMMdd-HHmmss")
$remoteBuildDir = "/tmp/rap-c17z18-build-$runId"

# Fixed container names (they keep the c17z12 prefix of the script file name).
$postgresName = "rap_c17z12_postgres"
$redisName = "rap_c17z12_redis"
$backendName = "rap_c17z12_backend"
$nodePrefix = "rap_c17z12_node_"
# Runs `docker <Arguments>` on the SSH host and lets its output flow to the caller.
# Throws when the ssh/docker invocation exits with a non-zero code.
function Invoke-RemoteDocker {
    param([string[]]$Arguments)

    & ssh $DockerSshAlias docker @Arguments

    if ($LASTEXITCODE -eq 0) {
        return
    }
    throw "ssh $DockerSshAlias docker $($Arguments -join ' ') failed with exit code $LASTEXITCODE"
}
# Runs `docker <Arguments>` on the SSH host and returns stdout and stderr merged,
# one string per line. Throws when the command exits with a non-zero code.
function Invoke-RemoteDockerText {
    param([string[]]$Arguments)

    # Relax the error preference while stderr is merged into the pipeline, then
    # restore the caller's setting no matter how the command ends.
    $savedPreference = $ErrorActionPreference
    $ErrorActionPreference = "Continue"
    try {
        $lines = & ssh $DockerSshAlias docker @Arguments 2>&1 | ForEach-Object { $_.ToString() }
    }
    finally {
        $ErrorActionPreference = $savedPreference
    }

    if ($LASTEXITCODE -eq 0) {
        return $lines
    }
    throw "ssh $DockerSshAlias docker $($Arguments -join ' ') failed with exit code $LASTEXITCODE"
}
# Executes a single command line on the SSH host; throws on a non-zero exit code.
function Invoke-RemoteShell {
    param([string]$Command)

    & ssh $DockerSshAlias $Command

    if ($LASTEXITCODE -eq 0) {
        return
    }
    throw "ssh $DockerSshAlias $Command failed with exit code $LASTEXITCODE"
}
# Pipes the given SQL text over SSH into psql inside the PostgreSQL container.
# psql runs with ON_ERROR_STOP=1; a non-zero exit code is turned into an exception.
function Invoke-RemotePostgresSql {
    param([string]$Sql)

    $remotePsql = "docker exec -i $postgresName psql -U rap_user -d remote_access_platform -v ON_ERROR_STOP=1 -f -"
    $Sql | & ssh $DockerSshAlias $remotePsql

    if ($LASTEXITCODE -eq 0) {
        return
    }
    throw "remote psql command failed"
}
# Writes $Content to $RemotePath on the SSH host by piping it into a remote `cat`.
# Throws when the remote write exits with a non-zero code.
function Send-RemoteFile {
    param(
        [string]$RemotePath,
        [string]$Content
    )

    $remoteWrite = "cat > '$RemotePath'"
    $Content | & ssh $DockerSshAlias $remoteWrite

    if ($LASTEXITCODE -eq 0) {
        return
    }
    throw "write remote file failed: $RemotePath"
}
# Recreates $remoteBuildDir on the SSH host and streams the "backend" and
# "agents/rap-node-agent" directories from $repoRoot into it as a gzip'd tar.
function Send-RemoteBuildContext {
    Write-Host "Uploading backend and node-agent build context to $DockerSshAlias..."

    $resetRemoteDir = "rm -rf '$remoteBuildDir' && mkdir -p '$remoteBuildDir'"
    Invoke-RemoteShell -Command $resetRemoteDir

    $remoteExtract = "tar -xzf - -C '$remoteBuildDir'"
    & tar -czf - -C $repoRoot "backend" "agents/rap-node-agent" | & ssh $DockerSshAlias $remoteExtract

    if ($LASTEXITCODE -eq 0) {
        return
    }
    throw "upload build context failed"
}
# Calls the backend REST API at "$backendPublicBaseUrl$Path" with a 30 second timeout.
# When $Body is supplied it is serialized to JSON (depth 50) and sent as application/json.
# Returns whatever Invoke-RestMethod returns.
function Invoke-Api {
    param(
        [string]$Method,
        [string]$Path,
        [object]$Body = $null
    )

    $request = @{
        Method     = $Method
        Uri        = "$backendPublicBaseUrl$Path"
        TimeoutSec = 30
    }

    if ($null -ne $Body) {
        $request["ContentType"] = "application/json"
        $request["Body"] = ($Body | ConvertTo-Json -Depth 50)
    }

    return Invoke-RestMethod @request
}
# Polls $Url until it answers with a 2xx status code.
#
# Parameters:
#   Url          - endpoint to probe (each probe has a 2 second timeout).
#   MaxAttempts  - number of probes before giving up (default 60, as before).
#   DelaySeconds - pause after every unsuccessful probe (default 1, as before).
#
# Returns nothing on success; throws "Timed out waiting for <Url>" when every probe failed.
function Wait-HttpReady {
    param(
        [string]$Url,
        [int]$MaxAttempts = 60,
        [int]$DelaySeconds = 1
    )

    for ($attempt = 0; $attempt -lt $MaxAttempts; $attempt++) {
        try {
            $response = Invoke-WebRequest -UseBasicParsing -Uri $Url -TimeoutSec 2
            if ($response.StatusCode -ge 200 -and $response.StatusCode -lt 300) {
                return
            }
        }
        catch {
            # Connection refused / HTTP error: the service is not ready yet, keep polling.
        }

        # Back off after *every* failed probe. Previously the sleep lived only in the
        # catch block, so a non-2xx answer that did not throw retried without delay
        # and exhausted all attempts almost immediately.
        Start-Sleep -Seconds $DelaySeconds
    }

    throw "Timed out waiting for $Url"
}
# Best-effort cleanup: force-removes the backend, PostgreSQL, Redis and the five
# node-agent containers ("a", "b", "c", "r", "idle") on the SSH host.
# Output and exit codes are deliberately ignored - a missing container is not an error.
function Remove-C17Z12Containers {
    $names = @($backendName, $postgresName, $redisName)
    foreach ($key in @("a", "b", "c", "r", "idle")) {
        $names += "$nodePrefix$key"
    }

    # The script runs with $ErrorActionPreference = "Stop". On Windows PowerShell,
    # redirected stderr from a native command is surfaced as error records, which
    # "Stop" can promote to a terminating error - e.g. when `docker rm` reports a
    # container that does not exist. Relax the preference for the duration of the
    # cleanup, mirroring what Invoke-RemoteDockerText does, and always restore it.
    $previousErrorActionPreference = $ErrorActionPreference
    $ErrorActionPreference = "Continue"
    try {
        foreach ($name in $names) {
            & ssh $DockerSshAlias docker rm -f $name 2>$null | Out-Null
        }
    }
    finally {
        $ErrorActionPreference = $previousErrorActionPreference
    }
}
# Builds one endpoint-candidate hashtable for a route intent's peer_endpoint_candidates.
# Caller-supplied fields are copied through; address_family ("ipv4"), region
# ("docker-test"), the current UTC timestamp and the run metadata are filled in here.
function New-EndpointCandidate {
    param(
        [string]$EndpointID,
        [string]$NodeID,
        [string]$Address,
        [string]$Transport,
        [string]$Reachability,
        [string]$ConnectivityMode,
        [string]$NATType,
        [int]$Priority,
        [string[]]$PolicyTags = @()
    )

    # Metadata marks the candidate as belonging to this smoke run with payload forwarding off.
    $candidateMetadata = @{}
    $candidateMetadata["stage"] = "c17z18"
    $candidateMetadata["run_id"] = $runId
    $candidateMetadata["service_workload_traffic"] = $false
    $candidateMetadata["production_payload_forwarding"] = $false

    $candidate = @{}
    $candidate["endpoint_id"] = $EndpointID
    $candidate["node_id"] = $NodeID
    $candidate["transport"] = $Transport
    $candidate["address"] = $Address
    $candidate["address_family"] = "ipv4"
    $candidate["reachability"] = $Reachability
    $candidate["nat_type"] = $NATType
    $candidate["connectivity_mode"] = $ConnectivityMode
    $candidate["region"] = "docker-test"
    $candidate["priority"] = $Priority
    $candidate["policy_tags"] = $PolicyTags
    $candidate["last_verified_at"] = (Get-Date).ToUniversalTime().ToString("o")
    $candidate["metadata"] = $candidateMetadata

    return $candidate
}
# Strict-mode-safe property read: returns the value of $PropertyName on $Object,
# or $null when the object itself is $null or has no property with that name.
function Get-OptionalProperty {
    param(
        [object]$Object,
        [string]$PropertyName
    )

    if ($null -ne $Object) {
        # Look the member up through PSObject so a missing name yields $null instead of an error.
        $member = $Object.PSObject.Properties[$PropertyName]
        if ($null -ne $member) {
            return $member.Value
        }
    }

    return $null
}
# Returns how many items the optional property holds: 0 when Get-OptionalProperty
# yields $null, otherwise the element count of the value wrapped as an array.
function Get-OptionalArrayCount {
    param(
        [object]$Object,
        [string]$PropertyName
    )

    $found = Get-OptionalProperty -Object $Object -PropertyName $PropertyName
    if ($null -ne $found) {
        return @($found).Count
    }

    return 0
}
# Fetches up to five heartbeats for $NodeID in the current cluster and returns the
# $PropertyName entry from the metadata of the first heartbeat in the response
# (presumably the newest - ordering is decided by the API), or $null when absent.
function Get-LatestHeartbeatMetadataReport {
    param(
        [string]$NodeID,
        [string]$PropertyName
    )

    $heartbeatPath = "/clusters/$clusterID/nodes/$NodeID/heartbeats?actor_user_id=$actorUserID&limit=5"
    $response = Invoke-Api -Method Get -Path $heartbeatPath

    $firstHeartbeat = @($response.heartbeats) | Select-Object -First 1
    $heartbeatMetadata = Get-OptionalProperty -Object $firstHeartbeat -PropertyName "metadata"

    return Get-OptionalProperty -Object $heartbeatMetadata -PropertyName $PropertyName
}
# Returns the "mesh_rendezvous_lease_report" heartbeat metadata entry for the node.
function Get-LatestRendezvousLeaseReport {
    param([string]$NodeID)

    $lookup = @{
        NodeID       = $NodeID
        PropertyName = "mesh_rendezvous_lease_report"
    }
    Get-LatestHeartbeatMetadataReport @lookup
}
# Returns the "mesh_route_path_decision_report" heartbeat metadata entry for the node.
function Get-LatestRoutePathDecisionReport {
    param([string]$NodeID)

    $lookup = @{
        NodeID       = $NodeID
        PropertyName = "mesh_route_path_decision_report"
    }
    Get-LatestHeartbeatMetadataReport @lookup
}
# Returns the "mesh_route_generation_report" heartbeat metadata entry for the node.
function Get-LatestRouteGenerationReport {
    param([string]$NodeID)

    $lookup = @{
        NodeID       = $NodeID
        PropertyName = "mesh_route_generation_report"
    }
    Get-LatestHeartbeatMetadataReport @lookup
}
# Returns the "mesh_route_health_config_report" heartbeat metadata entry for the node.
function Get-LatestRouteHealthConfigReport {
    param([string]$NodeID)

    $lookup = @{
        NodeID       = $NodeID
        PropertyName = "mesh_route_health_config_report"
    }
    Get-LatestHeartbeatMetadataReport @lookup
}
# Returns the "mesh_route_health_feedback_refresh_report" heartbeat metadata entry for the node.
function Get-LatestRouteHealthFeedbackRefreshReport {
    param([string]$NodeID)

    $lookup = @{
        NodeID       = $NodeID
        PropertyName = "mesh_route_health_feedback_refresh_report"
    }
    Get-LatestHeartbeatMetadataReport @lookup
}
# Narrows a list of mesh links down to the route-health observations this smoke test
# asserts on. Reads the script-level $directIntent, $rendezvousIntent, $nodeAID,
# $nodeSID and $nodeRID. Returns an object with four array properties:
#   route_health_links       - synthetic route-health links whose config came from the control plane
#   direct_health            - of those, reachable links for the direct route intent
#   rendezvous_health        - of those, reachable links from node A for the rendezvous route intent
#   replacement_route_health - rendezvous links whose applied path decision, expected hops and
#                              observed ack path all go through node S, never node R, without drift
function Select-C17Z18RouteHealthSnapshot {
    param([object[]]$MeshLinks)

    $isControlPlaneRouteHealth = {
        $_.metadata.observation_type -eq "synthetic_route_health" -and
        $_.metadata.config_source -eq "control_plane"
    }
    $isReachableDirectRoute = {
        $_.metadata.route_id -eq $directIntent.route_intent.id -and
        $_.link_status -eq "reachable"
    }
    $isReachableRendezvousRouteFromA = {
        $_.source_node_id -eq $nodeAID -and
        $_.metadata.route_id -eq $rendezvousIntent.route_intent.id -and
        $_.link_status -eq "reachable"
    }
    $usesReplacementRelayOnly = {
        $_.metadata.route_path_decision_applied -eq $true -and
        $_.metadata.route_path_decision_selected_relay_id -eq $nodeSID -and
        (@($_.metadata.expected_effective_hops) -contains $nodeSID) -and
        (@($_.metadata.expected_effective_hops) -notcontains $nodeRID) -and
        (@($_.metadata.observed_ack_path) -contains $nodeSID) -and
        (@($_.metadata.observed_ack_path) -notcontains $nodeRID) -and
        $_.metadata.route_path_drift_detected -eq $false
    }

    $healthLinks = @($MeshLinks | Where-Object -FilterScript $isControlPlaneRouteHealth)
    $directLinks = @($healthLinks | Where-Object -FilterScript $isReachableDirectRoute)
    $rendezvousLinks = @($healthLinks | Where-Object -FilterScript $isReachableRendezvousRouteFromA)
    $replacementLinks = @($rendezvousLinks | Where-Object -FilterScript $usesReplacementRelayOnly)

    return [pscustomobject]@{
        route_health_links       = $healthLinks
        direct_health            = $directLinks
        rendezvous_health        = $rendezvousLinks
        replacement_route_health = $replacementLinks
    }
}
# Fetches the cluster's mesh links once and returns the raw response ("links"),
# the link list ("mesh_links") and the four filtered route-health groups produced
# by Select-C17Z18RouteHealthSnapshot, each normalized to an array.
function Get-C17Z18MeshLinkSnapshot {
    $response = Invoke-Api -Method Get -Path "/clusters/$clusterID/mesh/links?actor_user_id=$actorUserID"
    $allLinks = @($response.mesh_links)
    $groups = Select-C17Z18RouteHealthSnapshot -MeshLinks $allLinks

    $snapshot = [pscustomobject]@{
        links                    = $response
        mesh_links               = $allLinks
        route_health_links       = @($groups.route_health_links)
        direct_health            = @($groups.direct_health)
        rendezvous_health        = @($groups.rendezvous_health)
        replacement_route_health = @($groups.replacement_route_health)
    }
    return $snapshot
}
# Polls the mesh-link snapshot every two seconds until it contains at least one
# replacement route-health link or $TimeoutSeconds have elapsed. Always performs
# at least one fetch and returns the most recent snapshot either way.
function Wait-C17Z18ReplacementRouteHealthSnapshot {
    param([int]$TimeoutSeconds = 40)

    $giveUpAt = (Get-Date).AddSeconds($TimeoutSeconds)

    while ($true) {
        $snapshot = Get-C17Z18MeshLinkSnapshot
        if (@($snapshot.replacement_route_health).Count -gt 0) {
            return $snapshot
        }

        Start-Sleep -Seconds 2

        if ((Get-Date) -ge $giveUpAt) {
            return $snapshot
        }
    }
}
# --- Run banner, cleanup of leftovers, and remote image builds ---
Write-Host "C17Z18 rendezvous relay replacement smoke run: $runId"
Write-Host "Using SSH Docker host: $DockerSshAlias"
# Force-remove containers with the fixed names so this run can recreate them.
Remove-C17Z12Containers
# Stream the backend and node-agent sources into $remoteBuildDir on the Docker host.
Send-RemoteBuildContext
Write-Host "Building backend image on docker-test..."
# The backend image is built with its own directory as the build context.
Invoke-RemoteDocker -Arguments @("build", "-f", "$remoteBuildDir/backend/Dockerfile", "-t", $BackendImageTag, "$remoteBuildDir/backend")
Write-Host "Building node-agent image on docker-test..."
# The node-agent image uses the whole upload directory as context; only the Dockerfile is nested.
Invoke-RemoteDocker -Arguments @("build", "-f", "$remoteBuildDir/agents/rap-node-agent/Dockerfile", "-t", $NodeAgentImageTag, $remoteBuildDir)
# --- Data stores ---
Write-Host "Starting PostgreSQL and Redis..."
# PostgreSQL 16 with the database and credentials the backend DSN uses; container
# port 5432 is published on $PostgresPort of the Docker host.
Invoke-RemoteDocker -Arguments @(
"run", "-d",
"--name", $postgresName,
"-e", "POSTGRES_DB=remote_access_platform",
"-e", "POSTGRES_USER=rap_user",
"-e", "POSTGRES_PASSWORD=rap_password",
"-p", "$PostgresPort`:5432",
"postgres:16"
)
# Redis 7, container port 6379 published on $RedisPort.
Invoke-RemoteDocker -Arguments @(
"run", "-d",
"--name", $redisName,
"-p", "$RedisPort`:6379",
"redis:7"
)
# Remote readiness loop: try pg_isready once per second, up to 60 times; the remote
# shell exits 0 on the first success and 1 otherwise (which makes Invoke-RemoteShell throw).
# The backtick keeps PowerShell from expanding `$(...)` so the remote shell evaluates it.
Invoke-RemoteShell -Command "for i in `$(seq 1 60); do docker exec $postgresName pg_isready -U rap_user -d remote_access_platform >/dev/null 2>&1 && exit 0; sleep 1; done; exit 1"
# --- Schema migrations ---
Write-Host "Applying migrations..."
# Apply every *.up.sql under backend/migrations in sorted order by feeding each file
# to psql inside the PostgreSQL container (ON_ERROR_STOP=1 makes psql exit non-zero
# on the first failing statement).
# `|| exit 1` aborts the remote loop on the first failed migration: a shell `for`
# loop only reports the status of the last command it ran, so without it a failure
# in any file but the last one went unnoticed and later migrations ran on a broken schema.
# The backticks keep PowerShell from expanding `$(...)` / `$f`; the remote shell evaluates them.
Invoke-RemoteShell -Command "for f in `$(find '$remoteBuildDir/backend/migrations' -name '*.up.sql' | sort); do docker exec -i $postgresName psql -U rap_user -d remote_access_platform -v ON_ERROR_STOP=1 -f - < `$f || exit 1; done"
# --- Seed the platform owner account directly in PostgreSQL ---
Write-Host "Seeding platform owner..."
$adminUserID = [guid]::NewGuid().ToString()
# Fixed bcrypt hash (single-quoted so the `$` characters are not expanded).
# NOTE(review): presumably the hash of the default -AdminPassword; overriding the
# password without replacing this hash would make the later login fail - confirm.
$adminHash = '$2a$10$AqLRexkI1yXbuiMPU6dHM.KVUhF.t..9NolyK4OOodQTyTsHyG.7u'
# Upsert keyed on e-mail: an existing row gets its hash and role reset.
# NOTE(review): $AdminEmail is interpolated into the SQL text unescaped; fine for
# the default value, but a quote in the e-mail would break the statement.
Invoke-RemotePostgresSql -Sql @"
INSERT INTO users (id, email, password_hash, mfa_enabled, platform_role)
VALUES ('$adminUserID'::uuid, '$AdminEmail', '$adminHash', FALSE, 'platform_admin')
ON CONFLICT (email) DO UPDATE SET
password_hash = EXCLUDED.password_hash,
platform_role = 'platform_admin',
updated_at = NOW();
"@
# --- Backend API ---
Write-Host "Starting backend..."
# The backend runs with host networking, so it reaches PostgreSQL and Redis through
# the ports published on 127.0.0.1 and listens on $ApiPort on all interfaces.
Invoke-RemoteDocker -Arguments @(
    "run", "-d",
    "--name", $backendName,
    "--network", "host",
    "-e", "APP_NAME=rap-api",
    "-e", "APP_ENV=c17z18-smoke",
    "-e", "HTTP_HOST=0.0.0.0",
    "-e", "HTTP_PORT=$ApiPort",
    "-e", "POSTGRES_DSN=postgres://rap_user:rap_password@127.0.0.1:$PostgresPort/remote_access_platform?sslmode=disable",
    "-e", "REDIS_ADDR=127.0.0.1:$RedisPort",
    "-e", "AUTH_ACCESS_TOKEN_SECRET=c17z18-access-secret",
    "-e", "AUTH_REFRESH_HASH_SECRET=c17z18-refresh-secret",
    $BackendImageTag
)
# Probe /readyz on the same host:port the API calls use. The URL is derived from
# $backendPublicBaseUrl (its "/api/v1" suffix stripped) instead of repeating the
# host literal, so the readiness check and Invoke-Api can never point at different hosts.
Wait-HttpReady -Url (($backendPublicBaseUrl -replace '/api/v1$', '') + "/readyz")
# --- Log in as the seeded platform owner ---
Write-Host "Logging in as platform owner..."
$login = Invoke-Api -Method Post -Path "/auth/login" -Body @{
email = $AdminEmail
password = $AdminPassword
device_fingerprint = "c17z18-smoke-device"
device_label = "C17Z18 rendezvous relay replacement smoke"
trust_device = $true
}
# Only the user id is kept from the response; it is sent as actor_user_id on later calls.
$actorUserID = $login.user.id
# --- Create a dedicated cluster for this run ---
Write-Host "Creating C17Z18 cluster..."
# The slug gets the first 8 characters of a fresh GUID as a suffix; the metadata
# tags the cluster with the run id and marks forwarding/workload traffic as off.
$cluster = Invoke-Api -Method Post -Path "/clusters/" -Body @{
actor_user_id = $actorUserID
slug = "c17z18-$((New-Guid).Guid.Substring(0, 8))"
name = "C17Z18 Rendezvous Relay Replacement Smoke"
region = "docker-test"
metadata = @{
stage = "c17z18"
run_id = $runId
production_forwarding = $false
service_workload_traffic = $false
created_by = "c17z12-rendezvous-relay-smoke-ssh.ps1:c17z18"
}
}
$clusterID = $cluster.cluster.id
# --- Platform-scoped Fabric testing flags ---
Write-Host "Enabling test-only Fabric flags..."
# scope_type "platform" with null scope_id/cluster_id; turns on telemetry and
# synthetic links with 24 hours of history retention. The response is discarded.
Invoke-Api -Method Put -Path "/fabric/testing-flags" -Body @{
actor_user_id = $actorUserID
scope_type = "platform"
scope_id = $null
cluster_id = $null
enabled = $true
telemetry_enabled = $true
synthetic_links_enabled = $true
history_retention_hours = 24
metadata = @{
stage = "c17z18"
run_id = $runId
production_forwarding = $false
service_workload_traffic = $false
}
} | Out-Null
# One join token shared by all nodes: valid for two hours, scoped to the
# "core-mesh" and "relay-node" roles, and limited to 5 uses (one per entry in $nodeSpecs).
$joinToken = Invoke-Api -Method Post -Path "/clusters/$clusterID/join-tokens" -Body @{
actor_user_id = $actorUserID
scope = @{ purpose = "c17z18-rendezvous-relay-replacement-smoke"; roles = @("core-mesh", "relay-node") }
expires_at = (Get-Date).ToUniversalTime().AddHours(2).ToString("o")
max_uses = 5
}
# Node topology for the run. Each spec carries: key (container-name suffix and
# lookup key in $nodes), node name, requested roles, mesh port (MeshBasePort + 0..4),
# advertised transport, connectivity mode and NAT type.
#   a    - entry node (source of both route intents created later)
#   r    - relay named "stale-relay"
#   b    - destination of the direct route
#   c    - outbound-only node behind symmetric NAT
#   idle - relay named "s-alt-relay" (its id is stored as $nodeSID)
$nodeSpecs = @(
@{ key = "a"; name = "c17z18-node-a-entry"; roles = @("core-mesh"); port = $MeshBasePort; transport = "direct_tcp_tls"; connectivity = "direct"; nat = "none" },
@{ key = "r"; name = "c17z18-node-r-stale-relay"; roles = @("core-mesh", "relay-node"); port = ($MeshBasePort + 1); transport = "direct_tcp_tls"; connectivity = "direct"; nat = "none" },
@{ key = "b"; name = "c17z18-node-b-direct"; roles = @("core-mesh"); port = ($MeshBasePort + 2); transport = "direct_tcp_tls"; connectivity = "direct"; nat = "none" },
@{ key = "c"; name = "c17z18-node-c-outbound"; roles = @("core-mesh"); port = ($MeshBasePort + 3); transport = "outbound_reverse"; connectivity = "outbound_only"; nat = "symmetric" },
@{ key = "idle"; name = "c17z18-node-s-alt-relay"; roles = @("core-mesh", "relay-node"); port = ($MeshBasePort + 4); transport = "direct_tcp_tls"; connectivity = "direct"; nat = "none" }
)
# --- Register every node through the join-request API ---
# For each spec: submit a join request with the shared token, approve it, activate
# the requested roles, and remember the resulting node id and identity material.
# $nodes maps spec key -> object with id, name, fingerprint, public_key, port, roles,
# transport, connectivity and nat.
$nodes = @{}
foreach ($spec in $nodeSpecs) {
# Fingerprint and public key are unique placeholder strings built from fresh GUIDs.
$fingerprint = "c17z18-fp-$($spec.key)-$([guid]::NewGuid().ToString('N'))"
$publicKey = "c17z18-pub-$($spec.key)-$([guid]::NewGuid().ToString('N'))"
$joinRequest = Invoke-Api -Method Post -Path "/clusters/$clusterID/join-requests" -Body @{
join_token = $joinToken.join_token.token
node_name = $spec.name
node_fingerprint = $fingerprint
public_key = $publicKey
reported_capabilities = @{
can_accept_node_ingress = $true
can_route_mesh = $true
testing_node = $true
mesh_rendezvous_relay_control_contract = $true
mesh_rendezvous_lease_telemetry = $true
mesh_rendezvous_lease_refresh_contract = $true
mesh_rendezvous_relay_replacement_contract = $true
mesh_route_path_decision_contract = $true
mesh_route_generation_tracker = $true
}
reported_facts = @{
os = "linux"
runtime = "docker-test"
stage = "c17z18"
run_id = $runId
connectivity_mode = $spec.connectivity
nat_type = $spec.nat
}
requested_roles = $spec.roles
}
# Approve the request as a platform-managed node; the fingerprint doubles as node_key.
$approved = Invoke-Api -Method Post -Path "/clusters/$clusterID/join-requests/$($joinRequest.join_request.id)/approve" -Body @{
actor_user_id = $actorUserID
node_key = $fingerprint
ownership_type = "platform_managed"
owner_organization_id = $null
}
$nodeID = $approved.node_bootstrap.node_id
# Activate each requested role explicitly with a synthetic-only policy.
foreach ($role in $spec.roles) {
Invoke-Api -Method Post -Path "/clusters/$clusterID/nodes/$nodeID/roles" -Body @{
actor_user_id = $actorUserID
role = $role
status = "active"
policy = @{
stage = "c17z18"
run_id = $runId
synthetic_only = $true
production_payload_forwarding = $false
}
} | Out-Null
}
$nodes[$spec.key] = [pscustomobject]@{
id = $nodeID
name = $spec.name
fingerprint = $fingerprint
public_key = $publicKey
port = $spec.port
roles = $spec.roles
transport = $spec.transport
connectivity = $spec.connectivity
nat = $spec.nat
}
}
# Short names for the node ids; note that $nodeSID is the node registered under key "idle".
$nodeAID = $nodes["a"].id
$nodeRID = $nodes["r"].id
$nodeBID = $nodes["b"].id
$nodeCID = $nodes["c"].id
$nodeSID = $nodes["idle"].id
# Both route intents expire two hours from now.
$routeExpiresAt = (Get-Date).ToUniversalTime().AddHours(2).ToString("o")
# Relay R is advertised at MeshBasePort + 90, which is not the port node R is
# configured to listen on (MeshBasePort + 1) - this is what makes the relay "stale".
$staleRelayEndpoint = "http://127.0.0.1:$($MeshBasePort + 90)"
# node id -> endpoint map for the direct A -> B route.
$peerEndpointsDirect = @{}
$peerEndpointsDirect[$nodeAID] = "http://127.0.0.1:$($nodes["a"].port)"
$peerEndpointsDirect[$nodeBID] = "http://127.0.0.1:$($nodes["b"].port)"
# node id -> endpoint map for the relay-control route: A and S at their real ports,
# R at the stale endpoint. Node C has no entry here; it only appears as a candidate below.
$peerEndpointsRelayControl = @{}
$peerEndpointsRelayControl[$nodeAID] = "http://127.0.0.1:$($nodes["a"].port)"
$peerEndpointsRelayControl[$nodeRID] = $staleRelayEndpoint
$peerEndpointsRelayControl[$nodeSID] = "http://127.0.0.1:$($nodes["idle"].port)"
# node id -> list of endpoint candidates attached to the rendezvous route intent.
$peerEndpointCandidatesRelay = @{}
# Relay R: public direct candidate pointing at the stale endpoint, priority 10.
$peerEndpointCandidatesRelay[$nodeRID] = @(
New-EndpointCandidate `
-EndpointID "relay-r-public" `
-NodeID $nodeRID `
-Address $staleRelayEndpoint `
-Transport "direct_tcp_tls" `
-Reachability "public" `
-ConnectivityMode "direct" `
-NATType "none" `
-Priority 10 `
-PolicyTags @("relay-control", "same-site")
)
# Relay S: public direct candidate at its real port, priority 1, tagged "fast-path".
$peerEndpointCandidatesRelay[$nodeSID] = @(
New-EndpointCandidate `
-EndpointID "relay-s-alt-fast" `
-NodeID $nodeSID `
-Address "http://127.0.0.1:$($nodes["idle"].port)" `
-Transport "direct_tcp_tls" `
-Reachability "public" `
-ConnectivityMode "direct" `
-NATType "none" `
-Priority 1 `
-PolicyTags @("relay-control", "same-site", "fast-path")
)
# Node C: outbound-only candidate behind symmetric NAT, priority 5.
$peerEndpointCandidatesRelay[$nodeCID] = @(
New-EndpointCandidate `
-EndpointID "node-c-outbound-only" `
-NodeID $nodeCID `
-Address "http://127.0.0.1:$($nodes["c"].port)" `
-Transport "outbound_reverse" `
-Reachability "outbound_only" `
-ConnectivityMode "outbound_only" `
-NATType "symmetric" `
-Priority 5 `
-PolicyTags @("nat", "outbound-only")
)
# --- Route intents ---
Write-Host "Creating direct baseline and outbound-only relay-control route intents..."
# Baseline: a two-hop A -> B control route with explicit peer endpoints,
# synthetic traffic enabled and production forwarding off.
$directIntent = Invoke-Api -Method Post -Path "/clusters/$clusterID/mesh/route-intents" -Body @{
actor_user_id = $actorUserID
source_selector = @{ node_id = $nodeAID }
destination_selector = @{ node_id = $nodeBID }
service_class = "control"
priority = 10
policy = @{
synthetic_enabled = $true
peer_endpoints = $peerEndpointsDirect
hops = @($nodeAID, $nodeBID)
allowed_channels = @("fabric_control", "route_control")
max_ttl = 4
max_hops = 4
expires_at = $routeExpiresAt
route_version = "$runId-direct"
policy_version = "$runId-policy"
peer_directory_version = "$runId-peers"
production_forwarding = $false
}
}
# Scenario under test: an A -> C control route whose declared hops are A, R, S, C,
# with endpoint candidates for R, S and C and one explicit rendezvous lease that
# pins peer C to relay R at the stale endpoint. Later assertions look for a
# replacement that goes through S instead of R.
$rendezvousIntent = Invoke-Api -Method Post -Path "/clusters/$clusterID/mesh/route-intents" -Body @{
actor_user_id = $actorUserID
source_selector = @{ node_id = $nodeAID }
destination_selector = @{ node_id = $nodeCID }
service_class = "control"
priority = 20
policy = @{
synthetic_enabled = $true
peer_endpoints = $peerEndpointsRelayControl
peer_endpoint_candidates = $peerEndpointCandidatesRelay
hops = @($nodeAID, $nodeRID, $nodeSID, $nodeCID)
allowed_channels = @("fabric_control", "route_control")
max_ttl = 6
max_hops = 6
rendezvous_leases = @(
@{
lease_id = "$runId-explicit-stale-relay-lease"
peer_node_id = $nodeCID
relay_node_id = $nodeRID
relay_endpoint = $staleRelayEndpoint
transport = "relay_control"
connectivity_mode = "relay_required"
route_ids = @()
allowed_channels = @("fabric_control", "route_control")
priority = 4
control_plane_only = $true
issued_at = (Get-Date).ToUniversalTime().ToString("o")
expires_at = $routeExpiresAt
reason = "smoke_stale_relay_replacement"
metadata = @{
stage = "c17z18"
run_id = $runId
lease_refresh_contract = "node_scoped_synthetic_config_get"
relay_replacement_contract = "stale_relay_feedback_policy"
production_payload_forwarding = $false
}
}
)
expires_at = $routeExpiresAt
route_version = "$runId-rendezvous"
policy_version = "$runId-policy"
peer_directory_version = "$runId-peers"
production_forwarding = $false
}
}
# --- Snapshot each node's synthetic mesh config before any agent starts ---
$configs = @{}
foreach ($key in @("a", "r", "b", "c", "idle")) {
$configs[$key] = Invoke-Api -Method Get -Path "/clusters/$clusterID/nodes/$($nodes[$key].id)/mesh/synthetic-config"
}
# Node A's endpoint candidates for peer C (the candidates object is looked up by node id).
$nodeAPeerCandidates = Get-OptionalProperty -Object (Get-OptionalProperty -Object $configs["a"].synthetic_mesh_config -PropertyName "peer_endpoint_candidates") -PropertyName $nodeCID
# Initial leases in node A's config for peer C, split by relay: R (stale) vs S (alternate).
# NOTE(review): when "rendezvous_leases" is absent, Get-OptionalProperty emits $null
# into the pipeline and the filter then reads properties of $null, which presumably
# throws under Set-StrictMode -Version Latest instead of yielding an empty list - confirm.
$nodeAInitialStaleLeases = @(Get-OptionalProperty -Object $configs["a"].synthetic_mesh_config -PropertyName "rendezvous_leases" | Where-Object {
$_.peer_node_id -eq $nodeCID -and $_.relay_node_id -eq $nodeRID
})
$nodeAInitialAltLeases = @(Get-OptionalProperty -Object $configs["a"].synthetic_mesh_config -PropertyName "rendezvous_leases" | Where-Object {
$_.peer_node_id -eq $nodeCID -and $_.relay_node_id -eq $nodeSID
})
# Leases in node C's own config that name C as peer through either relay.
$nodeCLeases = @(Get-OptionalProperty -Object $configs["c"].synthetic_mesh_config -PropertyName "rendezvous_leases" | Where-Object {
$_.peer_node_id -eq $nodeCID -and ($_.relay_node_id -eq $nodeRID -or $_.relay_node_id -eq $nodeSID)
})
# --- Node-agent containers ---
Write-Host "Starting node-agent containers..."
# Start order is r, idle, b, c, a: node A, the source of both routes, comes up last
# (presumably so its peers are already listening - confirm).
foreach ($key in @("r", "idle", "b", "c", "a")) {
$node = $nodes[$key]
$containerName = "$nodePrefix$key"
# Per-node scratch directory on the Docker host holding the pre-seeded identity file.
$remoteStateDir = "/tmp/$runId-$key"
Invoke-RemoteShell -Command "rm -rf '$remoteStateDir' && mkdir -p '$remoteStateDir'"
# identity.json carries the ids and key material produced during registration.
$identity = @{
node_id = $node.id
cluster_id = $clusterID
node_name = $node.name
node_fingerprint = $node.fingerprint
public_key = $node.public_key
identity_status = "active"
created_at = (Get-Date).ToUniversalTime().ToString("o")
updated_at = (Get-Date).ToUniversalTime().ToString("o")
} | ConvertTo-Json -Depth 10
Send-RemoteFile -RemotePath "$remoteStateDir/identity.json" -Content $identity
# The container is created (not started) first so the state directory can be copied
# in before the agent runs. The same settings are passed both as RAP_* environment
# variables and as command-line flags after the image tag.
Invoke-RemoteDocker -Arguments @(
"create",
"--name", $containerName,
"--network", "host",
"-e", "RAP_BACKEND_URL=$backendContainerBaseUrl",
"-e", "RAP_NODE_STATE_DIR=/tmp/state",
"-e", "RAP_HEARTBEAT_INTERVAL_SECONDS=5",
"-e", "RAP_MESH_SYNTHETIC_RUNTIME_ENABLED=true",
"-e", "RAP_MESH_LISTEN_ADDR=0.0.0.0:$($node.port)",
"-e", "RAP_MESH_ADVERTISE_ENDPOINT=http://127.0.0.1:$($node.port)",
"-e", "RAP_MESH_ADVERTISE_TRANSPORT=$($node.transport)",
"-e", "RAP_MESH_CONNECTIVITY_MODE=$($node.connectivity)",
"-e", "RAP_MESH_NAT_TYPE=$($node.nat)",
"-e", "RAP_MESH_REGION=docker-test",
$NodeAgentImageTag,
"-backend-url", $backendContainerBaseUrl,
"-state-dir", "/tmp/state",
"-heartbeat-interval", "5s",
"-mesh-synthetic-runtime-enabled",
"-mesh-listen-addr", "0.0.0.0:$($node.port)",
"-mesh-advertise-endpoint", "http://127.0.0.1:$($node.port)",
"-mesh-advertise-transport", $node.transport,
"-mesh-connectivity-mode", $node.connectivity,
"-mesh-nat-type", $node.nat,
"-mesh-region", "docker-test"
) | Out-Null
# `docker cp` runs on the remote host, so the source path is the remote scratch directory.
Invoke-RemoteDocker -Arguments @("cp", "$remoteStateDir/.", "$containerName`:/tmp/state")
Invoke-RemoteDocker -Arguments @("start", $containerName) | Out-Null
}
# --- Let the agents run, then collect observations ---
Write-Host "Waiting for rendezvous relay-control observations..."
# Fixed 40 second settle period (agents heartbeat every 5 seconds).
Start-Sleep -Seconds 40
Write-Host "Waiting for replacement route-health effective path..."
# Poll up to another 40 seconds for a route-health link that uses the replacement relay;
# the last snapshot is returned even when none appeared, so later checks can fail softly.
$meshSnapshot = Wait-C17Z18ReplacementRouteHealthSnapshot -TimeoutSeconds 40
$links = $meshSnapshot.links
$summary = Invoke-Api -Method Get -Path "/cluster-admin-summaries?actor_user_id=$actorUserID"
# Heartbeat metadata reports: the lease report from A, R, S and C; every other report from A only.
$nodeALeaseReport = Get-LatestRendezvousLeaseReport -NodeID $nodeAID
$nodeRLeaseReport = Get-LatestRendezvousLeaseReport -NodeID $nodeRID
$nodeSLeaseReport = Get-LatestRendezvousLeaseReport -NodeID $nodeSID
$nodeCLeaseReport = Get-LatestRendezvousLeaseReport -NodeID $nodeCID
$nodeAPathDecisionReport = Get-LatestRoutePathDecisionReport -NodeID $nodeAID
$nodeARouteGenerationReport = Get-LatestRouteGenerationReport -NodeID $nodeAID
$nodeARouteHealthConfigReport = Get-LatestRouteHealthConfigReport -NodeID $nodeAID
$nodeARouteHealthFeedbackRefreshReport = Get-LatestRouteHealthFeedbackRefreshReport -NodeID $nodeAID
# Re-read node A's synthetic config to compare against the snapshot taken before the agents started.
$refreshedNodeAConfig = Invoke-Api -Method Get -Path "/clusters/$clusterID/nodes/$nodeAID/mesh/synthetic-config"
# --- Leases as reported by node A's heartbeat ---
$nodeAReportedLeases = @(Get-OptionalProperty -Object $nodeALeaseReport -PropertyName "leases")
# Reported leases for peer C via relay S carrying the replacement reason.
$nodeAReportedReplacementLeases = @($nodeAReportedLeases | Where-Object {
$_.peer_node_id -eq $nodeCID -and $_.relay_node_id -eq $nodeSID -and $_.reason -eq "stale_relay_replacement"
})
# Reported leases for peer C that still go through relay R (expected to be none).
$nodeAReportedStaleRelayLeases = @($nodeAReportedLeases | Where-Object {
$_.peer_node_id -eq $nodeCID -and $_.relay_node_id -eq $nodeRID
})
# --- The same two views taken from node A's refreshed control-plane config ---
$nodeAReplacementLeases = @(Get-OptionalProperty -Object $refreshedNodeAConfig.synthetic_mesh_config -PropertyName "rendezvous_leases" | Where-Object {
$_.peer_node_id -eq $nodeCID -and $_.relay_node_id -eq $nodeSID -and $_.reason -eq "stale_relay_replacement"
})
# Despite the name, this holds leases via relay R that are still present in the refreshed config.
$nodeAWithdrawnStaleLeases = @(Get-OptionalProperty -Object $refreshedNodeAConfig.synthetic_mesh_config -PropertyName "rendezvous_leases" | Where-Object {
$_.peer_node_id -eq $nodeCID -and $_.relay_node_id -eq $nodeRID
})
$nodeARelayPolicy = Get-OptionalProperty -Object $refreshedNodeAConfig.synthetic_mesh_config -PropertyName "rendezvous_relay_policy"
# route_path_decisions from the initial and the refreshed config.
$nodeAInitialPathDecisionReport = Get-OptionalProperty -Object $configs["a"].synthetic_mesh_config -PropertyName "route_path_decisions"
$nodeAConfigPathDecisionReport = Get-OptionalProperty -Object $refreshedNodeAConfig.synthetic_mesh_config -PropertyName "route_path_decisions"
# Path decisions node A reported for the rendezvous route that select relay S as a
# stale-relay replacement, with S present in and R absent from the effective hops.
$nodeAReportedPathDecisions = @(Get-OptionalProperty -Object $nodeAPathDecisionReport -PropertyName "decisions")
$nodeAReportedReplacementPathDecisions = @($nodeAReportedPathDecisions | Where-Object {
$_.route_id -eq $rendezvousIntent.route_intent.id -and
$_.selected_relay_id -eq $nodeSID -and
$_.decision_source -eq "stale_relay_replacement" -and
(@($_.effective_hops) -contains $nodeSID) -and
-not (@($_.effective_hops) -contains $nodeRID)
})
# Capture recent container logs: the last 120 lines per node agent (keyed like $nodes)
# and the last 80 lines of the backend. stdout and stderr arrive merged as string lines.
$nodeLogs = @{}
foreach ($key in @("a", "r", "b", "c", "idle")) {
$nodeLogs[$key] = Invoke-RemoteDockerText -Arguments @("logs", "--tail", "120", "$nodePrefix$key")
}
$backendLogs = Invoke-RemoteDockerText -Arguments @("logs", "--tail", "80", $backendName)
# --- Unpack the mesh-link snapshot into script-level arrays ---
$meshLinks = @($meshSnapshot.mesh_links)
$routeHealthLinks = @($meshSnapshot.route_health_links)
$directHealth = @($meshSnapshot.direct_health)
$rendezvousHealth = @($meshSnapshot.rendezvous_health)
$replacementRouteHealth = @($meshSnapshot.replacement_route_health)
# Links observed by the peer connection manager (as opposed to synthetic route health).
$managerLinks = @($meshLinks | Where-Object { $_.metadata.observation_type -eq "peer_connection_manager" })
# Reachable A -> C manager links resolved through rendezvous and in the relay_ready state.
$relayControlLinks = @($managerLinks | Where-Object {
$_.source_node_id -eq $nodeAID -and
$_.target_node_id -eq $nodeCID -and
$_.link_status -eq "reachable" -and
$_.metadata.transport_mode -eq "relay_control" -and
$_.metadata.rendezvous_resolved -eq $true -and
$_.metadata.relay_candidate -eq $true -and
$_.metadata.connection_state -eq "relay_ready"
})
# ...of which the ones relayed by the alternate relay S.
$replacementRelayControlLinks = @($relayControlLinks | Where-Object {
$_.metadata.relay_node_id -eq $nodeSID
})
# Same readiness signal taken from node A's lease report: only the first replacement lease is inspected.
$replacementRelayReadyFromLeaseReport = (
$nodeAReportedReplacementLeases.Count -gt 0 -and
(Get-OptionalProperty -Object $nodeAReportedReplacementLeases[0] -PropertyName "relay_ready") -eq $true -and
(Get-OptionalProperty -Object $nodeAReportedReplacementLeases[0] -PropertyName "connection_state") -eq "relay_ready"
)
# Node A's log lines joined into one string for regex checks.
$nodeALog = $nodeLogs["a"] -join "`n"
# True when the log holds a delivery-succeeded event for the direct route. The pattern
# requires "event" and "route_id" to be adjacent JSON fields in exactly this order.
$directRouteDelivered = $nodeALog -match ('"event":"fabric_route_delivery_succeeded","route_id":"' + [regex]::Escape($directIntent.route_intent.id) + '"')
# Boundary check over the lease reports of A, R, S and C: the flag stays $true only
# if every report exists, has control_plane_only = $true and has all three
# forwarding/traffic flags explicitly $false. A missing report or property fails the check.
$leaseReportBoundaryFlagsDisabled = $true
foreach ($report in @($nodeALeaseReport, $nodeRLeaseReport, $nodeSLeaseReport, $nodeCLeaseReport)) {
if ($null -eq $report -or
(Get-OptionalProperty -Object $report -PropertyName "control_plane_only") -ne $true -or
(Get-OptionalProperty -Object $report -PropertyName "relay_payload_forwarding") -ne $false -or
(Get-OptionalProperty -Object $report -PropertyName "production_payload_forwarding") -ne $false -or
(Get-OptionalProperty -Object $report -PropertyName "service_workload_traffic") -ne $false) {
$leaseReportBoundaryFlagsDisabled = $false
}
}
# --- Boundary checks on node A's remaining reports ---
# Each flag is $true only when the report exists and every listed property has
# exactly the expected value; a missing property compares as $null and fails.

# Path decision report: control-plane only, no payload forwarding, no workload
# traffic, no route-path forwarding runtime.
$pathDecisionBoundaryFlagsDisabled = (
$null -ne $nodeAPathDecisionReport -and
(Get-OptionalProperty -Object $nodeAPathDecisionReport -PropertyName "control_plane_only") -eq $true -and
(Get-OptionalProperty -Object $nodeAPathDecisionReport -PropertyName "production_payload_forwarding") -eq $false -and
(Get-OptionalProperty -Object $nodeAPathDecisionReport -PropertyName "service_workload_traffic") -eq $false -and
(Get-OptionalProperty -Object $nodeAPathDecisionReport -PropertyName "route_path_forwarding_runtime") -eq $false
)
# Route generation report: same four expectations.
$routeGenerationBoundaryFlagsDisabled = (
$null -ne $nodeARouteGenerationReport -and
(Get-OptionalProperty -Object $nodeARouteGenerationReport -PropertyName "control_plane_only") -eq $true -and
(Get-OptionalProperty -Object $nodeARouteGenerationReport -PropertyName "production_payload_forwarding") -eq $false -and
(Get-OptionalProperty -Object $nodeARouteGenerationReport -PropertyName "service_workload_traffic") -eq $false -and
(Get-OptionalProperty -Object $nodeARouteGenerationReport -PropertyName "route_path_forwarding_runtime") -eq $false
)
# Withdrawn-decision evidence from the route generation report (two counters and a list).
$nodeAWithdrawnDecisionCount = Get-OptionalProperty -Object $nodeARouteGenerationReport -PropertyName "withdrawn_decision_count"
$nodeATotalWithdrawnDecisionCount = Get-OptionalProperty -Object $nodeARouteGenerationReport -PropertyName "total_withdrawn_decision_count"
$nodeAWithdrawnDecisions = @(Get-OptionalProperty -Object $nodeARouteGenerationReport -PropertyName "withdrawn_decisions")
# Route-health config report: route-health-only and control-plane-only, with the
# synthetic route-health runtime flag expected $true and every production/workload flag $false.
$routeHealthConfigBoundaryFlagsDisabled = (
$null -ne $nodeARouteHealthConfigReport -and
(Get-OptionalProperty -Object $nodeARouteHealthConfigReport -PropertyName "control_plane_only") -eq $true -and
(Get-OptionalProperty -Object $nodeARouteHealthConfigReport -PropertyName "route_health_only") -eq $true -and
(Get-OptionalProperty -Object $nodeARouteHealthConfigReport -PropertyName "synthetic_route_health_route_path_runtime") -eq $true -and
(Get-OptionalProperty -Object $nodeARouteHealthConfigReport -PropertyName "production_route_path_forwarding_runtime") -eq $false -and
(Get-OptionalProperty -Object $nodeARouteHealthConfigReport -PropertyName "production_payload_forwarding") -eq $false -and
(Get-OptionalProperty -Object $nodeARouteHealthConfigReport -PropertyName "service_workload_traffic") -eq $false -and
(Get-OptionalProperty -Object $nodeARouteHealthConfigReport -PropertyName "test_service_route_config_changed") -eq $false
)
# Route-health feedback refresh report: control-plane and route-health only, no forwarding or workload traffic.
$routeHealthFeedbackRefreshBoundaryFlagsDisabled = (
$null -ne $nodeARouteHealthFeedbackRefreshReport -and
(Get-OptionalProperty -Object $nodeARouteHealthFeedbackRefreshReport -PropertyName "control_plane_only") -eq $true -and
(Get-OptionalProperty -Object $nodeARouteHealthFeedbackRefreshReport -PropertyName "route_health_only") -eq $true -and
(Get-OptionalProperty -Object $nodeARouteHealthFeedbackRefreshReport -PropertyName "production_payload_forwarding") -eq $false -and
(Get-OptionalProperty -Object $nodeARouteHealthFeedbackRefreshReport -PropertyName "service_workload_traffic") -eq $false
)
$passMatrix = [ordered]@{
backend_ready = $true
platform_owner_login = [bool]$actorUserID
cluster_created = [bool]$clusterID
fabric_testing_flags_enabled = $true
node_a_scoped_config_enabled = $configs["a"].synthetic_mesh_config.enabled -eq $true
node_a_has_direct_and_rendezvous_routes = @($configs["a"].synthetic_mesh_config.routes).Count -eq 2
node_a_has_outbound_peer_candidate = @($nodeAPeerCandidates).Count -gt 0
node_a_has_initial_stale_rendezvous_lease = $nodeAInitialStaleLeases.Count -gt 0
node_a_initial_lease_is_control_plane_only = ($nodeAInitialStaleLeases.Count -gt 0 -and $nodeAInitialStaleLeases[0].control_plane_only -eq $true)
node_a_initial_lease_uses_relay_control = ($nodeAInitialStaleLeases.Count -gt 0 -and $nodeAInitialStaleLeases[0].transport -eq "relay_control")
node_a_initial_auto_alt_relay_candidate = $nodeAInitialAltLeases.Count -gt 0
node_a_initial_path_decision_report = (Get-OptionalProperty -Object $nodeAInitialPathDecisionReport -PropertyName "schema_version") -eq "c17z18.route_path_decisions.v1"
node_a_report_replacement_lease_uses_alt_relay = ($nodeAReportedReplacementLeases.Count -gt 0 -and $nodeAReportedReplacementLeases[0].relay_node_id -eq $nodeSID)
node_a_report_stale_relay_lease_withdrawn = $nodeAReportedStaleRelayLeases.Count -eq 0
node_a_report_replacement_reason = ($nodeAReportedReplacementLeases.Count -gt 0 -and $nodeAReportedReplacementLeases[0].reason -eq "stale_relay_replacement")
node_a_reports_c17z18_path_decisions = (Get-OptionalProperty -Object $nodeAPathDecisionReport -PropertyName "schema_version") -eq "c17z18.mesh_route_path_decision_report.v1"
node_a_path_decision_replacement_count = (Get-OptionalProperty -Object $nodeAPathDecisionReport -PropertyName "replacement_decision_count") -gt 0
node_a_path_decision_uses_alt_relay = $nodeAReportedReplacementPathDecisions.Count -gt 0
node_a_path_decision_boundary_flags_disabled = $pathDecisionBoundaryFlagsDisabled
node_a_reports_c17z18_route_generation = (Get-OptionalProperty -Object $nodeARouteGenerationReport -PropertyName "schema_version") -eq "c17z18.mesh_route_generation_report.v1"
node_a_route_generation_active = (Get-OptionalProperty -Object $nodeARouteGenerationReport -PropertyName "active_decision_count") -gt 0
node_a_route_generation_applied = (Get-OptionalProperty -Object $nodeARouteGenerationReport -PropertyName "applied_decision_count") -gt 0
node_a_route_generation_withdrawn = ($nodeAWithdrawnDecisionCount -gt 0 -or $nodeATotalWithdrawnDecisionCount -gt 0 -or $nodeAWithdrawnDecisions.Count -gt 0)
node_a_route_generation_changed = (Get-OptionalProperty -Object $nodeARouteGenerationReport -PropertyName "generation_changed") -eq $true
node_a_route_generation_boundary_flags_disabled = $routeGenerationBoundaryFlagsDisabled
node_a_reports_c17z20_route_health_config = (Get-OptionalProperty -Object $nodeARouteHealthConfigReport -PropertyName "schema_version") -eq "c17z20.mesh_route_health_config_report.v1"
node_a_reports_c17z20_route_health_feedback_refresh = (Get-OptionalProperty -Object $nodeARouteHealthFeedbackRefreshReport -PropertyName "schema_version") -eq "c17z20.mesh_route_health_feedback_refresh_report.v1"
node_a_route_health_feedback_refresh_supported = (Get-OptionalProperty -Object $nodeARouteHealthFeedbackRefreshReport -PropertyName "feedback_refresh_supported") -eq $true
node_a_route_health_feedback_refresh_boundary_flags_disabled = $routeHealthFeedbackRefreshBoundaryFlagsDisabled
node_a_route_health_config_applied = (Get-OptionalProperty -Object $nodeARouteHealthConfigReport -PropertyName "route_path_decision_applied_count") -gt 0
node_a_route_health_config_replacement = (Get-OptionalProperty -Object $nodeARouteHealthConfigReport -PropertyName "replacement_route_health_route_count") -gt 0
node_a_route_health_config_boundary_flags_disabled = $routeHealthConfigBoundaryFlagsDisabled
node_a_route_health_uses_effective_alt_relay = $replacementRouteHealth.Count -gt 0
node_a_route_health_has_no_effective_path_drift = ($replacementRouteHealth.Count -gt 0 -and (Get-OptionalProperty -Object $replacementRouteHealth[0].metadata -PropertyName "route_path_drift_detected") -eq $false)
node_a_route_health_selected_alt_next_hop = $nodeALog -match ('"event":"fabric_route_selected","route_id":"' + [regex]::Escape($rendezvousIntent.route_intent.id) + '".*"next_node_id":"' + [regex]::Escape($nodeSID) + '"')
outbound_node_has_relay_lease = $nodeCLeases.Count -gt 0
direct_baseline_health_reported = $directRouteDelivered
node_a_loaded_c17z20_control_plane_config = ($nodeAReplacementLeases.Count -gt 0 -and (Get-OptionalProperty -Object $nodeARouteHealthConfigReport -PropertyName "schema_version") -eq "c17z20.mesh_route_health_config_report.v1")
node_a_resolved_waiting_rendezvous = ($replacementRelayControlLinks.Count -gt 0 -or $replacementRelayReadyFromLeaseReport)
relay_control_manager_link_reachable = ($replacementRelayControlLinks.Count -gt 0 -or $replacementRelayReadyFromLeaseReport)
relay_ready_recorded = ($replacementRelayControlLinks.Count -gt 0 -or $replacementRelayReadyFromLeaseReport)
node_a_reports_c17z18_lease_telemetry = (Get-OptionalProperty -Object $nodeALeaseReport -PropertyName "schema_version") -eq "c17z18.mesh_rendezvous_lease_report.v1"
node_a_lease_report_entry_observer = (Get-OptionalProperty -Object $nodeALeaseReport -PropertyName "entry_observer_count") -gt 0
node_a_lease_report_relay_ready = (Get-OptionalProperty -Object $nodeALeaseReport -PropertyName "relay_control_ready_count") -gt 0
node_a_lease_report_refresh_contract = (Get-OptionalProperty -Object $nodeALeaseReport -PropertyName "refresh_contract") -eq "node_scoped_synthetic_config_get"
stale_relay_refresh_succeeded_on_cluster = (
((Get-OptionalProperty -Object $nodeRLeaseReport -PropertyName "last_refresh_reason") -eq "stale_relay" -and (Get-OptionalProperty -Object $nodeRLeaseReport -PropertyName "refresh_success_count") -gt 0) -or
((Get-OptionalProperty -Object $nodeSLeaseReport -PropertyName "last_refresh_reason") -eq "stale_relay" -and (Get-OptionalProperty -Object $nodeSLeaseReport -PropertyName "refresh_success_count") -gt 0)
)
alt_relay_node_reports_admitted_relay_lease = (Get-OptionalProperty -Object $nodeSLeaseReport -PropertyName "admitted_as_relay_count") -gt 0
outbound_node_reports_admitted_peer_lease = (Get-OptionalProperty -Object $nodeCLeaseReport -PropertyName "admitted_as_peer_count") -gt 0
lease_telemetry_boundary_flags_disabled = $leaseReportBoundaryFlagsDisabled
production_forwarding_disabled = (
$configs["a"].synthetic_mesh_config.production_forwarding -eq $false -and
$configs["r"].synthetic_mesh_config.production_forwarding -eq $false -and
$configs["b"].synthetic_mesh_config.production_forwarding -eq $false -and
$configs["c"].synthetic_mesh_config.production_forwarding -eq $false -and
$configs["idle"].synthetic_mesh_config.production_forwarding -eq $false
)
}
# Assemble the smoke-run report that is serialized to JSON further down.
# Scalar "first match" fields keep a bare `Select-Object -First 1`; list-valued
# fields are wrapped in @() because PowerShell unrolls pipeline output (one
# match becomes a scalar, zero matches become $null), which would otherwise
# make the JSON type of those fields vary between array, object and null.
$result = [pscustomobject]@{
    stage = "C17Z18 rendezvous relay replacement docker-test smoke"
    run_id = $runId
    backend_base_url = $backendPublicBaseUrl
    cluster_id = $clusterID
    # NOTE(review): "s" and "idle" both resolve to $nodes["idle"].id —
    # presumably the idle node doubles as the alternate relay "s"; confirm.
    node_ids = @{
        a = $nodes["a"].id
        r = $nodes["r"].id
        s = $nodes["idle"].id
        b = $nodes["b"].id
        c = $nodes["c"].id
        idle = $nodes["idle"].id
    }
    route_intents = @{
        direct = $directIntent.route_intent.id
        rendezvous = $rendezvousIntent.route_intent.id
    }
    # Number of routes in each node's scoped synthetic mesh config; @() makes
    # .Count valid when `routes` is a single object or $null.
    scoped_config_route_counts = @{
        a = @($configs["a"].synthetic_mesh_config.routes).Count
        r = @($configs["r"].synthetic_mesh_config.routes).Count
        b = @($configs["b"].synthetic_mesh_config.routes).Count
        c = @($configs["c"].synthetic_mesh_config.routes).Count
        idle = @($configs["idle"].synthetic_mesh_config.routes).Count
    }
    # `rendezvous_leases` is read through Get-OptionalArrayCount rather than
    # direct property access (the property is treated as optional here).
    rendezvous_lease_counts = @{
        a = Get-OptionalArrayCount -Object $configs["a"].synthetic_mesh_config -PropertyName "rendezvous_leases"
        r = Get-OptionalArrayCount -Object $configs["r"].synthetic_mesh_config -PropertyName "rendezvous_leases"
        b = Get-OptionalArrayCount -Object $configs["b"].synthetic_mesh_config -PropertyName "rendezvous_leases"
        c = Get-OptionalArrayCount -Object $configs["c"].synthetic_mesh_config -PropertyName "rendezvous_leases"
        idle = Get-OptionalArrayCount -Object $configs["idle"].synthetic_mesh_config -PropertyName "rendezvous_leases"
    }
    # Single representative entries: intentionally scalar (or $null when absent).
    node_a_initial_stale_rendezvous_lease = $nodeAInitialStaleLeases | Select-Object -First 1
    node_a_reported_replacement_rendezvous_lease = $nodeAReportedReplacementLeases | Select-Object -First 1
    node_a_current_replacement_rendezvous_lease = $nodeAReplacementLeases | Select-Object -First 1
    node_a_current_rendezvous_relay_policy = $nodeARelayPolicy
    node_a_initial_route_path_decisions = $nodeAInitialPathDecisionReport
    node_a_current_route_path_decisions = $nodeAConfigPathDecisionReport
    node_a_reported_route_path_decision = $nodeAReportedReplacementPathDecisions | Select-Object -First 1
    # Raw per-node reports, keyed by node letter.
    route_path_decision_reports = @{
        a = $nodeAPathDecisionReport
    }
    route_generation_reports = @{
        a = $nodeARouteGenerationReport
    }
    route_health_config_reports = @{
        a = $nodeARouteHealthConfigReport
    }
    route_health_feedback_refresh_reports = @{
        a = $nodeARouteHealthFeedbackRefreshReport
    }
    rendezvous_lease_reports = @{
        a = $nodeALeaseReport
        r = $nodeRLeaseReport
        s = $nodeSLeaseReport
        c = $nodeCLeaseReport
    }
    mesh_link_count = $meshLinks.Count
    route_health_count = $routeHealthLinks.Count
    peer_connection_manager_link_count = $managerLinks.Count
    relay_control_link_count = $relayControlLinks.Count
    direct_route_delivery_succeeded = $directRouteDelivered
    pass_matrix = $passMatrix
    # Truncated samples; always emitted as JSON arrays (see header comment).
    direct_route_health = @($directHealth | Select-Object -First 3)
    replacement_route_health = @($replacementRouteHealth | Select-Object -First 3)
    relay_control_links = @($relayControlLinks | Select-Object -First 5)
    replacement_relay_control_links = @($replacementRelayControlLinks | Select-Object -First 5)
    cluster_summaries = $summary.cluster_summaries
    backend_log_tail = $backendLogs
    node_log_tail = $nodeLogs
    # Mirrors the -KeepRunning switch as a plain boolean.
    containers_left_running = [bool]$KeepRunning
}
# Every pass-matrix entry whose value is falsy counts as a failed check.
# @() keeps .Count valid whether zero, one or many entries match.
$failed = @($passMatrix.GetEnumerator() | Where-Object { -not $_.Value })

# Emit the full report first so it is available even when the run fails.
$result | ConvertTo-Json -Depth 60

# Tear down before raising the failure. Previously the throw came first, so a
# failed run left the containers and the per-run remote build directory behind
# even though the report states containers_left_running = false.
# Pass -KeepRunning to retain the environment for debugging.
if (-not $KeepRunning) {
    Write-Host "Cleaning up C17Z18 containers..."
    if ($failed.Count -gt 0) {
        # A teardown error must not mask the smoke failure raised below.
        try {
            Remove-C17Z12Containers
            Invoke-RemoteShell -Command "rm -rf '$remoteBuildDir'"
        }
        catch {
            Write-Warning "Cleanup after failed smoke run did not complete: $($_.Exception.Message)"
        }
    }
    else {
        # On a passing run, teardown errors propagate as before.
        Remove-C17Z12Containers
        Invoke-RemoteShell -Command "rm -rf '$remoteBuildDir'"
    }
}

if ($failed.Count -gt 0) {
    throw "C17Z18 rendezvous relay replacement smoke failed: $($failed.Name -join ', ')"
}