<#
.SYNOPSIS
    Live smoke test "c18z10": service-channel exit-pool rebuild under an active WebSocket stream.

.DESCRIPTION
    Defines the helpers used by the smoke run and then starts the run itself:
    it resolves the entry / primary-exit / alternate-exit nodes by name, stops the
    test updater containers over ssh, enables a mesh listener on each node and
    expires route intents left behind by earlier smoke runs.  The remainder of the
    run continues below this section of the file.

    Set-StrictMode -Version Latest is active, so reading a property that does not
    exist throws.  Optional telemetry fields are therefore always read through
    Get-ObjectPropertyValue, which returns $null instead.

.PARAMETER ApiBaseUrl
    Base URL of the control-plane API; every Invoke-Api path is appended to it.
.PARAMETER ClusterID
    Cluster whose nodes, route intents and leases are manipulated.
.PARAMETER ActorUserID
    User id sent as actor_user_id with API requests.
.PARAMETER EntryNodeName
    Name of the node used as the service-channel entry.
.PARAMETER PrimaryExitNodeName
    Name of the exit node the lease is expected to prefer.
.PARAMETER AlternateExitNodeName
    Name of the exit node used as the rebuild replacement.
.PARAMETER EntryBaseUrl
    Base URL of the entry node's HTTP/WebSocket ingress.
.PARAMETER DockerSSH
    ssh destination used to run docker commands against the test containers.
.PARAMETER PreRebuildBatchCount
    Number of packet batches sent before the mid-stream rebuild action runs.
.PARAMETER PostRebuildBatchCount
    Number of packet batches sent after the rebuild action.
.PARAMETER PacketsPerBatch
    Packets per WebSocket batch in the pressure run.
.PARAMETER BatchDelayMilliseconds
    Pause between WebSocket batches.
.PARAMETER RequiredNodeVersion
    Value reported as reported_version in the injected feedback heartbeat.
.PARAMETER ResultPath
    Result file path (its use is outside this section of the file).
#>
param(
    [string]$ApiBaseUrl = "http://192.168.200.61:18121/api/v1",
    [string]$ClusterID = "cfc0743d-d960-49fb-9de8-96e063d5e4aa",
    [string]$ActorUserID = "f67d943f-5397-4b3a-a229-695fe67ad700",
    [string]$EntryNodeName = "test-1",
    [string]$PrimaryExitNodeName = "test-2",
    [string]$AlternateExitNodeName = "test-3",
    [string]$EntryBaseUrl = "http://192.168.200.61:19131",
    [string]$DockerSSH = "test-docker",
    [int]$PreRebuildBatchCount = 18,
    [int]$PostRebuildBatchCount = 36,
    [int]$PacketsPerBatch = 8,
    [int]$BatchDelayMilliseconds = 25,
    [string]$RequiredNodeVersion = "0.2.185",
    [string]$ResultPath = "artifacts\c18z10-live-service-channel-exit-pool-smoke-result.json"
)

Set-StrictMode -Version Latest
$ErrorActionPreference = "Stop"
Add-Type -AssemblyName System.Net.Http

$repoRoot = (Resolve-Path (Join-Path $PSScriptRoot "..\..")).ProviderPath
$runId = "c18z10-" + (Get-Date -Format "yyyyMMdd-HHmmss")
$resourceId = "vpn-$runId"

# Calls the control-plane API and returns the parsed response.
# A body, when given, is sent as JSON.  Any failure is rethrown as a single
# string of the form "<METHOD> <PATH> failed with HTTP <status>: <details>".
function Invoke-Api {
    param(
        [string]$Method,
        [string]$Path,
        [object]$Body = $null
    )

    $uri = "$ApiBaseUrl$Path"
    try {
        if ($null -eq $Body) {
            return Invoke-RestMethod -Method $Method -Uri $uri -TimeoutSec 30
        }
        return Invoke-RestMethod -Method $Method -Uri $uri -ContentType "application/json" -Body ($Body | ConvertTo-Json -Depth 80) -TimeoutSec 30
    }
    catch {
        $failure = $_
        $statusCode = $null

        # Not every exception type carries a Response (e.g. connection failures,
        # timeouts).  Under strict mode a direct .Response read would throw here
        # and hide the real error, so look the property up defensively.
        $responseProp = $failure.Exception.PSObject.Properties["Response"]
        if ($null -ne $responseProp -and $responseProp.Value) {
            $statusCode = [int]$responseProp.Value.StatusCode
        }

        # ErrorDetails is $null unless the server sent an error body.
        $details = $null
        if ($null -ne $failure.ErrorDetails) {
            $details = $failure.ErrorDetails.Message
        }
        if (-not $details) {
            $details = $failure.Exception.Message
        }
        throw "$Method $Path failed with HTTP $statusCode`: $details"
    }
}

# Returns the first node in the cluster whose name equals $Name; throws if none matches.
function Get-NodeByName {
    param([string]$Name)

    $nodes = (Invoke-Api -Method GET -Path "/clusters/$ClusterID/nodes?actor_user_id=$ActorUserID").nodes
    $node = @($nodes | Where-Object { $_.name -eq $Name }) | Select-Object -First 1
    if ($null -eq $node) {
        throw "Node '$Name' was not found in cluster $ClusterID"
    }
    return $node
}

# Maps a test node name to its mesh listener port.
# NOTE(review): unknown names fall back to 19131, the same port as test-1.
function Get-MeshPort {
    param([string]$Name)

    switch ($Name) {
        "test-1" { return 19131 }
        "test-2" { return 19132 }
        "test-3" { return 19133 }
        default { return 19131 }
    }
}

# Sets the desired state of the node's mesh-listener workload to "enabled",
# listening and advertising on the port returned by Get-MeshPort.
function Enable-TestMeshListener {
    param([object]$Node)

    $port = Get-MeshPort -Name $Node.name
    Invoke-Api -Method PUT -Path "/clusters/$ClusterID/nodes/$($Node.id)/workloads/mesh-listener/desired" -Body @{
        actor_user_id = $ActorUserID
        desired_state = "enabled"
        runtime_mode = "container"
        version = "c18z10-live-fsc-exit-pool"
        config = @{
            listen_addr = "0.0.0.0:$port"
            listen_port_mode = "manual"
            advertise_endpoint = "http://192.168.200.61:$port"
            advertise_transport = "direct_http"
            connectivity_mode = "private_lan"
            nat_type = "none"
            region = "docker-test"
            production_forwarding = $true
        }
        environment = @{}
    } | Out-Null
}

# Expires active vpn_packets route intents between the given source and
# destination that were created by this family of smoke tests (identified by
# policy.metadata.smoke).  Intents without a known smoke marker are left alone.
function Clear-OldSmokeRouteIntents {
    param(
        [string]$SourceNodeID,
        [string]$DestinationNodeID
    )

    $knownSmokeNames = @(
        "c18z1_live_service_channel_ingress",
        "c18z2_live_service_channel_soak",
        "c18z3_live_service_channel_entry_ws_fallback",
        "c18z4_live_service_channel_session_pressure",
        "c18z5_live_service_channel_exit_restart",
        "c18z6_live_service_channel_active_rebuild",
        "c18z7_live_service_channel_concurrent_isolation",
        "c18z8_live_service_channel_backpressure_isolation",
        "c18z9_live_service_channel_route_pool",
        "c18z10_live_service_channel_exit_pool"
    )

    $items = (Invoke-Api -Method GET -Path "/clusters/$ClusterID/mesh/route-intents?actor_user_id=$ActorUserID").route_intents
    foreach ($item in @($items)) {
        # @($null) is a one-element array, so an empty list yields one $null item.
        if ($null -eq $item) { continue }
        if ([string]$item.lifecycle_status -ne "active") { continue }
        if ([string]$item.service_class -ne "vpn_packets") { continue }
        if ([string]$item.source_selector.node_id -ne $SourceNodeID -or [string]$item.destination_selector.node_id -ne $DestinationNodeID) { continue }

        # policy / metadata / smoke are optional; a missing link yields "".
        $policy = Get-ObjectPropertyValue -Object $item -Name "policy"
        $metadata = Get-ObjectPropertyValue -Object $policy -Name "metadata"
        $smoke = [string](Get-ObjectPropertyValue -Object $metadata -Name "smoke")
        if ($knownSmokeNames -notcontains $smoke) { continue }

        Invoke-Api -Method POST -Path "/clusters/$ClusterID/mesh/route-intents/$($item.id)/expire" -Body @{
            actor_user_id = $ActorUserID
        } | Out-Null
    }
}

# Creates a vpn_packets route intent from source to destination that expires in
# 10 minutes and is tagged with this run's id and $Label.  When no hops are
# given the route is the direct pair (source, destination).
function New-RouteIntent {
    param(
        [string]$SourceNodeID,
        [string]$DestinationNodeID,
        [int]$Priority,
        [string]$Label,
        [string[]]$Hops,
        [string]$ExitPoolID = "c18z10-exit-pool"
    )

    if ($null -eq $Hops -or $Hops.Count -eq 0) {
        $Hops = @($SourceNodeID, $DestinationNodeID)
    }
    $expiresAt = (Get-Date).ToUniversalTime().AddMinutes(10).ToString("o")
    return Invoke-Api -Method POST -Path "/clusters/$ClusterID/mesh/route-intents" -Body @{
        actor_user_id = $ActorUserID
        source_selector = @{ node_id = $SourceNodeID }
        destination_selector = @{ node_id = $DestinationNodeID }
        service_class = "vpn_packets"
        priority = $Priority
        policy = @{
            synthetic_enabled = $true
            route_version = "$runId-$Label"
            policy_version = "$runId-$Label"
            peer_directory_version = "$runId-$Label"
            hops = @($Hops)
            allowed_channels = @("vpn_packet", "fabric_control")
            max_ttl = 8
            max_hops = 8
            expires_at = $expiresAt
            metadata = @{
                smoke = "c18z10_live_service_channel_exit_pool"
                run_id = $runId
                label = $Label
                hop_count = $Hops.Count
                exit_pool_id = $ExitPoolID
            }
        }
    }
}

# Fetches the synthetic mesh config the control plane serves for a node.
function Get-SyntheticConfig {
    param([string]$NodeID)

    return Invoke-Api -Method GET -Path "/clusters/$ClusterID/nodes/$NodeID/mesh/synthetic-config?actor_user_id=$ActorUserID"
}

# Returns the first heartbeat of a limit=1 query for the node, or $null when the
# node has not reported any heartbeat yet (so pollers can keep waiting).
function Get-LatestHeartbeat {
    param([string]$NodeID)

    $response = Invoke-Api -Method GET -Path "/clusters/$ClusterID/nodes/$NodeID/heartbeats?actor_user_id=$ActorUserID&limit=1"
    $heartbeats = @(Get-ObjectPropertyValue -Object $response -Name "heartbeats" | Where-Object { $null -ne $_ })
    return ($heartbeats | Select-Object -First 1)
}

# Returns @{ heartbeat; report } where report is
# heartbeat.metadata.fabric_service_channel_runtime_report, or $null when the
# heartbeat, its metadata or the report is absent.
function Get-LatestRuntimeReport {
    param([string]$NodeID)

    $hb = Get-LatestHeartbeat -NodeID $NodeID
    $metadata = Get-ObjectPropertyValue -Object $hb -Name "metadata"
    $report = Get-ObjectPropertyValue -Object $metadata -Name "fabric_service_channel_runtime_report"
    return @{
        heartbeat = $hb
        report = $report
    }
}

# Polls every 2 s until the node's runtime report says it is enabled, forwards
# production payloads and lists at least $MinRoutes route candidates.
# Returns the matching Get-LatestRuntimeReport result; throws on timeout.
function Wait-ForRuntimeReady {
    param(
        [string]$NodeID,
        [int]$MinRoutes,
        [int]$TimeoutSeconds = 90
    )

    $deadline = (Get-Date).AddSeconds($TimeoutSeconds)
    do {
        $latest = Get-LatestRuntimeReport -NodeID $NodeID
        $report = $latest.report
        if ($null -ne $report) {
            $enabled = Get-ObjectPropertyValue -Object $report -Name "enabled"
            $forwarding = Get-ObjectPropertyValue -Object $report -Name "production_payload_forwarding"
            # A missing counter casts to 0, same as an explicit zero.
            $routeTotal = [int](Get-ObjectPropertyValue -Object $report -Name "route_candidate_total")
            if ($enabled -eq $true -and $forwarding -eq $true -and $routeTotal -ge $MinRoutes) {
                return $latest
            }
        }
        Start-Sleep -Seconds 2
    } while ((Get-Date) -lt $deadline)
    throw "Timed out waiting for production service-channel runtime ready on node $NodeID"
}

# Polls every 2 s until the node reports a loaded config_version that is at
# least $ConfigVersion.  Returns the matching runtime report; throws on timeout.
function Wait-ForRuntimeConfigVersion {
    param(
        [string]$NodeID,
        [string]$ConfigVersion,
        [int]$TimeoutSeconds = 90
    )

    $deadline = (Get-Date).AddSeconds($TimeoutSeconds)
    do {
        $latest = Get-LatestRuntimeReport -NodeID $NodeID
        if ($null -ne $latest.report) {
            $loadedVersion = [string](Get-ObjectPropertyValue -Object $latest.report -Name "config_version")

            # A plain string comparison orders "9" after "10".  When both sides
            # are integers compare them numerically; otherwise keep the string
            # ordering.  NOTE(review): the version format is not visible here —
            # confirm which of the two forms the control plane emits.
            $loadedNumber = 0L
            $wantedNumber = 0L
            $bothNumeric = [long]::TryParse($loadedVersion, [ref]$loadedNumber) -and [long]::TryParse($ConfigVersion, [ref]$wantedNumber)
            if ($bothNumeric) {
                $reached = $loadedNumber -ge $wantedNumber
            }
            else {
                $reached = $loadedVersion -ge $ConfigVersion
            }
            if ($reached) {
                return $latest
            }
        }
        Start-Sleep -Seconds 2
    } while ((Get-Date) -lt $deadline)
    throw "Timed out waiting for node $NodeID to load synthetic config $ConfigVersion"
}

# Polls every 2 s until every id in $RouteIDs appears as a route_id in the
# node's synthetic config.  Returns that config; throws on timeout.
function Wait-ForRouteIntentVisible {
    param(
        [string]$NodeID,
        [string[]]$RouteIDs,
        [int]$TimeoutSeconds = 60
    )

    $deadline = (Get-Date).AddSeconds($TimeoutSeconds)
    do {
        $config = Get-SyntheticConfig -NodeID $NodeID
        $meshConfig = Get-ObjectPropertyValue -Object $config -Name "synthetic_mesh_config"
        $routes = @(Get-ObjectPropertyValue -Object $meshConfig -Name "routes" | Where-Object { $null -ne $_ })
        $presentIDs = @($routes | ForEach-Object { [string](Get-ObjectPropertyValue -Object $_ -Name "route_id") })

        # Require each requested id individually; counting matches alone could
        # be satisfied by the same route appearing twice.
        $missing = @($RouteIDs | Where-Object { $presentIDs -notcontains $_ })
        if ($missing.Count -eq 0) {
            return $config
        }
        Start-Sleep -Seconds 2
    } while ((Get-Date) -lt $deadline)
    throw "Timed out waiting for routes '$($RouteIDs -join ",")' in synthetic config for node $NodeID"
}

# Polls every 2 s until $RouteID no longer appears in the node's synthetic
# config.  Returns that config; throws on timeout.
function Wait-ForRouteIntentNotVisible {
    param(
        [string]$NodeID,
        [string]$RouteID,
        [int]$TimeoutSeconds = 90
    )

    $deadline = (Get-Date).AddSeconds($TimeoutSeconds)
    do {
        $config = Get-SyntheticConfig -NodeID $NodeID
        $meshConfig = Get-ObjectPropertyValue -Object $config -Name "synthetic_mesh_config"
        $routes = @(Get-ObjectPropertyValue -Object $meshConfig -Name "routes" | Where-Object { $null -ne $_ })
        $present = @($routes | Where-Object { (Get-ObjectPropertyValue -Object $_ -Name "route_id") -eq $RouteID })
        if ($present.Count -eq 0) {
            return $config
        }
        Start-Sleep -Seconds 2
    } while ((Get-Date) -lt $deadline)
    throw "Timed out waiting for route '$RouteID' to disappear from synthetic config for node $NodeID"
}

# Requests a vpn_packets service-channel lease (TTL 300 s) for one entry node
# and a pool of exit nodes and returns the fabric_service_channel_lease object.
# The first exit node is preferred when no preference is given.
function New-ServiceChannelLease {
    param(
        [string]$EntryNodeID,
        [string[]]$ExitNodeIDs,
        [string]$PreferredExitNodeID,
        [string]$VPNResourceID = $resourceId
    )

    if ($null -eq $ExitNodeIDs -or $ExitNodeIDs.Count -eq 0) {
        throw "At least one exit node id is required"
    }
    if ([string]::IsNullOrWhiteSpace($PreferredExitNodeID)) {
        $PreferredExitNodeID = [string]$ExitNodeIDs[0]
    }
    return (Invoke-Api -Method POST -Path "/clusters/$ClusterID/fabric/service-channels/leases" -Body @{
        actor_user_id = $ActorUserID
        organization_id = "org-c18z10-smoke"
        user_id = $ActorUserID
        resource_id = $VPNResourceID
        service_class = "vpn_packets"
        entry_node_ids = @($EntryNodeID)
        exit_node_ids = @($ExitNodeIDs)
        preferred_entry_node_id = $EntryNodeID
        preferred_exit_node_id = $PreferredExitNodeID
        allowed_channels = @("vpn_packet", "bulk", "control")
        ttl_seconds = 300
        metadata = @{
            smoke = "c18z10_live_service_channel_exit_pool"
            run_id = $runId
        }
    }).fabric_service_channel_lease
}

# Serializes $Value to compact JSON and returns it base64url-encoded (UTF-8,
# '+' -> '-', '/' -> '_', padding removed).
function ConvertTo-Base64UrlJson {
    param([object]$Value)

    $json = $Value | ConvertTo-Json -Depth 80 -Compress
    $bytes = [System.Text.Encoding]::UTF8.GetBytes($json)
    return [Convert]::ToBase64String($bytes).TrimEnd("=").Replace("+", "-").Replace("/", "_")
}

# Strict-mode-safe property read: returns the named property's value, or $null
# when the object is $null or has no such property.
function Get-ObjectPropertyValue {
    param(
        [object]$Object,
        [string]$Name
    )

    if ($null -eq $Object) { return $null }
    $prop = $Object.PSObject.Properties[$Name]
    if ($null -eq $prop) { return $null }
    return $prop.Value
}

# Builds a minimal IPv4/UDP test packet: 20-byte IP header (10.18.1.10 ->
# 10.18.2.20, TTL 64, protocol 17), 8-byte UDP header (given source port,
# destination port 3389) and an ASCII payload "c18z1-<port>".
# The IP and UDP checksum fields are left at zero.
function New-TestIPv4UDPPacket {
    param([int]$SourcePort)

    $payload = [System.Text.Encoding]::ASCII.GetBytes("c18z1-$SourcePort")
    $totalLength = 20 + 8 + $payload.Length
    $packet = New-Object byte[] $totalLength

    # IPv4 header
    $packet[0] = 0x45
    $packet[1] = 0
    $packet[2] = [byte](($totalLength -shr 8) -band 0xff)
    $packet[3] = [byte]($totalLength -band 0xff)
    $packet[8] = 64
    $packet[9] = 17
    $packet[12] = 10; $packet[13] = 18; $packet[14] = 1; $packet[15] = 10
    $packet[16] = 10; $packet[17] = 18; $packet[18] = 2; $packet[19] = 20

    # UDP header (ports and length are big-endian)
    $udpOffset = 20
    $destPort = 3389
    $udpLength = 8 + $payload.Length
    $packet[$udpOffset] = [byte](($SourcePort -shr 8) -band 0xff)
    $packet[$udpOffset + 1] = [byte]($SourcePort -band 0xff)
    $packet[$udpOffset + 2] = [byte](($destPort -shr 8) -band 0xff)
    $packet[$udpOffset + 3] = [byte]($destPort -band 0xff)
    $packet[$udpOffset + 4] = [byte](($udpLength -shr 8) -band 0xff)
    $packet[$udpOffset + 5] = [byte]($udpLength -band 0xff)

    [Array]::Copy($payload, 0, $packet, 28, $payload.Length)
    return $packet
}

# Frames packets as a batch body: each packet is preceded by its length as a
# 4-byte big-endian integer.  Returns the body as a single byte[].
function New-PacketBatchBody {
    param([byte[][]]$Packets)

    $stream = [System.IO.MemoryStream]::new()
    try {
        foreach ($packet in $Packets) {
            $length = $packet.Length
            $stream.WriteByte([byte](($length -shr 24) -band 0xff))
            $stream.WriteByte([byte](($length -shr 16) -band 0xff))
            $stream.WriteByte([byte](($length -shr 8) -band 0xff))
            $stream.WriteByte([byte]($length -band 0xff))
            $stream.Write($packet, 0, $packet.Length)
        }
        # The unary comma keeps the byte[] intact instead of letting the
        # pipeline unroll it element by element (and turn an empty body into $null).
        return , $stream.ToArray()
    }
    finally {
        $stream.Dispose()
    }
}

# POSTs one batch of 8 test packets (source ports $PortStart..$PortStart+7) to
# the lease's entry HTTP path.  Returns StatusCode/Body on success and throws
# on a non-success status.
function Invoke-ServiceChannelPost {
    param(
        [object]$Lease,
        [int]$PortStart,
        [string]$VPNResourceID = $resourceId
    )

    $packets = @()
    for ($i = 0; $i -lt 8; $i++) {
        $packets += , (New-TestIPv4UDPPacket -SourcePort ($PortStart + $i))
    }
    $path = $Lease.entry_http.path_template.
        Replace("{cluster_id}", $ClusterID).
        Replace("{channel_id}", $Lease.channel_id).
        Replace("{resource_id}", $VPNResourceID)
    $url = "$EntryBaseUrl$path`?batch=true"
    $headers = @{
        "X-RAP-Service-Channel-Token" = $Lease.token.token
        "X-RAP-Fabric-Channel-ID" = $Lease.channel_id
        "X-RAP-Service-Class" = "vpn_packets"
        "X-RAP-Channel-Class" = "vpn_packet"
        "X-RAP-Service-Channel-Authority-Payload" = ConvertTo-Base64UrlJson -Value $Lease.authority_payload
        "X-RAP-Service-Channel-Authority-Signature" = ConvertTo-Base64UrlJson -Value $Lease.authority_signature
    }
    $body = New-PacketBatchBody -Packets $packets

    $client = [System.Net.Http.HttpClient]::new()
    # Pre-declared so the finally block can test them under strict mode.
    $request = $null
    $response = $null
    try {
        $client.Timeout = [TimeSpan]::FromSeconds(30)
        $request = [System.Net.Http.HttpRequestMessage]::new([System.Net.Http.HttpMethod]::Post, $url)
        foreach ($header in $headers.GetEnumerator()) {
            [void]$request.Headers.TryAddWithoutValidation($header.Key, [string]$header.Value)
        }
        $content = [System.Net.Http.ByteArrayContent]::new($body)
        $content.Headers.ContentType = [System.Net.Http.Headers.MediaTypeHeaderValue]::Parse("application/vnd.rap.vpn-packet-batch.v1")
        $request.Content = $content

        $response = $client.SendAsync($request).GetAwaiter().GetResult()
        $responseBody = $response.Content.ReadAsStringAsync().GetAwaiter().GetResult()
        if (-not $response.IsSuccessStatusCode) {
            throw "Service-channel POST $url failed with HTTP $([int]$response.StatusCode): $responseBody"
        }
        return [pscustomobject]@{
            StatusCode = [int]$response.StatusCode
            Body = $responseBody
        }
    }
    finally {
        if ($null -ne $response) { $response.Dispose() }
        if ($null -ne $request) { $request.Dispose() }
        $client.Dispose()
    }
}

# Returns ingress.send_packets from the node's latest runtime report, or 0 when absent.
function Get-IngressSendPackets {
    param([string]$NodeID)

    $latest = Get-LatestRuntimeReport -NodeID $NodeID
    $ingress = Get-ObjectPropertyValue -Object $latest.report -Name "ingress"
    $sendPackets = Get-ObjectPropertyValue -Object $ingress -Name "send_packets"
    if ($null -eq $sendPackets) { return 0 }
    return [int]$sendPackets
}

# Returns ingress.send_route_failures from the node's latest runtime report, or 0 when absent.
function Get-IngressRouteFailures {
    param([string]$NodeID)

    $latest = Get-LatestRuntimeReport -NodeID $NodeID
    $ingress = Get-ObjectPropertyValue -Object $latest.report -Name "ingress"
    $failures = Get-ObjectPropertyValue -Object $ingress -Name "send_route_failures"
    if ($null -eq $failures) { return 0 }
    return [int]$failures
}

# Returns ingress.flow_scheduler.dropped from the node's latest runtime report, or 0 when absent.
function Get-IngressFlowDropped {
    param([string]$NodeID)

    $latest = Get-LatestRuntimeReport -NodeID $NodeID
    $ingress = Get-ObjectPropertyValue -Object $latest.report -Name "ingress"
    if ($null -eq $ingress) { return 0 }
    $flowScheduler = Get-ObjectPropertyValue -Object $ingress -Name "flow_scheduler"
    if ($null -eq $flowScheduler) { return 0 }
    $dropped = Get-ObjectPropertyValue -Object $flowScheduler -Name "dropped"
    if ($null -eq $dropped) { return 0 }
    return [int]$dropped
}

# Returns the depth of the "<connection>:client_to_gateway" inbox queue from
# the node's latest runtime report, or 0 when the queue is not reported.
function Get-ExitQueueDepth {
    param(
        [string]$NodeID,
        [string]$VPNConnectionID
    )

    $latest = Get-LatestRuntimeReport -NodeID $NodeID
    $queueKey = "$VPNConnectionID`:client_to_gateway"
    $inbox = Get-ObjectPropertyValue -Object $latest.report -Name "inbox"
    $depths = Get-ObjectPropertyValue -Object $inbox -Name "queue_depths"
    if ($null -eq $depths) { return 0 }
    $prop = $depths.PSObject.Properties[$queueKey]
    if ($null -eq $prop) { return 0 }
    return [int]$prop.Value
}

# Polls every 2 s until Get-ExitQueueDepth reaches $MinDepth.
# Returns the observed depth; throws on timeout.
function Wait-ForExitQueueDepth {
    param(
        [string]$NodeID,
        [string]$VPNConnectionID,
        [int]$MinDepth,
        [int]$TimeoutSeconds = 90
    )

    $deadline = (Get-Date).AddSeconds($TimeoutSeconds)
    do {
        $depth = Get-ExitQueueDepth -NodeID $NodeID -VPNConnectionID $VPNConnectionID
        if ($depth -ge $MinDepth) {
            return $depth
        }
        Start-Sleep -Seconds 2
    } while ((Get-Date) -lt $deadline)
    throw "Timed out waiting for exit queue depth >= $MinDepth on node $NodeID"
}

# Non-throwing wrapper around Invoke-ServiceChannelPost: returns
# ok/status_code/error, with status_code 0 and the exception message on failure.
function Invoke-ServiceChannelPostSafe {
    param(
        [object]$Lease,
        [int]$PortStart,
        [string]$VPNResourceID = $resourceId
    )

    try {
        $response = Invoke-ServiceChannelPost -Lease $Lease -PortStart $PortStart -VPNResourceID $VPNResourceID
        return [pscustomobject]@{
            ok = $true
            status_code = [int]$response.StatusCode
            error = ""
        }
    }
    catch {
        return [pscustomobject]@{
            ok = $false
            status_code = 0
            error = $_.Exception.Message
        }
    }
}

# Rewrites an http(s):// URL to ws(s)://; any other URL is returned unchanged.
function ConvertTo-WebSocketURL {
    param([string]$URL)

    if ($URL.StartsWith("https://")) {
        return "wss://" + $URL.Substring("https://".Length)
    }
    if ($URL.StartsWith("http://")) {
        return "ws://" + $URL.Substring("http://".Length)
    }
    return $URL
}

# Opens a WebSocket to the lease's entry path, sends one binary batch of 8 test
# packets and closes the output side.  Never throws: returns
# ok/url/sent_packets/state/error.
function Invoke-ServiceChannelWebSocketSend {
    param(
        [object]$Lease,
        [int]$PortStart,
        [string]$VPNResourceID = $resourceId
    )

    $packets = @()
    for ($i = 0; $i -lt 8; $i++) {
        $packets += , (New-TestIPv4UDPPacket -SourcePort ($PortStart + $i))
    }
    $path = $Lease.entry_http.websocket_path_template.
        Replace("{cluster_id}", $ClusterID).
        Replace("{channel_id}", $Lease.channel_id).
        Replace("{resource_id}", $VPNResourceID)
    $url = ConvertTo-WebSocketURL -URL "$EntryBaseUrl$path"
    $socket = [System.Net.WebSockets.ClientWebSocket]::new()
    $cts = [System.Threading.CancellationTokenSource]::new([TimeSpan]::FromSeconds(20))
    try {
        $null = $socket.Options.SetRequestHeader("X-RAP-Service-Channel-Token", [string]$Lease.token.token)
        $null = $socket.Options.SetRequestHeader("X-RAP-Fabric-Channel-ID", [string]$Lease.channel_id)
        $null = $socket.Options.SetRequestHeader("X-RAP-Service-Class", "vpn_packets")
        $null = $socket.Options.SetRequestHeader("X-RAP-Channel-Class", "vpn_packet")
        $null = $socket.Options.SetRequestHeader("X-RAP-Service-Channel-Authority-Payload", (ConvertTo-Base64UrlJson -Value $Lease.authority_payload))
        $null = $socket.Options.SetRequestHeader("X-RAP-Service-Channel-Authority-Signature", (ConvertTo-Base64UrlJson -Value $Lease.authority_signature))
        $null = $socket.ConnectAsync([Uri]$url, $cts.Token).GetAwaiter().GetResult()

        $body = New-PacketBatchBody -Packets $packets
        $segment = [ArraySegment[byte]]::new($body)
        $null = $socket.SendAsync($segment, [System.Net.WebSockets.WebSocketMessageType]::Binary, $true, $cts.Token).GetAwaiter().GetResult()
        Start-Sleep -Milliseconds 300
        if ($socket.State -eq [System.Net.WebSockets.WebSocketState]::Open) {
            $null = $socket.CloseOutputAsync([System.Net.WebSockets.WebSocketCloseStatus]::NormalClosure, "c18z10 sent", $cts.Token).GetAwaiter().GetResult()
        }
        return [pscustomobject]@{
            ok = $true
            url = $url
            sent_packets = $packets.Count
            state = [string]$socket.State
            error = ""
        }
    }
    catch {
        return [pscustomobject]@{
            ok = $false
            url = $url
            sent_packets = 0
            state = [string]$socket.State
            error = $_.Exception.Message
        }
    }
    finally {
        $socket.Dispose()
        $cts.Dispose()
    }
}

# Streams $PreSwitchBatches + $PostSwitchBatches binary batches over a single
# WebSocket.  Just before the first post-switch batch, $AfterPreSwitchAction
# (if any) is invoked while the socket stays open.  Never throws: returns
# ok/url/sent_batches/sent_packets/switch_action_ran/state/error.
function Invoke-ServiceChannelWebSocketPressure {
    param(
        [object]$Lease,
        [int]$PortStart,
        [int]$PreSwitchBatches,
        [int]$PostSwitchBatches,
        [int]$PacketsInBatch,
        [int]$DelayMilliseconds,
        [scriptblock]$AfterPreSwitchAction,
        [string]$VPNResourceID = $resourceId
    )

    $path = $Lease.entry_http.websocket_path_template.
        Replace("{cluster_id}", $ClusterID).
        Replace("{channel_id}", $Lease.channel_id).
        Replace("{resource_id}", $VPNResourceID)
    $url = ConvertTo-WebSocketURL -URL "$EntryBaseUrl$path"
    $socket = [System.Net.WebSockets.ClientWebSocket]::new()
    $ioTimeout = [TimeSpan]::FromSeconds(120)
    $cts = [System.Threading.CancellationTokenSource]::new($ioTimeout)
    $sentBatches = 0
    $sentPackets = 0
    $switchActionRan = $false
    try {
        $null = $socket.Options.SetRequestHeader("X-RAP-Service-Channel-Token", [string]$Lease.token.token)
        $null = $socket.Options.SetRequestHeader("X-RAP-Fabric-Channel-ID", [string]$Lease.channel_id)
        $null = $socket.Options.SetRequestHeader("X-RAP-Service-Class", "vpn_packets")
        $null = $socket.Options.SetRequestHeader("X-RAP-Channel-Class", "vpn_packet")
        $null = $socket.Options.SetRequestHeader("X-RAP-Service-Channel-Authority-Payload", (ConvertTo-Base64UrlJson -Value $Lease.authority_payload))
        $null = $socket.Options.SetRequestHeader("X-RAP-Service-Channel-Authority-Signature", (ConvertTo-Base64UrlJson -Value $Lease.authority_signature))
        $null = $socket.ConnectAsync([Uri]$url, $cts.Token).GetAwaiter().GetResult()

        $totalBatches = $PreSwitchBatches + $PostSwitchBatches
        for ($batch = 0; $batch -lt $totalBatches; $batch++) {
            if ($batch -eq $PreSwitchBatches -and $null -ne $AfterPreSwitchAction) {
                # Discard anything the action writes to the pipeline so it
                # cannot leak into this function's return value.
                $null = & $AfterPreSwitchAction
                $switchActionRan = $true

                # The action may itself wait for minutes.  Renew the token so
                # that time is not charged against the remaining sends.
                $cts.Dispose()
                $cts = [System.Threading.CancellationTokenSource]::new($ioTimeout)
            }

            $packets = @()
            for ($i = 0; $i -lt $PacketsInBatch; $i++) {
                $packets += , (New-TestIPv4UDPPacket -SourcePort ($PortStart + ($batch * 100) + $i))
            }
            $body = New-PacketBatchBody -Packets $packets
            $segment = [ArraySegment[byte]]::new($body)
            $null = $socket.SendAsync($segment, [System.Net.WebSockets.WebSocketMessageType]::Binary, $true, $cts.Token).GetAwaiter().GetResult()
            $sentBatches++
            $sentPackets += $packets.Count
            if ($DelayMilliseconds -gt 0) {
                Start-Sleep -Milliseconds $DelayMilliseconds
            }
        }

        Start-Sleep -Milliseconds 500
        if ($socket.State -eq [System.Net.WebSockets.WebSocketState]::Open) {
            $null = $socket.CloseOutputAsync([System.Net.WebSockets.WebSocketCloseStatus]::NormalClosure, "c18z10 sent", $cts.Token).GetAwaiter().GetResult()
        }
        return [pscustomobject]@{
            ok = $true
            url = $url
            sent_batches = $sentBatches
            sent_packets = $sentPackets
            switch_action_ran = $switchActionRan
            state = [string]$socket.State
            error = ""
        }
    }
    catch {
        return [pscustomobject]@{
            ok = $false
            url = $url
            sent_batches = $sentBatches
            sent_packets = $sentPackets
            switch_action_ran = $switchActionRan
            state = [string]$socket.State
            error = $_.Exception.Message
        }
    }
    finally {
        $socket.Dispose()
        $cts.Dispose()
    }
}

# Sends $Count HTTP batches through Invoke-ServiceChannelPostSafe, spacing the
# source-port ranges 100 apart, and returns the per-batch results.
function Send-BatchSeries {
    param(
        [object]$Lease,
        [int]$Count,
        [int]$PortBase,
        [int]$DelayMilliseconds = 100,
        [string]$VPNResourceID = $resourceId
    )

    $results = [System.Collections.Generic.List[object]]::new()
    for ($i = 0; $i -lt $Count; $i++) {
        $results.Add((Invoke-ServiceChannelPostSafe -Lease $Lease -PortStart ($PortBase + ($i * 100)) -VPNResourceID $VPNResourceID))
        if ($DelayMilliseconds -gt 0) {
            Start-Sleep -Milliseconds $DelayMilliseconds
        }
    }
    return $results.ToArray()
}

# Runs a command on the docker host over ssh; throws when ssh exits non-zero.
function Invoke-RemoteDocker {
    param([string]$Command)

    & ssh $DockerSSH $Command
    if ($LASTEXITCODE -ne 0) {
        throw "ssh $DockerSSH command failed: $Command"
    }
}

# Stops the three host-agent updater containers (best effort: "|| true").
function Stop-TestUpdaters {
    Invoke-RemoteDocker -Command "docker stop rap_host_agent_updater_test-1 rap_host_agent_updater_test-2 rap_host_agent_updater_test-3 >/dev/null 2>&1 || true"
}

# Starts the three host-agent updater containers (best effort: "|| true").
function Start-TestUpdaters {
    Invoke-RemoteDocker -Command "docker start rap_host_agent_updater_test-1 rap_host_agent_updater_test-2 rap_host_agent_updater_test-3 >/dev/null 2>&1 || true"
}

# Restarts the container of an exit node; same operation as Restart-NodeContainer.
function Restart-ExitContainer {
    param([string]$Name)

    Restart-NodeContainer -Name $Name
}

# Restarts container "rap_test_node_<name>", with '-' in the name replaced by '_'.
function Restart-NodeContainer {
    param([string]$Name)

    $containerName = "rap_test_node_" + $Name.Replace("-", "_")
    Invoke-RemoteDocker -Command "docker restart $containerName >/dev/null"
}

# Returns vpn_packet_stats.client_to_gateway.queue_depth from the backend's
# tunnel stats for the connection, or 0 when any part of that path is absent.
function Get-BackendClientGatewayDepth {
    param([string]$VPNConnectionID)

    $response = Invoke-Api -Method GET -Path "/clusters/$ClusterID/vpn-connections/$VPNConnectionID/tunnel/stats"
    $stats = Get-ObjectPropertyValue -Object $response -Name "vpn_packet_stats"
    $queue = Get-ObjectPropertyValue -Object $stats -Name "client_to_gateway"
    if ($null -eq $queue) { return 0 }
    $depthProp = $queue.PSObject.Properties["queue_depth"]
    if ($null -eq $depthProp) { return 0 }
    return [int]$depthProp.Value
}

# Polls every 2 s until ingress telemetry shows at least $MinSendPackets sent
# and last_selected_route_id equal to $RouteID.  Returns the runtime report;
# throws on timeout.
function Wait-ForIngressRoute {
    param(
        [string]$NodeID,
        [string]$RouteID,
        [int]$MinSendPackets,
        [int]$TimeoutSeconds = 45
    )

    $deadline = (Get-Date).AddSeconds($TimeoutSeconds)
    do {
        $latest = Get-LatestRuntimeReport -NodeID $NodeID
        $ingress = Get-ObjectPropertyValue -Object $latest.report -Name "ingress"
        $sendPackets = Get-ObjectPropertyValue -Object $ingress -Name "send_packets"
        $selectedRoute = Get-ObjectPropertyValue -Object $ingress -Name "last_selected_route_id"
        if ($null -ne $ingress -and [int]$sendPackets -ge $MinSendPackets -and [string]$selectedRoute -eq $RouteID) {
            return $latest
        }
        Start-Sleep -Seconds 2
    } while ((Get-Date) -lt $deadline)
    throw "Timed out waiting for ingress telemetry route=$RouteID packets>=$MinSendPackets on node $NodeID"
}

# Like Wait-ForIngressRoute, but accepts any of $RouteIDs as the selected route.
function Wait-ForIngressAnyRoute {
    param(
        [string]$NodeID,
        [string[]]$RouteIDs,
        [int]$MinSendPackets,
        [int]$TimeoutSeconds = 45
    )

    $deadline = (Get-Date).AddSeconds($TimeoutSeconds)
    do {
        $latest = Get-LatestRuntimeReport -NodeID $NodeID
        $ingress = Get-ObjectPropertyValue -Object $latest.report -Name "ingress"
        $sendPackets = Get-ObjectPropertyValue -Object $ingress -Name "send_packets"
        $selectedRoute = Get-ObjectPropertyValue -Object $ingress -Name "last_selected_route_id"
        if ($null -ne $ingress -and [int]$sendPackets -ge $MinSendPackets -and $RouteIDs -contains [string]$selectedRoute) {
            return $latest
        }
        Start-Sleep -Seconds 2
    } while ((Get-Date) -lt $deadline)
    throw "Timed out waiting for ingress telemetry routes='$($RouteIDs -join ",")' packets>=$MinSendPackets on node $NodeID"
}

# Polls every 2 s until the node reports a non-empty
# "<connection>:client_to_gateway" inbox queue.  Returns the runtime report;
# throws on timeout.
function Wait-ForExitInbox {
    param(
        [string]$NodeID,
        [string]$VPNConnectionID,
        [int]$TimeoutSeconds = 45
    )

    $queueKey = "$VPNConnectionID`:client_to_gateway"
    $deadline = (Get-Date).AddSeconds($TimeoutSeconds)
    do {
        $latest = Get-LatestRuntimeReport -NodeID $NodeID
        $inbox = Get-ObjectPropertyValue -Object $latest.report -Name "inbox"
        $depths = Get-ObjectPropertyValue -Object $inbox -Name "queue_depths"
        if ($null -ne $depths) {
            $prop = $depths.PSObject.Properties[$queueKey]
            if ($null -ne $prop -and [int]$prop.Value -gt 0) {
                return $latest
            }
        }
        Start-Sleep -Seconds 2
    } while ((Get-Date) -lt $deadline)
    throw "Timed out waiting for exit inbox queue '$queueKey' on node $NodeID"
}

# Posts a synthetic heartbeat for the entry node whose runtime report marks
# $BadRouteID as failed (3 consecutive failures, rebuild recommended) and
# $GoodRouteID as the last working route.
function Send-FeedbackHeartbeat {
    param(
        [string]$EntryNodeID,
        [string]$BadRouteID,
        [string]$GoodRouteID
    )

    return Invoke-Api -Method POST -Path "/clusters/$ClusterID/nodes/$EntryNodeID/heartbeats" -Body @{
        health_status = "healthy"
        reported_version = $RequiredNodeVersion
        capabilities = @{
            native_node_agent = $true
            fabric_service_channel_runtime = $true
            fabric_service_channel_route_manager = $true
            smoke_feedback_injection = "c18z10"
        }
        service_states = @{
            smoke = "c18z10_exit_pool_feedback"
        }
        metadata = @{
            fabric_service_channel_runtime_report = @{
                schema_version = "c18l.fabric_service_channel_runtime_report.v1"
                ingress = @{
                    flow_scheduler = @{
                        channel_stats = @{
                            "c18z10-exit-pool-flow" = @{
                                last_route_id = $GoodRouteID
                                last_failed_route_id = $BadRouteID
                                last_error = "c18z10 marked primary exit route stale during active service-channel websocket"
                                consecutive_failures = 3
                                stall_count = 1
                                last_send_duration_ms = 250
                                route_rebuild_recommended = $true
                                degraded_fallback_recommended = $false
                            }
                        }
                    }
                }
            }
            smoke = @{
                name = "c18z10_live_service_channel_exit_pool"
                run_id = $runId
            }
        }
    }
}

# Polls every 2 s until the node's synthetic config contains a route-path
# decision for $BadRouteID with rebuild_status "applied" and the expected
# replacement.  Returns @{ config; decision }; throws on timeout.
function Wait-ForConfigDecision {
    param(
        [string]$NodeID,
        [string]$BadRouteID,
        [string]$ExpectedReplacementID,
        [int]$TimeoutSeconds = 60
    )

    $deadline = (Get-Date).AddSeconds($TimeoutSeconds)
    do {
        $config = Get-SyntheticConfig -NodeID $NodeID

        # The decisions section may not exist until a decision has been made,
        # so every step of the path is read defensively.
        $meshConfig = Get-ObjectPropertyValue -Object $config -Name "synthetic_mesh_config"
        $pathDecisions = Get-ObjectPropertyValue -Object $meshConfig -Name "route_path_decisions"
        $decisions = @(Get-ObjectPropertyValue -Object $pathDecisions -Name "decisions" | Where-Object { $null -ne $_ })
        $decision = @($decisions | Where-Object {
                (Get-ObjectPropertyValue -Object $_ -Name "route_id") -eq $BadRouteID -and
                (Get-ObjectPropertyValue -Object $_ -Name "rebuild_status") -eq "applied" -and
                (Get-ObjectPropertyValue -Object $_ -Name "replacement_route_id") -eq $ExpectedReplacementID
            }) | Select-Object -First 1
        if ($null -ne $decision) {
            return @{
                config = $config
                decision = $decision
            }
        }
        Start-Sleep -Seconds 2
    } while ((Get-Date) -lt $deadline)
    throw "Timed out waiting for applied rebuild decision $BadRouteID -> $ExpectedReplacementID"
}

# Polls every 2 s until the node's ingress report shows
# route_manager_transition.status = "applied_rebuild".  While waiting, and when
# both route ids are given, the feedback heartbeat is re-sent on each pass.
# Returns the runtime report; throws on timeout.
function Wait-ForAppliedRebuildTransition {
    param(
        [string]$NodeID,
        [string]$BadRouteID = "",
        [string]$ReplacementRouteID = "",
        [int]$TimeoutSeconds = 90
    )

    $deadline = (Get-Date).AddSeconds($TimeoutSeconds)
    do {
        $latest = Get-LatestRuntimeReport -NodeID $NodeID
        $ingress = Get-ObjectPropertyValue -Object $latest.report -Name "ingress"
        $transition = Get-ObjectPropertyValue -Object $ingress -Name "route_manager_transition"
        $status = [string](Get-ObjectPropertyValue -Object $transition -Name "status")
        if ($null -ne $transition -and $status -eq "applied_rebuild") {
            return $latest
        }
        if ($BadRouteID -ne "" -and $ReplacementRouteID -ne "") {
            Send-FeedbackHeartbeat -EntryNodeID $NodeID -BadRouteID $BadRouteID -GoodRouteID $ReplacementRouteID | Out-Null
        }
        Start-Sleep -Seconds 2
    } while ((Get-Date) -lt $deadline)
    throw "Timed out waiting for node route-manager transition applied_rebuild on node $NodeID"
}

# ---------------------------------------------------------------------------
# Smoke run
# ---------------------------------------------------------------------------
$entryNode = Get-NodeByName -Name $EntryNodeName
$primaryExitNode = Get-NodeByName -Name $PrimaryExitNodeName
$alternateExitNode = Get-NodeByName -Name $AlternateExitNodeName
$primaryRouteID = ""
$alternateRouteID = ""
$appliedDecision = $null
$appliedTransition = $null
$updatersStopped = $false
$result = $null
try {
    Stop-TestUpdaters
    $updatersStopped = $true
    Enable-TestMeshListener -Node $entryNode
    Enable-TestMeshListener -Node $primaryExitNode
    Enable-TestMeshListener -Node $alternateExitNode
    # The arguments of this call continue on the following source line.
    Clear-OldSmokeRouteIntents -SourceNodeID `
$entryNode.id -DestinationNodeID $primaryExitNode.id Clear-OldSmokeRouteIntents -SourceNodeID $entryNode.id -DestinationNodeID $alternateExitNode.id $primaryIntent = New-RouteIntent ` -SourceNodeID $entryNode.id ` -DestinationNodeID $primaryExitNode.id ` -Priority 2000000000 ` -Label "primary-exit" ` -Hops @($entryNode.id, $primaryExitNode.id) $alternateIntent = New-RouteIntent ` -SourceNodeID $entryNode.id ` -DestinationNodeID $alternateExitNode.id ` -Priority 1999999990 ` -Label "alternate-exit" ` -Hops @($entryNode.id, $alternateExitNode.id) $primaryRouteID = $primaryIntent.route_intent.id $alternateRouteID = $alternateIntent.route_intent.id $routeIDs = @($primaryRouteID, $alternateRouteID) $visibleConfig = Wait-ForRouteIntentVisible -NodeID $entryNode.id -RouteIDs $routeIDs $primaryExitVisibleConfig = Wait-ForRouteIntentVisible -NodeID $primaryExitNode.id -RouteIDs @($primaryRouteID) $alternateExitVisibleConfig = Wait-ForRouteIntentVisible -NodeID $alternateExitNode.id -RouteIDs @($alternateRouteID) $readyBefore = Wait-ForRuntimeReady -NodeID $entryNode.id -MinRoutes 2 $primaryExitReadyBefore = Wait-ForRuntimeReady -NodeID $primaryExitNode.id -MinRoutes 0 $alternateExitReadyBefore = Wait-ForRuntimeReady -NodeID $alternateExitNode.id -MinRoutes 0 $loadedConfig = Wait-ForRuntimeConfigVersion -NodeID $entryNode.id -ConfigVersion $visibleConfig.synthetic_mesh_config.config_version $primaryExitLoadedConfig = Wait-ForRuntimeConfigVersion -NodeID $primaryExitNode.id -ConfigVersion $primaryExitVisibleConfig.synthetic_mesh_config.config_version $alternateExitLoadedConfig = Wait-ForRuntimeConfigVersion -NodeID $alternateExitNode.id -ConfigVersion $alternateExitVisibleConfig.synthetic_mesh_config.config_version $lease = New-ServiceChannelLease -EntryNodeID $entryNode.id -ExitNodeIDs @($primaryExitNode.id, $alternateExitNode.id) -PreferredExitNodeID $primaryExitNode.id if ($lease.status -ne "ready") { throw "Lease status was '$($lease.status)', want ready" } if 
([string]$lease.primary_route.route_id -ne $primaryRouteID) { throw "Lease primary route was '$($lease.primary_route.route_id)', want primary exit route '$primaryRouteID'" } if ([string]$lease.selected_exit_node_id -ne [string]$primaryExitNode.id -or @($lease.exit_pool).Count -lt 2) { throw "Lease did not authorize expected exit pool: selected=$($lease.selected_exit_node_id) exit_pool_count=$(@($lease.exit_pool).Count)" } $baselineSendPackets = Get-IngressSendPackets -NodeID $entryNode.id $baselineRouteFailures = Get-IngressRouteFailures -NodeID $entryNode.id $baselineDropped = Get-IngressFlowDropped -NodeID $entryNode.id $baselinePrimaryExitDepth = Get-ExitQueueDepth -NodeID $primaryExitNode.id -VPNConnectionID $resourceId $baselineAlternateExitDepth = Get-ExitQueueDepth -NodeID $alternateExitNode.id -VPNConnectionID $resourceId $baselineBackendDepth = Get-BackendClientGatewayDepth -VPNConnectionID $resourceId $script:preRebuildPrimaryExitDepth = $baselinePrimaryExitDepth $rebuildAction = { $script:preRebuildPrimaryExitDepth = Wait-ForExitQueueDepth ` -NodeID $primaryExitNode.id ` -VPNConnectionID $resourceId ` -MinDepth ($baselinePrimaryExitDepth + ($PreRebuildBatchCount * $PacketsPerBatch)) ` -TimeoutSeconds 60 Send-FeedbackHeartbeat -EntryNodeID $entryNode.id -BadRouteID $primaryRouteID -GoodRouteID $alternateRouteID | Out-Null $script:appliedDecision = Wait-ForConfigDecision -NodeID $entryNode.id -BadRouteID $primaryRouteID -ExpectedReplacementID $alternateRouteID -TimeoutSeconds 90 $script:appliedTransition = Wait-ForAppliedRebuildTransition -NodeID $entryNode.id -BadRouteID $primaryRouteID -ReplacementRouteID $alternateRouteID -TimeoutSeconds 120 } $webSocketResult = Invoke-ServiceChannelWebSocketPressure ` -Lease $lease ` -PortStart 57000 ` -PreSwitchBatches $PreRebuildBatchCount ` -PostSwitchBatches $PostRebuildBatchCount ` -PacketsInBatch $PacketsPerBatch ` -DelayMilliseconds $BatchDelayMilliseconds ` -AfterPreSwitchAction $rebuildAction if (-not 
$webSocketResult.ok) {
        throw "WebSocket exit-pool pressure failed after $($webSocketResult.sent_batches) batches: $($webSocketResult.error)"
    }

    # Expected packet totals: every batch carries $PacketsPerBatch packets.
    # Pre-rebuild batches are expected at the primary exit and post-rebuild
    # batches at the alternate exit (see the queue-depth checks further down).
    $expectedPackets = ($PreRebuildBatchCount + $PostRebuildBatchCount) * $PacketsPerBatch
    $expectedPrimaryExitPackets = $PreRebuildBatchCount * $PacketsPerBatch
    $expectedAlternateExitPackets = $PostRebuildBatchCount * $PacketsPerBatch

    # Wait helpers are defined elsewhere in the file; presumably they poll until
    # the condition holds or the 120 s timeout elapses -- confirm against their
    # definitions. Inputs: the config version carried by the applied rebuild
    # decision, and the alternate route with a minimum send-packet count of
    # baseline + all expected packets.
    $postRebuildLoadedConfig = Wait-ForRuntimeConfigVersion -NodeID $entryNode.id -ConfigVersion $appliedDecision.config.synthetic_mesh_config.config_version -TimeoutSeconds 120
    $postRebuildIngress = Wait-ForIngressRoute -NodeID $entryNode.id -RouteID $alternateRouteID -MinSendPackets ($baselineSendPackets + $expectedPackets) -TimeoutSeconds 120

    # Use the larger of the depth recorded before the rebuild
    # ($script:preRebuildPrimaryExitDepth, assigned outside this section) and the
    # depth read now.
    # NOTE(review): presumably this guards against the queue shrinking between
    # the two reads -- confirm.
    $primaryExitDepth = [Math]::Max(
        [int]$script:preRebuildPrimaryExitDepth,
        [int](Get-ExitQueueDepth -NodeID $primaryExitNode.id -VPNConnectionID $resourceId)
    )
    if ($primaryExitDepth -lt ($baselinePrimaryExitDepth + $expectedPrimaryExitPackets)) {
        throw "Primary exit depth was $primaryExitDepth after pre-rebuild delivery, want >= $($baselinePrimaryExitDepth + $expectedPrimaryExitPackets)"
    }
    $alternateExitDepth = Wait-ForExitQueueDepth -NodeID $alternateExitNode.id -VPNConnectionID $resourceId -MinDepth ($baselineAlternateExitDepth + $expectedAlternateExitPackets) -TimeoutSeconds 120

    # Final runtime reports and counters, read before the expire calls that follow.
    $finalEntryRuntime = Get-LatestRuntimeReport -NodeID $entryNode.id
    $finalPrimaryExitRuntime = Get-LatestRuntimeReport -NodeID $primaryExitNode.id
    $finalAlternateExitRuntime = Get-LatestRuntimeReport -NodeID $alternateExitNode.id
    $finalRouteFailures = Get-IngressRouteFailures -NodeID $entryNode.id
    $finalDropped = Get-IngressFlowDropped -NodeID $entryNode.id
    $finalBackendDepth = Get-BackendClientGatewayDepth -VPNConnectionID $resourceId

    # Expire the route feedback for the primary route, reported by the entry node.
    $feedbackExpire = Invoke-Api -Method POST -Path "/clusters/$ClusterID/fabric/service-channels/route-feedback/expire" -Body @{
        actor_user_id = $ActorUserID
        reporter_node_id = $entryNode.id
        route_id = $primaryRouteID
        service_class = "vpn_packets"
        reason = "c18z10 exit pool smoke cleanup"
    }
    # NOTE(review): fixed 2 s pause; presumably lets the control plane process the
    # feedback expiry before the route intents are expired -- confirm.
    Start-Sleep -Seconds 2

    # Expire both route intents; their lifecycle_status values are recorded in
    # the result and verified by the route_intents_expired check.
    $expiredAlternate = Invoke-Api -Method POST -Path "/clusters/$ClusterID/mesh/route-intents/$alternateRouteID/expire" -Body @{ actor_user_id = $ActorUserID }
    $expiredPrimary = Invoke-Api -Method POST -Path "/clusters/$ClusterID/mesh/route-intents/$primaryRouteID/expire" -Body @{ actor_user_id = $ActorUserID }

    # Counter deltas relative to the baselines captured earlier in the script.
    $routeFailureDelta = $finalRouteFailures - $baselineRouteFailures
    $droppedDelta = $finalDropped - $baselineDropped

    # Read route_feedback_expire.status from the response; a $null result is
    # reported as "ok". Get-ObjectPropertyValue is defined elsewhere; presumably
    # it returns $null for an absent property -- confirm.
    $feedbackExpireStatus = Get-ObjectPropertyValue -Object (Get-ObjectPropertyValue -Object $feedbackExpire -Name "route_feedback_expire") -Name "status"
    if ($null -eq $feedbackExpireStatus) {
        $feedbackExpireStatus = "ok"
    }

    # Result document: run identity, observed values, and one boolean per check.
    $result = [ordered]@{
        schema_version = "c18z10.live_service_channel_exit_pool_smoke.v1"
        run_id = $runId
        base_url = $ApiBaseUrl
        entry_base_url = $EntryBaseUrl
        cluster_id = $ClusterID
        entry_node = @{ name = $entryNode.name; id = $entryNode.id }
        primary_exit_node = @{ name = $primaryExitNode.name; id = $primaryExitNode.id }
        alternate_exit_node = @{ name = $alternateExitNode.name; id = $alternateExitNode.id }
        resource_id = $resourceId
        route_intents = @{
            primary_exit_route_intent_id = $primaryRouteID
            alternate_exit_route_intent_id = $alternateRouteID
            primary_exit_hops = @($entryNode.id, $primaryExitNode.id)
            alternate_exit_hops = @($entryNode.id, $alternateExitNode.id)
            expired_primary_status = $expiredPrimary.route_intent.lifecycle_status
            expired_alternate_status = $expiredAlternate.route_intent.lifecycle_status
        }
        lease = @{
            channel_id = $lease.channel_id
            status = $lease.status
            selected_exit_node_id = $lease.selected_exit_node_id
            primary_route_id = $lease.primary_route.route_id
            primary_route_hops = $lease.primary_route.hops
            # @(...) forces an array so .Count works for $null or a single item.
            alternate_route_count = @($lease.alternate_routes).Count
            exit_pool_count = @($lease.exit_pool).Count
            entry_pool_count = @($lease.entry_pool).Count
        }
        websocket_pressure = @{
            requested_batches = ($PreRebuildBatchCount + $PostRebuildBatchCount)
            pre_rebuild_batches = $PreRebuildBatchCount
            post_rebuild_batches = $PostRebuildBatchCount
            packets_per_batch = $PacketsPerBatch
            expected_packets = $expectedPackets
            expected_primary_exit_packets = $expectedPrimaryExitPackets
            expected_alternate_exit_packets = $expectedAlternateExitPackets
            sent_batches = $webSocketResult.sent_batches
            sent_packets = $webSocketResult.sent_packets
            rebuild_action_ran = $webSocketResult.switch_action_ran
            final_state = $webSocketResult.state
        }
        rebuild = @{
            # Status of the cleanup feedback-expire call made above.
            feedback_expire_status = $feedbackExpireStatus
            applied_rebuild_status = $appliedDecision.decision.rebuild_status
            decision_source = $appliedDecision.decision.decision_source
            replacement_route_id = $appliedDecision.decision.replacement_route_id
            replacement_hops = $appliedDecision.decision.effective_hops
            transition_status = $appliedTransition.report.ingress.route_manager_transition.status
        }
        # Recorded for reference; no entry in `checks` asserts on the
        # route-failure delta.
        route_failures = @{
            baseline = $baselineRouteFailures
            final = $finalRouteFailures
            delta = $routeFailureDelta
        }
        flow_drops = @{
            baseline = $baselineDropped
            final = $finalDropped
            delta = $droppedDelta
        }
        primary_exit_queue = @{
            baseline_depth = $baselinePrimaryExitDepth
            depth = $primaryExitDepth
        }
        alternate_exit_queue = @{
            baseline_depth = $baselineAlternateExitDepth
            depth = $alternateExitDepth
        }
        backend_fallback_queue = @{
            baseline_depth = $baselineBackendDepth
            depth = $finalBackendDepth
        }
        # Set to $true at construction; per-check outcomes live in `checks`.
        passed = $true
        # Every value must evaluate to exactly $true for the run to pass.
        checks = [ordered]@{
            production_forwarding_ready = ($readyBefore.report.production_payload_forwarding -eq $true)
            primary_exit_production_forwarding_ready = ($primaryExitReadyBefore.report.production_payload_forwarding -eq $true)
            alternate_exit_production_forwarding_ready = ($alternateExitReadyBefore.report.production_payload_forwarding -eq $true)
            # $routeIDs is assigned outside this section; at least two of its
            # ids must appear in the entry node's visible config.
            route_intents_visible_before_pressure = (@($visibleConfig.synthetic_mesh_config.routes | Where-Object { $routeIDs -contains $_.route_id }).Count -ge 2)
            primary_exit_route_intent_visible_before_pressure = (@($primaryExitVisibleConfig.synthetic_mesh_config.routes | Where-Object { $_.route_id -eq $primaryRouteID }).Count -ge 1)
            alternate_exit_route_intent_visible_before_pressure = (@($alternateExitVisibleConfig.synthetic_mesh_config.routes | Where-Object { $_.route_id -eq $alternateRouteID }).Count -ge 1)
            # NOTE(review): both operands are cast to [string], so -ge is a
            # string comparison rather than a numeric or version comparison.
            # Confirm config_version values order correctly as strings
            # (e.g. "10" vs "9" would not).
            entry_runtime_loaded_visible_config = ([string]$loadedConfig.report.config_version -ge [string]$visibleConfig.synthetic_mesh_config.config_version)
            primary_exit_runtime_loaded_visible_config = ([string]$primaryExitLoadedConfig.report.config_version -ge [string]$primaryExitVisibleConfig.synthetic_mesh_config.config_version)
            alternate_exit_runtime_loaded_visible_config = ([string]$alternateExitLoadedConfig.report.config_version -ge [string]$alternateExitVisibleConfig.synthetic_mesh_config.config_version)
            signed_lease_ready = ($lease.status -eq "ready")
            signed_lease_selected_primary_exit = ([string]$lease.primary_route.route_id -eq $primaryRouteID -and [string]$lease.selected_exit_node_id -eq [string]$primaryExitNode.id)
            signed_lease_has_exit_pool = (@($lease.exit_pool).Count -ge 2)
            signed_lease_has_exit_pool_alternate_route = (@($lease.alternate_routes | Where-Object { $_.route_id -eq $alternateRouteID }).Count -ge 1)
            long_lived_websocket_sent_all_batches = ($webSocketResult.ok -eq $true -and $webSocketResult.sent_batches -eq ($PreRebuildBatchCount + $PostRebuildBatchCount) -and $webSocketResult.sent_packets -eq $expectedPackets)
            rebuild_feedback_action_ran = ($webSocketResult.switch_action_ran -eq $true)
            control_plane_applied_exit_pool_rebuild_decision = ($appliedDecision.decision.rebuild_status -eq "applied" -and $appliedDecision.decision.replacement_route_id -eq $alternateRouteID -and $appliedDecision.decision.decision_source -eq "service_channel_feedback_exit_pool_replacement")
            node_agent_applied_rebuild_transition = ($appliedTransition.report.ingress.route_manager_transition.status -eq "applied_rebuild")
            # Same string-typed version comparison as the *_loaded_visible_config
            # checks above; the right-hand operand follows on the next line.
            entry_runtime_loaded_rebuild_config = ([string]$postRebuildLoadedConfig.report.config_version -ge
[string]$appliedDecision.config.synthetic_mesh_config.config_version)
            post_rebuild_uses_alternate_exit_route = ([string]$postRebuildIngress.report.ingress.last_selected_route_id -eq $alternateRouteID)
            primary_exit_inbox_received_pre_rebuild_packets = ($primaryExitDepth -ge ($baselinePrimaryExitDepth + $expectedPrimaryExitPackets))
            alternate_exit_inbox_received_post_rebuild_packets = ($alternateExitDepth -ge ($baselineAlternateExitDepth + $expectedAlternateExitPackets))
            no_backend_fallback_used = ($finalBackendDepth -eq $baselineBackendDepth)
            no_flow_drops = ($droppedDelta -eq 0)
            route_intents_expired = ($expiredPrimary.route_intent.lifecycle_status -eq "expired" -and $expiredAlternate.route_intent.lifecycle_status -eq "expired")
        }
        # Raw reports kept alongside the checks for post-mortem inspection.
        telemetry = @{
            final_entry_ingress = $finalEntryRuntime.report.ingress
            final_primary_exit_inbox = $finalPrimaryExitRuntime.report.inbox
            final_alternate_exit_inbox = $finalAlternateExitRuntime.report.inbox
            post_rebuild_ingress = $postRebuildIngress.report.ingress
            applied_decision = $appliedDecision.decision
            applied_transition = $appliedTransition.report.ingress.route_manager_transition
            websocket_result = $webSocketResult
        }
    }

    # Any check whose value is not exactly $true counts as failed. The failure is
    # raised only after the result file has been written (see the end of the
    # script), so a failing run still leaves its checks and telemetry on disk
    # instead of a stale artifact from an earlier run.
    $failedChecks = @($result.checks.GetEnumerator() | Where-Object { $_.Value -ne $true })
    # Replace the `passed = $true` placeholder from the literal with the outcome.
    $result["passed"] = ($failedChecks.Count -eq 0)
} finally {
    # Best-effort cleanup that runs on every exit path. On the success path both
    # route intents were already expired inside the try block, so these calls are
    # repeats; errors are deliberately ignored so that cleanup can never mask the
    # original failure.
    if ($primaryRouteID) {
        try {
            Invoke-Api -Method POST -Path "/clusters/$ClusterID/mesh/route-intents/$primaryRouteID/expire" -Body @{ actor_user_id = $ActorUserID } | Out-Null
        } catch {}
    }
    if ($alternateRouteID) {
        try {
            Invoke-Api -Method POST -Path "/clusters/$ClusterID/mesh/route-intents/$alternateRouteID/expire" -Body @{ actor_user_id = $ActorUserID } | Out-Null
        } catch {}
    }
    # Restart the test updaters if $updatersStopped is set (assigned outside this
    # section); a restart failure is reported as a warning, not raised.
    if ($updatersStopped) {
        try {
            Start-TestUpdaters
        } catch {
            Write-Warning "Could not restart test updaters: $($_.Exception.Message)"
        }
    }
}

# Reached only when the try block completed without throwing, so $result and
# $failedChecks are both assigned here. Persist the result before deciding the
# outcome.
$resultFullPath = Join-Path $repoRoot $ResultPath
$resultDir = Split-Path -Parent $resultFullPath
if (-not (Test-Path $resultDir)) {
    New-Item -ItemType Directory -Path $resultDir | Out-Null
}
$result | ConvertTo-Json -Depth 100 | Set-Content -Path $resultFullPath -Encoding UTF8

# Fail the run with the same message as before, now that the artifact exists.
if ($failedChecks.Count -gt 0) {
    throw "C18Z10 failed checks: $($failedChecks.Name -join ', ')"
}

Write-Host "C18Z10 live service-channel exit pool smoke passed. Result: $resultFullPath"
# Emit the result object on the pipeline for callers.
$result