<#
.SYNOPSIS
    C18Z2 live service-channel soak smoke test.

.DESCRIPTION
    Drives a live test cluster through the control-plane HTTP API:
      1. stops the test host-agent updater containers over SSH,
      2. enables the mesh listener workload on the entry and exit nodes,
      3. creates a primary and an alternate "vpn_packets" route intent,
      4. waits for both nodes to see the routes and load the synthetic config,
      5. requests a service-channel lease and posts "warm" packet batches,
      6. optionally restarts the exit node container and posts more batches,
      7. posts "recovery" batches and checks that the exit inbox queue grew,
      8. expires the route intents and writes a JSON result file.

    The result file is written only when every check passes; on failure the
    script throws after the cleanup in the finally block has run.

.PARAMETER ApiBaseUrl
    Base URL of the control-plane API (every Invoke-Api path is appended to it).
.PARAMETER ClusterID
    Cluster identifier used in every API path.
.PARAMETER ActorUserID
    User identifier sent as actor_user_id on API calls.
.PARAMETER EntryNodeName
    Name of the node used as service-channel entry.
.PARAMETER ExitNodeName
    Name of the node used as service-channel exit (restarted unless -SkipExitRestart).
.PARAMETER EntryBaseUrl
    Base URL the packet batches are POSTed to.
.PARAMETER DockerSSH
    SSH destination on which the docker commands are executed.
.PARAMETER WarmBatchCount
    Number of batches posted before the exit restart.
.PARAMETER RecoveryBatchCount
    Number of batches posted after the exit restart.
.PARAMETER SkipExitRestart
    When set, the exit container is not restarted.
.PARAMETER RequiredNodeVersion
    Version string reported by Send-FeedbackHeartbeat.
.PARAMETER ResultPath
    Result file path, relative to the repository root (two levels above this script).
#>
param(
    [string]$ApiBaseUrl = "http://192.168.200.61:18121/api/v1",
    [string]$ClusterID = "cfc0743d-d960-49fb-9de8-96e063d5e4aa",
    [string]$ActorUserID = "f67d943f-5397-4b3a-a229-695fe67ad700",
    [string]$EntryNodeName = "test-1",
    [string]$ExitNodeName = "test-2",
    [string]$EntryBaseUrl = "http://192.168.200.61:19131",
    [string]$DockerSSH = "test-docker",
    [int]$WarmBatchCount = 6,
    [int]$RecoveryBatchCount = 8,
    [switch]$SkipExitRestart,
    [string]$RequiredNodeVersion = "0.2.182",
    [string]$ResultPath = "artifacts\c18z2-live-service-channel-soak-smoke-result.json"
)

Set-StrictMode -Version Latest
$ErrorActionPreference = "Stop"
Add-Type -AssemblyName System.Net.Http

$repoRoot = (Resolve-Path (Join-Path $PSScriptRoot "..\..")).ProviderPath
$runId = "c18z2-" + (Get-Date -Format "yyyyMMdd-HHmmss")
$resourceId = "vpn-$runId"

# Returns the value of property $Name on $Object, or $null when the object is
# $null or has no such property. Strict mode turns a direct read of a missing
# property into a terminating error, so every optional field of an API
# response is read through this helper.
function Get-ObjectPropertyValue {
    param(
        [object]$Object,
        [string]$Name
    )
    if ($null -eq $Object) {
        return $null
    }
    $prop = $Object.PSObject.Properties[$Name]
    if ($null -eq $prop) {
        return $null
    }
    return $prop.Value
}

# Calls "$ApiBaseUrl$Path" with the given HTTP method and an optional body
# serialized as JSON. Returns the parsed response. Any failure is rethrown as
# a string of the form "<Method> <Path> failed with HTTP <status>: <details>".
function Invoke-Api {
    param(
        [string]$Method,
        [string]$Path,
        [object]$Body = $null
    )
    $uri = "$ApiBaseUrl$Path"
    try {
        if ($null -eq $Body) {
            return Invoke-RestMethod -Method $Method -Uri $uri -TimeoutSec 30
        }
        return Invoke-RestMethod -Method $Method -Uri $uri -ContentType "application/json" -Body ($Body | ConvertTo-Json -Depth 80) -TimeoutSec 30
    }
    catch {
        # Not every exception type exposes a Response property and
        # ErrorDetails may be $null. Reading either directly under strict mode
        # would raise a second error that hides the original failure.
        $statusCode = $null
        $failedResponse = Get-ObjectPropertyValue -Object $_.Exception -Name "Response"
        if ($failedResponse) {
            $statusCode = [int]$failedResponse.StatusCode
        }
        $details = $null
        if ($null -ne $_.ErrorDetails) {
            $details = $_.ErrorDetails.Message
        }
        if (-not $details) {
            $details = $_.Exception.Message
        }
        throw "$Method $Path failed with HTTP $statusCode`: $details"
    }
}

# Looks up a node of the cluster by its name; throws when no node matches.
function Get-NodeByName {
    param([string]$Name)
    $nodes = (Invoke-Api -Method GET -Path "/clusters/$ClusterID/nodes?actor_user_id=$ActorUserID").nodes
    $node = @($nodes | Where-Object { $_.name -eq $Name }) | Select-Object -First 1
    if ($null -eq $node) {
        throw "Node '$Name' was not found in cluster $ClusterID"
    }
    return $node
}

# Maps a test node name to its mesh listener port. Unknown names fall back to
# 19131, the same port as "test-1".
function Get-MeshPort {
    param([string]$Name)
    switch ($Name) {
        "test-1" { return 19131 }
        "test-2" { return 19132 }
        "test-3" { return 19133 }
        default { return 19131 }
    }
}

# Sets the desired state of the "mesh-listener" workload on $Node to enabled,
# listening on the port returned by Get-MeshPort.
function Enable-TestMeshListener {
    param([object]$Node)
    $port = Get-MeshPort -Name $Node.name
    Invoke-Api -Method PUT -Path "/clusters/$ClusterID/nodes/$($Node.id)/workloads/mesh-listener/desired" -Body @{
        actor_user_id = $ActorUserID
        desired_state = "enabled"
        runtime_mode = "container"
        version = "c18z2-live-fsc-soak"
        config = @{
            listen_addr = "0.0.0.0:$port"
            listen_port_mode = "manual"
            advertise_endpoint = "http://192.168.200.61:$port"
            advertise_transport = "direct_http"
            connectivity_mode = "private_lan"
            nat_type = "none"
            region = "docker-test"
            production_forwarding = $true
        }
        environment = @{}
    } | Out-Null
}

# Expires every active "vpn_packets" route intent between the two nodes whose
# policy metadata marks it as created by the c18z1 or c18z2 smoke test.
function Clear-OldSmokeRouteIntents {
    param(
        [string]$SourceNodeID,
        [string]$DestinationNodeID
    )
    $response = Invoke-Api -Method GET -Path "/clusters/$ClusterID/mesh/route-intents?actor_user_id=$ActorUserID"
    $items = Get-ObjectPropertyValue -Object $response -Name "route_intents"
    foreach ($item in @($items)) {
        # @($null) contains a single $null element; skip it.
        if ($null -eq $item) {
            continue
        }
        if ([string](Get-ObjectPropertyValue -Object $item -Name "lifecycle_status") -ne "active") {
            continue
        }
        if ([string](Get-ObjectPropertyValue -Object $item -Name "service_class") -ne "vpn_packets") {
            continue
        }
        # Selectors and policy are read defensively: intents created by other
        # tooling may not carry a node_id selector or smoke metadata.
        $sourceSelector = Get-ObjectPropertyValue -Object $item -Name "source_selector"
        $destinationSelector = Get-ObjectPropertyValue -Object $item -Name "destination_selector"
        $sourceID = [string](Get-ObjectPropertyValue -Object $sourceSelector -Name "node_id")
        $destinationID = [string](Get-ObjectPropertyValue -Object $destinationSelector -Name "node_id")
        if ($sourceID -ne $SourceNodeID -or $destinationID -ne $DestinationNodeID) {
            continue
        }
        $policy = Get-ObjectPropertyValue -Object $item -Name "policy"
        $metadata = Get-ObjectPropertyValue -Object $policy -Name "metadata"
        $smoke = ""
        $smokeValue = Get-ObjectPropertyValue -Object $metadata -Name "smoke"
        if ($null -ne $smokeValue) {
            $smoke = [string]$smokeValue
        }
        if ($smoke -ne "c18z1_live_service_channel_ingress" -and $smoke -ne "c18z2_live_service_channel_soak") {
            continue
        }
        Invoke-Api -Method POST -Path "/clusters/$ClusterID/mesh/route-intents/$($item.id)/expire" -Body @{
            actor_user_id = $ActorUserID
        } | Out-Null
    }
}

# Creates a "vpn_packets" route intent from source to destination that expires
# ten minutes from now, tagged with this run's id and the given label.
function New-RouteIntent {
    param(
        [string]$SourceNodeID,
        [string]$DestinationNodeID,
        [int]$Priority,
        [string]$Label
    )
    $expiresAt = (Get-Date).ToUniversalTime().AddMinutes(10).ToString("o")
    return Invoke-Api -Method POST -Path "/clusters/$ClusterID/mesh/route-intents" -Body @{
        actor_user_id = $ActorUserID
        source_selector = @{ node_id = $SourceNodeID }
        destination_selector = @{ node_id = $DestinationNodeID }
        service_class = "vpn_packets"
        priority = $Priority
        policy = @{
            synthetic_enabled = $true
            route_version = "$runId-$Label"
            policy_version = "$runId-$Label"
            peer_directory_version = "$runId-$Label"
            hops = @($SourceNodeID, $DestinationNodeID)
            allowed_channels = @("vpn_packet", "fabric_control")
            max_ttl = 8
            max_hops = 8
            expires_at = $expiresAt
            metadata = @{
                smoke = "c18z2_live_service_channel_soak"
                run_id = $runId
                label = $Label
            }
        }
    }
}

# Fetches the synthetic mesh config the control plane serves for a node.
function Get-SyntheticConfig {
    param([string]$NodeID)
    return Invoke-Api -Method GET -Path "/clusters/$ClusterID/nodes/$NodeID/mesh/synthetic-config?actor_user_id=$ActorUserID"
}

# Returns the most recent heartbeat of a node, or $null when the API returns
# no heartbeat (indexing an empty array would throw under strict mode).
function Get-LatestHeartbeat {
    param([string]$NodeID)
    $response = Invoke-Api -Method GET -Path "/clusters/$ClusterID/nodes/$NodeID/heartbeats?actor_user_id=$ActorUserID&limit=1"
    $heartbeats = @(Get-ObjectPropertyValue -Object $response -Name "heartbeats")
    if ($heartbeats.Count -eq 0) {
        return $null
    }
    return $heartbeats[0]
}

# Returns @{ heartbeat; report } for a node, where report is the
# fabric_service_channel_runtime_report found in the heartbeat metadata, or
# $null when the heartbeat does not carry one.
function Get-LatestRuntimeReport {
    param([string]$NodeID)
    $hb = Get-LatestHeartbeat -NodeID $NodeID
    $metadata = Get-ObjectPropertyValue -Object $hb -Name "metadata"
    return @{
        heartbeat = $hb
        report = Get-ObjectPropertyValue -Object $metadata -Name "fabric_service_channel_runtime_report"
    }
}

# Polls every 2 seconds until the node's runtime report says it is enabled,
# forwards production payloads and has at least $MinRoutes route candidates.
# Returns the matching report wrapper; throws on timeout.
function Wait-ForRuntimeReady {
    param(
        [string]$NodeID,
        [int]$MinRoutes,
        [int]$TimeoutSeconds = 90
    )
    $deadline = (Get-Date).AddSeconds($TimeoutSeconds)
    do {
        $latest = Get-LatestRuntimeReport -NodeID $NodeID
        $report = $latest.report
        if ($null -ne $report) {
            $enabled = Get-ObjectPropertyValue -Object $report -Name "enabled"
            $forwarding = Get-ObjectPropertyValue -Object $report -Name "production_payload_forwarding"
            $routeTotal = Get-ObjectPropertyValue -Object $report -Name "route_candidate_total"
            if ($enabled -eq $true -and $forwarding -eq $true -and [int]$routeTotal -ge $MinRoutes) {
                return $latest
            }
        }
        Start-Sleep -Seconds 2
    } while ((Get-Date) -lt $deadline)
    throw "Timed out waiting for production service-channel runtime ready on node $NodeID"
}

# Polls every 2 seconds until the node reports a config_version that compares
# greater than or equal to $ConfigVersion. Returns the report wrapper; throws
# on timeout.
# NOTE(review): the comparison is a string comparison, not a numeric or
# semantic-version one - confirm the version format orders correctly as text.
function Wait-ForRuntimeConfigVersion {
    param(
        [string]$NodeID,
        [string]$ConfigVersion,
        [int]$TimeoutSeconds = 90
    )
    $deadline = (Get-Date).AddSeconds($TimeoutSeconds)
    do {
        $latest = Get-LatestRuntimeReport -NodeID $NodeID
        if ($null -ne $latest.report) {
            # Only accept reports that really carry config_version; callers
            # read that property from the returned report afterwards.
            $versionProp = $latest.report.PSObject.Properties["config_version"]
            if ($null -ne $versionProp) {
                $loadedVersion = [string]$versionProp.Value
                if ($loadedVersion -ge $ConfigVersion) {
                    return $latest
                }
            }
        }
        Start-Sleep -Seconds 2
    } while ((Get-Date) -lt $deadline)
    throw "Timed out waiting for node $NodeID to load synthetic config $ConfigVersion"
}

# Polls every 2 seconds until the node's synthetic config lists at least as
# many of the given route ids as were requested. Returns the config; throws
# on timeout.
function Wait-ForRouteIntentVisible {
    param(
        [string]$NodeID,
        [string[]]$RouteIDs,
        [int]$TimeoutSeconds = 60
    )
    $deadline = (Get-Date).AddSeconds($TimeoutSeconds)
    do {
        $config = Get-SyntheticConfig -NodeID $NodeID
        $meshConfig = Get-ObjectPropertyValue -Object $config -Name "synthetic_mesh_config"
        $routes = @(Get-ObjectPropertyValue -Object $meshConfig -Name "routes")
        $present = @($routes | Where-Object { $null -ne $_ -and $RouteIDs -contains (Get-ObjectPropertyValue -Object $_ -Name "route_id") })
        if ($present.Count -ge $RouteIDs.Count) {
            return $config
        }
        Start-Sleep -Seconds 2
    } while ((Get-Date) -lt $deadline)
    throw "Timed out waiting for routes '$($RouteIDs -join ",")' in synthetic config for node $NodeID"
}

# Requests a service-channel lease for this run's resource id between the
# given entry and exit nodes and returns the lease object of the response.
function New-ServiceChannelLease {
    param(
        [string]$EntryNodeID,
        [string]$ExitNodeID
    )
    return (Invoke-Api -Method POST -Path "/clusters/$ClusterID/fabric/service-channels/leases" -Body @{
        actor_user_id = $ActorUserID
        organization_id = "org-c18z1-smoke"
        user_id = $ActorUserID
        resource_id = $resourceId
        service_class = "vpn_packets"
        entry_node_ids = @($EntryNodeID)
        exit_node_ids = @($ExitNodeID)
        preferred_entry_node_id = $EntryNodeID
        preferred_exit_node_id = $ExitNodeID
        allowed_channels = @("vpn_packet", "bulk", "control")
        ttl_seconds = 300
        metadata = @{
            smoke = "c18z2_live_service_channel_soak"
            run_id = $runId
        }
    }).fabric_service_channel_lease
}

# Serializes $Value as compact JSON and encodes the UTF-8 bytes as unpadded
# base64url ("+" -> "-", "/" -> "_", trailing "=" removed).
function ConvertTo-Base64UrlJson {
    param([object]$Value)
    $json = $Value | ConvertTo-Json -Depth 80 -Compress
    $bytes = [System.Text.Encoding]::UTF8.GetBytes($json)
    return [Convert]::ToBase64String($bytes).TrimEnd("=").Replace("+", "-").Replace("/", "_")
}

# Builds a minimal IPv4/UDP test packet 10.18.1.10:$SourcePort -> 10.18.2.20:3389
# whose payload is the ASCII text "c18z1-<SourcePort>". Fields that are not
# assigned below (including both checksum fields) stay zero.
function New-TestIPv4UDPPacket {
    param([int]$SourcePort)
    $payload = [System.Text.Encoding]::ASCII.GetBytes("c18z1-$SourcePort")
    $totalLength = 20 + 8 + $payload.Length
    $packet = New-Object byte[] $totalLength

    # IPv4 header (20 bytes).
    $packet[0] = 0x45                                      # version 4, header length 5 words
    $packet[1] = 0
    $packet[2] = [byte](($totalLength -shr 8) -band 0xff)  # total length, big-endian
    $packet[3] = [byte]($totalLength -band 0xff)
    $packet[8] = 64                                        # TTL
    $packet[9] = 17                                        # protocol: UDP
    $packet[12] = 10; $packet[13] = 18; $packet[14] = 1; $packet[15] = 10  # source address
    $packet[16] = 10; $packet[17] = 18; $packet[18] = 2; $packet[19] = 20  # destination address

    # UDP header (8 bytes), followed by the payload.
    $udpOffset = 20
    $destPort = 3389
    $udpLength = 8 + $payload.Length
    $packet[$udpOffset] = [byte](($SourcePort -shr 8) -band 0xff)
    $packet[$udpOffset + 1] = [byte]($SourcePort -band 0xff)
    $packet[$udpOffset + 2] = [byte](($destPort -shr 8) -band 0xff)
    $packet[$udpOffset + 3] = [byte]($destPort -band 0xff)
    $packet[$udpOffset + 4] = [byte](($udpLength -shr 8) -band 0xff)
    $packet[$udpOffset + 5] = [byte]($udpLength -band 0xff)
    [Array]::Copy($payload, 0, $packet, 28, $payload.Length)

    # The unary comma keeps the byte[] intact instead of letting the pipeline
    # unroll it into an object[] of single bytes.
    return ,$packet
}

# Concatenates packets into one batch body: each packet is preceded by its
# length as a 4-byte big-endian integer.
function New-PacketBatchBody {
    param([byte[][]]$Packets)
    $stream = [System.IO.MemoryStream]::new()
    try {
        foreach ($packet in $Packets) {
            $length = $packet.Length
            $stream.WriteByte([byte](($length -shr 24) -band 0xff))
            $stream.WriteByte([byte](($length -shr 16) -band 0xff))
            $stream.WriteByte([byte](($length -shr 8) -band 0xff))
            $stream.WriteByte([byte]($length -band 0xff))
            $stream.Write($packet, 0, $packet.Length)
        }
        return ,$stream.ToArray()
    }
    finally {
        $stream.Dispose()
    }
}

# Posts one batch of 8 test packets (source ports $PortStart .. $PortStart + 7)
# to the lease's entry endpoint. Returns an object with StatusCode and Body;
# throws when the response status is not a success code.
function Invoke-ServiceChannelPost {
    param(
        [object]$Lease,
        [int]$PortStart
    )
    $packets = @()
    for ($i = 0; $i -lt 8; $i++) {
        $packets += ,(New-TestIPv4UDPPacket -SourcePort ($PortStart + $i))
    }
    $path = $Lease.entry_http.path_template.Replace("{cluster_id}", $ClusterID).Replace("{channel_id}", $Lease.channel_id).Replace("{resource_id}", $resourceId)
    $url = "$EntryBaseUrl$path`?batch=true"
    $headers = @{
        "X-RAP-Service-Channel-Token" = $Lease.token.token
        "X-RAP-Fabric-Channel-ID" = $Lease.channel_id
        "X-RAP-Service-Class" = "vpn_packets"
        "X-RAP-Channel-Class" = "vpn_packet"
        "X-RAP-Service-Channel-Authority-Payload" = ConvertTo-Base64UrlJson -Value $Lease.authority_payload
        "X-RAP-Service-Channel-Authority-Signature" = ConvertTo-Base64UrlJson -Value $Lease.authority_signature
    }
    $body = New-PacketBatchBody -Packets $packets
    $client = [System.Net.Http.HttpClient]::new()
    $request = $null
    $response = $null
    try {
        $client.Timeout = [TimeSpan]::FromSeconds(30)
        $request = [System.Net.Http.HttpRequestMessage]::new([System.Net.Http.HttpMethod]::Post, $url)
        foreach ($header in $headers.GetEnumerator()) {
            [void]$request.Headers.TryAddWithoutValidation($header.Key, [string]$header.Value)
        }
        $content = [System.Net.Http.ByteArrayContent]::new($body)
        $content.Headers.ContentType = [System.Net.Http.Headers.MediaTypeHeaderValue]::Parse("application/vnd.rap.vpn-packet-batch.v1")
        $request.Content = $content
        $response = $client.SendAsync($request).GetAwaiter().GetResult()
        $responseBody = $response.Content.ReadAsStringAsync().GetAwaiter().GetResult()
        if (-not $response.IsSuccessStatusCode) {
            throw "Service-channel POST $url failed with HTTP $([int]$response.StatusCode): $responseBody"
        }
        return [pscustomobject]@{
            StatusCode = [int]$response.StatusCode
            Body = $responseBody
        }
    }
    finally {
        # The body was already read into a string, so the response can be
        # released here; disposing the request also disposes its content.
        if ($null -ne $response) { $response.Dispose() }
        if ($null -ne $request) { $request.Dispose() }
        $client.Dispose()
    }
}

# Returns ingress.send_packets from the node's latest runtime report, or 0
# when the report, its ingress section or the counter is absent.
function Get-IngressSendPackets {
    param([string]$NodeID)
    $latest = Get-LatestRuntimeReport -NodeID $NodeID
    $ingress = Get-ObjectPropertyValue -Object $latest.report -Name "ingress"
    $sendPackets = Get-ObjectPropertyValue -Object $ingress -Name "send_packets"
    if ($null -eq $sendPackets) {
        return 0
    }
    return [int]$sendPackets
}

# Returns ingress.send_route_failures from the node's latest runtime report,
# or 0 when the report, its ingress section or the counter is absent.
function Get-IngressRouteFailures {
    param([string]$NodeID)
    $latest = Get-LatestRuntimeReport -NodeID $NodeID
    $ingress = Get-ObjectPropertyValue -Object $latest.report -Name "ingress"
    $failures = Get-ObjectPropertyValue -Object $ingress -Name "send_route_failures"
    if ($null -eq $failures) {
        return 0
    }
    return [int]$failures
}

# Returns the depth of the "<VPNConnectionID>:client_to_gateway" inbox queue
# from the node's latest runtime report, or 0 when it is not reported.
function Get-ExitQueueDepth {
    param(
        [string]$NodeID,
        [string]$VPNConnectionID
    )
    $latest = Get-LatestRuntimeReport -NodeID $NodeID
    $queueKey = "$VPNConnectionID`:client_to_gateway"
    $inbox = Get-ObjectPropertyValue -Object $latest.report -Name "inbox"
    $depths = Get-ObjectPropertyValue -Object $inbox -Name "queue_depths"
    $depth = Get-ObjectPropertyValue -Object $depths -Name $queueKey
    if ($null -eq $depth) {
        return 0
    }
    return [int]$depth
}

# Polls every 2 seconds until the exit queue depth reaches $MinDepth.
# Returns the observed depth; throws on timeout.
function Wait-ForExitQueueDepth {
    param(
        [string]$NodeID,
        [string]$VPNConnectionID,
        [int]$MinDepth,
        [int]$TimeoutSeconds = 90
    )
    $deadline = (Get-Date).AddSeconds($TimeoutSeconds)
    do {
        $depth = Get-ExitQueueDepth -NodeID $NodeID -VPNConnectionID $VPNConnectionID
        if ($depth -ge $MinDepth) {
            return $depth
        }
        Start-Sleep -Seconds 2
    } while ((Get-Date) -lt $deadline)
    throw "Timed out waiting for exit queue depth >= $MinDepth on node $NodeID"
}

# Non-throwing wrapper around Invoke-ServiceChannelPost: returns an object
# with ok / status_code / error instead of raising.
function Invoke-ServiceChannelPostSafe {
    param(
        [object]$Lease,
        [int]$PortStart
    )
    try {
        $response = Invoke-ServiceChannelPost -Lease $Lease -PortStart $PortStart
        return [pscustomobject]@{
            ok = $true
            status_code = [int]$response.StatusCode
            error = ""
        }
    }
    catch {
        return [pscustomobject]@{
            ok = $false
            status_code = 0
            error = $_.Exception.Message
        }
    }
}

# Posts $Count batches, advancing the source-port base by 100 per batch and
# sleeping $DelayMilliseconds after each. Returns the per-batch results.
function Send-BatchSeries {
    param(
        [object]$Lease,
        [int]$Count,
        [int]$PortBase,
        [int]$DelayMilliseconds = 100
    )
    $results = @()
    for ($i = 0; $i -lt $Count; $i++) {
        $results += Invoke-ServiceChannelPostSafe -Lease $Lease -PortStart ($PortBase + ($i * 100))
        if ($DelayMilliseconds -gt 0) {
            Start-Sleep -Milliseconds $DelayMilliseconds
        }
    }
    return $results
}

# Runs a command line on the $DockerSSH host via ssh; throws on a non-zero
# exit code.
function Invoke-RemoteDocker {
    param([string]$Command)
    & ssh $DockerSSH $Command
    if ($LASTEXITCODE -ne 0) {
        throw "ssh $DockerSSH command failed: $Command"
    }
}

# Stops the three test host-agent updater containers. docker errors are
# ignored by the remote "|| true".
function Stop-TestUpdaters {
    Invoke-RemoteDocker -Command "docker stop rap_host_agent_updater_test-1 rap_host_agent_updater_test-2 rap_host_agent_updater_test-3 >/dev/null 2>&1 || true"
}

# Starts the three test host-agent updater containers again. docker errors
# are ignored by the remote "|| true".
function Start-TestUpdaters {
    Invoke-RemoteDocker -Command "docker start rap_host_agent_updater_test-1 rap_host_agent_updater_test-2 rap_host_agent_updater_test-3 >/dev/null 2>&1 || true"
}

# Restarts the node container "rap_test_node_<name with '-' replaced by '_'>".
function Restart-ExitContainer {
    param([string]$Name)
    $containerName = "rap_test_node_" + $Name.Replace("-", "_")
    Invoke-RemoteDocker -Command "docker restart $containerName >/dev/null"
}

# Polls every 2 seconds until ingress telemetry reports at least
# $MinSendPackets sent packets with $RouteID as the last selected route.
# Returns the report wrapper; throws on timeout.
function Wait-ForIngressRoute {
    param(
        [string]$NodeID,
        [string]$RouteID,
        [int]$MinSendPackets,
        [int]$TimeoutSeconds = 45
    )
    $deadline = (Get-Date).AddSeconds($TimeoutSeconds)
    do {
        $latest = Get-LatestRuntimeReport -NodeID $NodeID
        $ingress = Get-ObjectPropertyValue -Object $latest.report -Name "ingress"
        $sendPackets = Get-ObjectPropertyValue -Object $ingress -Name "send_packets"
        $selectedRoute = Get-ObjectPropertyValue -Object $ingress -Name "last_selected_route_id"
        if ($null -ne $ingress -and [int]$sendPackets -ge $MinSendPackets -and [string]$selectedRoute -eq $RouteID) {
            return $latest
        }
        Start-Sleep -Seconds 2
    } while ((Get-Date) -lt $deadline)
    throw "Timed out waiting for ingress telemetry route=$RouteID packets>=$MinSendPackets on node $NodeID"
}

# Same as Wait-ForIngressRoute, but accepts any of the given route ids as the
# last selected route.
function Wait-ForIngressAnyRoute {
    param(
        [string]$NodeID,
        [string[]]$RouteIDs,
        [int]$MinSendPackets,
        [int]$TimeoutSeconds = 45
    )
    $deadline = (Get-Date).AddSeconds($TimeoutSeconds)
    do {
        $latest = Get-LatestRuntimeReport -NodeID $NodeID
        $ingress = Get-ObjectPropertyValue -Object $latest.report -Name "ingress"
        $sendPackets = Get-ObjectPropertyValue -Object $ingress -Name "send_packets"
        $selectedRoute = Get-ObjectPropertyValue -Object $ingress -Name "last_selected_route_id"
        if ($null -ne $ingress -and [int]$sendPackets -ge $MinSendPackets -and $RouteIDs -contains [string]$selectedRoute) {
            return $latest
        }
        Start-Sleep -Seconds 2
    } while ((Get-Date) -lt $deadline)
    throw "Timed out waiting for ingress telemetry routes='$($RouteIDs -join ",")' packets>=$MinSendPackets on node $NodeID"
}

# Polls every 2 seconds until the "<VPNConnectionID>:client_to_gateway" inbox
# queue of the node has a depth greater than zero. Returns the report wrapper;
# throws on timeout.
function Wait-ForExitInbox {
    param(
        [string]$NodeID,
        [string]$VPNConnectionID,
        [int]$TimeoutSeconds = 45
    )
    $queueKey = "$VPNConnectionID`:client_to_gateway"
    $deadline = (Get-Date).AddSeconds($TimeoutSeconds)
    do {
        $latest = Get-LatestRuntimeReport -NodeID $NodeID
        $inbox = Get-ObjectPropertyValue -Object $latest.report -Name "inbox"
        $depths = Get-ObjectPropertyValue -Object $inbox -Name "queue_depths"
        if ($null -ne $depths) {
            $prop = $depths.PSObject.Properties[$queueKey]
            if ($null -ne $prop -and [int]$prop.Value -gt 0) {
                return $latest
            }
        }
        Start-Sleep -Seconds 2
    } while ((Get-Date) -lt $deadline)
    throw "Timed out waiting for exit inbox queue '$queueKey' on node $NodeID"
}

# Posts a heartbeat for the entry node whose runtime report marks $BadRouteID
# as failed (3 consecutive failures, rebuild recommended) and $GoodRouteID as
# the last used route.
function Send-FeedbackHeartbeat {
    param(
        [string]$EntryNodeID,
        [string]$BadRouteID,
        [string]$GoodRouteID
    )
    return Invoke-Api -Method POST -Path "/clusters/$ClusterID/nodes/$EntryNodeID/heartbeats" -Body @{
        health_status = "healthy"
        reported_version = $RequiredNodeVersion
        capabilities = @{
            native_node_agent = $true
            fabric_service_channel_runtime = $true
            fabric_service_channel_route_manager = $true
            smoke_feedback_injection = "c18z1"
        }
        service_states = @{
            smoke = "c18z1_live_ingress_feedback"
        }
        metadata = @{
            fabric_service_channel_runtime_report = @{
                schema_version = "c18l.fabric_service_channel_runtime_report.v1"
                ingress = @{
                    flow_scheduler = @{
                        channel_stats = @{
                            "c18z1-live-flow" = @{
                                last_route_id = $GoodRouteID
                                last_failed_route_id = $BadRouteID
                                last_error = "c18z1 forced stale route after live packet ingress"
                                consecutive_failures = 3
                                stall_count = 1
                                last_send_duration_ms = 250
                                route_rebuild_recommended = $true
                                degraded_fallback_recommended = $false
                            }
                        }
                    }
                }
            }
            smoke = @{
                name = "c18z1_live_service_channel_ingress"
                run_id = $runId
            }
        }
    }
}

# Polls every 2 seconds until the node's synthetic config contains an
# "applied" rebuild decision replacing $BadRouteID with
# $ExpectedReplacementID. Returns @{ config; decision }; throws on timeout.
function Wait-ForConfigDecision {
    param(
        [string]$NodeID,
        [string]$BadRouteID,
        [string]$ExpectedReplacementID,
        [int]$TimeoutSeconds = 60
    )
    $deadline = (Get-Date).AddSeconds($TimeoutSeconds)
    do {
        $config = Get-SyntheticConfig -NodeID $NodeID
        $meshConfig = Get-ObjectPropertyValue -Object $config -Name "synthetic_mesh_config"
        $pathDecisions = Get-ObjectPropertyValue -Object $meshConfig -Name "route_path_decisions"
        $decisions = @(Get-ObjectPropertyValue -Object $pathDecisions -Name "decisions")
        $decision = @($decisions | Where-Object {
                $null -ne $_ -and
                (Get-ObjectPropertyValue -Object $_ -Name "route_id") -eq $BadRouteID -and
                (Get-ObjectPropertyValue -Object $_ -Name "rebuild_status") -eq "applied" -and
                (Get-ObjectPropertyValue -Object $_ -Name "replacement_route_id") -eq $ExpectedReplacementID
            }) | Select-Object -First 1
        if ($null -ne $decision) {
            return @{
                config = $config
                decision = $decision
            }
        }
        Start-Sleep -Seconds 2
    } while ((Get-Date) -lt $deadline)
    throw "Timed out waiting for applied rebuild decision $BadRouteID -> $ExpectedReplacementID"
}

# Polls every 2 seconds until the node reports an ingress
# route_manager_transition with status "applied_rebuild". When both route ids
# are supplied, the feedback heartbeat is re-sent on every unsuccessful
# iteration. Returns the report wrapper; throws on timeout.
function Wait-ForAppliedRebuildTransition {
    param(
        [string]$NodeID,
        [string]$BadRouteID = "",
        [string]$ReplacementRouteID = "",
        [int]$TimeoutSeconds = 90
    )
    $deadline = (Get-Date).AddSeconds($TimeoutSeconds)
    do {
        $latest = Get-LatestRuntimeReport -NodeID $NodeID
        $ingress = Get-ObjectPropertyValue -Object $latest.report -Name "ingress"
        $transition = Get-ObjectPropertyValue -Object $ingress -Name "route_manager_transition"
        if ($null -ne $transition -and [string](Get-ObjectPropertyValue -Object $transition -Name "status") -eq "applied_rebuild") {
            return $latest
        }
        if ($BadRouteID -ne "" -and $ReplacementRouteID -ne "") {
            Send-FeedbackHeartbeat -EntryNodeID $NodeID -BadRouteID $BadRouteID -GoodRouteID $ReplacementRouteID | Out-Null
        }
        Start-Sleep -Seconds 2
    } while ((Get-Date) -lt $deadline)
    throw "Timed out waiting for node route-manager transition applied_rebuild on node $NodeID"
}

# ---------------------------------------------------------------------------
# Main flow
# ---------------------------------------------------------------------------

$entryNode = Get-NodeByName -Name $EntryNodeName
$exitNode = Get-NodeByName -Name $ExitNodeName
$primaryRouteID = ""
$alternateRouteID = ""
$updatersStopped = $false
$result = $null

try {
    Stop-TestUpdaters
    $updatersStopped = $true

    Enable-TestMeshListener -Node $entryNode
    Enable-TestMeshListener -Node $exitNode
    Clear-OldSmokeRouteIntents -SourceNodeID $entryNode.id -DestinationNodeID $exitNode.id

    $primaryIntent = New-RouteIntent -SourceNodeID $entryNode.id -DestinationNodeID $exitNode.id -Priority 2000000000 -Label "primary"
    $alternateIntent = New-RouteIntent -SourceNodeID $entryNode.id -DestinationNodeID $exitNode.id -Priority 1999999999 -Label "alternate"
    $primaryRouteID = $primaryIntent.route_intent.id
    $alternateRouteID = $alternateIntent.route_intent.id
    $routeIDs = @($primaryRouteID, $alternateRouteID)

    # Both nodes must see the routes, be ready, and have loaded the config
    # version that contains them before any traffic is sent.
    $visibleConfig = Wait-ForRouteIntentVisible -NodeID $entryNode.id -RouteIDs $routeIDs
    $exitVisibleConfig = Wait-ForRouteIntentVisible -NodeID $exitNode.id -RouteIDs $routeIDs
    $readyBefore = Wait-ForRuntimeReady -NodeID $entryNode.id -MinRoutes 2
    $exitReadyBefore = Wait-ForRuntimeReady -NodeID $exitNode.id -MinRoutes 0
    $loadedConfig = Wait-ForRuntimeConfigVersion -NodeID $entryNode.id -ConfigVersion $visibleConfig.synthetic_mesh_config.config_version
    $exitLoadedConfig = Wait-ForRuntimeConfigVersion -NodeID $exitNode.id -ConfigVersion $exitVisibleConfig.synthetic_mesh_config.config_version

    $lease = New-ServiceChannelLease -EntryNodeID $entryNode.id -ExitNodeID $exitNode.id
    if ($lease.status -ne "ready") {
        throw "Lease status was '$($lease.status)', want ready"
    }
    if ($routeIDs -notcontains [string]$lease.primary_route.route_id) {
        throw "Lease primary route was '$($lease.primary_route.route_id)', want one of smoke routes"
    }

    # Warm phase.
    $baselineSendPackets = Get-IngressSendPackets -NodeID $entryNode.id
    $baselineRouteFailures = Get-IngressRouteFailures -NodeID $entryNode.id
    $warmResults = Send-BatchSeries -Lease $lease -Count $WarmBatchCount -PortBase 43000 -DelayMilliseconds 100
    # The $null guard covers a zero batch count, where the series is empty
    # and piping it would otherwise hand a $null element to the filter.
    $warmOk = @($warmResults | Where-Object { $null -ne $_ -and $_.ok -eq $true }).Count
    if ($warmOk -lt $WarmBatchCount) {
        throw "Warm service-channel soak accepted $warmOk/$WarmBatchCount batches"
    }
    $warmIngress = Wait-ForIngressAnyRoute -NodeID $entryNode.id -RouteIDs $routeIDs -MinSendPackets ($baselineSendPackets + ($WarmBatchCount * 8)) -TimeoutSeconds 90
    $warmExitDepth = Wait-ForExitQueueDepth -NodeID $exitNode.id -VPNConnectionID $resourceId -MinDepth 8 -TimeoutSeconds 90

    # Optional exit restart; batches sent while it restarts may fail and are
    # only recorded, not asserted.
    $restartAttempted = $false
    $duringRestartResults = @()
    $postRestartBaselineExitDepth = $warmExitDepth
    if (-not $SkipExitRestart) {
        $restartAttempted = $true
        Restart-ExitContainer -Name $ExitNodeName
        $duringRestartResults = Send-BatchSeries -Lease $lease -Count 3 -PortBase 53000 -DelayMilliseconds 150
        $exitNode = Get-NodeByName -Name $ExitNodeName
        $postRestartVisibleConfig = Wait-ForRouteIntentVisible -NodeID $exitNode.id -RouteIDs $routeIDs -TimeoutSeconds 120
        $postRestartReady = Wait-ForRuntimeReady -NodeID $exitNode.id -MinRoutes 0 -TimeoutSeconds 120
        $postRestartLoadedConfig = Wait-ForRuntimeConfigVersion -NodeID $exitNode.id -ConfigVersion $postRestartVisibleConfig.synthetic_mesh_config.config_version -TimeoutSeconds 120
        $postRestartBaselineExitDepth = Get-ExitQueueDepth -NodeID $exitNode.id -VPNConnectionID $resourceId
    }
    else {
        $postRestartVisibleConfig = $exitVisibleConfig
        $postRestartReady = $exitReadyBefore
        $postRestartLoadedConfig = $exitLoadedConfig
    }

    # Recovery phase.
    $recoveryBaseline = Get-IngressSendPackets -NodeID $entryNode.id
    $recoveryResults = Send-BatchSeries -Lease $lease -Count $RecoveryBatchCount -PortBase 63000 -DelayMilliseconds 100
    $recoveryOk = @($recoveryResults | Where-Object { $null -ne $_ -and $_.ok -eq $true }).Count
    if ($recoveryOk -lt $RecoveryBatchCount) {
        throw "Recovery service-channel soak accepted $recoveryOk/$RecoveryBatchCount batches"
    }
    $recoveryIngress = Wait-ForIngressAnyRoute -NodeID $entryNode.id -RouteIDs $routeIDs -MinSendPackets ($recoveryBaseline + ($RecoveryBatchCount * 8)) -TimeoutSeconds 120
    $recoveryExitDepth = Wait-ForExitQueueDepth -NodeID $exitNode.id -VPNConnectionID $resourceId -MinDepth ($postRestartBaselineExitDepth + 8) -TimeoutSeconds 120
    $finalExitRuntime = Get-LatestRuntimeReport -NodeID $exitNode.id
    $finalExitInbox = Get-ObjectPropertyValue -Object $finalExitRuntime.report -Name "inbox"
    $finalRouteFailures = Get-IngressRouteFailures -NodeID $entryNode.id

    $expiredPrimary = Invoke-Api -Method POST -Path "/clusters/$ClusterID/mesh/route-intents/$primaryRouteID/expire" -Body @{
        actor_user_id = $ActorUserID
    }
    $expiredAlternate = Invoke-Api -Method POST -Path "/clusters/$ClusterID/mesh/route-intents/$alternateRouteID/expire" -Body @{
        actor_user_id = $ActorUserID
    }

    $duringRestartOk = @($duringRestartResults | Where-Object { $null -ne $_ -and $_.ok -eq $true }).Count

    $result = [ordered]@{
        schema_version = "c18z2.live_service_channel_soak_smoke.v1"
        run_id = $runId
        base_url = $ApiBaseUrl
        entry_base_url = $EntryBaseUrl
        cluster_id = $ClusterID
        entry_node = @{ name = $entryNode.name; id = $entryNode.id }
        exit_node = @{ name = $exitNode.name; id = $exitNode.id }
        resource_id = $resourceId
        route_intents = @{
            primary_route_intent_id = $primaryRouteID
            alternate_route_intent_id = $alternateRouteID
            expired_primary_status = $expiredPrimary.route_intent.lifecycle_status
            expired_alternate_status = $expiredAlternate.route_intent.lifecycle_status
        }
        lease = @{
            channel_id = $lease.channel_id
            status = $lease.status
            primary_route_id = $lease.primary_route.route_id
        }
        batches = @{
            warm_requested = $WarmBatchCount
            warm_accepted = $warmOk
            during_restart_requested = @($duringRestartResults).Count
            during_restart_accepted = $duringRestartOk
            recovery_requested = $RecoveryBatchCount
            recovery_accepted = $recoveryOk
        }
        route_failures = @{
            baseline = $baselineRouteFailures
            final = $finalRouteFailures
            delta = ($finalRouteFailures - $baselineRouteFailures)
        }
        exit_queue = @{
            warm_depth = $warmExitDepth
            post_restart_baseline_depth = $postRestartBaselineExitDepth
            recovery_depth = $recoveryExitDepth
        }
        restart_attempted = $restartAttempted
        passed = $true
        checks = [ordered]@{
            production_forwarding_ready = ($readyBefore.report.production_payload_forwarding -eq $true)
            exit_production_forwarding_ready = ($exitReadyBefore.report.production_payload_forwarding -eq $true)
            route_intents_visible_before_post = (@($visibleConfig.synthetic_mesh_config.routes | Where-Object { $routeIDs -contains $_.route_id }).Count -ge 2)
            exit_route_intents_visible_before_post = (@($exitVisibleConfig.synthetic_mesh_config.routes | Where-Object { $routeIDs -contains $_.route_id }).Count -ge 2)
            entry_runtime_loaded_visible_config = ([string]$loadedConfig.report.config_version -ge [string]$visibleConfig.synthetic_mesh_config.config_version)
            exit_runtime_loaded_visible_config = ([string]$exitLoadedConfig.report.config_version -ge [string]$exitVisibleConfig.synthetic_mesh_config.config_version)
            signed_lease_ready = ($lease.status -eq "ready")
            warm_batches_accepted = ($warmOk -eq $WarmBatchCount)
            warm_exit_inbox_received = ($warmExitDepth -ge 8)
            exit_restart_recovered = ($postRestartReady.report.production_payload_forwarding -eq $true -and [string]$postRestartLoadedConfig.report.config_version -ge [string]$postRestartVisibleConfig.synthetic_mesh_config.config_version)
            recovery_batches_accepted = ($recoveryOk -eq $RecoveryBatchCount)
            recovery_exit_inbox_grew = ($recoveryExitDepth -ge ($postRestartBaselineExitDepth + 8))
            route_intents_expired = ($expiredPrimary.route_intent.lifecycle_status -eq "expired" -and $expiredAlternate.route_intent.lifecycle_status -eq "expired")
        }
        telemetry = @{
            warm_ingress = $warmIngress.report.ingress
            recovery_ingress = $recoveryIngress.report.ingress
            exit_inbox = $finalExitInbox
            during_restart_results = $duringRestartResults
        }
    }

    $failedChecks = @($result.checks.GetEnumerator() | Where-Object { $_.Value -ne $true })
    if ($failedChecks.Count -gt 0) {
        throw "C18Z2 failed checks: $($failedChecks.Name -join ', ')"
    }
}
finally {
    # Best-effort cleanup: the intents may already have been expired by the
    # happy path above, so failures here are deliberately ignored.
    if ($primaryRouteID) {
        try {
            Invoke-Api -Method POST -Path "/clusters/$ClusterID/mesh/route-intents/$primaryRouteID/expire" -Body @{ actor_user_id = $ActorUserID } | Out-Null
        }
        catch {}
    }
    if ($alternateRouteID) {
        try {
            Invoke-Api -Method POST -Path "/clusters/$ClusterID/mesh/route-intents/$alternateRouteID/expire" -Body @{ actor_user_id = $ActorUserID } | Out-Null
        }
        catch {}
    }
    if ($updatersStopped) {
        try {
            Start-TestUpdaters
        }
        catch {
            Write-Warning "Could not restart test updaters: $($_.Exception.Message)"
        }
    }
}

# Reached only when the try block completed without throwing.
$resultFullPath = Join-Path $repoRoot $ResultPath
$resultDir = Split-Path -Parent $resultFullPath
if (-not (Test-Path $resultDir)) {
    New-Item -ItemType Directory -Path $resultDir | Out-Null
}
$result | ConvertTo-Json -Depth 100 | Set-Content -Path $resultFullPath -Encoding UTF8
Write-Host "C18Z2 live service-channel soak smoke passed. Result: $resultFullPath"
$result