# File: rdp-proxy/scripts/fabric/c18z2-live-service-channel-soak-smoke.ps1
# Timestamp: 2026-05-12 21:02:29 +03:00
# Size: 821 lines, 32 KiB
# Language: PowerShell

# C18Z2 live service-channel soak smoke test.
# Drives a test cluster through: enabling mesh listeners, creating two route
# intents, acquiring a service-channel lease, posting packet batches, optionally
# restarting the exit node container, posting more batches, and writing a JSON
# result file.
param(
# Base URL of the control-plane REST API (all Invoke-Api paths are appended to it).
[string]$ApiBaseUrl = "http://192.168.200.61:18121/api/v1",
# Cluster that owns the nodes and route intents used by the test.
[string]$ClusterID = "cfc0743d-d960-49fb-9de8-96e063d5e4aa",
# User id sent as actor_user_id on API calls.
[string]$ActorUserID = "f67d943f-5397-4b3a-a229-695fe67ad700",
# Names of the entry and exit nodes, resolved to node objects via Get-NodeByName.
[string]$EntryNodeName = "test-1",
[string]$ExitNodeName = "test-2",
# Base URL that packet batches are POSTed to (see Invoke-ServiceChannelPost).
[string]$EntryBaseUrl = "http://192.168.200.61:19131",
# SSH destination passed to `ssh` by Invoke-RemoteDocker.
[string]$DockerSSH = "test-docker",
# Number of batches sent before and after the optional exit restart.
[int]$WarmBatchCount = 6,
[int]$RecoveryBatchCount = 8,
# When set, the exit container is not restarted between the two phases.
[switch]$SkipExitRestart,
# Version string reported by Send-FeedbackHeartbeat as reported_version.
[string]$RequiredNodeVersion = "0.2.182",
# Result file path, joined onto the repository root computed below.
[string]$ResultPath = "artifacts\c18z2-live-service-channel-soak-smoke-result.json"
)
# Latest strict mode: missing properties and out-of-range indexes are errors.
Set-StrictMode -Version Latest
# Treat every error as terminating so failures surface through try/finally.
$ErrorActionPreference = "Stop"
# System.Net.Http supplies the HttpClient types used by Invoke-ServiceChannelPost.
Add-Type -AssemblyName System.Net.Http
# Repository root is two directories above the folder containing this script.
$repoRoot = (Resolve-Path (Join-Path $PSScriptRoot "..\..")).ProviderPath
# Per-run identifier; embedded in route intent versions, metadata and the resource id.
$runId = "c18z2-" + (Get-Date -Format "yyyyMMdd-HHmmss")
$resourceId = "vpn-$runId"
# Calls the control-plane REST API at "$ApiBaseUrl$Path" and returns the parsed
# response from Invoke-RestMethod.
#   Method - HTTP method name.
#   Path   - path (and query string) appended to $ApiBaseUrl.
#   Body   - optional object; when present it is sent as JSON (depth 80).
# Throws a string of the form "<Method> <Path> failed with HTTP <code>: <details>".
# The status code is empty when the failure did not carry an HTTP response.
function Invoke-Api {
    param(
        [string]$Method,
        [string]$Path,
        [object]$Body = $null
    )
    $uri = "$ApiBaseUrl$Path"
    try {
        if ($null -eq $Body) {
            return Invoke-RestMethod -Method $Method -Uri $uri -TimeoutSec 30
        }
        return Invoke-RestMethod -Method $Method -Uri $uri -ContentType "application/json" -Body ($Body | ConvertTo-Json -Depth 80) -TimeoutSec 30
    }
    catch {
        $failure = $_
        $statusCode = $null
        # Under Set-StrictMode -Version Latest, touching a property that the
        # exception type does not declare throws, which would hide the real
        # failure. Probe for Response/StatusCode before reading them.
        $responseProp = $failure.Exception.PSObject.Properties["Response"]
        if ($null -ne $responseProp -and $null -ne $responseProp.Value) {
            $statusProp = $responseProp.Value.PSObject.Properties["StatusCode"]
            if ($null -ne $statusProp -and $null -ne $statusProp.Value) {
                $statusCode = [int]$statusProp.Value
            }
        }
        # ErrorDetails is null for failures that carry no response body.
        $details = $null
        if ($null -ne $failure.ErrorDetails) {
            $details = $failure.ErrorDetails.Message
        }
        if (-not $details) {
            $details = $failure.Exception.Message
        }
        throw "$Method $Path failed with HTTP $statusCode`: $details"
    }
}
# Looks up a node of cluster $ClusterID by its name and returns the first match.
# Throws when no node with that name is listed.
function Get-NodeByName {
    param([string]$Name)
    $listing = Invoke-Api -Method GET -Path "/clusters/$ClusterID/nodes?actor_user_id=$ActorUserID"
    $allNodes = $listing.nodes
    $candidates = @($allNodes | Where-Object { $_.name -eq $Name })
    $found = $candidates | Select-Object -First 1
    if ($null -ne $found) {
        return $found
    }
    throw "Node '$Name' was not found in cluster $ClusterID"
}
# Maps a test node name to its mesh listener port.
# Names other than test-1/test-2/test-3 fall back to 19131.
function Get-MeshPort {
    param([string]$Name)
    $portsByNode = @{
        "test-1" = 19131
        "test-2" = 19132
        "test-3" = 19133
    }
    if ($portsByNode.ContainsKey($Name)) {
        return $portsByNode[$Name]
    }
    return 19131
}
# Sets the desired state of the node's mesh-listener workload to "enabled",
# listening on the port Get-MeshPort assigns to the node name.
# The API response is discarded.
function Enable-TestMeshListener {
    param([object]$Node)
    $port = Get-MeshPort -Name $Node.name
    $listenerConfig = @{
        listen_addr = "0.0.0.0:$port"
        listen_port_mode = "manual"
        advertise_endpoint = "http://192.168.200.61:$port"
        advertise_transport = "direct_http"
        connectivity_mode = "private_lan"
        nat_type = "none"
        region = "docker-test"
        production_forwarding = $true
    }
    $desired = @{
        actor_user_id = $ActorUserID
        desired_state = "enabled"
        runtime_mode = "container"
        version = "c18z2-live-fsc-soak"
        config = $listenerConfig
        environment = @{}
    }
    $target = "/clusters/$ClusterID/nodes/$($Node.id)/workloads/mesh-listener/desired"
    Invoke-Api -Method PUT -Path $target -Body $desired | Out-Null
}
# Expires active "vpn_packets" route intents between the given source and
# destination nodes that were created by a previous smoke run. A route intent
# counts as a smoke leftover when policy.metadata.smoke is one of the two known
# smoke tags.
#   SourceNodeID / DestinationNodeID - node ids the intent must connect.
# Null list entries and route intents that lack optional fields (policy,
# metadata, selectors) are skipped. Under Set-StrictMode -Version Latest a direct
# property read on them would throw, so every field is read through
# Get-ObjectPropertyValue.
function Clear-OldSmokeRouteIntents {
    param(
        [string]$SourceNodeID,
        [string]$DestinationNodeID
    )
    $smokeTags = @("c18z1_live_service_channel_ingress", "c18z2_live_service_channel_soak")
    $response = Invoke-Api -Method GET -Path "/clusters/$ClusterID/mesh/route-intents?actor_user_id=$ActorUserID"
    $items = Get-ObjectPropertyValue -Object $response -Name "route_intents"
    foreach ($item in @($items)) {
        # @($null) is a one-element array, so a null list yields one null item.
        if ($null -eq $item) {
            continue
        }
        if ([string](Get-ObjectPropertyValue -Object $item -Name "lifecycle_status") -ne "active") {
            continue
        }
        if ([string](Get-ObjectPropertyValue -Object $item -Name "service_class") -ne "vpn_packets") {
            continue
        }
        $sourceSelector = Get-ObjectPropertyValue -Object $item -Name "source_selector"
        $destinationSelector = Get-ObjectPropertyValue -Object $item -Name "destination_selector"
        $sourceId = [string](Get-ObjectPropertyValue -Object $sourceSelector -Name "node_id")
        $destinationId = [string](Get-ObjectPropertyValue -Object $destinationSelector -Name "node_id")
        if ($sourceId -ne $SourceNodeID -or $destinationId -ne $DestinationNodeID) {
            continue
        }
        $policy = Get-ObjectPropertyValue -Object $item -Name "policy"
        $metadata = Get-ObjectPropertyValue -Object $policy -Name "metadata"
        $smoke = [string](Get-ObjectPropertyValue -Object $metadata -Name "smoke")
        if ($smokeTags -notcontains $smoke) {
            continue
        }
        Invoke-Api -Method POST -Path "/clusters/$ClusterID/mesh/route-intents/$($item.id)/expire" -Body @{ actor_user_id = $ActorUserID } | Out-Null
    }
}
# Creates a "vpn_packets" route intent from SourceNodeID to DestinationNodeID
# with the given priority and returns the API response.
# The policy expires ten minutes from now (UTC, round-trip "o" format) and is
# tagged with this run's id and the caller-supplied label.
function New-RouteIntent {
    param(
        [string]$SourceNodeID,
        [string]$DestinationNodeID,
        [int]$Priority,
        [string]$Label
    )
    $versionTag = "$runId-$Label"
    $expiry = (Get-Date).ToUniversalTime().AddMinutes(10).ToString("o")
    $smokeMetadata = @{
        smoke = "c18z2_live_service_channel_soak"
        run_id = $runId
        label = $Label
    }
    $routePolicy = @{
        synthetic_enabled = $true
        route_version = $versionTag
        policy_version = $versionTag
        peer_directory_version = $versionTag
        hops = @($SourceNodeID, $DestinationNodeID)
        allowed_channels = @("vpn_packet", "fabric_control")
        max_ttl = 8
        max_hops = 8
        expires_at = $expiry
        metadata = $smokeMetadata
    }
    $request = @{
        actor_user_id = $ActorUserID
        source_selector = @{ node_id = $SourceNodeID }
        destination_selector = @{ node_id = $DestinationNodeID }
        service_class = "vpn_packets"
        priority = $Priority
        policy = $routePolicy
    }
    return Invoke-Api -Method POST -Path "/clusters/$ClusterID/mesh/route-intents" -Body $request
}
# Fetches the synthetic mesh config that the control plane serves for a node.
function Get-SyntheticConfig {
    param([string]$NodeID)
    $configPath = "/clusters/$ClusterID/nodes/$NodeID/mesh/synthetic-config?actor_user_id=$ActorUserID"
    $config = Invoke-Api -Method GET -Path $configPath
    return $config
}
# Returns the most recent heartbeat recorded for a node (the API is asked for
# limit=1), or $null when the response contains no heartbeats.
# Indexing [0] into an empty or null list throws under
# Set-StrictMode -Version Latest, so the list is checked before indexing.
function Get-LatestHeartbeat {
    param([string]$NodeID)
    $response = Invoke-Api -Method GET -Path "/clusters/$ClusterID/nodes/$NodeID/heartbeats?actor_user_id=$ActorUserID&limit=1"
    if ($null -eq $response) {
        return $null
    }
    $listProp = $response.PSObject.Properties["heartbeats"]
    if ($null -eq $listProp -or $null -eq $listProp.Value) {
        return $null
    }
    $heartbeats = @($listProp.Value)
    if ($heartbeats.Count -eq 0) {
        return $null
    }
    return $heartbeats[0]
}
# Returns a hashtable with the node's latest heartbeat and the
# fabric_service_channel_runtime_report found in that heartbeat's metadata.
#   heartbeat - value returned by Get-LatestHeartbeat.
#   report    - the runtime report, or $null when the heartbeat, its metadata,
#               or the report itself is absent.
# Callers poll on "$null -ne report". A direct property chain throws under
# Set-StrictMode -Version Latest when a level is missing, so each level is read
# through Get-ObjectPropertyValue.
function Get-LatestRuntimeReport {
    param([string]$NodeID)
    $hb = Get-LatestHeartbeat -NodeID $NodeID
    $metadata = Get-ObjectPropertyValue -Object $hb -Name "metadata"
    $runtimeReport = Get-ObjectPropertyValue -Object $metadata -Name "fabric_service_channel_runtime_report"
    return @{
        heartbeat = $hb
        report = $runtimeReport
    }
}
# Polls the node's latest runtime report every two seconds until it is enabled,
# reports production payload forwarding, and lists at least MinRoutes route
# candidates. Returns the hashtable from Get-LatestRuntimeReport.
# Throws once TimeoutSeconds have elapsed without the node becoming ready.
function Wait-ForRuntimeReady {
    param(
        [string]$NodeID,
        [int]$MinRoutes,
        [int]$TimeoutSeconds = 90
    )
    $giveUpAt = (Get-Date).AddSeconds($TimeoutSeconds)
    while ($true) {
        $snapshot = Get-LatestRuntimeReport -NodeID $NodeID
        $runtime = $snapshot.report
        if ($null -ne $runtime) {
            if ($runtime.enabled -eq $true -and $runtime.production_payload_forwarding -eq $true) {
                if ([int]$runtime.route_candidate_total -ge $MinRoutes) {
                    return $snapshot
                }
            }
        }
        Start-Sleep -Seconds 2
        if ((Get-Date) -ge $giveUpAt) {
            break
        }
    }
    throw "Timed out waiting for production service-channel runtime ready on node $NodeID"
}
# Polls the node's latest runtime report every two seconds until its
# config_version has reached ConfigVersion, then returns the hashtable from
# Get-LatestRuntimeReport. Throws once TimeoutSeconds have elapsed.
# Version ordering: when both versions are plain non-negative integers they are
# compared numerically. -ge on two strings is a string comparison, under which
# "9" ranks above "10". Any other format falls back to the string comparison.
function Wait-ForRuntimeConfigVersion {
    param(
        [string]$NodeID,
        [string]$ConfigVersion,
        [int]$TimeoutSeconds = 90
    )
    $digitsOnly = [System.Globalization.NumberStyles]::None
    $invariant = [System.Globalization.CultureInfo]::InvariantCulture
    $wantedNumber = [long]0
    $wantedIsNumeric = [long]::TryParse($ConfigVersion, $digitsOnly, $invariant, [ref]$wantedNumber)
    $deadline = (Get-Date).AddSeconds($TimeoutSeconds)
    do {
        $latest = Get-LatestRuntimeReport -NodeID $NodeID
        if ($null -ne $latest.report) {
            # A report without config_version yields "" and keeps polling.
            $loadedVersion = [string](Get-ObjectPropertyValue -Object $latest.report -Name "config_version")
            $loadedNumber = [long]0
            if ($wantedIsNumeric -and [long]::TryParse($loadedVersion, $digitsOnly, $invariant, [ref]$loadedNumber)) {
                $reached = $loadedNumber -ge $wantedNumber
            }
            else {
                $reached = $loadedVersion -ge $ConfigVersion
            }
            if ($reached) {
                return $latest
            }
        }
        Start-Sleep -Seconds 2
    } while ((Get-Date) -lt $deadline)
    throw "Timed out waiting for node $NodeID to load synthetic config $ConfigVersion"
}
# Polls the node's synthetic config every two seconds until the number of
# listed routes whose route_id is in RouteIDs reaches RouteIDs.Count, then
# returns that config. Throws once TimeoutSeconds have elapsed.
# A config whose routes list is null or missing is treated as "no routes yet".
# Under Set-StrictMode -Version Latest a direct property read would throw and
# end the wait, so fields are read through Get-ObjectPropertyValue.
function Wait-ForRouteIntentVisible {
    param(
        [string]$NodeID,
        [string[]]$RouteIDs,
        [int]$TimeoutSeconds = 60
    )
    $deadline = (Get-Date).AddSeconds($TimeoutSeconds)
    do {
        $config = Get-SyntheticConfig -NodeID $NodeID
        $meshConfig = Get-ObjectPropertyValue -Object $config -Name "synthetic_mesh_config"
        $routes = @(Get-ObjectPropertyValue -Object $meshConfig -Name "routes")
        $present = @($routes | Where-Object {
                $null -ne $_ -and
                $RouteIDs -contains (Get-ObjectPropertyValue -Object $_ -Name "route_id")
            })
        if ($present.Count -ge $RouteIDs.Count) {
            return $config
        }
        Start-Sleep -Seconds 2
    } while ((Get-Date) -lt $deadline)
    throw "Timed out waiting for routes '$($RouteIDs -join ",")' in synthetic config for node $NodeID"
}
# Requests a "vpn_packets" service-channel lease between one entry and one exit
# node for this run's resource id, and returns the fabric_service_channel_lease
# member of the API response. The lease is requested with a 300-second TTL.
function New-ServiceChannelLease {
    param(
        [string]$EntryNodeID,
        [string]$ExitNodeID
    )
    $leaseRequest = @{
        actor_user_id = $ActorUserID
        organization_id = "org-c18z1-smoke"
        user_id = $ActorUserID
        resource_id = $resourceId
        service_class = "vpn_packets"
        entry_node_ids = @($EntryNodeID)
        exit_node_ids = @($ExitNodeID)
        preferred_entry_node_id = $EntryNodeID
        preferred_exit_node_id = $ExitNodeID
        allowed_channels = @("vpn_packet", "bulk", "control")
        ttl_seconds = 300
        metadata = @{
            smoke = "c18z2_live_service_channel_soak"
            run_id = $runId
        }
    }
    $reply = Invoke-Api -Method POST -Path "/clusters/$ClusterID/fabric/service-channels/leases" -Body $leaseRequest
    return $reply.fabric_service_channel_lease
}
# Serializes Value to compact JSON (depth 80), encodes the UTF-8 bytes as
# Base64, and converts the result to the URL-safe alphabet without '=' padding.
function ConvertTo-Base64UrlJson {
    param([object]$Value)
    $compactJson = $Value | ConvertTo-Json -Depth 80 -Compress
    $utf8 = [System.Text.Encoding]::UTF8.GetBytes($compactJson)
    $encoded = [Convert]::ToBase64String($utf8)
    $encoded = $encoded.TrimEnd("=")
    $encoded = $encoded.Replace("+", "-")
    $encoded = $encoded.Replace("/", "_")
    return $encoded
}
# Reads a property by name without tripping strict mode.
# Returns the property's value, or $null when Object is $null or has no
# property called Name.
function Get-ObjectPropertyValue {
    param(
        [object]$Object,
        [string]$Name
    )
    if ($null -ne $Object) {
        $member = $Object.PSObject.Properties[$Name]
        if ($null -ne $member) {
            return $member.Value
        }
    }
    return $null
}
# Builds a synthetic IPv4/UDP packet as a byte array.
#   SourcePort - UDP source port; also embedded in the ASCII payload.
# Layout: 20-byte IPv4 header (version/IHL 0x45, TTL 64, protocol 17,
# 10.18.1.10 -> 10.18.2.20), 8-byte UDP header (destination port 3389), then
# the payload. Checksum bytes are left at zero.
function New-TestIPv4UDPPacket {
    param([int]$SourcePort)
    $ipHeaderSize = 20
    $udpHeaderSize = 8
    $body = [System.Text.Encoding]::ASCII.GetBytes("c18z1-$SourcePort")
    $datagramSize = $udpHeaderSize + $body.Length
    $packetSize = $ipHeaderSize + $datagramSize
    $bytes = New-Object byte[] $packetSize

    # Single-byte IPv4 header fields.
    $bytes[0] = 0x45
    $bytes[1] = 0
    $bytes[8] = 64
    $bytes[9] = 17

    # Source address at offset 12, destination address at offset 16.
    [Array]::Copy([byte[]](10, 18, 1, 10), 0, $bytes, 12, 4)
    [Array]::Copy([byte[]](10, 18, 2, 20), 0, $bytes, 16, 4)

    # 16-bit big-endian fields as (offset, value) pairs: IP total length,
    # UDP source port, UDP destination port, UDP length.
    $wordFields = @(
        @(2, $packetSize),
        @($ipHeaderSize, $SourcePort),
        @(($ipHeaderSize + 2), 3389),
        @(($ipHeaderSize + 4), $datagramSize)
    )
    foreach ($field in $wordFields) {
        $at = $field[0]
        $word = $field[1]
        $bytes[$at] = [byte](($word -shr 8) -band 0xff)
        $bytes[$at + 1] = [byte]($word -band 0xff)
    }

    [Array]::Copy($body, 0, $bytes, 28, $body.Length)
    return $bytes
}
# Concatenates packets into one batch body: each packet is preceded by its
# length as a 4-byte big-endian integer.
function New-PacketBatchBody {
    param([byte[][]]$Packets)
    $buffer = [System.Collections.Generic.List[byte]]::new()
    foreach ($packet in $Packets) {
        $prefix = [System.BitConverter]::GetBytes([int]$packet.Length)
        # BitConverter uses host byte order; the frame prefix is big-endian.
        if ([System.BitConverter]::IsLittleEndian) {
            [Array]::Reverse($prefix)
        }
        $buffer.AddRange([byte[]]$prefix)
        $buffer.AddRange([byte[]]$packet)
    }
    return $buffer.ToArray()
}
# POSTs one batch of eight synthetic UDP packets through the lease's entry HTTP
# endpoint and returns an object with StatusCode and Body.
#   Lease     - lease object; its path template, channel id, token and authority
#               payload/signature are used to build the request.
#   PortStart - source port of the first packet; the rest use consecutive ports.
# Throws when the response status is not a success code.
# The request and response messages are disposed together with the client so
# repeated calls do not accumulate undisposed HTTP objects.
function Invoke-ServiceChannelPost {
    param(
        [object]$Lease,
        [int]$PortStart
    )
    $packets = @()
    for ($i = 0; $i -lt 8; $i++) {
        # The leading comma keeps each packet as one array element.
        $packets += , (New-TestIPv4UDPPacket -SourcePort ($PortStart + $i))
    }
    $path = $Lease.entry_http.path_template.
        Replace("{cluster_id}", $ClusterID).
        Replace("{channel_id}", $Lease.channel_id).
        Replace("{resource_id}", $resourceId)
    $url = "$EntryBaseUrl$path`?batch=true"
    $headers = @{
        "X-RAP-Service-Channel-Token" = $Lease.token.token
        "X-RAP-Fabric-Channel-ID" = $Lease.channel_id
        "X-RAP-Service-Class" = "vpn_packets"
        "X-RAP-Channel-Class" = "vpn_packet"
        "X-RAP-Service-Channel-Authority-Payload" = ConvertTo-Base64UrlJson -Value $Lease.authority_payload
        "X-RAP-Service-Channel-Authority-Signature" = ConvertTo-Base64UrlJson -Value $Lease.authority_signature
    }
    $body = New-PacketBatchBody -Packets $packets
    $client = [System.Net.Http.HttpClient]::new()
    $request = $null
    $response = $null
    try {
        $client.Timeout = [TimeSpan]::FromSeconds(30)
        $request = [System.Net.Http.HttpRequestMessage]::new([System.Net.Http.HttpMethod]::Post, $url)
        foreach ($header in $headers.GetEnumerator()) {
            # TryAddWithoutValidation accepts the custom X-RAP-* header values as-is.
            [void]$request.Headers.TryAddWithoutValidation($header.Key, [string]$header.Value)
        }
        $content = [System.Net.Http.ByteArrayContent]::new($body)
        $content.Headers.ContentType = [System.Net.Http.Headers.MediaTypeHeaderValue]::Parse("application/vnd.rap.vpn-packet-batch.v1")
        $request.Content = $content
        $response = $client.SendAsync($request).GetAwaiter().GetResult()
        $responseBody = $response.Content.ReadAsStringAsync().GetAwaiter().GetResult()
        if (-not $response.IsSuccessStatusCode) {
            throw "Service-channel POST $url failed with HTTP $([int]$response.StatusCode): $responseBody"
        }
        return [pscustomobject]@{
            StatusCode = [int]$response.StatusCode
            Body = $responseBody
        }
    }
    finally {
        if ($null -ne $response) {
            $response.Dispose()
        }
        if ($null -ne $request) {
            # Disposing the request also disposes its content.
            $request.Dispose()
        }
        $client.Dispose()
    }
}
# Returns ingress.send_packets from the node's latest runtime report as [int],
# or 0 when the report, its ingress section, or the counter is absent.
# The ingress section is read through Get-ObjectPropertyValue because a direct
# property read throws under Set-StrictMode -Version Latest when it is missing.
function Get-IngressSendPackets {
    param([string]$NodeID)
    $latest = Get-LatestRuntimeReport -NodeID $NodeID
    $ingress = Get-ObjectPropertyValue -Object $latest.report -Name "ingress"
    $sendPackets = Get-ObjectPropertyValue -Object $ingress -Name "send_packets"
    if ($null -eq $sendPackets) {
        return 0
    }
    return [int]$sendPackets
}
# Returns ingress.send_route_failures from the node's latest runtime report as
# [int], or 0 when the report, its ingress section, or the counter is absent.
# The ingress section is read through Get-ObjectPropertyValue because a direct
# property read throws under Set-StrictMode -Version Latest when it is missing.
function Get-IngressRouteFailures {
    param([string]$NodeID)
    $latest = Get-LatestRuntimeReport -NodeID $NodeID
    $ingress = Get-ObjectPropertyValue -Object $latest.report -Name "ingress"
    $failures = Get-ObjectPropertyValue -Object $ingress -Name "send_route_failures"
    if ($null -eq $failures) {
        return 0
    }
    return [int]$failures
}
# Returns the depth of the "<VPNConnectionID>:client_to_gateway" inbox queue
# from the node's latest runtime report as [int].
# Returns 0 when the report, its inbox section, the queue_depths map, or the
# queue entry is absent. Each level is read through Get-ObjectPropertyValue
# because a direct property read throws under Set-StrictMode -Version Latest
# when a level is missing.
function Get-ExitQueueDepth {
    param(
        [string]$NodeID,
        [string]$VPNConnectionID
    )
    $latest = Get-LatestRuntimeReport -NodeID $NodeID
    $queueKey = "$VPNConnectionID`:client_to_gateway"
    $inbox = Get-ObjectPropertyValue -Object $latest.report -Name "inbox"
    $depths = Get-ObjectPropertyValue -Object $inbox -Name "queue_depths"
    $depth = Get-ObjectPropertyValue -Object $depths -Name $queueKey
    if ($null -eq $depth) {
        return 0
    }
    return [int]$depth
}
# Polls Get-ExitQueueDepth every two seconds until the queue depth is at least
# MinDepth and returns that depth. Throws once TimeoutSeconds have elapsed.
function Wait-ForExitQueueDepth {
    param(
        [string]$NodeID,
        [string]$VPNConnectionID,
        [int]$MinDepth,
        [int]$TimeoutSeconds = 90
    )
    $giveUpAt = (Get-Date).AddSeconds($TimeoutSeconds)
    while ($true) {
        $observed = Get-ExitQueueDepth -NodeID $NodeID -VPNConnectionID $VPNConnectionID
        if ($observed -ge $MinDepth) {
            return $observed
        }
        Start-Sleep -Seconds 2
        if ((Get-Date) -ge $giveUpAt) {
            break
        }
    }
    throw "Timed out waiting for exit queue depth >= $MinDepth on node $NodeID"
}
# Non-throwing wrapper around Invoke-ServiceChannelPost.
# Returns an object with:
#   ok          - $true when the POST succeeded.
#   status_code - HTTP status on success, 0 on failure.
#   error       - exception message on failure, "" on success.
function Invoke-ServiceChannelPostSafe {
    param(
        [object]$Lease,
        [int]$PortStart
    )
    $accepted = $false
    $httpStatus = 0
    $failureText = ""
    try {
        $posted = Invoke-ServiceChannelPost -Lease $Lease -PortStart $PortStart
        $httpStatus = [int]$posted.StatusCode
        $accepted = $true
    }
    catch {
        $httpStatus = 0
        $failureText = $_.Exception.Message
    }
    return [pscustomobject]@{
        ok = $accepted
        status_code = $httpStatus
        error = $failureText
    }
}
# Sends Count batches through Invoke-ServiceChannelPostSafe and returns the
# per-batch result objects in order.
#   PortBase          - source port base; batch i starts at PortBase + i * 100.
#   DelayMilliseconds - pause after every batch; no pause when 0 or less.
function Send-BatchSeries {
    param(
        [object]$Lease,
        [int]$Count,
        [int]$PortBase,
        [int]$DelayMilliseconds = 100
    )
    $outcomes = [System.Collections.Generic.List[object]]::new()
    $batchIndex = 0
    while ($batchIndex -lt $Count) {
        $firstPort = $PortBase + ($batchIndex * 100)
        $outcome = Invoke-ServiceChannelPostSafe -Lease $Lease -PortStart $firstPort
        $outcomes.Add($outcome)
        if ($DelayMilliseconds -gt 0) {
            Start-Sleep -Milliseconds $DelayMilliseconds
        }
        $batchIndex++
    }
    return $outcomes.ToArray()
}
# Runs a command line on the $DockerSSH host via ssh.
# Throws when ssh exits with a non-zero code.
function Invoke-RemoteDocker {
    param([string]$Command)
    & ssh $DockerSSH $Command
    $sshExit = $LASTEXITCODE
    if ($sshExit -eq 0) {
        return
    }
    throw "ssh $DockerSSH command failed: $Command"
}
# Stops the three host-agent updater containers on the docker host.
# The remote command ends in "|| true", so a failing docker stop is ignored.
function Stop-TestUpdaters {
    $stopCommand = "docker stop rap_host_agent_updater_test-1 rap_host_agent_updater_test-2 rap_host_agent_updater_test-3 >/dev/null 2>&1 || true"
    Invoke-RemoteDocker -Command $stopCommand
}
# Starts the three host-agent updater containers on the docker host.
# The remote command ends in "|| true", so a failing docker start is ignored.
function Start-TestUpdaters {
    $startCommand = "docker start rap_host_agent_updater_test-1 rap_host_agent_updater_test-2 rap_host_agent_updater_test-3 >/dev/null 2>&1 || true"
    Invoke-RemoteDocker -Command $startCommand
}
# Restarts the docker container of a test node. The container name is
# "rap_test_node_" followed by the node name with dashes turned into underscores.
function Restart-ExitContainer {
    param([string]$Name)
    $suffix = $Name -replace "-", "_"
    $container = "rap_test_node_{0}" -f $suffix
    Invoke-RemoteDocker -Command "docker restart $container >/dev/null"
}
# Polls the node's latest runtime report every two seconds until its ingress
# section shows at least MinSendPackets sent packets and RouteID as the last
# selected route. Returns the hashtable from Get-LatestRuntimeReport.
# Throws once TimeoutSeconds have elapsed.
# A report with no ingress section counts as "not there yet". It is read
# through Get-ObjectPropertyValue because a direct property read throws under
# Set-StrictMode -Version Latest and would end the wait.
function Wait-ForIngressRoute {
    param(
        [string]$NodeID,
        [string]$RouteID,
        [int]$MinSendPackets,
        [int]$TimeoutSeconds = 45
    )
    $deadline = (Get-Date).AddSeconds($TimeoutSeconds)
    do {
        $latest = Get-LatestRuntimeReport -NodeID $NodeID
        $ingress = Get-ObjectPropertyValue -Object $latest.report -Name "ingress"
        $sendPackets = Get-ObjectPropertyValue -Object $ingress -Name "send_packets"
        $selectedRoute = Get-ObjectPropertyValue -Object $ingress -Name "last_selected_route_id"
        if ($null -ne $ingress -and
            [int]$sendPackets -ge $MinSendPackets -and
            [string]$selectedRoute -eq $RouteID) {
            return $latest
        }
        Start-Sleep -Seconds 2
    } while ((Get-Date) -lt $deadline)
    throw "Timed out waiting for ingress telemetry route=$RouteID packets>=$MinSendPackets on node $NodeID"
}
# Polls the node's latest runtime report every two seconds until its ingress
# section shows at least MinSendPackets sent packets and a last selected route
# that is one of RouteIDs. Returns the hashtable from Get-LatestRuntimeReport.
# Throws once TimeoutSeconds have elapsed.
# A report with no ingress section counts as "not there yet". It is read
# through Get-ObjectPropertyValue because a direct property read throws under
# Set-StrictMode -Version Latest and would end the wait.
function Wait-ForIngressAnyRoute {
    param(
        [string]$NodeID,
        [string[]]$RouteIDs,
        [int]$MinSendPackets,
        [int]$TimeoutSeconds = 45
    )
    $deadline = (Get-Date).AddSeconds($TimeoutSeconds)
    do {
        $latest = Get-LatestRuntimeReport -NodeID $NodeID
        $ingress = Get-ObjectPropertyValue -Object $latest.report -Name "ingress"
        $sendPackets = Get-ObjectPropertyValue -Object $ingress -Name "send_packets"
        $selectedRoute = Get-ObjectPropertyValue -Object $ingress -Name "last_selected_route_id"
        if ($null -ne $ingress -and
            [int]$sendPackets -ge $MinSendPackets -and
            $RouteIDs -contains [string]$selectedRoute) {
            return $latest
        }
        Start-Sleep -Seconds 2
    } while ((Get-Date) -lt $deadline)
    throw "Timed out waiting for ingress telemetry routes='$($RouteIDs -join ",")' packets>=$MinSendPackets on node $NodeID"
}
# Polls the node's latest runtime report every two seconds until the
# "<VPNConnectionID>:client_to_gateway" inbox queue has a depth above zero.
# Returns the hashtable from Get-LatestRuntimeReport.
# Throws once TimeoutSeconds have elapsed.
# A report with no inbox, queue_depths map, or queue entry counts as "empty".
# Each level is read through Get-ObjectPropertyValue because a direct property
# read throws under Set-StrictMode -Version Latest and would end the wait.
function Wait-ForExitInbox {
    param(
        [string]$NodeID,
        [string]$VPNConnectionID,
        [int]$TimeoutSeconds = 45
    )
    $queueKey = "$VPNConnectionID`:client_to_gateway"
    $deadline = (Get-Date).AddSeconds($TimeoutSeconds)
    do {
        $latest = Get-LatestRuntimeReport -NodeID $NodeID
        $inbox = Get-ObjectPropertyValue -Object $latest.report -Name "inbox"
        $depths = Get-ObjectPropertyValue -Object $inbox -Name "queue_depths"
        $depth = Get-ObjectPropertyValue -Object $depths -Name $queueKey
        if ($null -ne $depth -and [int]$depth -gt 0) {
            return $latest
        }
        Start-Sleep -Seconds 2
    } while ((Get-Date) -lt $deadline)
    throw "Timed out waiting for exit inbox queue '$queueKey' on node $NodeID"
}
# Posts a hand-built heartbeat for the entry node whose runtime report marks
# BadRouteID as the last failed route and recommends a route rebuild, with
# GoodRouteID as the last used route. Returns the API response.
function Send-FeedbackHeartbeat {
    param(
        [string]$EntryNodeID,
        [string]$BadRouteID,
        [string]$GoodRouteID
    )
    $flowStats = @{
        last_route_id = $GoodRouteID
        last_failed_route_id = $BadRouteID
        last_error = "c18z1 forced stale route after live packet ingress"
        consecutive_failures = 3
        stall_count = 1
        last_send_duration_ms = 250
        route_rebuild_recommended = $true
        degraded_fallback_recommended = $false
    }
    $runtimeReport = @{
        schema_version = "c18l.fabric_service_channel_runtime_report.v1"
        ingress = @{
            flow_scheduler = @{
                channel_stats = @{
                    "c18z1-live-flow" = $flowStats
                }
            }
        }
    }
    $capabilities = @{
        native_node_agent = $true
        fabric_service_channel_runtime = $true
        fabric_service_channel_route_manager = $true
        smoke_feedback_injection = "c18z1"
    }
    $heartbeat = @{
        health_status = "healthy"
        reported_version = $RequiredNodeVersion
        capabilities = $capabilities
        service_states = @{ smoke = "c18z1_live_ingress_feedback" }
        metadata = @{
            fabric_service_channel_runtime_report = $runtimeReport
            smoke = @{
                name = "c18z1_live_service_channel_ingress"
                run_id = $runId
            }
        }
    }
    return Invoke-Api -Method POST -Path "/clusters/$ClusterID/nodes/$EntryNodeID/heartbeats" -Body $heartbeat
}
# Polls the node's synthetic config every two seconds until its route path
# decisions contain an "applied" rebuild of BadRouteID whose replacement is
# ExpectedReplacementID. Returns a hashtable with the config and the decision.
# Throws once TimeoutSeconds have elapsed.
function Wait-ForConfigDecision {
    param(
        [string]$NodeID,
        [string]$BadRouteID,
        [string]$ExpectedReplacementID,
        [int]$TimeoutSeconds = 60
    )
    $giveUpAt = (Get-Date).AddSeconds($TimeoutSeconds)
    while ($true) {
        $current = Get-SyntheticConfig -NodeID $NodeID
        $allDecisions = @($current.synthetic_mesh_config.route_path_decisions.decisions)
        $applied = @($allDecisions | Where-Object {
                $_.route_id -eq $BadRouteID -and
                $_.rebuild_status -eq "applied" -and
                $_.replacement_route_id -eq $ExpectedReplacementID
            })
        $firstApplied = $applied | Select-Object -First 1
        if ($null -ne $firstApplied) {
            return @{
                config = $current
                decision = $firstApplied
            }
        }
        Start-Sleep -Seconds 2
        if ((Get-Date) -ge $giveUpAt) {
            break
        }
    }
    throw "Timed out waiting for applied rebuild decision $BadRouteID -> $ExpectedReplacementID"
}
# Polls the node's latest runtime report every two seconds until
# ingress.route_manager_transition.status is "applied_rebuild", then returns
# the hashtable from Get-LatestRuntimeReport.
# While waiting, and only when both BadRouteID and ReplacementRouteID are
# non-empty, a feedback heartbeat is re-sent on every iteration.
# Throws once TimeoutSeconds have elapsed.
function Wait-ForAppliedRebuildTransition {
    param(
        [string]$NodeID,
        [string]$BadRouteID = "",
        [string]$ReplacementRouteID = "",
        [int]$TimeoutSeconds = 90
    )
    $giveUpAt = (Get-Date).AddSeconds($TimeoutSeconds)
    while ($true) {
        $snapshot = Get-LatestRuntimeReport -NodeID $NodeID
        $routeTransition = $null
        if ($null -ne $snapshot.report) {
            if ($null -ne $snapshot.report.ingress) {
                $member = $snapshot.report.ingress.PSObject.Properties["route_manager_transition"]
                if ($null -ne $member) {
                    $routeTransition = $member.Value
                }
            }
        }
        if ($null -ne $routeTransition) {
            if ([string]$routeTransition.status -eq "applied_rebuild") {
                return $snapshot
            }
        }
        $canResendFeedback = -not ($BadRouteID -eq "" -or $ReplacementRouteID -eq "")
        if ($canResendFeedback) {
            Send-FeedbackHeartbeat -EntryNodeID $NodeID -BadRouteID $BadRouteID -GoodRouteID $ReplacementRouteID | Out-Null
        }
        Start-Sleep -Seconds 2
        if ((Get-Date) -ge $giveUpAt) {
            break
        }
    }
    throw "Timed out waiting for node route-manager transition applied_rebuild on node $NodeID"
}
# ---------------------------------------------------------------------------
# Main flow. Resolves the entry/exit nodes, then runs the soak inside
# try/finally so the route intents are expired and the updater containers are
# started again even when a step throws.
# ---------------------------------------------------------------------------
$entryNode = Get-NodeByName -Name $EntryNodeName
$exitNode = Get-NodeByName -Name $ExitNodeName
# Route intent ids stay "" until created; the finally block only expires non-empty ids.
$primaryRouteID = ""
$alternateRouteID = ""
$updatersStopped = $false
$result = $null
try {
# --- Setup: stop updaters, enable listeners, replace old smoke route intents.
Stop-TestUpdaters
$updatersStopped = $true
Enable-TestMeshListener -Node $entryNode
Enable-TestMeshListener -Node $exitNode
Clear-OldSmokeRouteIntents -SourceNodeID $entryNode.id -DestinationNodeID $exitNode.id
# Two intents for the same node pair; "primary" gets the higher priority value.
$primaryIntent = New-RouteIntent -SourceNodeID $entryNode.id -DestinationNodeID $exitNode.id -Priority 2000000000 -Label "primary"
$alternateIntent = New-RouteIntent -SourceNodeID $entryNode.id -DestinationNodeID $exitNode.id -Priority 1999999999 -Label "alternate"
$primaryRouteID = $primaryIntent.route_intent.id
$alternateRouteID = $alternateIntent.route_intent.id
$routeIDs = @($primaryRouteID, $alternateRouteID)
# --- Wait until both nodes list the routes, report ready, and have loaded the config.
$visibleConfig = Wait-ForRouteIntentVisible -NodeID $entryNode.id -RouteIDs $routeIDs
$exitVisibleConfig = Wait-ForRouteIntentVisible -NodeID $exitNode.id -RouteIDs $routeIDs
$readyBefore = Wait-ForRuntimeReady -NodeID $entryNode.id -MinRoutes 2
$exitReadyBefore = Wait-ForRuntimeReady -NodeID $exitNode.id -MinRoutes 0
$loadedConfig = Wait-ForRuntimeConfigVersion -NodeID $entryNode.id -ConfigVersion $visibleConfig.synthetic_mesh_config.config_version
$exitLoadedConfig = Wait-ForRuntimeConfigVersion -NodeID $exitNode.id -ConfigVersion $exitVisibleConfig.synthetic_mesh_config.config_version
# --- Lease: must be ready and must point at one of the two smoke routes.
$lease = New-ServiceChannelLease -EntryNodeID $entryNode.id -ExitNodeID $exitNode.id
if ($lease.status -ne "ready") {
throw "Lease status was '$($lease.status)', want ready"
}
if ($routeIDs -notcontains [string]$lease.primary_route.route_id) {
throw "Lease primary route was '$($lease.primary_route.route_id)', want one of smoke routes"
}
# --- Warm phase: counters are sampled first so later checks use deltas.
$baselineSendPackets = Get-IngressSendPackets -NodeID $entryNode.id
$baselineRouteFailures = Get-IngressRouteFailures -NodeID $entryNode.id
$warmResults = Send-BatchSeries -Lease $lease -Count $WarmBatchCount -PortBase 43000 -DelayMilliseconds 100
$warmOk = @($warmResults | Where-Object { $_.ok -eq $true }).Count
if ($warmOk -lt $WarmBatchCount) {
throw "Warm service-channel soak accepted $warmOk/$WarmBatchCount batches"
}
# Each batch carries 8 packets (see Invoke-ServiceChannelPost).
$warmIngress = Wait-ForIngressAnyRoute -NodeID $entryNode.id -RouteIDs $routeIDs -MinSendPackets ($baselineSendPackets + ($WarmBatchCount * 8)) -TimeoutSeconds 90
$warmExitDepth = Wait-ForExitQueueDepth -NodeID $exitNode.id -VPNConnectionID $resourceId -MinDepth 8 -TimeoutSeconds 90
# --- Optional exit restart. Batches sent during the restart are recorded but
# not required to succeed.
$restartAttempted = $false
$duringRestartResults = @()
$postRestartBaselineExitDepth = $warmExitDepth
if (-not $SkipExitRestart) {
$restartAttempted = $true
Restart-ExitContainer -Name $ExitNodeName
$duringRestartResults = Send-BatchSeries -Lease $lease -Count 3 -PortBase 53000 -DelayMilliseconds 150
# Re-read the exit node and wait for it to list the routes and load the config again.
$exitNode = Get-NodeByName -Name $ExitNodeName
$postRestartVisibleConfig = Wait-ForRouteIntentVisible -NodeID $exitNode.id -RouteIDs $routeIDs -TimeoutSeconds 120
$postRestartReady = Wait-ForRuntimeReady -NodeID $exitNode.id -MinRoutes 0 -TimeoutSeconds 120
$postRestartLoadedConfig = Wait-ForRuntimeConfigVersion -NodeID $exitNode.id -ConfigVersion $postRestartVisibleConfig.synthetic_mesh_config.config_version -TimeoutSeconds 120
# The recovery check measures growth from the depth observed after the restart.
$postRestartBaselineExitDepth = Get-ExitQueueDepth -NodeID $exitNode.id -VPNConnectionID $resourceId
}
else {
# No restart: reuse the pre-restart observations so the checks below still evaluate.
$postRestartVisibleConfig = $exitVisibleConfig
$postRestartReady = $exitReadyBefore
$postRestartLoadedConfig = $exitLoadedConfig
}
# --- Recovery phase: every batch must be accepted and reach the exit queue.
$recoveryBaseline = Get-IngressSendPackets -NodeID $entryNode.id
$recoveryResults = Send-BatchSeries -Lease $lease -Count $RecoveryBatchCount -PortBase 63000 -DelayMilliseconds 100
$recoveryOk = @($recoveryResults | Where-Object { $_.ok -eq $true }).Count
if ($recoveryOk -lt $RecoveryBatchCount) {
throw "Recovery service-channel soak accepted $recoveryOk/$RecoveryBatchCount batches"
}
$recoveryIngress = Wait-ForIngressAnyRoute -NodeID $entryNode.id -RouteIDs $routeIDs -MinSendPackets ($recoveryBaseline + ($RecoveryBatchCount * 8)) -TimeoutSeconds 120
$recoveryExitDepth = Wait-ForExitQueueDepth -NodeID $exitNode.id -VPNConnectionID $resourceId -MinDepth ($postRestartBaselineExitDepth + 8) -TimeoutSeconds 120
$finalExitRuntime = Get-LatestRuntimeReport -NodeID $exitNode.id
$finalRouteFailures = Get-IngressRouteFailures -NodeID $entryNode.id
# --- Expire both intents and keep the responses for the result document.
$expiredPrimary = Invoke-Api -Method POST -Path "/clusters/$ClusterID/mesh/route-intents/$primaryRouteID/expire" -Body @{ actor_user_id = $ActorUserID }
$expiredAlternate = Invoke-Api -Method POST -Path "/clusters/$ClusterID/mesh/route-intents/$alternateRouteID/expire" -Body @{ actor_user_id = $ActorUserID }
# --- Result document; ordered so the JSON keeps this key order.
$result = [ordered]@{
schema_version = "c18z2.live_service_channel_soak_smoke.v1"
run_id = $runId
base_url = $ApiBaseUrl
entry_base_url = $EntryBaseUrl
cluster_id = $ClusterID
entry_node = @{ name = $entryNode.name; id = $entryNode.id }
exit_node = @{ name = $exitNode.name; id = $exitNode.id }
resource_id = $resourceId
route_intents = @{
primary_route_intent_id = $primaryRouteID
alternate_route_intent_id = $alternateRouteID
expired_primary_status = $expiredPrimary.route_intent.lifecycle_status
expired_alternate_status = $expiredAlternate.route_intent.lifecycle_status
}
lease = @{
channel_id = $lease.channel_id
status = $lease.status
primary_route_id = $lease.primary_route.route_id
}
batches = @{
warm_requested = $WarmBatchCount
warm_accepted = $warmOk
during_restart_requested = @($duringRestartResults).Count
during_restart_accepted = @($duringRestartResults | Where-Object { $_.ok -eq $true }).Count
recovery_requested = $RecoveryBatchCount
recovery_accepted = $recoveryOk
}
route_failures = @{
baseline = $baselineRouteFailures
final = $finalRouteFailures
delta = ($finalRouteFailures - $baselineRouteFailures)
}
exit_queue = @{
warm_depth = $warmExitDepth
post_restart_baseline_depth = $postRestartBaselineExitDepth
recovery_depth = $recoveryExitDepth
}
restart_attempted = $restartAttempted
# Always $true here: a failed check throws below, and the file is written only
# after the try block has completed without throwing.
passed = $true
# NOTE(review): the *_loaded_visible_config checks and exit_restart_recovered use
# -ge on [string] values, which is a string comparison rather than a numeric one
# - confirm the config_version format makes that ordering valid.
checks = [ordered]@{
production_forwarding_ready = ($readyBefore.report.production_payload_forwarding -eq $true)
exit_production_forwarding_ready = ($exitReadyBefore.report.production_payload_forwarding -eq $true)
route_intents_visible_before_post = (@($visibleConfig.synthetic_mesh_config.routes | Where-Object { $routeIDs -contains $_.route_id }).Count -ge 2)
exit_route_intents_visible_before_post = (@($exitVisibleConfig.synthetic_mesh_config.routes | Where-Object { $routeIDs -contains $_.route_id }).Count -ge 2)
entry_runtime_loaded_visible_config = ([string]$loadedConfig.report.config_version -ge [string]$visibleConfig.synthetic_mesh_config.config_version)
exit_runtime_loaded_visible_config = ([string]$exitLoadedConfig.report.config_version -ge [string]$exitVisibleConfig.synthetic_mesh_config.config_version)
signed_lease_ready = ($lease.status -eq "ready")
warm_batches_accepted = ($warmOk -eq $WarmBatchCount)
warm_exit_inbox_received = ($warmExitDepth -ge 8)
exit_restart_recovered = ($postRestartReady.report.production_payload_forwarding -eq $true -and [string]$postRestartLoadedConfig.report.config_version -ge [string]$postRestartVisibleConfig.synthetic_mesh_config.config_version)
recovery_batches_accepted = ($recoveryOk -eq $RecoveryBatchCount)
recovery_exit_inbox_grew = ($recoveryExitDepth -ge ($postRestartBaselineExitDepth + 8))
route_intents_expired = ($expiredPrimary.route_intent.lifecycle_status -eq "expired" -and $expiredAlternate.route_intent.lifecycle_status -eq "expired")
}
telemetry = @{
warm_ingress = $warmIngress.report.ingress
recovery_ingress = $recoveryIngress.report.ingress
exit_inbox = $finalExitRuntime.report.inbox
during_restart_results = $duringRestartResults
}
}
# Any check that is not exactly $true fails the run.
$failedChecks = @($result.checks.GetEnumerator() | Where-Object { $_.Value -ne $true })
if ($failedChecks.Count -gt 0) {
throw "C18Z2 failed checks: $($failedChecks.Name -join ', ')"
}
}
finally {
# Best-effort cleanup: expire errors are swallowed (the intents may already be
# expired by the success path above).
if ($primaryRouteID) {
try { Invoke-Api -Method POST -Path "/clusters/$ClusterID/mesh/route-intents/$primaryRouteID/expire" -Body @{ actor_user_id = $ActorUserID } | Out-Null } catch {}
}
if ($alternateRouteID) {
try { Invoke-Api -Method POST -Path "/clusters/$ClusterID/mesh/route-intents/$alternateRouteID/expire" -Body @{ actor_user_id = $ActorUserID } | Out-Null } catch {}
}
# Start the updaters again only if this run stopped them.
if ($updatersStopped) {
try { Start-TestUpdaters } catch { Write-Warning "Could not restart test updaters: $($_.Exception.Message)" }
}
}
# --- Write the result file. There is no catch above, so this part runs only
# when the try block finished without throwing.
$resultFullPath = Join-Path $repoRoot $ResultPath
$resultDir = Split-Path -Parent $resultFullPath
if (-not (Test-Path $resultDir)) {
New-Item -ItemType Directory -Path $resultDir | Out-Null
}
$result | ConvertTo-Json -Depth 100 | Set-Content -Path $resultFullPath -Encoding UTF8
Write-Host "C18Z2 live service-channel soak smoke passed. Result: $resultFullPath"
# Emit the result object as the script's output.
$result