# PowerShell script (511 lines, 29 KiB)
<#
.SYNOPSIS
    C18Z82 service-channel "no safe recovery" smoke test.

.DESCRIPTION
    Drives the API at -ApiBaseUrl and the entry endpoint at -EntryBaseUrl,
    evaluates a set of named checks and writes the outcome as JSON to
    -ResultPath (resolved relative to the repository root).

.PARAMETER ApiBaseUrl
    Base URL that Invoke-Api prepends to every API path.
.PARAMETER ClusterID
    Cluster identifier interpolated into the API paths.
.PARAMETER ActorUserID
    Value sent as actor_user_id in query strings and request bodies.
.PARAMETER EntryNodeName
    Name of the node used as the route source (looked up via Get-NodeByName).
.PARAMETER RelayNodeName
    Name of the node inserted as the middle hop of the alternate route.
.PARAMETER ExitNodeName
    Name of the node used as the route destination.
.PARAMETER EntryBaseUrl
    Base URL to which the VPN packet batch is posted.
.PARAMETER DockerSSH
    ssh target on which "docker ps" is executed to inspect running containers.
.PARAMETER ExpectedBackendImage
    Image string that must appear in the backend container's "docker ps" line.
.PARAMETER ExpectedNodeAgentImage
    Image string that must appear in the node containers' "docker ps" lines.
.PARAMETER ResultPath
    Path of the JSON result file, joined onto the repository root.
#>
param(
    [string]$ApiBaseUrl = "http://192.168.200.61:18121/api/v1",
    [string]$ClusterID = "cfc0743d-d960-49fb-9de8-96e063d5e4aa",
    [string]$ActorUserID = "f67d943f-5397-4b3a-a229-695fe67ad700",
    [string]$EntryNodeName = "test-1",
    [string]$RelayNodeName = "test-3",
    [string]$ExitNodeName = "test-2",
    [string]$EntryBaseUrl = "http://192.168.200.61:19131",
    [string]$DockerSSH = "test-docker",
    [string]$ExpectedBackendImage = "rap-backend:fabric-service-channel-0.2.281-c18z109",
    [string]$ExpectedNodeAgentImage = "rap-node-agent:0.2.270-c18z95",
    [string]$ResultPath = "artifacts\c18z82-service-channel-no-safe-recovery-smoke-result.json"
)

# Fail fast: strict variable/property access and terminating errors everywhere.
Set-StrictMode -Version Latest
$ErrorActionPreference = "Stop"

# The repository root is two directories above the folder holding this script.
$scriptDir = Split-Path -Path $MyInvocation.MyCommand.Path -Parent
$repoRoot = (Resolve-Path -Path (Join-Path -Path $scriptDir -ChildPath "..\..")).ProviderPath

# Timestamped identifier that tags everything this run creates.
$runId = "c18z82-{0}" -f (Get-Date -Format "yyyyMMdd-HHmmss")
|
|
|
|
# Issues a request against "$ApiBaseUrl$Path" with a 30 second timeout and
# returns whatever Invoke-RestMethod produces.
#   Method - HTTP method to use.
#   Path   - path (and query string) appended to the script-level $ApiBaseUrl.
#   Body   - optional object; when supplied it is serialised to JSON
#            (depth 80) and sent with content type application/json.
function Invoke-Api {
    param([string]$Method, [string]$Path, [object]$Body = $null)

    $request = @{
        Method     = $Method
        Uri        = "$ApiBaseUrl$Path"
        TimeoutSec = 30
    }

    if ($null -ne $Body) {
        $request["ContentType"] = "application/json"
        $request["Body"] = ($Body | ConvertTo-Json -Depth 80)
    }

    Invoke-RestMethod @request
}
|
|
|
|
# Null-safe property lookup that never trips Set-StrictMode.
#   Item    - object to inspect; may be $null.
#   Name    - property name to look up through PSObject.Properties.
#   Default - value returned when Item is $null or has no such property.
# Returns the property's value (which may itself be $null) or Default.
function Get-PropertyValue {
    param([object]$Item, [string]$Name, [object]$Default = $null)

    if ($null -ne $Item) {
        $member = $Item.PSObject.Properties[$Name]
        if ($null -ne $member) {
            return $member.Value
        }
    }

    return $Default
}
|
|
|
|
# Fetches the cluster's node list and returns the first node whose "name"
# equals Name. Throws "Node '<Name>' was not found" when nothing matches.
function Get-NodeByName {
    param([string]$Name)

    $listing = Invoke-Api -Method GET -Path "/clusters/$ClusterID/nodes?actor_user_id=$ActorUserID"
    $candidates = @($listing.nodes | Where-Object { $_.name -eq $Name })

    if ($candidates.Count -eq 0) {
        throw "Node '$Name' was not found"
    }

    return $candidates[0]
}
|
|
|
|
# Builds a minimal 40-byte IPv4 + TCP packet (20-byte IP header followed by a
# 20-byte TCP header, no payload). Every byte not set below stays zero, which
# includes both checksum fields.
#   Source / Destination - 4-byte addresses copied into bytes 12-15 / 16-19.
#   SourcePort / DestinationPort - written big-endian into bytes 20-21 / 22-23.
# Returns the packet bytes.
function New-IPv4TcpPacket {
    param(
        [byte[]]$Source = @(10, 77, 0, 2),
        [byte[]]$Destination = @(192, 168, 200, 95),
        [int]$SourcePort,
        [int]$DestinationPort = 3389
    )

    $frame = New-Object byte[] 40

    # --- IPv4 header ---
    $frame[0] = 0x45    # version 4, header length 5 words
    $frame[2] = 0       # total length, high byte
    $frame[3] = 40      # total length, low byte
    $frame[8] = 64      # TTL
    $frame[9] = 6       # protocol: TCP
    [Array]::Copy($Source, 0, $frame, 12, 4)
    [Array]::Copy($Destination, 0, $frame, 16, 4)

    # --- TCP header: both ports, big-endian, back to back from byte 20 ---
    $cursor = 20
    foreach ($port in $SourcePort, $DestinationPort) {
        $frame[$cursor] = [byte](($port -shr 8) -band 0xff)
        $frame[$cursor + 1] = [byte]($port -band 0xff)
        $cursor += 2
    }
    $frame[32] = 0x50   # data offset: 5 words

    return $frame
}
|
|
|
|
# Serialises packets into one buffer: each non-empty packet is written as a
# 4-byte big-endian length followed by the packet bytes. Null or zero-length
# packets are skipped. Returns the concatenated bytes.
function ConvertTo-VPNPacketBatch {
    param([byte[][]]$Packets)

    $stream = New-Object System.Collections.Generic.List[byte]

    foreach ($frame in $Packets) {
        if ($null -ne $frame -and $frame.Length -gt 0) {
            $length = [uint32]$frame.Length

            # Length prefix, most significant byte first.
            foreach ($shift in 24, 16, 8, 0) {
                $stream.Add([byte](($length -shr $shift) -band 0xff))
            }

            $stream.AddRange([byte[]]$frame)
        }
    }

    return $stream.ToArray()
}
|
|
|
|
# Creates a "vpn_packets" route intent between two nodes and returns the
# route_intent object from the API response.
#   SourceNodeID / DestinationNodeID - node ids used as the selectors.
#   Hops - ordered node ids sent as the policy's hop list.
# The policy expires five minutes after the call and is tagged with $runId.
function New-RouteIntent {
    param([string]$SourceNodeID, [string]$DestinationNodeID, [string[]]$Hops)

    $generation = "$runId-primary"

    $policy = @{
        synthetic_enabled      = $true
        route_version          = $generation
        policy_version         = $generation
        peer_directory_version = $generation
        hops                   = @($Hops)
        allowed_channels       = @("vpn_packet", "fabric_control")
        max_ttl                = 8
        max_hops               = 8
        expires_at             = [DateTime]::UtcNow.AddMinutes(5).ToString("o")
        metadata               = @{ smoke = "c18z82_service_channel_no_safe_recovery"; run_id = $runId }
    }

    $request = @{
        actor_user_id        = $ActorUserID
        source_selector      = @{ node_id = $SourceNodeID }
        destination_selector = @{ node_id = $DestinationNodeID }
        service_class        = "vpn_packets"
        priority             = 2100000000
        policy               = $policy
    }

    $response = Invoke-Api -Method POST -Path "/clusters/$ClusterID/mesh/route-intents" -Body $request
    return $response.route_intent
}
|
|
|
|
# Disables every active "vpn_packets" route intent whose source and
# destination selectors point at the given node pair.
#   SourceNodeID / DestinationNodeID - node ids the selectors must match.
# Produces no output.
function Disable-ExistingRouteIntents {
    param([string]$SourceNodeID, [string]$DestinationNodeID)

    $listing = Invoke-Api -Method GET -Path "/clusters/$ClusterID/mesh/route-intents?actor_user_id=$ActorUserID"

    foreach ($intent in @($listing.route_intents)) {
        $status = [string](Get-PropertyValue -Item $intent -Name "status" -Default "")
        $serviceClass = [string](Get-PropertyValue -Item $intent -Name "service_class" -Default "")
        $fromSelector = Get-PropertyValue -Item $intent -Name "source_selector" -Default $null
        $toSelector = Get-PropertyValue -Item $intent -Name "destination_selector" -Default $null
        $fromNode = [string](Get-PropertyValue -Item $fromSelector -Name "node_id" -Default "")
        $toNode = [string](Get-PropertyValue -Item $toSelector -Name "node_id" -Default "")

        $isTarget = ($status -eq "active") -and
            ($serviceClass -eq "vpn_packets") -and
            ($fromNode -eq $SourceNodeID) -and
            ($toNode -eq $DestinationNodeID)

        if ($isTarget) {
            $disableRequest = @{
                actor_user_id = $ActorUserID
                reason        = "c18z82 isolate no-safe-recovery smoke route pair"
            }
            $null = Invoke-Api -Method POST -Path "/clusters/$ClusterID/mesh/route-intents/$($intent.id)/disable" -Body $disableRequest
        }
    }
}
|
|
|
|
# Posts a heartbeat for the entry node whose runtime report contains one
# flow-scheduler channel stat describing the primary route as failing
# (3 consecutive failures, rebuild recommended, degraded fallback not
# recommended).
#   EntryNodeID    - node the heartbeat is posted for.
#   PrimaryRouteID - route id reported as last used and last failed.
# Returns the API response.
function Send-DegradedHeartbeat {
    param([string]$EntryNodeID, [string]$PrimaryRouteID)

    $timestamp = (Get-Date).ToUniversalTime().ToString("o")
    $generation = "$runId-primary"

    # The per-channel quality sample for the degraded primary route.
    $channelStat = @{
        last_route_id                  = $PrimaryRouteID
        last_failed_route_id           = $PrimaryRouteID
        route_generation               = $generation
        last_error                     = "c18z82 primary route degraded; alternate added after lease"
        last_send_duration_ms          = 1200
        consecutive_failures           = 3
        stall_count                    = 2
        route_rebuild_recommended      = $true
        degraded_fallback_recommended  = $false
        quality_window_sample_count    = 8
        quality_window_success_count   = 2
        quality_window_failure_count   = 3
        quality_window_slow_count      = 2
        quality_window_drop_count      = 0
        quality_window_avg_latency_ms  = 1200
        quality_window_last_updated_at = $timestamp
    }

    $flowScheduler = @{
        schema_version  = "rap.fabric_flow_scheduler.v1"
        service_neutral = $true
        service_mode    = "application_protocol_agnostic"
        channel_stats   = @{ "c18z82-primary-degraded" = $channelStat }
    }

    $runtimeReport = @{
        schema_version = "c18l.fabric_service_channel_runtime_report.v1"
        config_version = $generation
        cluster_id     = $ClusterID
        local_node_id  = $EntryNodeID
        observed_at    = $timestamp
        ingress        = @{ flow_scheduler = $flowScheduler }
    }

    $heartbeat = @{
        health_status    = "healthy"
        reported_version = "0.2.256-c18z82"
        capabilities     = @{
            fabric_service_channel_runtime                = $true
            fabric_service_channel_route_quality_feedback = $true
            # NOTE(review): this tag reads "c18z77" while the rest of the script
            # uses "c18z82" - confirm whether that is intentional.
            smoke_feedback_injection                      = "c18z77"
        }
        service_states   = @{ smoke = "c18z82_primary_degraded_alternate_after_lease" }
        metadata         = @{
            fabric_service_channel_runtime_report = $runtimeReport
            smoke                                 = @{ name = "c18z82_service_channel_no_safe_recovery"; run_id = $runId }
        }
    }

    Invoke-Api -Method POST -Path "/clusters/$ClusterID/nodes/$EntryNodeID/heartbeats" -Body $heartbeat
}
|
|
|
|
# Posts a heartbeat for the entry node whose runtime report describes the
# replacement route as failing (4 consecutive failures, rebuild and degraded
# fallback both recommended).
#   EntryNodeID        - node the heartbeat is posted for.
#   ReplacementRouteID - route id reported as last selected, last used and
#                        last failed.
#   RouteGeneration    - value reported as the channel stat's route_generation.
# Returns the API response.
function Send-ReplacementDegradedHeartbeat {
    param([string]$EntryNodeID, [string]$ReplacementRouteID, [string]$RouteGeneration)

    $timestamp = (Get-Date).ToUniversalTime().ToString("o")

    # The per-channel quality sample for the degraded replacement route.
    $channelStat = @{
        last_route_id                  = $ReplacementRouteID
        last_failed_route_id           = $ReplacementRouteID
        route_generation               = $RouteGeneration
        last_error                     = "c18z82 replacement route degraded; no safe recovery route exists"
        last_send_duration_ms          = 1500
        consecutive_failures           = 4
        stall_count                    = 2
        route_rebuild_recommended      = $true
        degraded_fallback_recommended  = $true
        quality_window_sample_count    = 10
        quality_window_success_count   = 1
        quality_window_failure_count   = 4
        quality_window_slow_count      = 3
        quality_window_drop_count      = 0
        quality_window_avg_latency_ms  = 1500
        quality_window_last_updated_at = $timestamp
    }

    $flowScheduler = @{
        schema_version  = "rap.fabric_flow_scheduler.v1"
        service_neutral = $true
        service_mode    = "application_protocol_agnostic"
        channel_stats   = @{ "c18z82-replacement-degraded" = $channelStat }
    }

    $ingress = @{
        last_selected_route_id = $ReplacementRouteID
        send_route_failures    = 4
        flow_scheduler         = $flowScheduler
    }

    $runtimeReport = @{
        schema_version = "c18l.fabric_service_channel_runtime_report.v1"
        config_version = "$runId-replacement-degraded"
        cluster_id     = $ClusterID
        local_node_id  = $EntryNodeID
        observed_at    = $timestamp
        ingress        = $ingress
    }

    $heartbeat = @{
        health_status    = "healthy"
        reported_version = "0.2.256-c18z82"
        capabilities     = @{
            fabric_service_channel_runtime                = $true
            fabric_service_channel_route_quality_feedback = $true
            smoke_feedback_injection                      = "c18z82"
        }
        service_states   = @{ smoke = "c18z82_replacement_route_degraded_no_safe_recovery" }
        metadata         = @{
            fabric_service_channel_runtime_report = $runtimeReport
            smoke                                 = @{ name = "c18z82_service_channel_no_safe_recovery"; run_id = $runId }
        }
    }

    Invoke-Api -Method POST -Path "/clusters/$ClusterID/nodes/$EntryNodeID/heartbeats" -Body $heartbeat
}
|
|
|
|
# --- Stage 1: resolve the nodes, isolate the route pair, create the direct
# --- route and obtain a service-channel lease over it.
$entryNode = Get-NodeByName -Name $EntryNodeName
$relayNode = Get-NodeByName -Name $RelayNodeName
$exitNode = Get-NodeByName -Name $ExitNodeName

# Disable any active vpn_packets intents already defined for this node pair.
$null = Disable-ExistingRouteIntents -SourceNodeID $entryNode.id -DestinationNodeID $exitNode.id

# Direct entry -> exit route.
$route = New-RouteIntent -SourceNodeID $entryNode.id -DestinationNodeID $exitNode.id -Hops @($entryNode.id, $exitNode.id)

$leaseRequest = @{
    actor_user_id           = $ActorUserID
    organization_id         = "smoke-org"
    user_id                 = "smoke-user"
    resource_id             = "c18z82-vpn-smoke"
    service_class           = "vpn_packets"
    entry_node_ids          = @([string]$entryNode.id)
    exit_node_ids           = @([string]$exitNode.id)
    preferred_entry_node_id = [string]$entryNode.id
    preferred_exit_node_id  = [string]$exitNode.id
    allowed_channels        = @("vpn_packet", "fabric_control")
    ttl_seconds             = 180
    metadata                = @{ smoke = "c18z82_service_channel_no_safe_recovery"; run_id = $runId }
}
$leaseResponse = Invoke-Api -Method POST -Path "/clusters/$ClusterID/fabric/service-channels/leases" -Body $leaseRequest
$lease = $leaseResponse.fabric_service_channel_lease

# Use the route id reported by the lease; fall back to the intent just created
# when the lease carries no primary_route.route_id.
$leasePrimaryRoute = Get-PropertyValue -Item $lease -Name "primary_route" -Default $null
$primaryRouteID = [string](Get-PropertyValue -Item $leasePrimaryRoute -Name "route_id" -Default $route.id)
|
|
|
|
# --- Stage 2: report the primary route as degraded, then poll access
# --- telemetry (up to 10 times, 3 s apart) until this lease's channel shows
# --- the "rebuild_route" remediation action.
$null = Send-DegradedHeartbeat -EntryNodeID $entryNode.id -PrimaryRouteID $primaryRouteID

$matchingChannel = $null
$remediationPollsLeft = 10
while ($remediationPollsLeft -gt 0) {
    $remediationPollsLeft--
    Start-Sleep -Seconds 3

    $telemetryResponse = Invoke-Api -Method GET -Path "/clusters/$ClusterID/fabric/service-channels/access-telemetry?actor_user_id=$ActorUserID&limit=50"
    $accessTelemetry = $telemetryResponse.fabric_service_channel_access_telemetry

    $leaseChannels = @($accessTelemetry.active_channels | Where-Object { $_.channel_id -eq $lease.channel_id })
    $matchingChannel = $leaseChannels | Select-Object -First 1

    if ($null -ne $matchingChannel) {
        $remediationAction = [string](Get-PropertyValue -Item $matchingChannel -Name "remediation_action" -Default "")
        if ($remediationAction -eq "rebuild_route") {
            break
        }
    }
}

# The remediation command (if any) carries the id used as the rebuild request id.
$command = Get-PropertyValue -Item $matchingChannel -Name "remediation_command" -Default $null
$commandID = [string](Get-PropertyValue -Item $command -Name "command_id" -Default "")
|
|
# --- Stage 3: add the relayed alternate route, request the entry node's
# --- synthetic config, then look up the rebuild attempt recorded for the
# --- remediation command.
$alternateHops = @($entryNode.id, $relayNode.id, $exitNode.id)
$alternate = New-RouteIntent -SourceNodeID $entryNode.id -DestinationNodeID $exitNode.id -Hops $alternateHops
$alternateRouteID = [string]$alternate.id

# The response is discarded; only the request itself matters here.
$null = Invoke-Api -Method GET -Path "/clusters/$ClusterID/nodes/$($entryNode.id)/mesh/synthetic-config"

$attemptsResponse = Invoke-Api -Method GET -Path "/clusters/$ClusterID/fabric/service-channels/rebuild-attempts?actor_user_id=$ActorUserID&reporter_node_id=$($entryNode.id)&rebuild_request_id=$commandID&limit=10"
$attempts = $attemptsResponse.rebuild_attempts
$matchingAttempts = @($attempts | Where-Object { $_.rebuild_request_id -eq $commandID })
$attempt = $matchingAttempts | Select-Object -First 1
|
|
|
|
# --- Stage 4: poll the entry node's recent heartbeats (up to 12 times, 5 s
# --- apart) for a route-manager decision that ties the remediation command to
# --- the primary route and the alternate as its replacement.
$routeManagerDecision = $null
$routeManagerTransition = $null

:routeManagerPoll foreach ($routeManagerRound in 1..12) {
    Start-Sleep -Seconds 5
    $heartbeats = (Invoke-Api -Method GET -Path "/clusters/$ClusterID/nodes/$($entryNode.id)/heartbeats?actor_user_id=$ActorUserID&limit=5").heartbeats

    foreach ($heartbeat in @($heartbeats)) {
        $heartbeatMetadata = Get-PropertyValue -Item $heartbeat -Name "metadata" -Default $null
        $runtimeReport = Get-PropertyValue -Item $heartbeatMetadata -Name "fabric_service_channel_runtime_report" -Default $null
        $ingress = Get-PropertyValue -Item $runtimeReport -Name "ingress" -Default $null
        $routeManager = Get-PropertyValue -Item $ingress -Name "route_manager" -Default $null

        # Tracks the transition of the heartbeat inspected most recently.
        $routeManagerTransition = Get-PropertyValue -Item $ingress -Name "route_manager_transition" -Default $null

        $decisions = @()
        if ($null -ne $routeManager -and $null -ne $routeManager.PSObject.Properties["decisions"]) {
            $decisions = @($routeManager.decisions)
        }

        $routeManagerDecision = $decisions |
            Where-Object {
                ([string](Get-PropertyValue -Item $_ -Name "rebuild_request_id" -Default "") -eq $commandID) -and
                ([string](Get-PropertyValue -Item $_ -Name "route_id" -Default "") -eq $primaryRouteID) -and
                ([string](Get-PropertyValue -Item $_ -Name "replacement_route_id" -Default "") -eq $alternateRouteID)
            } |
            Select-Object -First 1

        if ($null -ne $routeManagerDecision) {
            # Found it: stop scanning heartbeats and stop polling.
            break routeManagerPoll
        }
    }
}
|
|
|
|
# --- Stage 5: derive the packet endpoint and headers from the lease, and
# --- capture baseline counters from the newest heartbeat before sending
# --- traffic.

# Fill in the placeholders of the lease's entry path template.
$packetPath = $lease.entry_http.path_template
$packetPath = $packetPath.Replace("{cluster_id}", $ClusterID)
$packetPath = $packetPath.Replace("{channel_id}", [string]$lease.channel_id)
$packetPath = $packetPath.Replace("{resource_id}", "c18z82-vpn-smoke")
$packetUrl = "{0}{1}" -f $EntryBaseUrl.TrimEnd("/"), $packetPath

$headers = @{}
$headers["X-RAP-Service-Channel-Token"] = [string]$lease.token.token
$headers["X-RAP-Fabric-Channel-ID"] = [string]$lease.channel_id
$headers["X-RAP-Service-Class"] = "vpn_packets"
$headers["X-RAP-Channel-Class"] = "vpn_packet"
$headers["X-RAP-Traffic-Class"] = "interactive"

# Baselines stay at zero when no heartbeat is available, because every lookup
# below falls back to its default on a missing object.
$baselineIngress = $null
$baselineFlowScheduler = $null
$baselineHeartbeats = (Invoke-Api -Method GET -Path "/clusters/$ClusterID/nodes/$($entryNode.id)/heartbeats?actor_user_id=$ActorUserID&limit=1").heartbeats
if (@($baselineHeartbeats).Count -gt 0) {
    $baselineMetadata = Get-PropertyValue -Item $baselineHeartbeats[0] -Name "metadata" -Default $null
    $baselineRuntimeReport = Get-PropertyValue -Item $baselineMetadata -Name "fabric_service_channel_runtime_report" -Default $null
    $baselineIngress = Get-PropertyValue -Item $baselineRuntimeReport -Name "ingress" -Default $null
    $baselineFlowScheduler = Get-PropertyValue -Item $baselineIngress -Name "flow_scheduler" -Default $null
}

$baselineFallbackLocal = [int](Get-PropertyValue -Item $baselineIngress -Name "send_fallback_local" -Default 0)
$baselineRouteFailures = [int](Get-PropertyValue -Item $baselineIngress -Name "send_route_failures" -Default 0)
$baselineFlowDropped = [int](Get-PropertyValue -Item $baselineIngress -Name "send_flow_dropped" -Default 0)
$baselineSchedulerDropped = [int](Get-PropertyValue -Item $baselineFlowScheduler -Name "dropped" -Default 0)
|
|
# --- Stage 6: send one batch of 16 synthetic TCP packets (source ports
# --- 54000-54015, destination port 3389) through the leased service channel.
$trafficPackets = @()
foreach ($offset in 0..15) {
    # The leading comma keeps each packet as a single array element.
    $trafficPackets += ,(New-IPv4TcpPacket -SourcePort (54000 + $offset) -DestinationPort 3389)
}
$trafficPayload = ConvertTo-VPNPacketBatch -Packets $trafficPackets

# The batch is uploaded from a temporary file via -InFile.
$trafficPayloadPath = Join-Path ([System.IO.Path]::GetTempPath()) "$runId-vpn-packet-batch.bin"
try {
    [System.IO.File]::WriteAllBytes($trafficPayloadPath, [byte[]]$trafficPayload)
    $trafficStarted = Get-Date
    # -UseBasicParsing avoids the Internet Explorer parsing engine on Windows
    # PowerShell 5.1; PowerShell 6+ accepts and ignores the switch.
    $trafficResponse = Invoke-WebRequest -Method Post -Uri ($packetUrl + "?batch=true") -Headers $headers -InFile $trafficPayloadPath -ContentType "application/vnd.rap.vpn-packet-batch.v1" -TimeoutSec 30 -UseBasicParsing
    $trafficDurationMs = [int]((Get-Date) - $trafficStarted).TotalMilliseconds
} finally {
    # Always remove the temporary payload, including when the request throws
    # (Invoke-WebRequest raises a terminating error on failure statuses).
    Remove-Item -Path $trafficPayloadPath -Force -ErrorAction SilentlyContinue
}
$trafficAcceptedBy = [string]$trafficResponse.Headers["X-RAP-Service-Channel-Accepted-By"]
|
|
|
|
# --- Stage 7: poll the entry node's recent heartbeats (up to 12 times, 5 s
# --- apart) until one reports the alternate route as last selected together
# --- with at least one channel stat whose last_route_id is the alternate.
# --- The post-remediation counters reflect the heartbeat inspected last.
$replacementTrafficObserved = $false
$replacementLastSelected = ""
$replacementFlowStats = @()
$postRemediationFallbackLocal = 0
$postRemediationRouteFailures = 0
$postRemediationFlowDropped = 0
$postRemediationSchedulerDropped = 0

:replacementPoll foreach ($replacementRound in 1..12) {
    Start-Sleep -Seconds 5
    $heartbeats = (Invoke-Api -Method GET -Path "/clusters/$ClusterID/nodes/$($entryNode.id)/heartbeats?actor_user_id=$ActorUserID&limit=5").heartbeats

    foreach ($heartbeat in @($heartbeats)) {
        $heartbeatMetadata = Get-PropertyValue -Item $heartbeat -Name "metadata" -Default $null
        $runtimeReport = Get-PropertyValue -Item $heartbeatMetadata -Name "fabric_service_channel_runtime_report" -Default $null
        $ingress = Get-PropertyValue -Item $runtimeReport -Name "ingress" -Default $null

        $replacementLastSelected = [string](Get-PropertyValue -Item $ingress -Name "last_selected_route_id" -Default "")
        $postRemediationFallbackLocal = [int](Get-PropertyValue -Item $ingress -Name "send_fallback_local" -Default 0)
        $postRemediationRouteFailures = [int](Get-PropertyValue -Item $ingress -Name "send_route_failures" -Default 0)
        $postRemediationFlowDropped = [int](Get-PropertyValue -Item $ingress -Name "send_flow_dropped" -Default 0)

        $flowScheduler = Get-PropertyValue -Item $ingress -Name "flow_scheduler" -Default $null
        $postRemediationSchedulerDropped = [int](Get-PropertyValue -Item $flowScheduler -Name "dropped" -Default 0)

        # Only a heartbeat that carries channel_stats replaces the collected
        # flow stats; otherwise the previous collection is kept as-is.
        $channelStats = Get-PropertyValue -Item $flowScheduler -Name "channel_stats" -Default $null
        if ($null -ne $channelStats) {
            $replacementFlowStats = @(
                foreach ($statEntry in @($channelStats.PSObject.Properties)) {
                    $stat = $statEntry.Value
                    if ([string](Get-PropertyValue -Item $stat -Name "last_route_id" -Default "") -eq $alternateRouteID) {
                        $stat
                    }
                }
            )
        }

        $replacementTrafficObserved = ($replacementLastSelected -eq $alternateRouteID -and $replacementFlowStats.Count -ge 1)
        if ($replacementTrafficObserved) {
            # Observed: stop scanning heartbeats and stop polling.
            break replacementPoll
        }
    }
}
|
|
|
|
# --- Stage 8: snapshot the channel after the rebuild, report the replacement
# --- route as degraded too, then read back the planner decision, the feedback
# --- entry and the channel state for the no-safe-recovery case.

# Picks this lease's channel out of an access-telemetry document.
$selectLeaseChannel = {
    param($telemetry)
    @($telemetry.active_channels | Where-Object { $_.channel_id -eq $lease.channel_id }) | Select-Object -First 1
}
$accessTelemetryPath = "/clusters/$ClusterID/fabric/service-channels/access-telemetry?actor_user_id=$ActorUserID&limit=50"

$postAccess = (Invoke-Api -Method GET -Path $accessTelemetryPath).fabric_service_channel_access_telemetry
$postChannel = & $selectLeaseChannel $postAccess

$null = Send-ReplacementDegradedHeartbeat -EntryNodeID $entryNode.id -ReplacementRouteID $alternateRouteID -RouteGeneration "$runId-primary"

$noSafeSyntheticConfig = (Invoke-Api -Method GET -Path "/clusters/$ClusterID/nodes/$($entryNode.id)/mesh/synthetic-config").synthetic_mesh_config

# Route path decisions are optional in the synthetic config.
$noSafePathDecisions = @()
if ($null -ne $noSafeSyntheticConfig.PSObject.Properties["route_path_decisions"]) {
    $noSafePathDecisions = @($noSafeSyntheticConfig.route_path_decisions.decisions)
}

# The decision of interest targets the alternate route and originates from
# service-channel feedback with no alternate available.
$noSafeDecision = $noSafePathDecisions |
    Where-Object {
        ([string](Get-PropertyValue -Item $_ -Name "route_id" -Default "") -eq $alternateRouteID) -and
        ([string](Get-PropertyValue -Item $_ -Name "decision_source" -Default "") -eq "service_channel_feedback_no_alternate")
    } |
    Select-Object -First 1

# Feedback entry for the alternate route, when the config exposes one.
$noSafeFeedback = $null
$serviceChannelFeedback = Get-PropertyValue -Item $noSafeSyntheticConfig -Name "service_channel_feedback" -Default $null
if ($null -ne $serviceChannelFeedback -and $null -ne $serviceChannelFeedback.PSObject.Properties["routes"]) {
    $alternateFeedback = @($serviceChannelFeedback.routes | Where-Object { [string](Get-PropertyValue -Item $_ -Name "route_id" -Default "") -eq $alternateRouteID })
    $noSafeFeedback = $alternateFeedback | Select-Object -First 1
}

$postNoSafeAccess = (Invoke-Api -Method GET -Path $accessTelemetryPath).fabric_service_channel_access_telemetry
$postNoSafeChannel = & $selectLeaseChannel $postNoSafeAccess
|
|
|
|
# --- Stage 9: evaluate every smoke check. Each entry is a boolean; the names
# --- of the false ones end up in $failed (in declaration order).

# "docker ps" lines for the backend and node containers, fetched over ssh.
$backendLine = (& ssh $DockerSSH "docker ps --format '{{.Names}} {{.Image}} {{.Status}}' | grep '^rap_test_backend '") | Out-String
$nodeLines = (& ssh $DockerSSH "docker ps --format '{{.Names}} {{.Image}} {{.Status}}' | grep '^rap_test_node_test_'") | Out-String

# Optional properties are read through Get-PropertyValue so that a response
# lacking them fails the corresponding check instead of throwing under
# Set-StrictMode before the result is written and cleanup has run.
$leaseStatus = [string](Get-PropertyValue -Item $lease -Name "status" -Default "")
$noSafeScoreReasons = @(Get-PropertyValue -Item $noSafeDecision -Name "score_reasons" -Default @())

$checks = [ordered]@{
    # Deployment under test.
    backend_expected_image_deployed = $backendLine.Contains($ExpectedBackendImage)
    node_agent_expected_image_deployed = $nodeLines.Contains($ExpectedNodeAgentImage)
    lease_ready = ($leaseStatus -eq "ready")

    # Remediation of the degraded primary route.
    remediation_rebuild_route_visible = ($null -ne $matchingChannel -and [string](Get-PropertyValue -Item $matchingChannel -Name "remediation_action" -Default "") -eq "rebuild_route")
    remediation_command_visible = ($null -ne $command -and $commandID.Length -gt 0)
    durable_rebuild_intent_recorded = ($null -ne $attempt)
    durable_rebuild_intent_resolved_applied = ($null -ne $attempt -and [string](Get-PropertyValue -Item $attempt -Name "rebuild_status" -Default "") -eq "applied")
    durable_rebuild_intent_outcome_replacement_selected = ($null -ne $attempt -and [string](Get-PropertyValue -Item $attempt -Name "outcome" -Default "") -eq "replacement_selected")
    durable_rebuild_intent_replacement_matches = ($null -ne $attempt -and [string](Get-PropertyValue -Item $attempt -Name "replacement_route_id" -Default "") -eq $alternateRouteID)
    durable_rebuild_intent_source_matches = ($null -ne $attempt -and [string](Get-PropertyValue -Item $attempt -Name "decision_source" -Default "") -eq "service_channel_remediation_command")
    initial_access_telemetry_reports_planner_applied = ($null -ne $postChannel -and [string](Get-PropertyValue -Item $postChannel -Name "remediation_execution_status" -Default "") -eq "rebuild_request_applied")
    node_route_manager_consumed_same_command = ($null -ne $routeManagerDecision)
    node_route_manager_applied_rebuild = ($null -ne $routeManagerDecision -and [string](Get-PropertyValue -Item $routeManagerDecision -Name "rebuild_status" -Default "") -eq "applied")
    node_route_manager_transition_applied_rebuild = ($null -ne $routeManagerTransition -and [string](Get-PropertyValue -Item $routeManagerTransition -Name "status" -Default "") -eq "applied_rebuild")

    # Traffic over the replacement route; counters are deltas against the baseline.
    traffic_packet_accepted = ([int]$trafficResponse.StatusCode -eq 202)
    traffic_accepted_by_runtime = ($trafficAcceptedBy -eq "introspection" -or $trafficAcceptedBy -eq "signed")
    traffic_selected_replacement_route = $replacementTrafficObserved
    traffic_last_selected_route_matches_replacement = ($replacementLastSelected -eq $alternateRouteID)
    no_backend_or_local_fallback_after_replacement = (($postRemediationFallbackLocal - $baselineFallbackLocal) -eq 0)
    no_route_failures_after_replacement = (($postRemediationRouteFailures - $baselineRouteFailures) -eq 0)
    no_flow_drops_after_replacement = (($postRemediationFlowDropped - $baselineFlowDropped) -eq 0 -and ($postRemediationSchedulerDropped - $baselineSchedulerDropped) -eq 0)

    # Behaviour once the replacement route is degraded as well.
    replacement_degradation_feedback_visible = ($null -ne $noSafeDecision -and @($noSafeScoreReasons | Where-Object { [string]$_ -eq "service_channel_fenced_route" }).Count -gt 0)
    no_safe_recovery_decision_visible = ($null -ne $noSafeDecision)
    no_safe_recovery_status_pending_fallback = ($null -ne $noSafeDecision -and [string](Get-PropertyValue -Item $noSafeDecision -Name "rebuild_status" -Default "") -eq "pending_degraded_fallback")
    no_safe_recovery_reason_visible = ($null -ne $noSafeDecision -and @($noSafeScoreReasons | Where-Object { [string]$_ -eq "no_unfenced_alternate_route" }).Count -gt 0)
    no_silent_stickiness_to_bad_replacement = ($null -ne $noSafeDecision -and [string](Get-PropertyValue -Item $noSafeDecision -Name "decision_source" -Default "") -eq "service_channel_feedback_no_alternate")
}

$failed = @($checks.GetEnumerator() | Where-Object { -not $_.Value } | ForEach-Object { $_.Key })
|
|
|
|
# --- Stage 10: assemble the result document. Keys are inserted in the order
# --- they should appear in the serialised JSON.
$summary = [ordered]@{}

# Container lines as reported by "docker ps".
$summary["backend_container"] = $backendLine.Trim()
$summary["node_containers"] = $nodeLines.Trim()

# Remediation artefacts.
$summary["remediation_command"] = $command
$summary["rebuild_attempt"] = $attempt
$summary["route_manager_decision"] = $routeManagerDecision
$summary["route_manager_transition"] = $routeManagerTransition

# Traffic request outcome.
$summary["traffic_status_code"] = [int]$trafficResponse.StatusCode
$summary["traffic_accepted_by"] = $trafficAcceptedBy
$summary["traffic_duration_ms"] = $trafficDurationMs
$summary["traffic_packet_count"] = $trafficPackets.Count

# What the entry node reported after traffic was sent.
$summary["replacement_last_selected_route_id"] = $replacementLastSelected
$summary["replacement_flow_stat_count"] = $replacementFlowStats.Count
$summary["replacement_flow_stats"] = $replacementFlowStats

# Counters: baseline, delta, post-remediation.
$summary["baseline_send_fallback_local"] = $baselineFallbackLocal
$summary["baseline_send_route_failures"] = $baselineRouteFailures
$summary["baseline_send_flow_dropped"] = $baselineFlowDropped
$summary["baseline_scheduler_dropped"] = $baselineSchedulerDropped
$summary["fallback_local_delta"] = ($postRemediationFallbackLocal - $baselineFallbackLocal)
$summary["route_failure_delta"] = ($postRemediationRouteFailures - $baselineRouteFailures)
$summary["flow_drop_delta"] = ($postRemediationFlowDropped - $baselineFlowDropped)
$summary["scheduler_drop_delta"] = ($postRemediationSchedulerDropped - $baselineSchedulerDropped)
$summary["post_remediation_send_fallback_local"] = $postRemediationFallbackLocal
$summary["post_remediation_send_route_failures"] = $postRemediationRouteFailures
$summary["post_remediation_send_flow_dropped"] = $postRemediationFlowDropped
$summary["post_remediation_scheduler_dropped"] = $postRemediationSchedulerDropped

# Channel and planner state for the no-safe-recovery phase.
$summary["post_channel"] = $postChannel
$summary["no_safe_feedback"] = $noSafeFeedback
$summary["no_safe_decision"] = $noSafeDecision
$summary["no_safe_route_path_decisions"] = $noSafePathDecisions
$summary["post_no_safe_channel"] = $postNoSafeChannel

$result = [ordered]@{}
$result["schema_version"] = "c18z82.service_channel_no_safe_recovery_smoke.v1"
$result["run_id"] = $runId
$result["cluster_id"] = $ClusterID
$result["channel_id"] = [string]$lease.channel_id
$result["primary_route_id"] = $primaryRouteID
$result["alternate_route_id"] = $alternateRouteID
$result["rebuild_request_id"] = $commandID
$result["passed"] = ($failed.Count -eq 0)
$result["checks"] = $checks
$result["failed_checks"] = $failed
$result["summary"] = $summary
|
|
|
|
# --- Stage 11: persist the result, clean up what the run created, and
# --- report the verdict.

# A rooted -ResultPath is used as given; a relative one is resolved against
# the repository root.
if ([System.IO.Path]::IsPathRooted($ResultPath)) {
    $target = $ResultPath
} else {
    $target = Join-Path $repoRoot $ResultPath
}

try {
    # Create the output directory on first use so the write cannot fail just
    # because the folder is missing.
    $targetDirectory = Split-Path -Parent $target
    if ($targetDirectory -and -not (Test-Path -LiteralPath $targetDirectory -PathType Container)) {
        [void](New-Item -ItemType Directory -Path $targetDirectory -Force)
    }
    $result | ConvertTo-Json -Depth 80 | Set-Content -Path $target -Encoding UTF8
} finally {
    # Best-effort cleanup. It runs even when writing the result fails, and each
    # step is isolated so one failing call does not skip the remaining ones.
    foreach ($cleanupRouteID in @($primaryRouteID, $alternateRouteID)) {
        if ($cleanupRouteID) {
            try {
                Invoke-Api -Method POST -Path "/clusters/$ClusterID/mesh/route-intents/$cleanupRouteID/expire" -Body @{ actor_user_id = $ActorUserID } | Out-Null
            } catch {
                Write-Warning "cleanup failed after c18z82 smoke: $($_.Exception.Message)"
            }
        }
    }

    # Short pause between expiring the routes and requesting lease cleanup.
    Start-Sleep -Seconds 3

    try {
        Invoke-Api -Method POST -Path "/clusters/$ClusterID/fabric/service-channels/leases/cleanup" -Body @{
            actor_user_id = $ActorUserID
            limit = 50
        } | Out-Null
    } catch {
        Write-Warning "cleanup failed after c18z82 smoke: $($_.Exception.Message)"
    }
}

# Fail the script when any check did not hold, listing the failed check names.
if (-not $result.passed) {
    throw "C18Z82 no-safe-recovery smoke failed: $($failed -join ', ')"
}

Write-Host "C18Z82 service-channel no-safe-recovery smoke passed. Result: $target"
$result
|
|
|
|
|
|
|
|
|