diff --git a/agents/rap-node-agent/cmd/mesh-live-smoke/main.go b/agents/rap-node-agent/cmd/mesh-live-smoke/main.go index b314827..a91d832 100644 --- a/agents/rap-node-agent/cmd/mesh-live-smoke/main.go +++ b/agents/rap-node-agent/cmd/mesh-live-smoke/main.go @@ -48,6 +48,8 @@ type smokeReport struct { FabricVPNInteractive int `json:"fabric_vpn_interactive_or_control_channels"` FabricVPNBulkWindow int `json:"fabric_vpn_bulk_parallel_window"` FabricVPNInteractiveWin int `json:"fabric_vpn_interactive_parallel_window"` + FabricVPNPressureLevel string `json:"fabric_vpn_pressure_level"` + FabricVPNPressureReason []string `json:"fabric_vpn_pressure_reasons"` FabricVPNRouteRecovered bool `json:"fabric_vpn_route_recovered"` FabricVPNRouteSwitches uint64 `json:"fabric_vpn_route_switch_count"` FabricVPNRecoveryMS int64 `json:"fabric_vpn_route_recovery_ms"` @@ -157,7 +159,7 @@ func run(ctx context.Context) (smokeReport, error) { if err != nil { return smokeReport{}, fmt.Errorf("fabric vpn packet session smoke: %w", err) } - fabricVPNBulkPressure, fabricVPNBulkChannels, fabricVPNInteractiveChannels, fabricVPNBulkWindow, fabricVPNInteractiveWindow := smokeVPNFlowSchedulerBulkPressure() + fabricVPNBulkPressure, fabricVPNBulkChannels, fabricVPNInteractiveChannels, fabricVPNBulkWindow, fabricVPNInteractiveWindow, fabricVPNPressureLevel, fabricVPNPressureReasons := smokeVPNFlowSchedulerBulkPressure() fabricVPNRouteRecovered, fabricVPNRouteSwitches, fabricVPNRecoveryMS, fabricVPNRecoveryMaxMS, fabricVPNRecoveryAvgMS, fabricVPNRecoveryReason := smokeVPNFlowSchedulerRouteRecovery() fabricQUICAccepted, fabricQUICEndpoint, fabricQUICPressure, err := smokeQUICFabricSession(ctx) if err != nil { @@ -184,6 +186,8 @@ func run(ctx context.Context) (smokeReport, error) { FabricVPNInteractive: fabricVPNInteractiveChannels, FabricVPNBulkWindow: fabricVPNBulkWindow, FabricVPNInteractiveWin: fabricVPNInteractiveWindow, + FabricVPNPressureLevel: fabricVPNPressureLevel, + FabricVPNPressureReason: fabricVPNPressureReasons, FabricVPNRouteRecovered: fabricVPNRouteRecovered, FabricVPNRouteSwitches: fabricVPNRouteSwitches, FabricVPNRecoveryMS: fabricVPNRecoveryMS, @@ -203,7 +207,7 @@ func run(ctx context.Context) (smokeReport, error) { }, nil } -func smokeVPNFlowSchedulerBulkPressure() (bool, int, int, int, int) { +func smokeVPNFlowSchedulerBulkPressure() (bool, int, int, int, int, string, []string) { scheduler := vpnruntime.NewFabricFlowScheduler(32, 16) bulkPacket := []byte("bulk") interactivePacket := []byte("interactive-rdp-like") @@ -224,7 +228,9 @@ func smokeVPNFlowSchedulerBulkPressure() (bool, int, int, int, int) { snapshot.BulkPressureChannelCount, snapshot.InteractiveOrControlCount, snapshot.RecommendedParallelWindows[vpnruntime.FabricTrafficClassBulk], - snapshot.RecommendedParallelWindows[vpnruntime.FabricTrafficClassInteractive] + snapshot.RecommendedParallelWindows[vpnruntime.FabricTrafficClassInteractive], + snapshot.PressureLevel, + snapshot.PressureReasons } func smokeVPNFlowSchedulerRouteRecovery() (bool, uint64, int64, int64, int64, string) { diff --git a/docs/architecture/DISTRIBUTED_FABRIC_NODE_PROTOCOL_PLAN.md b/docs/architecture/DISTRIBUTED_FABRIC_NODE_PROTOCOL_PLAN.md index 84243f6..163ef2f 100644 --- a/docs/architecture/DISTRIBUTED_FABRIC_NODE_PROTOCOL_PLAN.md +++ b/docs/architecture/DISTRIBUTED_FABRIC_NODE_PROTOCOL_PLAN.md @@ -465,6 +465,7 @@ Flow-scheduler snapshots now include a machine-readable pressure level (`nominal`, `warning`, `critical`) and bounded reason list derived from drops, route failures, route recovery, slow channels, bulk pressure, and adaptive backpressure. +`mesh-live-smoke` reports the mixed-load scheduler pressure level and reasons. Endpoint ranking treats `capacity_limited` observations as a soft pressure penalty instead of a hard recent failure, enabling load spreading without marking the carrier unhealthy.