Report VPN scheduler pressure in smoke

This commit is contained in:
2026-05-16 13:23:31 +03:00
parent 1687277688
commit db75e1baed
2 changed files with 10 additions and 3 deletions
@@ -48,6 +48,8 @@ type smokeReport struct {
FabricVPNInteractive int `json:"fabric_vpn_interactive_or_control_channels"` FabricVPNInteractive int `json:"fabric_vpn_interactive_or_control_channels"`
FabricVPNBulkWindow int `json:"fabric_vpn_bulk_parallel_window"` FabricVPNBulkWindow int `json:"fabric_vpn_bulk_parallel_window"`
FabricVPNInteractiveWin int `json:"fabric_vpn_interactive_parallel_window"` FabricVPNInteractiveWin int `json:"fabric_vpn_interactive_parallel_window"`
FabricVPNPressureLevel string `json:"fabric_vpn_pressure_level"`
FabricVPNPressureReason []string `json:"fabric_vpn_pressure_reasons"`
FabricVPNRouteRecovered bool `json:"fabric_vpn_route_recovered"` FabricVPNRouteRecovered bool `json:"fabric_vpn_route_recovered"`
FabricVPNRouteSwitches uint64 `json:"fabric_vpn_route_switch_count"` FabricVPNRouteSwitches uint64 `json:"fabric_vpn_route_switch_count"`
FabricVPNRecoveryMS int64 `json:"fabric_vpn_route_recovery_ms"` FabricVPNRecoveryMS int64 `json:"fabric_vpn_route_recovery_ms"`
@@ -157,7 +159,7 @@ func run(ctx context.Context) (smokeReport, error) {
if err != nil { if err != nil {
return smokeReport{}, fmt.Errorf("fabric vpn packet session smoke: %w", err) return smokeReport{}, fmt.Errorf("fabric vpn packet session smoke: %w", err)
} }
fabricVPNBulkPressure, fabricVPNBulkChannels, fabricVPNInteractiveChannels, fabricVPNBulkWindow, fabricVPNInteractiveWindow := smokeVPNFlowSchedulerBulkPressure() fabricVPNBulkPressure, fabricVPNBulkChannels, fabricVPNInteractiveChannels, fabricVPNBulkWindow, fabricVPNInteractiveWindow, fabricVPNPressureLevel, fabricVPNPressureReasons := smokeVPNFlowSchedulerBulkPressure()
fabricVPNRouteRecovered, fabricVPNRouteSwitches, fabricVPNRecoveryMS, fabricVPNRecoveryMaxMS, fabricVPNRecoveryAvgMS, fabricVPNRecoveryReason := smokeVPNFlowSchedulerRouteRecovery() fabricVPNRouteRecovered, fabricVPNRouteSwitches, fabricVPNRecoveryMS, fabricVPNRecoveryMaxMS, fabricVPNRecoveryAvgMS, fabricVPNRecoveryReason := smokeVPNFlowSchedulerRouteRecovery()
fabricQUICAccepted, fabricQUICEndpoint, fabricQUICPressure, err := smokeQUICFabricSession(ctx) fabricQUICAccepted, fabricQUICEndpoint, fabricQUICPressure, err := smokeQUICFabricSession(ctx)
if err != nil { if err != nil {
@@ -184,6 +186,8 @@ func run(ctx context.Context) (smokeReport, error) {
FabricVPNInteractive: fabricVPNInteractiveChannels, FabricVPNInteractive: fabricVPNInteractiveChannels,
FabricVPNBulkWindow: fabricVPNBulkWindow, FabricVPNBulkWindow: fabricVPNBulkWindow,
FabricVPNInteractiveWin: fabricVPNInteractiveWindow, FabricVPNInteractiveWin: fabricVPNInteractiveWindow,
FabricVPNPressureLevel: fabricVPNPressureLevel,
FabricVPNPressureReason: fabricVPNPressureReasons,
FabricVPNRouteRecovered: fabricVPNRouteRecovered, FabricVPNRouteRecovered: fabricVPNRouteRecovered,
FabricVPNRouteSwitches: fabricVPNRouteSwitches, FabricVPNRouteSwitches: fabricVPNRouteSwitches,
FabricVPNRecoveryMS: fabricVPNRecoveryMS, FabricVPNRecoveryMS: fabricVPNRecoveryMS,
@@ -203,7 +207,7 @@ func run(ctx context.Context) (smokeReport, error) {
}, nil }, nil
} }
func smokeVPNFlowSchedulerBulkPressure() (bool, int, int, int, int) { func smokeVPNFlowSchedulerBulkPressure() (bool, int, int, int, int, string, []string) {
scheduler := vpnruntime.NewFabricFlowScheduler(32, 16) scheduler := vpnruntime.NewFabricFlowScheduler(32, 16)
bulkPacket := []byte("bulk") bulkPacket := []byte("bulk")
interactivePacket := []byte("interactive-rdp-like") interactivePacket := []byte("interactive-rdp-like")
@@ -224,7 +228,9 @@ func smokeVPNFlowSchedulerBulkPressure() (bool, int, int, int, int) {
snapshot.BulkPressureChannelCount, snapshot.BulkPressureChannelCount,
snapshot.InteractiveOrControlCount, snapshot.InteractiveOrControlCount,
snapshot.RecommendedParallelWindows[vpnruntime.FabricTrafficClassBulk], snapshot.RecommendedParallelWindows[vpnruntime.FabricTrafficClassBulk],
snapshot.RecommendedParallelWindows[vpnruntime.FabricTrafficClassInteractive] snapshot.RecommendedParallelWindows[vpnruntime.FabricTrafficClassInteractive],
snapshot.PressureLevel,
snapshot.PressureReasons
} }
func smokeVPNFlowSchedulerRouteRecovery() (bool, uint64, int64, int64, int64, string) { func smokeVPNFlowSchedulerRouteRecovery() (bool, uint64, int64, int64, int64, string) {
@@ -465,6 +465,7 @@ Flow-scheduler snapshots now include a machine-readable pressure level
(`nominal`, `warning`, `critical`) and a bounded reason list derived from drops,
route failures, route recovery, slow channels, bulk pressure, and adaptive
backpressure.
`mesh-live-smoke` reports the mixed-load scheduler pressure level and reasons.
Endpoint ranking treats `capacity_limited` observations as a soft pressure
penalty instead of a hard recent failure, enabling load spreading without
marking the carrier unhealthy.