Report aggregate route recovery in smoke

This commit is contained in:
2026-05-16 13:14:44 +03:00
parent aac224af9e
commit f23e11a8fd
2 changed files with 11 additions and 3 deletions
@@ -51,6 +51,8 @@ type smokeReport struct {
FabricVPNRouteRecovered bool `json:"fabric_vpn_route_recovered"` FabricVPNRouteRecovered bool `json:"fabric_vpn_route_recovered"`
FabricVPNRouteSwitches uint64 `json:"fabric_vpn_route_switch_count"` FabricVPNRouteSwitches uint64 `json:"fabric_vpn_route_switch_count"`
FabricVPNRecoveryMS int64 `json:"fabric_vpn_route_recovery_ms"` FabricVPNRecoveryMS int64 `json:"fabric_vpn_route_recovery_ms"`
FabricVPNRecoveryMaxMS int64 `json:"fabric_vpn_route_recovery_max_ms"`
FabricVPNRecoveryAvgMS int64 `json:"fabric_vpn_route_recovery_avg_ms"`
FabricQUICAccepted bool `json:"fabric_quic_accepted"` FabricQUICAccepted bool `json:"fabric_quic_accepted"`
FabricQUICEndpoint string `json:"fabric_quic_endpoint"` FabricQUICEndpoint string `json:"fabric_quic_endpoint"`
FabricQUICPressure int `json:"fabric_quic_capacity_pressure_percent"` FabricQUICPressure int `json:"fabric_quic_capacity_pressure_percent"`
@@ -155,7 +157,7 @@ func run(ctx context.Context) (smokeReport, error) {
return smokeReport{}, fmt.Errorf("fabric vpn packet session smoke: %w", err) return smokeReport{}, fmt.Errorf("fabric vpn packet session smoke: %w", err)
} }
fabricVPNBulkPressure, fabricVPNBulkChannels, fabricVPNInteractiveChannels, fabricVPNBulkWindow, fabricVPNInteractiveWindow := smokeVPNFlowSchedulerBulkPressure() fabricVPNBulkPressure, fabricVPNBulkChannels, fabricVPNInteractiveChannels, fabricVPNBulkWindow, fabricVPNInteractiveWindow := smokeVPNFlowSchedulerBulkPressure()
fabricVPNRouteRecovered, fabricVPNRouteSwitches, fabricVPNRecoveryMS := smokeVPNFlowSchedulerRouteRecovery() fabricVPNRouteRecovered, fabricVPNRouteSwitches, fabricVPNRecoveryMS, fabricVPNRecoveryMaxMS, fabricVPNRecoveryAvgMS := smokeVPNFlowSchedulerRouteRecovery()
fabricQUICAccepted, fabricQUICEndpoint, fabricQUICPressure, err := smokeQUICFabricSession(ctx) fabricQUICAccepted, fabricQUICEndpoint, fabricQUICPressure, err := smokeQUICFabricSession(ctx)
if err != nil { if err != nil {
return smokeReport{}, fmt.Errorf("fabric quic smoke: %w", err) return smokeReport{}, fmt.Errorf("fabric quic smoke: %w", err)
@@ -184,6 +186,8 @@ func run(ctx context.Context) (smokeReport, error) {
FabricVPNRouteRecovered: fabricVPNRouteRecovered, FabricVPNRouteRecovered: fabricVPNRouteRecovered,
FabricVPNRouteSwitches: fabricVPNRouteSwitches, FabricVPNRouteSwitches: fabricVPNRouteSwitches,
FabricVPNRecoveryMS: fabricVPNRecoveryMS, FabricVPNRecoveryMS: fabricVPNRecoveryMS,
FabricVPNRecoveryMaxMS: fabricVPNRecoveryMaxMS,
FabricVPNRecoveryAvgMS: fabricVPNRecoveryAvgMS,
FabricQUICAccepted: fabricQUICAccepted, FabricQUICAccepted: fabricQUICAccepted,
FabricQUICEndpoint: fabricQUICEndpoint, FabricQUICEndpoint: fabricQUICEndpoint,
FabricQUICPressure: fabricQUICPressure, FabricQUICPressure: fabricQUICPressure,
@@ -221,7 +225,7 @@ func smokeVPNFlowSchedulerBulkPressure() (bool, int, int, int, int) {
snapshot.RecommendedParallelWindows[vpnruntime.FabricTrafficClassInteractive] snapshot.RecommendedParallelWindows[vpnruntime.FabricTrafficClassInteractive]
} }
func smokeVPNFlowSchedulerRouteRecovery() (bool, uint64, int64) { func smokeVPNFlowSchedulerRouteRecovery() (bool, uint64, int64, int64, int64) {
scheduler := vpnruntime.NewFabricFlowScheduler(8, 16) scheduler := vpnruntime.NewFabricFlowScheduler(8, 16)
channelID := "vpn-smoke-flow-0" channelID := "vpn-smoke-flow-0"
scheduler.RecordRouteFailure(channelID, "route-primary", "node-primary", fmt.Errorf("smoke primary unavailable"), time.Millisecond) scheduler.RecordRouteFailure(channelID, "route-primary", "node-primary", fmt.Errorf("smoke primary unavailable"), time.Millisecond)
@@ -233,7 +237,9 @@ func smokeVPNFlowSchedulerRouteRecovery() (bool, uint64, int64) {
stat.LastRouteID == "route-alternate" && stat.LastRouteID == "route-alternate" &&
snapshot.RouteRecoveredChannelCount == 1, snapshot.RouteRecoveredChannelCount == 1,
snapshot.RouteSwitchCount, snapshot.RouteSwitchCount,
stat.LastRouteRecoveryMillis stat.LastRouteRecoveryMillis,
snapshot.RouteRecoveryMaxMillis,
snapshot.RouteRecoveryAvgMillis
} }
func smokeQUICFabricSession(ctx context.Context) (bool, string, int, error) { func smokeQUICFabricSession(ctx context.Context) (bool, string, int, error) {
@@ -447,6 +447,8 @@ switch counts, making alternate-route recovery measurable during load tests.
alternate-route success and reports the resulting route switch count. alternate-route success and reports the resulting route switch count.
The same smoke output reports measured route recovery milliseconds for the The same smoke output reports measured route recovery milliseconds for the
synthetic failover path. synthetic failover path.
The smoke output also reports the maximum and average route recovery time in
milliseconds, taken from the scheduler's aggregate snapshot.
Route recovery telemetry includes failure/switch timestamps and recovery Route recovery telemetry includes failure/switch timestamps and recovery
duration in milliseconds for each recovered flow channel. duration in milliseconds for each recovered flow channel.
Scheduler snapshots also aggregate route recovery max/average milliseconds Scheduler snapshots also aggregate route recovery max/average milliseconds