Report VPN route recovery time in smoke
This commit is contained in:
@@ -50,6 +50,7 @@ type smokeReport struct {
|
|||||||
FabricVPNInteractiveWin int `json:"fabric_vpn_interactive_parallel_window"`
|
FabricVPNInteractiveWin int `json:"fabric_vpn_interactive_parallel_window"`
|
||||||
FabricVPNRouteRecovered bool `json:"fabric_vpn_route_recovered"`
|
FabricVPNRouteRecovered bool `json:"fabric_vpn_route_recovered"`
|
||||||
FabricVPNRouteSwitches uint64 `json:"fabric_vpn_route_switch_count"`
|
FabricVPNRouteSwitches uint64 `json:"fabric_vpn_route_switch_count"`
|
||||||
|
FabricVPNRecoveryMS int64 `json:"fabric_vpn_route_recovery_ms"`
|
||||||
FabricQUICAccepted bool `json:"fabric_quic_accepted"`
|
FabricQUICAccepted bool `json:"fabric_quic_accepted"`
|
||||||
FabricQUICEndpoint string `json:"fabric_quic_endpoint"`
|
FabricQUICEndpoint string `json:"fabric_quic_endpoint"`
|
||||||
FabricQUICPressure int `json:"fabric_quic_capacity_pressure_percent"`
|
FabricQUICPressure int `json:"fabric_quic_capacity_pressure_percent"`
|
||||||
@@ -154,7 +155,7 @@ func run(ctx context.Context) (smokeReport, error) {
|
|||||||
return smokeReport{}, fmt.Errorf("fabric vpn packet session smoke: %w", err)
|
return smokeReport{}, fmt.Errorf("fabric vpn packet session smoke: %w", err)
|
||||||
}
|
}
|
||||||
fabricVPNBulkPressure, fabricVPNBulkChannels, fabricVPNInteractiveChannels, fabricVPNBulkWindow, fabricVPNInteractiveWindow := smokeVPNFlowSchedulerBulkPressure()
|
fabricVPNBulkPressure, fabricVPNBulkChannels, fabricVPNInteractiveChannels, fabricVPNBulkWindow, fabricVPNInteractiveWindow := smokeVPNFlowSchedulerBulkPressure()
|
||||||
fabricVPNRouteRecovered, fabricVPNRouteSwitches := smokeVPNFlowSchedulerRouteRecovery()
|
fabricVPNRouteRecovered, fabricVPNRouteSwitches, fabricVPNRecoveryMS := smokeVPNFlowSchedulerRouteRecovery()
|
||||||
fabricQUICAccepted, fabricQUICEndpoint, fabricQUICPressure, err := smokeQUICFabricSession(ctx)
|
fabricQUICAccepted, fabricQUICEndpoint, fabricQUICPressure, err := smokeQUICFabricSession(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return smokeReport{}, fmt.Errorf("fabric quic smoke: %w", err)
|
return smokeReport{}, fmt.Errorf("fabric quic smoke: %w", err)
|
||||||
@@ -182,6 +183,7 @@ func run(ctx context.Context) (smokeReport, error) {
|
|||||||
FabricVPNInteractiveWin: fabricVPNInteractiveWindow,
|
FabricVPNInteractiveWin: fabricVPNInteractiveWindow,
|
||||||
FabricVPNRouteRecovered: fabricVPNRouteRecovered,
|
FabricVPNRouteRecovered: fabricVPNRouteRecovered,
|
||||||
FabricVPNRouteSwitches: fabricVPNRouteSwitches,
|
FabricVPNRouteSwitches: fabricVPNRouteSwitches,
|
||||||
|
FabricVPNRecoveryMS: fabricVPNRecoveryMS,
|
||||||
FabricQUICAccepted: fabricQUICAccepted,
|
FabricQUICAccepted: fabricQUICAccepted,
|
||||||
FabricQUICEndpoint: fabricQUICEndpoint,
|
FabricQUICEndpoint: fabricQUICEndpoint,
|
||||||
FabricQUICPressure: fabricQUICPressure,
|
FabricQUICPressure: fabricQUICPressure,
|
||||||
@@ -219,17 +221,19 @@ func smokeVPNFlowSchedulerBulkPressure() (bool, int, int, int, int) {
|
|||||||
snapshot.RecommendedParallelWindows[vpnruntime.FabricTrafficClassInteractive]
|
snapshot.RecommendedParallelWindows[vpnruntime.FabricTrafficClassInteractive]
|
||||||
}
|
}
|
||||||
|
|
||||||
func smokeVPNFlowSchedulerRouteRecovery() (bool, uint64) {
|
func smokeVPNFlowSchedulerRouteRecovery() (bool, uint64, int64) {
|
||||||
scheduler := vpnruntime.NewFabricFlowScheduler(8, 16)
|
scheduler := vpnruntime.NewFabricFlowScheduler(8, 16)
|
||||||
channelID := "vpn-smoke-flow-0"
|
channelID := "vpn-smoke-flow-0"
|
||||||
scheduler.RecordRouteFailure(channelID, "route-primary", "node-primary", fmt.Errorf("smoke primary unavailable"), time.Millisecond)
|
scheduler.RecordRouteFailure(channelID, "route-primary", "node-primary", fmt.Errorf("smoke primary unavailable"), time.Millisecond)
|
||||||
|
time.Sleep(time.Millisecond)
|
||||||
scheduler.RecordRouteSuccess(channelID, "route-alternate", "node-alternate", time.Millisecond)
|
scheduler.RecordRouteSuccess(channelID, "route-alternate", "node-alternate", time.Millisecond)
|
||||||
snapshot := scheduler.Snapshot()
|
snapshot := scheduler.Snapshot()
|
||||||
stat := snapshot.ChannelStats[channelID]
|
stat := snapshot.ChannelStats[channelID]
|
||||||
return stat.LastRecoveredFromRouteID == "route-primary" &&
|
return stat.LastRecoveredFromRouteID == "route-primary" &&
|
||||||
stat.LastRouteID == "route-alternate" &&
|
stat.LastRouteID == "route-alternate" &&
|
||||||
snapshot.RouteRecoveredChannelCount == 1,
|
snapshot.RouteRecoveredChannelCount == 1,
|
||||||
snapshot.RouteSwitchCount
|
snapshot.RouteSwitchCount,
|
||||||
|
stat.LastRouteRecoveryMillis
|
||||||
}
|
}
|
||||||
|
|
||||||
func smokeQUICFabricSession(ctx context.Context) (bool, string, int, error) {
|
func smokeQUICFabricSession(ctx context.Context) (bool, string, int, error) {
|
||||||
|
|||||||
@@ -445,6 +445,8 @@ the failed route a channel recovered from, and aggregate recovered-channel /
|
|||||||
switch counts, making alternate-route recovery measurable during load tests.
|
switch counts, making alternate-route recovery measurable during load tests.
|
||||||
`mesh-live-smoke` now also exercises a primary-route failure followed by an
|
`mesh-live-smoke` now also exercises a primary-route failure followed by an
|
||||||
alternate-route success and reports the resulting route switch count.
|
alternate-route success and reports the resulting route switch count.
|
||||||
|
The same smoke output reports measured route recovery milliseconds for the
|
||||||
|
synthetic failover path.
|
||||||
Route recovery telemetry includes failure/switch timestamps and recovery
|
Route recovery telemetry includes failure/switch timestamps and recovery
|
||||||
duration in milliseconds for each recovered flow channel.
|
duration in milliseconds for each recovered flow channel.
|
||||||
Endpoint ranking treats `capacity_limited` observations as a soft pressure
|
Endpoint ranking treats `capacity_limited` observations as a soft pressure
|
||||||
|
|||||||
Reference in New Issue
Block a user