Smoke test VPN route recovery
This commit is contained in:
@@ -48,6 +48,8 @@ type smokeReport struct {
|
|||||||
FabricVPNInteractive int `json:"fabric_vpn_interactive_or_control_channels"`
|
FabricVPNInteractive int `json:"fabric_vpn_interactive_or_control_channels"`
|
||||||
FabricVPNBulkWindow int `json:"fabric_vpn_bulk_parallel_window"`
|
FabricVPNBulkWindow int `json:"fabric_vpn_bulk_parallel_window"`
|
||||||
FabricVPNInteractiveWin int `json:"fabric_vpn_interactive_parallel_window"`
|
FabricVPNInteractiveWin int `json:"fabric_vpn_interactive_parallel_window"`
|
||||||
|
FabricVPNRouteRecovered bool `json:"fabric_vpn_route_recovered"`
|
||||||
|
FabricVPNRouteSwitches uint64 `json:"fabric_vpn_route_switch_count"`
|
||||||
FabricQUICAccepted bool `json:"fabric_quic_accepted"`
|
FabricQUICAccepted bool `json:"fabric_quic_accepted"`
|
||||||
FabricQUICEndpoint string `json:"fabric_quic_endpoint"`
|
FabricQUICEndpoint string `json:"fabric_quic_endpoint"`
|
||||||
FabricQUICPressure int `json:"fabric_quic_capacity_pressure_percent"`
|
FabricQUICPressure int `json:"fabric_quic_capacity_pressure_percent"`
|
||||||
@@ -152,6 +154,7 @@ func run(ctx context.Context) (smokeReport, error) {
|
|||||||
return smokeReport{}, fmt.Errorf("fabric vpn packet session smoke: %w", err)
|
return smokeReport{}, fmt.Errorf("fabric vpn packet session smoke: %w", err)
|
||||||
}
|
}
|
||||||
fabricVPNBulkPressure, fabricVPNBulkChannels, fabricVPNInteractiveChannels, fabricVPNBulkWindow, fabricVPNInteractiveWindow := smokeVPNFlowSchedulerBulkPressure()
|
fabricVPNBulkPressure, fabricVPNBulkChannels, fabricVPNInteractiveChannels, fabricVPNBulkWindow, fabricVPNInteractiveWindow := smokeVPNFlowSchedulerBulkPressure()
|
||||||
|
fabricVPNRouteRecovered, fabricVPNRouteSwitches := smokeVPNFlowSchedulerRouteRecovery()
|
||||||
fabricQUICAccepted, fabricQUICEndpoint, fabricQUICPressure, err := smokeQUICFabricSession(ctx)
|
fabricQUICAccepted, fabricQUICEndpoint, fabricQUICPressure, err := smokeQUICFabricSession(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return smokeReport{}, fmt.Errorf("fabric quic smoke: %w", err)
|
return smokeReport{}, fmt.Errorf("fabric quic smoke: %w", err)
|
||||||
@@ -177,6 +180,8 @@ func run(ctx context.Context) (smokeReport, error) {
|
|||||||
FabricVPNInteractive: fabricVPNInteractiveChannels,
|
FabricVPNInteractive: fabricVPNInteractiveChannels,
|
||||||
FabricVPNBulkWindow: fabricVPNBulkWindow,
|
FabricVPNBulkWindow: fabricVPNBulkWindow,
|
||||||
FabricVPNInteractiveWin: fabricVPNInteractiveWindow,
|
FabricVPNInteractiveWin: fabricVPNInteractiveWindow,
|
||||||
|
FabricVPNRouteRecovered: fabricVPNRouteRecovered,
|
||||||
|
FabricVPNRouteSwitches: fabricVPNRouteSwitches,
|
||||||
FabricQUICAccepted: fabricQUICAccepted,
|
FabricQUICAccepted: fabricQUICAccepted,
|
||||||
FabricQUICEndpoint: fabricQUICEndpoint,
|
FabricQUICEndpoint: fabricQUICEndpoint,
|
||||||
FabricQUICPressure: fabricQUICPressure,
|
FabricQUICPressure: fabricQUICPressure,
|
||||||
@@ -214,6 +219,19 @@ func smokeVPNFlowSchedulerBulkPressure() (bool, int, int, int, int) {
|
|||||||
snapshot.RecommendedParallelWindows[vpnruntime.FabricTrafficClassInteractive]
|
snapshot.RecommendedParallelWindows[vpnruntime.FabricTrafficClassInteractive]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func smokeVPNFlowSchedulerRouteRecovery() (bool, uint64) {
|
||||||
|
scheduler := vpnruntime.NewFabricFlowScheduler(8, 16)
|
||||||
|
channelID := "vpn-smoke-flow-0"
|
||||||
|
scheduler.RecordRouteFailure(channelID, "route-primary", "node-primary", fmt.Errorf("smoke primary unavailable"), time.Millisecond)
|
||||||
|
scheduler.RecordRouteSuccess(channelID, "route-alternate", "node-alternate", time.Millisecond)
|
||||||
|
snapshot := scheduler.Snapshot()
|
||||||
|
stat := snapshot.ChannelStats[channelID]
|
||||||
|
return stat.LastRecoveredFromRouteID == "route-primary" &&
|
||||||
|
stat.LastRouteID == "route-alternate" &&
|
||||||
|
snapshot.RouteRecoveredChannelCount == 1,
|
||||||
|
snapshot.RouteSwitchCount
|
||||||
|
}
|
||||||
|
|
||||||
func smokeQUICFabricSession(ctx context.Context) (bool, string, int, error) {
|
func smokeQUICFabricSession(ctx context.Context) (bool, string, int, error) {
|
||||||
server, err := mesh.StartQUICFabricServer(ctx, mesh.QUICFabricServerConfig{
|
server, err := mesh.StartQUICFabricServer(ctx, mesh.QUICFabricServerConfig{
|
||||||
ListenAddr: "127.0.0.1:0",
|
ListenAddr: "127.0.0.1:0",
|
||||||
|
|||||||
@@ -443,6 +443,8 @@ pressure activation plus bulk/interactive window recommendations.
|
|||||||
Flow-scheduler route recovery telemetry now records per-channel route switches,
|
Flow-scheduler route recovery telemetry now records per-channel route switches,
|
||||||
the failed route a channel recovered from, and aggregate recovered-channel /
|
the failed route a channel recovered from, and aggregate recovered-channel /
|
||||||
switch counts, making alternate-route recovery measurable during load tests.
|
switch counts, making alternate-route recovery measurable during load tests.
|
||||||
|
`mesh-live-smoke` now also exercises a primary-route failure followed by an
|
||||||
|
alternate-route success, reporting whether recovery was observed and the
resulting route switch count.
|
||||||
Endpoint ranking treats `capacity_limited` observations as a soft pressure
|
Endpoint ranking treats `capacity_limited` observations as a soft pressure
|
||||||
penalty instead of a hard recent failure, enabling load spreading without
|
penalty instead of a hard recent failure, enabling load spreading without
|
||||||
marking the carrier unhealthy.
|
marking the carrier unhealthy.
|
||||||
|
|||||||
Reference in New Issue
Block a user