Report route recovery reason in smoke
This commit is contained in:
@@ -53,6 +53,7 @@ type smokeReport struct {
|
||||
FabricVPNRecoveryMS int64 `json:"fabric_vpn_route_recovery_ms"`
|
||||
FabricVPNRecoveryMaxMS int64 `json:"fabric_vpn_route_recovery_max_ms"`
|
||||
FabricVPNRecoveryAvgMS int64 `json:"fabric_vpn_route_recovery_avg_ms"`
|
||||
FabricVPNRecoveryReason string `json:"fabric_vpn_route_recovery_reason"`
|
||||
FabricQUICAccepted bool `json:"fabric_quic_accepted"`
|
||||
FabricQUICEndpoint string `json:"fabric_quic_endpoint"`
|
||||
FabricQUICPressure int `json:"fabric_quic_capacity_pressure_percent"`
|
||||
@@ -157,7 +158,7 @@ func run(ctx context.Context) (smokeReport, error) {
|
||||
return smokeReport{}, fmt.Errorf("fabric vpn packet session smoke: %w", err)
|
||||
}
|
||||
fabricVPNBulkPressure, fabricVPNBulkChannels, fabricVPNInteractiveChannels, fabricVPNBulkWindow, fabricVPNInteractiveWindow := smokeVPNFlowSchedulerBulkPressure()
|
||||
fabricVPNRouteRecovered, fabricVPNRouteSwitches, fabricVPNRecoveryMS, fabricVPNRecoveryMaxMS, fabricVPNRecoveryAvgMS := smokeVPNFlowSchedulerRouteRecovery()
|
||||
fabricVPNRouteRecovered, fabricVPNRouteSwitches, fabricVPNRecoveryMS, fabricVPNRecoveryMaxMS, fabricVPNRecoveryAvgMS, fabricVPNRecoveryReason := smokeVPNFlowSchedulerRouteRecovery()
|
||||
fabricQUICAccepted, fabricQUICEndpoint, fabricQUICPressure, err := smokeQUICFabricSession(ctx)
|
||||
if err != nil {
|
||||
return smokeReport{}, fmt.Errorf("fabric quic smoke: %w", err)
|
||||
@@ -188,6 +189,7 @@ func run(ctx context.Context) (smokeReport, error) {
|
||||
FabricVPNRecoveryMS: fabricVPNRecoveryMS,
|
||||
FabricVPNRecoveryMaxMS: fabricVPNRecoveryMaxMS,
|
||||
FabricVPNRecoveryAvgMS: fabricVPNRecoveryAvgMS,
|
||||
FabricVPNRecoveryReason: fabricVPNRecoveryReason,
|
||||
FabricQUICAccepted: fabricQUICAccepted,
|
||||
FabricQUICEndpoint: fabricQUICEndpoint,
|
||||
FabricQUICPressure: fabricQUICPressure,
|
||||
@@ -225,7 +227,7 @@ func smokeVPNFlowSchedulerBulkPressure() (bool, int, int, int, int) {
|
||||
snapshot.RecommendedParallelWindows[vpnruntime.FabricTrafficClassInteractive]
|
||||
}
|
||||
|
||||
func smokeVPNFlowSchedulerRouteRecovery() (bool, uint64, int64, int64, int64) {
|
||||
func smokeVPNFlowSchedulerRouteRecovery() (bool, uint64, int64, int64, int64, string) {
|
||||
scheduler := vpnruntime.NewFabricFlowScheduler(8, 16)
|
||||
channelID := "vpn-smoke-flow-0"
|
||||
scheduler.RecordRouteFailure(channelID, "route-primary", "node-primary", fmt.Errorf("smoke primary unavailable"), time.Millisecond)
|
||||
@@ -239,7 +241,8 @@ func smokeVPNFlowSchedulerRouteRecovery() (bool, uint64, int64, int64, int64) {
|
||||
snapshot.RouteSwitchCount,
|
||||
stat.LastRouteRecoveryMillis,
|
||||
snapshot.RouteRecoveryMaxMillis,
|
||||
snapshot.RouteRecoveryAvgMillis
|
||||
snapshot.RouteRecoveryAvgMillis,
|
||||
stat.LastRouteSwitchReason
|
||||
}
|
||||
|
||||
func smokeQUICFabricSession(ctx context.Context) (bool, string, int, error) {
|
||||
|
||||
@@ -456,6 +456,8 @@ across recovered channels for quick load-test health checks.
|
||||
Route recovery telemetry now includes normalized switch reasons and aggregate
|
||||
reason counts, so load tests can distinguish peer failures, timeouts, and other
|
||||
route-break causes.
|
||||
`mesh-live-smoke` reports the synthetic route-recovery reason beside recovery
|
||||
timing and switch count, in the `fabric_vpn_route_recovery_reason` report field.
|
||||
Endpoint ranking treats `capacity_limited` observations as a soft pressure
|
||||
penalty instead of a hard recent failure, enabling load spreading without
|
||||
marking the carrier unhealthy.
|
||||
|
||||
Reference in New Issue
Block a user