From 8feb6d58fe57220c69c1925899c3d1f175fa243b Mon Sep 17 00:00:00 2001 From: Mikhail Date: Sat, 16 May 2026 12:50:51 +0300 Subject: [PATCH] Report QUIC capacity in smoke --- .../cmd/mesh-live-smoke/main.go | 26 ++++++++++++------- .../DISTRIBUTED_FABRIC_NODE_PROTOCOL_PLAN.md | 2 ++ 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/agents/rap-node-agent/cmd/mesh-live-smoke/main.go b/agents/rap-node-agent/cmd/mesh-live-smoke/main.go index 5bb811a..6ed3319 100644 --- a/agents/rap-node-agent/cmd/mesh-live-smoke/main.go +++ b/agents/rap-node-agent/cmd/mesh-live-smoke/main.go @@ -45,6 +45,7 @@ type smokeReport struct { FabricVPNPacketFanout int `json:"fabric_vpn_packet_fanout"` FabricQUICAccepted bool `json:"fabric_quic_accepted"` FabricQUICEndpoint string `json:"fabric_quic_endpoint"` + FabricQUICPressure int `json:"fabric_quic_capacity_pressure_percent"` FabricSessionLatencyMS int64 `json:"fabric_session_latency_ms"` FabricSessionEndpoint string `json:"fabric_session_endpoint"` PeerEndpoints map[string]any `json:"peer_endpoints"` @@ -145,7 +146,7 @@ func run(ctx context.Context) (smokeReport, error) { if err != nil { return smokeReport{}, fmt.Errorf("fabric vpn packet session smoke: %w", err) } - fabricQUICAccepted, fabricQUICEndpoint, err := smokeQUICFabricSession(ctx) + fabricQUICAccepted, fabricQUICEndpoint, fabricQUICPressure, err := smokeQUICFabricSession(ctx) if err != nil { return smokeReport{}, fmt.Errorf("fabric quic smoke: %w", err) } @@ -167,6 +168,7 @@ func run(ctx context.Context) (smokeReport, error) { FabricVPNPacketFanout: fabricVPNPacketFanout, FabricQUICAccepted: fabricQUICAccepted, FabricQUICEndpoint: fabricQUICEndpoint, + FabricQUICPressure: fabricQUICPressure, FabricSessionLatencyMS: fabricSessionLatency.Milliseconds(), FabricSessionEndpoint: nodeB.URL + "/mesh/v1/fabric/session/ws", PeerEndpoints: map[string]any{ @@ -177,17 +179,20 @@ func run(ctx context.Context) (smokeReport, error) { }, nil } -func smokeQUICFabricSession(ctx context.Context) (bool, string, error) { +func smokeQUICFabricSession(ctx context.Context) (bool, string, int, error) { server, err := mesh.StartQUICFabricServer(ctx, mesh.QUICFabricServerConfig{ ListenAddr: "127.0.0.1:0", TLSConfig: smokeQUICTLSConfig(), }) if err != nil { - return false, "", err + return false, "", 0, err } defer server.Close() endpoint := server.Addr().String() - session, err := mesh.NewQUICFabricTransport(nil).Connect(ctx, mesh.FabricTransportTarget{ + transport := mesh.NewQUICFabricTransport(nil) + defer transport.Close() + session, err := transport.Connect(ctx, mesh.FabricTransportTarget{ + PeerID: "node-b", Endpoint: endpoint, TLSConfig: &tls.Config{ InsecureSkipVerify: true, @@ -198,7 +203,7 @@ func smokeQUICFabricSession(ctx context.Context) (bool, string, error) { ErrorBuffer: 4, }) if err != nil { - return false, endpoint, err + return false, endpoint, 0, err } defer session.Close() if err := session.Send(ctx, fabricproto.Frame{ @@ -206,20 +211,21 @@ func smokeQUICFabricSession(ctx context.Context) (bool, string, error) { Sequence: uint64(time.Now().UnixNano()), Payload: []byte("mesh-live-smoke-quic"), }); err != nil { - return false, endpoint, err + return false, endpoint, 0, err } timer := time.NewTimer(3 * time.Second) defer timer.Stop() for { select { case frame := <-session.Frames(): - return frame.Type == fabricproto.FramePong && string(frame.Payload) == "mesh-live-smoke-quic", endpoint, nil + snapshot := transport.Snapshot() + return frame.Type == fabricproto.FramePong && string(frame.Payload) == "mesh-live-smoke-quic", endpoint, snapshot.CapacityPressurePercent, nil case err := <-session.Errors(): - return false, endpoint, err + return false, endpoint, 0, err case <-timer.C: - return false, endpoint, fmt.Errorf("timed out waiting for quic pong") + return false, endpoint, 0, fmt.Errorf("timed out waiting for quic pong") case <-ctx.Done(): - return false, endpoint, ctx.Err() + return false, endpoint, 0, ctx.Err() } } } diff --git a/docs/architecture/DISTRIBUTED_FABRIC_NODE_PROTOCOL_PLAN.md b/docs/architecture/DISTRIBUTED_FABRIC_NODE_PROTOCOL_PLAN.md index defa487..18d0253 100644 --- a/docs/architecture/DISTRIBUTED_FABRIC_NODE_PROTOCOL_PLAN.md +++ b/docs/architecture/DISTRIBUTED_FABRIC_NODE_PROTOCOL_PLAN.md @@ -421,6 +421,8 @@ connection count, and capacity pressure percentage next to stream limit rejects. Closed cached QUIC connections discovered during snapshot generation now update the transport's cumulative eviction counters, keeping successive heartbeats consistent. +`mesh-live-smoke` reports QUIC fabric capacity-pressure percentage from the +transport snapshot, verifying that the capacity fields are populated. Endpoint ranking treats `capacity_limited` observations as a soft pressure penalty instead of a hard recent failure, enabling load spreading without marking the carrier unhealthy.