From 83a1bb67e2da312033066a07b759aa717dd85b8f Mon Sep 17 00:00:00 2001 From: Mikhail Date: Sat, 16 May 2026 12:55:48 +0300 Subject: [PATCH] Stabilize QUIC pressure snapshots --- .../internal/mesh/fabric_quic_transport.go | 10 ++++++++++ .../DISTRIBUTED_FABRIC_NODE_PROTOCOL_PLAN.md | 2 ++ 2 files changed, 12 insertions(+) diff --git a/agents/rap-node-agent/internal/mesh/fabric_quic_transport.go b/agents/rap-node-agent/internal/mesh/fabric_quic_transport.go index 206661d..5a797db 100644 --- a/agents/rap-node-agent/internal/mesh/fabric_quic_transport.go +++ b/agents/rap-node-agent/internal/mesh/fabric_quic_transport.go @@ -7,6 +7,7 @@ import ( "crypto/x509" "encoding/hex" "fmt" + "sort" "strings" "sync" "time" @@ -412,6 +413,15 @@ func (t *QUICFabricTransport) Snapshot() QUICFabricTransportSnapshot { snapshot.CapacityPressurePercent = (snapshot.ActiveStreams * 100) / capacity } } + sort.Slice(snapshot.Connections, func(i, j int) bool { + if snapshot.Connections[i].PeerID != snapshot.Connections[j].PeerID { + return snapshot.Connections[i].PeerID < snapshot.Connections[j].PeerID + } + if snapshot.Connections[i].Endpoint != snapshot.Connections[j].Endpoint { + return snapshot.Connections[i].Endpoint < snapshot.Connections[j].Endpoint + } + return snapshot.Connections[i].CertSHA256 < snapshot.Connections[j].CertSHA256 + }) return snapshot } diff --git a/docs/architecture/DISTRIBUTED_FABRIC_NODE_PROTOCOL_PLAN.md b/docs/architecture/DISTRIBUTED_FABRIC_NODE_PROTOCOL_PLAN.md index 741feae..538f3b5 100644 --- a/docs/architecture/DISTRIBUTED_FABRIC_NODE_PROTOCOL_PLAN.md +++ b/docs/architecture/DISTRIBUTED_FABRIC_NODE_PROTOCOL_PLAN.md @@ -427,6 +427,8 @@ QUIC fabric snapshots now include per cached connection pressure, endpoint, and saturation state; VPN fabric endpoint ranking consumes that live local pressure before stream-limit rejection, spreading new sessions away from already busy QUIC carriers. 
+Per-connection QUIC snapshot entries are sorted by peer, endpoint, and +certificate SHA-256 so heartbeats and diagnostics stay stable across reports. Endpoint ranking treats `capacity_limited` observations as a soft pressure penalty instead of a hard recent failure, enabling load spreading without marking the carrier unhealthy.