diff --git a/agents/rap-node-agent/cmd/rap-node-agent/main.go b/agents/rap-node-agent/cmd/rap-node-agent/main.go index 8d88258..f5a0773 100644 --- a/agents/rap-node-agent/cmd/rap-node-agent/main.go +++ b/agents/rap-node-agent/cmd/rap-node-agent/main.go @@ -5441,7 +5441,9 @@ func mergeEndpointCapacityPressure(primary, secondary map[string]mesh.EndpointCa } for endpointID, pressure := range secondary { existing, ok := out[endpointID] - if !ok || pressure.Count > existing.Count || pressure.LastSeenUnixSec > existing.LastSeenUnixSec { + if !ok || + pressure.Count > existing.Count || + (pressure.Count == existing.Count && pressure.LastSeenUnixSec > existing.LastSeenUnixSec) { out[endpointID] = pressure } } diff --git a/agents/rap-node-agent/cmd/rap-node-agent/main_test.go b/agents/rap-node-agent/cmd/rap-node-agent/main_test.go index d132e38..3a6eb7b 100644 --- a/agents/rap-node-agent/cmd/rap-node-agent/main_test.go +++ b/agents/rap-node-agent/cmd/rap-node-agent/main_test.go @@ -1167,6 +1167,24 @@ func TestQUICEndpointCapacityPressureForScoringUsesLiveSnapshot(t *testing.T) { } } +func TestMergeEndpointCapacityPressureKeepsStrongerSignal(t *testing.T) { + merged := mergeEndpointCapacityPressure( + map[string]mesh.EndpointCandidateCapacityPressure{ + "node-b-quic": {EndpointID: "node-b-quic", Count: 9, LastSeenUnixSec: 10}, + }, + map[string]mesh.EndpointCandidateCapacityPressure{ + "node-b-quic": {EndpointID: "node-b-quic", Count: 1, LastSeenUnixSec: 20}, + "node-b-wss": {EndpointID: "node-b-wss", Count: 2, LastSeenUnixSec: 20}, + }, + ) + if merged["node-b-quic"].Count != 9 || merged["node-b-quic"].LastSeenUnixSec != 10 { + t.Fatalf("weaker fresh pressure replaced stronger signal: %+v", merged["node-b-quic"]) + } + if merged["node-b-wss"].Count != 2 { + t.Fatalf("new pressure missing: %+v", merged) + } +} + func TestMergedEndpointCandidateObservationsKeepsNewest(t *testing.T) { now := time.Now().UTC() merged := mergedEndpointCandidateObservations( diff --git 
a/docs/architecture/DISTRIBUTED_FABRIC_NODE_PROTOCOL_PLAN.md b/docs/architecture/DISTRIBUTED_FABRIC_NODE_PROTOCOL_PLAN.md index 538f3b5..d83e204 100644 --- a/docs/architecture/DISTRIBUTED_FABRIC_NODE_PROTOCOL_PLAN.md +++ b/docs/architecture/DISTRIBUTED_FABRIC_NODE_PROTOCOL_PLAN.md @@ -429,6 +429,9 @@ before stream-limit rejection, spreading new sessions away from already busy QUIC carriers. Per-connection QUIC snapshot entries are sorted by peer and endpoint so heartbeats and diagnostics stay stable across reports. +When local live QUIC pressure and recent capacity-limit counters overlap, the +ranking input keeps the higher-count signal (ties go to the newer sample), so +a weak fresh sample cannot hide a saturated endpoint. Endpoint ranking treats `capacity_limited` observations as a soft pressure penalty instead of a hard recent failure, enabling load spreading without marking the carrier unhealthy.