Emit capacity endpoint observations
This commit is contained in:
@@ -576,6 +576,25 @@ func (s *vpnFabricEndpointObservationStore) ObserveFailure(endpointID string, re
|
||||
s.pruneLocked(observation.ObservedAt, vpnFabricEndpointObservationMaxAge, maxVPNFabricEndpointObservationEntries)
|
||||
}
|
||||
|
||||
func (s *vpnFabricEndpointObservationStore) ObserveCapacity(endpointID string) {
|
||||
if s == nil || strings.TrimSpace(endpointID) == "" {
|
||||
return
|
||||
}
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
observation := s.observations[endpointID]
|
||||
observation.EndpointID = endpointID
|
||||
observation.Source = "local_vpn_fabric_session"
|
||||
observation.ReporterNodeID = s.reporterNodeID
|
||||
observation.LastFailureReason = "capacity_limited"
|
||||
if observation.ReliabilityScore <= 0 {
|
||||
observation.ReliabilityScore = 90
|
||||
}
|
||||
observation.ObservedAt = time.Now().UTC()
|
||||
s.observations[endpointID] = observation
|
||||
s.pruneLocked(observation.ObservedAt, vpnFabricEndpointObservationMaxAge, maxVPNFabricEndpointObservationEntries)
|
||||
}
|
||||
|
||||
func fabricTransportLabelIsQUIC(label string) bool {
|
||||
switch strings.ToLower(strings.TrimSpace(label)) {
|
||||
case "quic", "direct_quic", "udp_quic", "quic_udp":
|
||||
@@ -5063,6 +5082,7 @@ func fabricSessionGatewayTransportForAssignment(ctx context.Context, identity st
|
||||
reason := fabricSessionOpenFailureReason(err)
|
||||
if reason == "capacity_limited" {
|
||||
meshState.VPNFabricSessionDialStats.ObserveCapacityLimited(selectedTarget)
|
||||
meshState.VPNFabricEndpointObservations.ObserveCapacity(selectedTarget.EndpointID)
|
||||
} else {
|
||||
meshState.VPNFabricSessionDialStats.ObserveCandidateFailure(reason)
|
||||
meshState.VPNFabricEndpointObservations.ObserveFailure(selectedTarget.EndpointID, reason)
|
||||
|
||||
@@ -851,6 +851,20 @@ func TestVPNFabricEndpointObservationStoreTagsLocalSource(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestVPNFabricEndpointObservationStoreRecordsCapacityWithoutFailure(t *testing.T) {
|
||||
store := newVPNFabricEndpointObservationStore("node-a")
|
||||
store.ObserveCapacity("endpoint-a")
|
||||
snapshot := store.Snapshot()
|
||||
observation := snapshot["endpoint-a"]
|
||||
if observation.LastFailureReason != "capacity_limited" ||
|
||||
observation.FailureCount != 0 ||
|
||||
observation.ReliabilityScore != 90 ||
|
||||
observation.Source != "local_vpn_fabric_session" ||
|
||||
observation.ReporterNodeID != "node-a" {
|
||||
t.Fatalf("unexpected capacity observation: %+v", observation)
|
||||
}
|
||||
}
|
||||
|
||||
func TestVPNFabricEndpointObservationStorePrunesOldAndExcessEntries(t *testing.T) {
|
||||
store := newVPNFabricEndpointObservationStore()
|
||||
now := time.Now().UTC()
|
||||
|
||||
@@ -377,6 +377,9 @@ observations.
|
||||
Endpoint ranking treats `capacity_limited` observations as a soft pressure
|
||||
penalty instead of a hard recent failure, enabling load spreading without
|
||||
marking the carrier unhealthy.
|
||||
Local QUIC stream-limit pressure is now emitted as a capacity observation with
|
||||
no failure-count increment, allowing the control plane to spread load without
|
||||
treating saturation as packet-path breakage.
|
||||
Cached QUIC carrier idle TTL is configurable through
|
||||
`RAP_VPN_FABRIC_QUIC_IDLE_TTL_SECONDS` / `-vpn-fabric-quic-idle-ttl` and
|
||||
propagated by host-agent install profiles.
|
||||
|
||||
Reference in New Issue
Block a user