Prune idle QUIC fabric connections
This commit is contained in:
@@ -16,12 +16,14 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
const fabricQUICNextProto = "rap-fabric-data-session-v1"
|
const fabricQUICNextProto = "rap-fabric-data-session-v1"
|
||||||
|
const defaultQUICFabricConnIdleTTL = 5 * time.Minute
|
||||||
|
|
||||||
type QUICFabricTransport struct {
|
type QUICFabricTransport struct {
|
||||||
Config *quic.Config
|
Config *quic.Config
|
||||||
mu sync.Mutex
|
IdleTTL time.Duration
|
||||||
conns map[string]*quic.Conn
|
mu sync.Mutex
|
||||||
stats QUICFabricTransportStats
|
conns map[string]*quicFabricConnEntry
|
||||||
|
stats QUICFabricTransportStats
|
||||||
}
|
}
|
||||||
|
|
||||||
type QUICFabricTransportStats struct {
|
type QUICFabricTransportStats struct {
|
||||||
@@ -30,6 +32,7 @@ type QUICFabricTransportStats struct {
|
|||||||
OpenFailures uint64 `json:"open_failures"`
|
OpenFailures uint64 `json:"open_failures"`
|
||||||
ClosedEvicted uint64 `json:"closed_evicted"`
|
ClosedEvicted uint64 `json:"closed_evicted"`
|
||||||
CloseAllCalls uint64 `json:"close_all_calls"`
|
CloseAllCalls uint64 `json:"close_all_calls"`
|
||||||
|
IdleEvicted uint64 `json:"idle_evicted"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type QUICFabricTransportSnapshot struct {
|
type QUICFabricTransportSnapshot struct {
|
||||||
@@ -51,8 +54,13 @@ type quicFabricSession struct {
|
|||||||
closeConn bool
|
closeConn bool
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type quicFabricConnEntry struct {
|
||||||
|
conn *quic.Conn
|
||||||
|
lastUsed time.Time
|
||||||
|
}
|
||||||
|
|
||||||
func NewQUICFabricTransport(config *quic.Config) *QUICFabricTransport {
|
func NewQUICFabricTransport(config *quic.Config) *QUICFabricTransport {
|
||||||
return &QUICFabricTransport{Config: config, conns: map[string]*quic.Conn{}}
|
return &QUICFabricTransport{Config: config, IdleTTL: defaultQUICFabricConnIdleTTL, conns: map[string]*quicFabricConnEntry{}}
|
||||||
}
|
}
|
||||||
|
|
||||||
func quicTLSConfigForTarget(target FabricTransportTarget) *tls.Config {
|
func quicTLSConfigForTarget(target FabricTransportTarget) *tls.Config {
|
||||||
@@ -146,15 +154,17 @@ func (t *QUICFabricTransport) connectConn(ctx context.Context, target FabricTran
|
|||||||
return conn, true, err
|
return conn, true, err
|
||||||
}
|
}
|
||||||
t.mu.Lock()
|
t.mu.Lock()
|
||||||
if conn := t.conns[key]; conn != nil {
|
t.pruneIdleLocked(time.Now())
|
||||||
|
if entry := t.conns[key]; entry != nil && entry.conn != nil {
|
||||||
select {
|
select {
|
||||||
case <-conn.Context().Done():
|
case <-entry.conn.Context().Done():
|
||||||
delete(t.conns, key)
|
delete(t.conns, key)
|
||||||
t.stats.ClosedEvicted++
|
t.stats.ClosedEvicted++
|
||||||
default:
|
default:
|
||||||
|
entry.lastUsed = time.Now()
|
||||||
t.stats.Reuses++
|
t.stats.Reuses++
|
||||||
t.mu.Unlock()
|
t.mu.Unlock()
|
||||||
return conn, false, nil
|
return entry.conn, false, nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
t.mu.Unlock()
|
t.mu.Unlock()
|
||||||
@@ -167,22 +177,24 @@ func (t *QUICFabricTransport) connectConn(ctx context.Context, target FabricTran
|
|||||||
return nil, false, err
|
return nil, false, err
|
||||||
}
|
}
|
||||||
t.mu.Lock()
|
t.mu.Lock()
|
||||||
if existing := t.conns[key]; existing != nil {
|
t.pruneIdleLocked(time.Now())
|
||||||
|
if existing := t.conns[key]; existing != nil && existing.conn != nil {
|
||||||
select {
|
select {
|
||||||
case <-existing.Context().Done():
|
case <-existing.conn.Context().Done():
|
||||||
delete(t.conns, key)
|
delete(t.conns, key)
|
||||||
t.stats.ClosedEvicted++
|
t.stats.ClosedEvicted++
|
||||||
default:
|
default:
|
||||||
|
existing.lastUsed = time.Now()
|
||||||
t.stats.Reuses++
|
t.stats.Reuses++
|
||||||
t.mu.Unlock()
|
t.mu.Unlock()
|
||||||
_ = conn.CloseWithError(0, "duplicate connection")
|
_ = conn.CloseWithError(0, "duplicate connection")
|
||||||
return existing, false, nil
|
return existing.conn, false, nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if t.conns == nil {
|
if t.conns == nil {
|
||||||
t.conns = map[string]*quic.Conn{}
|
t.conns = map[string]*quicFabricConnEntry{}
|
||||||
}
|
}
|
||||||
t.conns[key] = conn
|
t.conns[key] = &quicFabricConnEntry{conn: conn, lastUsed: time.Now()}
|
||||||
t.stats.Opens++
|
t.stats.Opens++
|
||||||
t.mu.Unlock()
|
t.mu.Unlock()
|
||||||
return conn, false, nil
|
return conn, false, nil
|
||||||
@@ -197,13 +209,34 @@ func (t *QUICFabricTransport) evictConn(target FabricTransportTarget, conn *quic
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
t.mu.Lock()
|
t.mu.Lock()
|
||||||
if t.conns[key] == conn {
|
if entry := t.conns[key]; entry != nil && entry.conn == conn {
|
||||||
delete(t.conns, key)
|
delete(t.conns, key)
|
||||||
t.stats.ClosedEvicted++
|
t.stats.ClosedEvicted++
|
||||||
}
|
}
|
||||||
t.mu.Unlock()
|
t.mu.Unlock()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (t *QUICFabricTransport) pruneIdleLocked(now time.Time) {
|
||||||
|
if t == nil || len(t.conns) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
ttl := t.IdleTTL
|
||||||
|
if ttl <= 0 {
|
||||||
|
ttl = defaultQUICFabricConnIdleTTL
|
||||||
|
}
|
||||||
|
for key, entry := range t.conns {
|
||||||
|
if entry == nil || entry.conn == nil {
|
||||||
|
delete(t.conns, key)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if !entry.lastUsed.IsZero() && now.Sub(entry.lastUsed) > ttl {
|
||||||
|
_ = entry.conn.CloseWithError(0, "idle")
|
||||||
|
delete(t.conns, key)
|
||||||
|
t.stats.IdleEvicted++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func quicFabricConnKey(target FabricTransportTarget) string {
|
func quicFabricConnKey(target FabricTransportTarget) string {
|
||||||
peerID := strings.TrimSpace(target.PeerID)
|
peerID := strings.TrimSpace(target.PeerID)
|
||||||
endpoint := strings.TrimPrefix(strings.TrimSpace(target.Endpoint), "quic://")
|
endpoint := strings.TrimPrefix(strings.TrimSpace(target.Endpoint), "quic://")
|
||||||
@@ -220,10 +253,12 @@ func (t *QUICFabricTransport) Close() error {
|
|||||||
t.mu.Lock()
|
t.mu.Lock()
|
||||||
t.stats.CloseAllCalls++
|
t.stats.CloseAllCalls++
|
||||||
conns := t.conns
|
conns := t.conns
|
||||||
t.conns = map[string]*quic.Conn{}
|
t.conns = map[string]*quicFabricConnEntry{}
|
||||||
t.mu.Unlock()
|
t.mu.Unlock()
|
||||||
for _, conn := range conns {
|
for _, entry := range conns {
|
||||||
_ = conn.CloseWithError(0, "closed")
|
if entry != nil && entry.conn != nil {
|
||||||
|
_ = entry.conn.CloseWithError(0, "closed")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -234,17 +269,18 @@ func (t *QUICFabricTransport) Snapshot() QUICFabricTransportSnapshot {
|
|||||||
}
|
}
|
||||||
t.mu.Lock()
|
t.mu.Lock()
|
||||||
defer t.mu.Unlock()
|
defer t.mu.Unlock()
|
||||||
|
t.pruneIdleLocked(time.Now())
|
||||||
snapshot := QUICFabricTransportSnapshot{
|
snapshot := QUICFabricTransportSnapshot{
|
||||||
SchemaVersion: "rap.quic_fabric_transport.v1",
|
SchemaVersion: "rap.quic_fabric_transport.v1",
|
||||||
Stats: t.stats,
|
Stats: t.stats,
|
||||||
}
|
}
|
||||||
for key, conn := range t.conns {
|
for key, entry := range t.conns {
|
||||||
if conn == nil {
|
if entry == nil || entry.conn == nil {
|
||||||
delete(t.conns, key)
|
delete(t.conns, key)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
select {
|
select {
|
||||||
case <-conn.Context().Done():
|
case <-entry.conn.Context().Done():
|
||||||
delete(t.conns, key)
|
delete(t.conns, key)
|
||||||
snapshot.Stats.ClosedEvicted++
|
snapshot.Stats.ClosedEvicted++
|
||||||
default:
|
default:
|
||||||
|
|||||||
@@ -197,6 +197,42 @@ func TestQUICFabricTransportReusesConnectionForPeerEndpoint(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestQUICFabricTransportPrunesIdleConnections(t *testing.T) {
|
||||||
|
server, err := StartQUICFabricServer(context.Background(), QUICFabricServerConfig{
|
||||||
|
ListenAddr: "127.0.0.1:0",
|
||||||
|
TLSConfig: testQUICTLSConfig(t),
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("start quic fabric server: %v", err)
|
||||||
|
}
|
||||||
|
defer server.Close()
|
||||||
|
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
transport := NewQUICFabricTransport(nil)
|
||||||
|
transport.IdleTTL = time.Nanosecond
|
||||||
|
defer transport.Close()
|
||||||
|
session, err := transport.Connect(ctx, FabricTransportTarget{
|
||||||
|
PeerID: "node-b",
|
||||||
|
Endpoint: server.Addr().String(),
|
||||||
|
TLSConfig: &tls.Config{
|
||||||
|
InsecureSkipVerify: true,
|
||||||
|
NextProtos: []string{fabricQUICNextProto},
|
||||||
|
},
|
||||||
|
Timeout: time.Second,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("connect: %v", err)
|
||||||
|
}
|
||||||
|
defer session.Close()
|
||||||
|
time.Sleep(time.Millisecond)
|
||||||
|
|
||||||
|
snapshot := transport.Snapshot()
|
||||||
|
if snapshot.ActiveCount != 0 || snapshot.Stats.IdleEvicted != 1 {
|
||||||
|
t.Fatalf("idle connection was not pruned: %+v", snapshot)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestQUICFabricServerHandlesFabricFrames(t *testing.T) {
|
func TestQUICFabricServerHandlesFabricFrames(t *testing.T) {
|
||||||
var events []FabricSessionEventLogEntry
|
var events []FabricSessionEventLogEntry
|
||||||
server, err := StartQUICFabricServer(context.Background(), QUICFabricServerConfig{
|
server, err := StartQUICFabricServer(context.Background(), QUICFabricServerConfig{
|
||||||
|
|||||||
@@ -359,6 +359,8 @@ running the health-aware peer selection path.
|
|||||||
VPN fabric QUIC transport now reuses QUIC connections per peer endpoint and
|
VPN fabric QUIC transport now reuses QUIC connections per peer endpoint and
|
||||||
opens logical fabric-session streams on top, with heartbeat telemetry for QUIC
|
opens logical fabric-session streams on top, with heartbeat telemetry for QUIC
|
||||||
connection opens, reuses, evictions, and active count.
|
connection opens, reuses, evictions, and active count.
|
||||||
|
Cached QUIC connections are pruned by idle TTL, preventing long-running agents
|
||||||
|
from holding unused peer connections indefinitely.
|
||||||
|
|
||||||
Deliverables:
|
Deliverables:
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user