From 9cd0cb5ea9abaa492a3cc66229769c79a47b2105 Mon Sep 17 00:00:00 2001 From: Mikhail Date: Sat, 16 May 2026 00:51:39 +0300 Subject: [PATCH] Add fabric peer session manager --- .../internal/mesh/fabric_session_manager.go | 105 +++++++++++++++ .../mesh/fabric_session_manager_test.go | 126 ++++++++++++++++++ .../DISTRIBUTED_FABRIC_NODE_PROTOCOL_PLAN.md | 2 + 3 files changed, 233 insertions(+) create mode 100644 agents/rap-node-agent/internal/mesh/fabric_session_manager.go create mode 100644 agents/rap-node-agent/internal/mesh/fabric_session_manager_test.go diff --git a/agents/rap-node-agent/internal/mesh/fabric_session_manager.go b/agents/rap-node-agent/internal/mesh/fabric_session_manager.go new file mode 100644 index 0000000..b7ad282 --- /dev/null +++ b/agents/rap-node-agent/internal/mesh/fabric_session_manager.go @@ -0,0 +1,105 @@ +package mesh + +import ( + "context" + "fmt" + "strings" + "sync" +) + +type FabricSessionPeerManager struct { + mu sync.Mutex + sessions map[string]*FabricSessionPump +} + +type FabricSessionPeerTarget struct { + PeerID string + BaseURL string + Options FabricSessionDialOptions + Pump FabricSessionPumpOptions +} + +func NewFabricSessionPeerManager() *FabricSessionPeerManager { + return &FabricSessionPeerManager{ + sessions: map[string]*FabricSessionPump{}, + } +} + +func (m *FabricSessionPeerManager) Get(ctx context.Context, target FabricSessionPeerTarget) (*FabricSessionPump, error) { + if m == nil { + return nil, fmt.Errorf("fabric session peer manager is nil") + } + key, err := fabricSessionPeerKey(target) + if err != nil { + return nil, err + } + m.mu.Lock() + if pump := m.sessions[key]; pump != nil { + m.mu.Unlock() + return pump, nil + } + m.mu.Unlock() + + session, _, err := NewClient(target.BaseURL).OpenFabricSession(ctx, target.Options) + if err != nil { + return nil, err + } + pump := session.StartPump(ctx, target.Pump) + + m.mu.Lock() + if existing := m.sessions[key]; existing != nil { + m.mu.Unlock() + _ = pump.Close() + return existing, nil + } + if m.sessions == nil { + m.sessions = map[string]*FabricSessionPump{} + } + m.sessions[key] = pump + m.mu.Unlock() + return pump, nil +} + +func (m *FabricSessionPeerManager) ClosePeer(target FabricSessionPeerTarget) error { + if m == nil { + return nil + } + key, err := fabricSessionPeerKey(target) + if err != nil { + return err + } + m.mu.Lock() + pump := m.sessions[key] + delete(m.sessions, key) + m.mu.Unlock() + if pump == nil { + return nil + } + return pump.Close() +} + +func (m *FabricSessionPeerManager) Close() error { + if m == nil { + return nil + } + m.mu.Lock() + sessions := m.sessions + m.sessions = map[string]*FabricSessionPump{} + m.mu.Unlock() + var firstErr error + for _, pump := range sessions { + if err := pump.Close(); err != nil && firstErr == nil { + firstErr = err + } + } + return firstErr +} + +func fabricSessionPeerKey(target FabricSessionPeerTarget) (string, error) { + peerID := strings.TrimSpace(target.PeerID) + baseURL := strings.TrimRight(strings.TrimSpace(target.BaseURL), "/") + if peerID == "" || baseURL == "" { + return "", fmt.Errorf("fabric session peer id and base url are required") + } + return peerID + "\x00" + baseURL, nil +} diff --git a/agents/rap-node-agent/internal/mesh/fabric_session_manager_test.go b/agents/rap-node-agent/internal/mesh/fabric_session_manager_test.go new file mode 100644 index 0000000..0a421c9 --- /dev/null +++ b/agents/rap-node-agent/internal/mesh/fabric_session_manager_test.go @@ -0,0 +1,126 @@ +package mesh + +import ( + "context" + "net/http/httptest" + "testing" + "time" + + "github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto" +) + +func TestFabricSessionPeerManagerReusesPeerPump(t *testing.T) { + var opened int + server := httptest.NewServer(Server{ + Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"}, + FabricSessionEnabled: true, + FabricSessionLogger: func(entry FabricSessionEventLogEntry) { + if entry.Event == "fabric_session_websocket_opened" { + opened++ + } + }, + }.Handler()) + defer server.Close() + + manager := NewFabricSessionPeerManager() + defer manager.Close() + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + target := FabricSessionPeerTarget{ + PeerID: "node-a", + BaseURL: server.URL, + Options: FabricSessionDialOptions{ + Token: "rap_fsn_manager", + Timeout: time.Second, + }, + Pump: FabricSessionPumpOptions{ + OutboundBuffer: 4, + InboundBuffer: 4, + }, + } + + first, err := manager.Get(ctx, target) + if err != nil { + t.Fatalf("first get: %v", err) + } + second, err := manager.Get(ctx, target) + if err != nil { + t.Fatalf("second get: %v", err) + } + if first != second { + t.Fatal("manager did not reuse peer pump") + } + if opened != 1 { + t.Fatalf("opened sessions = %d, want 1", opened) + } + if err := first.Send(ctx, fabricproto.Frame{ + Type: fabricproto.FramePing, + Sequence: 1, + Payload: []byte("manager"), + }); err != nil { + t.Fatalf("send ping: %v", err) + } + select { + case frame := <-first.Frames(): + if frame.Type != fabricproto.FramePong || frame.Sequence != 1 || string(frame.Payload) != "manager" { + t.Fatalf("frame = %+v", frame) + } + case err := <-first.Errors(): + t.Fatalf("pump error: %v", err) + case <-ctx.Done(): + t.Fatal(ctx.Err()) + } +} + +func TestFabricSessionPeerManagerClosePeerReopens(t *testing.T) { + var opened int + server := httptest.NewServer(Server{ + Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"}, + FabricSessionEnabled: true, + FabricSessionLogger: func(entry FabricSessionEventLogEntry) { + if entry.Event == "fabric_session_websocket_opened" { + opened++ + } + }, + }.Handler()) + defer server.Close() + + manager := NewFabricSessionPeerManager() + defer manager.Close() + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + target := FabricSessionPeerTarget{ + PeerID: "node-a", + BaseURL: server.URL, + Options: FabricSessionDialOptions{ + Token: "rap_fsn_manager_reopen", + Timeout: time.Second, + }, + } + + first, err := manager.Get(ctx, target) + if err != nil { + t.Fatalf("first get: %v", err) + } + if err := manager.ClosePeer(target); err != nil { + t.Fatalf("close peer: %v", err) + } + second, err := manager.Get(ctx, target) + if err != nil { + t.Fatalf("second get: %v", err) + } + if first == second { + t.Fatal("manager reused pump after close peer") + } + if opened != 2 { + t.Fatalf("opened sessions = %d, want 2", opened) + } +} + +func TestFabricSessionPeerManagerRejectsIncompleteTarget(t *testing.T) { + manager := NewFabricSessionPeerManager() + _, err := manager.Get(context.Background(), FabricSessionPeerTarget{PeerID: "node-a"}) + if err == nil { + t.Fatal("incomplete target unexpectedly succeeded") + } +} diff --git a/docs/architecture/DISTRIBUTED_FABRIC_NODE_PROTOCOL_PLAN.md b/docs/architecture/DISTRIBUTED_FABRIC_NODE_PROTOCOL_PLAN.md index 31e1a4e..000809e 100644 --- a/docs/architecture/DISTRIBUTED_FABRIC_NODE_PROTOCOL_PLAN.md +++ b/docs/architecture/DISTRIBUTED_FABRIC_NODE_PROTOCOL_PLAN.md @@ -278,6 +278,8 @@ VPN packet inbox by stream id. `mesh-live-smoke` now sends a real VPN packet batch through `FabricSessionPacketTransport` over the WebSocket fabric session and requires a stream ACK from the remote node. +Mesh has a peer session manager that reuses one pump per peer endpoint, giving +VPN transport selection a stable place to acquire long-lived fabric sessions. Deliverables: