3
This commit is contained in:
@@ -6,13 +6,7 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
|
||||
"github.com/gorilla/websocket"
|
||||
)
|
||||
|
||||
type Client struct {
|
||||
@@ -20,38 +14,6 @@ type Client struct {
|
||||
HTTPClient *http.Client
|
||||
}
|
||||
|
||||
type FabricSessionDialOptions struct {
|
||||
Token string
|
||||
Header http.Header
|
||||
Dialer *websocket.Dialer
|
||||
Timeout time.Duration
|
||||
MaxPayload int
|
||||
}
|
||||
|
||||
type FabricSessionClient struct {
|
||||
conn *websocket.Conn
|
||||
timeout time.Duration
|
||||
maxPayload int
|
||||
readMu sync.Mutex
|
||||
writeMu sync.Mutex
|
||||
}
|
||||
|
||||
// FabricSessionPumpOptions sizes the channels of a FabricSessionPump.
// Non-positive values are replaced by defaults in StartPump.
type FabricSessionPumpOptions struct {
	// OutboundBuffer is the capacity of the queue of frames waiting to be written.
	OutboundBuffer int
	// InboundBuffer is the capacity of the queue of frames that were read.
	InboundBuffer int
	// ErrorBuffer is the capacity of the error queue.
	ErrorBuffer int
}
|
||||
|
||||
type FabricSessionPump struct {
|
||||
session *FabricSessionClient
|
||||
outbound chan fabricproto.Frame
|
||||
inbound chan fabricproto.Frame
|
||||
errors chan error
|
||||
done chan struct{}
|
||||
cancel context.CancelFunc
|
||||
closeMu sync.Once
|
||||
}
|
||||
|
||||
func NewClient(baseURL string) Client {
|
||||
return Client{
|
||||
BaseURL: baseURL,
|
||||
@@ -147,270 +109,3 @@ func (c Client) SendProduction(ctx context.Context, envelope ProductionEnvelope)
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (c Client) DialFabricSession(ctx context.Context, opts FabricSessionDialOptions) (*websocket.Conn, *http.Response, error) {
|
||||
target, err := c.fabricSessionWebSocketURL()
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
header := cloneHeader(opts.Header)
|
||||
if strings.TrimSpace(opts.Token) != "" {
|
||||
header.Set("X-RAP-Fabric-Session-Token", strings.TrimSpace(opts.Token))
|
||||
}
|
||||
dialer := opts.Dialer
|
||||
if dialer == nil {
|
||||
base := *websocket.DefaultDialer
|
||||
if opts.Timeout > 0 {
|
||||
base.HandshakeTimeout = opts.Timeout
|
||||
}
|
||||
dialer = &base
|
||||
}
|
||||
return dialer.DialContext(ctx, target, header)
|
||||
}
|
||||
|
||||
func (c Client) OpenFabricSession(ctx context.Context, opts FabricSessionDialOptions) (*FabricSessionClient, *http.Response, error) {
|
||||
conn, resp, err := c.DialFabricSession(ctx, opts)
|
||||
if err != nil {
|
||||
if resp != nil {
|
||||
return nil, resp, fmt.Errorf("fabric session websocket rejected with status %d: %w", resp.StatusCode, err)
|
||||
}
|
||||
return nil, resp, err
|
||||
}
|
||||
maxPayload := opts.MaxPayload
|
||||
if maxPayload <= 0 {
|
||||
maxPayload = fabricproto.DefaultMaxPayload
|
||||
}
|
||||
return &FabricSessionClient{
|
||||
conn: conn,
|
||||
timeout: opts.Timeout,
|
||||
maxPayload: maxPayload,
|
||||
}, resp, nil
|
||||
}
|
||||
|
||||
func (c Client) SendFabricSessionFrame(ctx context.Context, opts FabricSessionDialOptions, frame fabricproto.Frame) (fabricproto.Frame, error) {
|
||||
session, _, err := c.OpenFabricSession(ctx, opts)
|
||||
if err != nil {
|
||||
return fabricproto.Frame{}, err
|
||||
}
|
||||
defer session.Close()
|
||||
return session.RoundTrip(ctx, frame)
|
||||
}
|
||||
|
||||
func (c *FabricSessionClient) Close() error {
|
||||
if c == nil || c.conn == nil {
|
||||
return nil
|
||||
}
|
||||
return c.conn.Close()
|
||||
}
|
||||
|
||||
func (c *FabricSessionClient) WriteFrame(ctx context.Context, frame fabricproto.Frame) error {
|
||||
if c == nil || c.conn == nil {
|
||||
return fmt.Errorf("fabric session client is closed")
|
||||
}
|
||||
payload, err := fabricproto.MarshalFrame(frame)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
c.writeMu.Lock()
|
||||
defer c.writeMu.Unlock()
|
||||
c.applyWriteDeadline(ctx)
|
||||
return c.conn.WriteMessage(websocket.BinaryMessage, payload)
|
||||
}
|
||||
|
||||
func (c *FabricSessionClient) ReadFrame(ctx context.Context) (fabricproto.Frame, error) {
|
||||
if c == nil || c.conn == nil {
|
||||
return fabricproto.Frame{}, fmt.Errorf("fabric session client is closed")
|
||||
}
|
||||
c.readMu.Lock()
|
||||
defer c.readMu.Unlock()
|
||||
c.applyReadDeadline(ctx)
|
||||
messageType, responsePayload, err := c.conn.ReadMessage()
|
||||
if err != nil {
|
||||
return fabricproto.Frame{}, err
|
||||
}
|
||||
if messageType != websocket.BinaryMessage {
|
||||
return fabricproto.Frame{}, fmt.Errorf("fabric session websocket returned non-binary message type %d", messageType)
|
||||
}
|
||||
return fabricproto.UnmarshalFrame(responsePayload, c.maxPayload)
|
||||
}
|
||||
|
||||
func (c *FabricSessionClient) RoundTrip(ctx context.Context, frame fabricproto.Frame) (fabricproto.Frame, error) {
|
||||
if err := c.WriteFrame(ctx, frame); err != nil {
|
||||
return fabricproto.Frame{}, err
|
||||
}
|
||||
return c.ReadFrame(ctx)
|
||||
}
|
||||
|
||||
func (c *FabricSessionClient) StartPump(ctx context.Context, opts FabricSessionPumpOptions) *FabricSessionPump {
|
||||
if opts.OutboundBuffer <= 0 {
|
||||
opts.OutboundBuffer = 64
|
||||
}
|
||||
if opts.InboundBuffer <= 0 {
|
||||
opts.InboundBuffer = 64
|
||||
}
|
||||
if opts.ErrorBuffer <= 0 {
|
||||
opts.ErrorBuffer = 8
|
||||
}
|
||||
pumpCtx, cancel := context.WithCancel(ctx)
|
||||
pump := &FabricSessionPump{
|
||||
session: c,
|
||||
outbound: make(chan fabricproto.Frame, opts.OutboundBuffer),
|
||||
inbound: make(chan fabricproto.Frame, opts.InboundBuffer),
|
||||
errors: make(chan error, opts.ErrorBuffer),
|
||||
done: make(chan struct{}),
|
||||
cancel: cancel,
|
||||
}
|
||||
go pump.writeLoop(pumpCtx)
|
||||
go pump.readLoop(pumpCtx)
|
||||
return pump
|
||||
}
|
||||
|
||||
func (p *FabricSessionPump) Send(ctx context.Context, frame fabricproto.Frame) error {
|
||||
if p == nil {
|
||||
return fmt.Errorf("fabric session pump is nil")
|
||||
}
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case <-p.done:
|
||||
return fmt.Errorf("fabric session pump is closed")
|
||||
case p.outbound <- frame:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
func (p *FabricSessionPump) Frames() <-chan fabricproto.Frame {
|
||||
if p == nil {
|
||||
return nil
|
||||
}
|
||||
return p.inbound
|
||||
}
|
||||
|
||||
func (p *FabricSessionPump) Errors() <-chan error {
|
||||
if p == nil {
|
||||
return nil
|
||||
}
|
||||
return p.errors
|
||||
}
|
||||
|
||||
func (p *FabricSessionPump) Closed() bool {
|
||||
if p == nil {
|
||||
return true
|
||||
}
|
||||
select {
|
||||
case <-p.done:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func (p *FabricSessionPump) Close() error {
|
||||
if p == nil {
|
||||
return nil
|
||||
}
|
||||
var err error
|
||||
p.closeMu.Do(func() {
|
||||
close(p.done)
|
||||
p.cancel()
|
||||
err = p.session.Close()
|
||||
})
|
||||
return err
|
||||
}
|
||||
|
||||
func (p *FabricSessionPump) writeLoop(ctx context.Context) {
|
||||
defer p.Close()
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
p.reportError(ctx.Err())
|
||||
return
|
||||
case <-p.done:
|
||||
return
|
||||
case frame := <-p.outbound:
|
||||
if err := p.session.WriteFrame(ctx, frame); err != nil {
|
||||
p.reportError(err)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (p *FabricSessionPump) readLoop(ctx context.Context) {
|
||||
defer p.Close()
|
||||
for {
|
||||
frame, err := p.session.ReadFrame(ctx)
|
||||
if err != nil {
|
||||
p.reportError(err)
|
||||
return
|
||||
}
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
p.reportError(ctx.Err())
|
||||
return
|
||||
case <-p.done:
|
||||
return
|
||||
case p.inbound <- frame:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (p *FabricSessionPump) reportError(err error) {
|
||||
if err == nil {
|
||||
return
|
||||
}
|
||||
select {
|
||||
case p.errors <- err:
|
||||
default:
|
||||
}
|
||||
}
|
||||
|
||||
func (c *FabricSessionClient) applyReadDeadline(ctx context.Context) {
|
||||
if deadline, ok := ctx.Deadline(); ok {
|
||||
_ = c.conn.SetReadDeadline(deadline)
|
||||
} else if c.timeout > 0 {
|
||||
_ = c.conn.SetReadDeadline(time.Now().Add(c.timeout))
|
||||
}
|
||||
}
|
||||
|
||||
func (c *FabricSessionClient) applyWriteDeadline(ctx context.Context) {
|
||||
if deadline, ok := ctx.Deadline(); ok {
|
||||
_ = c.conn.SetWriteDeadline(deadline)
|
||||
} else if c.timeout > 0 {
|
||||
_ = c.conn.SetWriteDeadline(time.Now().Add(c.timeout))
|
||||
}
|
||||
}
|
||||
|
||||
func (c Client) fabricSessionWebSocketURL() (string, error) {
|
||||
base := strings.TrimSpace(c.BaseURL)
|
||||
if base == "" {
|
||||
return "", fmt.Errorf("mesh base url is required")
|
||||
}
|
||||
parsed, err := url.Parse(base)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
switch parsed.Scheme {
|
||||
case "http":
|
||||
parsed.Scheme = "ws"
|
||||
case "https":
|
||||
parsed.Scheme = "wss"
|
||||
case "ws", "wss":
|
||||
default:
|
||||
return "", fmt.Errorf("unsupported mesh base url scheme %q", parsed.Scheme)
|
||||
}
|
||||
parsed.Path = strings.TrimRight(parsed.Path, "/") + "/mesh/v1/fabric/session/ws"
|
||||
parsed.RawQuery = ""
|
||||
parsed.Fragment = ""
|
||||
return parsed.String(), nil
|
||||
}
|
||||
|
||||
// cloneHeader returns a copy of header that shares no storage with it.
// The result is never nil, keys are re-added through Header.Add (which
// canonicalises them), and keys without any value are not carried over.
func cloneHeader(header http.Header) http.Header {
	cloned := make(http.Header, len(header))
	for name := range header {
		for i := range header[name] {
			cloned.Add(name, header[name][i])
		}
	}
	return cloned
}
|
||||
|
||||
@@ -1,243 +0,0 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
|
||||
)
|
||||
|
||||
func TestClientFabricSessionFrameRoundTrip(t *testing.T) {
|
||||
server := httptest.NewServer(Server{
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
FabricSessionWebSocketEnabled: true,
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
client := NewClient(server.URL)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||
defer cancel()
|
||||
response, err := client.SendFabricSessionFrame(ctx, FabricSessionDialOptions{
|
||||
Token: "rap_fsn_clienttest",
|
||||
Timeout: time.Second,
|
||||
}, fabricproto.Frame{
|
||||
Type: fabricproto.FramePing,
|
||||
Sequence: 12,
|
||||
Payload: []byte("probe"),
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("send fabric session frame: %v", err)
|
||||
}
|
||||
if response.Type != fabricproto.FramePong || response.Sequence != 12 || string(response.Payload) != "probe" {
|
||||
t.Fatalf("response = %+v, want pong seq 12", response)
|
||||
}
|
||||
}
|
||||
|
||||
func TestClientFabricSessionPersistentRoundTrips(t *testing.T) {
|
||||
server := httptest.NewServer(Server{
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
FabricSessionWebSocketEnabled: true,
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
client := NewClient(server.URL)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||
defer cancel()
|
||||
session, _, err := client.OpenFabricSession(ctx, FabricSessionDialOptions{
|
||||
Token: "rap_fsn_persistent",
|
||||
Timeout: time.Second,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("open fabric session: %v", err)
|
||||
}
|
||||
defer session.Close()
|
||||
|
||||
first, err := session.RoundTrip(ctx, fabricproto.Frame{
|
||||
Type: fabricproto.FramePing,
|
||||
Sequence: 1,
|
||||
Payload: []byte("first"),
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("first round trip: %v", err)
|
||||
}
|
||||
second, err := session.RoundTrip(ctx, fabricproto.Frame{
|
||||
Type: fabricproto.FramePing,
|
||||
Sequence: 2,
|
||||
Payload: []byte("second"),
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("second round trip: %v", err)
|
||||
}
|
||||
if first.Type != fabricproto.FramePong || first.Sequence != 1 || string(first.Payload) != "first" {
|
||||
t.Fatalf("first response = %+v, want pong seq 1", first)
|
||||
}
|
||||
if second.Type != fabricproto.FramePong || second.Sequence != 2 || string(second.Payload) != "second" {
|
||||
t.Fatalf("second response = %+v, want pong seq 2", second)
|
||||
}
|
||||
}
|
||||
|
||||
func TestClientFabricSessionPersistentDataAcks(t *testing.T) {
|
||||
server := httptest.NewServer(Server{
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
FabricSessionWebSocketEnabled: true,
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
client := NewClient(server.URL)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||
defer cancel()
|
||||
session, _, err := client.OpenFabricSession(ctx, FabricSessionDialOptions{
|
||||
Token: "rap_fsn_dataacks",
|
||||
Timeout: time.Second,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("open fabric session: %v", err)
|
||||
}
|
||||
defer session.Close()
|
||||
|
||||
if err := session.WriteFrame(ctx, fabricproto.Frame{
|
||||
Type: fabricproto.FrameOpenStream,
|
||||
StreamID: 77,
|
||||
TrafficClass: fabricproto.TrafficClassInteractive,
|
||||
}); err != nil {
|
||||
t.Fatalf("open stream frame: %v", err)
|
||||
}
|
||||
|
||||
first, err := session.RoundTrip(ctx, fabricproto.Frame{
|
||||
Type: fabricproto.FrameData,
|
||||
StreamID: 77,
|
||||
Sequence: 10,
|
||||
TrafficClass: fabricproto.TrafficClassInteractive,
|
||||
Payload: []byte("first payload"),
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("first data round trip: %v", err)
|
||||
}
|
||||
second, err := session.RoundTrip(ctx, fabricproto.Frame{
|
||||
Type: fabricproto.FrameData,
|
||||
StreamID: 77,
|
||||
Sequence: 11,
|
||||
TrafficClass: fabricproto.TrafficClassInteractive,
|
||||
Payload: []byte("second payload"),
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("second data round trip: %v", err)
|
||||
}
|
||||
if first.Type != fabricproto.FrameAck || first.StreamID != 77 || first.Sequence != 10 {
|
||||
t.Fatalf("first ack = %+v, want stream 77 seq 10", first)
|
||||
}
|
||||
if second.Type != fabricproto.FrameAck || second.StreamID != 77 || second.Sequence != 11 {
|
||||
t.Fatalf("second ack = %+v, want stream 77 seq 11", second)
|
||||
}
|
||||
}
|
||||
|
||||
func TestClientFabricSessionPumpMovesIndependentFrames(t *testing.T) {
|
||||
server := httptest.NewServer(Server{
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
FabricSessionWebSocketEnabled: true,
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
client := NewClient(server.URL)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||
defer cancel()
|
||||
session, _, err := client.OpenFabricSession(ctx, FabricSessionDialOptions{
|
||||
Token: "rap_fsn_pump",
|
||||
Timeout: time.Second,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("open fabric session: %v", err)
|
||||
}
|
||||
pump := session.StartPump(ctx, FabricSessionPumpOptions{
|
||||
OutboundBuffer: 4,
|
||||
InboundBuffer: 4,
|
||||
ErrorBuffer: 4,
|
||||
})
|
||||
defer pump.Close()
|
||||
|
||||
if err := pump.Send(ctx, fabricproto.Frame{
|
||||
Type: fabricproto.FrameOpenStream,
|
||||
StreamID: 900,
|
||||
TrafficClass: fabricproto.TrafficClassBulk,
|
||||
}); err != nil {
|
||||
t.Fatalf("send open bulk stream: %v", err)
|
||||
}
|
||||
if err := pump.Send(ctx, fabricproto.Frame{
|
||||
Type: fabricproto.FrameData,
|
||||
StreamID: 900,
|
||||
Sequence: 31,
|
||||
TrafficClass: fabricproto.TrafficClassBulk,
|
||||
Payload: []byte("bulk payload"),
|
||||
}); err != nil {
|
||||
t.Fatalf("send bulk data: %v", err)
|
||||
}
|
||||
if err := pump.Send(ctx, fabricproto.Frame{
|
||||
Type: fabricproto.FramePing,
|
||||
Sequence: 32,
|
||||
Payload: []byte("control ping"),
|
||||
}); err != nil {
|
||||
t.Fatalf("send ping: %v", err)
|
||||
}
|
||||
|
||||
gotAck := false
|
||||
gotPong := false
|
||||
for !gotAck || !gotPong {
|
||||
select {
|
||||
case frame := <-pump.Frames():
|
||||
switch {
|
||||
case frame.Type == fabricproto.FrameAck && frame.StreamID == 900 && frame.Sequence == 31:
|
||||
gotAck = true
|
||||
case frame.Type == fabricproto.FramePong && frame.Sequence == 32 && string(frame.Payload) == "control ping":
|
||||
gotPong = true
|
||||
}
|
||||
case err := <-pump.Errors():
|
||||
t.Fatalf("pump error: %v", err)
|
||||
case <-ctx.Done():
|
||||
t.Fatalf("timed out waiting for pump frames: ack=%v pong=%v", gotAck, gotPong)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestClientFabricSessionReportsRejectedStatus(t *testing.T) {
|
||||
server := httptest.NewServer(Server{
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
FabricSessionWebSocketEnabled: true,
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
client := NewClient(server.URL)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||
defer cancel()
|
||||
_, err := client.SendFabricSessionFrame(ctx, FabricSessionDialOptions{}, fabricproto.Frame{Type: fabricproto.FramePing})
|
||||
if err == nil {
|
||||
t.Fatal("send fabric session without token unexpectedly succeeded")
|
||||
}
|
||||
}
|
||||
|
||||
func TestClientFabricSessionWebSocketURL(t *testing.T) {
|
||||
cases := []struct {
|
||||
base string
|
||||
want string
|
||||
}{
|
||||
{base: "http://node.example", want: "ws://node.example/mesh/v1/fabric/session/ws"},
|
||||
{base: "https://node.example/base/", want: "wss://node.example/base/mesh/v1/fabric/session/ws"},
|
||||
{base: "ws://node.example", want: "ws://node.example/mesh/v1/fabric/session/ws"},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
client := NewClient(tc.base)
|
||||
got, err := client.fabricSessionWebSocketURL()
|
||||
if err != nil {
|
||||
t.Fatalf("fabricSessionWebSocketURL(%q): %v", tc.base, err)
|
||||
}
|
||||
if got != tc.want {
|
||||
t.Fatalf("fabricSessionWebSocketURL(%q) = %q, want %q", tc.base, got, tc.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,94 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
|
||||
)
|
||||
|
||||
// fabricControlForwardSequence is a package-level counter;
// SendFabricControlForward increments it to stamp each request frame with a
// fresh Sequence value and matches the response against that value.
var fabricControlForwardSequence atomic.Uint64
|
||||
|
||||
// FabricControlForwardResult is the outcome of one SendFabricControlForward call.
type FabricControlForwardResult struct {
	// Payload is a copy of the payload of the matching response frame.
	Payload json.RawMessage `json:"payload,omitempty"`
	// LatencyMs is the time in milliseconds between the request having been
	// sent and the matching response frame being received.
	LatencyMs int64 `json:"latency_ms"`
	// Endpoint is the address of the registry endpoint that was contacted.
	Endpoint string `json:"endpoint,omitempty"`
}
|
||||
|
||||
func FabricTransportTargetFromRegistryEndpoint(endpoint FabricRegistryEndpoint) FabricTransportTarget {
|
||||
return FabricTransportTarget{
|
||||
EndpointID: strings.TrimSpace(endpoint.EndpointID),
|
||||
PeerID: strings.TrimSpace(endpoint.EndpointID),
|
||||
Endpoint: strings.TrimSpace(endpoint.Address),
|
||||
Transport: strings.TrimSpace(endpoint.Transport),
|
||||
PeerCertSHA256: strings.TrimSpace(endpoint.PeerCertSHA256),
|
||||
Timeout: 5 * time.Second,
|
||||
InboundBuffer: 4,
|
||||
ErrorBuffer: 4,
|
||||
}
|
||||
}
|
||||
|
||||
func SendFabricControlForward(ctx context.Context, transport FabricTransport, endpoint FabricRegistryEndpoint, payload []byte, timeout time.Duration) (FabricControlForwardResult, error) {
|
||||
if transport == nil {
|
||||
return FabricControlForwardResult{}, fmt.Errorf("fabric control transport is unavailable")
|
||||
}
|
||||
if len(payload) == 0 {
|
||||
return FabricControlForwardResult{}, fmt.Errorf("fabric control payload is empty")
|
||||
}
|
||||
if timeout <= 0 {
|
||||
timeout = 5 * time.Second
|
||||
}
|
||||
target := FabricTransportTargetFromRegistryEndpoint(endpoint)
|
||||
target.Timeout = timeout
|
||||
session, err := transport.Connect(ctx, target)
|
||||
if err != nil {
|
||||
return FabricControlForwardResult{}, err
|
||||
}
|
||||
defer session.Close()
|
||||
sequence := fabricControlForwardSequence.Add(1)
|
||||
if err := session.Send(ctx, fabricproto.Frame{
|
||||
Type: fabricproto.FrameData,
|
||||
TrafficClass: fabricproto.TrafficClassReliable,
|
||||
StreamID: FabricControlForwardQUICStreamID,
|
||||
Sequence: sequence,
|
||||
Payload: append([]byte(nil), payload...),
|
||||
}); err != nil {
|
||||
return FabricControlForwardResult{}, err
|
||||
}
|
||||
waitCtx := ctx
|
||||
var cancel context.CancelFunc
|
||||
if timeout > 0 {
|
||||
waitCtx, cancel = context.WithTimeout(ctx, timeout)
|
||||
defer cancel()
|
||||
}
|
||||
startedAt := time.Now()
|
||||
for {
|
||||
select {
|
||||
case <-waitCtx.Done():
|
||||
return FabricControlForwardResult{}, waitCtx.Err()
|
||||
case err, ok := <-session.Errors():
|
||||
if !ok {
|
||||
return FabricControlForwardResult{}, fmt.Errorf("fabric control session closed")
|
||||
}
|
||||
if err != nil {
|
||||
return FabricControlForwardResult{}, err
|
||||
}
|
||||
case frame, ok := <-session.Frames():
|
||||
if !ok {
|
||||
return FabricControlForwardResult{}, fmt.Errorf("fabric control session closed")
|
||||
}
|
||||
if frame.Type != fabricproto.FrameData || frame.StreamID != FabricControlForwardQUICStreamID || frame.Sequence != sequence {
|
||||
continue
|
||||
}
|
||||
return FabricControlForwardResult{
|
||||
Payload: append(json.RawMessage(nil), frame.Payload...),
|
||||
LatencyMs: time.Since(startedAt).Milliseconds(),
|
||||
Endpoint: endpoint.Address,
|
||||
}, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -565,6 +565,43 @@ func TestQUICFabricServerHandlesWebIngressForwardFrames(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestSendFabricControlForwardUsesQUICStream(t *testing.T) {
|
||||
tlsConfig := testQUICTLSConfig(t)
|
||||
server, err := StartQUICFabricServer(context.Background(), QUICFabricServerConfig{
|
||||
ListenAddr: "127.0.0.1:0",
|
||||
TLSConfig: tlsConfig,
|
||||
FabricControlHandler: func(_ context.Context, payload []byte) ([]byte, error) {
|
||||
if string(payload) != `{"method":"GET","path":"/auth/login"}` {
|
||||
return nil, ErrForwardRuntimeUnavailable
|
||||
}
|
||||
return []byte(`{"status_code":200,"body":{"ok":true}}`), nil
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("start quic fabric server: %v", err)
|
||||
}
|
||||
defer server.Close()
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
|
||||
defer cancel()
|
||||
result, err := SendFabricControlForward(ctx, NewQUICFabricTransport(nil), FabricRegistryEndpoint{
|
||||
EndpointID: "control-a",
|
||||
Address: "quic://" + server.Addr().String(),
|
||||
Transport: "direct_quic",
|
||||
PeerCertSHA256: testQUICCertSHA256(t, tlsConfig),
|
||||
}, []byte(`{"method":"GET","path":"/auth/login"}`), time.Second)
|
||||
if err != nil {
|
||||
t.Fatalf("send fabric control forward: %v", err)
|
||||
}
|
||||
var response quicFabricControlForwardResponse
|
||||
if err := json.Unmarshal(result.Payload, &response); err != nil {
|
||||
t.Fatalf("decode response: %v", err)
|
||||
}
|
||||
if response.Error != "" || string(response.Payload) != `{"status_code":200,"body":{"ok":true}}` {
|
||||
t.Fatalf("response = %+v", response)
|
||||
}
|
||||
}
|
||||
|
||||
func startQUICFabricEchoServer(t *testing.T) *quic.Listener {
|
||||
t.Helper()
|
||||
return startQUICFabricEchoServerWithTLS(t, testQUICTLSConfig(t))
|
||||
|
||||
@@ -164,6 +164,7 @@ func fabricRouteHopsForCandidate(candidate PeerEndpointCandidate, metadata Fabri
|
||||
case FabricRouteRelay:
|
||||
relayNodeID := firstNonEmpty(strings.TrimSpace(metadata.RelayNodeID), strings.TrimSpace(metadata.ViaNodeID))
|
||||
relayEndpoint := firstNonEmpty(strings.TrimRight(strings.TrimSpace(metadata.RelayEndpoint), "/"), endpoint)
|
||||
relayPeerCertSHA256 := candidatePeerCertSHA256(candidate)
|
||||
hops := []FabricRouteHop{}
|
||||
if localNodeID != "" {
|
||||
hops = append(hops, FabricRouteHop{NodeID: localNodeID, Mode: FabricRouteDirect})
|
||||
@@ -173,7 +174,7 @@ func fabricRouteHopsForCandidate(candidate PeerEndpointCandidate, metadata Fabri
|
||||
return hops
|
||||
}
|
||||
hops = append(hops,
|
||||
FabricRouteHop{NodeID: relayNodeID, Mode: FabricRouteRelay, EndpointID: candidate.EndpointID + ":relay", Address: relayEndpoint},
|
||||
FabricRouteHop{NodeID: relayNodeID, Mode: FabricRouteRelay, EndpointID: candidate.EndpointID + ":relay", Address: relayEndpoint, PeerCertSHA256: relayPeerCertSHA256},
|
||||
FabricRouteHop{NodeID: targetNodeID, Mode: FabricRouteRelay, EndpointID: candidate.EndpointID, Address: endpoint, PeerCertSHA256: candidatePeerCertSHA256(candidate)},
|
||||
)
|
||||
return hops
|
||||
|
||||
@@ -44,7 +44,13 @@ func TestFabricRouteSetForPeerEndpointCandidatesPrefersLocalLAN(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestFabricRouteSetForPeerEndpointCandidatesBuildsRelayFallback(t *testing.T) {
|
||||
metadata, _ := json.Marshal(FabricCandidateMetadata{RelayNodeID: "node-r", RelayEndpoint: "quic://node-r:19443"})
|
||||
metadata, _ := json.Marshal(struct {
|
||||
FabricCandidateMetadata
|
||||
TLSCertSHA256 string `json:"tls_cert_sha256,omitempty"`
|
||||
}{
|
||||
FabricCandidateMetadata: FabricCandidateMetadata{RelayNodeID: "node-r", RelayEndpoint: "quic://node-r:19443"},
|
||||
TLSCertSHA256: "relay-cert",
|
||||
})
|
||||
routeSet := FabricRouteSetForPeerEndpointCandidates("node-b", []PeerEndpointCandidate{{
|
||||
EndpointID: "node-b-relay",
|
||||
NodeID: "node-b",
|
||||
@@ -69,6 +75,9 @@ func TestFabricRouteSetForPeerEndpointCandidatesBuildsRelayFallback(t *testing.T
|
||||
if got := routeSet.Primary.Hops[1].NodeID; got != "node-r" {
|
||||
t.Fatalf("relay hop = %q, want node-r", got)
|
||||
}
|
||||
if got := routeSet.Primary.Hops[1].PeerCertSHA256; got != "relay-cert" {
|
||||
t.Fatalf("relay hop peer cert = %q, want relay-cert", got)
|
||||
}
|
||||
if routeSet.Primary.Capacity != 50 {
|
||||
t.Fatalf("capacity = %d, want 50", routeSet.Primary.Capacity)
|
||||
}
|
||||
|
||||
@@ -1,156 +0,0 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync"
|
||||
)
|
||||
|
||||
type FabricSessionPeerManager struct {
|
||||
mu sync.Mutex
|
||||
sessions map[string]*FabricSessionPump
|
||||
stats FabricSessionPeerManagerStats
|
||||
}
|
||||
|
||||
type FabricSessionPeerTarget struct {
|
||||
PeerID string
|
||||
BaseURL string
|
||||
Options FabricSessionDialOptions
|
||||
Pump FabricSessionPumpOptions
|
||||
}
|
||||
|
||||
// FabricSessionPeerManagerStats holds the usage counters of a
// FabricSessionPeerManager.
type FabricSessionPeerManagerStats struct {
	// Opens counts pumps that were newly stored in the cache.
	Opens uint64 `json:"opens"`
	// Reuses counts Get calls answered with an already cached live pump.
	Reuses uint64 `json:"reuses"`
	// ClosedEvicted counts cached pumps removed because they were closed.
	ClosedEvicted uint64 `json:"closed_evicted"`
	// ClosePeerCalls counts calls to ClosePeer that reached the cache.
	ClosePeerCalls uint64 `json:"close_peer_calls"`
	// CloseAllCalls counts calls to Close on a non-nil manager.
	CloseAllCalls uint64 `json:"close_all_calls"`
}
|
||||
|
||||
type FabricSessionPeerManagerSnapshot struct {
|
||||
SchemaVersion string `json:"schema_version"`
|
||||
ActiveCount int `json:"active_count"`
|
||||
ClosedCount int `json:"closed_count"`
|
||||
Stats FabricSessionPeerManagerStats `json:"stats"`
|
||||
}
|
||||
|
||||
func NewFabricSessionPeerManager() *FabricSessionPeerManager {
|
||||
return &FabricSessionPeerManager{
|
||||
sessions: map[string]*FabricSessionPump{},
|
||||
}
|
||||
}
|
||||
|
||||
func (m *FabricSessionPeerManager) Get(ctx context.Context, target FabricSessionPeerTarget) (*FabricSessionPump, error) {
|
||||
if m == nil {
|
||||
return nil, fmt.Errorf("fabric session peer manager is nil")
|
||||
}
|
||||
key, err := fabricSessionPeerKey(target)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
m.mu.Lock()
|
||||
if pump := m.sessions[key]; pump != nil {
|
||||
if pump.Closed() {
|
||||
delete(m.sessions, key)
|
||||
m.stats.ClosedEvicted++
|
||||
} else {
|
||||
m.stats.Reuses++
|
||||
m.mu.Unlock()
|
||||
return pump, nil
|
||||
}
|
||||
}
|
||||
m.mu.Unlock()
|
||||
|
||||
session, _, err := NewClient(target.BaseURL).OpenFabricSession(ctx, target.Options)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
pump := session.StartPump(context.Background(), target.Pump)
|
||||
|
||||
m.mu.Lock()
|
||||
if existing := m.sessions[key]; existing != nil {
|
||||
if existing.Closed() {
|
||||
delete(m.sessions, key)
|
||||
m.stats.ClosedEvicted++
|
||||
} else {
|
||||
m.stats.Reuses++
|
||||
m.mu.Unlock()
|
||||
_ = pump.Close()
|
||||
return existing, nil
|
||||
}
|
||||
}
|
||||
if m.sessions == nil {
|
||||
m.sessions = map[string]*FabricSessionPump{}
|
||||
}
|
||||
m.sessions[key] = pump
|
||||
m.stats.Opens++
|
||||
m.mu.Unlock()
|
||||
return pump, nil
|
||||
}
|
||||
|
||||
func (m *FabricSessionPeerManager) ClosePeer(target FabricSessionPeerTarget) error {
|
||||
if m == nil {
|
||||
return nil
|
||||
}
|
||||
key, err := fabricSessionPeerKey(target)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
m.mu.Lock()
|
||||
m.stats.ClosePeerCalls++
|
||||
pump := m.sessions[key]
|
||||
delete(m.sessions, key)
|
||||
m.mu.Unlock()
|
||||
if pump == nil {
|
||||
return nil
|
||||
}
|
||||
return pump.Close()
|
||||
}
|
||||
|
||||
func (m *FabricSessionPeerManager) Close() error {
|
||||
if m == nil {
|
||||
return nil
|
||||
}
|
||||
m.mu.Lock()
|
||||
m.stats.CloseAllCalls++
|
||||
sessions := m.sessions
|
||||
m.sessions = map[string]*FabricSessionPump{}
|
||||
m.mu.Unlock()
|
||||
var firstErr error
|
||||
for _, pump := range sessions {
|
||||
if err := pump.Close(); err != nil && firstErr == nil {
|
||||
firstErr = err
|
||||
}
|
||||
}
|
||||
return firstErr
|
||||
}
|
||||
|
||||
func (m *FabricSessionPeerManager) Snapshot() FabricSessionPeerManagerSnapshot {
|
||||
if m == nil {
|
||||
return FabricSessionPeerManagerSnapshot{SchemaVersion: "rap.fabric_session_peer_manager.v1"}
|
||||
}
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
snapshot := FabricSessionPeerManagerSnapshot{
|
||||
SchemaVersion: "rap.fabric_session_peer_manager.v1",
|
||||
Stats: m.stats,
|
||||
}
|
||||
for _, pump := range m.sessions {
|
||||
if pump == nil || pump.Closed() {
|
||||
snapshot.ClosedCount++
|
||||
continue
|
||||
}
|
||||
snapshot.ActiveCount++
|
||||
}
|
||||
return snapshot
|
||||
}
|
||||
|
||||
func fabricSessionPeerKey(target FabricSessionPeerTarget) (string, error) {
|
||||
peerID := strings.TrimSpace(target.PeerID)
|
||||
baseURL := strings.TrimRight(strings.TrimSpace(target.BaseURL), "/")
|
||||
if peerID == "" || baseURL == "" {
|
||||
return "", fmt.Errorf("fabric session peer id and base url are required")
|
||||
}
|
||||
return peerID + "\x00" + baseURL, nil
|
||||
}
|
||||
@@ -1,194 +0,0 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
|
||||
)
|
||||
|
||||
func TestFabricSessionPeerManagerReusesPeerPump(t *testing.T) {
|
||||
var opened int
|
||||
server := httptest.NewServer(Server{
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
FabricSessionWebSocketEnabled: true,
|
||||
FabricSessionLogger: func(entry FabricSessionEventLogEntry) {
|
||||
if entry.Event == "fabric_session_websocket_opened" {
|
||||
opened++
|
||||
}
|
||||
},
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
manager := NewFabricSessionPeerManager()
|
||||
defer manager.Close()
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||
defer cancel()
|
||||
target := FabricSessionPeerTarget{
|
||||
PeerID: "node-a",
|
||||
BaseURL: server.URL,
|
||||
Options: FabricSessionDialOptions{
|
||||
Token: "rap_fsn_manager",
|
||||
Timeout: time.Second,
|
||||
},
|
||||
Pump: FabricSessionPumpOptions{
|
||||
OutboundBuffer: 4,
|
||||
InboundBuffer: 4,
|
||||
},
|
||||
}
|
||||
|
||||
first, err := manager.Get(ctx, target)
|
||||
if err != nil {
|
||||
t.Fatalf("first get: %v", err)
|
||||
}
|
||||
second, err := manager.Get(ctx, target)
|
||||
if err != nil {
|
||||
t.Fatalf("second get: %v", err)
|
||||
}
|
||||
if first != second {
|
||||
t.Fatal("manager did not reuse peer pump")
|
||||
}
|
||||
if opened != 1 {
|
||||
t.Fatalf("opened sessions = %d, want 1", opened)
|
||||
}
|
||||
snapshot := manager.Snapshot()
|
||||
if snapshot.SchemaVersion != "rap.fabric_session_peer_manager.v1" ||
|
||||
snapshot.ActiveCount != 1 ||
|
||||
snapshot.ClosedCount != 0 ||
|
||||
snapshot.Stats.Opens != 1 ||
|
||||
snapshot.Stats.Reuses != 1 {
|
||||
t.Fatalf("snapshot = %+v", snapshot)
|
||||
}
|
||||
if err := first.Send(ctx, fabricproto.Frame{
|
||||
Type: fabricproto.FramePing,
|
||||
Sequence: 1,
|
||||
Payload: []byte("manager"),
|
||||
}); err != nil {
|
||||
t.Fatalf("send ping: %v", err)
|
||||
}
|
||||
select {
|
||||
case frame := <-first.Frames():
|
||||
if frame.Type != fabricproto.FramePong || frame.Sequence != 1 || string(frame.Payload) != "manager" {
|
||||
t.Fatalf("frame = %+v", frame)
|
||||
}
|
||||
case err := <-first.Errors():
|
||||
t.Fatalf("pump error: %v", err)
|
||||
case <-ctx.Done():
|
||||
t.Fatal(ctx.Err())
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricSessionPeerManagerClosePeerReopens(t *testing.T) {
|
||||
var opened int
|
||||
server := httptest.NewServer(Server{
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
FabricSessionWebSocketEnabled: true,
|
||||
FabricSessionLogger: func(entry FabricSessionEventLogEntry) {
|
||||
if entry.Event == "fabric_session_websocket_opened" {
|
||||
opened++
|
||||
}
|
||||
},
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
manager := NewFabricSessionPeerManager()
|
||||
defer manager.Close()
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||
defer cancel()
|
||||
target := FabricSessionPeerTarget{
|
||||
PeerID: "node-a",
|
||||
BaseURL: server.URL,
|
||||
Options: FabricSessionDialOptions{
|
||||
Token: "rap_fsn_manager_reopen",
|
||||
Timeout: time.Second,
|
||||
},
|
||||
}
|
||||
|
||||
first, err := manager.Get(ctx, target)
|
||||
if err != nil {
|
||||
t.Fatalf("first get: %v", err)
|
||||
}
|
||||
if err := manager.ClosePeer(target); err != nil {
|
||||
t.Fatalf("close peer: %v", err)
|
||||
}
|
||||
second, err := manager.Get(ctx, target)
|
||||
if err != nil {
|
||||
t.Fatalf("second get: %v", err)
|
||||
}
|
||||
if first == second {
|
||||
t.Fatal("manager reused pump after close peer")
|
||||
}
|
||||
if opened != 2 {
|
||||
t.Fatalf("opened sessions = %d, want 2", opened)
|
||||
}
|
||||
if snapshot := manager.Snapshot(); snapshot.Stats.ClosePeerCalls != 1 || snapshot.Stats.Opens != 2 {
|
||||
t.Fatalf("snapshot = %+v", snapshot)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricSessionPeerManagerReopensClosedPump(t *testing.T) {
|
||||
var opened int
|
||||
server := httptest.NewServer(Server{
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
FabricSessionWebSocketEnabled: true,
|
||||
FabricSessionLogger: func(entry FabricSessionEventLogEntry) {
|
||||
if entry.Event == "fabric_session_websocket_opened" {
|
||||
opened++
|
||||
}
|
||||
},
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
manager := NewFabricSessionPeerManager()
|
||||
defer manager.Close()
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||
defer cancel()
|
||||
target := FabricSessionPeerTarget{
|
||||
PeerID: "node-a",
|
||||
BaseURL: server.URL,
|
||||
Options: FabricSessionDialOptions{
|
||||
Token: "rap_fsn_manager_closed",
|
||||
Timeout: time.Second,
|
||||
},
|
||||
}
|
||||
|
||||
first, err := manager.Get(ctx, target)
|
||||
if err != nil {
|
||||
t.Fatalf("first get: %v", err)
|
||||
}
|
||||
if err := first.Close(); err != nil {
|
||||
t.Fatalf("close first pump: %v", err)
|
||||
}
|
||||
if !first.Closed() {
|
||||
t.Fatal("first pump should report closed")
|
||||
}
|
||||
second, err := manager.Get(ctx, target)
|
||||
if err != nil {
|
||||
t.Fatalf("second get: %v", err)
|
||||
}
|
||||
if first == second {
|
||||
t.Fatal("manager reused closed pump")
|
||||
}
|
||||
if opened != 2 {
|
||||
t.Fatalf("opened sessions = %d, want 2", opened)
|
||||
}
|
||||
snapshot := manager.Snapshot()
|
||||
if snapshot.ActiveCount != 1 ||
|
||||
snapshot.Stats.Opens != 2 ||
|
||||
snapshot.Stats.ClosedEvicted != 1 {
|
||||
t.Fatalf("snapshot = %+v", snapshot)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricSessionPeerManagerRejectsIncompleteTarget(t *testing.T) {
|
||||
manager := NewFabricSessionPeerManager()
|
||||
_, err := manager.Get(context.Background(), FabricSessionPeerTarget{PeerID: "node-a"})
|
||||
if err == nil {
|
||||
t.Fatal("incomplete target unexpectedly succeeded")
|
||||
}
|
||||
}
|
||||
@@ -308,7 +308,7 @@ func (m *PeerConnectionManager) probeIntent(ctx context.Context, intent PeerConn
|
||||
Transport: intent.Transport,
|
||||
PeerCertSHA256: intent.BestPeerCertSHA256,
|
||||
}}
|
||||
if intent.DirectCandidate {
|
||||
if intent.DirectCandidate || peerConnectionShouldProbeDirectUpgrade(intent, cacheEntry) {
|
||||
targets = peerConnectionProbeTargets(intent, cacheEntry)
|
||||
}
|
||||
var lastFailure string
|
||||
@@ -354,7 +354,9 @@ func (m *PeerConnectionManager) probeIntent(ctx context.Context, intent PeerConn
|
||||
result.SelectedCandidateID = probePeer.BestCandidateID
|
||||
result.SelectedEndpoint = probePeer.Endpoint
|
||||
result.LatencyMs = latency
|
||||
if intent.RelayCandidate {
|
||||
if probeTargetUsesDirectQUIC(probeTarget) {
|
||||
result.ConnectionState = m.tracker.RecordSuccessForPeer(probePeer, latency, completedAt)
|
||||
} else if intent.RelayCandidate {
|
||||
result.ConnectionState = m.tracker.RecordRelayReady(probePeer, latency, completedAt)
|
||||
} else {
|
||||
result.ConnectionState = m.tracker.RecordSuccessForPeer(probePeer, latency, completedAt)
|
||||
@@ -410,6 +412,10 @@ func (m *PeerConnectionManager) probePeerTarget(ctx context.Context, probePeer P
|
||||
func peerConnectionProbeTargets(intent PeerConnectionIntent, cacheEntry PeerCacheEntry) []peerConnectionProbeTarget {
|
||||
seen := map[string]struct{}{}
|
||||
out := make([]peerConnectionProbeTarget, 0, len(cacheEntry.EndpointCandidates)+1)
|
||||
fallbackPeerCertSHA256 := firstNonEmpty(
|
||||
strings.TrimSpace(cacheEntry.BestPeerCertSHA256),
|
||||
strings.TrimSpace(intent.BestPeerCertSHA256),
|
||||
)
|
||||
add := func(candidateID, endpoint, transport, peerCertSHA256 string) {
|
||||
endpoint = strings.TrimRight(strings.TrimSpace(endpoint), "/")
|
||||
if endpoint == "" {
|
||||
@@ -423,6 +429,9 @@ func peerConnectionProbeTargets(intent PeerConnectionIntent, cacheEntry PeerCach
|
||||
return
|
||||
}
|
||||
seen[key] = struct{}{}
|
||||
if strings.TrimSpace(peerCertSHA256) == "" {
|
||||
peerCertSHA256 = fallbackPeerCertSHA256
|
||||
}
|
||||
out = append(out, peerConnectionProbeTarget{
|
||||
CandidateID: strings.TrimSpace(candidateID),
|
||||
Endpoint: endpoint,
|
||||
@@ -440,6 +449,31 @@ func peerConnectionProbeTargets(intent PeerConnectionIntent, cacheEntry PeerCach
|
||||
return out
|
||||
}
|
||||
|
||||
func peerConnectionShouldProbeDirectUpgrade(intent PeerConnectionIntent, cacheEntry PeerCacheEntry) bool {
|
||||
if intent.DirectCandidate {
|
||||
return true
|
||||
}
|
||||
if strings.TrimSpace(intent.ConnectionState) != PeerConnectionRelayReady &&
|
||||
!intent.RelayCandidate &&
|
||||
strings.TrimSpace(intent.TransportMode) != PeerTransportModeRelayControl {
|
||||
return false
|
||||
}
|
||||
for _, candidate := range cacheEntry.EndpointCandidates {
|
||||
if candidateUsableForDirectProbe(candidate) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func probeTargetUsesDirectQUIC(target peerConnectionProbeTarget) bool {
|
||||
transport := strings.ToLower(strings.TrimSpace(target.Transport))
|
||||
if strings.Contains(transport, "relay") || strings.Contains(transport, "reverse") || strings.Contains(transport, "outbound") {
|
||||
return false
|
||||
}
|
||||
return peerConnectionTargetIsQUIC(target.Transport, target.Endpoint)
|
||||
}
|
||||
|
||||
func peerConnectionTargetIsQUIC(transport string, endpoint string) bool {
|
||||
return isQUICOnlyCandidateTransport(transport) || strings.HasPrefix(strings.ToLower(strings.TrimSpace(endpoint)), "quic://")
|
||||
}
|
||||
|
||||
@@ -221,6 +221,125 @@ func TestPeerConnectionProbeTargetKeepsPeerForLocalRelayReverseQUIC(t *testing.T
|
||||
}
|
||||
}
|
||||
|
||||
func TestPeerConnectionProbeTargetsFallsBackToBestPeerCertSHA256(t *testing.T) {
|
||||
intent := PeerConnectionIntent{
|
||||
NodeID: "node-b",
|
||||
BestPeerCertSHA256: "intent-cert",
|
||||
}
|
||||
cacheEntry := PeerCacheEntry{
|
||||
NodeID: "node-b",
|
||||
BestPeerCertSHA256: "cache-cert",
|
||||
BestCandidateID: "node-b-best",
|
||||
BestTransport: "direct_quic",
|
||||
Endpoint: "quic://94.141.118.222:19199",
|
||||
EndpointCandidates: []PeerEndpointCandidate{
|
||||
{
|
||||
EndpointID: "node-b-public",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_quic",
|
||||
Address: "quic://94.141.118.222:19199",
|
||||
Reachability: "public",
|
||||
ConnectivityMode: "direct",
|
||||
Priority: 1,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
targets := peerConnectionProbeTargets(intent, cacheEntry)
|
||||
if len(targets) != 1 {
|
||||
t.Fatalf("target count = %d, want 1", len(targets))
|
||||
}
|
||||
for _, target := range targets {
|
||||
if target.Endpoint != "quic://94.141.118.222:19199" {
|
||||
continue
|
||||
}
|
||||
if target.PeerCertSHA256 != "cache-cert" {
|
||||
t.Fatalf("peer cert = %q, want cache-cert", target.PeerCertSHA256)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestPeerConnectionProbeTargetsUpgradeRelayReadyPeerToDirectQUIC(t *testing.T) {
|
||||
now := time.Date(2026, 5, 18, 12, 0, 0, 0, time.UTC)
|
||||
current := now
|
||||
tlsConfig := testQUICTLSConfig(t)
|
||||
server, err := StartQUICFabricServer(context.Background(), QUICFabricServerConfig{
|
||||
ListenAddr: "127.0.0.1:0",
|
||||
TLSConfig: tlsConfig,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("start quic fabric server: %v", err)
|
||||
}
|
||||
defer server.Close()
|
||||
|
||||
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"}
|
||||
certSHA256 := testQUICCertSHA256(t, tlsConfig)
|
||||
leases := []PeerRendezvousLease{{
|
||||
LeaseID: "lease-node-b-via-node-r",
|
||||
PeerNodeID: "node-b",
|
||||
RelayNodeID: "node-r",
|
||||
RelayEndpoint: "quic://127.0.0.1:1",
|
||||
Transport: "relay_quic",
|
||||
ConnectivityMode: "relay_required",
|
||||
Priority: 10,
|
||||
ControlPlaneOnly: true,
|
||||
IssuedAt: now.Add(-time.Minute),
|
||||
ExpiresAt: now.Add(time.Minute),
|
||||
}}
|
||||
cache := NewPeerCache(PeerCacheConfig{
|
||||
Local: local,
|
||||
PeerEndpointCandidates: map[string][]PeerEndpointCandidate{
|
||||
"node-b": {
|
||||
{
|
||||
EndpointID: "node-b-direct",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_quic",
|
||||
Address: "quic://" + server.Addr().String(),
|
||||
Reachability: "public",
|
||||
ConnectivityMode: "direct",
|
||||
Priority: 1,
|
||||
Metadata: peerConnectionProbeMetadata(t, certSHA256),
|
||||
},
|
||||
},
|
||||
},
|
||||
RendezvousLeases: leases,
|
||||
WarmPeerLimit: 1,
|
||||
Now: now,
|
||||
})
|
||||
tracker := NewPeerConnectionTracker(cache.Snapshot(), now)
|
||||
manager := NewPeerConnectionManager(PeerConnectionManagerConfig{
|
||||
Local: local,
|
||||
PeerCache: cache,
|
||||
Tracker: tracker,
|
||||
RendezvousLeases: leases,
|
||||
QUICTransport: NewQUICFabricTransport(nil),
|
||||
ProbeTimeout: time.Second,
|
||||
Now: func() time.Time {
|
||||
current = current.Add(10 * time.Millisecond)
|
||||
return current
|
||||
},
|
||||
})
|
||||
|
||||
cycle := manager.ProbeOnce(context.Background())
|
||||
if cycle.Attempted != 1 || cycle.Succeeded != 1 || len(cycle.Results) != 1 {
|
||||
t.Fatalf("unexpected cycle: %+v", cycle)
|
||||
}
|
||||
result := cycle.Results[0]
|
||||
if result.SelectedCandidateID != "node-b-direct" || result.SelectedEndpoint != "quic://"+server.Addr().String() {
|
||||
t.Fatalf("relay-ready peer did not upgrade to direct candidate: %+v", result)
|
||||
}
|
||||
if result.ConnectionState.State != PeerConnectionReady {
|
||||
t.Fatalf("connection state = %q, want ready", result.ConnectionState.State)
|
||||
}
|
||||
if len(result.CandidateResults) == 0 || result.CandidateResults[0].Transport != "direct_quic" || result.CandidateResults[0].LinkStatus != PeerConnectionProbeReachable {
|
||||
t.Fatalf("candidate trail missing direct probe success: %+v", result.CandidateResults)
|
||||
}
|
||||
snapshot := tracker.Snapshot()
|
||||
if snapshot.Ready != 1 || snapshot.RelayReady != 0 {
|
||||
t.Fatalf("unexpected tracker snapshot after direct upgrade: %+v", snapshot)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPeerConnectionManagerFallsBackAcrossEndpointCandidates(t *testing.T) {
|
||||
now := time.Date(2026, 4, 30, 12, 0, 0, 0, time.UTC)
|
||||
current := now
|
||||
|
||||
@@ -102,8 +102,11 @@ func PlanPeerRecovery(cfg PeerRecoveryPlanConfig) PeerRecoveryPlan {
|
||||
continue
|
||||
}
|
||||
switch connection.State {
|
||||
case PeerConnectionReady, PeerConnectionRelayReady:
|
||||
case PeerConnectionReady:
|
||||
ready++
|
||||
case PeerConnectionRelayReady:
|
||||
// Relay-ready peers remain valuable for control-plane reachability,
|
||||
// but they do not satisfy the target for direct-ready transport paths.
|
||||
case PeerConnectionDegraded:
|
||||
degraded++
|
||||
case PeerConnectionBackoff:
|
||||
|
||||
@@ -69,7 +69,7 @@ func TestPeerRecoveryPlanAddsRecoverySeedWhenReadyDeficit(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestPeerRecoveryPlanMaintainsRelayReadyPeersInSteadyMode(t *testing.T) {
|
||||
func TestPeerRecoveryPlanTreatsRelayReadyPeersAsRecoveryGap(t *testing.T) {
|
||||
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
||||
plan := PlanPeerRecovery(PeerRecoveryPlanConfig{
|
||||
PeerCache: PeerCacheSnapshot{
|
||||
@@ -92,12 +92,15 @@ func TestPeerRecoveryPlanMaintainsRelayReadyPeersInSteadyMode(t *testing.T) {
|
||||
Now: now,
|
||||
})
|
||||
|
||||
if plan.Mode != PeerRecoveryModeSteady || !plan.Healthy {
|
||||
t.Fatalf("unexpected steady plan: %+v", plan)
|
||||
if plan.Mode != PeerRecoveryModeRecovery || plan.Healthy {
|
||||
t.Fatalf("unexpected relay-ready recovery plan: %+v", plan)
|
||||
}
|
||||
if !recoveryPlanHasCandidate(plan, "node-c", "maintain_ready") {
|
||||
t.Fatalf("relay-ready peer was not maintained: %+v", plan.Candidates)
|
||||
}
|
||||
if plan.ReadyPeerCount != 0 || plan.Deficit != 1 {
|
||||
t.Fatalf("relay-ready peer should not satisfy direct-ready target: %+v", plan)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPeerRecoveryPlanCapsTargetByConnectablePeers(t *testing.T) {
|
||||
|
||||
@@ -0,0 +1,713 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/ed25519"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
|
||||
)
|
||||
|
||||
// FabricRegistryGossipRecordSchema is the schema version string for fabric
// registry gossip records.
const FabricRegistryGossipRecordSchema = "rap.fabric.registry.gossip_record.v1"

// Registry record scopes.
const (
	FabricRegistryScopeFarm         = "farm"
	FabricRegistryScopeCluster      = "cluster"
	FabricRegistryScopeOrganization = "organization"
)

// Service names used in registry records.
const (
	FabricRegistryServiceControlAPI  = "control-api"
	FabricRegistryServiceUpdateStore = "update-store"
	FabricRegistryServiceUpdateCache = "update-cache"
	FabricRegistryServiceWebAdmin    = "web-admin"
	FabricRegistryServiceVPNExitPool = "vpn-egress-pool"
)

// Issuer authority role names.
const (
	FabricRegistryAuthorityControl = "control-authority"
	FabricRegistryAuthorityUpdate  = "update-authority"
	FabricRegistryAuthorityStorage = "storage-authority"
	FabricRegistryAuthorityRoute   = "route-authority"
)
|
||||
|
||||
// FabricRegistryEndpoint is one endpoint listed in a registry gossip record.
// Only the ID, address and transport are always serialized; every other field
// is omitted from JSON when empty.
type FabricRegistryEndpoint struct {
	EndpointID       string `json:"endpoint_id"`
	Address          string `json:"address"`
	Transport        string `json:"transport"`
	Reachability     string `json:"reachability,omitempty"`
	ConnectivityMode string `json:"connectivity_mode,omitempty"`
	Region           string `json:"region,omitempty"`
	Priority         int    `json:"priority,omitempty"`
	Weight           int    `json:"weight,omitempty"`
	PeerCertSHA256   string `json:"peer_cert_sha256,omitempty"`
	// LastVerifiedAt is nil when no verification time has been recorded.
	LastVerifiedAt *time.Time `json:"last_verified_at,omitempty"`
	// Metadata is carried as raw JSON without being decoded here.
	Metadata json.RawMessage `json:"metadata,omitempty"`
}
|
||||
|
||||
// FabricRegistrySignature is one signature attached to a gossip record.
type FabricRegistrySignature struct {
	KeyID    string `json:"key_id"`
	IssuerID string `json:"issuer_id"`
	Role     string `json:"role"`
	// Alg names the signature algorithm, e.g. "ed25519".
	Alg string `json:"alg"`
	// Value is the hex-encoded signature.
	Value string `json:"value"`
}
|
||||
|
||||
type FabricRegistryGossipRecord struct {
|
||||
SchemaVersion string `json:"schema_version"`
|
||||
ClusterID string `json:"cluster_id"`
|
||||
Service string `json:"service"`
|
||||
Scope string `json:"scope"`
|
||||
OrganizationID string `json:"organization_id,omitempty"`
|
||||
Epoch int64 `json:"epoch"`
|
||||
Generation string `json:"generation,omitempty"`
|
||||
IssuedAt time.Time `json:"issued_at"`
|
||||
ExpiresAt time.Time `json:"expires_at"`
|
||||
IssuerNodeID string `json:"issuer_node_id"`
|
||||
IssuerRole string `json:"issuer_role"`
|
||||
Endpoints []FabricRegistryEndpoint `json:"endpoints"`
|
||||
Metadata json.RawMessage `json:"metadata,omitempty"`
|
||||
Signatures []FabricRegistrySignature `json:"signatures,omitempty"`
|
||||
}
|
||||
|
||||
// FabricRegistryTrustedIssuer describes an issuer whose signatures may be
// accepted on gossip records.
type FabricRegistryTrustedIssuer struct {
	// IssuerID and Role identify the issuer; signatures are matched against
	// them during verification.
	IssuerID string
	Role     string
	// PublicKey is the Ed25519 key used to check the issuer's signatures.
	PublicKey ed25519.PublicKey
	// Scopes and Services presumably restrict which records the issuer may
	// sign — NOTE(review): confirm against fabricRegistryIssuerAllowed.
	Scopes   []string
	Services []string
}
|
||||
|
||||
type FabricRegistryVerificationPolicy struct {
|
||||
LocalClusterID string
|
||||
TrustedIssuers []FabricRegistryTrustedIssuer
|
||||
RequiredSignatures int
|
||||
MaxClockSkew time.Duration
|
||||
Now time.Time
|
||||
}
|
||||
|
||||
// FabricRegistryVerificationResult is the outcome of verifying a gossip
// record.
type FabricRegistryVerificationResult struct {
	AcceptedSignatureCount int      `json:"accepted_signature_count"`
	AcceptedIssuers        []string `json:"accepted_issuers,omitempty"`
	// RecordHash is the hex-encoded SHA-256 of the record's canonical payload.
	RecordHash string `json:"record_hash"`
}
|
||||
|
||||
// FabricRegistryEntryState is the lifecycle state of a registry entry.
type FabricRegistryEntryState string

// Registry entry states.
const (
	// FabricRegistryCandidate marks a verified record that has not been
	// live-verified yet.
	FabricRegistryCandidate FabricRegistryEntryState = "candidate"
	// FabricRegistryActive marks a record that has been promoted.
	FabricRegistryActive   FabricRegistryEntryState = "active"
	FabricRegistryExpired  FabricRegistryEntryState = "expired"
	FabricRegistryRejected FabricRegistryEntryState = "rejected"
)
|
||||
|
||||
type FabricRegistryEntry struct {
|
||||
Record FabricRegistryGossipRecord `json:"record"`
|
||||
State FabricRegistryEntryState `json:"state"`
|
||||
AcceptedAt time.Time `json:"accepted_at"`
|
||||
PromotedAt *time.Time `json:"promoted_at,omitempty"`
|
||||
VerifyResult FabricRegistryVerificationResult `json:"verify_result"`
|
||||
}
|
||||
|
||||
// FabricRegistryBootstrapReport summarizes the outcome of loading bootstrap
// records into a registry.
type FabricRegistryBootstrapReport struct {
	// Total is the number of records decoded from the input.
	Total     int `json:"total"`
	Active    int `json:"active"`
	Candidate int `json:"candidate"`
	Rejected  int `json:"rejected"`
	// Rejects holds the error message of each rejected record.
	Rejects []string `json:"rejects,omitempty"`
	// RecordKeys holds the registry key of each record that changed the registry.
	RecordKeys []string `json:"record_keys,omitempty"`
}
|
||||
|
||||
// FabricRegistryResolveRequest is the input to FabricRegistry.ResolveService.
type FabricRegistryResolveRequest struct {
	ClusterID string
	// Service is trimmed and lower-cased before lookup; it must not be empty.
	Service string
	Scope   string
	// OrganizationID is only used for organization-scoped lookups.
	OrganizationID  string
	PreferredRegion string
	// Now is the reference time for expiry checks.
	Now time.Time
}
|
||||
|
||||
type FabricRegistryResolvedService struct {
|
||||
Found bool `json:"found"`
|
||||
Service string `json:"service"`
|
||||
Scope string `json:"scope,omitempty"`
|
||||
OrganizationID string `json:"organization_id,omitempty"`
|
||||
RecordEpoch int64 `json:"record_epoch,omitempty"`
|
||||
RecordHash string `json:"record_hash,omitempty"`
|
||||
Endpoints []FabricRegistryEndpoint `json:"endpoints,omitempty"`
|
||||
Reason string `json:"reason,omitempty"`
|
||||
}
|
||||
|
||||
// FabricRegistryLiveProbeRequest is the input to
// FabricRegistry.VerifyCandidates.
type FabricRegistryLiveProbeRequest struct {
	// ClusterID, when non-blank, restricts probing to that cluster's candidates.
	ClusterID       string
	PreferredRegion string
	// Timeout bounds each endpoint probe; values <= 0 default to two seconds.
	Timeout time.Duration
	Now     time.Time
	// MaxCandidates caps how many candidates are probed; values <= 0 default to 16.
	MaxCandidates int
}
|
||||
|
||||
// FabricRegistryLiveProbeResult reports the outcome of probing one candidate
// record.
type FabricRegistryLiveProbeResult struct {
	Service        string `json:"service"`
	Scope          string `json:"scope"`
	OrganizationID string `json:"organization_id,omitempty"`
	// EndpointID and Address identify the endpoint probed last.
	EndpointID string `json:"endpoint_id,omitempty"`
	Address    string `json:"address,omitempty"`
	// Status is "reachable" or "unreachable".
	Status    string `json:"status"`
	LatencyMs int64  `json:"latency_ms,omitempty"`
	// Promoted reports whether the candidate was promoted to active.
	Promoted bool   `json:"promoted"`
	Error    string `json:"error,omitempty"`
}
|
||||
|
||||
// FabricRegistrySnapshot counts and lists the unexpired active and candidate
// records of a registry.
type FabricRegistrySnapshot struct {
	Active        int      `json:"active"`
	Candidate     int      `json:"candidate"`
	ActiveKeys    []string `json:"active_keys,omitempty"`
	CandidateKeys []string `json:"candidate_keys,omitempty"`
}
|
||||
|
||||
type FabricRegistry struct {
|
||||
entries map[string]FabricRegistryEntry
|
||||
candidates map[string]FabricRegistryEntry
|
||||
}
|
||||
|
||||
func NewFabricRegistry() *FabricRegistry {
|
||||
return &FabricRegistry{entries: map[string]FabricRegistryEntry{}, candidates: map[string]FabricRegistryEntry{}}
|
||||
}
|
||||
|
||||
func LoadFabricRegistryBootstrapRecords(recordsJSON string, policy FabricRegistryVerificationPolicy, liveVerified bool) (*FabricRegistry, FabricRegistryBootstrapReport, error) {
|
||||
registry := NewFabricRegistry()
|
||||
recordsJSON = strings.TrimSpace(recordsJSON)
|
||||
if recordsJSON == "" {
|
||||
return registry, FabricRegistryBootstrapReport{}, nil
|
||||
}
|
||||
var records []FabricRegistryGossipRecord
|
||||
if err := json.Unmarshal([]byte(recordsJSON), &records); err != nil {
|
||||
return nil, FabricRegistryBootstrapReport{}, fmt.Errorf("decode fabric registry bootstrap records: %w", err)
|
||||
}
|
||||
report := FabricRegistryBootstrapReport{Total: len(records)}
|
||||
for _, record := range records {
|
||||
entry, changed, err := registry.ApplyGossipRecord(record, policy, liveVerified)
|
||||
if err != nil {
|
||||
report.Rejected++
|
||||
report.Rejects = append(report.Rejects, err.Error())
|
||||
continue
|
||||
}
|
||||
if !changed {
|
||||
continue
|
||||
}
|
||||
report.RecordKeys = append(report.RecordKeys, fabricRegistryRecordKey(record))
|
||||
switch entry.State {
|
||||
case FabricRegistryActive:
|
||||
report.Active++
|
||||
case FabricRegistryCandidate:
|
||||
report.Candidate++
|
||||
}
|
||||
}
|
||||
return registry, report, nil
|
||||
}
|
||||
|
||||
func (r *FabricRegistry) ApplyGossipRecord(record FabricRegistryGossipRecord, policy FabricRegistryVerificationPolicy, liveVerified bool) (FabricRegistryEntry, bool, error) {
|
||||
if r == nil {
|
||||
return FabricRegistryEntry{}, false, fmt.Errorf("fabric registry is nil")
|
||||
}
|
||||
result, err := VerifyFabricRegistryGossipRecord(record, policy)
|
||||
if err != nil {
|
||||
return FabricRegistryEntry{}, false, err
|
||||
}
|
||||
now := registryNow(policy.Now)
|
||||
key := fabricRegistryRecordKey(record)
|
||||
current, exists := r.entries[key]
|
||||
if exists && !fabricRegistryRecordNewer(record, current.Record, now) {
|
||||
return current, false, nil
|
||||
}
|
||||
state := FabricRegistryCandidate
|
||||
var promotedAt *time.Time
|
||||
if liveVerified {
|
||||
state = FabricRegistryActive
|
||||
t := now
|
||||
promotedAt = &t
|
||||
}
|
||||
entry := FabricRegistryEntry{
|
||||
Record: normalizeFabricRegistryRecord(record),
|
||||
State: state,
|
||||
AcceptedAt: now,
|
||||
PromotedAt: promotedAt,
|
||||
VerifyResult: result,
|
||||
}
|
||||
if state == FabricRegistryActive {
|
||||
r.entries[key] = entry
|
||||
delete(r.candidates, key)
|
||||
return entry, true, nil
|
||||
}
|
||||
if r.candidates == nil {
|
||||
r.candidates = map[string]FabricRegistryEntry{}
|
||||
}
|
||||
r.candidates[key] = entry
|
||||
return entry, true, nil
|
||||
}
|
||||
|
||||
func (r *FabricRegistry) MarkLiveVerified(clusterID, service, scope, organizationID string, now time.Time) bool {
|
||||
if r == nil {
|
||||
return false
|
||||
}
|
||||
key := fabricRegistryKey(clusterID, service, scope, organizationID)
|
||||
entry, ok := r.candidates[key]
|
||||
if !ok || entry.State == FabricRegistryExpired || entry.State == FabricRegistryRejected {
|
||||
return false
|
||||
}
|
||||
t := registryNow(now)
|
||||
entry.State = FabricRegistryActive
|
||||
entry.PromotedAt = &t
|
||||
r.entries[key] = entry
|
||||
delete(r.candidates, key)
|
||||
return true
|
||||
}
|
||||
|
||||
func (r *FabricRegistry) Active(clusterID, service, scope, organizationID string, now time.Time) (FabricRegistryGossipRecord, bool) {
|
||||
if r == nil {
|
||||
return FabricRegistryGossipRecord{}, false
|
||||
}
|
||||
entry, ok := r.entries[fabricRegistryKey(clusterID, service, scope, organizationID)]
|
||||
if !ok || entry.State != FabricRegistryActive || !entry.Record.ExpiresAt.After(registryNow(now)) {
|
||||
return FabricRegistryGossipRecord{}, false
|
||||
}
|
||||
return entry.Record, true
|
||||
}
|
||||
|
||||
func (r *FabricRegistry) ResolveService(req FabricRegistryResolveRequest) FabricRegistryResolvedService {
|
||||
service := strings.ToLower(strings.TrimSpace(req.Service))
|
||||
if service == "" {
|
||||
return FabricRegistryResolvedService{Found: false, Reason: "service_required"}
|
||||
}
|
||||
scopeOrder := fabricRegistryScopeResolutionOrder(req.Scope, req.OrganizationID)
|
||||
for _, scope := range scopeOrder {
|
||||
organizationID := strings.TrimSpace(req.OrganizationID)
|
||||
if scope != FabricRegistryScopeOrganization {
|
||||
organizationID = ""
|
||||
}
|
||||
record, ok := r.Active(req.ClusterID, service, scope, organizationID, req.Now)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
endpoints := selectFabricRegistryEndpoints(record.Endpoints, req.PreferredRegion)
|
||||
if len(endpoints) == 0 {
|
||||
return FabricRegistryResolvedService{Found: false, Service: service, Scope: scope, OrganizationID: organizationID, Reason: "no_usable_endpoints"}
|
||||
}
|
||||
result, _ := canonicalFabricRegistryPayload(record)
|
||||
sum := sha256.Sum256(result)
|
||||
return FabricRegistryResolvedService{
|
||||
Found: true,
|
||||
Service: service,
|
||||
Scope: scope,
|
||||
OrganizationID: organizationID,
|
||||
RecordEpoch: record.Epoch,
|
||||
RecordHash: hex.EncodeToString(sum[:]),
|
||||
Endpoints: endpoints,
|
||||
}
|
||||
}
|
||||
return FabricRegistryResolvedService{Found: false, Service: service, Reason: "no_active_record"}
|
||||
}
|
||||
|
||||
func (r *FabricRegistry) Snapshot(now time.Time) FabricRegistrySnapshot {
|
||||
if r == nil {
|
||||
return FabricRegistrySnapshot{}
|
||||
}
|
||||
now = registryNow(now)
|
||||
out := FabricRegistrySnapshot{}
|
||||
for key, entry := range r.entries {
|
||||
if entry.State == FabricRegistryActive && entry.Record.ExpiresAt.After(now) {
|
||||
out.Active++
|
||||
out.ActiveKeys = append(out.ActiveKeys, key)
|
||||
}
|
||||
}
|
||||
for key, entry := range r.candidates {
|
||||
if entry.State == FabricRegistryCandidate && entry.Record.ExpiresAt.After(now) {
|
||||
out.Candidate++
|
||||
out.CandidateKeys = append(out.CandidateKeys, key)
|
||||
}
|
||||
}
|
||||
sort.Strings(out.ActiveKeys)
|
||||
sort.Strings(out.CandidateKeys)
|
||||
return out
|
||||
}
|
||||
|
||||
func (r *FabricRegistry) VerifyCandidates(ctx context.Context, transport FabricTransport, req FabricRegistryLiveProbeRequest) []FabricRegistryLiveProbeResult {
|
||||
if r == nil {
|
||||
return nil
|
||||
}
|
||||
now := registryNow(req.Now)
|
||||
timeout := req.Timeout
|
||||
if timeout <= 0 {
|
||||
timeout = 2 * time.Second
|
||||
}
|
||||
maxCandidates := req.MaxCandidates
|
||||
if maxCandidates <= 0 {
|
||||
maxCandidates = 16
|
||||
}
|
||||
candidates := make([]FabricRegistryEntry, 0, len(r.candidates))
|
||||
for _, entry := range r.candidates {
|
||||
if entry.State != FabricRegistryCandidate || !entry.Record.ExpiresAt.After(now) {
|
||||
continue
|
||||
}
|
||||
if clusterID := strings.TrimSpace(req.ClusterID); clusterID != "" && entry.Record.ClusterID != clusterID {
|
||||
continue
|
||||
}
|
||||
candidates = append(candidates, entry)
|
||||
}
|
||||
sort.SliceStable(candidates, func(i, j int) bool {
|
||||
if candidates[i].Record.Service != candidates[j].Record.Service {
|
||||
return candidates[i].Record.Service < candidates[j].Record.Service
|
||||
}
|
||||
if candidates[i].Record.Scope != candidates[j].Record.Scope {
|
||||
return candidates[i].Record.Scope < candidates[j].Record.Scope
|
||||
}
|
||||
return candidates[i].Record.Epoch > candidates[j].Record.Epoch
|
||||
})
|
||||
if len(candidates) > maxCandidates {
|
||||
candidates = candidates[:maxCandidates]
|
||||
}
|
||||
results := make([]FabricRegistryLiveProbeResult, 0, len(candidates))
|
||||
for _, entry := range candidates {
|
||||
record := entry.Record
|
||||
result := FabricRegistryLiveProbeResult{
|
||||
Service: record.Service,
|
||||
Scope: record.Scope,
|
||||
OrganizationID: record.OrganizationID,
|
||||
Status: "unreachable",
|
||||
}
|
||||
endpoints := selectFabricRegistryEndpoints(record.Endpoints, req.PreferredRegion)
|
||||
if len(endpoints) == 0 {
|
||||
result.Error = "no_usable_endpoints"
|
||||
results = append(results, result)
|
||||
continue
|
||||
}
|
||||
for _, endpoint := range endpoints {
|
||||
probeCtx, cancel := context.WithTimeout(ctx, timeout)
|
||||
latency, err := probeFabricRegistryEndpoint(probeCtx, transport, endpoint, timeout)
|
||||
cancel()
|
||||
result.EndpointID = endpoint.EndpointID
|
||||
result.Address = endpoint.Address
|
||||
if err != nil {
|
||||
result.Error = err.Error()
|
||||
continue
|
||||
}
|
||||
result.Status = "reachable"
|
||||
result.LatencyMs = latency.Milliseconds()
|
||||
result.Promoted = r.MarkLiveVerified(record.ClusterID, record.Service, record.Scope, record.OrganizationID, now)
|
||||
result.Error = ""
|
||||
break
|
||||
}
|
||||
results = append(results, result)
|
||||
}
|
||||
return results
|
||||
}
|
||||
|
||||
func SignFabricRegistryGossipRecord(record FabricRegistryGossipRecord, issuer FabricRegistryTrustedIssuer, privateKey ed25519.PrivateKey) (FabricRegistryGossipRecord, error) {
|
||||
payload, err := canonicalFabricRegistryPayload(record)
|
||||
if err != nil {
|
||||
return record, err
|
||||
}
|
||||
sig := ed25519.Sign(privateKey, payload)
|
||||
record.Signatures = append(record.Signatures, FabricRegistrySignature{
|
||||
KeyID: firstNonEmpty(issuer.IssuerID, record.IssuerNodeID),
|
||||
IssuerID: firstNonEmpty(issuer.IssuerID, record.IssuerNodeID),
|
||||
Role: firstNonEmpty(issuer.Role, record.IssuerRole),
|
||||
Alg: "ed25519",
|
||||
Value: hex.EncodeToString(sig),
|
||||
})
|
||||
return record, nil
|
||||
}
|
||||
|
||||
func VerifyFabricRegistryGossipRecord(record FabricRegistryGossipRecord, policy FabricRegistryVerificationPolicy) (FabricRegistryVerificationResult, error) {
|
||||
record = normalizeFabricRegistryRecord(record)
|
||||
if err := validateFabricRegistryGossipRecord(record, policy); err != nil {
|
||||
return FabricRegistryVerificationResult{}, err
|
||||
}
|
||||
payload, err := canonicalFabricRegistryPayload(record)
|
||||
if err != nil {
|
||||
return FabricRegistryVerificationResult{}, err
|
||||
}
|
||||
sum := sha256.Sum256(payload)
|
||||
trusted := map[string]FabricRegistryTrustedIssuer{}
|
||||
for _, issuer := range policy.TrustedIssuers {
|
||||
if strings.TrimSpace(issuer.IssuerID) != "" {
|
||||
trusted[issuer.IssuerID] = issuer
|
||||
}
|
||||
if strings.TrimSpace(issuer.IssuerID) != "" && strings.TrimSpace(issuer.Role) != "" {
|
||||
trusted[issuer.IssuerID+"\x00"+issuer.Role] = issuer
|
||||
}
|
||||
}
|
||||
accepted := map[string]struct{}{}
|
||||
for _, signature := range record.Signatures {
|
||||
if strings.ToLower(strings.TrimSpace(signature.Alg)) != "ed25519" {
|
||||
continue
|
||||
}
|
||||
issuer, ok := trusted[strings.TrimSpace(signature.IssuerID)+"\x00"+strings.TrimSpace(signature.Role)]
|
||||
if !ok {
|
||||
issuer, ok = trusted[strings.TrimSpace(signature.IssuerID)]
|
||||
}
|
||||
if !ok || !fabricRegistryIssuerAllowed(issuer, record) {
|
||||
continue
|
||||
}
|
||||
rawSig, err := hex.DecodeString(strings.TrimSpace(signature.Value))
|
||||
if err != nil || len(rawSig) != ed25519.SignatureSize || len(issuer.PublicKey) != ed25519.PublicKeySize {
|
||||
continue
|
||||
}
|
||||
if ed25519.Verify(issuer.PublicKey, payload, rawSig) {
|
||||
accepted[signature.IssuerID] = struct{}{}
|
||||
}
|
||||
}
|
||||
required := policy.RequiredSignatures
|
||||
if required <= 0 {
|
||||
required = 1
|
||||
}
|
||||
if len(accepted) < required {
|
||||
return FabricRegistryVerificationResult{RecordHash: hex.EncodeToString(sum[:])}, fmt.Errorf("fabric registry gossip record lacks required trusted signatures")
|
||||
}
|
||||
issuers := make([]string, 0, len(accepted))
|
||||
for issuer := range accepted {
|
||||
issuers = append(issuers, issuer)
|
||||
}
|
||||
sort.Strings(issuers)
|
||||
return FabricRegistryVerificationResult{
|
||||
AcceptedSignatureCount: len(accepted),
|
||||
AcceptedIssuers: issuers,
|
||||
RecordHash: hex.EncodeToString(sum[:]),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func validateFabricRegistryGossipRecord(record FabricRegistryGossipRecord, policy FabricRegistryVerificationPolicy) error {
|
||||
if record.SchemaVersion != FabricRegistryGossipRecordSchema {
|
||||
return fmt.Errorf("fabric registry gossip record schema_version is invalid")
|
||||
}
|
||||
if strings.TrimSpace(record.ClusterID) == "" || (strings.TrimSpace(policy.LocalClusterID) != "" && record.ClusterID != policy.LocalClusterID) {
|
||||
return ErrClusterMismatch
|
||||
}
|
||||
if strings.TrimSpace(record.Service) == "" || strings.TrimSpace(record.Scope) == "" || strings.TrimSpace(record.IssuerNodeID) == "" || strings.TrimSpace(record.IssuerRole) == "" {
|
||||
return fmt.Errorf("fabric registry gossip record is missing service, scope, or issuer")
|
||||
}
|
||||
if record.Epoch <= 0 || record.IssuedAt.IsZero() || record.ExpiresAt.IsZero() || !record.ExpiresAt.After(record.IssuedAt) {
|
||||
return fmt.Errorf("fabric registry gossip record has invalid epoch or validity window")
|
||||
}
|
||||
now := registryNow(policy.Now)
|
||||
skew := policy.MaxClockSkew
|
||||
if skew <= 0 {
|
||||
skew = time.Minute
|
||||
}
|
||||
if record.IssuedAt.After(now.Add(skew)) || !record.ExpiresAt.After(now) {
|
||||
return fmt.Errorf("fabric registry gossip record is not currently valid")
|
||||
}
|
||||
if len(record.Endpoints) == 0 {
|
||||
return fmt.Errorf("fabric registry gossip record has no endpoints")
|
||||
}
|
||||
for _, endpoint := range record.Endpoints {
|
||||
if strings.TrimSpace(endpoint.EndpointID) == "" || strings.TrimSpace(endpoint.Address) == "" || strings.TrimSpace(endpoint.Transport) == "" {
|
||||
return fmt.Errorf("fabric registry gossip record contains invalid endpoint")
|
||||
}
|
||||
if !isQUICOnlyCandidateTransport(endpoint.Transport) || hasLegacyEndpointScheme(endpoint.Address) {
|
||||
return fmt.Errorf("fabric registry gossip endpoint must be QUIC-only")
|
||||
}
|
||||
if len(endpoint.Metadata) > 0 && !json.Valid(endpoint.Metadata) {
|
||||
return fmt.Errorf("fabric registry gossip endpoint metadata is invalid")
|
||||
}
|
||||
}
|
||||
if len(record.Metadata) > 0 && !json.Valid(record.Metadata) {
|
||||
return fmt.Errorf("fabric registry gossip metadata is invalid")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func canonicalFabricRegistryPayload(record FabricRegistryGossipRecord) ([]byte, error) {
|
||||
record = normalizeFabricRegistryRecord(record)
|
||||
record.Signatures = nil
|
||||
payload, err := json.Marshal(record)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var compact bytes.Buffer
|
||||
if err := json.Compact(&compact, payload); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return compact.Bytes(), nil
|
||||
}
|
||||
|
||||
func normalizeFabricRegistryRecord(record FabricRegistryGossipRecord) FabricRegistryGossipRecord {
|
||||
record.SchemaVersion = strings.TrimSpace(record.SchemaVersion)
|
||||
record.ClusterID = strings.TrimSpace(record.ClusterID)
|
||||
record.Service = strings.ToLower(strings.TrimSpace(record.Service))
|
||||
record.Scope = strings.ToLower(strings.TrimSpace(record.Scope))
|
||||
record.OrganizationID = strings.TrimSpace(record.OrganizationID)
|
||||
record.IssuerNodeID = strings.TrimSpace(record.IssuerNodeID)
|
||||
record.IssuerRole = strings.TrimSpace(record.IssuerRole)
|
||||
record.Generation = strings.TrimSpace(record.Generation)
|
||||
for i := range record.Endpoints {
|
||||
record.Endpoints[i].EndpointID = strings.TrimSpace(record.Endpoints[i].EndpointID)
|
||||
record.Endpoints[i].Address = strings.TrimSpace(record.Endpoints[i].Address)
|
||||
record.Endpoints[i].Transport = strings.TrimSpace(record.Endpoints[i].Transport)
|
||||
record.Endpoints[i].Reachability = strings.TrimSpace(record.Endpoints[i].Reachability)
|
||||
record.Endpoints[i].ConnectivityMode = strings.TrimSpace(record.Endpoints[i].ConnectivityMode)
|
||||
record.Endpoints[i].Region = strings.TrimSpace(record.Endpoints[i].Region)
|
||||
record.Endpoints[i].PeerCertSHA256 = normalizeCertSHA256(record.Endpoints[i].PeerCertSHA256)
|
||||
}
|
||||
sort.SliceStable(record.Endpoints, func(i, j int) bool {
|
||||
if record.Endpoints[i].Priority != record.Endpoints[j].Priority {
|
||||
return record.Endpoints[i].Priority < record.Endpoints[j].Priority
|
||||
}
|
||||
return record.Endpoints[i].EndpointID < record.Endpoints[j].EndpointID
|
||||
})
|
||||
sort.SliceStable(record.Signatures, func(i, j int) bool {
|
||||
if record.Signatures[i].IssuerID != record.Signatures[j].IssuerID {
|
||||
return record.Signatures[i].IssuerID < record.Signatures[j].IssuerID
|
||||
}
|
||||
return record.Signatures[i].KeyID < record.Signatures[j].KeyID
|
||||
})
|
||||
return record
|
||||
}
|
||||
|
||||
func fabricRegistryIssuerAllowed(issuer FabricRegistryTrustedIssuer, record FabricRegistryGossipRecord) bool {
|
||||
if strings.TrimSpace(issuer.Role) != "" && issuer.Role != record.IssuerRole {
|
||||
return false
|
||||
}
|
||||
if len(issuer.Scopes) > 0 && !stringInSlice(record.Scope, issuer.Scopes) {
|
||||
return false
|
||||
}
|
||||
if len(issuer.Services) > 0 && !stringInSlice(record.Service, issuer.Services) {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func fabricRegistryRecordKey(record FabricRegistryGossipRecord) string {
|
||||
return fabricRegistryKey(record.ClusterID, record.Service, record.Scope, record.OrganizationID)
|
||||
}
|
||||
|
||||
func fabricRegistryScopeResolutionOrder(scope string, organizationID string) []string {
|
||||
scope = strings.ToLower(strings.TrimSpace(scope))
|
||||
switch scope {
|
||||
case FabricRegistryScopeOrganization:
|
||||
if strings.TrimSpace(organizationID) != "" {
|
||||
return []string{FabricRegistryScopeOrganization, FabricRegistryScopeCluster, FabricRegistryScopeFarm}
|
||||
}
|
||||
return []string{FabricRegistryScopeCluster, FabricRegistryScopeFarm}
|
||||
case FabricRegistryScopeFarm:
|
||||
return []string{FabricRegistryScopeFarm}
|
||||
case FabricRegistryScopeCluster, "":
|
||||
return []string{FabricRegistryScopeCluster, FabricRegistryScopeFarm}
|
||||
default:
|
||||
return []string{scope, FabricRegistryScopeCluster, FabricRegistryScopeFarm}
|
||||
}
|
||||
}
|
||||
|
||||
func selectFabricRegistryEndpoints(endpoints []FabricRegistryEndpoint, preferredRegion string) []FabricRegistryEndpoint {
|
||||
preferredRegion = strings.TrimSpace(preferredRegion)
|
||||
out := make([]FabricRegistryEndpoint, 0, len(endpoints))
|
||||
for _, endpoint := range endpoints {
|
||||
if strings.TrimSpace(endpoint.Address) == "" || !isQUICOnlyCandidateTransport(endpoint.Transport) || hasLegacyEndpointScheme(endpoint.Address) {
|
||||
continue
|
||||
}
|
||||
out = append(out, endpoint)
|
||||
}
|
||||
sort.SliceStable(out, func(i, j int) bool {
|
||||
if preferredRegion != "" {
|
||||
iMatch := strings.EqualFold(out[i].Region, preferredRegion)
|
||||
jMatch := strings.EqualFold(out[j].Region, preferredRegion)
|
||||
if iMatch != jMatch {
|
||||
return iMatch
|
||||
}
|
||||
}
|
||||
if out[i].Priority != out[j].Priority {
|
||||
return out[i].Priority < out[j].Priority
|
||||
}
|
||||
if out[i].Weight != out[j].Weight {
|
||||
return out[i].Weight > out[j].Weight
|
||||
}
|
||||
return out[i].EndpointID < out[j].EndpointID
|
||||
})
|
||||
return out
|
||||
}
|
||||
|
||||
func probeFabricRegistryEndpoint(ctx context.Context, transport FabricTransport, endpoint FabricRegistryEndpoint, timeout time.Duration) (time.Duration, error) {
|
||||
if transport == nil {
|
||||
return 0, fmt.Errorf("fabric registry live probe transport is unavailable")
|
||||
}
|
||||
if timeout <= 0 {
|
||||
timeout = 2 * time.Second
|
||||
}
|
||||
target := FabricTransportTarget{
|
||||
EndpointID: endpoint.EndpointID,
|
||||
PeerID: endpoint.EndpointID,
|
||||
Endpoint: endpoint.Address,
|
||||
Transport: endpoint.Transport,
|
||||
PeerCertSHA256: endpoint.PeerCertSHA256,
|
||||
Timeout: timeout,
|
||||
InboundBuffer: 2,
|
||||
ErrorBuffer: 2,
|
||||
}
|
||||
startedAt := time.Now()
|
||||
session, err := transport.Connect(ctx, target)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
defer session.Close()
|
||||
sequence := uint64(startedAt.UnixNano())
|
||||
if err := session.Send(ctx, fabricproto.Frame{Type: fabricproto.FramePing, TrafficClass: fabricproto.TrafficClassReliable, Sequence: sequence, Payload: []byte("fabric-registry-live-probe")}); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
for {
|
||||
select {
|
||||
case frame, ok := <-session.Frames():
|
||||
if !ok {
|
||||
return 0, fmt.Errorf("fabric registry live probe session closed")
|
||||
}
|
||||
if frame.Type == fabricproto.FramePong && frame.Sequence == sequence {
|
||||
return time.Since(startedAt), nil
|
||||
}
|
||||
case err, ok := <-session.Errors():
|
||||
if !ok {
|
||||
return 0, fmt.Errorf("fabric registry live probe error channel closed")
|
||||
}
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
case <-ctx.Done():
|
||||
return 0, ctx.Err()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// fabricRegistryKey builds the registry map key from its four components,
// joined by NUL bytes. All components are whitespace-trimmed; service and
// scope are additionally lower-cased.
func fabricRegistryKey(clusterID, service, scope, organizationID string) string {
	parts := []string{
		strings.TrimSpace(clusterID),
		strings.ToLower(strings.TrimSpace(service)),
		strings.ToLower(strings.TrimSpace(scope)),
		strings.TrimSpace(organizationID),
	}
	return strings.Join(parts, "\x00")
}
|
||||
|
||||
func fabricRegistryRecordNewer(next, current FabricRegistryGossipRecord, now time.Time) bool {
|
||||
if !current.ExpiresAt.After(now) {
|
||||
return true
|
||||
}
|
||||
if next.Epoch != current.Epoch {
|
||||
return next.Epoch > current.Epoch
|
||||
}
|
||||
if !next.IssuedAt.Equal(current.IssuedAt) {
|
||||
return next.IssuedAt.After(current.IssuedAt)
|
||||
}
|
||||
return strings.TrimSpace(next.Generation) > strings.TrimSpace(current.Generation)
|
||||
}
|
||||
|
||||
// registryNow returns now converted to UTC, or the current UTC time when now
// is the zero time.
func registryNow(now time.Time) time.Time {
	if !now.IsZero() {
		return now.UTC()
	}
	return time.Now().UTC()
}
|
||||
|
||||
// stringInSlice reports whether value equals any element of values, comparing
// both sides after trimming surrounding whitespace. The comparison is
// case-sensitive.
func stringInSlice(value string, values []string) bool {
	needle := strings.TrimSpace(value)
	for i := range values {
		if needle == strings.TrimSpace(values[i]) {
			return true
		}
	}
	return false
}
|
||||
@@ -0,0 +1,280 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/ed25519"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestFabricRegistryGossipRecordRequiresTrustedSignature(t *testing.T) {
|
||||
now := time.Date(2026, 5, 18, 10, 0, 0, 0, time.UTC)
|
||||
publicKey, privateKey, err := ed25519.GenerateKey(nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
record := testFabricRegistryGossipRecord(now, 10)
|
||||
issuer := FabricRegistryTrustedIssuer{
|
||||
IssuerID: "authority-1",
|
||||
Role: FabricRegistryAuthorityControl,
|
||||
PublicKey: publicKey,
|
||||
Scopes: []string{FabricRegistryScopeCluster},
|
||||
Services: []string{FabricRegistryServiceControlAPI},
|
||||
}
|
||||
signed, err := SignFabricRegistryGossipRecord(record, issuer, privateKey)
|
||||
if err != nil {
|
||||
t.Fatalf("sign record: %v", err)
|
||||
}
|
||||
if _, err := VerifyFabricRegistryGossipRecord(signed, FabricRegistryVerificationPolicy{
|
||||
LocalClusterID: "cluster-1",
|
||||
TrustedIssuers: []FabricRegistryTrustedIssuer{issuer},
|
||||
RequiredSignatures: 1,
|
||||
Now: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("verify signed record: %v", err)
|
||||
}
|
||||
tampered := signed
|
||||
tampered.Endpoints[0].Address = "quic://10.10.10.10:19443"
|
||||
if _, err := VerifyFabricRegistryGossipRecord(tampered, FabricRegistryVerificationPolicy{
|
||||
LocalClusterID: "cluster-1",
|
||||
TrustedIssuers: []FabricRegistryTrustedIssuer{issuer},
|
||||
RequiredSignatures: 1,
|
||||
Now: now,
|
||||
}); err == nil {
|
||||
t.Fatal("tampered record verified")
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricRegistryRejectsLegacyEndpointAndExpiredRecord(t *testing.T) {
|
||||
now := time.Date(2026, 5, 18, 10, 0, 0, 0, time.UTC)
|
||||
publicKey, privateKey, err := ed25519.GenerateKey(nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
issuer := FabricRegistryTrustedIssuer{IssuerID: "authority-1", Role: FabricRegistryAuthorityControl, PublicKey: publicKey}
|
||||
record := testFabricRegistryGossipRecord(now, 10)
|
||||
record.Endpoints[0].Address = "https://control.example.test/api/v1"
|
||||
signed, err := SignFabricRegistryGossipRecord(record, issuer, privateKey)
|
||||
if err != nil {
|
||||
t.Fatalf("sign record: %v", err)
|
||||
}
|
||||
if _, err := VerifyFabricRegistryGossipRecord(signed, FabricRegistryVerificationPolicy{
|
||||
LocalClusterID: "cluster-1",
|
||||
TrustedIssuers: []FabricRegistryTrustedIssuer{
|
||||
{IssuerID: "authority-1", Role: FabricRegistryAuthorityControl, PublicKey: publicKey},
|
||||
},
|
||||
Now: now,
|
||||
}); err == nil {
|
||||
t.Fatal("legacy HTTP endpoint was accepted")
|
||||
}
|
||||
expired := testFabricRegistryGossipRecord(now.Add(-2*time.Hour), 11)
|
||||
expired.ExpiresAt = now.Add(-time.Minute)
|
||||
expiredSigned, err := SignFabricRegistryGossipRecord(expired, issuer, privateKey)
|
||||
if err != nil {
|
||||
t.Fatalf("sign expired record: %v", err)
|
||||
}
|
||||
if _, err := VerifyFabricRegistryGossipRecord(expiredSigned, FabricRegistryVerificationPolicy{
|
||||
LocalClusterID: "cluster-1",
|
||||
TrustedIssuers: []FabricRegistryTrustedIssuer{
|
||||
{IssuerID: "authority-1", Role: FabricRegistryAuthorityControl, PublicKey: publicKey},
|
||||
},
|
||||
Now: now,
|
||||
}); err == nil {
|
||||
t.Fatal("expired record was accepted")
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricRegistryKeepsActiveRecordUntilNewerVerified(t *testing.T) {
|
||||
now := time.Date(2026, 5, 18, 10, 0, 0, 0, time.UTC)
|
||||
publicKey, privateKey, err := ed25519.GenerateKey(nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
issuer := FabricRegistryTrustedIssuer{IssuerID: "authority-1", Role: FabricRegistryAuthorityControl, PublicKey: publicKey}
|
||||
policy := FabricRegistryVerificationPolicy{
|
||||
LocalClusterID: "cluster-1",
|
||||
TrustedIssuers: []FabricRegistryTrustedIssuer{issuer},
|
||||
RequiredSignatures: 1,
|
||||
Now: now,
|
||||
}
|
||||
registry := NewFabricRegistry()
|
||||
active, err := SignFabricRegistryGossipRecord(testFabricRegistryGossipRecord(now, 10), issuer, privateKey)
|
||||
if err != nil {
|
||||
t.Fatalf("sign active: %v", err)
|
||||
}
|
||||
entry, changed, err := registry.ApplyGossipRecord(active, policy, true)
|
||||
if err != nil || !changed || entry.State != FabricRegistryActive {
|
||||
t.Fatalf("apply active entry changed=%t entry=%+v err=%v", changed, entry, err)
|
||||
}
|
||||
old := testFabricRegistryGossipRecord(now.Add(time.Minute), 9)
|
||||
old.Endpoints[0].Address = "quic://192.0.2.9:19443"
|
||||
oldSigned, err := SignFabricRegistryGossipRecord(old, issuer, privateKey)
|
||||
if err != nil {
|
||||
t.Fatalf("sign old: %v", err)
|
||||
}
|
||||
entry, changed, err = registry.ApplyGossipRecord(oldSigned, policy, true)
|
||||
if err != nil {
|
||||
t.Fatalf("apply old: %v", err)
|
||||
}
|
||||
if changed || entry.Record.Epoch != 10 || entry.Record.Endpoints[0].Address != "quic://192.0.2.10:19443" {
|
||||
t.Fatalf("older record replaced active entry: changed=%t entry=%+v", changed, entry)
|
||||
}
|
||||
newer := testFabricRegistryGossipRecord(now.Add(2*time.Minute), 11)
|
||||
newer.Endpoints[0].Address = "quic://192.0.2.11:19443"
|
||||
newerSigned, err := SignFabricRegistryGossipRecord(newer, issuer, privateKey)
|
||||
if err != nil {
|
||||
t.Fatalf("sign newer: %v", err)
|
||||
}
|
||||
policy.Now = now.Add(2 * time.Minute)
|
||||
entry, changed, err = registry.ApplyGossipRecord(newerSigned, policy, false)
|
||||
if err != nil || !changed || entry.State != FabricRegistryCandidate {
|
||||
t.Fatalf("apply newer candidate changed=%t entry=%+v err=%v", changed, entry, err)
|
||||
}
|
||||
activeRecord, ok := registry.Active("cluster-1", FabricRegistryServiceControlAPI, FabricRegistryScopeCluster, "", policy.Now)
|
||||
if !ok || activeRecord.Endpoints[0].Address != "quic://192.0.2.10:19443" {
|
||||
t.Fatalf("unverified newer candidate displaced active fallback: ok=%t record=%+v", ok, activeRecord)
|
||||
}
|
||||
if !registry.MarkLiveVerified("cluster-1", FabricRegistryServiceControlAPI, FabricRegistryScopeCluster, "", policy.Now.Add(time.Second)) {
|
||||
t.Fatal("mark live verified failed")
|
||||
}
|
||||
activeRecord, ok = registry.Active("cluster-1", FabricRegistryServiceControlAPI, FabricRegistryScopeCluster, "", policy.Now.Add(time.Second))
|
||||
if !ok || activeRecord.Endpoints[0].Address != "quic://192.0.2.11:19443" {
|
||||
t.Fatalf("newer verified record not active: ok=%t record=%+v", ok, activeRecord)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricRegistryResolveServicePrefersVerifiedScopedRegionalEndpoint(t *testing.T) {
|
||||
now := time.Date(2026, 5, 18, 10, 0, 0, 0, time.UTC)
|
||||
publicKey, privateKey, err := ed25519.GenerateKey(nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
issuer := FabricRegistryTrustedIssuer{IssuerID: "authority-1", Role: FabricRegistryAuthorityControl, PublicKey: publicKey}
|
||||
policy := FabricRegistryVerificationPolicy{
|
||||
LocalClusterID: "cluster-1",
|
||||
TrustedIssuers: []FabricRegistryTrustedIssuer{issuer},
|
||||
RequiredSignatures: 1,
|
||||
Now: now,
|
||||
}
|
||||
registry := NewFabricRegistry()
|
||||
clusterRecord := testFabricRegistryGossipRecord(now, 10)
|
||||
clusterRecord.Endpoints = []FabricRegistryEndpoint{
|
||||
{EndpointID: "control-eu", Address: "quic://eu.example.test:19443", Transport: "direct_quic", Region: "eu", Priority: 10, Weight: 1},
|
||||
{EndpointID: "control-us", Address: "quic://us.example.test:19443", Transport: "direct_quic", Region: "us", Priority: 10, Weight: 10},
|
||||
}
|
||||
signedCluster, err := SignFabricRegistryGossipRecord(clusterRecord, issuer, privateKey)
|
||||
if err != nil {
|
||||
t.Fatalf("sign cluster record: %v", err)
|
||||
}
|
||||
if _, _, err := registry.ApplyGossipRecord(signedCluster, policy, true); err != nil {
|
||||
t.Fatalf("apply cluster record: %v", err)
|
||||
}
|
||||
orgRecord := testFabricRegistryGossipRecord(now.Add(time.Minute), 11)
|
||||
orgRecord.Scope = FabricRegistryScopeOrganization
|
||||
orgRecord.OrganizationID = "org-1"
|
||||
orgRecord.Endpoints = []FabricRegistryEndpoint{
|
||||
{EndpointID: "control-org", Address: "quic://org.example.test:19443", Transport: "direct_quic", Region: "eu", Priority: 1, Weight: 1},
|
||||
}
|
||||
signedOrg, err := SignFabricRegistryGossipRecord(orgRecord, issuer, privateKey)
|
||||
if err != nil {
|
||||
t.Fatalf("sign org record: %v", err)
|
||||
}
|
||||
policy.Now = now.Add(time.Minute)
|
||||
if _, _, err := registry.ApplyGossipRecord(signedOrg, policy, false); err != nil {
|
||||
t.Fatalf("apply org candidate: %v", err)
|
||||
}
|
||||
resolved := registry.ResolveService(FabricRegistryResolveRequest{
|
||||
ClusterID: "cluster-1",
|
||||
Service: FabricRegistryServiceControlAPI,
|
||||
Scope: FabricRegistryScopeOrganization,
|
||||
OrganizationID: "org-1",
|
||||
PreferredRegion: "us",
|
||||
Now: now.Add(time.Minute),
|
||||
})
|
||||
if !resolved.Found || resolved.Scope != FabricRegistryScopeCluster || resolved.Endpoints[0].EndpointID != "control-us" {
|
||||
t.Fatalf("expected cluster fallback with preferred region endpoint, got %+v", resolved)
|
||||
}
|
||||
if !registry.MarkLiveVerified("cluster-1", FabricRegistryServiceControlAPI, FabricRegistryScopeOrganization, "org-1", now.Add(2*time.Minute)) {
|
||||
t.Fatal("mark org live verified failed")
|
||||
}
|
||||
resolved = registry.ResolveService(FabricRegistryResolveRequest{
|
||||
ClusterID: "cluster-1",
|
||||
Service: FabricRegistryServiceControlAPI,
|
||||
Scope: FabricRegistryScopeOrganization,
|
||||
OrganizationID: "org-1",
|
||||
Now: now.Add(2 * time.Minute),
|
||||
})
|
||||
if !resolved.Found || resolved.Scope != FabricRegistryScopeOrganization || resolved.Endpoints[0].EndpointID != "control-org" {
|
||||
t.Fatalf("expected verified organization record, got %+v", resolved)
|
||||
}
|
||||
snapshot := registry.Snapshot(now.Add(2 * time.Minute))
|
||||
if snapshot.Active != 2 || snapshot.Candidate != 0 {
|
||||
t.Fatalf("unexpected snapshot: %+v", snapshot)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricRegistryVerifyCandidatesPromotesAfterQUICPong(t *testing.T) {
|
||||
now := time.Date(2026, 5, 18, 10, 0, 0, 0, time.UTC)
|
||||
tlsConfig := testQUICTLSConfig(t)
|
||||
listener := startQUICFabricEchoServerWithTLS(t, tlsConfig)
|
||||
defer listener.Close()
|
||||
publicKey, privateKey, err := ed25519.GenerateKey(nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
issuer := FabricRegistryTrustedIssuer{IssuerID: "authority-1", Role: FabricRegistryAuthorityControl, PublicKey: publicKey}
|
||||
policy := FabricRegistryVerificationPolicy{
|
||||
LocalClusterID: "cluster-1",
|
||||
TrustedIssuers: []FabricRegistryTrustedIssuer{issuer},
|
||||
RequiredSignatures: 1,
|
||||
Now: now,
|
||||
}
|
||||
record := testFabricRegistryGossipRecord(now, 12)
|
||||
record.Endpoints[0].Address = "quic://" + listener.Addr().String()
|
||||
record.Endpoints[0].PeerCertSHA256 = testQUICCertSHA256(t, tlsConfig)
|
||||
signed, err := SignFabricRegistryGossipRecord(record, issuer, privateKey)
|
||||
if err != nil {
|
||||
t.Fatalf("sign record: %v", err)
|
||||
}
|
||||
registry := NewFabricRegistry()
|
||||
if entry, changed, err := registry.ApplyGossipRecord(signed, policy, false); err != nil || !changed || entry.State != FabricRegistryCandidate {
|
||||
t.Fatalf("apply candidate changed=%t entry=%+v err=%v", changed, entry, err)
|
||||
}
|
||||
results := registry.VerifyCandidates(context.Background(), NewQUICFabricTransport(nil), FabricRegistryLiveProbeRequest{
|
||||
ClusterID: "cluster-1",
|
||||
Timeout: 3 * time.Second,
|
||||
Now: now.Add(time.Second),
|
||||
MaxCandidates: 1,
|
||||
})
|
||||
if len(results) != 1 || results[0].Status != "reachable" || !results[0].Promoted {
|
||||
t.Fatalf("unexpected live probe results: %+v", results)
|
||||
}
|
||||
if _, ok := registry.Active("cluster-1", FabricRegistryServiceControlAPI, FabricRegistryScopeCluster, "", now.Add(time.Second)); !ok {
|
||||
t.Fatal("candidate was not promoted to active")
|
||||
}
|
||||
}
|
||||
|
||||
func testFabricRegistryGossipRecord(now time.Time, epoch int64) FabricRegistryGossipRecord {
|
||||
return FabricRegistryGossipRecord{
|
||||
SchemaVersion: FabricRegistryGossipRecordSchema,
|
||||
ClusterID: "cluster-1",
|
||||
Service: FabricRegistryServiceControlAPI,
|
||||
Scope: FabricRegistryScopeCluster,
|
||||
Epoch: epoch,
|
||||
Generation: "gen",
|
||||
IssuedAt: now,
|
||||
ExpiresAt: now.Add(10 * time.Minute),
|
||||
IssuerNodeID: "authority-1",
|
||||
IssuerRole: FabricRegistryAuthorityControl,
|
||||
Endpoints: []FabricRegistryEndpoint{
|
||||
{
|
||||
EndpointID: "control-a",
|
||||
Address: "quic://192.0.2.10:19443",
|
||||
Transport: "direct_quic",
|
||||
Reachability: "public",
|
||||
ConnectivityMode: "direct",
|
||||
Priority: 1,
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -20,7 +20,6 @@ import (
|
||||
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/authority"
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
|
||||
"github.com/gorilla/websocket"
|
||||
)
|
||||
|
||||
// ProductionEnvelopeObserver is a callback that receives a context and a
// ProductionEnvelopeObservation and returns an error.
type ProductionEnvelopeObserver func(context.Context, ProductionEnvelopeObservation) error
|
||||
@@ -55,6 +54,22 @@ type RemoteWorkspaceFrameSinkSessionMailboxConsumerResume interface {
|
||||
type RemoteWorkspaceFrameSinkSessionMailboxPreflight interface {
|
||||
PreflightAdapterSessionMailboxConsumerResume(adapterSessionID string, consumerID string, resumeFrom string, limit int, now time.Time) (RemoteWorkspaceAdapterMailboxPreflightSnapshot, error)
|
||||
}
|
||||
type FabricSessionEventLogEntry struct {
|
||||
Event string `json:"event"`
|
||||
ClusterID string `json:"cluster_id,omitempty"`
|
||||
NodeID string `json:"node_id,omitempty"`
|
||||
PeerID string `json:"peer_id,omitempty"`
|
||||
AcceptedBy string `json:"accepted_by,omitempty"`
|
||||
SessionID string `json:"session_id,omitempty"`
|
||||
SessionEvent fabricproto.SessionEventType `json:"session_event,omitempty"`
|
||||
StreamID uint64 `json:"stream_id,omitempty"`
|
||||
Sequence uint64 `json:"sequence,omitempty"`
|
||||
TrafficClass fabricproto.TrafficClass `json:"traffic_class,omitempty"`
|
||||
RemoteAddr string `json:"remote_addr,omitempty"`
|
||||
Reason string `json:"reason,omitempty"`
|
||||
ObservedAt time.Time `json:"observed_at"`
|
||||
}
|
||||
|
||||
type VPNPacketIngress interface {
|
||||
SendClientPacketBatch(ctx context.Context, clusterID string, vpnConnectionID string, packets [][]byte) error
|
||||
ReceiveClientPacketBatch(ctx context.Context, clusterID string, vpnConnectionID string, timeout time.Duration) ([][]byte, error)
|
||||
@@ -69,24 +84,21 @@ type VPNPacketIngressRoutePreference interface {
|
||||
}
|
||||
|
||||
type Server struct {
|
||||
Local PeerIdentity
|
||||
SyntheticRuntime *SyntheticRuntime
|
||||
ProductionForwardingEnabled bool
|
||||
ProductionEnvelopeObserver ProductionEnvelopeObserver
|
||||
ProductionEnvelopeDelivery ProductionEnvelopeDelivery
|
||||
ProductionForwardTransport ProductionForwardTransport
|
||||
ProductionForwardLogger ProductionForwardLogger
|
||||
DisableHTTPDataPlane bool
|
||||
FabricServiceChannelLogger FabricServiceChannelAccessLogger
|
||||
RemoteWorkspaceFrameSink RemoteWorkspaceFrameSink
|
||||
ProductionRoutes []SyntheticRoute
|
||||
VPNPacketIngress VPNPacketIngress
|
||||
BackendProxyBaseURL string
|
||||
ClusterAuthorityPublicKey string
|
||||
ServiceChannelIntrospection bool
|
||||
FabricSessionEnabled bool
|
||||
FabricSessionWebSocketEnabled bool
|
||||
FabricSessionLogger FabricSessionEventLogger
|
||||
Local PeerIdentity
|
||||
SyntheticRuntime *SyntheticRuntime
|
||||
ProductionForwardingEnabled bool
|
||||
ProductionEnvelopeObserver ProductionEnvelopeObserver
|
||||
ProductionEnvelopeDelivery ProductionEnvelopeDelivery
|
||||
ProductionForwardTransport ProductionForwardTransport
|
||||
ProductionForwardLogger ProductionForwardLogger
|
||||
DisableHTTPDataPlane bool
|
||||
FabricServiceChannelLogger FabricServiceChannelAccessLogger
|
||||
RemoteWorkspaceFrameSink RemoteWorkspaceFrameSink
|
||||
ProductionRoutes []SyntheticRoute
|
||||
VPNPacketIngress VPNPacketIngress
|
||||
BackendProxyBaseURL string
|
||||
ClusterAuthorityPublicKey string
|
||||
ServiceChannelIntrospection bool
|
||||
}
|
||||
|
||||
func (s Server) Handler() http.Handler {
|
||||
@@ -94,9 +106,6 @@ func (s Server) Handler() http.Handler {
|
||||
mux.HandleFunc("/mesh/v1/health", s.handleHealth)
|
||||
mux.HandleFunc("/mesh/v1/forward", s.handleForward)
|
||||
mux.HandleFunc("/mesh/v1/synthetic/probe", s.handleSyntheticProbe)
|
||||
if s.FabricSessionEnabled && s.FabricSessionWebSocketEnabled {
|
||||
mux.HandleFunc("/mesh/v1/fabric/session/ws", s.handleFabricSessionWebSocket)
|
||||
}
|
||||
if s.RemoteWorkspaceFrameSink != nil {
|
||||
mux.HandleFunc("/mesh/v1/remote-workspace/adapter-sessions/", s.handleRemoteWorkspaceAdapterSessionControl)
|
||||
}
|
||||
@@ -196,185 +205,6 @@ func (s Server) handleRemoteWorkspaceAdapterSessionSnapshot(w http.ResponseWrite
|
||||
_ = json.NewEncoder(w).Encode(snapshotter.SnapshotAdapterSessions(includeTerminal, limit, time.Now().UTC()))
|
||||
}
|
||||
|
||||
type FabricSessionEventLogEntry struct {
|
||||
Event string `json:"event"`
|
||||
ClusterID string `json:"cluster_id,omitempty"`
|
||||
NodeID string `json:"node_id,omitempty"`
|
||||
PeerID string `json:"peer_id,omitempty"`
|
||||
AcceptedBy string `json:"accepted_by,omitempty"`
|
||||
SessionID string `json:"session_id,omitempty"`
|
||||
SessionEvent fabricproto.SessionEventType `json:"session_event,omitempty"`
|
||||
StreamID uint64 `json:"stream_id,omitempty"`
|
||||
Sequence uint64 `json:"sequence,omitempty"`
|
||||
TrafficClass fabricproto.TrafficClass `json:"traffic_class,omitempty"`
|
||||
RemoteAddr string `json:"remote_addr,omitempty"`
|
||||
Reason string `json:"reason,omitempty"`
|
||||
ObservedAt time.Time `json:"observed_at"`
|
||||
}
|
||||
|
||||
// fabricSessionAuthorityPayload is the JSON shape of a fabric session
// authority payload: schema version, cluster and session IDs, optional source
// and selected-entry node IDs, a token hash and an issued/expiry time pair.
// NOTE(review): where this payload is produced and verified is not visible in
// this part of the file.
type fabricSessionAuthorityPayload struct {
	SchemaVersion string `json:"schema_version"`
	ClusterID     string `json:"cluster_id"`
	SessionID     string `json:"session_id"`

	// Optional node identifiers; omitted from JSON when empty.
	SourceNodeID        string `json:"source_node_id,omitempty"`
	SelectedEntryNodeID string `json:"selected_entry_node_id,omitempty"`

	TokenHash string    `json:"token_hash"`
	IssuedAt  time.Time `json:"issued_at"`
	ExpiresAt time.Time `json:"expires_at"`
}
|
||||
|
||||
// fabricSessionAuthDecision is the result of validating a fabric session
// request. Both fields are copied into the fabric session log entries written
// by the WebSocket handler.
type fabricSessionAuthDecision struct {
	// AcceptedBy is logged as the entry's accepted_by value.
	AcceptedBy string
	// SessionID is logged as the entry's session_id value.
	SessionID string
}
|
||||
|
||||
func (s Server) handleFabricSessionWebSocket(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodGet {
|
||||
w.WriteHeader(http.StatusMethodNotAllowed)
|
||||
return
|
||||
}
|
||||
decision, ok := s.validateFabricSessionRequest(w, r)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
upgrader := websocket.Upgrader{
|
||||
CheckOrigin: func(_ *http.Request) bool { return true },
|
||||
}
|
||||
conn, err := upgrader.Upgrade(w, r, nil)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
defer conn.Close()
|
||||
|
||||
s.logFabricSession(FabricSessionEventLogEntry{
|
||||
Event: "fabric_session_websocket_opened",
|
||||
ClusterID: s.Local.ClusterID,
|
||||
NodeID: s.Local.NodeID,
|
||||
AcceptedBy: decision.AcceptedBy,
|
||||
SessionID: decision.SessionID,
|
||||
RemoteAddr: r.RemoteAddr,
|
||||
ObservedAt: time.Now().UTC(),
|
||||
})
|
||||
loop := fabricproto.TransportLoop{
|
||||
Session: fabricproto.NewSession(fabricproto.SessionConfig{}),
|
||||
OnEvent: func(event fabricproto.SessionEvent) ([]fabricproto.Frame, error) {
|
||||
s.logFabricSession(FabricSessionEventLogEntry{
|
||||
Event: "fabric_session_event",
|
||||
ClusterID: s.Local.ClusterID,
|
||||
NodeID: s.Local.NodeID,
|
||||
AcceptedBy: decision.AcceptedBy,
|
||||
SessionID: decision.SessionID,
|
||||
SessionEvent: event.Type,
|
||||
StreamID: event.StreamID,
|
||||
Sequence: event.Sequence,
|
||||
TrafficClass: event.TrafficClass,
|
||||
RemoteAddr: r.RemoteAddr,
|
||||
ObservedAt: time.Now().UTC(),
|
||||
})
|
||||
return nil, nil
|
||||
},
|
||||
}
|
||||
err = loop.RunWebSocket(r.Context(), conn, fabricproto.WebSocketTransportConfig{})
|
||||
if err != nil && !errors.Is(err, context.Canceled) {
|
||||
s.logFabricSession(FabricSessionEventLogEntry{
|
||||
Event: "fabric_session_websocket_closed",
|
||||
ClusterID: s.Local.ClusterID,
|
||||
NodeID: s.Local.NodeID,
|
||||
AcceptedBy: decision.AcceptedBy,
|
||||
SessionID: decision.SessionID,
|
||||
RemoteAddr: r.RemoteAddr,
|
||||
Reason: err.Error(),
|
||||
ObservedAt: time.Now().UTC(),
|
||||
})
|
||||
return
|
||||
}
|
||||
s.logFabricSession(FabricSessionEventLogEntry{
|
||||
Event: "fabric_session_websocket_closed",
|
||||
ClusterID: s.Local.ClusterID,
|
||||
NodeID: s.Local.NodeID,
|
||||
AcceptedBy: decision.AcceptedBy,
|
||||
SessionID: decision.SessionID,
|
||||
RemoteAddr: r.RemoteAddr,
|
||||
ObservedAt: time.Now().UTC(),
|
||||
})
|
||||
}
|
||||
|
||||
func (s Server) validateFabricSessionRequest(w http.ResponseWriter, r *http.Request) (fabricSessionAuthDecision, bool) {
|
||||
var decision fabricSessionAuthDecision
|
||||
token := fabricSessionBearerToken(r)
|
||||
if !strings.HasPrefix(token, "rap_fsn_") {
|
||||
http.Error(w, "fabric session token is required", http.StatusUnauthorized)
|
||||
return decision, false
|
||||
}
|
||||
payload, err := s.verifyFabricSessionAuthority(r, token)
|
||||
if err != nil {
|
||||
http.Error(w, err.Error(), http.StatusForbidden)
|
||||
return decision, false
|
||||
}
|
||||
decision.AcceptedBy = "legacy_unsigned"
|
||||
if payload != nil {
|
||||
decision.AcceptedBy = "signed"
|
||||
decision.SessionID = strings.TrimSpace(payload.SessionID)
|
||||
}
|
||||
return decision, true
|
||||
}
|
||||
|
||||
func (s Server) verifyFabricSessionAuthority(r *http.Request, token string) (*fabricSessionAuthorityPayload, error) {
|
||||
publicKey := strings.TrimSpace(s.ClusterAuthorityPublicKey)
|
||||
payloadHeader := strings.TrimSpace(r.Header.Get("X-RAP-Fabric-Session-Authority-Payload"))
|
||||
signatureHeader := strings.TrimSpace(r.Header.Get("X-RAP-Fabric-Session-Authority-Signature"))
|
||||
if payloadHeader == "" && signatureHeader == "" {
|
||||
if publicKey != "" {
|
||||
return nil, fmt.Errorf("%w: signed fabric session authority is required", ErrUnauthorizedChannel)
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
if publicKey == "" {
|
||||
return nil, ErrUnauthorizedChannel
|
||||
}
|
||||
if payloadHeader == "" || signatureHeader == "" {
|
||||
return nil, fmt.Errorf("%w: fabric session authority payload and signature are required together", ErrUnauthorizedChannel)
|
||||
}
|
||||
payloadRaw, err := decodeHeaderJSON(payloadHeader)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%w: invalid fabric session authority payload", ErrUnauthorizedChannel)
|
||||
}
|
||||
signatureRaw, err := decodeHeaderJSON(signatureHeader)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%w: invalid fabric session authority signature", ErrUnauthorizedChannel)
|
||||
}
|
||||
var signature authority.Signature
|
||||
if err := json.Unmarshal(signatureRaw, &signature); err != nil {
|
||||
return nil, fmt.Errorf("%w: invalid fabric session authority signature", ErrUnauthorizedChannel)
|
||||
}
|
||||
if err := authority.VerifyRaw(publicKey, payloadRaw, signature); err != nil {
|
||||
return nil, fmt.Errorf("%w: fabric session authority signature rejected", ErrUnauthorizedChannel)
|
||||
}
|
||||
var payload fabricSessionAuthorityPayload
|
||||
if err := json.Unmarshal(payloadRaw, &payload); err != nil {
|
||||
return nil, fmt.Errorf("%w: invalid fabric session authority payload", ErrUnauthorizedChannel)
|
||||
}
|
||||
if payload.SchemaVersion != "rap.fabric_session_authority.v1" ||
|
||||
payload.ClusterID != s.Local.ClusterID ||
|
||||
payload.TokenHash != fabricSessionTokenHash(token) ||
|
||||
strings.TrimSpace(payload.SessionID) == "" {
|
||||
return nil, fmt.Errorf("%w: fabric session authority payload mismatch", ErrUnauthorizedChannel)
|
||||
}
|
||||
if payload.SelectedEntryNodeID != "" && s.Local.NodeID != "" && payload.SelectedEntryNodeID != s.Local.NodeID {
|
||||
return nil, fmt.Errorf("%w: fabric session entry node mismatch", ErrUnauthorizedChannel)
|
||||
}
|
||||
if !payload.ExpiresAt.IsZero() && !payload.ExpiresAt.After(time.Now().UTC()) {
|
||||
return nil, fmt.Errorf("%w: fabric session lease expired", ErrUnauthorizedChannel)
|
||||
}
|
||||
return &payload, nil
|
||||
}
|
||||
|
||||
func (s Server) logFabricSession(entry FabricSessionEventLogEntry) {
|
||||
if s.FabricSessionLogger != nil {
|
||||
s.FabricSessionLogger(entry)
|
||||
}
|
||||
}
|
||||
|
||||
func (s Server) handleRemoteWorkspaceAdapterSessionMailbox(w http.ResponseWriter, r *http.Request) {
|
||||
reader, ok := s.RemoteWorkspaceFrameSink.(RemoteWorkspaceFrameSinkSessionMailbox)
|
||||
if !ok {
|
||||
@@ -711,15 +541,15 @@ func parseRemoteWorkspaceAdapterSessionControlPath(path string) (string, bool) {
|
||||
}
|
||||
|
||||
func (s Server) handleVPNPacketIngress(w http.ResponseWriter, r *http.Request) bool {
|
||||
if clusterID, vpnConnectionID, ok := parseVPNClientPacketWebSocketPath(r.URL.Path); ok {
|
||||
s.handleVPNPacketWebSocket(w, r, clusterID, "", vpnConnectionID, false, true, "")
|
||||
if isVPNClientPacketWebSocketPath(r.URL.Path) {
|
||||
http.Error(w, "legacy VPN WebSocket dataplane is removed; use QUIC fabric route", http.StatusGone)
|
||||
return true
|
||||
}
|
||||
clusterID, vpnConnectionID, ok := parseVPNClientPacketPath(r.URL.Path)
|
||||
if !ok {
|
||||
if _, _, ok := parseVPNClientPacketPath(r.URL.Path); !ok {
|
||||
return false
|
||||
}
|
||||
return s.handleVPNPacketHTTP(w, r, clusterID, "", vpnConnectionID, "", false, true, "")
|
||||
http.Error(w, "legacy VPN HTTP dataplane is removed; use QUIC fabric route", http.StatusGone)
|
||||
return true
|
||||
}
|
||||
|
||||
func (s Server) handleFabricServiceChannelRemoteWorkspaceIngress(w http.ResponseWriter, r *http.Request) bool {
|
||||
@@ -728,7 +558,7 @@ func (s Server) handleFabricServiceChannelRemoteWorkspaceIngress(w http.Response
|
||||
return false
|
||||
}
|
||||
if webSocket {
|
||||
http.Error(w, "remote workspace service-channel websocket forwarding is not implemented", http.StatusNotImplemented)
|
||||
http.Error(w, "remote workspace service-channel websocket ingress is removed; use QUIC fabric route", http.StatusGone)
|
||||
return true
|
||||
}
|
||||
decision, valid := s.validateFabricServiceChannelRequest(w, r, clusterID, channelID, resourceID, FabricServiceClassRemoteWorkspace, channelClass)
|
||||
@@ -809,7 +639,7 @@ func (s Server) handleFabricServiceChannelRemoteWorkspaceIngress(w http.Response
|
||||
"channel_id": channelID,
|
||||
"resource_id": resourceID,
|
||||
"data_plane": "validated",
|
||||
"payload_flow": "not_implemented",
|
||||
"payload_flow": "validated_only",
|
||||
})
|
||||
return true
|
||||
}
|
||||
@@ -898,7 +728,7 @@ func validateRemoteWorkspaceFrameBatchProbe(payload []byte, requiredChannelClass
|
||||
return decoded, fmt.Errorf("unsupported remote workspace frame batch schema")
|
||||
}
|
||||
if !decoded.ProbeOnly {
|
||||
return decoded, fmt.Errorf("remote workspace payload forwarding is not implemented")
|
||||
return decoded, fmt.Errorf("remote workspace production payload forwarding is disabled; probe_only required")
|
||||
}
|
||||
if strings.TrimSpace(strings.ToLower(decoded.ServiceClass)) != FabricServiceClassRemoteWorkspace {
|
||||
return decoded, fmt.Errorf("remote workspace frame batch service class mismatch")
|
||||
@@ -952,438 +782,6 @@ func isAllowedRemoteWorkspaceAdapterFrameDirection(channel string, direction str
|
||||
}
|
||||
}
|
||||
|
||||
func (s Server) handleFabricServiceChannelVPNPacketIngress(w http.ResponseWriter, r *http.Request) bool {
|
||||
if clusterID, channelID, vpnConnectionID, ok := parseFabricServiceChannelVPNPacketWebSocketPath(r.URL.Path); ok {
|
||||
decision, valid := s.validateFabricServiceChannelVPNRequest(w, r, clusterID, channelID, vpnConnectionID)
|
||||
if !valid {
|
||||
return true
|
||||
}
|
||||
s.logFabricServiceChannelAccess(r, clusterID, channelID, vpnConnectionID, decision)
|
||||
s.preferVPNPacketIngressRoute(decision.PreferredRouteID)
|
||||
s.handleVPNPacketWebSocket(w, r, clusterID, channelID, vpnConnectionID, decision.ForceBackendFallback, decision.BackendFallbackAllowed(), decision.BackendRelayPolicy)
|
||||
return true
|
||||
}
|
||||
clusterID, channelID, vpnConnectionID, ok := parseFabricServiceChannelVPNPacketPath(r.URL.Path)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
decision, valid := s.validateFabricServiceChannelVPNRequest(w, r, clusterID, channelID, vpnConnectionID)
|
||||
if !valid {
|
||||
return true
|
||||
}
|
||||
w.Header().Set("X-RAP-Service-Channel-Accepted-By", decision.AcceptedBy)
|
||||
s.logFabricServiceChannelAccess(r, clusterID, channelID, vpnConnectionID, decision)
|
||||
s.preferVPNPacketIngressRoute(decision.PreferredRouteID)
|
||||
backendPath := "/api/v1/clusters/" + clusterID + "/vpn-connections/" + vpnConnectionID + "/tunnel/client/packets"
|
||||
return s.handleVPNPacketHTTP(w, r, clusterID, channelID, vpnConnectionID, backendPath, decision.ForceBackendFallback, decision.BackendFallbackAllowed(), decision.BackendRelayPolicy)
|
||||
}
|
||||
|
||||
func (s Server) preferVPNPacketIngressRoute(routeID string) {
|
||||
routeID = strings.TrimSpace(routeID)
|
||||
if routeID == "" || s.VPNPacketIngress == nil {
|
||||
return
|
||||
}
|
||||
if preferred, ok := s.VPNPacketIngress.(VPNPacketIngressRoutePreference); ok {
|
||||
preferred.PreferClientRoute(routeID)
|
||||
}
|
||||
}
|
||||
|
||||
func (s Server) handleVPNPacketHTTP(w http.ResponseWriter, r *http.Request, clusterID string, channelID string, vpnConnectionID string, backendFallbackPath string, forceBackendFallback bool, backendFallbackAllowed bool, backendRelayPolicy string) bool {
|
||||
switch r.Method {
|
||||
case http.MethodPost:
|
||||
body, err := io.ReadAll(http.MaxBytesReader(w, r.Body, MaxProductionVPNPacketPayloadBytes))
|
||||
if err != nil {
|
||||
http.Error(w, "invalid vpn packet payload", http.StatusBadRequest)
|
||||
return true
|
||||
}
|
||||
if r.URL.Query().Get("batch") != "true" && len(body) == 0 {
|
||||
http.Error(w, "empty vpn packet payload", http.StatusBadRequest)
|
||||
return true
|
||||
}
|
||||
packets := [][]byte{body}
|
||||
if r.URL.Query().Get("batch") == "true" {
|
||||
packets, err = decodeVPNIngressPacketBatch(body)
|
||||
if err != nil {
|
||||
http.Error(w, "invalid vpn packet batch", http.StatusBadRequest)
|
||||
return true
|
||||
}
|
||||
}
|
||||
packets = cleanVPNIngressPacketBatch(packets)
|
||||
if len(packets) == 0 {
|
||||
http.Error(w, "empty vpn packet batch", http.StatusBadRequest)
|
||||
return true
|
||||
}
|
||||
if forceBackendFallback {
|
||||
if backendFallbackAllowed && s.proxyVPNPacketIngressToBackendPath(w, r, body, backendFallbackPath) {
|
||||
return true
|
||||
}
|
||||
s.logFabricServiceChannelViolation(r, clusterID, channelID, vpnConnectionID, backendRelayPolicy, "backend_fallback_blocked_by_policy", ErrRouteNotFound.Error())
|
||||
http.Error(w, ErrRouteNotFound.Error(), vpnIngressStatusCode(ErrRouteNotFound))
|
||||
return true
|
||||
}
|
||||
trafficClass := inferVPNPacketTrafficClass(r.Header.Get("X-RAP-Traffic-Class"), packets)
|
||||
var sendErr error
|
||||
if classIngress, ok := s.VPNPacketIngress.(VPNPacketIngressTrafficClass); ok {
|
||||
sendErr = classIngress.SendClientPacketBatchWithTrafficClass(r.Context(), clusterID, vpnConnectionID, trafficClass, packets)
|
||||
} else {
|
||||
sendErr = s.VPNPacketIngress.SendClientPacketBatch(r.Context(), clusterID, vpnConnectionID, packets)
|
||||
}
|
||||
if sendErr != nil {
|
||||
if backendFallbackAllowed && s.proxyVPNPacketIngressToBackendPath(w, r, body, backendFallbackPath) {
|
||||
return true
|
||||
}
|
||||
s.logFabricServiceChannelViolation(r, clusterID, channelID, vpnConnectionID, backendRelayPolicy, "fabric_route_send_failed_backend_fallback_blocked", sendErr.Error())
|
||||
http.Error(w, sendErr.Error(), vpnIngressStatusCode(sendErr))
|
||||
return true
|
||||
}
|
||||
w.WriteHeader(http.StatusAccepted)
|
||||
return true
|
||||
case http.MethodGet:
|
||||
if forceBackendFallback {
|
||||
if backendFallbackAllowed && s.proxyVPNPacketIngressToBackendPath(w, r, nil, backendFallbackPath) {
|
||||
return true
|
||||
}
|
||||
s.logFabricServiceChannelViolation(r, clusterID, channelID, vpnConnectionID, backendRelayPolicy, "backend_fallback_blocked_by_policy", ErrRouteNotFound.Error())
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
return true
|
||||
}
|
||||
timeout := vpnIngressTimeout(r)
|
||||
packets, err := s.VPNPacketIngress.ReceiveClientPacketBatch(r.Context(), clusterID, vpnConnectionID, timeout)
|
||||
if err != nil {
|
||||
http.Error(w, err.Error(), vpnIngressStatusCode(err))
|
||||
return true
|
||||
}
|
||||
packets = cleanVPNIngressPacketBatch(packets)
|
||||
if len(packets) == 0 {
|
||||
if backendFallbackAllowed && s.proxyVPNPacketIngressToBackendPath(w, r, nil, backendFallbackPath) {
|
||||
return true
|
||||
}
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
return true
|
||||
}
|
||||
if r.URL.Query().Get("batch") == "true" {
|
||||
w.Header().Set("Content-Type", "application/vnd.rap.vpn-packet-batch.v1")
|
||||
_, _ = w.Write(encodeVPNIngressPacketBatch(packets))
|
||||
return true
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/octet-stream")
|
||||
_, _ = w.Write(packets[0])
|
||||
return true
|
||||
default:
|
||||
w.WriteHeader(http.StatusMethodNotAllowed)
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
func (s Server) handleVPNPacketWebSocket(w http.ResponseWriter, r *http.Request, clusterID string, channelID string, vpnConnectionID string, forceBackendFallback bool, backendFallbackAllowed bool, backendRelayPolicy string) {
|
||||
if r.Method != http.MethodGet {
|
||||
w.WriteHeader(http.StatusMethodNotAllowed)
|
||||
return
|
||||
}
|
||||
if s.VPNPacketIngress == nil {
|
||||
http.Error(w, ErrForwardRuntimeUnavailable.Error(), http.StatusServiceUnavailable)
|
||||
return
|
||||
}
|
||||
upgrader := websocket.Upgrader{
|
||||
CheckOrigin: func(_ *http.Request) bool { return true },
|
||||
}
|
||||
conn, err := upgrader.Upgrade(w, r, nil)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
defer conn.Close()
|
||||
conn.SetReadLimit(MaxProductionVPNPacketPayloadBytes)
|
||||
|
||||
ctx, cancel := context.WithCancel(r.Context())
|
||||
defer cancel()
|
||||
trafficClass := r.Header.Get("X-RAP-Traffic-Class")
|
||||
errCh := make(chan error, 2)
|
||||
go func() {
|
||||
errCh <- s.readVPNPacketWebSocket(ctx, conn, clusterID, channelID, vpnConnectionID, trafficClass, forceBackendFallback, backendFallbackAllowed, backendRelayPolicy)
|
||||
}()
|
||||
go func() {
|
||||
errCh <- s.writeVPNPacketWebSocket(ctx, conn, clusterID, channelID, vpnConnectionID, forceBackendFallback, backendFallbackAllowed, backendRelayPolicy)
|
||||
}()
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
case <-errCh:
|
||||
cancel()
|
||||
}
|
||||
}
|
||||
|
||||
func (s Server) readVPNPacketWebSocket(ctx context.Context, conn *websocket.Conn, clusterID string, channelID string, vpnConnectionID string, trafficClass string, forceBackendFallback bool, backendFallbackAllowed bool, backendRelayPolicy string) error {
|
||||
for {
|
||||
messageType, payload, err := conn.ReadMessage()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if messageType != websocket.BinaryMessage {
|
||||
continue
|
||||
}
|
||||
packets, err := decodeVPNIngressPacketBatch(payload)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
packets = cleanVPNIngressPacketBatch(packets)
|
||||
if len(packets) == 0 {
|
||||
continue
|
||||
}
|
||||
if forceBackendFallback {
|
||||
if !backendFallbackAllowed {
|
||||
s.logFabricServiceChannelViolation(nil, clusterID, channelID, vpnConnectionID, backendRelayPolicy, "backend_fallback_blocked_by_policy", ErrRouteNotFound.Error())
|
||||
return ErrRouteNotFound
|
||||
}
|
||||
if proxyErr := s.backendVPNPacketPost(ctx, clusterID, vpnConnectionID, payload); proxyErr != nil {
|
||||
return proxyErr
|
||||
}
|
||||
continue
|
||||
}
|
||||
sendErr := s.sendVPNPacketWebSocketBatch(ctx, clusterID, vpnConnectionID, inferVPNPacketTrafficClass(trafficClass, packets), packets, !backendFallbackAllowed)
|
||||
if sendErr != nil {
|
||||
if !backendFallbackAllowed {
|
||||
s.logFabricServiceChannelViolation(nil, clusterID, channelID, vpnConnectionID, backendRelayPolicy, "fabric_route_send_failed_backend_fallback_blocked", sendErr.Error())
|
||||
if isRetryableVPNPacketIngressError(sendErr) {
|
||||
continue
|
||||
}
|
||||
return sendErr
|
||||
}
|
||||
if proxyErr := s.backendVPNPacketPost(ctx, clusterID, vpnConnectionID, payload); proxyErr != nil {
|
||||
return sendErr
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s Server) sendVPNPacketWebSocketBatch(ctx context.Context, clusterID string, vpnConnectionID string, trafficClass string, packets [][]byte, retryRouteErrors bool) error {
|
||||
const maxAttempts = 6
|
||||
var lastErr error
|
||||
for attempt := 0; attempt < maxAttempts; attempt++ {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
var sendErr error
|
||||
if classIngress, ok := s.VPNPacketIngress.(VPNPacketIngressTrafficClass); ok {
|
||||
sendErr = classIngress.SendClientPacketBatchWithTrafficClass(ctx, clusterID, vpnConnectionID, trafficClass, packets)
|
||||
} else {
|
||||
sendErr = s.VPNPacketIngress.SendClientPacketBatch(ctx, clusterID, vpnConnectionID, packets)
|
||||
}
|
||||
if sendErr == nil {
|
||||
return nil
|
||||
}
|
||||
lastErr = sendErr
|
||||
if !retryRouteErrors || !isRetryableVPNPacketIngressError(sendErr) {
|
||||
return sendErr
|
||||
}
|
||||
timer := time.NewTimer(time.Duration(75+attempt*50) * time.Millisecond)
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
timer.Stop()
|
||||
return ctx.Err()
|
||||
case <-timer.C:
|
||||
}
|
||||
}
|
||||
return lastErr
|
||||
}
|
||||
|
||||
func isRetryableVPNPacketIngressError(err error) bool {
|
||||
return errors.Is(err, ErrRouteNotFound) ||
|
||||
errors.Is(err, ErrForwardRuntimeUnavailable) ||
|
||||
errors.Is(err, ErrForwardPeerUnavailable) ||
|
||||
errors.Is(err, ErrSyntheticPeerUnavailable)
|
||||
}
|
||||
|
||||
func (s Server) receiveVPNPacketWebSocketBatch(ctx context.Context, clusterID string, vpnConnectionID string, timeout time.Duration, retryRouteErrors bool) ([][]byte, error) {
|
||||
const maxAttempts = 4
|
||||
var lastErr error
|
||||
for attempt := 0; attempt < maxAttempts; attempt++ {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
packets, err := s.VPNPacketIngress.ReceiveClientPacketBatch(ctx, clusterID, vpnConnectionID, timeout)
|
||||
if err == nil {
|
||||
return packets, nil
|
||||
}
|
||||
lastErr = err
|
||||
if !retryRouteErrors || !isRetryableVPNPacketIngressError(err) {
|
||||
return nil, err
|
||||
}
|
||||
timer := time.NewTimer(time.Duration(75+attempt*50) * time.Millisecond)
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
timer.Stop()
|
||||
return nil, ctx.Err()
|
||||
case <-timer.C:
|
||||
}
|
||||
}
|
||||
if retryRouteErrors && isRetryableVPNPacketIngressError(lastErr) {
|
||||
return nil, nil
|
||||
}
|
||||
return nil, lastErr
|
||||
}
|
||||
|
||||
func (s Server) writeVPNPacketWebSocket(ctx context.Context, conn *websocket.Conn, clusterID string, channelID string, vpnConnectionID string, forceBackendFallback bool, backendFallbackAllowed bool, backendRelayPolicy string) error {
|
||||
lastPing := time.Now()
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
default:
|
||||
}
|
||||
var packets [][]byte
|
||||
var err error
|
||||
if !forceBackendFallback {
|
||||
packets, err = s.receiveVPNPacketWebSocketBatch(ctx, clusterID, vpnConnectionID, 50*time.Millisecond, !backendFallbackAllowed)
|
||||
}
|
||||
if forceBackendFallback && !backendFallbackAllowed {
|
||||
s.logFabricServiceChannelViolation(nil, clusterID, channelID, vpnConnectionID, backendRelayPolicy, "backend_fallback_blocked_by_policy", ErrRouteNotFound.Error())
|
||||
return ErrRouteNotFound
|
||||
}
|
||||
if err != nil && !backendFallbackAllowed {
|
||||
s.logFabricServiceChannelViolation(nil, clusterID, channelID, vpnConnectionID, backendRelayPolicy, "fabric_route_receive_failed_backend_fallback_blocked", err.Error())
|
||||
return err
|
||||
}
|
||||
if backendFallbackAllowed && (forceBackendFallback || err != nil || len(packets) == 0) {
|
||||
backendPackets, proxyErr := s.backendVPNPacketGet(ctx, clusterID, vpnConnectionID, 50*time.Millisecond)
|
||||
if proxyErr != nil && err != nil {
|
||||
return err
|
||||
}
|
||||
if len(backendPackets) > 0 {
|
||||
packets = backendPackets
|
||||
}
|
||||
}
|
||||
if len(packets) > 0 {
|
||||
if err := conn.SetWriteDeadline(time.Now().Add(5 * time.Second)); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := conn.WriteMessage(websocket.BinaryMessage, encodeVPNIngressPacketBatch(packets)); err != nil {
|
||||
return err
|
||||
}
|
||||
continue
|
||||
}
|
||||
if time.Since(lastPing) >= 15*time.Second {
|
||||
if err := conn.SetWriteDeadline(time.Now().Add(5 * time.Second)); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := conn.WriteMessage(websocket.PingMessage, []byte("rap-vpn")); err != nil {
|
||||
return err
|
||||
}
|
||||
lastPing = time.Now()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s Server) backendVPNPacketPost(ctx context.Context, clusterID string, vpnConnectionID string, batchPayload []byte) error {
|
||||
target := strings.TrimRight(strings.TrimSpace(s.BackendProxyBaseURL), "/")
|
||||
if target == "" {
|
||||
return ErrRouteNotFound
|
||||
}
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, target+"/clusters/"+clusterID+"/vpn-connections/"+vpnConnectionID+"/tunnel/client/packets?batch=true", bytes.NewReader(batchPayload))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/octet-stream")
|
||||
req.Header.Set("X-RAP-Entry-Node", s.Local.NodeID)
|
||||
req.Header.Set("X-RAP-Entry-Cluster", s.Local.ClusterID)
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||
return fmt.Errorf("backend vpn packet post failed: status=%d", resp.StatusCode)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s Server) backendVPNPacketGet(ctx context.Context, clusterID string, vpnConnectionID string, timeout time.Duration) ([][]byte, error) {
|
||||
target := strings.TrimRight(strings.TrimSpace(s.BackendProxyBaseURL), "/")
|
||||
if target == "" {
|
||||
return nil, ErrRouteNotFound
|
||||
}
|
||||
if timeout <= 0 {
|
||||
timeout = 50 * time.Millisecond
|
||||
}
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, target+"/clusters/"+clusterID+"/vpn-connections/"+vpnConnectionID+"/tunnel/client/packets?batch=true&timeout_ms="+strconv.FormatInt(timeout.Milliseconds(), 10), nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
req.Header.Set("Accept", "application/vnd.rap.vpn-packet-batch.v1")
|
||||
req.Header.Set("X-RAP-Entry-Node", s.Local.NodeID)
|
||||
req.Header.Set("X-RAP-Entry-Cluster", s.Local.ClusterID)
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode == http.StatusNoContent {
|
||||
return nil, nil
|
||||
}
|
||||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||
return nil, fmt.Errorf("backend vpn packet get failed: status=%d", resp.StatusCode)
|
||||
}
|
||||
body, err := io.ReadAll(io.LimitReader(resp.Body, MaxProductionVPNPacketPayloadBytes))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(body) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
return decodeVPNIngressPacketBatch(body)
|
||||
}
|
||||
|
||||
func (s Server) proxyVPNPacketIngressToBackend(w http.ResponseWriter, r *http.Request, body []byte) bool {
|
||||
return s.proxyVPNPacketIngressToBackendPath(w, r, body, "")
|
||||
}
|
||||
|
||||
func (s Server) proxyVPNPacketIngressToBackendPath(w http.ResponseWriter, r *http.Request, body []byte, backendPath string) bool {
|
||||
if strings.TrimSpace(s.BackendProxyBaseURL) == "" {
|
||||
return false
|
||||
}
|
||||
target, err := url.Parse(s.BackendProxyBaseURL)
|
||||
if err != nil || target.Scheme == "" || target.Host == "" {
|
||||
return false
|
||||
}
|
||||
if strings.EqualFold(target.Host, r.Host) {
|
||||
return false
|
||||
}
|
||||
var reader io.Reader
|
||||
if body != nil {
|
||||
reader = bytes.NewReader(body)
|
||||
}
|
||||
requestURI := r.URL.RequestURI()
|
||||
if backendPath != "" {
|
||||
requestURI = backendPath
|
||||
if r.URL.RawQuery != "" {
|
||||
requestURI += "?" + r.URL.RawQuery
|
||||
}
|
||||
}
|
||||
req, err := http.NewRequestWithContext(r.Context(), r.Method, target.Scheme+"://"+target.Host+requestURI, reader)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
for _, key := range []string{"Accept", "Content-Type"} {
|
||||
if value := r.Header.Get(key); value != "" {
|
||||
req.Header.Set(key, value)
|
||||
}
|
||||
}
|
||||
req.Header.Set("X-RAP-Entry-Node", s.Local.NodeID)
|
||||
req.Header.Set("X-RAP-Entry-Cluster", s.Local.ClusterID)
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
for _, key := range []string{"Content-Type"} {
|
||||
if value := resp.Header.Get(key); value != "" {
|
||||
w.Header().Set(key, value)
|
||||
}
|
||||
}
|
||||
w.WriteHeader(resp.StatusCode)
|
||||
_, _ = io.Copy(w, resp.Body)
|
||||
return true
|
||||
}
|
||||
|
||||
type fabricServiceChannelLeaseAuthorityPayload struct {
|
||||
SchemaVersion string `json:"schema_version"`
|
||||
ChannelID string `json:"channel_id"`
|
||||
@@ -1443,10 +841,6 @@ func (d fabricServiceChannelRequestDecision) BackendFallbackAllowed() bool {
|
||||
return strings.TrimSpace(d.BackendRelayPolicy) != "disabled"
|
||||
}
|
||||
|
||||
func (s Server) validateFabricServiceChannelVPNRequest(w http.ResponseWriter, r *http.Request, clusterID string, channelID string, vpnConnectionID string) (fabricServiceChannelRequestDecision, bool) {
|
||||
return s.validateFabricServiceChannelRequest(w, r, clusterID, channelID, vpnConnectionID, FabricServiceClassVPNPackets, ProductionChannelVPNPacket)
|
||||
}
|
||||
|
||||
func (s Server) validateFabricServiceChannelRequest(w http.ResponseWriter, r *http.Request, clusterID string, channelID string, resourceID string, expectedServiceClass string, defaultChannelClass string) (fabricServiceChannelRequestDecision, bool) {
|
||||
var decision fabricServiceChannelRequestDecision
|
||||
expectedServiceClass = strings.TrimSpace(strings.ToLower(expectedServiceClass))
|
||||
@@ -1485,7 +879,7 @@ func (s Server) validateFabricServiceChannelRequest(w http.ResponseWriter, r *ht
|
||||
http.Error(w, err.Error(), http.StatusForbidden)
|
||||
return decision, false
|
||||
}
|
||||
decision.AcceptedBy = "legacy_unsigned"
|
||||
decision.AcceptedBy = "token_authorized"
|
||||
decision.ServiceClass = serviceClass
|
||||
decision.ChannelClass = channelClass
|
||||
if payload != nil && (payload.Status == "degraded_fallback" || payload.PrimaryRoute.Status == "missing_route_intent") {
|
||||
@@ -1571,30 +965,6 @@ func (s Server) logFabricServiceChannelAccess(r *http.Request, clusterID string,
|
||||
s.FabricServiceChannelLogger(entry)
|
||||
}
|
||||
|
||||
func (s Server) logFabricServiceChannelViolation(r *http.Request, clusterID string, channelID string, resourceID string, backendRelayPolicy string, status string, reason string) {
|
||||
if s.FabricServiceChannelLogger == nil || strings.TrimSpace(channelID) == "" {
|
||||
return
|
||||
}
|
||||
entry := FabricServiceChannelAccessLogEntry{
|
||||
Event: "fabric_service_channel_data_plane_violation",
|
||||
ClusterID: clusterID,
|
||||
ChannelID: channelID,
|
||||
ResourceID: resourceID,
|
||||
LocalNodeID: s.Local.NodeID,
|
||||
BackendRelayPolicy: strings.TrimSpace(backendRelayPolicy),
|
||||
ViolationStatus: strings.TrimSpace(status),
|
||||
ViolationReason: strings.TrimSpace(reason),
|
||||
OccurredAt: time.Now().UTC(),
|
||||
}
|
||||
if r != nil {
|
||||
entry.Method = r.Method
|
||||
if r.URL != nil {
|
||||
entry.Path = r.URL.Path
|
||||
}
|
||||
}
|
||||
s.FabricServiceChannelLogger(entry)
|
||||
}
|
||||
|
||||
func (s Server) verifyFabricServiceChannelLeaseAuthority(r *http.Request, clusterID string, channelID string, resourceID string, serviceClass string, channelClass string, token string) (*fabricServiceChannelLeaseAuthorityPayload, error) {
|
||||
publicKey := strings.TrimSpace(s.ClusterAuthorityPublicKey)
|
||||
payloadHeader := strings.TrimSpace(r.Header.Get("X-RAP-Service-Channel-Authority-Payload"))
|
||||
@@ -1657,15 +1027,15 @@ func validateFabricServiceChannelDataPlaneContract(contract fabricServiceChannel
|
||||
}
|
||||
requiredFlowClass = strings.TrimSpace(strings.ToLower(requiredFlowClass))
|
||||
if contract.SchemaVersion != "rap.fabric_service_channel_data_plane.v1" ||
|
||||
contract.WorkingDataTransport != "fabric_service_channel" ||
|
||||
contract.WorkingDataTransport != "fabric_quic_route" ||
|
||||
contract.SteadyStateTransport != "fabric_route" ||
|
||||
(contract.BackendRelayPolicy != "degraded_fallback_only" && contract.BackendRelayPolicy != "disabled") ||
|
||||
contract.BackendRelayPolicy != "disabled" ||
|
||||
!contract.ServiceNeutral ||
|
||||
!contract.ProtocolAgnostic ||
|
||||
contract.LogicalFlowMode != "multi_flow_isolated" {
|
||||
return fmt.Errorf("%w: unsupported service channel data-plane contract", ErrUnauthorizedChannel)
|
||||
}
|
||||
if contract.Mode != "" && contract.Mode != "fabric_primary" && contract.Mode != "degraded_backend_fallback" {
|
||||
if contract.Mode != "" && contract.Mode != "fabric_primary" && contract.Mode != "fabric_quic_only" {
|
||||
return fmt.Errorf("%w: unsupported service channel data-plane mode", ErrUnauthorizedChannel)
|
||||
}
|
||||
if requiredFlowClass != "" && len(contract.RequiredFlowIsolationClasses) > 0 && !containsString(contract.RequiredFlowIsolationClasses, requiredFlowClass) {
|
||||
@@ -1796,29 +1166,6 @@ func fabricServiceChannelBearerToken(r *http.Request) string {
|
||||
return strings.TrimSpace(r.URL.Query().Get("service_channel_token"))
|
||||
}
|
||||
|
||||
// fabricSessionTokenHash returns the lowercase hex-encoded SHA-256 digest of
// token. Leading and trailing whitespace is trimmed before hashing, so
// " abc " and "abc" produce the same digest.
func fabricSessionTokenHash(token string) string {
	sum := sha256.Sum256([]byte(strings.TrimSpace(token)))
	return hex.EncodeToString(sum[:])
}
|
||||
|
||||
// fabricSessionBearerToken extracts the fabric session token from r.
//
// Sources are consulted in order and the first non-empty value wins:
//  1. the X-RAP-Fabric-Session-Token header,
//  2. an "Authorization: Bearer <token>" header (scheme matched
//     case-insensitively),
//  3. the fabric_session_token query parameter.
//
// The returned token is whitespace-trimmed. It returns "" when r is nil or
// when none of the sources carries a token.
func fabricSessionBearerToken(r *http.Request) string {
	if r == nil {
		return ""
	}
	if token := strings.TrimSpace(r.Header.Get("X-RAP-Fabric-Session-Token")); token != "" {
		return token
	}
	const scheme = "Bearer "
	auth := strings.TrimSpace(r.Header.Get("Authorization"))
	if len(auth) > len(scheme) && strings.EqualFold(auth[:len(scheme)], scheme) {
		return strings.TrimSpace(auth[len(scheme):])
	}
	// A hand-built request may have no URL; treat that as "no query token"
	// instead of dereferencing a nil pointer.
	if r.URL == nil {
		return ""
	}
	return strings.TrimSpace(r.URL.Query().Get("fabric_session_token"))
}
|
||||
|
||||
func isAllowedFabricServiceVPNChannel(channel string) bool {
|
||||
return isAllowedFabricServiceChannelForClass(FabricServiceClassVPNPackets, channel)
|
||||
}
|
||||
|
||||
func isAllowedFabricServiceChannelForClass(serviceClass string, channel string) bool {
|
||||
serviceClass = strings.TrimSpace(strings.ToLower(serviceClass))
|
||||
channel = strings.TrimSpace(strings.ToLower(channel))
|
||||
@@ -1846,25 +1193,6 @@ func containsString(values []string, target string) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// parseFabricServiceChannelVPNPacketWebSocketPath matches paths of the form
//
//	/api/v1/clusters/{a}/fabric/service-channels/{b}/vpn-connections/{c}/packets/ws
//
// and returns the three variable segments {a}, {b}, {c} in that order.
// Leading and trailing slashes are ignored. The final result is false, and
// the strings are empty, when the path has a different shape or any variable
// segment is empty.
func parseFabricServiceChannelVPNPacketWebSocketPath(path string) (string, string, string, bool) {
	parts := strings.Split(strings.Trim(path, "/"), "/")
	if len(parts) != 11 ||
		parts[0] != "api" ||
		parts[1] != "v1" ||
		parts[2] != "clusters" ||
		parts[4] != "fabric" ||
		parts[5] != "service-channels" ||
		parts[7] != "vpn-connections" ||
		parts[9] != "packets" ||
		parts[10] != "ws" {
		return "", "", "", false
	}
	if parts[3] == "" || parts[6] == "" || parts[8] == "" {
		return "", "", "", false
	}
	return parts[3], parts[6], parts[8], true
}
|
||||
|
||||
func parseFabricServiceChannelRemoteWorkspacePath(path string) (string, string, string, string, bool, bool) {
|
||||
parts := strings.Split(strings.Trim(path, "/"), "/")
|
||||
if len(parts) == 11 &&
|
||||
@@ -1897,6 +1225,34 @@ func parseFabricServiceChannelRemoteWorkspacePath(path string) (string, string,
|
||||
return parts[3], parts[6], parts[8], strings.TrimSpace(strings.ToLower(parts[10])), false, true
|
||||
}
|
||||
|
||||
func (s Server) handleFabricServiceChannelVPNPacketIngress(w http.ResponseWriter, r *http.Request) bool {
|
||||
if isFabricServiceChannelVPNPacketWebSocketPath(r.URL.Path) {
|
||||
http.Error(w, "fabric service-channel WebSocket dataplane is removed; use QUIC fabric route", http.StatusGone)
|
||||
return true
|
||||
}
|
||||
if _, _, _, ok := parseFabricServiceChannelVPNPacketPath(r.URL.Path); !ok {
|
||||
return false
|
||||
}
|
||||
http.Error(w, "fabric service-channel HTTP dataplane is removed; use QUIC fabric route", http.StatusGone)
|
||||
return true
|
||||
}
|
||||
|
||||
// isFabricServiceChannelVPNPacketWebSocketPath reports whether path has the
// form
//
//	/api/v1/clusters/{a}/fabric/service-channels/{b}/vpn-connections/{c}/packets/ws
//
// with all three variable segments non-empty. Leading and trailing slashes
// are ignored.
func isFabricServiceChannelVPNPacketWebSocketPath(path string) bool {
	parts := strings.Split(strings.Trim(path, "/"), "/")
	if len(parts) != 11 ||
		parts[0] != "api" ||
		parts[1] != "v1" ||
		parts[2] != "clusters" ||
		parts[4] != "fabric" ||
		parts[5] != "service-channels" ||
		parts[7] != "vpn-connections" ||
		parts[9] != "packets" ||
		parts[10] != "ws" {
		return false
	}
	return parts[3] != "" && parts[6] != "" && parts[8] != ""
}
|
||||
|
||||
func parseFabricServiceChannelVPNPacketPath(path string) (string, string, string, bool) {
|
||||
parts := strings.Split(strings.Trim(path, "/"), "/")
|
||||
if len(parts) != 10 ||
|
||||
@@ -1915,7 +1271,7 @@ func parseFabricServiceChannelVPNPacketPath(path string) (string, string, string
|
||||
return parts[3], parts[6], parts[8], true
|
||||
}
|
||||
|
||||
func parseVPNClientPacketWebSocketPath(path string) (string, string, bool) {
|
||||
func isVPNClientPacketWebSocketPath(path string) bool {
|
||||
parts := strings.Split(strings.Trim(path, "/"), "/")
|
||||
if len(parts) != 10 ||
|
||||
parts[0] != "api" ||
|
||||
@@ -1926,12 +1282,9 @@ func parseVPNClientPacketWebSocketPath(path string) (string, string, bool) {
|
||||
parts[7] != "client" ||
|
||||
parts[8] != "packets" ||
|
||||
parts[9] != "ws" {
|
||||
return "", "", false
|
||||
return false
|
||||
}
|
||||
if parts[3] == "" || parts[5] == "" {
|
||||
return "", "", false
|
||||
}
|
||||
return parts[3], parts[5], true
|
||||
return parts[3] != "" && parts[5] != ""
|
||||
}
|
||||
|
||||
func parseVPNClientPacketPath(path string) (string, string, bool) {
|
||||
@@ -1952,28 +1305,6 @@ func parseVPNClientPacketPath(path string) (string, string, bool) {
|
||||
return parts[3], parts[5], true
|
||||
}
|
||||
|
||||
// vpnIngressTimeout returns the timeout requested through the timeout_ms
// query parameter of r.
//
// A missing, non-numeric, zero, or negative value yields the 25s default;
// values above 30000 are capped at 30s. A nil request or nil URL also yields
// the default.
func vpnIngressTimeout(r *http.Request) time.Duration {
	const (
		defaultTimeoutMs = 25000
		maxTimeoutMs     = 30000
	)
	timeoutMs := 0
	if r != nil && r.URL != nil {
		// The parse error is intentionally dropped: an unparsable value is
		// handled by the range checks below (Atoi yields 0 on syntax errors
		// and a saturated value on overflow).
		timeoutMs, _ = strconv.Atoi(r.URL.Query().Get("timeout_ms"))
	}
	if timeoutMs <= 0 {
		timeoutMs = defaultTimeoutMs
	}
	if timeoutMs > maxTimeoutMs {
		timeoutMs = maxTimeoutMs
	}
	return time.Duration(timeoutMs) * time.Millisecond
}
|
||||
|
||||
func vpnIngressStatusCode(err error) int {
|
||||
switch err {
|
||||
case ErrForwardRuntimeUnavailable, ErrRouteNotFound, ErrForwardPeerUnavailable:
|
||||
return http.StatusServiceUnavailable
|
||||
case ErrUnauthorizedChannel, ErrClusterMismatch, ErrNodeMismatch:
|
||||
return http.StatusForbidden
|
||||
default:
|
||||
return http.StatusBadGateway
|
||||
}
|
||||
}
|
||||
|
||||
func encodeVPNIngressPacketBatch(packets [][]byte) []byte {
|
||||
packets = cleanVPNIngressPacketBatch(packets)
|
||||
total := 0
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user