3
This commit is contained in:
@@ -140,15 +140,12 @@ func run(ctx context.Context) (smokeReport, error) {
|
||||
return smokeReport{}, fmt.Errorf("test service: %w", err)
|
||||
}
|
||||
fabricSessionStartedAt := time.Now()
|
||||
fabricSession, _, err := mesh.NewClient(nodeB.URL).OpenFabricSession(ctx, mesh.FabricSessionDialOptions{
|
||||
Token: "rap_fsn_mesh_live_smoke",
|
||||
Timeout: 3 * time.Second,
|
||||
})
|
||||
fabricSession, fabricQUICEndpoint, fabricQUICPressure, err := smokeQUICFabricSession(ctx)
|
||||
if err != nil {
|
||||
return smokeReport{}, fmt.Errorf("fabric session open: %w", err)
|
||||
return smokeReport{}, fmt.Errorf("fabric quic session open: %w", err)
|
||||
}
|
||||
defer fabricSession.Close()
|
||||
firstFabricSessionResponse, err := fabricSession.RoundTrip(ctx, fabricproto.Frame{
|
||||
firstFabricSessionResponse, err := smokeFabricSessionRoundTrip(ctx, fabricSession, fabricproto.Frame{
|
||||
Type: fabricproto.FramePing,
|
||||
Sequence: uint64(fabricSessionStartedAt.UnixNano()),
|
||||
Payload: []byte("mesh-live-smoke-fabric-session"),
|
||||
@@ -156,7 +153,7 @@ func run(ctx context.Context) (smokeReport, error) {
|
||||
if err != nil {
|
||||
return smokeReport{}, fmt.Errorf("fabric session first round trip: %w", err)
|
||||
}
|
||||
secondFabricSessionResponse, err := fabricSession.RoundTrip(ctx, fabricproto.Frame{
|
||||
secondFabricSessionResponse, err := smokeFabricSessionRoundTrip(ctx, fabricSession, fabricproto.Frame{
|
||||
Type: fabricproto.FramePing,
|
||||
Sequence: uint64(fabricSessionStartedAt.UnixNano()) + 1,
|
||||
Payload: []byte("mesh-live-smoke-fabric-session-2"),
|
||||
@@ -175,13 +172,9 @@ func run(ctx context.Context) (smokeReport, error) {
|
||||
}
|
||||
fabricVPNBulkPressure, fabricVPNBulkChannels, fabricVPNInteractiveChannels, fabricVPNBulkWindow, fabricVPNInteractiveWindow, fabricVPNPressureLevel, fabricVPNPressureScore, fabricVPNPressureReasons, fabricVPNPressureAction := smokeVPNFlowSchedulerBulkPressure()
|
||||
fabricVPNRouteRecovered, fabricVPNRouteSwitches, fabricVPNRecoveryMS, fabricVPNRecoveryMaxMS, fabricVPNRecoveryAvgMS, fabricVPNRecoveryReason := smokeVPNFlowSchedulerRouteRecovery()
|
||||
fabricQUICAccepted, fabricQUICEndpoint, fabricQUICPressure, err := smokeQUICFabricSession(ctx)
|
||||
if err != nil {
|
||||
return smokeReport{}, fmt.Errorf("fabric quic smoke: %w", err)
|
||||
}
|
||||
|
||||
return smokeReport{
|
||||
Stage: "C17F scoped synthetic config plus live HTTP transport",
|
||||
Stage: "C17F scoped synthetic config plus live QUIC fabric transport",
|
||||
ProductionForwarding: false,
|
||||
ScopedConfigLoaded: nodeAConfig.ConfigVersion == "smoke-config-v1",
|
||||
DirectProbeAccepted: directAck.MessageType == mesh.SyntheticMessageProbeAck,
|
||||
@@ -210,11 +203,11 @@ func run(ctx context.Context) (smokeReport, error) {
|
||||
FabricVPNRecoveryMaxMS: fabricVPNRecoveryMaxMS,
|
||||
FabricVPNRecoveryAvgMS: fabricVPNRecoveryAvgMS,
|
||||
FabricVPNRecoveryReason: fabricVPNRecoveryReason,
|
||||
FabricQUICAccepted: fabricQUICAccepted,
|
||||
FabricQUICAccepted: fabricSessionAccepted,
|
||||
FabricQUICEndpoint: fabricQUICEndpoint,
|
||||
FabricQUICPressure: fabricQUICPressure,
|
||||
FabricSessionLatencyMS: fabricSessionLatency.Milliseconds(),
|
||||
FabricSessionEndpoint: nodeB.URL + "/mesh/v1/fabric/session/ws",
|
||||
FabricSessionEndpoint: "quic://" + fabricQUICEndpoint,
|
||||
PeerEndpoints: map[string]any{
|
||||
"node-a": nodeA.URL,
|
||||
"node-r": nodeR.URL,
|
||||
@@ -269,18 +262,16 @@ func smokeVPNFlowSchedulerRouteRecovery() (bool, uint64, int64, int64, int64, st
|
||||
stat.LastRouteSwitchReason
|
||||
}
|
||||
|
||||
func smokeQUICFabricSession(ctx context.Context) (bool, string, int, error) {
|
||||
func smokeQUICFabricSession(ctx context.Context) (mesh.FabricTransportSession, string, int, error) {
|
||||
server, err := mesh.StartQUICFabricServer(ctx, mesh.QUICFabricServerConfig{
|
||||
ListenAddr: "127.0.0.1:0",
|
||||
TLSConfig: smokeQUICTLSConfig(),
|
||||
})
|
||||
if err != nil {
|
||||
return false, "", 0, err
|
||||
return nil, "", 0, err
|
||||
}
|
||||
defer server.Close()
|
||||
endpoint := server.Addr().String()
|
||||
transport := mesh.NewQUICFabricTransport(nil)
|
||||
defer transport.Close()
|
||||
session, err := transport.Connect(ctx, mesh.FabricTransportTarget{
|
||||
PeerID: "node-b",
|
||||
Endpoint: endpoint,
|
||||
@@ -293,31 +284,12 @@ func smokeQUICFabricSession(ctx context.Context) (bool, string, int, error) {
|
||||
ErrorBuffer: 4,
|
||||
})
|
||||
if err != nil {
|
||||
return false, endpoint, 0, err
|
||||
}
|
||||
defer session.Close()
|
||||
if err := session.Send(ctx, fabricproto.Frame{
|
||||
Type: fabricproto.FramePing,
|
||||
Sequence: uint64(time.Now().UnixNano()),
|
||||
Payload: []byte("mesh-live-smoke-quic"),
|
||||
}); err != nil {
|
||||
return false, endpoint, 0, err
|
||||
}
|
||||
timer := time.NewTimer(3 * time.Second)
|
||||
defer timer.Stop()
|
||||
for {
|
||||
select {
|
||||
case frame := <-session.Frames():
|
||||
snapshot := transport.Snapshot()
|
||||
return frame.Type == fabricproto.FramePong && string(frame.Payload) == "mesh-live-smoke-quic", endpoint, snapshot.CapacityPressurePercent, nil
|
||||
case err := <-session.Errors():
|
||||
return false, endpoint, 0, err
|
||||
case <-timer.C:
|
||||
return false, endpoint, 0, fmt.Errorf("timed out waiting for quic pong")
|
||||
case <-ctx.Done():
|
||||
return false, endpoint, 0, ctx.Err()
|
||||
}
|
||||
_ = transport.Close()
|
||||
_ = server.Close()
|
||||
return nil, endpoint, 0, err
|
||||
}
|
||||
snapshot := transport.Snapshot()
|
||||
return &smokeManagedFabricSession{session: session, transport: transport, server: server}, endpoint, snapshot.CapacityPressurePercent, nil
|
||||
}
|
||||
|
||||
func smokeQUICTLSConfig() *tls.Config {
|
||||
@@ -341,25 +313,20 @@ func smokeQUICTLSConfig() *tls.Config {
|
||||
}
|
||||
}
|
||||
|
||||
func smokeFabricVPNPacketOverSession(ctx context.Context, fabricSession *mesh.FabricSessionClient) (bool, bool, int, error) {
|
||||
func smokeFabricVPNPacketOverSession(ctx context.Context, fabricSession mesh.FabricTransportSession) (bool, bool, int, error) {
|
||||
const interactiveStreamID uint64 = 4400
|
||||
const bulkStreamID uint64 = 4401
|
||||
pump := fabricSession.StartPump(ctx, mesh.FabricSessionPumpOptions{
|
||||
OutboundBuffer: 4,
|
||||
InboundBuffer: 4,
|
||||
ErrorBuffer: 4,
|
||||
})
|
||||
defer pump.Close()
|
||||
for _, frame := range []fabricproto.Frame{
|
||||
{Type: fabricproto.FrameOpenStream, StreamID: interactiveStreamID, TrafficClass: fabricproto.TrafficClassInteractive},
|
||||
{Type: fabricproto.FrameOpenStream, StreamID: bulkStreamID, TrafficClass: fabricproto.TrafficClassBulk},
|
||||
} {
|
||||
if err := pump.Send(ctx, frame); err != nil {
|
||||
if err := fabricSession.Send(ctx, frame); err != nil {
|
||||
return false, false, 0, err
|
||||
}
|
||||
}
|
||||
transport := &vpnruntime.FabricSessionPacketTransport{
|
||||
Sender: pump,
|
||||
Sender: fabricSession,
|
||||
Receiver: fabricSession,
|
||||
StreamID: interactiveStreamID,
|
||||
VPNConnectionID: "vpn-smoke",
|
||||
SendDirection: vpnruntime.FabricDirectionGatewayToClient,
|
||||
@@ -378,7 +345,7 @@ func smokeFabricVPNPacketOverSession(ctx context.Context, fabricSession *mesh.Fa
|
||||
acked := map[uint64]bool{}
|
||||
for {
|
||||
select {
|
||||
case frame := <-pump.Frames():
|
||||
case frame := <-fabricSession.Frames():
|
||||
if frame.Type == fabricproto.FrameAck && frame.Sequence == 1 {
|
||||
acked[frame.StreamID] = true
|
||||
if acked[interactiveStreamID] && acked[bulkStreamID] {
|
||||
@@ -393,7 +360,7 @@ func smokeFabricVPNPacketOverSession(ctx context.Context, fabricSession *mesh.Fa
|
||||
return true, sharded, int(fanout), nil
|
||||
}
|
||||
}
|
||||
case err := <-pump.Errors():
|
||||
case err := <-fabricSession.Errors():
|
||||
return false, false, 0, err
|
||||
case <-timer.C:
|
||||
return false, false, 0, fmt.Errorf("timed out waiting for fabric vpn packet ack")
|
||||
@@ -403,6 +370,68 @@ func smokeFabricVPNPacketOverSession(ctx context.Context, fabricSession *mesh.Fa
|
||||
}
|
||||
}
|
||||
|
||||
type smokeManagedFabricSession struct {
|
||||
session mesh.FabricTransportSession
|
||||
transport *mesh.QUICFabricTransport
|
||||
server *mesh.QUICFabricServer
|
||||
}
|
||||
|
||||
func (s *smokeManagedFabricSession) Send(ctx context.Context, frame fabricproto.Frame) error {
|
||||
return s.session.Send(ctx, frame)
|
||||
}
|
||||
|
||||
func (s *smokeManagedFabricSession) Frames() <-chan fabricproto.Frame {
|
||||
return s.session.Frames()
|
||||
}
|
||||
|
||||
func (s *smokeManagedFabricSession) Errors() <-chan error {
|
||||
return s.session.Errors()
|
||||
}
|
||||
|
||||
func (s *smokeManagedFabricSession) Closed() bool {
|
||||
return s.session.Closed()
|
||||
}
|
||||
|
||||
func (s *smokeManagedFabricSession) Close() error {
|
||||
var firstErr error
|
||||
if s.session != nil {
|
||||
firstErr = s.session.Close()
|
||||
}
|
||||
if s.transport != nil {
|
||||
if err := s.transport.Close(); firstErr == nil {
|
||||
firstErr = err
|
||||
}
|
||||
}
|
||||
if s.server != nil {
|
||||
if err := s.server.Close(); firstErr == nil {
|
||||
firstErr = err
|
||||
}
|
||||
}
|
||||
return firstErr
|
||||
}
|
||||
|
||||
func smokeFabricSessionRoundTrip(ctx context.Context, session mesh.FabricTransportSession, frame fabricproto.Frame) (fabricproto.Frame, error) {
|
||||
if err := session.Send(ctx, frame); err != nil {
|
||||
return fabricproto.Frame{}, err
|
||||
}
|
||||
timer := time.NewTimer(3 * time.Second)
|
||||
defer timer.Stop()
|
||||
for {
|
||||
select {
|
||||
case response := <-session.Frames():
|
||||
if response.Sequence == frame.Sequence {
|
||||
return response, nil
|
||||
}
|
||||
case err := <-session.Errors():
|
||||
return fabricproto.Frame{}, err
|
||||
case <-timer.C:
|
||||
return fabricproto.Frame{}, fmt.Errorf("timed out waiting for fabric session response")
|
||||
case <-ctx.Done():
|
||||
return fabricproto.Frame{}, ctx.Err()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func smokeIPv4TCPPacket(src [4]byte, dst [4]byte, srcPort uint16, dstPort uint16, flags byte) []byte {
|
||||
packet := make([]byte, 40)
|
||||
packet[0] = 0x45
|
||||
@@ -445,7 +474,7 @@ func writeSmokeScopedConfig(local mesh.PeerIdentity, peers map[string]string, ro
|
||||
func newSmokeNode(local mesh.PeerIdentity) *smokeNode {
|
||||
node := &smokeNode{Local: local}
|
||||
node.server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
mesh.Server{Local: node.Local, SyntheticRuntime: node.Runtime, FabricSessionEnabled: true, FabricSessionWebSocketEnabled: true}.Handler().ServeHTTP(w, r)
|
||||
mesh.Server{Local: node.Local, SyntheticRuntime: node.Runtime}.Handler().ServeHTTP(w, r)
|
||||
}))
|
||||
node.URL = node.server.URL
|
||||
return node
|
||||
|
||||
@@ -6,7 +6,6 @@ import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/signal"
|
||||
"runtime"
|
||||
@@ -15,9 +14,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/agent"
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/hostagent"
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/mesh"
|
||||
)
|
||||
|
||||
type installCommandConfig struct {
|
||||
@@ -82,10 +79,6 @@ func main() {
|
||||
if err := runUpdateHostAgentLoop(ctx, os.Args[2:]); err != nil {
|
||||
log.Fatalf("update-host-agent-loop failed: %v", err)
|
||||
}
|
||||
case "fabric-session-smoke":
|
||||
if err := runFabricSessionSmoke(ctx, os.Args[2:]); err != nil {
|
||||
log.Fatalf("fabric-session-smoke failed: %v", err)
|
||||
}
|
||||
default:
|
||||
usage()
|
||||
os.Exit(2)
|
||||
@@ -117,78 +110,6 @@ func applyStagedSelfUpdate() {
|
||||
_ = os.Remove(backup)
|
||||
}
|
||||
|
||||
func runFabricSessionSmoke(ctx context.Context, args []string) error {
|
||||
fs := flag.NewFlagSet("fabric-session-smoke", flag.ContinueOnError)
|
||||
var meshURL string
|
||||
var token string
|
||||
var timeoutSeconds int
|
||||
var payload string
|
||||
var authorityPayload string
|
||||
var authoritySignature string
|
||||
fs.StringVar(&meshURL, "mesh-url", getenv("RAP_MESH_SMOKE_URL", ""), "Mesh base URL, for example http://node:19131.")
|
||||
fs.StringVar(&token, "token", getenv("RAP_FABRIC_SESSION_TOKEN", ""), "Fabric session token starting with rap_fsn_.")
|
||||
fs.IntVar(&timeoutSeconds, "timeout-seconds", getenvInt("RAP_FABRIC_SESSION_SMOKE_TIMEOUT_SECONDS", 5), "Smoke timeout in seconds.")
|
||||
fs.StringVar(&payload, "payload", getenv("RAP_FABRIC_SESSION_SMOKE_PAYLOAD", "rap-fabric-session-smoke"), "Ping payload.")
|
||||
fs.StringVar(&authorityPayload, "authority-payload", getenv("RAP_FABRIC_SESSION_AUTHORITY_PAYLOAD", ""), "Base64 or JSON fabric session authority payload header.")
|
||||
fs.StringVar(&authoritySignature, "authority-signature", getenv("RAP_FABRIC_SESSION_AUTHORITY_SIGNATURE", ""), "Base64 or JSON fabric session authority signature header.")
|
||||
if err := fs.Parse(args); err != nil {
|
||||
return err
|
||||
}
|
||||
if strings.TrimSpace(meshURL) == "" {
|
||||
return fmt.Errorf("mesh-url is required")
|
||||
}
|
||||
if strings.TrimSpace(token) == "" {
|
||||
return fmt.Errorf("token is required")
|
||||
}
|
||||
if timeoutSeconds <= 0 {
|
||||
timeoutSeconds = 5
|
||||
}
|
||||
smokeCtx, cancel := context.WithTimeout(ctx, time.Duration(timeoutSeconds)*time.Second)
|
||||
defer cancel()
|
||||
header := make(http.Header)
|
||||
if strings.TrimSpace(authorityPayload) != "" {
|
||||
header.Set("X-RAP-Fabric-Session-Authority-Payload", strings.TrimSpace(authorityPayload))
|
||||
}
|
||||
if strings.TrimSpace(authoritySignature) != "" {
|
||||
header.Set("X-RAP-Fabric-Session-Authority-Signature", strings.TrimSpace(authoritySignature))
|
||||
}
|
||||
startedAt := time.Now()
|
||||
response, err := mesh.NewClient(meshURL).SendFabricSessionFrame(smokeCtx, mesh.FabricSessionDialOptions{
|
||||
Token: token,
|
||||
Header: header,
|
||||
Timeout: time.Duration(timeoutSeconds) * time.Second,
|
||||
}, fabricproto.Frame{
|
||||
Type: fabricproto.FramePing,
|
||||
Sequence: uint64(startedAt.UnixNano()),
|
||||
Payload: []byte(payload),
|
||||
})
|
||||
duration := time.Since(startedAt)
|
||||
result := map[string]any{
|
||||
"schema_version": "rap.fabric_session_smoke_result.v1",
|
||||
"mesh_url": strings.TrimSpace(meshURL),
|
||||
"ok": err == nil && response.Type == fabricproto.FramePong && string(response.Payload) == payload,
|
||||
"latency_ms": duration.Milliseconds(),
|
||||
"response_type": response.Type,
|
||||
"sequence": response.Sequence,
|
||||
"authority": strings.TrimSpace(authorityPayload) != "" || strings.TrimSpace(authoritySignature) != "",
|
||||
}
|
||||
if err != nil {
|
||||
result["error"] = err.Error()
|
||||
}
|
||||
encoded, marshalErr := json.MarshalIndent(result, "", " ")
|
||||
if marshalErr != nil {
|
||||
return marshalErr
|
||||
}
|
||||
fmt.Println(string(encoded))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if response.Type != fabricproto.FramePong || string(response.Payload) != payload {
|
||||
return fmt.Errorf("fabric session smoke returned unexpected response type=%d payload=%q", response.Type, string(response.Payload))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func runInstallLinux(ctx context.Context, args []string) error {
|
||||
fs := flag.NewFlagSet("install-linux", flag.ContinueOnError)
|
||||
cfg := hostagent.LinuxInstallConfig{}
|
||||
@@ -215,16 +136,15 @@ func runInstallLinux(ctx context.Context, args []string) error {
|
||||
fs.IntVar(&cfg.AutoUpdateHealthTimeoutSeconds, "auto-update-health-timeout-seconds", getenvInt("RAP_UPDATE_HEALTH_TIMEOUT_SECONDS", 30), "Updated service health timeout in seconds.")
|
||||
fs.StringVar(&cfg.HostAgentSourcePath, "host-agent-source-path", getenv("RAP_HOST_AGENT_SOURCE_PATH", ""), "Source rap-host-agent path copied to the persistent updater location.")
|
||||
fs.BoolVar(&cfg.RuntimeConfig.WorkloadSupervisionEnabled, "workload-supervision-enabled", getenvBool("RAP_WORKLOAD_SUPERVISION_ENABLED", false), "Enable node-agent workload status reporting.")
|
||||
fs.BoolVar(&cfg.RuntimeConfig.MeshSyntheticRuntimeEnabled, "mesh-synthetic-runtime-enabled", getenvBool("RAP_MESH_SYNTHETIC_RUNTIME_ENABLED", true), "Enable synthetic mesh runtime.")
|
||||
fs.BoolVar(&cfg.RuntimeConfig.MeshSyntheticRuntimeEnabled, "mesh-synthetic-runtime-enabled", getenvBool("RAP_MESH_SYNTHETIC_RUNTIME_ENABLED", false), "Enable historical synthetic mesh runtime.")
|
||||
fs.BoolVar(&cfg.RuntimeConfig.MeshProductionForwardingEnabled, "mesh-production-forwarding-enabled", getenvBool("RAP_MESH_PRODUCTION_FORWARDING_ENABLED", false), "Enable production forwarding gate; runtime still fail-closed if unavailable.")
|
||||
fs.BoolVar(&cfg.RuntimeConfig.MeshFabricSessionEnabled, "mesh-fabric-session-enabled", getenvBool("RAP_MESH_FABRIC_SESSION_ENABLED", false), "Enable authenticated fabric session endpoint.")
|
||||
fs.BoolVar(&cfg.RuntimeConfig.VPNFabricSessionTransportEnabled, "vpn-fabric-session-transport-enabled", getenvBool("RAP_VPN_FABRIC_SESSION_TRANSPORT_ENABLED", false), "Route VPN packet transport over persistent fabric sessions.")
|
||||
fs.BoolVar(&cfg.RuntimeConfig.MeshQUICFabricEnabled, "mesh-quic-fabric-enabled", getenvBool("RAP_MESH_QUIC_FABRIC_ENABLED", false), "Enable QUIC/UDP fabric listener.")
|
||||
fs.StringVar(&cfg.RuntimeConfig.MeshQUICFabricListenAddr, "mesh-quic-fabric-listen-addr", getenv("RAP_MESH_QUIC_FABRIC_LISTEN_ADDR", ""), "QUIC/UDP fabric listen address.")
|
||||
fs.IntVar(&cfg.RuntimeConfig.VPNFabricSessionStreamShards, "vpn-fabric-session-stream-shards", getenvInt("RAP_VPN_FABRIC_SESSION_STREAM_SHARDS", 4), "VPN fabric-session stream shards per traffic class.")
|
||||
fs.IntVar(&cfg.RuntimeConfig.VPNFabricQUICMaxStreamsPerConn, "vpn-fabric-quic-max-streams-per-conn", getenvInt("RAP_VPN_FABRIC_QUIC_MAX_STREAMS_PER_CONN", 64), "Maximum logical fabric-session streams per cached VPN QUIC carrier connection.")
|
||||
fs.IntVar(&cfg.RuntimeConfig.VPNFabricQUICIdleTTLSeconds, "vpn-fabric-quic-idle-ttl-seconds", getenvInt("RAP_VPN_FABRIC_QUIC_IDLE_TTL_SECONDS", 300), "Idle TTL seconds for cached VPN QUIC carrier connections.")
|
||||
fs.StringVar(&cfg.RuntimeConfig.MeshListenAddr, "mesh-listen-addr", getenv("RAP_MESH_LISTEN_ADDR", ":19131"), "Synthetic mesh HTTP listen address.")
|
||||
fs.StringVar(&cfg.RuntimeConfig.MeshListenAddr, "mesh-listen-addr", getenv("RAP_MESH_LISTEN_ADDR", ""), "Historical synthetic mesh HTTP listen address.")
|
||||
fs.StringVar(&cfg.RuntimeConfig.MeshListenPortMode, "mesh-listen-port-mode", getenv("RAP_MESH_LISTEN_PORT_MODE", "auto"), "Mesh listen port behavior: manual, auto, or disabled.")
|
||||
fs.IntVar(&cfg.RuntimeConfig.MeshListenAutoPortStart, "mesh-listen-auto-port-start", getenvInt("RAP_MESH_LISTEN_AUTO_PORT_START", 19131), "First port used when mesh listen port mode is auto.")
|
||||
fs.IntVar(&cfg.RuntimeConfig.MeshListenAutoPortEnd, "mesh-listen-auto-port-end", getenvInt("RAP_MESH_LISTEN_AUTO_PORT_END", 19231), "Last port used when mesh listen port mode is auto.")
|
||||
@@ -303,16 +223,15 @@ func runInstallWindows(ctx context.Context, args []string) error {
|
||||
fs.IntVar(&cfg.AutoUpdateHealthTimeoutSeconds, "auto-update-health-timeout-seconds", getenvInt("RAP_UPDATE_HEALTH_TIMEOUT_SECONDS", 30), "Updated service health timeout in seconds.")
|
||||
fs.StringVar(&cfg.HostAgentSourcePath, "host-agent-source-path", getenv("RAP_HOST_AGENT_SOURCE_PATH", ""), "Source rap-host-agent.exe path copied to the persistent updater location.")
|
||||
fs.BoolVar(&cfg.RuntimeConfig.WorkloadSupervisionEnabled, "workload-supervision-enabled", getenvBool("RAP_WORKLOAD_SUPERVISION_ENABLED", false), "Enable node-agent workload status reporting.")
|
||||
fs.BoolVar(&cfg.RuntimeConfig.MeshSyntheticRuntimeEnabled, "mesh-synthetic-runtime-enabled", getenvBool("RAP_MESH_SYNTHETIC_RUNTIME_ENABLED", true), "Enable synthetic mesh runtime.")
|
||||
fs.BoolVar(&cfg.RuntimeConfig.MeshSyntheticRuntimeEnabled, "mesh-synthetic-runtime-enabled", getenvBool("RAP_MESH_SYNTHETIC_RUNTIME_ENABLED", false), "Enable historical synthetic mesh runtime.")
|
||||
fs.BoolVar(&cfg.RuntimeConfig.MeshProductionForwardingEnabled, "mesh-production-forwarding-enabled", getenvBool("RAP_MESH_PRODUCTION_FORWARDING_ENABLED", false), "Enable production forwarding gate; runtime still fail-closed if unavailable.")
|
||||
fs.BoolVar(&cfg.RuntimeConfig.MeshFabricSessionEnabled, "mesh-fabric-session-enabled", getenvBool("RAP_MESH_FABRIC_SESSION_ENABLED", false), "Enable authenticated fabric session endpoint.")
|
||||
fs.BoolVar(&cfg.RuntimeConfig.VPNFabricSessionTransportEnabled, "vpn-fabric-session-transport-enabled", getenvBool("RAP_VPN_FABRIC_SESSION_TRANSPORT_ENABLED", false), "Route VPN packet transport over persistent fabric sessions.")
|
||||
fs.BoolVar(&cfg.RuntimeConfig.MeshQUICFabricEnabled, "mesh-quic-fabric-enabled", getenvBool("RAP_MESH_QUIC_FABRIC_ENABLED", false), "Enable QUIC/UDP fabric listener.")
|
||||
fs.StringVar(&cfg.RuntimeConfig.MeshQUICFabricListenAddr, "mesh-quic-fabric-listen-addr", getenv("RAP_MESH_QUIC_FABRIC_LISTEN_ADDR", ""), "QUIC/UDP fabric listen address.")
|
||||
fs.IntVar(&cfg.RuntimeConfig.VPNFabricSessionStreamShards, "vpn-fabric-session-stream-shards", getenvInt("RAP_VPN_FABRIC_SESSION_STREAM_SHARDS", 4), "VPN fabric-session stream shards per traffic class.")
|
||||
fs.IntVar(&cfg.RuntimeConfig.VPNFabricQUICMaxStreamsPerConn, "vpn-fabric-quic-max-streams-per-conn", getenvInt("RAP_VPN_FABRIC_QUIC_MAX_STREAMS_PER_CONN", 64), "Maximum logical fabric-session streams per cached VPN QUIC carrier connection.")
|
||||
fs.IntVar(&cfg.RuntimeConfig.VPNFabricQUICIdleTTLSeconds, "vpn-fabric-quic-idle-ttl-seconds", getenvInt("RAP_VPN_FABRIC_QUIC_IDLE_TTL_SECONDS", 300), "Idle TTL seconds for cached VPN QUIC carrier connections.")
|
||||
fs.StringVar(&cfg.RuntimeConfig.MeshListenAddr, "mesh-listen-addr", getenv("RAP_MESH_LISTEN_ADDR", ":19131"), "Synthetic mesh HTTP listen address.")
|
||||
fs.StringVar(&cfg.RuntimeConfig.MeshListenAddr, "mesh-listen-addr", getenv("RAP_MESH_LISTEN_ADDR", ""), "Historical synthetic mesh HTTP listen address.")
|
||||
fs.StringVar(&cfg.RuntimeConfig.MeshListenPortMode, "mesh-listen-port-mode", getenv("RAP_MESH_LISTEN_PORT_MODE", "auto"), "Mesh listen port behavior: manual, auto, or disabled.")
|
||||
fs.IntVar(&cfg.RuntimeConfig.MeshListenAutoPortStart, "mesh-listen-auto-port-start", getenvInt("RAP_MESH_LISTEN_AUTO_PORT_START", 19131), "First port used when mesh listen port mode is auto.")
|
||||
fs.IntVar(&cfg.RuntimeConfig.MeshListenAutoPortEnd, "mesh-listen-auto-port-end", getenvInt("RAP_MESH_LISTEN_AUTO_PORT_END", 19231), "Last port used when mesh listen port mode is auto.")
|
||||
@@ -513,16 +432,19 @@ func runUpdateLoop(ctx context.Context, args []string) error {
|
||||
}
|
||||
cfg.HostAgentUpdateEnabled = hostAgentStatusEnabled
|
||||
cfg.HostAgentUpdateRequest = hostagent.HostAgentUpdateRequest{
|
||||
BackendURL: req.BackendURL,
|
||||
ClusterID: req.ClusterID,
|
||||
NodeID: req.NodeID,
|
||||
StateDir: req.StateDir,
|
||||
CurrentVersion: hostAgentVersion,
|
||||
Channel: req.Channel,
|
||||
OS: firstNonEmptyLocal(req.OS, runtime.GOOS),
|
||||
Arch: firstNonEmptyLocal(req.Arch, runtime.GOARCH),
|
||||
InstallType: hostagent.BinaryUpdateInstallType,
|
||||
BinaryPath: hostAgentBinaryPath,
|
||||
BackendURL: req.BackendURL,
|
||||
ClusterID: req.ClusterID,
|
||||
NodeID: req.NodeID,
|
||||
StateDir: req.StateDir,
|
||||
ClusterAuthorityPublicKey: req.ClusterAuthorityPublicKey,
|
||||
FabricRegistryRecordsJSON: req.FabricRegistryRecordsJSON,
|
||||
MeshRegion: req.MeshRegion,
|
||||
CurrentVersion: hostAgentVersion,
|
||||
Channel: req.Channel,
|
||||
OS: firstNonEmptyLocal(req.OS, runtime.GOOS),
|
||||
Arch: firstNonEmptyLocal(req.Arch, runtime.GOARCH),
|
||||
InstallType: hostagent.BinaryUpdateInstallType,
|
||||
BinaryPath: hostAgentBinaryPath,
|
||||
}
|
||||
if req.InstallType == hostagent.WindowsUpdateInstallType || runtime.GOOS == "windows" {
|
||||
cfg.HostAgentUpdateRequest.InstallType = "windows_binary"
|
||||
@@ -569,6 +491,9 @@ func parseMonitor(args []string) (hostagent.MonitorConfig, error) {
|
||||
fs.StringVar(&cfg.ClusterID, "cluster-id", getenv("RAP_CLUSTER_ID", ""), "Cluster ID.")
|
||||
fs.StringVar(&cfg.NodeID, "node-id", getenv("RAP_NODE_ID", ""), "Already enrolled node ID.")
|
||||
fs.StringVar(&cfg.StateDir, "state-dir", getenv("RAP_NODE_STATE_DIR", hostagent.DefaultStateDir), "Host path containing node-agent identity.json.")
|
||||
fs.StringVar(&cfg.ClusterAuthorityPublicKey, "cluster-authority-public-key", getenv("RAP_CLUSTER_AUTHORITY_PUBLIC_KEY", ""), "Pinned Ed25519 cluster authority public key for signed fabric registry records.")
|
||||
fs.StringVar(&cfg.FabricRegistryRecordsJSON, "fabric-registry-records-json", getenv("RAP_FABRIC_REGISTRY_RECORDS_JSON", ""), "JSON array of signed QUIC-only fabric registry records used to reach update/control services.")
|
||||
fs.StringVar(&cfg.MeshRegion, "mesh-region", getenv("RAP_MESH_REGION", ""), "Region/site hint for fabric registry endpoint selection.")
|
||||
fs.StringVar(&cfg.Product, "product", getenv("RAP_MONITOR_PRODUCT", hostagent.DefaultMonitorProduct), "Status product name.")
|
||||
fs.StringVar(&cfg.CurrentVersion, "current-version", getenv("RAP_HOST_AGENT_VERSION", agent.Version), "Current rap-host-agent version.")
|
||||
fs.StringVar(&cfg.DockerBinary, "docker-binary", getenv("RAP_DOCKER_BINARY", "docker"), "Docker CLI binary.")
|
||||
@@ -716,6 +641,9 @@ func parseHostAgentUpdate(args []string) (hostagent.HostAgentUpdateRequest, int,
|
||||
fs.StringVar(&req.ClusterID, "cluster-id", getenv("RAP_CLUSTER_ID", ""), "Cluster ID.")
|
||||
fs.StringVar(&req.NodeID, "node-id", getenv("RAP_NODE_ID", ""), "Already enrolled node ID.")
|
||||
fs.StringVar(&req.StateDir, "state-dir", getenv("RAP_NODE_STATE_DIR", ""), "Host path containing node-agent identity.json.")
|
||||
fs.StringVar(&req.ClusterAuthorityPublicKey, "cluster-authority-public-key", getenv("RAP_CLUSTER_AUTHORITY_PUBLIC_KEY", ""), "Pinned Ed25519 cluster authority public key for signed fabric registry records.")
|
||||
fs.StringVar(&req.FabricRegistryRecordsJSON, "fabric-registry-records-json", getenv("RAP_FABRIC_REGISTRY_RECORDS_JSON", ""), "JSON array of signed QUIC-only fabric registry records used to reach update/control services.")
|
||||
fs.StringVar(&req.MeshRegion, "mesh-region", getenv("RAP_MESH_REGION", ""), "Region/site hint for fabric registry endpoint selection.")
|
||||
fs.StringVar(&req.CurrentVersion, "current-version", getenv("RAP_HOST_AGENT_VERSION", agent.Version), "Currently installed rap-host-agent version.")
|
||||
fs.StringVar(&req.Channel, "channel", getenv("RAP_UPDATE_CHANNEL", ""), "Optional update channel override.")
|
||||
fs.StringVar(&req.OS, "os", getenv("RAP_HOST_AGENT_UPDATE_OS", runtime.GOOS), "Host-agent artifact OS selector.")
|
||||
@@ -739,6 +667,9 @@ func registerUpdateFlags(fs *flag.FlagSet, req *hostagent.UpdateRequest, healthT
|
||||
fs.StringVar(&req.ClusterID, "cluster-id", getenv("RAP_CLUSTER_ID", ""), "Cluster ID.")
|
||||
fs.StringVar(&req.NodeID, "node-id", getenv("RAP_NODE_ID", ""), "Already enrolled node ID.")
|
||||
fs.StringVar(&req.StateDir, "state-dir", getenv("RAP_NODE_STATE_DIR", ""), "Host path containing node-agent identity.json; used when node-id is not known yet.")
|
||||
fs.StringVar(&req.ClusterAuthorityPublicKey, "cluster-authority-public-key", getenv("RAP_CLUSTER_AUTHORITY_PUBLIC_KEY", ""), "Pinned Ed25519 cluster authority public key for signed fabric registry records.")
|
||||
fs.StringVar(&req.FabricRegistryRecordsJSON, "fabric-registry-records-json", getenv("RAP_FABRIC_REGISTRY_RECORDS_JSON", ""), "JSON array of signed QUIC-only fabric registry records used to reach update/control services.")
|
||||
fs.StringVar(&req.MeshRegion, "mesh-region", getenv("RAP_MESH_REGION", ""), "Region/site hint for fabric registry endpoint selection.")
|
||||
fs.StringVar(&req.Product, "product", getenv("RAP_UPDATE_PRODUCT", hostagent.DefaultUpdateProduct), "Update product name.")
|
||||
fs.StringVar(&req.CurrentVersion, "current-version", getenv("RAP_NODE_AGENT_VERSION", agent.Version), "Currently running product version.")
|
||||
fs.StringVar(&req.OS, "os", getenv("RAP_UPDATE_OS", runtime.GOOS), "Artifact OS selector.")
|
||||
@@ -797,16 +728,15 @@ func parseInstall(args []string) (installCommandConfig, error) {
|
||||
fs.IntVar(&autoUpdate.MonitorDiskCritical, "monitor-disk-critical-percent", getenvInt("RAP_MONITOR_DISK_CRITICAL_PERCENT", hostagent.DefaultMonitorDiskCriticalPercent), "Disk used percent that reports failure after cleanup.")
|
||||
fs.BoolVar(&autoUpdate.MonitorCleanupDocker, "monitor-cleanup-docker", getenvBool("RAP_MONITOR_CLEANUP_DOCKER", true), "Run safe docker prune cleanup when disk is above cleanup threshold.")
|
||||
fs.BoolVar(&cfg.WorkloadSupervisionEnabled, "workload-supervision-enabled", getenvBool("RAP_WORKLOAD_SUPERVISION_ENABLED", false), "Enable node-agent workload status reporting.")
|
||||
fs.BoolVar(&cfg.MeshSyntheticRuntimeEnabled, "mesh-synthetic-runtime-enabled", getenvBool("RAP_MESH_SYNTHETIC_RUNTIME_ENABLED", false), "Enable synthetic mesh runtime.")
|
||||
fs.BoolVar(&cfg.MeshSyntheticRuntimeEnabled, "mesh-synthetic-runtime-enabled", getenvBool("RAP_MESH_SYNTHETIC_RUNTIME_ENABLED", false), "Enable historical synthetic mesh runtime.")
|
||||
fs.BoolVar(&cfg.MeshProductionForwardingEnabled, "mesh-production-forwarding-enabled", getenvBool("RAP_MESH_PRODUCTION_FORWARDING_ENABLED", false), "Enable production forwarding gate; runtime still fail-closed if unavailable.")
|
||||
fs.BoolVar(&cfg.MeshFabricSessionEnabled, "mesh-fabric-session-enabled", getenvBool("RAP_MESH_FABRIC_SESSION_ENABLED", false), "Enable authenticated fabric session endpoint.")
|
||||
fs.BoolVar(&cfg.VPNFabricSessionTransportEnabled, "vpn-fabric-session-transport-enabled", getenvBool("RAP_VPN_FABRIC_SESSION_TRANSPORT_ENABLED", false), "Route VPN packet transport over persistent fabric sessions.")
|
||||
fs.BoolVar(&cfg.MeshQUICFabricEnabled, "mesh-quic-fabric-enabled", getenvBool("RAP_MESH_QUIC_FABRIC_ENABLED", false), "Enable QUIC/UDP fabric listener.")
|
||||
fs.StringVar(&cfg.MeshQUICFabricListenAddr, "mesh-quic-fabric-listen-addr", getenv("RAP_MESH_QUIC_FABRIC_LISTEN_ADDR", ""), "QUIC/UDP fabric listen address.")
|
||||
fs.IntVar(&cfg.VPNFabricSessionStreamShards, "vpn-fabric-session-stream-shards", getenvInt("RAP_VPN_FABRIC_SESSION_STREAM_SHARDS", 4), "VPN fabric-session stream shards per traffic class.")
|
||||
fs.IntVar(&cfg.VPNFabricQUICMaxStreamsPerConn, "vpn-fabric-quic-max-streams-per-conn", getenvInt("RAP_VPN_FABRIC_QUIC_MAX_STREAMS_PER_CONN", 64), "Maximum logical fabric-session streams per cached VPN QUIC carrier connection.")
|
||||
fs.IntVar(&cfg.VPNFabricQUICIdleTTLSeconds, "vpn-fabric-quic-idle-ttl-seconds", getenvInt("RAP_VPN_FABRIC_QUIC_IDLE_TTL_SECONDS", 300), "Idle TTL seconds for cached VPN QUIC carrier connections.")
|
||||
fs.StringVar(&cfg.MeshListenAddr, "mesh-listen-addr", getenv("RAP_MESH_LISTEN_ADDR", ""), "Synthetic mesh HTTP listen address inside container.")
|
||||
fs.StringVar(&cfg.MeshListenAddr, "mesh-listen-addr", getenv("RAP_MESH_LISTEN_ADDR", ""), "Historical synthetic mesh HTTP listen address inside container.")
|
||||
fs.StringVar(&cfg.MeshListenPortMode, "mesh-listen-port-mode", getenv("RAP_MESH_LISTEN_PORT_MODE", ""), "Mesh listen port behavior: manual, auto, or disabled.")
|
||||
fs.IntVar(&cfg.MeshListenAutoPortStart, "mesh-listen-auto-port-start", getenvInt("RAP_MESH_LISTEN_AUTO_PORT_START", 0), "First port used when mesh listen port mode is auto.")
|
||||
fs.IntVar(&cfg.MeshListenAutoPortEnd, "mesh-listen-auto-port-end", getenvInt("RAP_MESH_LISTEN_AUTO_PORT_END", 0), "Last port used when mesh listen port mode is auto.")
|
||||
@@ -941,13 +871,12 @@ func usage() {
|
||||
rap-host-agent install -backend-url URL -cluster-id ID -join-token TOKEN -node-name NAME [docker options]
|
||||
rap-host-agent install-windows -profile-url URL -install-token TOKEN [-node-name NAME] [windows options]
|
||||
rap-host-agent install-linux -profile-url URL -install-token TOKEN [-node-name NAME] [linux/systemd options]
|
||||
rap-host-agent install-updater -backend-url URL -cluster-id ID -state-dir DIR -container-name NAME
|
||||
rap-host-agent update-host-agent -backend-url URL -cluster-id ID -state-dir DIR
|
||||
rap-host-agent update-host-agent-loop -backend-url URL -cluster-id ID -state-dir DIR
|
||||
rap-host-agent monitor-loop -backend-url URL -cluster-id ID -state-dir DIR --watch-container NAME
|
||||
rap-host-agent monitor-once -backend-url URL -cluster-id ID -state-dir DIR --watch-container NAME
|
||||
rap-host-agent fabric-session-smoke -mesh-url URL -token rap_fsn_TOKEN [-authority-payload VALUE -authority-signature VALUE]
|
||||
rap-host-agent update -backend-url URL -cluster-id ID -node-id ID [-container-name NAME]
|
||||
rap-host-agent update-loop -backend-url URL -cluster-id ID -node-id ID [-container-name NAME]
|
||||
rap-host-agent install-updater (-backend-url URL | -fabric-registry-records-json JSON) -cluster-id ID -state-dir DIR -container-name NAME
|
||||
rap-host-agent update-host-agent (-backend-url URL | -fabric-registry-records-json JSON) -cluster-id ID -state-dir DIR
|
||||
rap-host-agent update-host-agent-loop (-backend-url URL | -fabric-registry-records-json JSON) -cluster-id ID -state-dir DIR
|
||||
rap-host-agent monitor-loop (-backend-url URL | -fabric-registry-records-json JSON) -cluster-id ID -state-dir DIR --watch-container NAME
|
||||
rap-host-agent monitor-once (-backend-url URL | -fabric-registry-records-json JSON) -cluster-id ID -state-dir DIR --watch-container NAME
|
||||
rap-host-agent update (-backend-url URL | -fabric-registry-records-json JSON) -cluster-id ID -node-id ID [-container-name NAME]
|
||||
rap-host-agent update-loop (-backend-url URL | -fabric-registry-records-json JSON) -cluster-id ID -node-id ID [-container-name NAME]
|
||||
rap-host-agent status [-container-name NAME]`)
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -21,6 +21,7 @@ import (
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
@@ -204,7 +205,7 @@ func TestRouteManagerDecisionsFromControlPlaneConsumesRebuildRouteCommand(t *tes
|
||||
}
|
||||
decision := decisions[0]
|
||||
if decision.RouteID != "route-primary" ||
|
||||
decision.RebuildStatus != "pending_degraded_fallback" ||
|
||||
decision.RebuildStatus != "pending_degraded_route_state" ||
|
||||
decision.DecisionSource != "service_channel_remediation_command" ||
|
||||
decision.RebuildRequestID != "cmd-rebuild" {
|
||||
t.Fatalf("unexpected rebuild remediation decision: %+v", decision)
|
||||
@@ -279,7 +280,6 @@ func TestGatewayTransportForAssignmentUsesFabricSessionWhenEnabled(t *testing.T)
|
||||
&syntheticMeshState{
|
||||
ProductionForwardTransport: noopProductionForwardTransport{},
|
||||
VPNFabricInbox: inbox,
|
||||
VPNFabricSessionPeers: mesh.NewFabricSessionPeerManager(),
|
||||
PeerEndpointCandidates: map[string][]mesh.PeerEndpointCandidate{
|
||||
"entry-1": {{
|
||||
EndpointID: "entry-1-quic",
|
||||
@@ -322,7 +322,6 @@ func TestGatewayTransportForAssignmentFallsBackWhenFabricSessionUnavailable(t *t
|
||||
&syntheticMeshState{
|
||||
ProductionForwardTransport: noopProductionForwardTransport{},
|
||||
VPNFabricInbox: inbox,
|
||||
VPNFabricSessionPeers: mesh.NewFabricSessionPeerManager(),
|
||||
PeerEndpoints: map[string]string{},
|
||||
Routes: []mesh.SyntheticRoute{{
|
||||
RouteID: "route-exit-entry",
|
||||
@@ -424,6 +423,496 @@ func testMainQUICCertSHA256(t *testing.T, config *tls.Config) string {
|
||||
return hex.EncodeToString(sum[:])
|
||||
}
|
||||
|
||||
func TestFabricControlForwardHandlerUsesRegistryQUICControlAPI(t *testing.T) {
|
||||
tlsConfig := testMainQUICTLSConfig(t)
|
||||
server, err := mesh.StartQUICFabricServer(context.Background(), mesh.QUICFabricServerConfig{
|
||||
ListenAddr: "127.0.0.1:0",
|
||||
TLSConfig: tlsConfig,
|
||||
FabricControlHandler: func(_ context.Context, payload []byte) ([]byte, error) {
|
||||
var req client.RawControlRequest
|
||||
if err := json.Unmarshal(payload, &req); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if req.Path != "/auth/login" {
|
||||
return nil, fmt.Errorf("unexpected path %s", req.Path)
|
||||
}
|
||||
return json.Marshal(client.RawControlResponse{StatusCode: 200, Body: json.RawMessage(`{"via":"fabric"}`)})
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("start quic fabric server: %v", err)
|
||||
}
|
||||
defer server.Close()
|
||||
|
||||
now := time.Now().UTC()
|
||||
publicKey, privateKey, err := ed25519.GenerateKey(rand.Reader)
|
||||
if err != nil {
|
||||
t.Fatalf("generate key: %v", err)
|
||||
}
|
||||
issuer := mesh.FabricRegistryTrustedIssuer{IssuerID: "authority-1", Role: mesh.FabricRegistryAuthorityControl, PublicKey: publicKey}
|
||||
record := mesh.FabricRegistryGossipRecord{
|
||||
SchemaVersion: mesh.FabricRegistryGossipRecordSchema,
|
||||
ClusterID: "cluster-1",
|
||||
Service: mesh.FabricRegistryServiceControlAPI,
|
||||
Scope: mesh.FabricRegistryScopeCluster,
|
||||
Epoch: 1,
|
||||
IssuedAt: now.Add(-time.Minute),
|
||||
ExpiresAt: now.Add(time.Hour),
|
||||
IssuerNodeID: "authority-1",
|
||||
IssuerRole: mesh.FabricRegistryAuthorityControl,
|
||||
Endpoints: []mesh.FabricRegistryEndpoint{{
|
||||
EndpointID: "control-a",
|
||||
Address: "quic://" + server.Addr().String(),
|
||||
Transport: "direct_quic",
|
||||
PeerCertSHA256: testMainQUICCertSHA256(t, tlsConfig),
|
||||
}},
|
||||
}
|
||||
signed, err := mesh.SignFabricRegistryGossipRecord(record, issuer, privateKey)
|
||||
if err != nil {
|
||||
t.Fatalf("sign registry record: %v", err)
|
||||
}
|
||||
registry := mesh.NewFabricRegistry()
|
||||
if _, _, err := registry.ApplyGossipRecord(signed, mesh.FabricRegistryVerificationPolicy{
|
||||
LocalClusterID: "cluster-1",
|
||||
TrustedIssuers: []mesh.FabricRegistryTrustedIssuer{issuer},
|
||||
RequiredSignatures: 1,
|
||||
Now: now,
|
||||
}, true); err != nil {
|
||||
t.Fatalf("apply registry record: %v", err)
|
||||
}
|
||||
transport := mesh.NewQUICFabricTransport(nil)
|
||||
transport.SetLocalPeerID("node-a")
|
||||
handler := fabricControlForwardHandlerFromMeshState(nil, state.Identity{ClusterID: "cluster-1", NodeID: "node-a"}, &syntheticMeshState{
|
||||
FabricRegistry: registry,
|
||||
VPNFabricQUICTransport: transport,
|
||||
ListenerRuntimeConfig: config.Config{MeshRegion: "test"},
|
||||
})
|
||||
payload, err := handler(context.Background(), []byte(`{"method":"POST","path":"/auth/login","body":{"user":"a"}}`))
|
||||
if err != nil {
|
||||
t.Fatalf("fabric control handler: %v", err)
|
||||
}
|
||||
var response client.RawControlResponse
|
||||
if err := json.Unmarshal(payload, &response); err != nil {
|
||||
t.Fatalf("decode raw control response: %v", err)
|
||||
}
|
||||
if response.StatusCode != 200 || string(response.Body) != `{"via":"fabric"}` {
|
||||
t.Fatalf("response = %+v", response)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHeartbeatViaFabricControlUsesRegistryQUICControlAPI(t *testing.T) {
|
||||
tlsConfig := testMainQUICTLSConfig(t)
|
||||
server, err := mesh.StartQUICFabricServer(context.Background(), mesh.QUICFabricServerConfig{
|
||||
ListenAddr: "127.0.0.1:0",
|
||||
TLSConfig: tlsConfig,
|
||||
FabricControlHandler: func(_ context.Context, payload []byte) ([]byte, error) {
|
||||
var req client.RawControlRequest
|
||||
if err := json.Unmarshal(payload, &req); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if req.Method != http.MethodPost || req.Path != "/clusters/cluster-1/nodes/node-a/heartbeats" {
|
||||
return nil, fmt.Errorf("unexpected request: %+v", req)
|
||||
}
|
||||
return json.Marshal(client.RawControlResponse{
|
||||
StatusCode: 202,
|
||||
Body: json.RawMessage(`{
|
||||
"heartbeat":{"id":"hb-1"},
|
||||
"testing_flags":{"enabled":true,"synthetic_links_enabled":true,"applied_scopes":["cluster"]},
|
||||
"update_hint":{"schema_version":"rap.node_update_hint.v1","check_now":true,"generation":"gen-1"}
|
||||
}`),
|
||||
})
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("start quic fabric server: %v", err)
|
||||
}
|
||||
defer server.Close()
|
||||
|
||||
now := time.Now().UTC()
|
||||
publicKey, privateKey, err := ed25519.GenerateKey(rand.Reader)
|
||||
if err != nil {
|
||||
t.Fatalf("generate key: %v", err)
|
||||
}
|
||||
issuer := mesh.FabricRegistryTrustedIssuer{IssuerID: "authority-1", Role: mesh.FabricRegistryAuthorityControl, PublicKey: publicKey}
|
||||
record := mesh.FabricRegistryGossipRecord{
|
||||
SchemaVersion: mesh.FabricRegistryGossipRecordSchema,
|
||||
ClusterID: "cluster-1",
|
||||
Service: mesh.FabricRegistryServiceControlAPI,
|
||||
Scope: mesh.FabricRegistryScopeCluster,
|
||||
Epoch: 1,
|
||||
IssuedAt: now.Add(-time.Minute),
|
||||
ExpiresAt: now.Add(time.Hour),
|
||||
IssuerNodeID: "authority-1",
|
||||
IssuerRole: mesh.FabricRegistryAuthorityControl,
|
||||
Endpoints: []mesh.FabricRegistryEndpoint{{
|
||||
EndpointID: "control-a",
|
||||
Address: "quic://" + server.Addr().String(),
|
||||
Transport: "direct_quic",
|
||||
PeerCertSHA256: testMainQUICCertSHA256(t, tlsConfig),
|
||||
}},
|
||||
}
|
||||
signed, err := mesh.SignFabricRegistryGossipRecord(record, issuer, privateKey)
|
||||
if err != nil {
|
||||
t.Fatalf("sign registry record: %v", err)
|
||||
}
|
||||
registry := mesh.NewFabricRegistry()
|
||||
if _, _, err := registry.ApplyGossipRecord(signed, mesh.FabricRegistryVerificationPolicy{
|
||||
LocalClusterID: "cluster-1",
|
||||
TrustedIssuers: []mesh.FabricRegistryTrustedIssuer{issuer},
|
||||
RequiredSignatures: 1,
|
||||
Now: now,
|
||||
}, true); err != nil {
|
||||
t.Fatalf("apply registry record: %v", err)
|
||||
}
|
||||
response, viaFabric, err := heartbeatViaFabricControl(context.Background(), state.Identity{ClusterID: "cluster-1", NodeID: "node-a"}, &syntheticMeshState{
|
||||
FabricRegistry: registry,
|
||||
VPNFabricQUICTransport: mesh.NewQUICFabricTransport(nil),
|
||||
}, client.HeartbeatRequest{HealthStatus: "healthy"})
|
||||
if err != nil {
|
||||
t.Fatalf("heartbeat via fabric: %v", err)
|
||||
}
|
||||
if !viaFabric || !response.TestingFlags.Enabled || response.UpdateHint == nil || response.UpdateHint.Generation != "gen-1" {
|
||||
t.Fatalf("unexpected heartbeat response viaFabric=%t response=%+v", viaFabric, response)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSyntheticMeshConfigRefreshUsesRegistryQUICControlAPI(t *testing.T) {
|
||||
tlsConfig := testMainQUICTLSConfig(t)
|
||||
server, err := mesh.StartQUICFabricServer(context.Background(), mesh.QUICFabricServerConfig{
|
||||
ListenAddr: "127.0.0.1:0",
|
||||
TLSConfig: tlsConfig,
|
||||
FabricControlHandler: func(_ context.Context, payload []byte) ([]byte, error) {
|
||||
var req client.RawControlRequest
|
||||
if err := json.Unmarshal(payload, &req); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if req.Method != http.MethodGet || req.Path != "/clusters/cluster-1/nodes/node-a/mesh/synthetic-config" {
|
||||
return nil, fmt.Errorf("unexpected request: %+v", req)
|
||||
}
|
||||
return json.Marshal(client.RawControlResponse{
|
||||
StatusCode: 200,
|
||||
Body: json.RawMessage(`{
|
||||
"synthetic_mesh_config":{
|
||||
"enabled":true,
|
||||
"config_version":"fabric-gen-1",
|
||||
"peer_directory_version":"pd-1",
|
||||
"policy_version":"pol-1",
|
||||
"peer_endpoints":{},
|
||||
"routes":[]
|
||||
}
|
||||
}`),
|
||||
})
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("start quic fabric server: %v", err)
|
||||
}
|
||||
defer server.Close()
|
||||
registry := signedTestControlRegistry(t, "cluster-1", "quic://"+server.Addr().String(), testMainQUICCertSHA256(t, tlsConfig))
|
||||
loaded, err := loadSyntheticMeshConfigRuntime(context.Background(), config.Config{}, state.Identity{ClusterID: "cluster-1", NodeID: "node-a"}, nil, &syntheticMeshState{
|
||||
FabricRegistry: registry,
|
||||
VPNFabricQUICTransport: mesh.NewQUICFabricTransport(nil),
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("load synthetic mesh config via fabric: %v", err)
|
||||
}
|
||||
if loaded.Source != "control_plane" || loaded.ConfigVersion != "fabric-gen-1" {
|
||||
t.Fatalf("loaded = %+v", loaded)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReportMeshLinkUsesRegistryQUICControlAPI(t *testing.T) {
|
||||
tlsConfig := testMainQUICTLSConfig(t)
|
||||
var received client.RawControlRequest
|
||||
server, err := mesh.StartQUICFabricServer(context.Background(), mesh.QUICFabricServerConfig{
|
||||
ListenAddr: "127.0.0.1:0",
|
||||
TLSConfig: tlsConfig,
|
||||
FabricControlHandler: func(_ context.Context, payload []byte) ([]byte, error) {
|
||||
if err := json.Unmarshal(payload, &received); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if received.Method != http.MethodPost || received.Path != "/clusters/cluster-1/mesh/links" {
|
||||
return nil, fmt.Errorf("unexpected request: %+v", received)
|
||||
}
|
||||
return json.Marshal(client.RawControlResponse{StatusCode: 202, Body: json.RawMessage(`{"ok":true}`)})
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("start quic fabric server: %v", err)
|
||||
}
|
||||
defer server.Close()
|
||||
registry := signedTestControlRegistry(t, "cluster-1", "quic://"+server.Addr().String(), testMainQUICCertSHA256(t, tlsConfig))
|
||||
err = reportMeshLink(context.Background(), nil, state.Identity{ClusterID: "cluster-1", NodeID: "node-a"}, &syntheticMeshState{
|
||||
FabricRegistry: registry,
|
||||
VPNFabricQUICTransport: mesh.NewQUICFabricTransport(nil),
|
||||
}, client.MeshLinkObservationRequest{
|
||||
SourceNodeID: "node-a",
|
||||
TargetNodeID: "node-b",
|
||||
LinkStatus: "reachable",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("report mesh link via fabric: %v", err)
|
||||
}
|
||||
if len(received.Body) == 0 || !strings.Contains(string(received.Body), `"target_node_id":"node-b"`) {
|
||||
t.Fatalf("unexpected received body: %s", string(received.Body))
|
||||
}
|
||||
}
|
||||
|
||||
func TestReportTelemetryUsesRegistryQUICControlAPI(t *testing.T) {
|
||||
tlsConfig := testMainQUICTLSConfig(t)
|
||||
var received client.RawControlRequest
|
||||
server, err := mesh.StartQUICFabricServer(context.Background(), mesh.QUICFabricServerConfig{
|
||||
ListenAddr: "127.0.0.1:0",
|
||||
TLSConfig: tlsConfig,
|
||||
FabricControlHandler: func(_ context.Context, payload []byte) ([]byte, error) {
|
||||
if err := json.Unmarshal(payload, &received); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if received.Method != http.MethodPost || received.Path != "/clusters/cluster-1/nodes/node-a/telemetry" {
|
||||
return nil, fmt.Errorf("unexpected request: %+v", received)
|
||||
}
|
||||
return json.Marshal(client.RawControlResponse{StatusCode: 202, Body: json.RawMessage(`{"ok":true}`)})
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("start quic fabric server: %v", err)
|
||||
}
|
||||
defer server.Close()
|
||||
registry := signedTestControlRegistry(t, "cluster-1", "quic://"+server.Addr().String(), testMainQUICCertSHA256(t, tlsConfig))
|
||||
err = reportTelemetry(context.Background(), nil, state.Identity{ClusterID: "cluster-1", NodeID: "node-a"}, &syntheticMeshState{
|
||||
FabricRegistry: registry,
|
||||
VPNFabricQUICTransport: mesh.NewQUICFabricTransport(nil),
|
||||
}, client.TelemetryRequest{Payload: map[string]any{"fabric": "quic"}})
|
||||
if err != nil {
|
||||
t.Fatalf("report telemetry via fabric: %v", err)
|
||||
}
|
||||
if len(received.Body) == 0 || !strings.Contains(string(received.Body), `"fabric":"quic"`) {
|
||||
t.Fatalf("unexpected received body: %s", string(received.Body))
|
||||
}
|
||||
}
|
||||
|
||||
func TestWorkloadControlUsesRegistryQUICControlAPI(t *testing.T) {
|
||||
tlsConfig := testMainQUICTLSConfig(t)
|
||||
var paths []string
|
||||
server, err := mesh.StartQUICFabricServer(context.Background(), mesh.QUICFabricServerConfig{
|
||||
ListenAddr: "127.0.0.1:0",
|
||||
TLSConfig: tlsConfig,
|
||||
FabricControlHandler: func(_ context.Context, payload []byte) ([]byte, error) {
|
||||
var req client.RawControlRequest
|
||||
if err := json.Unmarshal(payload, &req); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
paths = append(paths, req.Method+" "+req.Path)
|
||||
switch req.Path {
|
||||
case "/clusters/cluster-1/nodes/node-a/workloads/desired":
|
||||
return json.Marshal(client.RawControlResponse{
|
||||
StatusCode: 200,
|
||||
Body: json.RawMessage(`{"desired_workloads":[{"service_type":"vpn-egress","desired_state":"enabled","runtime_mode":"node"}]}`),
|
||||
})
|
||||
case "/clusters/cluster-1/nodes/node-a/workloads/vpn-egress/status":
|
||||
if len(req.Body) == 0 || !strings.Contains(string(req.Body), `"reported_state":"running"`) {
|
||||
return nil, fmt.Errorf("unexpected status body: %s", string(req.Body))
|
||||
}
|
||||
return json.Marshal(client.RawControlResponse{StatusCode: 204, Body: json.RawMessage(`{}`)})
|
||||
default:
|
||||
return nil, fmt.Errorf("unexpected request: %+v", req)
|
||||
}
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("start quic fabric server: %v", err)
|
||||
}
|
||||
defer server.Close()
|
||||
registry := signedTestControlRegistry(t, "cluster-1", "quic://"+server.Addr().String(), testMainQUICCertSHA256(t, tlsConfig))
|
||||
meshState := &syntheticMeshState{
|
||||
FabricRegistry: registry,
|
||||
VPNFabricQUICTransport: mesh.NewQUICFabricTransport(nil),
|
||||
}
|
||||
identity := state.Identity{ClusterID: "cluster-1", NodeID: "node-a"}
|
||||
desired, err := desiredWorkloads(context.Background(), nil, identity, meshState)
|
||||
if err != nil {
|
||||
t.Fatalf("desired workloads via fabric: %v", err)
|
||||
}
|
||||
if len(desired) != 1 || desired[0].ServiceType != "vpn-egress" {
|
||||
t.Fatalf("desired = %+v", desired)
|
||||
}
|
||||
if err := reportSingleWorkloadStatus(context.Background(), nil, identity, meshState, "vpn-egress", client.WorkloadStatusRequest{ReportedState: "running"}); err != nil {
|
||||
t.Fatalf("report workload status via fabric: %v", err)
|
||||
}
|
||||
want := []string{
|
||||
"GET /clusters/cluster-1/nodes/node-a/workloads/desired",
|
||||
"POST /clusters/cluster-1/nodes/node-a/workloads/vpn-egress/status",
|
||||
}
|
||||
if !reflect.DeepEqual(paths, want) {
|
||||
t.Fatalf("paths = %+v, want %+v", paths, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAdminRuntimeProjectionUsesRegistryQUICControlAPI(t *testing.T) {
|
||||
tlsConfig := testMainQUICTLSConfig(t)
|
||||
var received client.RawControlRequest
|
||||
server, err := mesh.StartQUICFabricServer(context.Background(), mesh.QUICFabricServerConfig{
|
||||
ListenAddr: "127.0.0.1:0",
|
||||
TLSConfig: tlsConfig,
|
||||
FabricControlHandler: func(_ context.Context, payload []byte) ([]byte, error) {
|
||||
if err := json.Unmarshal(payload, &received); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if received.Method != http.MethodPost || received.Path != "/clusters/cluster-1/nodes/node-a/admin-runtime/projection" {
|
||||
return nil, fmt.Errorf("unexpected request: %+v", received)
|
||||
}
|
||||
return json.Marshal(client.RawControlResponse{
|
||||
StatusCode: 200,
|
||||
Body: json.RawMessage(`{"schema_version":"rap.admin_runtime_projection.v1","status":"ok","status_code":200,"body":{"page":"cluster"}}`),
|
||||
})
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("start quic fabric server: %v", err)
|
||||
}
|
||||
defer server.Close()
|
||||
registry := signedTestControlRegistry(t, "cluster-1", "quic://"+server.Addr().String(), testMainQUICCertSHA256(t, tlsConfig))
|
||||
projection, err := controlAPIProjectionClient{
|
||||
Identity: state.Identity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
MeshState: &syntheticMeshState{
|
||||
FabricRegistry: registry,
|
||||
VPNFabricQUICTransport: mesh.NewQUICFabricTransport(nil),
|
||||
},
|
||||
}.Project(context.Background(), webingress.ControlAPIProjectionRequest{
|
||||
SchemaVersion: "rap.web_ingress_projection.v1",
|
||||
Method: http.MethodGet,
|
||||
Path: "/cluster-admin",
|
||||
Scope: "cluster",
|
||||
ServiceClass: "cluster_admin",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("admin projection via fabric: %v", err)
|
||||
}
|
||||
if projection.StatusCode != 200 || string(projection.Body) != `{"page":"cluster"}` {
|
||||
t.Fatalf("projection = %+v", projection)
|
||||
}
|
||||
if len(received.Body) == 0 || !strings.Contains(string(received.Body), `"service_class":"cluster_admin"`) {
|
||||
t.Fatalf("unexpected received body: %s", string(received.Body))
|
||||
}
|
||||
}
|
||||
|
||||
func TestVPNAssignmentControlUsesRegistryQUICControlAPI(t *testing.T) {
|
||||
tlsConfig := testMainQUICTLSConfig(t)
|
||||
var paths []string
|
||||
server, err := mesh.StartQUICFabricServer(context.Background(), mesh.QUICFabricServerConfig{
|
||||
ListenAddr: "127.0.0.1:0",
|
||||
TLSConfig: tlsConfig,
|
||||
FabricControlHandler: func(_ context.Context, payload []byte) ([]byte, error) {
|
||||
var req client.RawControlRequest
|
||||
if err := json.Unmarshal(payload, &req); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
paths = append(paths, req.Method+" "+req.Path)
|
||||
switch req.Path {
|
||||
case "/clusters/cluster-1/nodes/node-a/vpn/assignments":
|
||||
return json.Marshal(client.RawControlResponse{
|
||||
StatusCode: 200,
|
||||
Body: json.RawMessage(`{"vpn_assignments":[{"vpn_connection_id":"vpn-1","desired_state":"enabled","assignment_reason":"eligible_candidate"}]}`),
|
||||
})
|
||||
case "/clusters/cluster-1/nodes/node-a/vpn/assignments/vpn-1/lease/acquire":
|
||||
return json.Marshal(client.RawControlResponse{
|
||||
StatusCode: 201,
|
||||
Body: json.RawMessage(`{"lease":{"lease_id":"lease-1","owner_node_id":"node-a","lease_generation":1,"status":"active"}}`),
|
||||
})
|
||||
case "/clusters/cluster-1/nodes/node-a/vpn/assignments/vpn-1/lease/lease-1/renew":
|
||||
return json.Marshal(client.RawControlResponse{StatusCode: 204, Body: json.RawMessage(`{}`)})
|
||||
case "/clusters/cluster-1/nodes/node-a/vpn/assignments/vpn-1/status":
|
||||
if len(req.Body) == 0 || !strings.Contains(string(req.Body), `"observed_status":"assigned"`) {
|
||||
return nil, fmt.Errorf("unexpected status body: %s", string(req.Body))
|
||||
}
|
||||
return json.Marshal(client.RawControlResponse{StatusCode: 204, Body: json.RawMessage(`{}`)})
|
||||
default:
|
||||
return nil, fmt.Errorf("unexpected request: %+v", req)
|
||||
}
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("start quic fabric server: %v", err)
|
||||
}
|
||||
defer server.Close()
|
||||
registry := signedTestControlRegistry(t, "cluster-1", "quic://"+server.Addr().String(), testMainQUICCertSHA256(t, tlsConfig))
|
||||
meshState := &syntheticMeshState{
|
||||
FabricRegistry: registry,
|
||||
VPNFabricQUICTransport: mesh.NewQUICFabricTransport(nil),
|
||||
}
|
||||
identity := state.Identity{ClusterID: "cluster-1", NodeID: "node-a"}
|
||||
assignments, err := nodeVPNAssignments(context.Background(), nil, identity, meshState)
|
||||
if err != nil {
|
||||
t.Fatalf("vpn assignments via fabric: %v", err)
|
||||
}
|
||||
if len(assignments) != 1 || assignments[0].VPNConnectionID != "vpn-1" {
|
||||
t.Fatalf("assignments = %+v", assignments)
|
||||
}
|
||||
lease, err := acquireNodeVPNAssignmentLease(context.Background(), nil, identity, meshState, "vpn-1", client.NodeVPNAssignmentLeaseAcquireRequest{TTLSeconds: 300})
|
||||
if err != nil {
|
||||
t.Fatalf("acquire lease via fabric: %v", err)
|
||||
}
|
||||
if lease == nil || lease.LeaseID != "lease-1" {
|
||||
t.Fatalf("lease = %+v", lease)
|
||||
}
|
||||
if err := renewNodeVPNAssignmentLease(context.Background(), nil, identity, meshState, "vpn-1", "lease-1", client.NodeVPNAssignmentLeaseRenewRequest{TTLSeconds: 300}); err != nil {
|
||||
t.Fatalf("renew lease via fabric: %v", err)
|
||||
}
|
||||
if err := reportNodeVPNAssignmentStatus(context.Background(), nil, identity, meshState, "vpn-1", client.NodeVPNAssignmentStatusRequest{ObservedStatus: "assigned"}); err != nil {
|
||||
t.Fatalf("report status via fabric: %v", err)
|
||||
}
|
||||
want := []string{
|
||||
"GET /clusters/cluster-1/nodes/node-a/vpn/assignments",
|
||||
"POST /clusters/cluster-1/nodes/node-a/vpn/assignments/vpn-1/lease/acquire",
|
||||
"POST /clusters/cluster-1/nodes/node-a/vpn/assignments/vpn-1/lease/lease-1/renew",
|
||||
"POST /clusters/cluster-1/nodes/node-a/vpn/assignments/vpn-1/status",
|
||||
}
|
||||
if !reflect.DeepEqual(paths, want) {
|
||||
t.Fatalf("paths = %+v, want %+v", paths, want)
|
||||
}
|
||||
}
|
||||
|
||||
func signedTestControlRegistry(t *testing.T, clusterID string, endpoint string, certSHA256 string) *mesh.FabricRegistry {
|
||||
t.Helper()
|
||||
now := time.Now().UTC()
|
||||
publicKey, privateKey, err := ed25519.GenerateKey(rand.Reader)
|
||||
if err != nil {
|
||||
t.Fatalf("generate key: %v", err)
|
||||
}
|
||||
issuer := mesh.FabricRegistryTrustedIssuer{IssuerID: "authority-1", Role: mesh.FabricRegistryAuthorityControl, PublicKey: publicKey}
|
||||
record := mesh.FabricRegistryGossipRecord{
|
||||
SchemaVersion: mesh.FabricRegistryGossipRecordSchema,
|
||||
ClusterID: clusterID,
|
||||
Service: mesh.FabricRegistryServiceControlAPI,
|
||||
Scope: mesh.FabricRegistryScopeCluster,
|
||||
Epoch: 1,
|
||||
IssuedAt: now.Add(-time.Minute),
|
||||
ExpiresAt: now.Add(time.Hour),
|
||||
IssuerNodeID: "authority-1",
|
||||
IssuerRole: mesh.FabricRegistryAuthorityControl,
|
||||
Endpoints: []mesh.FabricRegistryEndpoint{{
|
||||
EndpointID: "control-a",
|
||||
Address: endpoint,
|
||||
Transport: "direct_quic",
|
||||
PeerCertSHA256: certSHA256,
|
||||
}},
|
||||
}
|
||||
signed, err := mesh.SignFabricRegistryGossipRecord(record, issuer, privateKey)
|
||||
if err != nil {
|
||||
t.Fatalf("sign registry record: %v", err)
|
||||
}
|
||||
registry := mesh.NewFabricRegistry()
|
||||
if _, _, err := registry.ApplyGossipRecord(signed, mesh.FabricRegistryVerificationPolicy{
|
||||
LocalClusterID: clusterID,
|
||||
TrustedIssuers: []mesh.FabricRegistryTrustedIssuer{issuer},
|
||||
RequiredSignatures: 1,
|
||||
Now: now,
|
||||
}, true); err != nil {
|
||||
t.Fatalf("apply registry record: %v", err)
|
||||
}
|
||||
return registry
|
||||
}
|
||||
|
||||
func TestRouteManagerDecisionsFromControlPlaneKeepsExplicitRemediationCommand(t *testing.T) {
|
||||
now := time.Now().UTC()
|
||||
report := &client.RoutePathDecisionReport{Decisions: []client.RoutePathDecision{{
|
||||
@@ -493,9 +982,10 @@ func TestFabricServiceChannelAccessStatsReportsDataPlaneViolations(t *testing.T)
|
||||
OccurredAt: time.Unix(10, 0).UTC(),
|
||||
})
|
||||
report := stats.Report(time.Unix(20, 0).UTC())
|
||||
if report["backend_fallback_blocked"] != int64(1) ||
|
||||
if report["degraded_compatibility_blocked"] != int64(1) ||
|
||||
report["fabric_route_send_failure"] != int64(1) ||
|
||||
report["last_data_plane_violation_status"] != "fabric_route_send_failed_backend_fallback_blocked" ||
|
||||
report["last_data_plane_violation_status"] != "degraded_compatibility_blocked" ||
|
||||
report["last_data_plane_violation_status_raw"] != "fabric_route_send_failed_backend_fallback_blocked" ||
|
||||
report["last_data_plane_violation_reason"] != "mesh synthetic route not found" {
|
||||
t.Fatalf("unexpected violation report: %+v", report)
|
||||
}
|
||||
@@ -790,7 +1280,56 @@ func TestVerifyEnrollmentBootstrapRejectsPinnedAuthorityMismatch(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalizeLoadedSyntheticMeshConfigMigratesLegacyControlPlaneSurfaces(t *testing.T) {
|
||||
func TestLoadFabricRegistryBootstrapAcceptsSignedCandidate(t *testing.T) {
|
||||
now := time.Now().UTC()
|
||||
publicKey, privateKey, err := ed25519.GenerateKey(rand.Reader)
|
||||
if err != nil {
|
||||
t.Fatalf("GenerateKey: %v", err)
|
||||
}
|
||||
record := mesh.FabricRegistryGossipRecord{
|
||||
SchemaVersion: mesh.FabricRegistryGossipRecordSchema,
|
||||
ClusterID: "cluster-1",
|
||||
Service: mesh.FabricRegistryServiceControlAPI,
|
||||
Scope: mesh.FabricRegistryScopeCluster,
|
||||
Epoch: 1,
|
||||
IssuedAt: now.Add(-time.Minute),
|
||||
ExpiresAt: now.Add(time.Hour),
|
||||
IssuerNodeID: "authority-node",
|
||||
IssuerRole: mesh.FabricRegistryAuthorityControl,
|
||||
Endpoints: []mesh.FabricRegistryEndpoint{
|
||||
{EndpointID: "control-a", Address: "quic://control.example.test:19443", Transport: "direct_quic"},
|
||||
},
|
||||
}
|
||||
signed, err := mesh.SignFabricRegistryGossipRecord(record, mesh.FabricRegistryTrustedIssuer{
|
||||
IssuerID: "cluster-authority",
|
||||
Role: mesh.FabricRegistryAuthorityControl,
|
||||
}, privateKey)
|
||||
if err != nil {
|
||||
t.Fatalf("sign registry record: %v", err)
|
||||
}
|
||||
raw, err := json.Marshal([]mesh.FabricRegistryGossipRecord{signed})
|
||||
if err != nil {
|
||||
t.Fatalf("marshal registry records: %v", err)
|
||||
}
|
||||
registry, report := loadFabricRegistryBootstrap(config.Config{
|
||||
ClusterAuthorityPublicKey: base64.StdEncoding.EncodeToString(publicKey),
|
||||
FabricRegistryRecordsJSON: string(raw),
|
||||
}, state.Identity{ClusterID: "cluster-1"})
|
||||
if registry == nil || report.Total != 1 || report.Candidate != 1 || report.Rejected != 0 {
|
||||
t.Fatalf("unexpected registry bootstrap report: %+v registry=%v", report, registry)
|
||||
}
|
||||
if _, ok := registry.Active("cluster-1", mesh.FabricRegistryServiceControlAPI, mesh.FabricRegistryScopeCluster, "", now); ok {
|
||||
t.Fatal("bootstrap record should remain candidate until live verification")
|
||||
}
|
||||
if !registry.MarkLiveVerified("cluster-1", mesh.FabricRegistryServiceControlAPI, mesh.FabricRegistryScopeCluster, "", now) {
|
||||
t.Fatal("MarkLiveVerified = false")
|
||||
}
|
||||
if _, ok := registry.Active("cluster-1", mesh.FabricRegistryServiceControlAPI, mesh.FabricRegistryScopeCluster, "", now); !ok {
|
||||
t.Fatal("expected active record after live verification")
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalizeLoadedSyntheticMeshConfigMigratesNonQUICControlPlaneSurfaces(t *testing.T) {
|
||||
loaded := loadedSyntheticMeshConfig{
|
||||
PeerEndpoints: map[string]string{
|
||||
"node-a": "https://node-a.example.test:443",
|
||||
@@ -798,7 +1337,7 @@ func TestNormalizeLoadedSyntheticMeshConfigMigratesLegacyControlPlaneSurfaces(t
|
||||
PeerEndpointCandidates: map[string][]mesh.PeerEndpointCandidate{
|
||||
"node-b": {
|
||||
{
|
||||
EndpointID: "node-b-legacy",
|
||||
EndpointID: "node-b-http-migration",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_http",
|
||||
Address: "https://node-b.example.test:443",
|
||||
@@ -816,7 +1355,7 @@ func TestNormalizeLoadedSyntheticMeshConfigMigratesLegacyControlPlaneSurfaces(t
|
||||
},
|
||||
RendezvousLeases: []mesh.PeerRendezvousLease{
|
||||
{
|
||||
LeaseID: "lease-legacy",
|
||||
LeaseID: "lease-http-migration",
|
||||
PeerNodeID: "node-b",
|
||||
RelayNodeID: "node-r",
|
||||
RelayEndpoint: "http://node-r.example.test:19001",
|
||||
@@ -824,7 +1363,7 @@ func TestNormalizeLoadedSyntheticMeshConfigMigratesLegacyControlPlaneSurfaces(t
|
||||
},
|
||||
},
|
||||
RoutePathDecisions: &client.RoutePathDecisionReport{
|
||||
Decisions: []client.RoutePathDecision{{DecisionID: "decision-legacy", SelectedRelayEndpoint: "http://node-r.example.test:19001"}},
|
||||
Decisions: []client.RoutePathDecision{{DecisionID: "decision-http-migration", SelectedRelayEndpoint: "http://node-r.example.test:19001"}},
|
||||
},
|
||||
}
|
||||
normalizeLoadedSyntheticMeshConfigQUICOnly(&loaded)
|
||||
@@ -849,14 +1388,14 @@ func TestNormalizeLoadedSyntheticMeshConfigMigratesLegacyControlPlaneSurfaces(t
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidateLoadedSyntheticMeshConfigRejectsUnnormalizedLegacyControlPlaneSurfaces(t *testing.T) {
|
||||
func TestValidateLoadedSyntheticMeshConfigRejectsUnnormalizedNonQUICControlPlaneSurfaces(t *testing.T) {
|
||||
err := validateLoadedSyntheticMeshConfigQUICOnly(loadedSyntheticMeshConfig{
|
||||
RoutePathDecisions: &client.RoutePathDecisionReport{
|
||||
Decisions: []client.RoutePathDecision{{DecisionID: "decision-legacy", SelectedRelayEndpoint: "http://node-r.example.test:19001"}},
|
||||
Decisions: []client.RoutePathDecision{{DecisionID: "decision-http-migration", SelectedRelayEndpoint: "http://node-r.example.test:19001"}},
|
||||
},
|
||||
})
|
||||
if err == nil || !strings.Contains(err.Error(), "QUIC selected relay endpoint") {
|
||||
t.Fatalf("expected legacy selected relay endpoint rejection, got %v", err)
|
||||
t.Fatalf("expected non-QUIC selected relay endpoint rejection, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -942,7 +1481,6 @@ func TestHeartbeatPayloadIncludesMeshEndpointReport(t *testing.T) {
|
||||
MeshRegion: "eu",
|
||||
MeshSyntheticRuntimeEnabled: true,
|
||||
MeshProductionForwardingEnabled: true,
|
||||
MeshFabricSessionEnabled: true,
|
||||
VPNFabricSessionTransportEnabled: true,
|
||||
VPNFabricSessionStreamShards: 6,
|
||||
VPNFabricQUICMaxStreamsPerConn: 24,
|
||||
@@ -952,7 +1490,6 @@ func TestHeartbeatPayloadIncludesMeshEndpointReport(t *testing.T) {
|
||||
ClusterID: "cluster-1",
|
||||
NodeID: "node-a",
|
||||
}, &syntheticMeshState{
|
||||
VPNFabricSessionPeers: mesh.NewFabricSessionPeerManager(),
|
||||
VPNFabricQUICTransport: func() *mesh.QUICFabricTransport {
|
||||
transport := mesh.NewQUICFabricTransport(nil)
|
||||
transport.MaxStreamsPerConn = 24
|
||||
@@ -1010,8 +1547,7 @@ func TestHeartbeatPayloadIncludesMeshEndpointReport(t *testing.T) {
|
||||
if report, ok := payload.Metadata["vpn_fabric_session_transport_report"].(map[string]any); !ok ||
|
||||
report["packet_payload"] != "rap.vpn_packet_batch.fabric.v1" ||
|
||||
report["transport"] != "fabric_session_binary_frames" ||
|
||||
report["stream_shards_per_class"] != 6 ||
|
||||
report["peer_sessions"] == nil {
|
||||
report["stream_shards_per_class"] != 6 {
|
||||
t.Fatalf("vpn fabric session report missing: %+v", payload.Metadata)
|
||||
} else if report["quic_sessions"] == nil || report["quic_max_streams_per_conn"] != 24 {
|
||||
t.Fatalf("vpn fabric quic session report missing: %+v", report)
|
||||
@@ -1242,14 +1778,14 @@ func TestVPNFabricSessionTargetPrefersRankedQUICCandidate(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestVPNFabricSessionTargetFallsBackToLegacyPeerEndpoint(t *testing.T) {
|
||||
func TestVPNFabricSessionTargetRejectsNonQUICPeerEndpoint(t *testing.T) {
|
||||
_, ok := vpnFabricSessionTarget(&syntheticMeshState{
|
||||
PeerEndpoints: map[string]string{
|
||||
"node-b": "https://node-b.example.test:443/",
|
||||
},
|
||||
}, "node-b")
|
||||
if ok {
|
||||
t.Fatal("legacy peer endpoint unexpectedly produced a QUIC target")
|
||||
t.Fatal("non-QUIC peer endpoint unexpectedly produced a QUIC target")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1257,7 +1793,7 @@ func TestVPNFabricSessionTargetsIncludeRankedQUICCandidatesWithoutLegacyFallback
|
||||
now := time.Now().UTC()
|
||||
targets := vpnFabricSessionTargets(&syntheticMeshState{
|
||||
PeerEndpoints: map[string]string{
|
||||
"node-b": "https://node-b-legacy.example.test:443/",
|
||||
"node-b": "https://node-b-http-migration.example.test:443/",
|
||||
},
|
||||
PeerEndpointCandidates: map[string][]mesh.PeerEndpointCandidate{
|
||||
"node-b": {
|
||||
@@ -2731,7 +3267,7 @@ func TestWebIngressForwardHandlerFromConfigVerifiesSignedEnvelope(t *testing.T)
|
||||
keyID := "web-key-1"
|
||||
handler := webIngressForwardHandlerFromConfig(config.Config{
|
||||
WebIngressTrustedKeysJSON: webingress.TrustedKeysJSONForPublicKey(keyID, publicKey),
|
||||
}, state.Identity{ClusterID: "cluster-1", NodeID: "node-1"}, nil)
|
||||
}, state.Identity{ClusterID: "cluster-1", NodeID: "node-1"}, nil, nil)
|
||||
if handler == nil {
|
||||
t.Fatal("handler is nil")
|
||||
}
|
||||
@@ -2780,10 +3316,10 @@ func TestWebIngressForwardHandlerFromConfigVerifiesSignedEnvelope(t *testing.T)
|
||||
}
|
||||
|
||||
func TestWebIngressForwardHandlerFromConfigDisabledWithoutTrustedKeys(t *testing.T) {
|
||||
if handler := webIngressForwardHandlerFromConfig(config.Config{}, state.Identity{}, nil); handler != nil {
|
||||
if handler := webIngressForwardHandlerFromConfig(config.Config{}, state.Identity{}, nil, nil); handler != nil {
|
||||
t.Fatal("handler should be nil without trusted keys")
|
||||
}
|
||||
if handler := webIngressForwardHandlerFromConfig(config.Config{WebIngressTrustedKeysJSON: `{"bad":"key"}`}, state.Identity{}, nil); handler != nil {
|
||||
if handler := webIngressForwardHandlerFromConfig(config.Config{WebIngressTrustedKeysJSON: `{"bad":"key"}`}, state.Identity{}, nil, nil); handler != nil {
|
||||
t.Fatal("handler should be nil with invalid trusted keys")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,7 +7,7 @@ import (
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/state"
|
||||
)
|
||||
|
||||
const Version = "0.2.309-latencyaware"
|
||||
const Version = "0.2.321-directreadytarget"
|
||||
|
||||
func EnrollmentPayload(clusterID, joinToken string, identity state.Identity) client.EnrollRequest {
|
||||
return client.EnrollRequest{
|
||||
|
||||
@@ -828,9 +828,6 @@ func (c *Client) RawControl(ctx context.Context, request RawControlRequest) (Raw
|
||||
if err != nil {
|
||||
return RawControlResponse{}, err
|
||||
}
|
||||
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
|
||||
return RawControlResponse{}, fmt.Errorf("backend returned status %d: %s", httpResp.StatusCode, string(payload))
|
||||
}
|
||||
return RawControlResponse{StatusCode: httpResp.StatusCode, Body: json.RawMessage(payload)}, nil
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"flag"
|
||||
"os"
|
||||
@@ -31,7 +32,6 @@ type Config struct {
|
||||
EnrollmentPollTimeout time.Duration
|
||||
MeshSyntheticRuntimeEnabled bool
|
||||
MeshProductionForwardingEnabled bool
|
||||
MeshFabricSessionEnabled bool
|
||||
VPNFabricSessionTransportEnabled bool
|
||||
MeshQUICFabricEnabled bool
|
||||
MeshQUICFabricListenAddr string
|
||||
@@ -45,6 +45,7 @@ type Config struct {
|
||||
MeshListenAutoPortEnd int
|
||||
MeshAdvertiseEndpoint string
|
||||
MeshAdvertiseEndpointsJSON string
|
||||
FabricRegistryRecordsJSON string
|
||||
MeshAdvertiseTransport string
|
||||
MeshConnectivityMode string
|
||||
MeshNATType string
|
||||
@@ -86,7 +87,6 @@ func Load(args []string, env map[string]string) (Config, error) {
|
||||
fs.StringVar(&cfg.WebIngressRuntimeServiceClasses, "web-ingress-runtime-service-classes", getEnv(env, "RAP_WEB_INGRESS_RUNTIME_SERVICE_CLASSES", ""), "Optional comma-separated allow-list of web ingress runtime service classes accepted by this node.")
|
||||
fs.BoolVar(&cfg.MeshSyntheticRuntimeEnabled, "mesh-synthetic-runtime-enabled", getEnvBool(env, "RAP_MESH_SYNTHETIC_RUNTIME_ENABLED", false), "Enable C17A synthetic fabric probe runtime. Disabled by default.")
|
||||
fs.BoolVar(&cfg.MeshProductionForwardingEnabled, "mesh-production-forwarding-enabled", getEnvBool(env, "RAP_MESH_PRODUCTION_FORWARDING_ENABLED", false), "Enable production fabric-control direct next-hop forwarding gate. Disabled by default.")
|
||||
fs.BoolVar(&cfg.MeshFabricSessionEnabled, "mesh-fabric-session-enabled", getEnvBool(env, "RAP_MESH_FABRIC_SESSION_ENABLED", false), "Enable authenticated fabric session endpoint. Disabled by default.")
|
||||
fs.BoolVar(&cfg.VPNFabricSessionTransportEnabled, "vpn-fabric-session-transport-enabled", getEnvBool(env, "RAP_VPN_FABRIC_SESSION_TRANSPORT_ENABLED", false), "Route VPN packet transport over persistent fabric session when explicitly enabled. Disabled by default.")
|
||||
fs.BoolVar(&cfg.MeshQUICFabricEnabled, "mesh-quic-fabric-enabled", getEnvBool(env, "RAP_MESH_QUIC_FABRIC_ENABLED", false), "Enable QUIC/UDP fabric listener. Disabled by default.")
|
||||
fs.StringVar(&cfg.MeshQUICFabricListenAddr, "mesh-quic-fabric-listen-addr", getEnv(env, "RAP_MESH_QUIC_FABRIC_LISTEN_ADDR", ""), "Listen address for QUIC/UDP fabric endpoint, for example :19443.")
|
||||
@@ -94,12 +94,13 @@ func Load(args []string, env map[string]string) (Config, error) {
|
||||
fs.IntVar(&cfg.VPNFabricQUICMaxStreamsPerConn, "vpn-fabric-quic-max-streams-per-conn", getEnvInt(env, "RAP_VPN_FABRIC_QUIC_MAX_STREAMS_PER_CONN", 64), "Maximum logical fabric-session streams per cached VPN QUIC carrier connection.")
|
||||
fs.DurationVar(&cfg.VPNFabricQUICIdleTTL, "vpn-fabric-quic-idle-ttl", time.Duration(getEnvInt(env, "RAP_VPN_FABRIC_QUIC_IDLE_TTL_SECONDS", 300))*time.Second, "Idle TTL for cached VPN QUIC carrier connections.")
|
||||
fs.IntVar(&cfg.MeshProductionObservationSinkCapacity, "mesh-production-observation-sink-capacity", getEnvSignedInt(env, "RAP_MESH_PRODUCTION_OBSERVATION_SINK_CAPACITY", 0), "Bounded local metadata-only production envelope observation sink capacity. Disabled when 0.")
|
||||
fs.StringVar(&cfg.MeshListenAddr, "mesh-listen-addr", getEnv(env, "RAP_MESH_LISTEN_ADDR", ""), "Listen address for disabled-by-default C17E synthetic mesh HTTP endpoint.")
|
||||
fs.StringVar(&cfg.MeshListenAddr, "mesh-listen-addr", getEnv(env, "RAP_MESH_LISTEN_ADDR", ""), "Listen address for disabled-by-default historical synthetic mesh HTTP endpoint.")
|
||||
fs.StringVar(&cfg.MeshListenPortMode, "mesh-listen-port-mode", getEnv(env, "RAP_MESH_LISTEN_PORT_MODE", "manual"), "Mesh listen port behavior: manual, auto, or disabled.")
|
||||
fs.IntVar(&cfg.MeshListenAutoPortStart, "mesh-listen-auto-port-start", getEnvInt(env, "RAP_MESH_LISTEN_AUTO_PORT_START", 19131), "First port used when mesh listen port mode is auto.")
|
||||
fs.IntVar(&cfg.MeshListenAutoPortEnd, "mesh-listen-auto-port-end", getEnvInt(env, "RAP_MESH_LISTEN_AUTO_PORT_END", 19231), "Last port used when mesh listen port mode is auto.")
|
||||
fs.StringVar(&cfg.MeshAdvertiseEndpoint, "mesh-advertise-endpoint", getEnv(env, "RAP_MESH_ADVERTISE_ENDPOINT", ""), "Advertised mesh endpoint reported to the Control Plane. Empty disables endpoint reporting.")
|
||||
fs.StringVar(&cfg.MeshAdvertiseEndpointsJSON, "mesh-advertise-endpoints-json", getEnv(env, "RAP_MESH_ADVERTISE_ENDPOINTS_JSON", ""), "JSON array of advertised mesh endpoint candidates, including private/corporate endpoints.")
|
||||
fs.StringVar(&cfg.FabricRegistryRecordsJSON, "fabric-registry-records-json", getEnv(env, "RAP_FABRIC_REGISTRY_RECORDS_JSON", ""), "JSON array of signed QUIC-only fabric registry gossip records used as bootstrap discovery seeds.")
|
||||
fs.StringVar(&cfg.MeshAdvertiseTransport, "mesh-advertise-transport", getEnv(env, "RAP_MESH_ADVERTISE_TRANSPORT", "quic"), "Transport label for the advertised mesh endpoint.")
|
||||
fs.StringVar(&cfg.MeshConnectivityMode, "mesh-connectivity-mode", getEnv(env, "RAP_MESH_CONNECTIVITY_MODE", "direct"), "Connectivity mode reported with the advertised mesh endpoint.")
|
||||
fs.StringVar(&cfg.MeshNATType, "mesh-nat-type", getEnv(env, "RAP_MESH_NAT_TYPE", "unknown"), "NAT type hint reported with the advertised mesh endpoint.")
|
||||
@@ -150,6 +151,7 @@ func Load(args []string, env map[string]string) (Config, error) {
|
||||
}
|
||||
cfg.MeshAdvertiseEndpoint = strings.TrimRight(strings.TrimSpace(cfg.MeshAdvertiseEndpoint), "/")
|
||||
cfg.MeshAdvertiseEndpointsJSON = strings.TrimSpace(cfg.MeshAdvertiseEndpointsJSON)
|
||||
cfg.FabricRegistryRecordsJSON = strings.TrimSpace(cfg.FabricRegistryRecordsJSON)
|
||||
cfg.MeshAdvertiseTransport = strings.TrimSpace(cfg.MeshAdvertiseTransport)
|
||||
if cfg.MeshAdvertiseTransport == "" {
|
||||
cfg.MeshAdvertiseTransport = "quic"
|
||||
@@ -199,6 +201,9 @@ func Load(args []string, env map[string]string) (Config, error) {
|
||||
if cfg.MeshProductionObservationSinkCapacity > MaxMeshProductionObservationSinkCapacity {
|
||||
return Config{}, errors.New("mesh production observation sink capacity exceeds maximum")
|
||||
}
|
||||
if cfg.FabricRegistryRecordsJSON != "" && !isJSONArray(cfg.FabricRegistryRecordsJSON) {
|
||||
return Config{}, errors.New("fabric registry records must be a JSON array")
|
||||
}
|
||||
switch cfg.MeshListenPortMode {
|
||||
case "", "manual", "auto", "disabled":
|
||||
if cfg.MeshListenPortMode == "" {
|
||||
@@ -269,6 +274,11 @@ func hasLegacyEndpointScheme(endpoint string) bool {
|
||||
strings.HasPrefix(endpoint, "wss://")
|
||||
}
|
||||
|
||||
// isJSONArray reports whether value, ignoring surrounding whitespace, is a
// syntactically valid JSON array.
//
// The explicit leading-bracket check is required: json.Unmarshal accepts the
// JSON literal null for a slice destination without returning an error, so
// relying on the decode result alone would classify "null" as an array.
func isJSONArray(value string) bool {
	trimmed := strings.TrimSpace(value)
	if !strings.HasPrefix(trimmed, "[") {
		return false
	}
	var items []json.RawMessage
	return json.Unmarshal([]byte(trimmed), &items) == nil
}
|
||||
|
||||
func readEnv() map[string]string {
|
||||
out := map[string]string{}
|
||||
for _, pair := range os.Environ() {
|
||||
|
||||
@@ -25,7 +25,6 @@ func TestLoadConfigFromEnvAndArgs(t *testing.T) {
|
||||
"RAP_ENROLLMENT_POLL_TIMEOUT_SECONDS": "30",
|
||||
"RAP_MESH_SYNTHETIC_RUNTIME_ENABLED": "true",
|
||||
"RAP_MESH_PRODUCTION_FORWARDING_ENABLED": "true",
|
||||
"RAP_MESH_FABRIC_SESSION_ENABLED": "true",
|
||||
"RAP_VPN_FABRIC_SESSION_TRANSPORT_ENABLED": "true",
|
||||
"RAP_MESH_QUIC_FABRIC_ENABLED": "true",
|
||||
"RAP_MESH_QUIC_FABRIC_LISTEN_ADDR": ":19443",
|
||||
@@ -39,6 +38,7 @@ func TestLoadConfigFromEnvAndArgs(t *testing.T) {
|
||||
"RAP_MESH_LISTEN_AUTO_PORT_END": "19020",
|
||||
"RAP_MESH_ADVERTISE_ENDPOINT": "quic://node-a.example.test:19443/",
|
||||
"RAP_MESH_ADVERTISE_ENDPOINTS_JSON": `[{"endpoint_id":"node-a-lan","address":"10.10.0.20:19001"}]`,
|
||||
"RAP_FABRIC_REGISTRY_RECORDS_JSON": ` [{"schema":"rap.fabric.registry.gossip_record.v1","service_class":"control-api"}] `,
|
||||
"RAP_MESH_ADVERTISE_TRANSPORT": "direct_quic",
|
||||
"RAP_MESH_CONNECTIVITY_MODE": "outbound_only",
|
||||
"RAP_MESH_NAT_TYPE": "symmetric",
|
||||
@@ -93,9 +93,6 @@ func TestLoadConfigFromEnvAndArgs(t *testing.T) {
|
||||
if !cfg.MeshProductionForwardingEnabled {
|
||||
t.Fatal("MeshProductionForwardingEnabled = false, want true")
|
||||
}
|
||||
if !cfg.MeshFabricSessionEnabled {
|
||||
t.Fatal("MeshFabricSessionEnabled = false, want true")
|
||||
}
|
||||
if !cfg.VPNFabricSessionTransportEnabled {
|
||||
t.Fatal("VPNFabricSessionTransportEnabled = false, want true")
|
||||
}
|
||||
@@ -122,6 +119,7 @@ func TestLoadConfigFromEnvAndArgs(t *testing.T) {
|
||||
}
|
||||
if cfg.MeshAdvertiseEndpoint != "quic://node-a.example.test:19443" ||
|
||||
cfg.MeshAdvertiseEndpointsJSON == "" ||
|
||||
cfg.FabricRegistryRecordsJSON != `[{"schema":"rap.fabric.registry.gossip_record.v1","service_class":"control-api"}]` ||
|
||||
cfg.MeshAdvertiseTransport != "direct_quic" ||
|
||||
cfg.MeshConnectivityMode != "outbound_only" ||
|
||||
cfg.MeshNATType != "symmetric" ||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package hostagent
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
@@ -29,7 +30,6 @@ type RuntimeConfig struct {
|
||||
WorkloadSupervisionEnabled bool
|
||||
MeshSyntheticRuntimeEnabled bool
|
||||
MeshProductionForwardingEnabled bool
|
||||
MeshFabricSessionEnabled bool
|
||||
VPNFabricSessionTransportEnabled bool
|
||||
MeshQUICFabricEnabled bool
|
||||
MeshQUICFabricListenAddr string
|
||||
@@ -42,6 +42,7 @@ type RuntimeConfig struct {
|
||||
MeshListenAutoPortEnd int
|
||||
MeshAdvertiseEndpoint string
|
||||
MeshAdvertiseEndpointsJSON string
|
||||
FabricRegistryRecordsJSON string
|
||||
MeshAdvertiseTransport string
|
||||
MeshConnectivityMode string
|
||||
MeshNATType string
|
||||
@@ -84,6 +85,7 @@ func (cfg RuntimeConfig) Normalize() RuntimeConfig {
|
||||
cfg.MeshListenPortMode = strings.ToLower(strings.TrimSpace(cfg.MeshListenPortMode))
|
||||
cfg.MeshAdvertiseEndpoint = strings.TrimRight(strings.TrimSpace(cfg.MeshAdvertiseEndpoint), "/")
|
||||
cfg.MeshAdvertiseEndpointsJSON = strings.TrimSpace(cfg.MeshAdvertiseEndpointsJSON)
|
||||
cfg.FabricRegistryRecordsJSON = strings.TrimSpace(cfg.FabricRegistryRecordsJSON)
|
||||
cfg.MeshAdvertiseTransport = strings.TrimSpace(cfg.MeshAdvertiseTransport)
|
||||
cfg.MeshConnectivityMode = strings.TrimSpace(cfg.MeshConnectivityMode)
|
||||
cfg.MeshNATType = strings.TrimSpace(cfg.MeshNATType)
|
||||
@@ -145,6 +147,9 @@ func (cfg RuntimeConfig) ValidateInstall() error {
|
||||
if cfg.ProductionObservationSinkCap < 0 {
|
||||
return errors.New("production observation sink capacity must not be negative")
|
||||
}
|
||||
if cfg.FabricRegistryRecordsJSON != "" && !isJSONArray(cfg.FabricRegistryRecordsJSON) {
|
||||
return errors.New("fabric registry records must be a JSON array")
|
||||
}
|
||||
for _, item := range cfg.ExtraEnv {
|
||||
if !strings.Contains(item, "=") {
|
||||
return fmt.Errorf("extra env %q must be KEY=VALUE", item)
|
||||
@@ -176,3 +181,8 @@ func hasLegacyEndpointScheme(endpoint string) bool {
|
||||
strings.HasPrefix(endpoint, "ws://") ||
|
||||
strings.HasPrefix(endpoint, "wss://")
|
||||
}
|
||||
|
||||
// isJSONArray reports whether value, ignoring surrounding whitespace, is a
// syntactically valid JSON array.
//
// The explicit leading-bracket check is required: json.Unmarshal accepts the
// JSON literal null for a slice destination without returning an error, so
// relying on the decode result alone would classify "null" as an array.
func isJSONArray(value string) bool {
	trimmed := strings.TrimSpace(value)
	if !strings.HasPrefix(trimmed, "[") {
		return false
	}
	var items []json.RawMessage
	return json.Unmarshal([]byte(trimmed), &items) == nil
}
|
||||
|
||||
@@ -264,7 +264,6 @@ func NodeAgentEnvWithStateDir(cfg RuntimeConfig, stateDir string) []string {
|
||||
"RAP_WORKLOAD_SUPERVISION_ENABLED=" + boolString(cfg.WorkloadSupervisionEnabled),
|
||||
"RAP_MESH_SYNTHETIC_RUNTIME_ENABLED=" + boolString(cfg.MeshSyntheticRuntimeEnabled),
|
||||
"RAP_MESH_PRODUCTION_FORWARDING_ENABLED=" + boolString(cfg.MeshProductionForwardingEnabled),
|
||||
"RAP_MESH_FABRIC_SESSION_ENABLED=" + boolString(cfg.MeshFabricSessionEnabled),
|
||||
"RAP_VPN_FABRIC_SESSION_TRANSPORT_ENABLED=" + boolString(cfg.VPNFabricSessionTransportEnabled),
|
||||
"RAP_MESH_QUIC_FABRIC_ENABLED=" + boolString(cfg.MeshQUICFabricEnabled),
|
||||
"RAP_VPN_FABRIC_SESSION_STREAM_SHARDS=" + strconv.Itoa(cfg.VPNFabricSessionStreamShards),
|
||||
@@ -295,6 +294,9 @@ func NodeAgentEnvWithStateDir(cfg RuntimeConfig, stateDir string) []string {
|
||||
if cfg.MeshAdvertiseEndpointsJSON != "" {
|
||||
env = append(env, "RAP_MESH_ADVERTISE_ENDPOINTS_JSON="+cfg.MeshAdvertiseEndpointsJSON)
|
||||
}
|
||||
if cfg.FabricRegistryRecordsJSON != "" {
|
||||
env = append(env, "RAP_FABRIC_REGISTRY_RECORDS_JSON="+cfg.FabricRegistryRecordsJSON)
|
||||
}
|
||||
if cfg.MeshAdvertiseTransport != "" {
|
||||
env = append(env, "RAP_MESH_ADVERTISE_TRANSPORT="+cfg.MeshAdvertiseTransport)
|
||||
}
|
||||
|
||||
@@ -74,6 +74,7 @@ func TestDockerRunArgsBuildNodeRuntimePlacement(t *testing.T) {
|
||||
VPNFabricQUICIdleTTLSeconds: 120,
|
||||
MeshListenAddr: ":19131",
|
||||
MeshAdvertiseEndpoint: "quic://10.0.0.11:19443/",
|
||||
FabricRegistryRecordsJSON: `[{"schema":"rap.fabric.registry.gossip_record.v1","service_class":"control-api"}]`,
|
||||
MeshAdvertiseTransport: "direct_quic",
|
||||
MeshConnectivityMode: "private_lan",
|
||||
})
|
||||
@@ -96,6 +97,7 @@ func TestDockerRunArgsBuildNodeRuntimePlacement(t *testing.T) {
|
||||
"RAP_VPN_FABRIC_QUIC_IDLE_TTL_SECONDS=120",
|
||||
"RAP_MESH_LISTEN_ADDR=:19131",
|
||||
"RAP_MESH_ADVERTISE_ENDPOINT=quic://10.0.0.11:19443",
|
||||
`RAP_FABRIC_REGISTRY_RECORDS_JSON=[{"schema":"rap.fabric.registry.gossip_record.v1","service_class":"control-api"}]`,
|
||||
"RAP_MESH_ADVERTISE_TRANSPORT=direct_quic",
|
||||
"RAP_MESH_CONNECTIVITY_MODE=private_lan",
|
||||
"rap-node-agent:test",
|
||||
@@ -164,6 +166,11 @@ func TestFetchDockerInstallProfileBuildsRuntimeConfig(t *testing.T) {
|
||||
"node_name": "node-a",
|
||||
"image": "rap-node-agent:test",
|
||||
"artifact_endpoints": []string{"https://cache.example.test/artifacts"},
|
||||
"fabric_registry_records": []map[string]any{{
|
||||
"schema": "rap.fabric.registry.gossip_record.v1",
|
||||
"service_class": "control-api",
|
||||
"service_id": "control-a",
|
||||
}},
|
||||
"docker_image_artifact": map[string]any{
|
||||
"kind": "docker_image_tar",
|
||||
"image": "rap-node-agent:test",
|
||||
@@ -207,6 +214,7 @@ func TestFetchDockerInstallProfileBuildsRuntimeConfig(t *testing.T) {
|
||||
!cfg.MeshQUICFabricEnabled ||
|
||||
cfg.MeshQUICFabricListenAddr != ":19443" ||
|
||||
cfg.VPNFabricSessionStreamShards != 6 ||
|
||||
cfg.FabricRegistryRecordsJSON != `[{"schema":"rap.fabric.registry.gossip_record.v1","service_class":"control-api","service_id":"control-a"}]` ||
|
||||
cfg.MeshConnectivityMode != "outbound_only" {
|
||||
t.Fatalf("unexpected cfg: %+v", cfg)
|
||||
}
|
||||
|
||||
@@ -72,7 +72,6 @@ func LinuxInstallConfigFromProfile(profile LinuxInstallProfile) LinuxInstallConf
|
||||
WorkloadSupervisionEnabled: profile.WorkloadSupervisionEnabled,
|
||||
MeshSyntheticRuntimeEnabled: profile.MeshSyntheticRuntimeEnabled,
|
||||
MeshProductionForwardingEnabled: profile.MeshProductionForwardingEnabled,
|
||||
MeshFabricSessionEnabled: profile.MeshFabricSessionEnabled,
|
||||
VPNFabricSessionTransportEnabled: profile.VPNFabricSessionTransportEnabled,
|
||||
MeshQUICFabricEnabled: profile.MeshQUICFabricEnabled,
|
||||
MeshQUICFabricListenAddr: profile.MeshQUICFabricListenAddr,
|
||||
@@ -287,7 +286,6 @@ func installLinuxHostAgentUpdater(ctx context.Context, m LinuxManager, result Li
|
||||
args := []string{
|
||||
result.HostAgentPath,
|
||||
"update-loop",
|
||||
"--backend-url", cfg.RuntimeConfig.BackendURL,
|
||||
"--cluster-id", cfg.RuntimeConfig.ClusterID,
|
||||
"--state-dir", result.StateDir,
|
||||
"--current-version", cfg.AutoUpdateCurrentVersion,
|
||||
@@ -303,6 +301,10 @@ func installLinuxHostAgentUpdater(ctx context.Context, m LinuxManager, result Li
|
||||
"--host-agent-current-version", firstNonEmpty(cfg.AutoUpdateCurrentVersion, "0.0.0"),
|
||||
"--host-agent-binary-path", result.HostAgentPath,
|
||||
}
|
||||
if strings.TrimSpace(cfg.RuntimeConfig.BackendURL) != "" {
|
||||
args = append(args, "--backend-url", strings.TrimSpace(cfg.RuntimeConfig.BackendURL))
|
||||
}
|
||||
args = appendFabricUpdateArgs(args, cfg.RuntimeConfig)
|
||||
if strings.TrimSpace(cfg.NodeID) != "" {
|
||||
args = append(args, "--node-id", strings.TrimSpace(cfg.NodeID))
|
||||
}
|
||||
@@ -363,48 +365,48 @@ func (m LinuxManager) ApplyUpdate(ctx context.Context, req UpdateRequest) (Updat
|
||||
}
|
||||
status.Payload["systemd_unit"] = req.SystemdUnitName
|
||||
status.Payload["binary_path"] = req.BinaryPath
|
||||
_ = ReportNodeUpdateStatus(ctx, req.BackendURL, req.ClusterID, req.NodeID, status)
|
||||
_ = ReportNodeUpdateStatusForRequest(ctx, req, status)
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
if plan.ProductionForwarding && !req.AllowProductionMesh {
|
||||
err := errors.New("refusing update plan with production forwarding enabled")
|
||||
_ = ReportNodeUpdateStatus(ctx, req.BackendURL, req.ClusterID, req.NodeID, statusFromError(req, plan, "preflight", "failed", err))
|
||||
_ = ReportNodeUpdateStatusForRequest(ctx, req, statusFromError(req, plan, "preflight", "failed", err))
|
||||
return result, err
|
||||
}
|
||||
if plan.Artifact == nil {
|
||||
err := errors.New("update plan has no artifact")
|
||||
_ = ReportNodeUpdateStatus(ctx, req.BackendURL, req.ClusterID, req.NodeID, statusFromError(req, plan, "preflight", "failed", err))
|
||||
_ = ReportNodeUpdateStatusForRequest(ctx, req, statusFromError(req, plan, "preflight", "failed", err))
|
||||
return result, err
|
||||
}
|
||||
if plan.Artifact.InstallType != "" && plan.Artifact.InstallType != BinaryUpdateInstallType {
|
||||
err := fmt.Errorf("unsupported update artifact install type %q", plan.Artifact.InstallType)
|
||||
_ = ReportNodeUpdateStatus(ctx, req.BackendURL, req.ClusterID, req.NodeID, statusFromError(req, plan, "preflight", "failed", err))
|
||||
_ = ReportNodeUpdateStatusForRequest(ctx, req, statusFromError(req, plan, "preflight", "failed", err))
|
||||
return result, err
|
||||
}
|
||||
if req.DryRun {
|
||||
return result, nil
|
||||
}
|
||||
urls := artifactURLsForBackend(*plan.Artifact, req.BackendURL)
|
||||
_ = ReportNodeUpdateStatus(ctx, req.BackendURL, req.ClusterID, req.NodeID, NodeUpdateStatusRequest{Product: req.Product, CurrentVersion: req.CurrentVersion, TargetVersion: plan.TargetVersion, Phase: "download", Status: "started", AttemptID: updateAttemptID(plan), ObservedAt: time.Now().UTC(), Payload: map[string]any{"artifact_url": plan.Artifact.URL, "artifact_urls": urls, "binary_path": req.BinaryPath}})
|
||||
_ = ReportNodeUpdateStatusForRequest(ctx, req, NodeUpdateStatusRequest{Product: req.Product, CurrentVersion: req.CurrentVersion, TargetVersion: plan.TargetVersion, Phase: "download", Status: "started", AttemptID: updateAttemptID(plan), ObservedAt: time.Now().UTC(), Payload: map[string]any{"artifact_url": plan.Artifact.URL, "artifact_urls": urls, "binary_path": req.BinaryPath}})
|
||||
path, err := downloadFirstArtifact(ctx, urls, plan.Artifact.SHA256, plan.Artifact.SizeBytes)
|
||||
if err != nil {
|
||||
_ = ReportNodeUpdateStatus(ctx, req.BackendURL, req.ClusterID, req.NodeID, statusFromError(req, plan, "download", "failed", err))
|
||||
_ = ReportNodeUpdateStatusForRequest(ctx, req, statusFromError(req, plan, "download", "failed", err))
|
||||
return result, err
|
||||
}
|
||||
defer os.Remove(path)
|
||||
runner := m.runner()
|
||||
_, _ = runner.Run(ctx, "systemctl", "stop", req.SystemdUnitName)
|
||||
if err := copyFile(path, req.BinaryPath, 0o755); err != nil {
|
||||
_ = ReportNodeUpdateStatus(ctx, req.BackendURL, req.ClusterID, req.NodeID, statusFromError(req, plan, "apply", "failed", err))
|
||||
_ = ReportNodeUpdateStatusForRequest(ctx, req, statusFromError(req, plan, "apply", "failed", err))
|
||||
return result, err
|
||||
}
|
||||
result.Replaced = true
|
||||
if _, err := runner.Run(ctx, "systemctl", "restart", req.SystemdUnitName); err != nil {
|
||||
_ = ReportNodeUpdateStatus(ctx, req.BackendURL, req.ClusterID, req.NodeID, statusFromError(req, plan, "restart", "failed", err))
|
||||
_ = ReportNodeUpdateStatusForRequest(ctx, req, statusFromError(req, plan, "restart", "failed", err))
|
||||
return result, err
|
||||
}
|
||||
_ = ReportNodeUpdateStatus(ctx, req.BackendURL, req.ClusterID, req.NodeID, NodeUpdateStatusRequest{Product: req.Product, CurrentVersion: req.CurrentVersion, TargetVersion: plan.TargetVersion, Phase: "health_check", Status: "succeeded", AttemptID: updateAttemptID(plan), ObservedAt: time.Now().UTC(), Payload: map[string]any{"systemd_unit": req.SystemdUnitName, "binary_path": req.BinaryPath}})
|
||||
_ = ReportNodeUpdateStatusForRequest(ctx, req, NodeUpdateStatusRequest{Product: req.Product, CurrentVersion: req.CurrentVersion, TargetVersion: plan.TargetVersion, Phase: "health_check", Status: "succeeded", AttemptID: updateAttemptID(plan), ObservedAt: time.Now().UTC(), Payload: map[string]any{"systemd_unit": req.SystemdUnitName, "binary_path": req.BinaryPath}})
|
||||
_ = saveUpdateState(req.StateDir, UpdateState{Product: req.Product, CurrentVersion: plan.TargetVersion, TargetVersion: plan.TargetVersion, Image: req.BinaryPath, UpdatedAt: time.Now().UTC()})
|
||||
return result, nil
|
||||
}
|
||||
|
||||
@@ -31,31 +31,34 @@ const (
|
||||
)
|
||||
|
||||
type MonitorConfig struct {
|
||||
BackendURL string
|
||||
ClusterID string
|
||||
NodeID string
|
||||
StateDir string
|
||||
Product string
|
||||
CurrentVersion string
|
||||
Interval time.Duration
|
||||
InitialDelay time.Duration
|
||||
MaxRuns int
|
||||
DockerBinary string
|
||||
WatchContainers []string
|
||||
RestartContainers bool
|
||||
RestartCooldown time.Duration
|
||||
StaleRestartingAfter time.Duration
|
||||
DiskPath string
|
||||
TmpDir string
|
||||
DiskWarnPercent int
|
||||
DiskCleanupPercent int
|
||||
DiskCriticalPercent int
|
||||
TmpMinAge time.Duration
|
||||
CleanupDocker bool
|
||||
StatusFile string
|
||||
Runner CommandRunner
|
||||
Logf func(format string, args ...any)
|
||||
restartHistory map[string]time.Time
|
||||
BackendURL string
|
||||
ClusterID string
|
||||
NodeID string
|
||||
StateDir string
|
||||
ClusterAuthorityPublicKey string
|
||||
FabricRegistryRecordsJSON string
|
||||
MeshRegion string
|
||||
Product string
|
||||
CurrentVersion string
|
||||
Interval time.Duration
|
||||
InitialDelay time.Duration
|
||||
MaxRuns int
|
||||
DockerBinary string
|
||||
WatchContainers []string
|
||||
RestartContainers bool
|
||||
RestartCooldown time.Duration
|
||||
StaleRestartingAfter time.Duration
|
||||
DiskPath string
|
||||
TmpDir string
|
||||
DiskWarnPercent int
|
||||
DiskCleanupPercent int
|
||||
DiskCriticalPercent int
|
||||
TmpMinAge time.Duration
|
||||
CleanupDocker bool
|
||||
StatusFile string
|
||||
Runner CommandRunner
|
||||
Logf func(format string, args ...any)
|
||||
restartHistory map[string]time.Time
|
||||
}
|
||||
|
||||
type DiskUsage struct {
|
||||
@@ -421,7 +424,18 @@ func reportMonitorStatus(ctx context.Context, cfg MonitorConfig, result MonitorR
|
||||
if errText != "" {
|
||||
req.ErrorMessage = &errText
|
||||
}
|
||||
return ReportNodeUpdateStatus(ctx, cfg.BackendURL, clusterID, nodeID, req)
|
||||
return ReportNodeUpdateStatusForRequest(ctx, UpdateRequest{
|
||||
BackendURL: cfg.BackendURL,
|
||||
ClusterID: clusterID,
|
||||
NodeID: nodeID,
|
||||
StateDir: cfg.StateDir,
|
||||
ClusterAuthorityPublicKey: cfg.ClusterAuthorityPublicKey,
|
||||
FabricRegistryRecordsJSON: cfg.FabricRegistryRecordsJSON,
|
||||
MeshRegion: cfg.MeshRegion,
|
||||
Product: cfg.Product,
|
||||
CurrentVersion: cfg.CurrentVersion,
|
||||
InstallType: DefaultUpdateInstallType,
|
||||
}, req)
|
||||
}
|
||||
|
||||
func resolveMonitorIdentity(cfg MonitorConfig) (string, string, error) {
|
||||
|
||||
@@ -16,6 +16,7 @@ type DockerInstallProfile struct {
|
||||
BackendURL string `json:"backend_url"`
|
||||
ControlPlaneEndpoints []string `json:"control_plane_endpoints"`
|
||||
ArtifactEndpoints []string `json:"artifact_endpoints"`
|
||||
FabricRegistryRecords json.RawMessage `json:"fabric_registry_records"`
|
||||
DockerImageArtifact *DockerArtifact `json:"docker_image_artifact"`
|
||||
JoinToken string `json:"join_token"`
|
||||
NodeName string `json:"node_name"`
|
||||
@@ -30,7 +31,6 @@ type DockerInstallProfile struct {
|
||||
WorkloadSupervisionEnabled bool `json:"workload_supervision_enabled"`
|
||||
MeshSyntheticRuntimeEnabled bool `json:"mesh_synthetic_runtime_enabled"`
|
||||
MeshProductionForwardingEnabled bool `json:"mesh_production_forwarding_enabled"`
|
||||
MeshFabricSessionEnabled bool `json:"mesh_fabric_session_enabled"`
|
||||
VPNFabricSessionTransportEnabled bool `json:"vpn_fabric_session_transport_enabled"`
|
||||
MeshQUICFabricEnabled bool `json:"mesh_quic_fabric_enabled"`
|
||||
MeshQUICFabricListenAddr string `json:"mesh_quic_fabric_listen_addr"`
|
||||
@@ -70,6 +70,7 @@ type WindowsInstallProfile struct {
|
||||
BackendURL string `json:"backend_url"`
|
||||
ControlPlaneEndpoints []string `json:"control_plane_endpoints"`
|
||||
ArtifactEndpoints []string `json:"artifact_endpoints"`
|
||||
FabricRegistryRecords json.RawMessage `json:"fabric_registry_records"`
|
||||
NodeAgentArtifact *DockerArtifact `json:"node_agent_artifact"`
|
||||
JoinToken string `json:"join_token"`
|
||||
NodeName string `json:"node_name"`
|
||||
@@ -79,7 +80,6 @@ type WindowsInstallProfile struct {
|
||||
WorkloadSupervisionEnabled bool `json:"workload_supervision_enabled"`
|
||||
MeshSyntheticRuntimeEnabled bool `json:"mesh_synthetic_runtime_enabled"`
|
||||
MeshProductionForwardingEnabled bool `json:"mesh_production_forwarding_enabled"`
|
||||
MeshFabricSessionEnabled bool `json:"mesh_fabric_session_enabled"`
|
||||
VPNFabricSessionTransportEnabled bool `json:"vpn_fabric_session_transport_enabled"`
|
||||
MeshQUICFabricEnabled bool `json:"mesh_quic_fabric_enabled"`
|
||||
MeshQUICFabricListenAddr string `json:"mesh_quic_fabric_listen_addr"`
|
||||
@@ -109,6 +109,7 @@ type LinuxInstallProfile struct {
|
||||
BackendURL string `json:"backend_url"`
|
||||
ControlPlaneEndpoints []string `json:"control_plane_endpoints"`
|
||||
ArtifactEndpoints []string `json:"artifact_endpoints"`
|
||||
FabricRegistryRecords json.RawMessage `json:"fabric_registry_records"`
|
||||
NodeAgentArtifact *DockerArtifact `json:"node_agent_artifact"`
|
||||
JoinToken string `json:"join_token"`
|
||||
NodeName string `json:"node_name"`
|
||||
@@ -118,7 +119,6 @@ type LinuxInstallProfile struct {
|
||||
WorkloadSupervisionEnabled bool `json:"workload_supervision_enabled"`
|
||||
MeshSyntheticRuntimeEnabled bool `json:"mesh_synthetic_runtime_enabled"`
|
||||
MeshProductionForwardingEnabled bool `json:"mesh_production_forwarding_enabled"`
|
||||
MeshFabricSessionEnabled bool `json:"mesh_fabric_session_enabled"`
|
||||
VPNFabricSessionTransportEnabled bool `json:"vpn_fabric_session_transport_enabled"`
|
||||
MeshQUICFabricEnabled bool `json:"mesh_quic_fabric_enabled"`
|
||||
MeshQUICFabricListenAddr string `json:"mesh_quic_fabric_listen_addr"`
|
||||
@@ -302,7 +302,6 @@ func RuntimeConfigFromProfile(profile DockerInstallProfile) RuntimeConfig {
|
||||
WorkloadSupervisionEnabled: profile.WorkloadSupervisionEnabled,
|
||||
MeshSyntheticRuntimeEnabled: profile.MeshSyntheticRuntimeEnabled,
|
||||
MeshProductionForwardingEnabled: profile.MeshProductionForwardingEnabled,
|
||||
MeshFabricSessionEnabled: profile.MeshFabricSessionEnabled,
|
||||
VPNFabricSessionTransportEnabled: profile.VPNFabricSessionTransportEnabled,
|
||||
MeshQUICFabricEnabled: profile.MeshQUICFabricEnabled,
|
||||
MeshQUICFabricListenAddr: profile.MeshQUICFabricListenAddr,
|
||||
@@ -315,6 +314,7 @@ func RuntimeConfigFromProfile(profile DockerInstallProfile) RuntimeConfig {
|
||||
MeshListenAutoPortEnd: profile.MeshListenAutoPortEnd,
|
||||
MeshAdvertiseEndpoint: profile.MeshAdvertiseEndpoint,
|
||||
MeshAdvertiseEndpointsJSON: string(profile.MeshAdvertiseEndpointsJSON),
|
||||
FabricRegistryRecordsJSON: string(profile.FabricRegistryRecords),
|
||||
MeshAdvertiseTransport: profile.MeshAdvertiseTransport,
|
||||
MeshConnectivityMode: profile.MeshConnectivityMode,
|
||||
MeshNATType: profile.MeshNATType,
|
||||
|
||||
@@ -10,19 +10,22 @@ import (
|
||||
)
|
||||
|
||||
// HostAgentUpdateRequest carries the inputs for one host-agent binary update.
// It is converted into the generic UpdateRequest by updateRequest.
type HostAgentUpdateRequest struct {
	// Control-plane addressing and node identity.
	BackendURL string
	ClusterID  string
	NodeID     string
	StateDir   string

	// Fabric routing inputs, forwarded unchanged to UpdateRequest.
	ClusterAuthorityPublicKey string
	FabricRegistryRecordsJSON string
	MeshRegion                string

	// Version and artifact selection. updateRequest defaults an empty OS to
	// "linux" and an empty InstallType to BinaryUpdateInstallType.
	CurrentVersion string
	Channel        string
	OS             string
	Arch           string
	InstallType    string

	// Local apply behaviour.
	BinaryPath        string
	DryRun            bool
	RestartService    string
	RestartAfterApply bool
}
|
||||
|
||||
type HostAgentUpdateLoopConfig struct {
|
||||
@@ -37,18 +40,21 @@ type HostAgentUpdateLoopConfig struct {
|
||||
|
||||
func (req HostAgentUpdateRequest) updateRequest() UpdateRequest {
|
||||
return UpdateRequest{
|
||||
BackendURL: req.BackendURL,
|
||||
ClusterID: req.ClusterID,
|
||||
NodeID: req.NodeID,
|
||||
StateDir: req.StateDir,
|
||||
Product: HostAgentUpdateProduct,
|
||||
CurrentVersion: req.CurrentVersion,
|
||||
OS: firstNonEmpty(req.OS, "linux"),
|
||||
Arch: req.Arch,
|
||||
InstallType: firstNonEmpty(req.InstallType, BinaryUpdateInstallType),
|
||||
Channel: req.Channel,
|
||||
ContainerName: "host-agent-service",
|
||||
DryRun: req.DryRun,
|
||||
BackendURL: req.BackendURL,
|
||||
ClusterID: req.ClusterID,
|
||||
NodeID: req.NodeID,
|
||||
StateDir: req.StateDir,
|
||||
ClusterAuthorityPublicKey: req.ClusterAuthorityPublicKey,
|
||||
FabricRegistryRecordsJSON: req.FabricRegistryRecordsJSON,
|
||||
MeshRegion: req.MeshRegion,
|
||||
Product: HostAgentUpdateProduct,
|
||||
CurrentVersion: req.CurrentVersion,
|
||||
OS: firstNonEmpty(req.OS, "linux"),
|
||||
Arch: req.Arch,
|
||||
InstallType: firstNonEmpty(req.InstallType, BinaryUpdateInstallType),
|
||||
Channel: req.Channel,
|
||||
ContainerName: "host-agent-service",
|
||||
DryRun: req.DryRun,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -79,25 +85,25 @@ func (m DockerManager) ApplyHostAgentUpdate(ctx context.Context, req HostAgentUp
|
||||
status.Payload = map[string]any{}
|
||||
}
|
||||
status.Payload["binary_path"] = binaryPath
|
||||
_ = ReportNodeUpdateStatus(ctx, resolved.BackendURL, resolved.ClusterID, resolved.NodeID, status)
|
||||
_ = ReportNodeUpdateStatusForRequest(ctx, resolved, status)
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
if plan.Artifact == nil {
|
||||
err := errors.New("host-agent update plan has no artifact")
|
||||
_ = ReportNodeUpdateStatus(ctx, resolved.BackendURL, resolved.ClusterID, resolved.NodeID, statusFromError(resolved, plan, "preflight", "failed", err))
|
||||
_ = ReportNodeUpdateStatusForRequest(ctx, resolved, statusFromError(resolved, plan, "preflight", "failed", err))
|
||||
return result, err
|
||||
}
|
||||
if !isBinaryInstallType(plan.Artifact.InstallType) {
|
||||
err := fmt.Errorf("unsupported host-agent artifact install type %q", plan.Artifact.InstallType)
|
||||
_ = ReportNodeUpdateStatus(ctx, resolved.BackendURL, resolved.ClusterID, resolved.NodeID, statusFromError(resolved, plan, "preflight", "failed", err))
|
||||
_ = ReportNodeUpdateStatusForRequest(ctx, resolved, statusFromError(resolved, plan, "preflight", "failed", err))
|
||||
return result, err
|
||||
}
|
||||
if req.DryRun {
|
||||
return result, nil
|
||||
}
|
||||
urls := artifactURLsForBackend(*plan.Artifact, resolved.BackendURL)
|
||||
_ = ReportNodeUpdateStatus(ctx, resolved.BackendURL, resolved.ClusterID, resolved.NodeID, NodeUpdateStatusRequest{
|
||||
_ = ReportNodeUpdateStatusForRequest(ctx, resolved, NodeUpdateStatusRequest{
|
||||
Product: HostAgentUpdateProduct,
|
||||
CurrentVersion: resolved.CurrentVersion,
|
||||
TargetVersion: plan.TargetVersion,
|
||||
@@ -109,7 +115,7 @@ func (m DockerManager) ApplyHostAgentUpdate(ctx context.Context, req HostAgentUp
|
||||
})
|
||||
path, err := downloadFirstArtifact(ctx, urls, plan.Artifact.SHA256, plan.Artifact.SizeBytes)
|
||||
if err != nil {
|
||||
_ = ReportNodeUpdateStatus(ctx, resolved.BackendURL, resolved.ClusterID, resolved.NodeID, statusFromError(resolved, plan, "download", "failed", err))
|
||||
_ = ReportNodeUpdateStatusForRequest(ctx, resolved, statusFromError(resolved, plan, "download", "failed", err))
|
||||
return result, err
|
||||
}
|
||||
defer os.Remove(path)
|
||||
@@ -125,7 +131,7 @@ func (m DockerManager) ApplyHostAgentUpdate(ctx context.Context, req HostAgentUp
|
||||
Image: binaryPath,
|
||||
UpdatedAt: time.Now().UTC(),
|
||||
})
|
||||
_ = ReportNodeUpdateStatus(ctx, resolved.BackendURL, resolved.ClusterID, resolved.NodeID, NodeUpdateStatusRequest{
|
||||
_ = ReportNodeUpdateStatusForRequest(ctx, resolved, NodeUpdateStatusRequest{
|
||||
Product: HostAgentUpdateProduct,
|
||||
CurrentVersion: resolved.CurrentVersion,
|
||||
TargetVersion: plan.TargetVersion,
|
||||
@@ -137,7 +143,7 @@ func (m DockerManager) ApplyHostAgentUpdate(ctx context.Context, req HostAgentUp
|
||||
})
|
||||
return result, nil
|
||||
}
|
||||
_ = ReportNodeUpdateStatus(ctx, resolved.BackendURL, resolved.ClusterID, resolved.NodeID, statusFromError(resolved, plan, "apply", "failed", fmt.Errorf("%w; stage failed: %v", err, stageErr)))
|
||||
_ = ReportNodeUpdateStatusForRequest(ctx, resolved, statusFromError(resolved, plan, "apply", "failed", fmt.Errorf("%w; stage failed: %v", err, stageErr)))
|
||||
return result, err
|
||||
}
|
||||
result.Loaded = true
|
||||
@@ -151,7 +157,7 @@ func (m DockerManager) ApplyHostAgentUpdate(ctx context.Context, req HostAgentUp
|
||||
Image: binaryPath,
|
||||
UpdatedAt: time.Now().UTC(),
|
||||
})
|
||||
_ = ReportNodeUpdateStatus(ctx, resolved.BackendURL, resolved.ClusterID, resolved.NodeID, NodeUpdateStatusRequest{
|
||||
_ = ReportNodeUpdateStatusForRequest(ctx, resolved, NodeUpdateStatusRequest{
|
||||
Product: HostAgentUpdateProduct,
|
||||
CurrentVersion: resolved.CurrentVersion,
|
||||
TargetVersion: plan.TargetVersion,
|
||||
|
||||
@@ -173,8 +173,8 @@ func (m DockerManager) InstallUpdateService(ctx context.Context, cfg UpdateServi
|
||||
func buildUpdateServiceUnit(cfg UpdateServiceConfig) (string, error) {
|
||||
runtimeCfg := cfg.RuntimeConfig.Normalize()
|
||||
var missing []string
|
||||
if runtimeCfg.BackendURL == "" {
|
||||
missing = append(missing, "backend-url")
|
||||
if runtimeCfg.BackendURL == "" && runtimeCfg.FabricRegistryRecordsJSON == "" {
|
||||
missing = append(missing, "backend-url-or-fabric-registry-records-json")
|
||||
}
|
||||
if runtimeCfg.ClusterID == "" {
|
||||
missing = append(missing, "cluster-id")
|
||||
@@ -191,7 +191,6 @@ func buildUpdateServiceUnit(cfg UpdateServiceConfig) (string, error) {
|
||||
args := []string{
|
||||
cfg.BinaryInstallPath,
|
||||
"update-loop",
|
||||
"--backend-url", runtimeCfg.BackendURL,
|
||||
"--cluster-id", runtimeCfg.ClusterID,
|
||||
"--state-dir", runtimeCfg.StateDir,
|
||||
"--container-name", runtimeCfg.ContainerName,
|
||||
@@ -202,9 +201,13 @@ func buildUpdateServiceUnit(cfg UpdateServiceConfig) (string, error) {
|
||||
"--jitter", fmt.Sprintf("%.3f", cfg.Jitter),
|
||||
"--health-timeout-seconds", fmt.Sprintf("%d", cfg.HealthTimeoutSec),
|
||||
}
|
||||
if runtimeCfg.BackendURL != "" {
|
||||
args = append(args, "--backend-url", runtimeCfg.BackendURL)
|
||||
}
|
||||
if strings.TrimSpace(cfg.Channel) != "" {
|
||||
args = append(args, "--channel", strings.TrimSpace(cfg.Channel))
|
||||
}
|
||||
args = appendFabricUpdateArgs(args, runtimeCfg)
|
||||
execStart := systemdJoin(args)
|
||||
return fmt.Sprintf(`[Unit]
|
||||
Description=RAP host-agent updater for %s
|
||||
@@ -225,8 +228,8 @@ WantedBy=multi-user.target
|
||||
|
||||
func buildHostAgentSelfUpdateUnit(cfg UpdateServiceConfig) (string, string, string, error) {
|
||||
runtimeCfg := cfg.RuntimeConfig.Normalize()
|
||||
if runtimeCfg.BackendURL == "" || runtimeCfg.ClusterID == "" || runtimeCfg.StateDir == "" {
|
||||
return "", "", "", fmt.Errorf("backend-url, cluster-id, and state-dir are required for host-agent self updater")
|
||||
if (runtimeCfg.BackendURL == "" && runtimeCfg.FabricRegistryRecordsJSON == "") || runtimeCfg.ClusterID == "" || runtimeCfg.StateDir == "" {
|
||||
return "", "", "", fmt.Errorf("backend-url-or-fabric-registry-records-json, cluster-id, and state-dir are required for host-agent self updater")
|
||||
}
|
||||
unitName := "rap-host-agent-self-updater.service"
|
||||
unitPath := filepath.Join(firstNonEmpty(cfg.UnitDir, DefaultSystemdUnitDir), unitName)
|
||||
@@ -234,7 +237,6 @@ func buildHostAgentSelfUpdateUnit(cfg UpdateServiceConfig) (string, string, stri
|
||||
args := []string{
|
||||
cfg.BinaryInstallPath,
|
||||
"update-host-agent-loop",
|
||||
"--backend-url", runtimeCfg.BackendURL,
|
||||
"--cluster-id", runtimeCfg.ClusterID,
|
||||
"--state-dir", runtimeCfg.StateDir,
|
||||
"--binary-path", firstNonEmpty(cfg.BinaryInstallPath, DefaultHostAgentInstallPath),
|
||||
@@ -243,9 +245,13 @@ func buildHostAgentSelfUpdateUnit(cfg UpdateServiceConfig) (string, string, stri
|
||||
"--initial-delay-seconds", fmt.Sprintf("%d", cfg.InitialDelaySeconds+30),
|
||||
"--jitter", fmt.Sprintf("%.3f", cfg.Jitter),
|
||||
}
|
||||
if runtimeCfg.BackendURL != "" {
|
||||
args = append(args, "--backend-url", runtimeCfg.BackendURL)
|
||||
}
|
||||
if strings.TrimSpace(cfg.Channel) != "" {
|
||||
args = append(args, "--channel", strings.TrimSpace(cfg.Channel))
|
||||
}
|
||||
args = appendFabricUpdateArgs(args, runtimeCfg)
|
||||
return fmt.Sprintf(`[Unit]
|
||||
Description=RAP host-agent self updater
|
||||
After=network-online.target docker.service
|
||||
@@ -265,8 +271,8 @@ WantedBy=multi-user.target
|
||||
|
||||
func buildHostAgentMonitorUnit(cfg UpdateServiceConfig) (string, string, string, error) {
|
||||
runtimeCfg := cfg.RuntimeConfig.Normalize()
|
||||
if runtimeCfg.BackendURL == "" || runtimeCfg.ClusterID == "" || runtimeCfg.StateDir == "" {
|
||||
return "", "", "", fmt.Errorf("backend-url, cluster-id, and state-dir are required for host monitor")
|
||||
if (runtimeCfg.BackendURL == "" && runtimeCfg.FabricRegistryRecordsJSON == "") || runtimeCfg.ClusterID == "" || runtimeCfg.StateDir == "" {
|
||||
return "", "", "", fmt.Errorf("backend-url-or-fabric-registry-records-json, cluster-id, and state-dir are required for host monitor")
|
||||
}
|
||||
containers := uniqueTrimmed(append([]string{runtimeCfg.ContainerName}, cfg.MonitorContainers...))
|
||||
if len(containers) == 0 {
|
||||
@@ -277,7 +283,6 @@ func buildHostAgentMonitorUnit(cfg UpdateServiceConfig) (string, string, string,
|
||||
args := []string{
|
||||
cfg.BinaryInstallPath,
|
||||
"monitor-loop",
|
||||
"--backend-url", runtimeCfg.BackendURL,
|
||||
"--cluster-id", runtimeCfg.ClusterID,
|
||||
"--state-dir", runtimeCfg.StateDir,
|
||||
"--current-version", firstNonEmpty(cfg.SelfUpdateVersion, cfg.CurrentVersion),
|
||||
@@ -286,6 +291,9 @@ func buildHostAgentMonitorUnit(cfg UpdateServiceConfig) (string, string, string,
|
||||
"--disk-cleanup-percent", fmt.Sprintf("%d", firstNonZero(cfg.MonitorDiskCleanup, DefaultMonitorDiskCleanupPercent)),
|
||||
"--disk-critical-percent", fmt.Sprintf("%d", firstNonZero(cfg.MonitorDiskCritical, DefaultMonitorDiskCriticalPercent)),
|
||||
}
|
||||
if runtimeCfg.BackendURL != "" {
|
||||
args = append(args, "--backend-url", runtimeCfg.BackendURL)
|
||||
}
|
||||
if cfg.MonitorCleanupDocker {
|
||||
args = append(args, "--cleanup-docker")
|
||||
}
|
||||
@@ -295,6 +303,7 @@ func buildHostAgentMonitorUnit(cfg UpdateServiceConfig) (string, string, string,
|
||||
for _, container := range containers {
|
||||
args = append(args, "--watch-container", container)
|
||||
}
|
||||
args = appendFabricUpdateArgs(args, runtimeCfg)
|
||||
return fmt.Sprintf(`[Unit]
|
||||
Description=RAP host-agent monitor for %s
|
||||
After=network-online.target docker.service
|
||||
@@ -312,6 +321,16 @@ WantedBy=multi-user.target
|
||||
`, runtimeCfg.ContainerName, systemdJoin(args)), unitName, unitPath, nil
|
||||
}
|
||||
|
||||
func appendFabricUpdateArgs(args []string, runtimeCfg RuntimeConfig) []string {
|
||||
if strings.TrimSpace(runtimeCfg.FabricRegistryRecordsJSON) != "" {
|
||||
args = append(args, "--fabric-registry-records-json", strings.TrimSpace(runtimeCfg.FabricRegistryRecordsJSON))
|
||||
}
|
||||
if strings.TrimSpace(runtimeCfg.MeshRegion) != "" {
|
||||
args = append(args, "--mesh-region", strings.TrimSpace(runtimeCfg.MeshRegion))
|
||||
}
|
||||
return args
|
||||
}
|
||||
|
||||
func firstNonZero(values ...int) int {
|
||||
for _, value := range values {
|
||||
if value != 0 {
|
||||
|
||||
@@ -119,7 +119,7 @@ func TestWindowsHostAgentUpdateScriptTargetsWindowsService(t *testing.T) {
|
||||
for _, want := range []string{
|
||||
":loop",
|
||||
"rap-host-agent.exe.next",
|
||||
"update-loop --backend-url",
|
||||
"update-loop --cluster-id",
|
||||
"--backend-url \"http://control/api/v1\"",
|
||||
"--cluster-id \"cluster-1\"",
|
||||
"--node-id \"node-1\"",
|
||||
@@ -139,6 +139,35 @@ func TestWindowsHostAgentUpdateScriptTargetsWindowsService(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestWindowsHostAgentUpdateScriptOmitsEmptyBackendURL(t *testing.T) {
|
||||
cfg := WindowsInstallConfig{
|
||||
RuntimeConfig: RuntimeConfig{
|
||||
ClusterID: "cluster-1",
|
||||
FabricRegistryRecordsJSON: `[{"record_id":"r1"}]`,
|
||||
MeshRegion: "ru-msk",
|
||||
},
|
||||
AutoUpdateCurrentVersion: "0.1.2",
|
||||
}
|
||||
result := WindowsInstallResult{
|
||||
NodeName: "win-a",
|
||||
StateDir: `C:\ProgramData\RAP\nodes\win-a`,
|
||||
NodeAgentPath: `C:\Program Files\RAP\win-a\rap-node-agent.exe`,
|
||||
TaskName: "RAP Node Agent win-a",
|
||||
}
|
||||
script := windowsHostAgentUpdateScript(`C:\Program Files\RAP\win-a\rap-host-agent.exe`, cfg, result)
|
||||
if strings.Contains(script, "--backend-url") {
|
||||
t.Fatalf("script must not include backend-url when it is empty:\n%s", script)
|
||||
}
|
||||
for _, want := range []string{
|
||||
`--fabric-registry-records-json [{"record_id":"r1"}]`,
|
||||
"--mesh-region ru-msk",
|
||||
} {
|
||||
if !strings.Contains(script, want) {
|
||||
t.Fatalf("script missing %q:\n%s", want, script)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestWindowsInstallReplaceAllowsExistingNodeWithoutJoinToken(t *testing.T) {
|
||||
result, err := (WindowsManager{}).Install(context.Background(), WindowsInstallConfig{
|
||||
RuntimeConfig: RuntimeConfig{
|
||||
|
||||
@@ -3,6 +3,8 @@ package hostagent
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/ed25519"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
@@ -17,6 +19,8 @@ import (
|
||||
"time"
|
||||
|
||||
clusterauth "github.com/example/remote-access-platform/agents/rap-node-agent/internal/authority"
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/client"
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/mesh"
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/state"
|
||||
)
|
||||
|
||||
@@ -33,23 +37,26 @@ const (
|
||||
var ErrNodeIdentityNotReady = errors.New("node identity is not approved yet")
|
||||
|
||||
// UpdateRequest is the generic description of one update attempt, used for
// fetching an update plan, applying it and reporting status.
type UpdateRequest struct {
	// Control-plane addressing and node identity. Validate accepts either a
	// BackendURL or FabricRegistryRecordsJSON as the way to reach the control API.
	BackendURL string
	ClusterID  string
	NodeID     string
	StateDir   string

	// Fabric routing inputs. ClusterAuthorityPublicKey is decoded as a base64
	// Ed25519 public key when fabric registry records are used.
	ClusterAuthorityPublicKey string
	FabricRegistryRecordsJSON string
	MeshRegion                string

	// Product, version and artifact selection.
	Product        string
	CurrentVersion string
	OS             string
	Arch           string
	InstallType    string
	Channel        string

	// Local apply targets.
	ContainerName   string
	BinaryPath      string
	WindowsTaskName string
	SystemdUnitName string

	// Apply behaviour.
	HealthTimeout       time.Duration
	DryRun              bool
	AllowProductionMesh bool
}
|
||||
|
||||
type UpdateResult struct {
|
||||
@@ -204,6 +211,9 @@ func (req UpdateRequest) Normalize() UpdateRequest {
|
||||
req.ClusterID = strings.TrimSpace(req.ClusterID)
|
||||
req.NodeID = strings.TrimSpace(req.NodeID)
|
||||
req.StateDir = strings.TrimSpace(req.StateDir)
|
||||
req.ClusterAuthorityPublicKey = strings.TrimSpace(req.ClusterAuthorityPublicKey)
|
||||
req.FabricRegistryRecordsJSON = strings.TrimSpace(req.FabricRegistryRecordsJSON)
|
||||
req.MeshRegion = strings.TrimSpace(req.MeshRegion)
|
||||
req.Product = firstNonEmpty(req.Product, DefaultUpdateProduct)
|
||||
req.OS = firstNonEmpty(req.OS, runtime.GOOS)
|
||||
req.Arch = firstNonEmpty(req.Arch, runtime.GOARCH)
|
||||
@@ -222,8 +232,8 @@ func (req UpdateRequest) Normalize() UpdateRequest {
|
||||
func (req UpdateRequest) Validate() error {
|
||||
req = req.Normalize()
|
||||
var missing []string
|
||||
if req.BackendURL == "" {
|
||||
missing = append(missing, "backend-url")
|
||||
if req.BackendURL == "" && req.FabricRegistryRecordsJSON == "" {
|
||||
missing = append(missing, "backend-url-or-fabric-registry-records-json")
|
||||
}
|
||||
if req.ClusterID == "" {
|
||||
missing = append(missing, "cluster-id")
|
||||
@@ -285,30 +295,30 @@ func (m DockerManager) ApplyUpdate(ctx context.Context, req UpdateRequest) (Upda
|
||||
}
|
||||
if plan.Action != "update" {
|
||||
if !req.DryRun {
|
||||
_ = ReportNodeUpdateStatus(ctx, req.BackendURL, req.ClusterID, req.NodeID, statusFromNoopPlan(req, plan))
|
||||
_ = ReportNodeUpdateStatusForRequest(ctx, req, statusFromNoopPlan(req, plan))
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
if plan.ProductionForwarding && !req.AllowProductionMesh {
|
||||
err := errors.New("refusing update plan with production forwarding enabled")
|
||||
_ = ReportNodeUpdateStatus(ctx, req.BackendURL, req.ClusterID, req.NodeID, statusFromError(req, plan, "preflight", "failed", err))
|
||||
_ = ReportNodeUpdateStatusForRequest(ctx, req, statusFromError(req, plan, "preflight", "failed", err))
|
||||
return result, err
|
||||
}
|
||||
if plan.Artifact == nil {
|
||||
err := errors.New("update plan has no artifact")
|
||||
_ = ReportNodeUpdateStatus(ctx, req.BackendURL, req.ClusterID, req.NodeID, statusFromError(req, plan, "preflight", "failed", err))
|
||||
_ = ReportNodeUpdateStatusForRequest(ctx, req, statusFromError(req, plan, "preflight", "failed", err))
|
||||
return result, err
|
||||
}
|
||||
if plan.Artifact.InstallType != "" && plan.Artifact.InstallType != DefaultUpdateInstallType {
|
||||
err := fmt.Errorf("unsupported update artifact install type %q", plan.Artifact.InstallType)
|
||||
_ = ReportNodeUpdateStatus(ctx, req.BackendURL, req.ClusterID, req.NodeID, statusFromError(req, plan, "preflight", "failed", err))
|
||||
_ = ReportNodeUpdateStatusForRequest(ctx, req, statusFromError(req, plan, "preflight", "failed", err))
|
||||
return result, err
|
||||
}
|
||||
if req.DryRun {
|
||||
result.NewImage = artifactImage(*plan.Artifact, "")
|
||||
return result, nil
|
||||
}
|
||||
_ = ReportNodeUpdateStatus(ctx, req.BackendURL, req.ClusterID, req.NodeID, NodeUpdateStatusRequest{
|
||||
_ = ReportNodeUpdateStatusForRequest(ctx, req, NodeUpdateStatusRequest{
|
||||
Product: req.Product,
|
||||
CurrentVersion: req.CurrentVersion,
|
||||
TargetVersion: plan.TargetVersion,
|
||||
@@ -321,7 +331,7 @@ func (m DockerManager) ApplyUpdate(ctx context.Context, req UpdateRequest) (Upda
|
||||
|
||||
current, cfg, err := m.runtimeConfigFromContainer(ctx, runner, docker, req.ContainerName)
|
||||
if err != nil {
|
||||
_ = ReportNodeUpdateStatus(ctx, req.BackendURL, req.ClusterID, req.NodeID, statusFromError(req, plan, "inspect", "failed", err))
|
||||
_ = ReportNodeUpdateStatusForRequest(ctx, req, statusFromError(req, plan, "inspect", "failed", err))
|
||||
return result, err
|
||||
}
|
||||
result.PreviousImageID = current.Image
|
||||
@@ -339,7 +349,7 @@ func (m DockerManager) ApplyUpdate(ctx context.Context, req UpdateRequest) (Upda
|
||||
cfg.JoinToken = ""
|
||||
result.NewImage = cfg.Image
|
||||
|
||||
_ = ReportNodeUpdateStatus(ctx, req.BackendURL, req.ClusterID, req.NodeID, NodeUpdateStatusRequest{
|
||||
_ = ReportNodeUpdateStatusForRequest(ctx, req, NodeUpdateStatusRequest{
|
||||
Product: req.Product,
|
||||
CurrentVersion: req.CurrentVersion,
|
||||
TargetVersion: plan.TargetVersion,
|
||||
@@ -351,7 +361,7 @@ func (m DockerManager) ApplyUpdate(ctx context.Context, req UpdateRequest) (Upda
|
||||
})
|
||||
installed, err := m.Install(ctx, cfg)
|
||||
if err != nil {
|
||||
_ = ReportNodeUpdateStatus(ctx, req.BackendURL, req.ClusterID, req.NodeID, statusFromError(req, plan, "apply", "failed", err))
|
||||
_ = ReportNodeUpdateStatusForRequest(ctx, req, statusFromError(req, plan, "apply", "failed", err))
|
||||
rollbackErr := m.rollbackContainer(ctx, runner, docker, cfg, current, plan.RollbackAllowed)
|
||||
if rollbackErr == nil && plan.RollbackAllowed {
|
||||
result.RolledBack = true
|
||||
@@ -363,14 +373,14 @@ func (m DockerManager) ApplyUpdate(ctx context.Context, req UpdateRequest) (Upda
|
||||
result.ContainerID = installed.ContainerID
|
||||
|
||||
if err := m.waitContainerRunning(ctx, runner, docker, req.ContainerName, req.HealthTimeout); err != nil {
|
||||
_ = ReportNodeUpdateStatus(ctx, req.BackendURL, req.ClusterID, req.NodeID, statusFromError(req, plan, "health_check", "failed", err))
|
||||
_ = ReportNodeUpdateStatusForRequest(ctx, req, statusFromError(req, plan, "health_check", "failed", err))
|
||||
rollbackErr := m.rollbackContainer(ctx, runner, docker, cfg, current, plan.RollbackAllowed)
|
||||
if rollbackErr == nil && plan.RollbackAllowed {
|
||||
result.RolledBack = true
|
||||
}
|
||||
return result, err
|
||||
}
|
||||
_ = ReportNodeUpdateStatus(ctx, req.BackendURL, req.ClusterID, req.NodeID, NodeUpdateStatusRequest{
|
||||
_ = ReportNodeUpdateStatusForRequest(ctx, req, NodeUpdateStatusRequest{
|
||||
Product: req.Product,
|
||||
CurrentVersion: req.CurrentVersion,
|
||||
TargetVersion: plan.TargetVersion,
|
||||
@@ -515,7 +525,27 @@ func FetchNodeUpdatePlan(ctx context.Context, req UpdateRequest) (NodeUpdatePlan
|
||||
if req.Channel != "" {
|
||||
values.Set("channel", req.Channel)
|
||||
}
|
||||
endpoint := fmt.Sprintf("%s/clusters/%s/nodes/%s/updates/plan?%s", req.BackendURL, url.PathEscape(req.ClusterID), url.PathEscape(req.NodeID), values.Encode())
|
||||
path := fmt.Sprintf("/clusters/%s/nodes/%s/updates/plan?%s", url.PathEscape(req.ClusterID), url.PathEscape(req.NodeID), values.Encode())
|
||||
if raw, viaFabric, err := updateControlRawViaFabric(ctx, req, client.RawControlRequest{Method: http.MethodGet, Path: path}); viaFabric {
|
||||
if err != nil {
|
||||
return NodeUpdatePlan{}, err
|
||||
}
|
||||
if raw.StatusCode < 200 || raw.StatusCode >= 300 {
|
||||
return NodeUpdatePlan{}, fmt.Errorf("fetch update plan via fabric: status %d", raw.StatusCode)
|
||||
}
|
||||
var out NodeUpdatePlanResponse
|
||||
if err := json.Unmarshal(raw.Body, &out); err != nil {
|
||||
return NodeUpdatePlan{}, err
|
||||
}
|
||||
if err := verifyNodeUpdatePlanAuthority(req, out.Plan); err != nil {
|
||||
return NodeUpdatePlan{}, err
|
||||
}
|
||||
return out.Plan, nil
|
||||
}
|
||||
endpoint := req.BackendURL + path
|
||||
if req.BackendURL == "" {
|
||||
return NodeUpdatePlan{}, errors.New("update plan control API is unavailable: no active fabric route and backend-url is empty")
|
||||
}
|
||||
httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
|
||||
if err != nil {
|
||||
return NodeUpdatePlan{}, err
|
||||
@@ -538,6 +568,110 @@ func FetchNodeUpdatePlan(ctx context.Context, req UpdateRequest) (NodeUpdatePlan
|
||||
return out.Plan, nil
|
||||
}
|
||||
|
||||
func updateControlRawViaFabric(ctx context.Context, req UpdateRequest, rawReq client.RawControlRequest) (client.RawControlResponse, bool, error) {
|
||||
if strings.TrimSpace(req.FabricRegistryRecordsJSON) == "" {
|
||||
return client.RawControlResponse{}, false, nil
|
||||
}
|
||||
publicKey, err := decodeUpdateFabricRegistryPublicKey(req)
|
||||
if err != nil {
|
||||
return client.RawControlResponse{}, false, err
|
||||
}
|
||||
registry, _, err := mesh.LoadFabricRegistryBootstrapRecords(req.FabricRegistryRecordsJSON, mesh.FabricRegistryVerificationPolicy{
|
||||
LocalClusterID: req.ClusterID,
|
||||
TrustedIssuers: []mesh.FabricRegistryTrustedIssuer{{
|
||||
IssuerID: "cluster-authority",
|
||||
Role: mesh.FabricRegistryAuthorityControl,
|
||||
PublicKey: publicKey,
|
||||
Scopes: []string{mesh.FabricRegistryScopeFarm, mesh.FabricRegistryScopeCluster, mesh.FabricRegistryScopeOrganization},
|
||||
Services: []string{mesh.FabricRegistryServiceControlAPI},
|
||||
}},
|
||||
RequiredSignatures: 1,
|
||||
MaxClockSkew: 2 * time.Minute,
|
||||
Now: time.Now().UTC(),
|
||||
}, false)
|
||||
if err != nil {
|
||||
return client.RawControlResponse{}, false, err
|
||||
}
|
||||
transport := mesh.NewQUICFabricTransport(nil)
|
||||
if req.NodeID != "" {
|
||||
transport.SetLocalPeerID(req.NodeID)
|
||||
}
|
||||
registry.VerifyCandidates(ctx, transport, mesh.FabricRegistryLiveProbeRequest{
|
||||
ClusterID: req.ClusterID,
|
||||
PreferredRegion: req.MeshRegion,
|
||||
Timeout: 2 * time.Second,
|
||||
MaxCandidates: 8,
|
||||
Now: time.Now().UTC(),
|
||||
})
|
||||
resolved := registry.ResolveService(mesh.FabricRegistryResolveRequest{
|
||||
ClusterID: req.ClusterID,
|
||||
Service: mesh.FabricRegistryServiceControlAPI,
|
||||
Scope: mesh.FabricRegistryScopeCluster,
|
||||
PreferredRegion: req.MeshRegion,
|
||||
Now: time.Now().UTC(),
|
||||
})
|
||||
if !resolved.Found || len(resolved.Endpoints) == 0 {
|
||||
return client.RawControlResponse{}, false, nil
|
||||
}
|
||||
payload, err := json.Marshal(rawReq)
|
||||
if err != nil {
|
||||
return client.RawControlResponse{}, false, err
|
||||
}
|
||||
var lastErr error
|
||||
for _, endpoint := range resolved.Endpoints {
|
||||
result, err := mesh.SendFabricControlForward(ctx, transport, endpoint, payload, 5*time.Second)
|
||||
if err != nil {
|
||||
lastErr = err
|
||||
continue
|
||||
}
|
||||
var envelope struct {
|
||||
Payload json.RawMessage `json:"payload,omitempty"`
|
||||
Error string `json:"error,omitempty"`
|
||||
}
|
||||
if err := json.Unmarshal(result.Payload, &envelope); err != nil {
|
||||
lastErr = err
|
||||
continue
|
||||
}
|
||||
if strings.TrimSpace(envelope.Error) != "" {
|
||||
lastErr = errors.New(envelope.Error)
|
||||
continue
|
||||
}
|
||||
var raw client.RawControlResponse
|
||||
if err := json.Unmarshal(envelope.Payload, &raw); err != nil {
|
||||
lastErr = err
|
||||
continue
|
||||
}
|
||||
return raw, true, nil
|
||||
}
|
||||
if lastErr == nil {
|
||||
lastErr = errors.New("fabric control registry endpoints unavailable")
|
||||
}
|
||||
return client.RawControlResponse{}, false, lastErr
|
||||
}
|
||||
|
||||
func decodeUpdateFabricRegistryPublicKey(req UpdateRequest) (ed25519.PublicKey, error) {
|
||||
value := strings.TrimSpace(req.ClusterAuthorityPublicKey)
|
||||
if value == "" && strings.TrimSpace(req.StateDir) != "" {
|
||||
if identity, err := state.Load(filepath.Join(req.StateDir, state.FileName)); err == nil {
|
||||
value = strings.TrimSpace(identity.ClusterAuthorityPublicKey)
|
||||
}
|
||||
}
|
||||
if value == "" {
|
||||
return nil, errors.New("cluster authority public key is required for fabric registry records")
|
||||
}
|
||||
decoded, err := base64.StdEncoding.DecodeString(value)
|
||||
if err != nil {
|
||||
decoded, err = base64.RawStdEncoding.DecodeString(value)
|
||||
}
|
||||
if err != nil {
|
||||
decoded, err = base64.RawURLEncoding.DecodeString(value)
|
||||
}
|
||||
if err != nil || len(decoded) != ed25519.PublicKeySize {
|
||||
return nil, errors.New("cluster authority public key must be base64 Ed25519 public key")
|
||||
}
|
||||
return ed25519.PublicKey(decoded), nil
|
||||
}
|
||||
|
||||
func verifyNodeUpdatePlanAuthority(req UpdateRequest, plan NodeUpdatePlan) error {
|
||||
identity, ok := pinnedUpdatePlanAuthority(req)
|
||||
if !ok {
|
||||
@@ -642,6 +776,9 @@ func resolveUpdateRequest(req UpdateRequest) (UpdateRequest, error) {
|
||||
|
||||
func ReportNodeUpdateStatus(ctx context.Context, backendURL, clusterID, nodeID string, request NodeUpdateStatusRequest) error {
|
||||
backendURL = strings.TrimRight(strings.TrimSpace(backendURL), "/")
|
||||
if backendURL == "" {
|
||||
return errors.New("update status control API is unavailable: backend-url is empty")
|
||||
}
|
||||
endpoint := fmt.Sprintf("%s/clusters/%s/nodes/%s/updates/status", backendURL, url.PathEscape(clusterID), url.PathEscape(nodeID))
|
||||
body, err := json.Marshal(request)
|
||||
if err != nil {
|
||||
@@ -663,6 +800,33 @@ func ReportNodeUpdateStatus(ctx context.Context, backendURL, clusterID, nodeID s
|
||||
return nil
|
||||
}
|
||||
|
||||
func ReportNodeUpdateStatusForRequest(ctx context.Context, req UpdateRequest, request NodeUpdateStatusRequest) error {
|
||||
var err error
|
||||
req, err = resolveUpdateRequest(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
body, err := json.Marshal(request)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
raw, viaFabric, err := updateControlRawViaFabric(ctx, req, client.RawControlRequest{
|
||||
Method: http.MethodPost,
|
||||
Path: fmt.Sprintf("/clusters/%s/nodes/%s/updates/status", url.PathEscape(req.ClusterID), url.PathEscape(req.NodeID)),
|
||||
Body: body,
|
||||
})
|
||||
if viaFabric {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if raw.StatusCode < 200 || raw.StatusCode >= 300 {
|
||||
return fmt.Errorf("report update status via fabric: status %d", raw.StatusCode)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
return ReportNodeUpdateStatus(ctx, req.BackendURL, req.ClusterID, req.NodeID, request)
|
||||
}
|
||||
|
||||
func (m DockerManager) runtimeConfigFromContainer(ctx context.Context, runner CommandRunner, docker, containerName string) (dockerInspectContainer, RuntimeConfig, error) {
|
||||
out, err := runner.Run(ctx, docker, "inspect", containerName)
|
||||
if err != nil {
|
||||
@@ -686,9 +850,8 @@ func (m DockerManager) runtimeConfigFromContainer(ctx context.Context, runner Co
|
||||
Network: firstNonEmpty(inspected[0].HostConfig.NetworkMode, DefaultNetwork),
|
||||
RestartPolicy: firstNonEmpty(inspected[0].HostConfig.RestartPolicy.Name, "unless-stopped"),
|
||||
WorkloadSupervisionEnabled: parseBool(env["RAP_WORKLOAD_SUPERVISION_ENABLED"]),
|
||||
MeshSyntheticRuntimeEnabled: true,
|
||||
MeshSyntheticRuntimeEnabled: parseBool(env["RAP_MESH_SYNTHETIC_RUNTIME_ENABLED"]),
|
||||
MeshProductionForwardingEnabled: parseBool(env["RAP_MESH_PRODUCTION_FORWARDING_ENABLED"]),
|
||||
MeshFabricSessionEnabled: parseBool(env["RAP_MESH_FABRIC_SESSION_ENABLED"]),
|
||||
VPNFabricSessionTransportEnabled: parseBool(env["RAP_VPN_FABRIC_SESSION_TRANSPORT_ENABLED"]),
|
||||
MeshQUICFabricEnabled: parseBool(env["RAP_MESH_QUIC_FABRIC_ENABLED"]),
|
||||
MeshQUICFabricListenAddr: env["RAP_MESH_QUIC_FABRIC_LISTEN_ADDR"],
|
||||
|
||||
@@ -4,9 +4,17 @@ import (
|
||||
"context"
|
||||
"crypto/ed25519"
|
||||
cryptorand "crypto/rand"
|
||||
"crypto/rsa"
|
||||
"crypto/sha256"
|
||||
"crypto/tls"
|
||||
"crypto/x509"
|
||||
"crypto/x509/pkix"
|
||||
"encoding/base64"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"math/big"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
@@ -16,6 +24,8 @@ import (
|
||||
"time"
|
||||
|
||||
clusterauth "github.com/example/remote-access-platform/agents/rap-node-agent/internal/authority"
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/client"
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/mesh"
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/state"
|
||||
)
|
||||
|
||||
@@ -120,6 +130,81 @@ func signHostAgentPayload(t *testing.T, payload json.RawMessage, privateKey ed25
|
||||
}
|
||||
}
|
||||
|
||||
// testHostAgentQUICTLSConfig builds a TLS configuration for test servers.
//
// It generates a fresh 2048-bit RSA key and a self-signed certificate for
// 127.0.0.1 that is valid from one hour in the past to one hour in the
// future, and advertises the "rap-fabric-data-session-v1" ALPN protocol.
// Any generation failure aborts the test.
func testHostAgentQUICTLSConfig(t *testing.T) *tls.Config {
	t.Helper()
	privateKey, err := rsa.GenerateKey(cryptorand.Reader, 2048)
	if err != nil {
		t.Fatalf("generate rsa key: %v", err)
	}
	now := time.Now()
	selfSigned := x509.Certificate{
		SerialNumber: big.NewInt(1),
		Subject:      pkix.Name{CommonName: "127.0.0.1"},
		NotBefore:    now.Add(-time.Hour),
		NotAfter:     now.Add(time.Hour),
		KeyUsage:     x509.KeyUsageKeyEncipherment | x509.KeyUsageDigitalSignature,
		ExtKeyUsage:  []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth},
		IPAddresses:  []net.IP{net.ParseIP("127.0.0.1")},
	}
	// The template doubles as its own parent, which makes the certificate self-signed.
	certDER, err := x509.CreateCertificate(cryptorand.Reader, &selfSigned, &selfSigned, &privateKey.PublicKey, privateKey)
	if err != nil {
		t.Fatalf("create cert: %v", err)
	}
	leaf := tls.Certificate{
		Certificate: [][]byte{certDER},
		PrivateKey:  privateKey,
	}
	return &tls.Config{
		Certificates: []tls.Certificate{leaf},
		NextProtos:   []string{"rap-fabric-data-session-v1"},
	}
}
|
||||
|
||||
// testHostAgentQUICCertSHA256 returns the lowercase hex SHA-256 digest of the
// first DER certificate in cfg. The test is aborted when cfg carries no
// certificate at all.
func testHostAgentQUICCertSHA256(t *testing.T, cfg *tls.Config) string {
	t.Helper()
	hasLeaf := len(cfg.Certificates) > 0 && len(cfg.Certificates[0].Certificate) > 0
	if !hasLeaf {
		t.Fatal("missing test certificate")
	}
	digest := sha256.Sum256(cfg.Certificates[0].Certificate[0])
	return hex.EncodeToString(digest[:])
}
|
||||
|
||||
func signedUpdateControlRegistry(t *testing.T, clusterID, endpoint, certSHA256 string, publicKey ed25519.PublicKey, privateKey ed25519.PrivateKey) string {
|
||||
t.Helper()
|
||||
now := time.Now().UTC()
|
||||
issuer := mesh.FabricRegistryTrustedIssuer{IssuerID: "cluster-authority", Role: mesh.FabricRegistryAuthorityControl, PublicKey: publicKey}
|
||||
record := mesh.FabricRegistryGossipRecord{
|
||||
SchemaVersion: mesh.FabricRegistryGossipRecordSchema,
|
||||
ClusterID: clusterID,
|
||||
Service: mesh.FabricRegistryServiceControlAPI,
|
||||
Scope: mesh.FabricRegistryScopeCluster,
|
||||
Epoch: 1,
|
||||
IssuedAt: now.Add(-time.Minute),
|
||||
ExpiresAt: now.Add(time.Hour),
|
||||
IssuerNodeID: "cluster-authority",
|
||||
IssuerRole: mesh.FabricRegistryAuthorityControl,
|
||||
Endpoints: []mesh.FabricRegistryEndpoint{{
|
||||
EndpointID: "control-a",
|
||||
Address: endpoint,
|
||||
Transport: "direct_quic",
|
||||
PeerCertSHA256: certSHA256,
|
||||
}},
|
||||
}
|
||||
signed, err := mesh.SignFabricRegistryGossipRecord(record, issuer, privateKey)
|
||||
if err != nil {
|
||||
t.Fatalf("sign registry record: %v", err)
|
||||
}
|
||||
raw, err := json.Marshal([]mesh.FabricRegistryGossipRecord{signed})
|
||||
if err != nil {
|
||||
t.Fatalf("marshal registry record: %v", err)
|
||||
}
|
||||
return string(raw)
|
||||
}
|
||||
|
||||
// mustJSONRaw marshals value to JSON and returns the encoded bytes as a
// json.RawMessage, aborting the test if marshalling fails.
func mustJSONRaw(t *testing.T, value any) json.RawMessage {
	t.Helper()
	encoded, marshalErr := json.Marshal(value)
	if marshalErr == nil {
		return encoded
	}
	t.Fatalf("marshal json: %v", marshalErr)
	return encoded
}
|
||||
|
||||
func TestArtifactURLsForBackendResolvesControlPlaneRelativeDownloads(t *testing.T) {
|
||||
urls := artifactURLsForBackend(ReleaseArtifact{
|
||||
URL: "/downloads/rap-node-agent-0.2.92.tar",
|
||||
@@ -223,6 +308,111 @@ func TestFetchNodeUpdatePlanAcceptsSignedPlanWithPinnedAuthority(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestFetchNodeUpdatePlanUsesFabricRegistryQUICControlAPI(t *testing.T) {
|
||||
stateDir, publicKey, privateKey := writePinnedAuthorityIdentity(t)
|
||||
plan := map[string]any{
|
||||
"schema_version": "rap.node_update_plan.v1",
|
||||
"cluster_id": "cluster-1",
|
||||
"node_id": "node-1",
|
||||
"product": "rap-node-agent",
|
||||
"current_version": "0.1.0",
|
||||
"action": "none",
|
||||
"reason": "already_current",
|
||||
"production_forwarding": false,
|
||||
}
|
||||
payload := map[string]any{
|
||||
"schema_version": "rap.node_update_plan_authority.v1",
|
||||
"cluster_id": "cluster-1",
|
||||
"node_id": "node-1",
|
||||
"product": "rap-node-agent",
|
||||
"current_version": "0.1.0",
|
||||
"action": "none",
|
||||
"target_version": "",
|
||||
"artifact_sha256": "",
|
||||
"control_plane_only": true,
|
||||
"production_forwarding": false,
|
||||
}
|
||||
rawPayload, signature := signedAuthorityPayload(t, publicKey, privateKey, payload)
|
||||
plan["authority_payload"] = json.RawMessage(rawPayload)
|
||||
plan["authority_signature"] = signature
|
||||
tlsConfig := testHostAgentQUICTLSConfig(t)
|
||||
var received client.RawControlRequest
|
||||
server, err := mesh.StartQUICFabricServer(context.Background(), mesh.QUICFabricServerConfig{
|
||||
ListenAddr: "127.0.0.1:0",
|
||||
TLSConfig: tlsConfig,
|
||||
FabricControlHandler: func(_ context.Context, payload []byte) ([]byte, error) {
|
||||
if err := json.Unmarshal(payload, &received); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if received.Method != http.MethodGet || !strings.HasPrefix(received.Path, "/clusters/cluster-1/nodes/node-1/updates/plan?") {
|
||||
return nil, fmt.Errorf("unexpected request: %+v", received)
|
||||
}
|
||||
return json.Marshal(client.RawControlResponse{StatusCode: 200, Body: mustJSONRaw(t, map[string]any{"node_update_plan": plan})})
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("start quic fabric server: %v", err)
|
||||
}
|
||||
defer server.Close()
|
||||
got, err := FetchNodeUpdatePlan(context.Background(), UpdateRequest{
|
||||
BackendURL: "http://127.0.0.1:1",
|
||||
ClusterID: "cluster-1",
|
||||
NodeID: "node-1",
|
||||
StateDir: stateDir,
|
||||
FabricRegistryRecordsJSON: signedUpdateControlRegistry(t, "cluster-1", "quic://"+server.Addr().String(), testHostAgentQUICCertSHA256(t, tlsConfig), publicKey, privateKey),
|
||||
CurrentVersion: "0.1.0",
|
||||
OS: "linux",
|
||||
Arch: "amd64",
|
||||
InstallType: "docker",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("fetch plan via fabric: %v", err)
|
||||
}
|
||||
if got.Action != "none" || got.Reason != "already_current" {
|
||||
t.Fatalf("plan = %+v", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReportNodeUpdateStatusUsesFabricRegistryQUICControlAPI(t *testing.T) {
|
||||
stateDir, publicKey, privateKey := writePinnedAuthorityIdentity(t)
|
||||
tlsConfig := testHostAgentQUICTLSConfig(t)
|
||||
var received client.RawControlRequest
|
||||
server, err := mesh.StartQUICFabricServer(context.Background(), mesh.QUICFabricServerConfig{
|
||||
ListenAddr: "127.0.0.1:0",
|
||||
TLSConfig: tlsConfig,
|
||||
FabricControlHandler: func(_ context.Context, payload []byte) ([]byte, error) {
|
||||
if err := json.Unmarshal(payload, &received); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if received.Method != http.MethodPost || received.Path != "/clusters/cluster-1/nodes/node-1/updates/status" {
|
||||
return nil, fmt.Errorf("unexpected request: %+v", received)
|
||||
}
|
||||
return json.Marshal(client.RawControlResponse{StatusCode: 204, Body: json.RawMessage(`{}`)})
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("start quic fabric server: %v", err)
|
||||
}
|
||||
defer server.Close()
|
||||
err = ReportNodeUpdateStatusForRequest(context.Background(), UpdateRequest{
|
||||
BackendURL: "http://127.0.0.1:1",
|
||||
ClusterID: "cluster-1",
|
||||
NodeID: "node-1",
|
||||
StateDir: stateDir,
|
||||
FabricRegistryRecordsJSON: signedUpdateControlRegistry(t, "cluster-1", "quic://"+server.Addr().String(), testHostAgentQUICCertSHA256(t, tlsConfig), publicKey, privateKey),
|
||||
CurrentVersion: "0.1.0",
|
||||
OS: "linux",
|
||||
Arch: "amd64",
|
||||
InstallType: "docker",
|
||||
}, NodeUpdateStatusRequest{Product: "rap-node-agent", Phase: "download", Status: "started"})
|
||||
if err != nil {
|
||||
t.Fatalf("report status via fabric: %v", err)
|
||||
}
|
||||
if len(received.Body) == 0 || !strings.Contains(string(received.Body), `"phase":"download"`) {
|
||||
t.Fatalf("unexpected status body: %s", string(received.Body))
|
||||
}
|
||||
}
|
||||
|
||||
func TestFetchNodeUpdatePlanAcceptsQuorumSignedPlan(t *testing.T) {
|
||||
stateDir, descriptor, privateKeys := writePinnedQuorumIdentity(t)
|
||||
plan := map[string]any{
|
||||
|
||||
@@ -66,7 +66,6 @@ func WindowsInstallConfigFromProfile(profile WindowsInstallProfile) WindowsInsta
|
||||
WorkloadSupervisionEnabled: profile.WorkloadSupervisionEnabled,
|
||||
MeshSyntheticRuntimeEnabled: profile.MeshSyntheticRuntimeEnabled,
|
||||
MeshProductionForwardingEnabled: profile.MeshProductionForwardingEnabled,
|
||||
MeshFabricSessionEnabled: profile.MeshFabricSessionEnabled,
|
||||
VPNFabricSessionTransportEnabled: profile.VPNFabricSessionTransportEnabled,
|
||||
MeshQUICFabricEnabled: profile.MeshQUICFabricEnabled,
|
||||
MeshQUICFabricListenAddr: profile.MeshQUICFabricListenAddr,
|
||||
|
||||
@@ -48,29 +48,29 @@ func (m WindowsManager) ApplyUpdate(ctx context.Context, req UpdateRequest) (Upd
|
||||
}
|
||||
status.Payload["task"] = req.WindowsTaskName
|
||||
status.Payload["binary_path"] = req.BinaryPath
|
||||
_ = ReportNodeUpdateStatus(ctx, req.BackendURL, req.ClusterID, req.NodeID, status)
|
||||
_ = ReportNodeUpdateStatusForRequest(ctx, req, status)
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
if plan.ProductionForwarding && !req.AllowProductionMesh {
|
||||
err := errors.New("refusing update plan with production forwarding enabled")
|
||||
_ = ReportNodeUpdateStatus(ctx, req.BackendURL, req.ClusterID, req.NodeID, statusFromError(req, plan, "preflight", "failed", err))
|
||||
_ = ReportNodeUpdateStatusForRequest(ctx, req, statusFromError(req, plan, "preflight", "failed", err))
|
||||
return result, err
|
||||
}
|
||||
if plan.Artifact == nil {
|
||||
err := errors.New("update plan has no artifact")
|
||||
_ = ReportNodeUpdateStatus(ctx, req.BackendURL, req.ClusterID, req.NodeID, statusFromError(req, plan, "preflight", "failed", err))
|
||||
_ = ReportNodeUpdateStatusForRequest(ctx, req, statusFromError(req, plan, "preflight", "failed", err))
|
||||
return result, err
|
||||
}
|
||||
if plan.Artifact.InstallType != "" && plan.Artifact.InstallType != WindowsUpdateInstallType {
|
||||
err := fmt.Errorf("unsupported update artifact install type %q", plan.Artifact.InstallType)
|
||||
_ = ReportNodeUpdateStatus(ctx, req.BackendURL, req.ClusterID, req.NodeID, statusFromError(req, plan, "preflight", "failed", err))
|
||||
_ = ReportNodeUpdateStatusForRequest(ctx, req, statusFromError(req, plan, "preflight", "failed", err))
|
||||
return result, err
|
||||
}
|
||||
if req.DryRun {
|
||||
return result, nil
|
||||
}
|
||||
_ = ReportNodeUpdateStatus(ctx, req.BackendURL, req.ClusterID, req.NodeID, NodeUpdateStatusRequest{
|
||||
_ = ReportNodeUpdateStatusForRequest(ctx, req, NodeUpdateStatusRequest{
|
||||
Product: req.Product,
|
||||
CurrentVersion: req.CurrentVersion,
|
||||
TargetVersion: plan.TargetVersion,
|
||||
@@ -81,7 +81,7 @@ func (m WindowsManager) ApplyUpdate(ctx context.Context, req UpdateRequest) (Upd
|
||||
Payload: map[string]any{"strategy": plan.Strategy, "reason": plan.Reason, "task": req.WindowsTaskName},
|
||||
})
|
||||
urls := artifactURLsForBackend(*plan.Artifact, req.BackendURL)
|
||||
_ = ReportNodeUpdateStatus(ctx, req.BackendURL, req.ClusterID, req.NodeID, NodeUpdateStatusRequest{
|
||||
_ = ReportNodeUpdateStatusForRequest(ctx, req, NodeUpdateStatusRequest{
|
||||
Product: req.Product,
|
||||
CurrentVersion: req.CurrentVersion,
|
||||
TargetVersion: plan.TargetVersion,
|
||||
@@ -93,7 +93,7 @@ func (m WindowsManager) ApplyUpdate(ctx context.Context, req UpdateRequest) (Upd
|
||||
})
|
||||
path, err := downloadFirstArtifact(ctx, urls, plan.Artifact.SHA256, plan.Artifact.SizeBytes)
|
||||
if err != nil {
|
||||
_ = ReportNodeUpdateStatus(ctx, req.BackendURL, req.ClusterID, req.NodeID, statusFromError(req, plan, "download", "failed", err))
|
||||
_ = ReportNodeUpdateStatusForRequest(ctx, req, statusFromError(req, plan, "download", "failed", err))
|
||||
return result, err
|
||||
}
|
||||
defer os.Remove(path)
|
||||
@@ -101,16 +101,16 @@ func (m WindowsManager) ApplyUpdate(ctx context.Context, req UpdateRequest) (Upd
|
||||
if err := copyFile(path, req.BinaryPath, 0o755); err != nil {
|
||||
m.stopExistingNodeAgent(ctx, req.WindowsTaskName, req.BinaryPath)
|
||||
if retryErr := copyFile(path, req.BinaryPath, 0o755); retryErr != nil {
|
||||
_ = ReportNodeUpdateStatus(ctx, req.BackendURL, req.ClusterID, req.NodeID, statusFromError(req, plan, "apply", "failed", err))
|
||||
_ = ReportNodeUpdateStatusForRequest(ctx, req, statusFromError(req, plan, "apply", "failed", err))
|
||||
return result, err
|
||||
}
|
||||
}
|
||||
result.Replaced = true
|
||||
if _, err := runner.Run(ctx, "schtasks", "/Run", "/TN", req.WindowsTaskName); err != nil {
|
||||
_ = ReportNodeUpdateStatus(ctx, req.BackendURL, req.ClusterID, req.NodeID, statusFromError(req, plan, "restart", "failed", err))
|
||||
_ = ReportNodeUpdateStatusForRequest(ctx, req, statusFromError(req, plan, "restart", "failed", err))
|
||||
return result, err
|
||||
}
|
||||
_ = ReportNodeUpdateStatus(ctx, req.BackendURL, req.ClusterID, req.NodeID, NodeUpdateStatusRequest{
|
||||
_ = ReportNodeUpdateStatusForRequest(ctx, req, NodeUpdateStatusRequest{
|
||||
Product: req.Product,
|
||||
CurrentVersion: req.CurrentVersion,
|
||||
TargetVersion: plan.TargetVersion,
|
||||
@@ -290,7 +290,6 @@ func windowsHostAgentUpdateScript(hostAgentPath string, cfg WindowsInstallConfig
|
||||
updateLoopArgs := []string{
|
||||
`"` + hostAgentPath + `"`,
|
||||
"update-loop",
|
||||
"--backend-url", `"` + cfg.RuntimeConfig.BackendURL + `"`,
|
||||
"--cluster-id", `"` + cfg.RuntimeConfig.ClusterID + `"`,
|
||||
"--state-dir", `"` + result.StateDir + `"`,
|
||||
"--current-version", currentVersion,
|
||||
@@ -306,6 +305,10 @@ func windowsHostAgentUpdateScript(hostAgentPath string, cfg WindowsInstallConfig
|
||||
"--host-agent-current-version", currentVersion,
|
||||
"--host-agent-binary-path", `"` + hostAgentPath + `"`,
|
||||
}
|
||||
if strings.TrimSpace(cfg.RuntimeConfig.BackendURL) != "" {
|
||||
updateLoopArgs = append(updateLoopArgs, "--backend-url", `"`+strings.TrimSpace(cfg.RuntimeConfig.BackendURL)+`"`)
|
||||
}
|
||||
updateLoopArgs = appendFabricUpdateArgs(updateLoopArgs, cfg.RuntimeConfig)
|
||||
if strings.TrimSpace(cfg.NodeID) != "" {
|
||||
updateLoopArgs = append(updateLoopArgs, "--node-id", `"`+strings.TrimSpace(cfg.NodeID)+`"`)
|
||||
}
|
||||
|
||||
@@ -6,13 +6,7 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
|
||||
"github.com/gorilla/websocket"
|
||||
)
|
||||
|
||||
type Client struct {
|
||||
@@ -20,38 +14,6 @@ type Client struct {
|
||||
HTTPClient *http.Client
|
||||
}
|
||||
|
||||
type FabricSessionDialOptions struct {
|
||||
Token string
|
||||
Header http.Header
|
||||
Dialer *websocket.Dialer
|
||||
Timeout time.Duration
|
||||
MaxPayload int
|
||||
}
|
||||
|
||||
type FabricSessionClient struct {
|
||||
conn *websocket.Conn
|
||||
timeout time.Duration
|
||||
maxPayload int
|
||||
readMu sync.Mutex
|
||||
writeMu sync.Mutex
|
||||
}
|
||||
|
||||
type FabricSessionPumpOptions struct {
|
||||
OutboundBuffer int
|
||||
InboundBuffer int
|
||||
ErrorBuffer int
|
||||
}
|
||||
|
||||
type FabricSessionPump struct {
|
||||
session *FabricSessionClient
|
||||
outbound chan fabricproto.Frame
|
||||
inbound chan fabricproto.Frame
|
||||
errors chan error
|
||||
done chan struct{}
|
||||
cancel context.CancelFunc
|
||||
closeMu sync.Once
|
||||
}
|
||||
|
||||
func NewClient(baseURL string) Client {
|
||||
return Client{
|
||||
BaseURL: baseURL,
|
||||
@@ -147,270 +109,3 @@ func (c Client) SendProduction(ctx context.Context, envelope ProductionEnvelope)
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (c Client) DialFabricSession(ctx context.Context, opts FabricSessionDialOptions) (*websocket.Conn, *http.Response, error) {
|
||||
target, err := c.fabricSessionWebSocketURL()
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
header := cloneHeader(opts.Header)
|
||||
if strings.TrimSpace(opts.Token) != "" {
|
||||
header.Set("X-RAP-Fabric-Session-Token", strings.TrimSpace(opts.Token))
|
||||
}
|
||||
dialer := opts.Dialer
|
||||
if dialer == nil {
|
||||
base := *websocket.DefaultDialer
|
||||
if opts.Timeout > 0 {
|
||||
base.HandshakeTimeout = opts.Timeout
|
||||
}
|
||||
dialer = &base
|
||||
}
|
||||
return dialer.DialContext(ctx, target, header)
|
||||
}
|
||||
|
||||
func (c Client) OpenFabricSession(ctx context.Context, opts FabricSessionDialOptions) (*FabricSessionClient, *http.Response, error) {
|
||||
conn, resp, err := c.DialFabricSession(ctx, opts)
|
||||
if err != nil {
|
||||
if resp != nil {
|
||||
return nil, resp, fmt.Errorf("fabric session websocket rejected with status %d: %w", resp.StatusCode, err)
|
||||
}
|
||||
return nil, resp, err
|
||||
}
|
||||
maxPayload := opts.MaxPayload
|
||||
if maxPayload <= 0 {
|
||||
maxPayload = fabricproto.DefaultMaxPayload
|
||||
}
|
||||
return &FabricSessionClient{
|
||||
conn: conn,
|
||||
timeout: opts.Timeout,
|
||||
maxPayload: maxPayload,
|
||||
}, resp, nil
|
||||
}
|
||||
|
||||
func (c Client) SendFabricSessionFrame(ctx context.Context, opts FabricSessionDialOptions, frame fabricproto.Frame) (fabricproto.Frame, error) {
|
||||
session, _, err := c.OpenFabricSession(ctx, opts)
|
||||
if err != nil {
|
||||
return fabricproto.Frame{}, err
|
||||
}
|
||||
defer session.Close()
|
||||
return session.RoundTrip(ctx, frame)
|
||||
}
|
||||
|
||||
func (c *FabricSessionClient) Close() error {
|
||||
if c == nil || c.conn == nil {
|
||||
return nil
|
||||
}
|
||||
return c.conn.Close()
|
||||
}
|
||||
|
||||
func (c *FabricSessionClient) WriteFrame(ctx context.Context, frame fabricproto.Frame) error {
|
||||
if c == nil || c.conn == nil {
|
||||
return fmt.Errorf("fabric session client is closed")
|
||||
}
|
||||
payload, err := fabricproto.MarshalFrame(frame)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
c.writeMu.Lock()
|
||||
defer c.writeMu.Unlock()
|
||||
c.applyWriteDeadline(ctx)
|
||||
return c.conn.WriteMessage(websocket.BinaryMessage, payload)
|
||||
}
|
||||
|
||||
func (c *FabricSessionClient) ReadFrame(ctx context.Context) (fabricproto.Frame, error) {
|
||||
if c == nil || c.conn == nil {
|
||||
return fabricproto.Frame{}, fmt.Errorf("fabric session client is closed")
|
||||
}
|
||||
c.readMu.Lock()
|
||||
defer c.readMu.Unlock()
|
||||
c.applyReadDeadline(ctx)
|
||||
messageType, responsePayload, err := c.conn.ReadMessage()
|
||||
if err != nil {
|
||||
return fabricproto.Frame{}, err
|
||||
}
|
||||
if messageType != websocket.BinaryMessage {
|
||||
return fabricproto.Frame{}, fmt.Errorf("fabric session websocket returned non-binary message type %d", messageType)
|
||||
}
|
||||
return fabricproto.UnmarshalFrame(responsePayload, c.maxPayload)
|
||||
}
|
||||
|
||||
func (c *FabricSessionClient) RoundTrip(ctx context.Context, frame fabricproto.Frame) (fabricproto.Frame, error) {
|
||||
if err := c.WriteFrame(ctx, frame); err != nil {
|
||||
return fabricproto.Frame{}, err
|
||||
}
|
||||
return c.ReadFrame(ctx)
|
||||
}
|
||||
|
||||
func (c *FabricSessionClient) StartPump(ctx context.Context, opts FabricSessionPumpOptions) *FabricSessionPump {
|
||||
if opts.OutboundBuffer <= 0 {
|
||||
opts.OutboundBuffer = 64
|
||||
}
|
||||
if opts.InboundBuffer <= 0 {
|
||||
opts.InboundBuffer = 64
|
||||
}
|
||||
if opts.ErrorBuffer <= 0 {
|
||||
opts.ErrorBuffer = 8
|
||||
}
|
||||
pumpCtx, cancel := context.WithCancel(ctx)
|
||||
pump := &FabricSessionPump{
|
||||
session: c,
|
||||
outbound: make(chan fabricproto.Frame, opts.OutboundBuffer),
|
||||
inbound: make(chan fabricproto.Frame, opts.InboundBuffer),
|
||||
errors: make(chan error, opts.ErrorBuffer),
|
||||
done: make(chan struct{}),
|
||||
cancel: cancel,
|
||||
}
|
||||
go pump.writeLoop(pumpCtx)
|
||||
go pump.readLoop(pumpCtx)
|
||||
return pump
|
||||
}
|
||||
|
||||
func (p *FabricSessionPump) Send(ctx context.Context, frame fabricproto.Frame) error {
|
||||
if p == nil {
|
||||
return fmt.Errorf("fabric session pump is nil")
|
||||
}
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case <-p.done:
|
||||
return fmt.Errorf("fabric session pump is closed")
|
||||
case p.outbound <- frame:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
func (p *FabricSessionPump) Frames() <-chan fabricproto.Frame {
|
||||
if p == nil {
|
||||
return nil
|
||||
}
|
||||
return p.inbound
|
||||
}
|
||||
|
||||
func (p *FabricSessionPump) Errors() <-chan error {
|
||||
if p == nil {
|
||||
return nil
|
||||
}
|
||||
return p.errors
|
||||
}
|
||||
|
||||
func (p *FabricSessionPump) Closed() bool {
|
||||
if p == nil {
|
||||
return true
|
||||
}
|
||||
select {
|
||||
case <-p.done:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func (p *FabricSessionPump) Close() error {
|
||||
if p == nil {
|
||||
return nil
|
||||
}
|
||||
var err error
|
||||
p.closeMu.Do(func() {
|
||||
close(p.done)
|
||||
p.cancel()
|
||||
err = p.session.Close()
|
||||
})
|
||||
return err
|
||||
}
|
||||
|
||||
func (p *FabricSessionPump) writeLoop(ctx context.Context) {
|
||||
defer p.Close()
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
p.reportError(ctx.Err())
|
||||
return
|
||||
case <-p.done:
|
||||
return
|
||||
case frame := <-p.outbound:
|
||||
if err := p.session.WriteFrame(ctx, frame); err != nil {
|
||||
p.reportError(err)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (p *FabricSessionPump) readLoop(ctx context.Context) {
|
||||
defer p.Close()
|
||||
for {
|
||||
frame, err := p.session.ReadFrame(ctx)
|
||||
if err != nil {
|
||||
p.reportError(err)
|
||||
return
|
||||
}
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
p.reportError(ctx.Err())
|
||||
return
|
||||
case <-p.done:
|
||||
return
|
||||
case p.inbound <- frame:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (p *FabricSessionPump) reportError(err error) {
|
||||
if err == nil {
|
||||
return
|
||||
}
|
||||
select {
|
||||
case p.errors <- err:
|
||||
default:
|
||||
}
|
||||
}
|
||||
|
||||
func (c *FabricSessionClient) applyReadDeadline(ctx context.Context) {
|
||||
if deadline, ok := ctx.Deadline(); ok {
|
||||
_ = c.conn.SetReadDeadline(deadline)
|
||||
} else if c.timeout > 0 {
|
||||
_ = c.conn.SetReadDeadline(time.Now().Add(c.timeout))
|
||||
}
|
||||
}
|
||||
|
||||
func (c *FabricSessionClient) applyWriteDeadline(ctx context.Context) {
|
||||
if deadline, ok := ctx.Deadline(); ok {
|
||||
_ = c.conn.SetWriteDeadline(deadline)
|
||||
} else if c.timeout > 0 {
|
||||
_ = c.conn.SetWriteDeadline(time.Now().Add(c.timeout))
|
||||
}
|
||||
}
|
||||
|
||||
func (c Client) fabricSessionWebSocketURL() (string, error) {
|
||||
base := strings.TrimSpace(c.BaseURL)
|
||||
if base == "" {
|
||||
return "", fmt.Errorf("mesh base url is required")
|
||||
}
|
||||
parsed, err := url.Parse(base)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
switch parsed.Scheme {
|
||||
case "http":
|
||||
parsed.Scheme = "ws"
|
||||
case "https":
|
||||
parsed.Scheme = "wss"
|
||||
case "ws", "wss":
|
||||
default:
|
||||
return "", fmt.Errorf("unsupported mesh base url scheme %q", parsed.Scheme)
|
||||
}
|
||||
parsed.Path = strings.TrimRight(parsed.Path, "/") + "/mesh/v1/fabric/session/ws"
|
||||
parsed.RawQuery = ""
|
||||
parsed.Fragment = ""
|
||||
return parsed.String(), nil
|
||||
}
|
||||
|
||||
// cloneHeader returns a new, never-nil header containing every value of
// header. Values are re-added with Add, so keys are stored in canonical
// form, and the copy has its own value slices.
func cloneHeader(header http.Header) http.Header {
	cloned := make(http.Header)
	for name := range header {
		for i := range header[name] {
			cloned.Add(name, header[name][i])
		}
	}
	return cloned
}
|
||||
|
||||
@@ -1,243 +0,0 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
|
||||
)
|
||||
|
||||
func TestClientFabricSessionFrameRoundTrip(t *testing.T) {
|
||||
server := httptest.NewServer(Server{
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
FabricSessionWebSocketEnabled: true,
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
client := NewClient(server.URL)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||
defer cancel()
|
||||
response, err := client.SendFabricSessionFrame(ctx, FabricSessionDialOptions{
|
||||
Token: "rap_fsn_clienttest",
|
||||
Timeout: time.Second,
|
||||
}, fabricproto.Frame{
|
||||
Type: fabricproto.FramePing,
|
||||
Sequence: 12,
|
||||
Payload: []byte("probe"),
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("send fabric session frame: %v", err)
|
||||
}
|
||||
if response.Type != fabricproto.FramePong || response.Sequence != 12 || string(response.Payload) != "probe" {
|
||||
t.Fatalf("response = %+v, want pong seq 12", response)
|
||||
}
|
||||
}
|
||||
|
||||
func TestClientFabricSessionPersistentRoundTrips(t *testing.T) {
|
||||
server := httptest.NewServer(Server{
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
FabricSessionWebSocketEnabled: true,
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
client := NewClient(server.URL)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||
defer cancel()
|
||||
session, _, err := client.OpenFabricSession(ctx, FabricSessionDialOptions{
|
||||
Token: "rap_fsn_persistent",
|
||||
Timeout: time.Second,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("open fabric session: %v", err)
|
||||
}
|
||||
defer session.Close()
|
||||
|
||||
first, err := session.RoundTrip(ctx, fabricproto.Frame{
|
||||
Type: fabricproto.FramePing,
|
||||
Sequence: 1,
|
||||
Payload: []byte("first"),
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("first round trip: %v", err)
|
||||
}
|
||||
second, err := session.RoundTrip(ctx, fabricproto.Frame{
|
||||
Type: fabricproto.FramePing,
|
||||
Sequence: 2,
|
||||
Payload: []byte("second"),
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("second round trip: %v", err)
|
||||
}
|
||||
if first.Type != fabricproto.FramePong || first.Sequence != 1 || string(first.Payload) != "first" {
|
||||
t.Fatalf("first response = %+v, want pong seq 1", first)
|
||||
}
|
||||
if second.Type != fabricproto.FramePong || second.Sequence != 2 || string(second.Payload) != "second" {
|
||||
t.Fatalf("second response = %+v, want pong seq 2", second)
|
||||
}
|
||||
}
|
||||
|
||||
func TestClientFabricSessionPersistentDataAcks(t *testing.T) {
|
||||
server := httptest.NewServer(Server{
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
FabricSessionWebSocketEnabled: true,
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
client := NewClient(server.URL)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||
defer cancel()
|
||||
session, _, err := client.OpenFabricSession(ctx, FabricSessionDialOptions{
|
||||
Token: "rap_fsn_dataacks",
|
||||
Timeout: time.Second,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("open fabric session: %v", err)
|
||||
}
|
||||
defer session.Close()
|
||||
|
||||
if err := session.WriteFrame(ctx, fabricproto.Frame{
|
||||
Type: fabricproto.FrameOpenStream,
|
||||
StreamID: 77,
|
||||
TrafficClass: fabricproto.TrafficClassInteractive,
|
||||
}); err != nil {
|
||||
t.Fatalf("open stream frame: %v", err)
|
||||
}
|
||||
|
||||
first, err := session.RoundTrip(ctx, fabricproto.Frame{
|
||||
Type: fabricproto.FrameData,
|
||||
StreamID: 77,
|
||||
Sequence: 10,
|
||||
TrafficClass: fabricproto.TrafficClassInteractive,
|
||||
Payload: []byte("first payload"),
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("first data round trip: %v", err)
|
||||
}
|
||||
second, err := session.RoundTrip(ctx, fabricproto.Frame{
|
||||
Type: fabricproto.FrameData,
|
||||
StreamID: 77,
|
||||
Sequence: 11,
|
||||
TrafficClass: fabricproto.TrafficClassInteractive,
|
||||
Payload: []byte("second payload"),
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("second data round trip: %v", err)
|
||||
}
|
||||
if first.Type != fabricproto.FrameAck || first.StreamID != 77 || first.Sequence != 10 {
|
||||
t.Fatalf("first ack = %+v, want stream 77 seq 10", first)
|
||||
}
|
||||
if second.Type != fabricproto.FrameAck || second.StreamID != 77 || second.Sequence != 11 {
|
||||
t.Fatalf("second ack = %+v, want stream 77 seq 11", second)
|
||||
}
|
||||
}
|
||||
|
||||
func TestClientFabricSessionPumpMovesIndependentFrames(t *testing.T) {
|
||||
server := httptest.NewServer(Server{
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
FabricSessionWebSocketEnabled: true,
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
client := NewClient(server.URL)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||
defer cancel()
|
||||
session, _, err := client.OpenFabricSession(ctx, FabricSessionDialOptions{
|
||||
Token: "rap_fsn_pump",
|
||||
Timeout: time.Second,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("open fabric session: %v", err)
|
||||
}
|
||||
pump := session.StartPump(ctx, FabricSessionPumpOptions{
|
||||
OutboundBuffer: 4,
|
||||
InboundBuffer: 4,
|
||||
ErrorBuffer: 4,
|
||||
})
|
||||
defer pump.Close()
|
||||
|
||||
if err := pump.Send(ctx, fabricproto.Frame{
|
||||
Type: fabricproto.FrameOpenStream,
|
||||
StreamID: 900,
|
||||
TrafficClass: fabricproto.TrafficClassBulk,
|
||||
}); err != nil {
|
||||
t.Fatalf("send open bulk stream: %v", err)
|
||||
}
|
||||
if err := pump.Send(ctx, fabricproto.Frame{
|
||||
Type: fabricproto.FrameData,
|
||||
StreamID: 900,
|
||||
Sequence: 31,
|
||||
TrafficClass: fabricproto.TrafficClassBulk,
|
||||
Payload: []byte("bulk payload"),
|
||||
}); err != nil {
|
||||
t.Fatalf("send bulk data: %v", err)
|
||||
}
|
||||
if err := pump.Send(ctx, fabricproto.Frame{
|
||||
Type: fabricproto.FramePing,
|
||||
Sequence: 32,
|
||||
Payload: []byte("control ping"),
|
||||
}); err != nil {
|
||||
t.Fatalf("send ping: %v", err)
|
||||
}
|
||||
|
||||
gotAck := false
|
||||
gotPong := false
|
||||
for !gotAck || !gotPong {
|
||||
select {
|
||||
case frame := <-pump.Frames():
|
||||
switch {
|
||||
case frame.Type == fabricproto.FrameAck && frame.StreamID == 900 && frame.Sequence == 31:
|
||||
gotAck = true
|
||||
case frame.Type == fabricproto.FramePong && frame.Sequence == 32 && string(frame.Payload) == "control ping":
|
||||
gotPong = true
|
||||
}
|
||||
case err := <-pump.Errors():
|
||||
t.Fatalf("pump error: %v", err)
|
||||
case <-ctx.Done():
|
||||
t.Fatalf("timed out waiting for pump frames: ack=%v pong=%v", gotAck, gotPong)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestClientFabricSessionReportsRejectedStatus(t *testing.T) {
|
||||
server := httptest.NewServer(Server{
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
FabricSessionWebSocketEnabled: true,
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
client := NewClient(server.URL)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||
defer cancel()
|
||||
_, err := client.SendFabricSessionFrame(ctx, FabricSessionDialOptions{}, fabricproto.Frame{Type: fabricproto.FramePing})
|
||||
if err == nil {
|
||||
t.Fatal("send fabric session without token unexpectedly succeeded")
|
||||
}
|
||||
}
|
||||
|
||||
func TestClientFabricSessionWebSocketURL(t *testing.T) {
|
||||
cases := []struct {
|
||||
base string
|
||||
want string
|
||||
}{
|
||||
{base: "http://node.example", want: "ws://node.example/mesh/v1/fabric/session/ws"},
|
||||
{base: "https://node.example/base/", want: "wss://node.example/base/mesh/v1/fabric/session/ws"},
|
||||
{base: "ws://node.example", want: "ws://node.example/mesh/v1/fabric/session/ws"},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
client := NewClient(tc.base)
|
||||
got, err := client.fabricSessionWebSocketURL()
|
||||
if err != nil {
|
||||
t.Fatalf("fabricSessionWebSocketURL(%q): %v", tc.base, err)
|
||||
}
|
||||
if got != tc.want {
|
||||
t.Fatalf("fabricSessionWebSocketURL(%q) = %q, want %q", tc.base, got, tc.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,94 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
|
||||
)
|
||||
|
||||
// fabricControlForwardSequence is a package-wide counter. Each call to
// SendFabricControlForward increments it once and uses the new value as the
// frame sequence number it later matches the reply against.
var fabricControlForwardSequence atomic.Uint64

// FabricControlForwardResult is what SendFabricControlForward returns on
// success.
type FabricControlForwardResult struct {
	// Payload is a copy of the payload carried by the matching reply frame.
	Payload json.RawMessage `json:"payload,omitempty"`
	// LatencyMs is the elapsed time, in milliseconds, between the request
	// frame being sent and the matching reply frame arriving.
	LatencyMs int64 `json:"latency_ms"`
	// Endpoint is the address of the registry endpoint the request went to.
	Endpoint string `json:"endpoint,omitempty"`
}
|
||||
|
||||
func FabricTransportTargetFromRegistryEndpoint(endpoint FabricRegistryEndpoint) FabricTransportTarget {
|
||||
return FabricTransportTarget{
|
||||
EndpointID: strings.TrimSpace(endpoint.EndpointID),
|
||||
PeerID: strings.TrimSpace(endpoint.EndpointID),
|
||||
Endpoint: strings.TrimSpace(endpoint.Address),
|
||||
Transport: strings.TrimSpace(endpoint.Transport),
|
||||
PeerCertSHA256: strings.TrimSpace(endpoint.PeerCertSHA256),
|
||||
Timeout: 5 * time.Second,
|
||||
InboundBuffer: 4,
|
||||
ErrorBuffer: 4,
|
||||
}
|
||||
}
|
||||
|
||||
func SendFabricControlForward(ctx context.Context, transport FabricTransport, endpoint FabricRegistryEndpoint, payload []byte, timeout time.Duration) (FabricControlForwardResult, error) {
|
||||
if transport == nil {
|
||||
return FabricControlForwardResult{}, fmt.Errorf("fabric control transport is unavailable")
|
||||
}
|
||||
if len(payload) == 0 {
|
||||
return FabricControlForwardResult{}, fmt.Errorf("fabric control payload is empty")
|
||||
}
|
||||
if timeout <= 0 {
|
||||
timeout = 5 * time.Second
|
||||
}
|
||||
target := FabricTransportTargetFromRegistryEndpoint(endpoint)
|
||||
target.Timeout = timeout
|
||||
session, err := transport.Connect(ctx, target)
|
||||
if err != nil {
|
||||
return FabricControlForwardResult{}, err
|
||||
}
|
||||
defer session.Close()
|
||||
sequence := fabricControlForwardSequence.Add(1)
|
||||
if err := session.Send(ctx, fabricproto.Frame{
|
||||
Type: fabricproto.FrameData,
|
||||
TrafficClass: fabricproto.TrafficClassReliable,
|
||||
StreamID: FabricControlForwardQUICStreamID,
|
||||
Sequence: sequence,
|
||||
Payload: append([]byte(nil), payload...),
|
||||
}); err != nil {
|
||||
return FabricControlForwardResult{}, err
|
||||
}
|
||||
waitCtx := ctx
|
||||
var cancel context.CancelFunc
|
||||
if timeout > 0 {
|
||||
waitCtx, cancel = context.WithTimeout(ctx, timeout)
|
||||
defer cancel()
|
||||
}
|
||||
startedAt := time.Now()
|
||||
for {
|
||||
select {
|
||||
case <-waitCtx.Done():
|
||||
return FabricControlForwardResult{}, waitCtx.Err()
|
||||
case err, ok := <-session.Errors():
|
||||
if !ok {
|
||||
return FabricControlForwardResult{}, fmt.Errorf("fabric control session closed")
|
||||
}
|
||||
if err != nil {
|
||||
return FabricControlForwardResult{}, err
|
||||
}
|
||||
case frame, ok := <-session.Frames():
|
||||
if !ok {
|
||||
return FabricControlForwardResult{}, fmt.Errorf("fabric control session closed")
|
||||
}
|
||||
if frame.Type != fabricproto.FrameData || frame.StreamID != FabricControlForwardQUICStreamID || frame.Sequence != sequence {
|
||||
continue
|
||||
}
|
||||
return FabricControlForwardResult{
|
||||
Payload: append(json.RawMessage(nil), frame.Payload...),
|
||||
LatencyMs: time.Since(startedAt).Milliseconds(),
|
||||
Endpoint: endpoint.Address,
|
||||
}, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -565,6 +565,43 @@ func TestQUICFabricServerHandlesWebIngressForwardFrames(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestSendFabricControlForwardUsesQUICStream(t *testing.T) {
|
||||
tlsConfig := testQUICTLSConfig(t)
|
||||
server, err := StartQUICFabricServer(context.Background(), QUICFabricServerConfig{
|
||||
ListenAddr: "127.0.0.1:0",
|
||||
TLSConfig: tlsConfig,
|
||||
FabricControlHandler: func(_ context.Context, payload []byte) ([]byte, error) {
|
||||
if string(payload) != `{"method":"GET","path":"/auth/login"}` {
|
||||
return nil, ErrForwardRuntimeUnavailable
|
||||
}
|
||||
return []byte(`{"status_code":200,"body":{"ok":true}}`), nil
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("start quic fabric server: %v", err)
|
||||
}
|
||||
defer server.Close()
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
|
||||
defer cancel()
|
||||
result, err := SendFabricControlForward(ctx, NewQUICFabricTransport(nil), FabricRegistryEndpoint{
|
||||
EndpointID: "control-a",
|
||||
Address: "quic://" + server.Addr().String(),
|
||||
Transport: "direct_quic",
|
||||
PeerCertSHA256: testQUICCertSHA256(t, tlsConfig),
|
||||
}, []byte(`{"method":"GET","path":"/auth/login"}`), time.Second)
|
||||
if err != nil {
|
||||
t.Fatalf("send fabric control forward: %v", err)
|
||||
}
|
||||
var response quicFabricControlForwardResponse
|
||||
if err := json.Unmarshal(result.Payload, &response); err != nil {
|
||||
t.Fatalf("decode response: %v", err)
|
||||
}
|
||||
if response.Error != "" || string(response.Payload) != `{"status_code":200,"body":{"ok":true}}` {
|
||||
t.Fatalf("response = %+v", response)
|
||||
}
|
||||
}
|
||||
|
||||
func startQUICFabricEchoServer(t *testing.T) *quic.Listener {
|
||||
t.Helper()
|
||||
return startQUICFabricEchoServerWithTLS(t, testQUICTLSConfig(t))
|
||||
|
||||
@@ -164,6 +164,7 @@ func fabricRouteHopsForCandidate(candidate PeerEndpointCandidate, metadata Fabri
|
||||
case FabricRouteRelay:
|
||||
relayNodeID := firstNonEmpty(strings.TrimSpace(metadata.RelayNodeID), strings.TrimSpace(metadata.ViaNodeID))
|
||||
relayEndpoint := firstNonEmpty(strings.TrimRight(strings.TrimSpace(metadata.RelayEndpoint), "/"), endpoint)
|
||||
relayPeerCertSHA256 := candidatePeerCertSHA256(candidate)
|
||||
hops := []FabricRouteHop{}
|
||||
if localNodeID != "" {
|
||||
hops = append(hops, FabricRouteHop{NodeID: localNodeID, Mode: FabricRouteDirect})
|
||||
@@ -173,7 +174,7 @@ func fabricRouteHopsForCandidate(candidate PeerEndpointCandidate, metadata Fabri
|
||||
return hops
|
||||
}
|
||||
hops = append(hops,
|
||||
FabricRouteHop{NodeID: relayNodeID, Mode: FabricRouteRelay, EndpointID: candidate.EndpointID + ":relay", Address: relayEndpoint},
|
||||
FabricRouteHop{NodeID: relayNodeID, Mode: FabricRouteRelay, EndpointID: candidate.EndpointID + ":relay", Address: relayEndpoint, PeerCertSHA256: relayPeerCertSHA256},
|
||||
FabricRouteHop{NodeID: targetNodeID, Mode: FabricRouteRelay, EndpointID: candidate.EndpointID, Address: endpoint, PeerCertSHA256: candidatePeerCertSHA256(candidate)},
|
||||
)
|
||||
return hops
|
||||
|
||||
@@ -44,7 +44,13 @@ func TestFabricRouteSetForPeerEndpointCandidatesPrefersLocalLAN(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestFabricRouteSetForPeerEndpointCandidatesBuildsRelayFallback(t *testing.T) {
|
||||
metadata, _ := json.Marshal(FabricCandidateMetadata{RelayNodeID: "node-r", RelayEndpoint: "quic://node-r:19443"})
|
||||
metadata, _ := json.Marshal(struct {
|
||||
FabricCandidateMetadata
|
||||
TLSCertSHA256 string `json:"tls_cert_sha256,omitempty"`
|
||||
}{
|
||||
FabricCandidateMetadata: FabricCandidateMetadata{RelayNodeID: "node-r", RelayEndpoint: "quic://node-r:19443"},
|
||||
TLSCertSHA256: "relay-cert",
|
||||
})
|
||||
routeSet := FabricRouteSetForPeerEndpointCandidates("node-b", []PeerEndpointCandidate{{
|
||||
EndpointID: "node-b-relay",
|
||||
NodeID: "node-b",
|
||||
@@ -69,6 +75,9 @@ func TestFabricRouteSetForPeerEndpointCandidatesBuildsRelayFallback(t *testing.T
|
||||
if got := routeSet.Primary.Hops[1].NodeID; got != "node-r" {
|
||||
t.Fatalf("relay hop = %q, want node-r", got)
|
||||
}
|
||||
if got := routeSet.Primary.Hops[1].PeerCertSHA256; got != "relay-cert" {
|
||||
t.Fatalf("relay hop peer cert = %q, want relay-cert", got)
|
||||
}
|
||||
if routeSet.Primary.Capacity != 50 {
|
||||
t.Fatalf("capacity = %d, want 50", routeSet.Primary.Capacity)
|
||||
}
|
||||
|
||||
@@ -1,156 +0,0 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync"
|
||||
)
|
||||
|
||||
type FabricSessionPeerManager struct {
|
||||
mu sync.Mutex
|
||||
sessions map[string]*FabricSessionPump
|
||||
stats FabricSessionPeerManagerStats
|
||||
}
|
||||
|
||||
type FabricSessionPeerTarget struct {
|
||||
PeerID string
|
||||
BaseURL string
|
||||
Options FabricSessionDialOptions
|
||||
Pump FabricSessionPumpOptions
|
||||
}
|
||||
|
||||
// FabricSessionPeerManagerStats holds the cumulative counters maintained by a
// FabricSessionPeerManager.
type FabricSessionPeerManagerStats struct {
	// Opens counts newly opened pumps that were stored in the cache.
	Opens uint64 `json:"opens"`
	// Reuses counts Get calls answered with an already cached live pump.
	Reuses uint64 `json:"reuses"`
	// ClosedEvicted counts cached pumps dropped because they were closed.
	ClosedEvicted uint64 `json:"closed_evicted"`
	// ClosePeerCalls counts ClosePeer calls that passed key validation.
	ClosePeerCalls uint64 `json:"close_peer_calls"`
	// CloseAllCalls counts calls to Close.
	CloseAllCalls uint64 `json:"close_all_calls"`
}
|
||||
|
||||
type FabricSessionPeerManagerSnapshot struct {
|
||||
SchemaVersion string `json:"schema_version"`
|
||||
ActiveCount int `json:"active_count"`
|
||||
ClosedCount int `json:"closed_count"`
|
||||
Stats FabricSessionPeerManagerStats `json:"stats"`
|
||||
}
|
||||
|
||||
func NewFabricSessionPeerManager() *FabricSessionPeerManager {
|
||||
return &FabricSessionPeerManager{
|
||||
sessions: map[string]*FabricSessionPump{},
|
||||
}
|
||||
}
|
||||
|
||||
func (m *FabricSessionPeerManager) Get(ctx context.Context, target FabricSessionPeerTarget) (*FabricSessionPump, error) {
|
||||
if m == nil {
|
||||
return nil, fmt.Errorf("fabric session peer manager is nil")
|
||||
}
|
||||
key, err := fabricSessionPeerKey(target)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
m.mu.Lock()
|
||||
if pump := m.sessions[key]; pump != nil {
|
||||
if pump.Closed() {
|
||||
delete(m.sessions, key)
|
||||
m.stats.ClosedEvicted++
|
||||
} else {
|
||||
m.stats.Reuses++
|
||||
m.mu.Unlock()
|
||||
return pump, nil
|
||||
}
|
||||
}
|
||||
m.mu.Unlock()
|
||||
|
||||
session, _, err := NewClient(target.BaseURL).OpenFabricSession(ctx, target.Options)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
pump := session.StartPump(context.Background(), target.Pump)
|
||||
|
||||
m.mu.Lock()
|
||||
if existing := m.sessions[key]; existing != nil {
|
||||
if existing.Closed() {
|
||||
delete(m.sessions, key)
|
||||
m.stats.ClosedEvicted++
|
||||
} else {
|
||||
m.stats.Reuses++
|
||||
m.mu.Unlock()
|
||||
_ = pump.Close()
|
||||
return existing, nil
|
||||
}
|
||||
}
|
||||
if m.sessions == nil {
|
||||
m.sessions = map[string]*FabricSessionPump{}
|
||||
}
|
||||
m.sessions[key] = pump
|
||||
m.stats.Opens++
|
||||
m.mu.Unlock()
|
||||
return pump, nil
|
||||
}
|
||||
|
||||
func (m *FabricSessionPeerManager) ClosePeer(target FabricSessionPeerTarget) error {
|
||||
if m == nil {
|
||||
return nil
|
||||
}
|
||||
key, err := fabricSessionPeerKey(target)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
m.mu.Lock()
|
||||
m.stats.ClosePeerCalls++
|
||||
pump := m.sessions[key]
|
||||
delete(m.sessions, key)
|
||||
m.mu.Unlock()
|
||||
if pump == nil {
|
||||
return nil
|
||||
}
|
||||
return pump.Close()
|
||||
}
|
||||
|
||||
func (m *FabricSessionPeerManager) Close() error {
|
||||
if m == nil {
|
||||
return nil
|
||||
}
|
||||
m.mu.Lock()
|
||||
m.stats.CloseAllCalls++
|
||||
sessions := m.sessions
|
||||
m.sessions = map[string]*FabricSessionPump{}
|
||||
m.mu.Unlock()
|
||||
var firstErr error
|
||||
for _, pump := range sessions {
|
||||
if err := pump.Close(); err != nil && firstErr == nil {
|
||||
firstErr = err
|
||||
}
|
||||
}
|
||||
return firstErr
|
||||
}
|
||||
|
||||
func (m *FabricSessionPeerManager) Snapshot() FabricSessionPeerManagerSnapshot {
|
||||
if m == nil {
|
||||
return FabricSessionPeerManagerSnapshot{SchemaVersion: "rap.fabric_session_peer_manager.v1"}
|
||||
}
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
snapshot := FabricSessionPeerManagerSnapshot{
|
||||
SchemaVersion: "rap.fabric_session_peer_manager.v1",
|
||||
Stats: m.stats,
|
||||
}
|
||||
for _, pump := range m.sessions {
|
||||
if pump == nil || pump.Closed() {
|
||||
snapshot.ClosedCount++
|
||||
continue
|
||||
}
|
||||
snapshot.ActiveCount++
|
||||
}
|
||||
return snapshot
|
||||
}
|
||||
|
||||
func fabricSessionPeerKey(target FabricSessionPeerTarget) (string, error) {
|
||||
peerID := strings.TrimSpace(target.PeerID)
|
||||
baseURL := strings.TrimRight(strings.TrimSpace(target.BaseURL), "/")
|
||||
if peerID == "" || baseURL == "" {
|
||||
return "", fmt.Errorf("fabric session peer id and base url are required")
|
||||
}
|
||||
return peerID + "\x00" + baseURL, nil
|
||||
}
|
||||
@@ -1,194 +0,0 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
|
||||
)
|
||||
|
||||
func TestFabricSessionPeerManagerReusesPeerPump(t *testing.T) {
|
||||
var opened int
|
||||
server := httptest.NewServer(Server{
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
FabricSessionWebSocketEnabled: true,
|
||||
FabricSessionLogger: func(entry FabricSessionEventLogEntry) {
|
||||
if entry.Event == "fabric_session_websocket_opened" {
|
||||
opened++
|
||||
}
|
||||
},
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
manager := NewFabricSessionPeerManager()
|
||||
defer manager.Close()
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||
defer cancel()
|
||||
target := FabricSessionPeerTarget{
|
||||
PeerID: "node-a",
|
||||
BaseURL: server.URL,
|
||||
Options: FabricSessionDialOptions{
|
||||
Token: "rap_fsn_manager",
|
||||
Timeout: time.Second,
|
||||
},
|
||||
Pump: FabricSessionPumpOptions{
|
||||
OutboundBuffer: 4,
|
||||
InboundBuffer: 4,
|
||||
},
|
||||
}
|
||||
|
||||
first, err := manager.Get(ctx, target)
|
||||
if err != nil {
|
||||
t.Fatalf("first get: %v", err)
|
||||
}
|
||||
second, err := manager.Get(ctx, target)
|
||||
if err != nil {
|
||||
t.Fatalf("second get: %v", err)
|
||||
}
|
||||
if first != second {
|
||||
t.Fatal("manager did not reuse peer pump")
|
||||
}
|
||||
if opened != 1 {
|
||||
t.Fatalf("opened sessions = %d, want 1", opened)
|
||||
}
|
||||
snapshot := manager.Snapshot()
|
||||
if snapshot.SchemaVersion != "rap.fabric_session_peer_manager.v1" ||
|
||||
snapshot.ActiveCount != 1 ||
|
||||
snapshot.ClosedCount != 0 ||
|
||||
snapshot.Stats.Opens != 1 ||
|
||||
snapshot.Stats.Reuses != 1 {
|
||||
t.Fatalf("snapshot = %+v", snapshot)
|
||||
}
|
||||
if err := first.Send(ctx, fabricproto.Frame{
|
||||
Type: fabricproto.FramePing,
|
||||
Sequence: 1,
|
||||
Payload: []byte("manager"),
|
||||
}); err != nil {
|
||||
t.Fatalf("send ping: %v", err)
|
||||
}
|
||||
select {
|
||||
case frame := <-first.Frames():
|
||||
if frame.Type != fabricproto.FramePong || frame.Sequence != 1 || string(frame.Payload) != "manager" {
|
||||
t.Fatalf("frame = %+v", frame)
|
||||
}
|
||||
case err := <-first.Errors():
|
||||
t.Fatalf("pump error: %v", err)
|
||||
case <-ctx.Done():
|
||||
t.Fatal(ctx.Err())
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricSessionPeerManagerClosePeerReopens(t *testing.T) {
|
||||
var opened int
|
||||
server := httptest.NewServer(Server{
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
FabricSessionWebSocketEnabled: true,
|
||||
FabricSessionLogger: func(entry FabricSessionEventLogEntry) {
|
||||
if entry.Event == "fabric_session_websocket_opened" {
|
||||
opened++
|
||||
}
|
||||
},
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
manager := NewFabricSessionPeerManager()
|
||||
defer manager.Close()
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||
defer cancel()
|
||||
target := FabricSessionPeerTarget{
|
||||
PeerID: "node-a",
|
||||
BaseURL: server.URL,
|
||||
Options: FabricSessionDialOptions{
|
||||
Token: "rap_fsn_manager_reopen",
|
||||
Timeout: time.Second,
|
||||
},
|
||||
}
|
||||
|
||||
first, err := manager.Get(ctx, target)
|
||||
if err != nil {
|
||||
t.Fatalf("first get: %v", err)
|
||||
}
|
||||
if err := manager.ClosePeer(target); err != nil {
|
||||
t.Fatalf("close peer: %v", err)
|
||||
}
|
||||
second, err := manager.Get(ctx, target)
|
||||
if err != nil {
|
||||
t.Fatalf("second get: %v", err)
|
||||
}
|
||||
if first == second {
|
||||
t.Fatal("manager reused pump after close peer")
|
||||
}
|
||||
if opened != 2 {
|
||||
t.Fatalf("opened sessions = %d, want 2", opened)
|
||||
}
|
||||
if snapshot := manager.Snapshot(); snapshot.Stats.ClosePeerCalls != 1 || snapshot.Stats.Opens != 2 {
|
||||
t.Fatalf("snapshot = %+v", snapshot)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricSessionPeerManagerReopensClosedPump(t *testing.T) {
|
||||
var opened int
|
||||
server := httptest.NewServer(Server{
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
FabricSessionWebSocketEnabled: true,
|
||||
FabricSessionLogger: func(entry FabricSessionEventLogEntry) {
|
||||
if entry.Event == "fabric_session_websocket_opened" {
|
||||
opened++
|
||||
}
|
||||
},
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
manager := NewFabricSessionPeerManager()
|
||||
defer manager.Close()
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||
defer cancel()
|
||||
target := FabricSessionPeerTarget{
|
||||
PeerID: "node-a",
|
||||
BaseURL: server.URL,
|
||||
Options: FabricSessionDialOptions{
|
||||
Token: "rap_fsn_manager_closed",
|
||||
Timeout: time.Second,
|
||||
},
|
||||
}
|
||||
|
||||
first, err := manager.Get(ctx, target)
|
||||
if err != nil {
|
||||
t.Fatalf("first get: %v", err)
|
||||
}
|
||||
if err := first.Close(); err != nil {
|
||||
t.Fatalf("close first pump: %v", err)
|
||||
}
|
||||
if !first.Closed() {
|
||||
t.Fatal("first pump should report closed")
|
||||
}
|
||||
second, err := manager.Get(ctx, target)
|
||||
if err != nil {
|
||||
t.Fatalf("second get: %v", err)
|
||||
}
|
||||
if first == second {
|
||||
t.Fatal("manager reused closed pump")
|
||||
}
|
||||
if opened != 2 {
|
||||
t.Fatalf("opened sessions = %d, want 2", opened)
|
||||
}
|
||||
snapshot := manager.Snapshot()
|
||||
if snapshot.ActiveCount != 1 ||
|
||||
snapshot.Stats.Opens != 2 ||
|
||||
snapshot.Stats.ClosedEvicted != 1 {
|
||||
t.Fatalf("snapshot = %+v", snapshot)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricSessionPeerManagerRejectsIncompleteTarget(t *testing.T) {
|
||||
manager := NewFabricSessionPeerManager()
|
||||
_, err := manager.Get(context.Background(), FabricSessionPeerTarget{PeerID: "node-a"})
|
||||
if err == nil {
|
||||
t.Fatal("incomplete target unexpectedly succeeded")
|
||||
}
|
||||
}
|
||||
@@ -308,7 +308,7 @@ func (m *PeerConnectionManager) probeIntent(ctx context.Context, intent PeerConn
|
||||
Transport: intent.Transport,
|
||||
PeerCertSHA256: intent.BestPeerCertSHA256,
|
||||
}}
|
||||
if intent.DirectCandidate {
|
||||
if intent.DirectCandidate || peerConnectionShouldProbeDirectUpgrade(intent, cacheEntry) {
|
||||
targets = peerConnectionProbeTargets(intent, cacheEntry)
|
||||
}
|
||||
var lastFailure string
|
||||
@@ -354,7 +354,9 @@ func (m *PeerConnectionManager) probeIntent(ctx context.Context, intent PeerConn
|
||||
result.SelectedCandidateID = probePeer.BestCandidateID
|
||||
result.SelectedEndpoint = probePeer.Endpoint
|
||||
result.LatencyMs = latency
|
||||
if intent.RelayCandidate {
|
||||
if probeTargetUsesDirectQUIC(probeTarget) {
|
||||
result.ConnectionState = m.tracker.RecordSuccessForPeer(probePeer, latency, completedAt)
|
||||
} else if intent.RelayCandidate {
|
||||
result.ConnectionState = m.tracker.RecordRelayReady(probePeer, latency, completedAt)
|
||||
} else {
|
||||
result.ConnectionState = m.tracker.RecordSuccessForPeer(probePeer, latency, completedAt)
|
||||
@@ -410,6 +412,10 @@ func (m *PeerConnectionManager) probePeerTarget(ctx context.Context, probePeer P
|
||||
func peerConnectionProbeTargets(intent PeerConnectionIntent, cacheEntry PeerCacheEntry) []peerConnectionProbeTarget {
|
||||
seen := map[string]struct{}{}
|
||||
out := make([]peerConnectionProbeTarget, 0, len(cacheEntry.EndpointCandidates)+1)
|
||||
fallbackPeerCertSHA256 := firstNonEmpty(
|
||||
strings.TrimSpace(cacheEntry.BestPeerCertSHA256),
|
||||
strings.TrimSpace(intent.BestPeerCertSHA256),
|
||||
)
|
||||
add := func(candidateID, endpoint, transport, peerCertSHA256 string) {
|
||||
endpoint = strings.TrimRight(strings.TrimSpace(endpoint), "/")
|
||||
if endpoint == "" {
|
||||
@@ -423,6 +429,9 @@ func peerConnectionProbeTargets(intent PeerConnectionIntent, cacheEntry PeerCach
|
||||
return
|
||||
}
|
||||
seen[key] = struct{}{}
|
||||
if strings.TrimSpace(peerCertSHA256) == "" {
|
||||
peerCertSHA256 = fallbackPeerCertSHA256
|
||||
}
|
||||
out = append(out, peerConnectionProbeTarget{
|
||||
CandidateID: strings.TrimSpace(candidateID),
|
||||
Endpoint: endpoint,
|
||||
@@ -440,6 +449,31 @@ func peerConnectionProbeTargets(intent PeerConnectionIntent, cacheEntry PeerCach
|
||||
return out
|
||||
}
|
||||
|
||||
func peerConnectionShouldProbeDirectUpgrade(intent PeerConnectionIntent, cacheEntry PeerCacheEntry) bool {
|
||||
if intent.DirectCandidate {
|
||||
return true
|
||||
}
|
||||
if strings.TrimSpace(intent.ConnectionState) != PeerConnectionRelayReady &&
|
||||
!intent.RelayCandidate &&
|
||||
strings.TrimSpace(intent.TransportMode) != PeerTransportModeRelayControl {
|
||||
return false
|
||||
}
|
||||
for _, candidate := range cacheEntry.EndpointCandidates {
|
||||
if candidateUsableForDirectProbe(candidate) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func probeTargetUsesDirectQUIC(target peerConnectionProbeTarget) bool {
|
||||
transport := strings.ToLower(strings.TrimSpace(target.Transport))
|
||||
if strings.Contains(transport, "relay") || strings.Contains(transport, "reverse") || strings.Contains(transport, "outbound") {
|
||||
return false
|
||||
}
|
||||
return peerConnectionTargetIsQUIC(target.Transport, target.Endpoint)
|
||||
}
|
||||
|
||||
func peerConnectionTargetIsQUIC(transport string, endpoint string) bool {
|
||||
return isQUICOnlyCandidateTransport(transport) || strings.HasPrefix(strings.ToLower(strings.TrimSpace(endpoint)), "quic://")
|
||||
}
|
||||
|
||||
@@ -221,6 +221,125 @@ func TestPeerConnectionProbeTargetKeepsPeerForLocalRelayReverseQUIC(t *testing.T
|
||||
}
|
||||
}
|
||||
|
||||
func TestPeerConnectionProbeTargetsFallsBackToBestPeerCertSHA256(t *testing.T) {
|
||||
intent := PeerConnectionIntent{
|
||||
NodeID: "node-b",
|
||||
BestPeerCertSHA256: "intent-cert",
|
||||
}
|
||||
cacheEntry := PeerCacheEntry{
|
||||
NodeID: "node-b",
|
||||
BestPeerCertSHA256: "cache-cert",
|
||||
BestCandidateID: "node-b-best",
|
||||
BestTransport: "direct_quic",
|
||||
Endpoint: "quic://94.141.118.222:19199",
|
||||
EndpointCandidates: []PeerEndpointCandidate{
|
||||
{
|
||||
EndpointID: "node-b-public",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_quic",
|
||||
Address: "quic://94.141.118.222:19199",
|
||||
Reachability: "public",
|
||||
ConnectivityMode: "direct",
|
||||
Priority: 1,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
targets := peerConnectionProbeTargets(intent, cacheEntry)
|
||||
if len(targets) != 1 {
|
||||
t.Fatalf("target count = %d, want 1", len(targets))
|
||||
}
|
||||
for _, target := range targets {
|
||||
if target.Endpoint != "quic://94.141.118.222:19199" {
|
||||
continue
|
||||
}
|
||||
if target.PeerCertSHA256 != "cache-cert" {
|
||||
t.Fatalf("peer cert = %q, want cache-cert", target.PeerCertSHA256)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestPeerConnectionProbeTargetsUpgradeRelayReadyPeerToDirectQUIC(t *testing.T) {
|
||||
now := time.Date(2026, 5, 18, 12, 0, 0, 0, time.UTC)
|
||||
current := now
|
||||
tlsConfig := testQUICTLSConfig(t)
|
||||
server, err := StartQUICFabricServer(context.Background(), QUICFabricServerConfig{
|
||||
ListenAddr: "127.0.0.1:0",
|
||||
TLSConfig: tlsConfig,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("start quic fabric server: %v", err)
|
||||
}
|
||||
defer server.Close()
|
||||
|
||||
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"}
|
||||
certSHA256 := testQUICCertSHA256(t, tlsConfig)
|
||||
leases := []PeerRendezvousLease{{
|
||||
LeaseID: "lease-node-b-via-node-r",
|
||||
PeerNodeID: "node-b",
|
||||
RelayNodeID: "node-r",
|
||||
RelayEndpoint: "quic://127.0.0.1:1",
|
||||
Transport: "relay_quic",
|
||||
ConnectivityMode: "relay_required",
|
||||
Priority: 10,
|
||||
ControlPlaneOnly: true,
|
||||
IssuedAt: now.Add(-time.Minute),
|
||||
ExpiresAt: now.Add(time.Minute),
|
||||
}}
|
||||
cache := NewPeerCache(PeerCacheConfig{
|
||||
Local: local,
|
||||
PeerEndpointCandidates: map[string][]PeerEndpointCandidate{
|
||||
"node-b": {
|
||||
{
|
||||
EndpointID: "node-b-direct",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_quic",
|
||||
Address: "quic://" + server.Addr().String(),
|
||||
Reachability: "public",
|
||||
ConnectivityMode: "direct",
|
||||
Priority: 1,
|
||||
Metadata: peerConnectionProbeMetadata(t, certSHA256),
|
||||
},
|
||||
},
|
||||
},
|
||||
RendezvousLeases: leases,
|
||||
WarmPeerLimit: 1,
|
||||
Now: now,
|
||||
})
|
||||
tracker := NewPeerConnectionTracker(cache.Snapshot(), now)
|
||||
manager := NewPeerConnectionManager(PeerConnectionManagerConfig{
|
||||
Local: local,
|
||||
PeerCache: cache,
|
||||
Tracker: tracker,
|
||||
RendezvousLeases: leases,
|
||||
QUICTransport: NewQUICFabricTransport(nil),
|
||||
ProbeTimeout: time.Second,
|
||||
Now: func() time.Time {
|
||||
current = current.Add(10 * time.Millisecond)
|
||||
return current
|
||||
},
|
||||
})
|
||||
|
||||
cycle := manager.ProbeOnce(context.Background())
|
||||
if cycle.Attempted != 1 || cycle.Succeeded != 1 || len(cycle.Results) != 1 {
|
||||
t.Fatalf("unexpected cycle: %+v", cycle)
|
||||
}
|
||||
result := cycle.Results[0]
|
||||
if result.SelectedCandidateID != "node-b-direct" || result.SelectedEndpoint != "quic://"+server.Addr().String() {
|
||||
t.Fatalf("relay-ready peer did not upgrade to direct candidate: %+v", result)
|
||||
}
|
||||
if result.ConnectionState.State != PeerConnectionReady {
|
||||
t.Fatalf("connection state = %q, want ready", result.ConnectionState.State)
|
||||
}
|
||||
if len(result.CandidateResults) == 0 || result.CandidateResults[0].Transport != "direct_quic" || result.CandidateResults[0].LinkStatus != PeerConnectionProbeReachable {
|
||||
t.Fatalf("candidate trail missing direct probe success: %+v", result.CandidateResults)
|
||||
}
|
||||
snapshot := tracker.Snapshot()
|
||||
if snapshot.Ready != 1 || snapshot.RelayReady != 0 {
|
||||
t.Fatalf("unexpected tracker snapshot after direct upgrade: %+v", snapshot)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPeerConnectionManagerFallsBackAcrossEndpointCandidates(t *testing.T) {
|
||||
now := time.Date(2026, 4, 30, 12, 0, 0, 0, time.UTC)
|
||||
current := now
|
||||
|
||||
@@ -102,8 +102,11 @@ func PlanPeerRecovery(cfg PeerRecoveryPlanConfig) PeerRecoveryPlan {
|
||||
continue
|
||||
}
|
||||
switch connection.State {
|
||||
case PeerConnectionReady, PeerConnectionRelayReady:
|
||||
case PeerConnectionReady:
|
||||
ready++
|
||||
case PeerConnectionRelayReady:
|
||||
// Relay-ready peers remain valuable for control-plane reachability,
|
||||
// but they do not satisfy the target for direct-ready transport paths.
|
||||
case PeerConnectionDegraded:
|
||||
degraded++
|
||||
case PeerConnectionBackoff:
|
||||
|
||||
@@ -69,7 +69,7 @@ func TestPeerRecoveryPlanAddsRecoverySeedWhenReadyDeficit(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestPeerRecoveryPlanMaintainsRelayReadyPeersInSteadyMode(t *testing.T) {
|
||||
func TestPeerRecoveryPlanTreatsRelayReadyPeersAsRecoveryGap(t *testing.T) {
|
||||
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
||||
plan := PlanPeerRecovery(PeerRecoveryPlanConfig{
|
||||
PeerCache: PeerCacheSnapshot{
|
||||
@@ -92,12 +92,15 @@ func TestPeerRecoveryPlanMaintainsRelayReadyPeersInSteadyMode(t *testing.T) {
|
||||
Now: now,
|
||||
})
|
||||
|
||||
if plan.Mode != PeerRecoveryModeSteady || !plan.Healthy {
|
||||
t.Fatalf("unexpected steady plan: %+v", plan)
|
||||
if plan.Mode != PeerRecoveryModeRecovery || plan.Healthy {
|
||||
t.Fatalf("unexpected relay-ready recovery plan: %+v", plan)
|
||||
}
|
||||
if !recoveryPlanHasCandidate(plan, "node-c", "maintain_ready") {
|
||||
t.Fatalf("relay-ready peer was not maintained: %+v", plan.Candidates)
|
||||
}
|
||||
if plan.ReadyPeerCount != 0 || plan.Deficit != 1 {
|
||||
t.Fatalf("relay-ready peer should not satisfy direct-ready target: %+v", plan)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPeerRecoveryPlanCapsTargetByConnectablePeers(t *testing.T) {
|
||||
|
||||
@@ -0,0 +1,713 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/ed25519"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
|
||||
)
|
||||
|
||||
// String identifiers used by fabric registry gossip records.
const (
	// FabricRegistryGossipRecordSchema is the schema_version value that record
	// validation requires.
	FabricRegistryGossipRecordSchema = "rap.fabric.registry.gossip_record.v1"

	// Record scopes. Service resolution falls back from narrower scopes to
	// cluster and then farm.
	FabricRegistryScopeFarm         = "farm"
	FabricRegistryScopeCluster      = "cluster"
	FabricRegistryScopeOrganization = "organization"

	// Service names that records can advertise.
	FabricRegistryServiceControlAPI  = "control-api"
	FabricRegistryServiceUpdateStore = "update-store"
	FabricRegistryServiceUpdateCache = "update-cache"
	FabricRegistryServiceWebAdmin    = "web-admin"
	FabricRegistryServiceVPNExitPool = "vpn-egress-pool"

	// Issuer roles carried in records, signatures and trusted issuer entries.
	FabricRegistryAuthorityControl = "control-authority"
	FabricRegistryAuthorityUpdate  = "update-authority"
	FabricRegistryAuthorityStorage = "storage-authority"
	FabricRegistryAuthorityRoute   = "route-authority"
)
|
||||
|
||||
// FabricRegistryEndpoint is one endpoint advertised inside a gossip record.
//
// Validation requires EndpointID, Address and Transport to be non-blank and the
// endpoint to be QUIC-only. Endpoint selection orders by preferred region match,
// then ascending Priority, then descending Weight, then EndpointID. Metadata,
// when present, must be valid JSON.
type FabricRegistryEndpoint struct {
	EndpointID       string          `json:"endpoint_id"`
	Address          string          `json:"address"`
	Transport        string          `json:"transport"`
	Reachability     string          `json:"reachability,omitempty"`
	ConnectivityMode string          `json:"connectivity_mode,omitempty"`
	Region           string          `json:"region,omitempty"`
	Priority         int             `json:"priority,omitempty"`
	Weight           int             `json:"weight,omitempty"`
	PeerCertSHA256   string          `json:"peer_cert_sha256,omitempty"`
	LastVerifiedAt   *time.Time      `json:"last_verified_at,omitempty"`
	Metadata         json.RawMessage `json:"metadata,omitempty"`
}
|
||||
|
||||
// FabricRegistrySignature is one signature attached to a gossip record.
//
// Verification only considers signatures whose Alg is "ed25519" (compared
// case-insensitively after trimming). Value holds the hex-encoded signature
// bytes. IssuerID and Role are used to look up the trusted issuer.
type FabricRegistrySignature struct {
	KeyID    string `json:"key_id"`
	IssuerID string `json:"issuer_id"`
	Role     string `json:"role"`
	Alg      string `json:"alg"`
	Value    string `json:"value"`
}
|
||||
|
||||
// FabricRegistryGossipRecord advertises the endpoints of one service for a
// cluster, scope and (optionally) organization.
//
// Records are keyed in the registry by ClusterID, Service, Scope and
// OrganizationID. Freshness is decided by Epoch, then IssuedAt, then Generation.
// Signatures are stripped before the canonical payload is built, so they are not
// covered by the signature or the record hash themselves.
type FabricRegistryGossipRecord struct {
	SchemaVersion  string                    `json:"schema_version"`
	ClusterID      string                    `json:"cluster_id"`
	Service        string                    `json:"service"`
	Scope          string                    `json:"scope"`
	OrganizationID string                    `json:"organization_id,omitempty"`
	Epoch          int64                     `json:"epoch"`
	Generation     string                    `json:"generation,omitempty"`
	IssuedAt       time.Time                 `json:"issued_at"`
	ExpiresAt      time.Time                 `json:"expires_at"`
	IssuerNodeID   string                    `json:"issuer_node_id"`
	IssuerRole     string                    `json:"issuer_role"`
	Endpoints      []FabricRegistryEndpoint  `json:"endpoints"`
	Metadata       json.RawMessage           `json:"metadata,omitempty"`
	Signatures     []FabricRegistrySignature `json:"signatures,omitempty"`
}
|
||||
|
||||
// FabricRegistryTrustedIssuer describes a signer whose signatures are accepted.
//
// A non-blank Role must equal the record's issuer role. Non-empty Scopes and
// Services restrict which records the issuer may sign; empty lists impose no
// restriction. PublicKey must have the ed25519 public key size to be usable.
type FabricRegistryTrustedIssuer struct {
	IssuerID  string
	Role      string
	PublicKey ed25519.PublicKey
	Scopes    []string
	Services  []string
}
|
||||
|
||||
// FabricRegistryVerificationPolicy controls how gossip records are verified.
//
// When LocalClusterID is non-blank, records for any other cluster are rejected.
// RequiredSignatures values of zero or less are treated as 1. MaxClockSkew
// values of zero or less are treated as one minute. A zero Now means the
// current wall-clock time is used.
type FabricRegistryVerificationPolicy struct {
	LocalClusterID     string
	TrustedIssuers     []FabricRegistryTrustedIssuer
	RequiredSignatures int
	MaxClockSkew       time.Duration
	Now                time.Time
}
|
||||
|
||||
// FabricRegistryVerificationResult summarizes a record verification.
//
// RecordHash is the hex-encoded SHA-256 of the record's canonical payload.
// AcceptedIssuers is sorted.
type FabricRegistryVerificationResult struct {
	AcceptedSignatureCount int      `json:"accepted_signature_count"`
	AcceptedIssuers        []string `json:"accepted_issuers,omitempty"`
	RecordHash             string   `json:"record_hash"`
}
|
||||
|
||||
// FabricRegistryEntryState is the lifecycle state of a registry entry.
type FabricRegistryEntryState string

const (
	// FabricRegistryCandidate marks a verified record that has not been live
	// verified yet.
	FabricRegistryCandidate FabricRegistryEntryState = "candidate"
	// FabricRegistryActive marks a record that has been live verified.
	FabricRegistryActive FabricRegistryEntryState = "active"
	// FabricRegistryExpired and FabricRegistryRejected block promotion in
	// MarkLiveVerified.
	// NOTE(review): nothing in this file assigns these two states — confirm
	// where they are set.
	FabricRegistryExpired  FabricRegistryEntryState = "expired"
	FabricRegistryRejected FabricRegistryEntryState = "rejected"
)
|
||||
|
||||
// FabricRegistryEntry is a stored, verified gossip record together with its
// state. Record is the normalized form of the applied record. PromotedAt is set
// when the entry becomes active and is nil for candidates.
type FabricRegistryEntry struct {
	Record       FabricRegistryGossipRecord       `json:"record"`
	State        FabricRegistryEntryState         `json:"state"`
	AcceptedAt   time.Time                        `json:"accepted_at"`
	PromotedAt   *time.Time                       `json:"promoted_at,omitempty"`
	VerifyResult FabricRegistryVerificationResult `json:"verify_result"`
}
|
||||
|
||||
// FabricRegistryBootstrapReport summarizes a bootstrap load.
//
// Total is the number of decoded records. Rejects holds the error text of each
// rejected record. RecordKeys lists the registry keys of records that changed
// the registry; records that were verified but not newer are counted nowhere
// except Total.
type FabricRegistryBootstrapReport struct {
	Total      int      `json:"total"`
	Active     int      `json:"active"`
	Candidate  int      `json:"candidate"`
	Rejected   int      `json:"rejected"`
	Rejects    []string `json:"rejects,omitempty"`
	RecordKeys []string `json:"record_keys,omitempty"`
}
|
||||
|
||||
// FabricRegistryResolveRequest is the input to FabricRegistry.ResolveService.
//
// Service is required. Scope selects the starting point of the scope fallback
// order; OrganizationID is only used for the organization scope. A zero Now
// means the current wall-clock time is used for expiry checks.
type FabricRegistryResolveRequest struct {
	ClusterID       string
	Service         string
	Scope           string
	OrganizationID  string
	PreferredRegion string
	Now             time.Time
}
|
||||
|
||||
// FabricRegistryResolvedService is the outcome of a service resolution.
//
// When Found is false, Reason is one of "service_required",
// "no_usable_endpoints" or "no_active_record". When Found is true, Endpoints is
// the ordered list of usable endpoints and RecordHash is the hex-encoded
// SHA-256 of the record's canonical payload.
type FabricRegistryResolvedService struct {
	Found          bool                     `json:"found"`
	Service        string                   `json:"service"`
	Scope          string                   `json:"scope,omitempty"`
	OrganizationID string                   `json:"organization_id,omitempty"`
	RecordEpoch    int64                    `json:"record_epoch,omitempty"`
	RecordHash     string                   `json:"record_hash,omitempty"`
	Endpoints      []FabricRegistryEndpoint `json:"endpoints,omitempty"`
	Reason         string                   `json:"reason,omitempty"`
}
|
||||
|
||||
// FabricRegistryLiveProbeRequest configures FabricRegistry.VerifyCandidates.
//
// A blank ClusterID probes candidates of every cluster. Timeout values of zero
// or less default to two seconds per endpoint probe; MaxCandidates values of
// zero or less default to 16. A zero Now means the current wall-clock time.
type FabricRegistryLiveProbeRequest struct {
	ClusterID       string
	PreferredRegion string
	Timeout         time.Duration
	Now             time.Time
	MaxCandidates   int
}
|
||||
|
||||
// FabricRegistryLiveProbeResult reports the probe outcome for one candidate
// record.
//
// Status is "reachable" or "unreachable". EndpointID and Address identify the
// last endpoint that was probed. Promoted reports whether the candidate was
// moved to the active set. Error carries the last probe error, or
// "no_usable_endpoints" when the record had nothing to probe.
type FabricRegistryLiveProbeResult struct {
	Service        string `json:"service"`
	Scope          string `json:"scope"`
	OrganizationID string `json:"organization_id,omitempty"`
	EndpointID     string `json:"endpoint_id,omitempty"`
	Address        string `json:"address,omitempty"`
	Status         string `json:"status"`
	LatencyMs      int64  `json:"latency_ms,omitempty"`
	Promoted       bool   `json:"promoted"`
	Error          string `json:"error,omitempty"`
}
|
||||
|
||||
// FabricRegistrySnapshot counts the unexpired active and candidate entries of a
// registry and lists their keys in sorted order.
type FabricRegistrySnapshot struct {
	Active        int      `json:"active"`
	Candidate     int      `json:"candidate"`
	ActiveKeys    []string `json:"active_keys,omitempty"`
	CandidateKeys []string `json:"candidate_keys,omitempty"`
}
|
||||
|
||||
// FabricRegistry stores verified gossip records keyed by cluster, service,
// scope and organization.
//
// entries holds live-verified (active) records; candidates holds verified
// records that still await a live probe. The struct carries no lock of its own.
// NOTE(review): callers presumably serialize access — confirm.
type FabricRegistry struct {
	entries    map[string]FabricRegistryEntry
	candidates map[string]FabricRegistryEntry
}
|
||||
|
||||
func NewFabricRegistry() *FabricRegistry {
|
||||
return &FabricRegistry{entries: map[string]FabricRegistryEntry{}, candidates: map[string]FabricRegistryEntry{}}
|
||||
}
|
||||
|
||||
func LoadFabricRegistryBootstrapRecords(recordsJSON string, policy FabricRegistryVerificationPolicy, liveVerified bool) (*FabricRegistry, FabricRegistryBootstrapReport, error) {
|
||||
registry := NewFabricRegistry()
|
||||
recordsJSON = strings.TrimSpace(recordsJSON)
|
||||
if recordsJSON == "" {
|
||||
return registry, FabricRegistryBootstrapReport{}, nil
|
||||
}
|
||||
var records []FabricRegistryGossipRecord
|
||||
if err := json.Unmarshal([]byte(recordsJSON), &records); err != nil {
|
||||
return nil, FabricRegistryBootstrapReport{}, fmt.Errorf("decode fabric registry bootstrap records: %w", err)
|
||||
}
|
||||
report := FabricRegistryBootstrapReport{Total: len(records)}
|
||||
for _, record := range records {
|
||||
entry, changed, err := registry.ApplyGossipRecord(record, policy, liveVerified)
|
||||
if err != nil {
|
||||
report.Rejected++
|
||||
report.Rejects = append(report.Rejects, err.Error())
|
||||
continue
|
||||
}
|
||||
if !changed {
|
||||
continue
|
||||
}
|
||||
report.RecordKeys = append(report.RecordKeys, fabricRegistryRecordKey(record))
|
||||
switch entry.State {
|
||||
case FabricRegistryActive:
|
||||
report.Active++
|
||||
case FabricRegistryCandidate:
|
||||
report.Candidate++
|
||||
}
|
||||
}
|
||||
return registry, report, nil
|
||||
}
|
||||
|
||||
func (r *FabricRegistry) ApplyGossipRecord(record FabricRegistryGossipRecord, policy FabricRegistryVerificationPolicy, liveVerified bool) (FabricRegistryEntry, bool, error) {
|
||||
if r == nil {
|
||||
return FabricRegistryEntry{}, false, fmt.Errorf("fabric registry is nil")
|
||||
}
|
||||
result, err := VerifyFabricRegistryGossipRecord(record, policy)
|
||||
if err != nil {
|
||||
return FabricRegistryEntry{}, false, err
|
||||
}
|
||||
now := registryNow(policy.Now)
|
||||
key := fabricRegistryRecordKey(record)
|
||||
current, exists := r.entries[key]
|
||||
if exists && !fabricRegistryRecordNewer(record, current.Record, now) {
|
||||
return current, false, nil
|
||||
}
|
||||
state := FabricRegistryCandidate
|
||||
var promotedAt *time.Time
|
||||
if liveVerified {
|
||||
state = FabricRegistryActive
|
||||
t := now
|
||||
promotedAt = &t
|
||||
}
|
||||
entry := FabricRegistryEntry{
|
||||
Record: normalizeFabricRegistryRecord(record),
|
||||
State: state,
|
||||
AcceptedAt: now,
|
||||
PromotedAt: promotedAt,
|
||||
VerifyResult: result,
|
||||
}
|
||||
if state == FabricRegistryActive {
|
||||
r.entries[key] = entry
|
||||
delete(r.candidates, key)
|
||||
return entry, true, nil
|
||||
}
|
||||
if r.candidates == nil {
|
||||
r.candidates = map[string]FabricRegistryEntry{}
|
||||
}
|
||||
r.candidates[key] = entry
|
||||
return entry, true, nil
|
||||
}
|
||||
|
||||
func (r *FabricRegistry) MarkLiveVerified(clusterID, service, scope, organizationID string, now time.Time) bool {
|
||||
if r == nil {
|
||||
return false
|
||||
}
|
||||
key := fabricRegistryKey(clusterID, service, scope, organizationID)
|
||||
entry, ok := r.candidates[key]
|
||||
if !ok || entry.State == FabricRegistryExpired || entry.State == FabricRegistryRejected {
|
||||
return false
|
||||
}
|
||||
t := registryNow(now)
|
||||
entry.State = FabricRegistryActive
|
||||
entry.PromotedAt = &t
|
||||
r.entries[key] = entry
|
||||
delete(r.candidates, key)
|
||||
return true
|
||||
}
|
||||
|
||||
func (r *FabricRegistry) Active(clusterID, service, scope, organizationID string, now time.Time) (FabricRegistryGossipRecord, bool) {
|
||||
if r == nil {
|
||||
return FabricRegistryGossipRecord{}, false
|
||||
}
|
||||
entry, ok := r.entries[fabricRegistryKey(clusterID, service, scope, organizationID)]
|
||||
if !ok || entry.State != FabricRegistryActive || !entry.Record.ExpiresAt.After(registryNow(now)) {
|
||||
return FabricRegistryGossipRecord{}, false
|
||||
}
|
||||
return entry.Record, true
|
||||
}
|
||||
|
||||
func (r *FabricRegistry) ResolveService(req FabricRegistryResolveRequest) FabricRegistryResolvedService {
|
||||
service := strings.ToLower(strings.TrimSpace(req.Service))
|
||||
if service == "" {
|
||||
return FabricRegistryResolvedService{Found: false, Reason: "service_required"}
|
||||
}
|
||||
scopeOrder := fabricRegistryScopeResolutionOrder(req.Scope, req.OrganizationID)
|
||||
for _, scope := range scopeOrder {
|
||||
organizationID := strings.TrimSpace(req.OrganizationID)
|
||||
if scope != FabricRegistryScopeOrganization {
|
||||
organizationID = ""
|
||||
}
|
||||
record, ok := r.Active(req.ClusterID, service, scope, organizationID, req.Now)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
endpoints := selectFabricRegistryEndpoints(record.Endpoints, req.PreferredRegion)
|
||||
if len(endpoints) == 0 {
|
||||
return FabricRegistryResolvedService{Found: false, Service: service, Scope: scope, OrganizationID: organizationID, Reason: "no_usable_endpoints"}
|
||||
}
|
||||
result, _ := canonicalFabricRegistryPayload(record)
|
||||
sum := sha256.Sum256(result)
|
||||
return FabricRegistryResolvedService{
|
||||
Found: true,
|
||||
Service: service,
|
||||
Scope: scope,
|
||||
OrganizationID: organizationID,
|
||||
RecordEpoch: record.Epoch,
|
||||
RecordHash: hex.EncodeToString(sum[:]),
|
||||
Endpoints: endpoints,
|
||||
}
|
||||
}
|
||||
return FabricRegistryResolvedService{Found: false, Service: service, Reason: "no_active_record"}
|
||||
}
|
||||
|
||||
func (r *FabricRegistry) Snapshot(now time.Time) FabricRegistrySnapshot {
|
||||
if r == nil {
|
||||
return FabricRegistrySnapshot{}
|
||||
}
|
||||
now = registryNow(now)
|
||||
out := FabricRegistrySnapshot{}
|
||||
for key, entry := range r.entries {
|
||||
if entry.State == FabricRegistryActive && entry.Record.ExpiresAt.After(now) {
|
||||
out.Active++
|
||||
out.ActiveKeys = append(out.ActiveKeys, key)
|
||||
}
|
||||
}
|
||||
for key, entry := range r.candidates {
|
||||
if entry.State == FabricRegistryCandidate && entry.Record.ExpiresAt.After(now) {
|
||||
out.Candidate++
|
||||
out.CandidateKeys = append(out.CandidateKeys, key)
|
||||
}
|
||||
}
|
||||
sort.Strings(out.ActiveKeys)
|
||||
sort.Strings(out.CandidateKeys)
|
||||
return out
|
||||
}
|
||||
|
||||
func (r *FabricRegistry) VerifyCandidates(ctx context.Context, transport FabricTransport, req FabricRegistryLiveProbeRequest) []FabricRegistryLiveProbeResult {
|
||||
if r == nil {
|
||||
return nil
|
||||
}
|
||||
now := registryNow(req.Now)
|
||||
timeout := req.Timeout
|
||||
if timeout <= 0 {
|
||||
timeout = 2 * time.Second
|
||||
}
|
||||
maxCandidates := req.MaxCandidates
|
||||
if maxCandidates <= 0 {
|
||||
maxCandidates = 16
|
||||
}
|
||||
candidates := make([]FabricRegistryEntry, 0, len(r.candidates))
|
||||
for _, entry := range r.candidates {
|
||||
if entry.State != FabricRegistryCandidate || !entry.Record.ExpiresAt.After(now) {
|
||||
continue
|
||||
}
|
||||
if clusterID := strings.TrimSpace(req.ClusterID); clusterID != "" && entry.Record.ClusterID != clusterID {
|
||||
continue
|
||||
}
|
||||
candidates = append(candidates, entry)
|
||||
}
|
||||
sort.SliceStable(candidates, func(i, j int) bool {
|
||||
if candidates[i].Record.Service != candidates[j].Record.Service {
|
||||
return candidates[i].Record.Service < candidates[j].Record.Service
|
||||
}
|
||||
if candidates[i].Record.Scope != candidates[j].Record.Scope {
|
||||
return candidates[i].Record.Scope < candidates[j].Record.Scope
|
||||
}
|
||||
return candidates[i].Record.Epoch > candidates[j].Record.Epoch
|
||||
})
|
||||
if len(candidates) > maxCandidates {
|
||||
candidates = candidates[:maxCandidates]
|
||||
}
|
||||
results := make([]FabricRegistryLiveProbeResult, 0, len(candidates))
|
||||
for _, entry := range candidates {
|
||||
record := entry.Record
|
||||
result := FabricRegistryLiveProbeResult{
|
||||
Service: record.Service,
|
||||
Scope: record.Scope,
|
||||
OrganizationID: record.OrganizationID,
|
||||
Status: "unreachable",
|
||||
}
|
||||
endpoints := selectFabricRegistryEndpoints(record.Endpoints, req.PreferredRegion)
|
||||
if len(endpoints) == 0 {
|
||||
result.Error = "no_usable_endpoints"
|
||||
results = append(results, result)
|
||||
continue
|
||||
}
|
||||
for _, endpoint := range endpoints {
|
||||
probeCtx, cancel := context.WithTimeout(ctx, timeout)
|
||||
latency, err := probeFabricRegistryEndpoint(probeCtx, transport, endpoint, timeout)
|
||||
cancel()
|
||||
result.EndpointID = endpoint.EndpointID
|
||||
result.Address = endpoint.Address
|
||||
if err != nil {
|
||||
result.Error = err.Error()
|
||||
continue
|
||||
}
|
||||
result.Status = "reachable"
|
||||
result.LatencyMs = latency.Milliseconds()
|
||||
result.Promoted = r.MarkLiveVerified(record.ClusterID, record.Service, record.Scope, record.OrganizationID, now)
|
||||
result.Error = ""
|
||||
break
|
||||
}
|
||||
results = append(results, result)
|
||||
}
|
||||
return results
|
||||
}
|
||||
|
||||
func SignFabricRegistryGossipRecord(record FabricRegistryGossipRecord, issuer FabricRegistryTrustedIssuer, privateKey ed25519.PrivateKey) (FabricRegistryGossipRecord, error) {
|
||||
payload, err := canonicalFabricRegistryPayload(record)
|
||||
if err != nil {
|
||||
return record, err
|
||||
}
|
||||
sig := ed25519.Sign(privateKey, payload)
|
||||
record.Signatures = append(record.Signatures, FabricRegistrySignature{
|
||||
KeyID: firstNonEmpty(issuer.IssuerID, record.IssuerNodeID),
|
||||
IssuerID: firstNonEmpty(issuer.IssuerID, record.IssuerNodeID),
|
||||
Role: firstNonEmpty(issuer.Role, record.IssuerRole),
|
||||
Alg: "ed25519",
|
||||
Value: hex.EncodeToString(sig),
|
||||
})
|
||||
return record, nil
|
||||
}
|
||||
|
||||
func VerifyFabricRegistryGossipRecord(record FabricRegistryGossipRecord, policy FabricRegistryVerificationPolicy) (FabricRegistryVerificationResult, error) {
|
||||
record = normalizeFabricRegistryRecord(record)
|
||||
if err := validateFabricRegistryGossipRecord(record, policy); err != nil {
|
||||
return FabricRegistryVerificationResult{}, err
|
||||
}
|
||||
payload, err := canonicalFabricRegistryPayload(record)
|
||||
if err != nil {
|
||||
return FabricRegistryVerificationResult{}, err
|
||||
}
|
||||
sum := sha256.Sum256(payload)
|
||||
trusted := map[string]FabricRegistryTrustedIssuer{}
|
||||
for _, issuer := range policy.TrustedIssuers {
|
||||
if strings.TrimSpace(issuer.IssuerID) != "" {
|
||||
trusted[issuer.IssuerID] = issuer
|
||||
}
|
||||
if strings.TrimSpace(issuer.IssuerID) != "" && strings.TrimSpace(issuer.Role) != "" {
|
||||
trusted[issuer.IssuerID+"\x00"+issuer.Role] = issuer
|
||||
}
|
||||
}
|
||||
accepted := map[string]struct{}{}
|
||||
for _, signature := range record.Signatures {
|
||||
if strings.ToLower(strings.TrimSpace(signature.Alg)) != "ed25519" {
|
||||
continue
|
||||
}
|
||||
issuer, ok := trusted[strings.TrimSpace(signature.IssuerID)+"\x00"+strings.TrimSpace(signature.Role)]
|
||||
if !ok {
|
||||
issuer, ok = trusted[strings.TrimSpace(signature.IssuerID)]
|
||||
}
|
||||
if !ok || !fabricRegistryIssuerAllowed(issuer, record) {
|
||||
continue
|
||||
}
|
||||
rawSig, err := hex.DecodeString(strings.TrimSpace(signature.Value))
|
||||
if err != nil || len(rawSig) != ed25519.SignatureSize || len(issuer.PublicKey) != ed25519.PublicKeySize {
|
||||
continue
|
||||
}
|
||||
if ed25519.Verify(issuer.PublicKey, payload, rawSig) {
|
||||
accepted[signature.IssuerID] = struct{}{}
|
||||
}
|
||||
}
|
||||
required := policy.RequiredSignatures
|
||||
if required <= 0 {
|
||||
required = 1
|
||||
}
|
||||
if len(accepted) < required {
|
||||
return FabricRegistryVerificationResult{RecordHash: hex.EncodeToString(sum[:])}, fmt.Errorf("fabric registry gossip record lacks required trusted signatures")
|
||||
}
|
||||
issuers := make([]string, 0, len(accepted))
|
||||
for issuer := range accepted {
|
||||
issuers = append(issuers, issuer)
|
||||
}
|
||||
sort.Strings(issuers)
|
||||
return FabricRegistryVerificationResult{
|
||||
AcceptedSignatureCount: len(accepted),
|
||||
AcceptedIssuers: issuers,
|
||||
RecordHash: hex.EncodeToString(sum[:]),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func validateFabricRegistryGossipRecord(record FabricRegistryGossipRecord, policy FabricRegistryVerificationPolicy) error {
|
||||
if record.SchemaVersion != FabricRegistryGossipRecordSchema {
|
||||
return fmt.Errorf("fabric registry gossip record schema_version is invalid")
|
||||
}
|
||||
if strings.TrimSpace(record.ClusterID) == "" || (strings.TrimSpace(policy.LocalClusterID) != "" && record.ClusterID != policy.LocalClusterID) {
|
||||
return ErrClusterMismatch
|
||||
}
|
||||
if strings.TrimSpace(record.Service) == "" || strings.TrimSpace(record.Scope) == "" || strings.TrimSpace(record.IssuerNodeID) == "" || strings.TrimSpace(record.IssuerRole) == "" {
|
||||
return fmt.Errorf("fabric registry gossip record is missing service, scope, or issuer")
|
||||
}
|
||||
if record.Epoch <= 0 || record.IssuedAt.IsZero() || record.ExpiresAt.IsZero() || !record.ExpiresAt.After(record.IssuedAt) {
|
||||
return fmt.Errorf("fabric registry gossip record has invalid epoch or validity window")
|
||||
}
|
||||
now := registryNow(policy.Now)
|
||||
skew := policy.MaxClockSkew
|
||||
if skew <= 0 {
|
||||
skew = time.Minute
|
||||
}
|
||||
if record.IssuedAt.After(now.Add(skew)) || !record.ExpiresAt.After(now) {
|
||||
return fmt.Errorf("fabric registry gossip record is not currently valid")
|
||||
}
|
||||
if len(record.Endpoints) == 0 {
|
||||
return fmt.Errorf("fabric registry gossip record has no endpoints")
|
||||
}
|
||||
for _, endpoint := range record.Endpoints {
|
||||
if strings.TrimSpace(endpoint.EndpointID) == "" || strings.TrimSpace(endpoint.Address) == "" || strings.TrimSpace(endpoint.Transport) == "" {
|
||||
return fmt.Errorf("fabric registry gossip record contains invalid endpoint")
|
||||
}
|
||||
if !isQUICOnlyCandidateTransport(endpoint.Transport) || hasLegacyEndpointScheme(endpoint.Address) {
|
||||
return fmt.Errorf("fabric registry gossip endpoint must be QUIC-only")
|
||||
}
|
||||
if len(endpoint.Metadata) > 0 && !json.Valid(endpoint.Metadata) {
|
||||
return fmt.Errorf("fabric registry gossip endpoint metadata is invalid")
|
||||
}
|
||||
}
|
||||
if len(record.Metadata) > 0 && !json.Valid(record.Metadata) {
|
||||
return fmt.Errorf("fabric registry gossip metadata is invalid")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func canonicalFabricRegistryPayload(record FabricRegistryGossipRecord) ([]byte, error) {
|
||||
record = normalizeFabricRegistryRecord(record)
|
||||
record.Signatures = nil
|
||||
payload, err := json.Marshal(record)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var compact bytes.Buffer
|
||||
if err := json.Compact(&compact, payload); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return compact.Bytes(), nil
|
||||
}
|
||||
|
||||
func normalizeFabricRegistryRecord(record FabricRegistryGossipRecord) FabricRegistryGossipRecord {
|
||||
record.SchemaVersion = strings.TrimSpace(record.SchemaVersion)
|
||||
record.ClusterID = strings.TrimSpace(record.ClusterID)
|
||||
record.Service = strings.ToLower(strings.TrimSpace(record.Service))
|
||||
record.Scope = strings.ToLower(strings.TrimSpace(record.Scope))
|
||||
record.OrganizationID = strings.TrimSpace(record.OrganizationID)
|
||||
record.IssuerNodeID = strings.TrimSpace(record.IssuerNodeID)
|
||||
record.IssuerRole = strings.TrimSpace(record.IssuerRole)
|
||||
record.Generation = strings.TrimSpace(record.Generation)
|
||||
for i := range record.Endpoints {
|
||||
record.Endpoints[i].EndpointID = strings.TrimSpace(record.Endpoints[i].EndpointID)
|
||||
record.Endpoints[i].Address = strings.TrimSpace(record.Endpoints[i].Address)
|
||||
record.Endpoints[i].Transport = strings.TrimSpace(record.Endpoints[i].Transport)
|
||||
record.Endpoints[i].Reachability = strings.TrimSpace(record.Endpoints[i].Reachability)
|
||||
record.Endpoints[i].ConnectivityMode = strings.TrimSpace(record.Endpoints[i].ConnectivityMode)
|
||||
record.Endpoints[i].Region = strings.TrimSpace(record.Endpoints[i].Region)
|
||||
record.Endpoints[i].PeerCertSHA256 = normalizeCertSHA256(record.Endpoints[i].PeerCertSHA256)
|
||||
}
|
||||
sort.SliceStable(record.Endpoints, func(i, j int) bool {
|
||||
if record.Endpoints[i].Priority != record.Endpoints[j].Priority {
|
||||
return record.Endpoints[i].Priority < record.Endpoints[j].Priority
|
||||
}
|
||||
return record.Endpoints[i].EndpointID < record.Endpoints[j].EndpointID
|
||||
})
|
||||
sort.SliceStable(record.Signatures, func(i, j int) bool {
|
||||
if record.Signatures[i].IssuerID != record.Signatures[j].IssuerID {
|
||||
return record.Signatures[i].IssuerID < record.Signatures[j].IssuerID
|
||||
}
|
||||
return record.Signatures[i].KeyID < record.Signatures[j].KeyID
|
||||
})
|
||||
return record
|
||||
}
|
||||
|
||||
func fabricRegistryIssuerAllowed(issuer FabricRegistryTrustedIssuer, record FabricRegistryGossipRecord) bool {
|
||||
if strings.TrimSpace(issuer.Role) != "" && issuer.Role != record.IssuerRole {
|
||||
return false
|
||||
}
|
||||
if len(issuer.Scopes) > 0 && !stringInSlice(record.Scope, issuer.Scopes) {
|
||||
return false
|
||||
}
|
||||
if len(issuer.Services) > 0 && !stringInSlice(record.Service, issuer.Services) {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func fabricRegistryRecordKey(record FabricRegistryGossipRecord) string {
|
||||
return fabricRegistryKey(record.ClusterID, record.Service, record.Scope, record.OrganizationID)
|
||||
}
|
||||
|
||||
func fabricRegistryScopeResolutionOrder(scope string, organizationID string) []string {
|
||||
scope = strings.ToLower(strings.TrimSpace(scope))
|
||||
switch scope {
|
||||
case FabricRegistryScopeOrganization:
|
||||
if strings.TrimSpace(organizationID) != "" {
|
||||
return []string{FabricRegistryScopeOrganization, FabricRegistryScopeCluster, FabricRegistryScopeFarm}
|
||||
}
|
||||
return []string{FabricRegistryScopeCluster, FabricRegistryScopeFarm}
|
||||
case FabricRegistryScopeFarm:
|
||||
return []string{FabricRegistryScopeFarm}
|
||||
case FabricRegistryScopeCluster, "":
|
||||
return []string{FabricRegistryScopeCluster, FabricRegistryScopeFarm}
|
||||
default:
|
||||
return []string{scope, FabricRegistryScopeCluster, FabricRegistryScopeFarm}
|
||||
}
|
||||
}
|
||||
|
||||
func selectFabricRegistryEndpoints(endpoints []FabricRegistryEndpoint, preferredRegion string) []FabricRegistryEndpoint {
|
||||
preferredRegion = strings.TrimSpace(preferredRegion)
|
||||
out := make([]FabricRegistryEndpoint, 0, len(endpoints))
|
||||
for _, endpoint := range endpoints {
|
||||
if strings.TrimSpace(endpoint.Address) == "" || !isQUICOnlyCandidateTransport(endpoint.Transport) || hasLegacyEndpointScheme(endpoint.Address) {
|
||||
continue
|
||||
}
|
||||
out = append(out, endpoint)
|
||||
}
|
||||
sort.SliceStable(out, func(i, j int) bool {
|
||||
if preferredRegion != "" {
|
||||
iMatch := strings.EqualFold(out[i].Region, preferredRegion)
|
||||
jMatch := strings.EqualFold(out[j].Region, preferredRegion)
|
||||
if iMatch != jMatch {
|
||||
return iMatch
|
||||
}
|
||||
}
|
||||
if out[i].Priority != out[j].Priority {
|
||||
return out[i].Priority < out[j].Priority
|
||||
}
|
||||
if out[i].Weight != out[j].Weight {
|
||||
return out[i].Weight > out[j].Weight
|
||||
}
|
||||
return out[i].EndpointID < out[j].EndpointID
|
||||
})
|
||||
return out
|
||||
}
|
||||
|
||||
func probeFabricRegistryEndpoint(ctx context.Context, transport FabricTransport, endpoint FabricRegistryEndpoint, timeout time.Duration) (time.Duration, error) {
|
||||
if transport == nil {
|
||||
return 0, fmt.Errorf("fabric registry live probe transport is unavailable")
|
||||
}
|
||||
if timeout <= 0 {
|
||||
timeout = 2 * time.Second
|
||||
}
|
||||
target := FabricTransportTarget{
|
||||
EndpointID: endpoint.EndpointID,
|
||||
PeerID: endpoint.EndpointID,
|
||||
Endpoint: endpoint.Address,
|
||||
Transport: endpoint.Transport,
|
||||
PeerCertSHA256: endpoint.PeerCertSHA256,
|
||||
Timeout: timeout,
|
||||
InboundBuffer: 2,
|
||||
ErrorBuffer: 2,
|
||||
}
|
||||
startedAt := time.Now()
|
||||
session, err := transport.Connect(ctx, target)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
defer session.Close()
|
||||
sequence := uint64(startedAt.UnixNano())
|
||||
if err := session.Send(ctx, fabricproto.Frame{Type: fabricproto.FramePing, TrafficClass: fabricproto.TrafficClassReliable, Sequence: sequence, Payload: []byte("fabric-registry-live-probe")}); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
for {
|
||||
select {
|
||||
case frame, ok := <-session.Frames():
|
||||
if !ok {
|
||||
return 0, fmt.Errorf("fabric registry live probe session closed")
|
||||
}
|
||||
if frame.Type == fabricproto.FramePong && frame.Sequence == sequence {
|
||||
return time.Since(startedAt), nil
|
||||
}
|
||||
case err, ok := <-session.Errors():
|
||||
if !ok {
|
||||
return 0, fmt.Errorf("fabric registry live probe error channel closed")
|
||||
}
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
case <-ctx.Done():
|
||||
return 0, ctx.Err()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// fabricRegistryKey builds the registry map key from the four identifying
// fields, joined with NUL bytes. All parts are trimmed; service and scope are
// also lower-cased.
func fabricRegistryKey(clusterID, service, scope, organizationID string) string {
	parts := []string{
		strings.TrimSpace(clusterID),
		strings.ToLower(strings.TrimSpace(service)),
		strings.ToLower(strings.TrimSpace(scope)),
		strings.TrimSpace(organizationID),
	}
	return strings.Join(parts, "\x00")
}
|
||||
|
||||
func fabricRegistryRecordNewer(next, current FabricRegistryGossipRecord, now time.Time) bool {
|
||||
if !current.ExpiresAt.After(now) {
|
||||
return true
|
||||
}
|
||||
if next.Epoch != current.Epoch {
|
||||
return next.Epoch > current.Epoch
|
||||
}
|
||||
if !next.IssuedAt.Equal(current.IssuedAt) {
|
||||
return next.IssuedAt.After(current.IssuedAt)
|
||||
}
|
||||
return strings.TrimSpace(next.Generation) > strings.TrimSpace(current.Generation)
|
||||
}
|
||||
|
||||
// registryNow returns now converted to UTC, substituting the current
// wall-clock time when now is the zero time.
func registryNow(now time.Time) time.Time {
	reference := now
	if reference.IsZero() {
		reference = time.Now()
	}
	return reference.UTC()
}
|
||||
|
||||
// stringInSlice reports whether value equals any element of values, comparing
// both sides after trimming surrounding whitespace. The comparison is
// case-sensitive.
func stringInSlice(value string, values []string) bool {
	want := strings.TrimSpace(value)
	for i := range values {
		if strings.TrimSpace(values[i]) == want {
			return true
		}
	}
	return false
}
|
||||
@@ -0,0 +1,280 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/ed25519"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestFabricRegistryGossipRecordRequiresTrustedSignature(t *testing.T) {
|
||||
now := time.Date(2026, 5, 18, 10, 0, 0, 0, time.UTC)
|
||||
publicKey, privateKey, err := ed25519.GenerateKey(nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
record := testFabricRegistryGossipRecord(now, 10)
|
||||
issuer := FabricRegistryTrustedIssuer{
|
||||
IssuerID: "authority-1",
|
||||
Role: FabricRegistryAuthorityControl,
|
||||
PublicKey: publicKey,
|
||||
Scopes: []string{FabricRegistryScopeCluster},
|
||||
Services: []string{FabricRegistryServiceControlAPI},
|
||||
}
|
||||
signed, err := SignFabricRegistryGossipRecord(record, issuer, privateKey)
|
||||
if err != nil {
|
||||
t.Fatalf("sign record: %v", err)
|
||||
}
|
||||
if _, err := VerifyFabricRegistryGossipRecord(signed, FabricRegistryVerificationPolicy{
|
||||
LocalClusterID: "cluster-1",
|
||||
TrustedIssuers: []FabricRegistryTrustedIssuer{issuer},
|
||||
RequiredSignatures: 1,
|
||||
Now: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("verify signed record: %v", err)
|
||||
}
|
||||
tampered := signed
|
||||
tampered.Endpoints[0].Address = "quic://10.10.10.10:19443"
|
||||
if _, err := VerifyFabricRegistryGossipRecord(tampered, FabricRegistryVerificationPolicy{
|
||||
LocalClusterID: "cluster-1",
|
||||
TrustedIssuers: []FabricRegistryTrustedIssuer{issuer},
|
||||
RequiredSignatures: 1,
|
||||
Now: now,
|
||||
}); err == nil {
|
||||
t.Fatal("tampered record verified")
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricRegistryRejectsLegacyEndpointAndExpiredRecord(t *testing.T) {
|
||||
now := time.Date(2026, 5, 18, 10, 0, 0, 0, time.UTC)
|
||||
publicKey, privateKey, err := ed25519.GenerateKey(nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
issuer := FabricRegistryTrustedIssuer{IssuerID: "authority-1", Role: FabricRegistryAuthorityControl, PublicKey: publicKey}
|
||||
record := testFabricRegistryGossipRecord(now, 10)
|
||||
record.Endpoints[0].Address = "https://control.example.test/api/v1"
|
||||
signed, err := SignFabricRegistryGossipRecord(record, issuer, privateKey)
|
||||
if err != nil {
|
||||
t.Fatalf("sign record: %v", err)
|
||||
}
|
||||
if _, err := VerifyFabricRegistryGossipRecord(signed, FabricRegistryVerificationPolicy{
|
||||
LocalClusterID: "cluster-1",
|
||||
TrustedIssuers: []FabricRegistryTrustedIssuer{
|
||||
{IssuerID: "authority-1", Role: FabricRegistryAuthorityControl, PublicKey: publicKey},
|
||||
},
|
||||
Now: now,
|
||||
}); err == nil {
|
||||
t.Fatal("legacy HTTP endpoint was accepted")
|
||||
}
|
||||
expired := testFabricRegistryGossipRecord(now.Add(-2*time.Hour), 11)
|
||||
expired.ExpiresAt = now.Add(-time.Minute)
|
||||
expiredSigned, err := SignFabricRegistryGossipRecord(expired, issuer, privateKey)
|
||||
if err != nil {
|
||||
t.Fatalf("sign expired record: %v", err)
|
||||
}
|
||||
if _, err := VerifyFabricRegistryGossipRecord(expiredSigned, FabricRegistryVerificationPolicy{
|
||||
LocalClusterID: "cluster-1",
|
||||
TrustedIssuers: []FabricRegistryTrustedIssuer{
|
||||
{IssuerID: "authority-1", Role: FabricRegistryAuthorityControl, PublicKey: publicKey},
|
||||
},
|
||||
Now: now,
|
||||
}); err == nil {
|
||||
t.Fatal("expired record was accepted")
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricRegistryKeepsActiveRecordUntilNewerVerified(t *testing.T) {
|
||||
now := time.Date(2026, 5, 18, 10, 0, 0, 0, time.UTC)
|
||||
publicKey, privateKey, err := ed25519.GenerateKey(nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
issuer := FabricRegistryTrustedIssuer{IssuerID: "authority-1", Role: FabricRegistryAuthorityControl, PublicKey: publicKey}
|
||||
policy := FabricRegistryVerificationPolicy{
|
||||
LocalClusterID: "cluster-1",
|
||||
TrustedIssuers: []FabricRegistryTrustedIssuer{issuer},
|
||||
RequiredSignatures: 1,
|
||||
Now: now,
|
||||
}
|
||||
registry := NewFabricRegistry()
|
||||
active, err := SignFabricRegistryGossipRecord(testFabricRegistryGossipRecord(now, 10), issuer, privateKey)
|
||||
if err != nil {
|
||||
t.Fatalf("sign active: %v", err)
|
||||
}
|
||||
entry, changed, err := registry.ApplyGossipRecord(active, policy, true)
|
||||
if err != nil || !changed || entry.State != FabricRegistryActive {
|
||||
t.Fatalf("apply active entry changed=%t entry=%+v err=%v", changed, entry, err)
|
||||
}
|
||||
old := testFabricRegistryGossipRecord(now.Add(time.Minute), 9)
|
||||
old.Endpoints[0].Address = "quic://192.0.2.9:19443"
|
||||
oldSigned, err := SignFabricRegistryGossipRecord(old, issuer, privateKey)
|
||||
if err != nil {
|
||||
t.Fatalf("sign old: %v", err)
|
||||
}
|
||||
entry, changed, err = registry.ApplyGossipRecord(oldSigned, policy, true)
|
||||
if err != nil {
|
||||
t.Fatalf("apply old: %v", err)
|
||||
}
|
||||
if changed || entry.Record.Epoch != 10 || entry.Record.Endpoints[0].Address != "quic://192.0.2.10:19443" {
|
||||
t.Fatalf("older record replaced active entry: changed=%t entry=%+v", changed, entry)
|
||||
}
|
||||
newer := testFabricRegistryGossipRecord(now.Add(2*time.Minute), 11)
|
||||
newer.Endpoints[0].Address = "quic://192.0.2.11:19443"
|
||||
newerSigned, err := SignFabricRegistryGossipRecord(newer, issuer, privateKey)
|
||||
if err != nil {
|
||||
t.Fatalf("sign newer: %v", err)
|
||||
}
|
||||
policy.Now = now.Add(2 * time.Minute)
|
||||
entry, changed, err = registry.ApplyGossipRecord(newerSigned, policy, false)
|
||||
if err != nil || !changed || entry.State != FabricRegistryCandidate {
|
||||
t.Fatalf("apply newer candidate changed=%t entry=%+v err=%v", changed, entry, err)
|
||||
}
|
||||
activeRecord, ok := registry.Active("cluster-1", FabricRegistryServiceControlAPI, FabricRegistryScopeCluster, "", policy.Now)
|
||||
if !ok || activeRecord.Endpoints[0].Address != "quic://192.0.2.10:19443" {
|
||||
t.Fatalf("unverified newer candidate displaced active fallback: ok=%t record=%+v", ok, activeRecord)
|
||||
}
|
||||
if !registry.MarkLiveVerified("cluster-1", FabricRegistryServiceControlAPI, FabricRegistryScopeCluster, "", policy.Now.Add(time.Second)) {
|
||||
t.Fatal("mark live verified failed")
|
||||
}
|
||||
activeRecord, ok = registry.Active("cluster-1", FabricRegistryServiceControlAPI, FabricRegistryScopeCluster, "", policy.Now.Add(time.Second))
|
||||
if !ok || activeRecord.Endpoints[0].Address != "quic://192.0.2.11:19443" {
|
||||
t.Fatalf("newer verified record not active: ok=%t record=%+v", ok, activeRecord)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricRegistryResolveServicePrefersVerifiedScopedRegionalEndpoint(t *testing.T) {
|
||||
now := time.Date(2026, 5, 18, 10, 0, 0, 0, time.UTC)
|
||||
publicKey, privateKey, err := ed25519.GenerateKey(nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
issuer := FabricRegistryTrustedIssuer{IssuerID: "authority-1", Role: FabricRegistryAuthorityControl, PublicKey: publicKey}
|
||||
policy := FabricRegistryVerificationPolicy{
|
||||
LocalClusterID: "cluster-1",
|
||||
TrustedIssuers: []FabricRegistryTrustedIssuer{issuer},
|
||||
RequiredSignatures: 1,
|
||||
Now: now,
|
||||
}
|
||||
registry := NewFabricRegistry()
|
||||
clusterRecord := testFabricRegistryGossipRecord(now, 10)
|
||||
clusterRecord.Endpoints = []FabricRegistryEndpoint{
|
||||
{EndpointID: "control-eu", Address: "quic://eu.example.test:19443", Transport: "direct_quic", Region: "eu", Priority: 10, Weight: 1},
|
||||
{EndpointID: "control-us", Address: "quic://us.example.test:19443", Transport: "direct_quic", Region: "us", Priority: 10, Weight: 10},
|
||||
}
|
||||
signedCluster, err := SignFabricRegistryGossipRecord(clusterRecord, issuer, privateKey)
|
||||
if err != nil {
|
||||
t.Fatalf("sign cluster record: %v", err)
|
||||
}
|
||||
if _, _, err := registry.ApplyGossipRecord(signedCluster, policy, true); err != nil {
|
||||
t.Fatalf("apply cluster record: %v", err)
|
||||
}
|
||||
orgRecord := testFabricRegistryGossipRecord(now.Add(time.Minute), 11)
|
||||
orgRecord.Scope = FabricRegistryScopeOrganization
|
||||
orgRecord.OrganizationID = "org-1"
|
||||
orgRecord.Endpoints = []FabricRegistryEndpoint{
|
||||
{EndpointID: "control-org", Address: "quic://org.example.test:19443", Transport: "direct_quic", Region: "eu", Priority: 1, Weight: 1},
|
||||
}
|
||||
signedOrg, err := SignFabricRegistryGossipRecord(orgRecord, issuer, privateKey)
|
||||
if err != nil {
|
||||
t.Fatalf("sign org record: %v", err)
|
||||
}
|
||||
policy.Now = now.Add(time.Minute)
|
||||
if _, _, err := registry.ApplyGossipRecord(signedOrg, policy, false); err != nil {
|
||||
t.Fatalf("apply org candidate: %v", err)
|
||||
}
|
||||
resolved := registry.ResolveService(FabricRegistryResolveRequest{
|
||||
ClusterID: "cluster-1",
|
||||
Service: FabricRegistryServiceControlAPI,
|
||||
Scope: FabricRegistryScopeOrganization,
|
||||
OrganizationID: "org-1",
|
||||
PreferredRegion: "us",
|
||||
Now: now.Add(time.Minute),
|
||||
})
|
||||
if !resolved.Found || resolved.Scope != FabricRegistryScopeCluster || resolved.Endpoints[0].EndpointID != "control-us" {
|
||||
t.Fatalf("expected cluster fallback with preferred region endpoint, got %+v", resolved)
|
||||
}
|
||||
if !registry.MarkLiveVerified("cluster-1", FabricRegistryServiceControlAPI, FabricRegistryScopeOrganization, "org-1", now.Add(2*time.Minute)) {
|
||||
t.Fatal("mark org live verified failed")
|
||||
}
|
||||
resolved = registry.ResolveService(FabricRegistryResolveRequest{
|
||||
ClusterID: "cluster-1",
|
||||
Service: FabricRegistryServiceControlAPI,
|
||||
Scope: FabricRegistryScopeOrganization,
|
||||
OrganizationID: "org-1",
|
||||
Now: now.Add(2 * time.Minute),
|
||||
})
|
||||
if !resolved.Found || resolved.Scope != FabricRegistryScopeOrganization || resolved.Endpoints[0].EndpointID != "control-org" {
|
||||
t.Fatalf("expected verified organization record, got %+v", resolved)
|
||||
}
|
||||
snapshot := registry.Snapshot(now.Add(2 * time.Minute))
|
||||
if snapshot.Active != 2 || snapshot.Candidate != 0 {
|
||||
t.Fatalf("unexpected snapshot: %+v", snapshot)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricRegistryVerifyCandidatesPromotesAfterQUICPong(t *testing.T) {
|
||||
now := time.Date(2026, 5, 18, 10, 0, 0, 0, time.UTC)
|
||||
tlsConfig := testQUICTLSConfig(t)
|
||||
listener := startQUICFabricEchoServerWithTLS(t, tlsConfig)
|
||||
defer listener.Close()
|
||||
publicKey, privateKey, err := ed25519.GenerateKey(nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
issuer := FabricRegistryTrustedIssuer{IssuerID: "authority-1", Role: FabricRegistryAuthorityControl, PublicKey: publicKey}
|
||||
policy := FabricRegistryVerificationPolicy{
|
||||
LocalClusterID: "cluster-1",
|
||||
TrustedIssuers: []FabricRegistryTrustedIssuer{issuer},
|
||||
RequiredSignatures: 1,
|
||||
Now: now,
|
||||
}
|
||||
record := testFabricRegistryGossipRecord(now, 12)
|
||||
record.Endpoints[0].Address = "quic://" + listener.Addr().String()
|
||||
record.Endpoints[0].PeerCertSHA256 = testQUICCertSHA256(t, tlsConfig)
|
||||
signed, err := SignFabricRegistryGossipRecord(record, issuer, privateKey)
|
||||
if err != nil {
|
||||
t.Fatalf("sign record: %v", err)
|
||||
}
|
||||
registry := NewFabricRegistry()
|
||||
if entry, changed, err := registry.ApplyGossipRecord(signed, policy, false); err != nil || !changed || entry.State != FabricRegistryCandidate {
|
||||
t.Fatalf("apply candidate changed=%t entry=%+v err=%v", changed, entry, err)
|
||||
}
|
||||
results := registry.VerifyCandidates(context.Background(), NewQUICFabricTransport(nil), FabricRegistryLiveProbeRequest{
|
||||
ClusterID: "cluster-1",
|
||||
Timeout: 3 * time.Second,
|
||||
Now: now.Add(time.Second),
|
||||
MaxCandidates: 1,
|
||||
})
|
||||
if len(results) != 1 || results[0].Status != "reachable" || !results[0].Promoted {
|
||||
t.Fatalf("unexpected live probe results: %+v", results)
|
||||
}
|
||||
if _, ok := registry.Active("cluster-1", FabricRegistryServiceControlAPI, FabricRegistryScopeCluster, "", now.Add(time.Second)); !ok {
|
||||
t.Fatal("candidate was not promoted to active")
|
||||
}
|
||||
}
|
||||
|
||||
func testFabricRegistryGossipRecord(now time.Time, epoch int64) FabricRegistryGossipRecord {
|
||||
return FabricRegistryGossipRecord{
|
||||
SchemaVersion: FabricRegistryGossipRecordSchema,
|
||||
ClusterID: "cluster-1",
|
||||
Service: FabricRegistryServiceControlAPI,
|
||||
Scope: FabricRegistryScopeCluster,
|
||||
Epoch: epoch,
|
||||
Generation: "gen",
|
||||
IssuedAt: now,
|
||||
ExpiresAt: now.Add(10 * time.Minute),
|
||||
IssuerNodeID: "authority-1",
|
||||
IssuerRole: FabricRegistryAuthorityControl,
|
||||
Endpoints: []FabricRegistryEndpoint{
|
||||
{
|
||||
EndpointID: "control-a",
|
||||
Address: "quic://192.0.2.10:19443",
|
||||
Transport: "direct_quic",
|
||||
Reachability: "public",
|
||||
ConnectivityMode: "direct",
|
||||
Priority: 1,
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -20,7 +20,6 @@ import (
|
||||
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/authority"
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
|
||||
"github.com/gorilla/websocket"
|
||||
)
|
||||
|
||||
// ProductionEnvelopeObserver is a callback that receives a context and a
// ProductionEnvelopeObservation and reports failure through its error result.
type ProductionEnvelopeObserver func(ctx context.Context, observation ProductionEnvelopeObservation) error
|
||||
@@ -55,6 +54,22 @@ type RemoteWorkspaceFrameSinkSessionMailboxConsumerResume interface {
|
||||
type RemoteWorkspaceFrameSinkSessionMailboxPreflight interface {
|
||||
PreflightAdapterSessionMailboxConsumerResume(adapterSessionID string, consumerID string, resumeFrom string, limit int, now time.Time) (RemoteWorkspaceAdapterMailboxPreflightSnapshot, error)
|
||||
}
|
||||
type FabricSessionEventLogEntry struct {
|
||||
Event string `json:"event"`
|
||||
ClusterID string `json:"cluster_id,omitempty"`
|
||||
NodeID string `json:"node_id,omitempty"`
|
||||
PeerID string `json:"peer_id,omitempty"`
|
||||
AcceptedBy string `json:"accepted_by,omitempty"`
|
||||
SessionID string `json:"session_id,omitempty"`
|
||||
SessionEvent fabricproto.SessionEventType `json:"session_event,omitempty"`
|
||||
StreamID uint64 `json:"stream_id,omitempty"`
|
||||
Sequence uint64 `json:"sequence,omitempty"`
|
||||
TrafficClass fabricproto.TrafficClass `json:"traffic_class,omitempty"`
|
||||
RemoteAddr string `json:"remote_addr,omitempty"`
|
||||
Reason string `json:"reason,omitempty"`
|
||||
ObservedAt time.Time `json:"observed_at"`
|
||||
}
|
||||
|
||||
type VPNPacketIngress interface {
|
||||
SendClientPacketBatch(ctx context.Context, clusterID string, vpnConnectionID string, packets [][]byte) error
|
||||
ReceiveClientPacketBatch(ctx context.Context, clusterID string, vpnConnectionID string, timeout time.Duration) ([][]byte, error)
|
||||
@@ -69,24 +84,21 @@ type VPNPacketIngressRoutePreference interface {
|
||||
}
|
||||
|
||||
type Server struct {
|
||||
Local PeerIdentity
|
||||
SyntheticRuntime *SyntheticRuntime
|
||||
ProductionForwardingEnabled bool
|
||||
ProductionEnvelopeObserver ProductionEnvelopeObserver
|
||||
ProductionEnvelopeDelivery ProductionEnvelopeDelivery
|
||||
ProductionForwardTransport ProductionForwardTransport
|
||||
ProductionForwardLogger ProductionForwardLogger
|
||||
DisableHTTPDataPlane bool
|
||||
FabricServiceChannelLogger FabricServiceChannelAccessLogger
|
||||
RemoteWorkspaceFrameSink RemoteWorkspaceFrameSink
|
||||
ProductionRoutes []SyntheticRoute
|
||||
VPNPacketIngress VPNPacketIngress
|
||||
BackendProxyBaseURL string
|
||||
ClusterAuthorityPublicKey string
|
||||
ServiceChannelIntrospection bool
|
||||
FabricSessionEnabled bool
|
||||
FabricSessionWebSocketEnabled bool
|
||||
FabricSessionLogger FabricSessionEventLogger
|
||||
Local PeerIdentity
|
||||
SyntheticRuntime *SyntheticRuntime
|
||||
ProductionForwardingEnabled bool
|
||||
ProductionEnvelopeObserver ProductionEnvelopeObserver
|
||||
ProductionEnvelopeDelivery ProductionEnvelopeDelivery
|
||||
ProductionForwardTransport ProductionForwardTransport
|
||||
ProductionForwardLogger ProductionForwardLogger
|
||||
DisableHTTPDataPlane bool
|
||||
FabricServiceChannelLogger FabricServiceChannelAccessLogger
|
||||
RemoteWorkspaceFrameSink RemoteWorkspaceFrameSink
|
||||
ProductionRoutes []SyntheticRoute
|
||||
VPNPacketIngress VPNPacketIngress
|
||||
BackendProxyBaseURL string
|
||||
ClusterAuthorityPublicKey string
|
||||
ServiceChannelIntrospection bool
|
||||
}
|
||||
|
||||
func (s Server) Handler() http.Handler {
|
||||
@@ -94,9 +106,6 @@ func (s Server) Handler() http.Handler {
|
||||
mux.HandleFunc("/mesh/v1/health", s.handleHealth)
|
||||
mux.HandleFunc("/mesh/v1/forward", s.handleForward)
|
||||
mux.HandleFunc("/mesh/v1/synthetic/probe", s.handleSyntheticProbe)
|
||||
if s.FabricSessionEnabled && s.FabricSessionWebSocketEnabled {
|
||||
mux.HandleFunc("/mesh/v1/fabric/session/ws", s.handleFabricSessionWebSocket)
|
||||
}
|
||||
if s.RemoteWorkspaceFrameSink != nil {
|
||||
mux.HandleFunc("/mesh/v1/remote-workspace/adapter-sessions/", s.handleRemoteWorkspaceAdapterSessionControl)
|
||||
}
|
||||
@@ -196,185 +205,6 @@ func (s Server) handleRemoteWorkspaceAdapterSessionSnapshot(w http.ResponseWrite
|
||||
_ = json.NewEncoder(w).Encode(snapshotter.SnapshotAdapterSessions(includeTerminal, limit, time.Now().UTC()))
|
||||
}
|
||||
|
||||
type FabricSessionEventLogEntry struct {
|
||||
Event string `json:"event"`
|
||||
ClusterID string `json:"cluster_id,omitempty"`
|
||||
NodeID string `json:"node_id,omitempty"`
|
||||
PeerID string `json:"peer_id,omitempty"`
|
||||
AcceptedBy string `json:"accepted_by,omitempty"`
|
||||
SessionID string `json:"session_id,omitempty"`
|
||||
SessionEvent fabricproto.SessionEventType `json:"session_event,omitempty"`
|
||||
StreamID uint64 `json:"stream_id,omitempty"`
|
||||
Sequence uint64 `json:"sequence,omitempty"`
|
||||
TrafficClass fabricproto.TrafficClass `json:"traffic_class,omitempty"`
|
||||
RemoteAddr string `json:"remote_addr,omitempty"`
|
||||
Reason string `json:"reason,omitempty"`
|
||||
ObservedAt time.Time `json:"observed_at"`
|
||||
}
|
||||
|
||||
// fabricSessionAuthorityPayload is the JSON document carried in the
// X-RAP-Fabric-Session-Authority-Payload header and covered by the authority
// signature.
type fabricSessionAuthorityPayload struct {
	// SchemaVersion identifies the payload format.
	SchemaVersion string `json:"schema_version"`
	// ClusterID is the cluster the session was issued for.
	ClusterID string `json:"cluster_id"`
	// SessionID identifies the authorized session.
	SessionID string `json:"session_id"`

	// Optional node identifiers; omitted from JSON when empty.
	SourceNodeID        string `json:"source_node_id,omitempty"`
	SelectedEntryNodeID string `json:"selected_entry_node_id,omitempty"`

	// TokenHash binds the payload to a bearer token by its hash.
	TokenHash string `json:"token_hash"`

	// Validity window of the payload.
	IssuedAt  time.Time `json:"issued_at"`
	ExpiresAt time.Time `json:"expires_at"`
}
|
||||
|
||||
// fabricSessionAuthDecision is the outcome of authorizing a fabric session
// request.
type fabricSessionAuthDecision struct {
	// AcceptedBy names the authorization mode that admitted the request.
	AcceptedBy string
	// SessionID is the session identifier taken from the signed payload, when
	// one was supplied.
	SessionID string
}
|
||||
|
||||
func (s Server) handleFabricSessionWebSocket(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodGet {
|
||||
w.WriteHeader(http.StatusMethodNotAllowed)
|
||||
return
|
||||
}
|
||||
decision, ok := s.validateFabricSessionRequest(w, r)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
upgrader := websocket.Upgrader{
|
||||
CheckOrigin: func(_ *http.Request) bool { return true },
|
||||
}
|
||||
conn, err := upgrader.Upgrade(w, r, nil)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
defer conn.Close()
|
||||
|
||||
s.logFabricSession(FabricSessionEventLogEntry{
|
||||
Event: "fabric_session_websocket_opened",
|
||||
ClusterID: s.Local.ClusterID,
|
||||
NodeID: s.Local.NodeID,
|
||||
AcceptedBy: decision.AcceptedBy,
|
||||
SessionID: decision.SessionID,
|
||||
RemoteAddr: r.RemoteAddr,
|
||||
ObservedAt: time.Now().UTC(),
|
||||
})
|
||||
loop := fabricproto.TransportLoop{
|
||||
Session: fabricproto.NewSession(fabricproto.SessionConfig{}),
|
||||
OnEvent: func(event fabricproto.SessionEvent) ([]fabricproto.Frame, error) {
|
||||
s.logFabricSession(FabricSessionEventLogEntry{
|
||||
Event: "fabric_session_event",
|
||||
ClusterID: s.Local.ClusterID,
|
||||
NodeID: s.Local.NodeID,
|
||||
AcceptedBy: decision.AcceptedBy,
|
||||
SessionID: decision.SessionID,
|
||||
SessionEvent: event.Type,
|
||||
StreamID: event.StreamID,
|
||||
Sequence: event.Sequence,
|
||||
TrafficClass: event.TrafficClass,
|
||||
RemoteAddr: r.RemoteAddr,
|
||||
ObservedAt: time.Now().UTC(),
|
||||
})
|
||||
return nil, nil
|
||||
},
|
||||
}
|
||||
err = loop.RunWebSocket(r.Context(), conn, fabricproto.WebSocketTransportConfig{})
|
||||
if err != nil && !errors.Is(err, context.Canceled) {
|
||||
s.logFabricSession(FabricSessionEventLogEntry{
|
||||
Event: "fabric_session_websocket_closed",
|
||||
ClusterID: s.Local.ClusterID,
|
||||
NodeID: s.Local.NodeID,
|
||||
AcceptedBy: decision.AcceptedBy,
|
||||
SessionID: decision.SessionID,
|
||||
RemoteAddr: r.RemoteAddr,
|
||||
Reason: err.Error(),
|
||||
ObservedAt: time.Now().UTC(),
|
||||
})
|
||||
return
|
||||
}
|
||||
s.logFabricSession(FabricSessionEventLogEntry{
|
||||
Event: "fabric_session_websocket_closed",
|
||||
ClusterID: s.Local.ClusterID,
|
||||
NodeID: s.Local.NodeID,
|
||||
AcceptedBy: decision.AcceptedBy,
|
||||
SessionID: decision.SessionID,
|
||||
RemoteAddr: r.RemoteAddr,
|
||||
ObservedAt: time.Now().UTC(),
|
||||
})
|
||||
}
|
||||
|
||||
func (s Server) validateFabricSessionRequest(w http.ResponseWriter, r *http.Request) (fabricSessionAuthDecision, bool) {
|
||||
var decision fabricSessionAuthDecision
|
||||
token := fabricSessionBearerToken(r)
|
||||
if !strings.HasPrefix(token, "rap_fsn_") {
|
||||
http.Error(w, "fabric session token is required", http.StatusUnauthorized)
|
||||
return decision, false
|
||||
}
|
||||
payload, err := s.verifyFabricSessionAuthority(r, token)
|
||||
if err != nil {
|
||||
http.Error(w, err.Error(), http.StatusForbidden)
|
||||
return decision, false
|
||||
}
|
||||
decision.AcceptedBy = "legacy_unsigned"
|
||||
if payload != nil {
|
||||
decision.AcceptedBy = "signed"
|
||||
decision.SessionID = strings.TrimSpace(payload.SessionID)
|
||||
}
|
||||
return decision, true
|
||||
}
|
||||
|
||||
func (s Server) verifyFabricSessionAuthority(r *http.Request, token string) (*fabricSessionAuthorityPayload, error) {
|
||||
publicKey := strings.TrimSpace(s.ClusterAuthorityPublicKey)
|
||||
payloadHeader := strings.TrimSpace(r.Header.Get("X-RAP-Fabric-Session-Authority-Payload"))
|
||||
signatureHeader := strings.TrimSpace(r.Header.Get("X-RAP-Fabric-Session-Authority-Signature"))
|
||||
if payloadHeader == "" && signatureHeader == "" {
|
||||
if publicKey != "" {
|
||||
return nil, fmt.Errorf("%w: signed fabric session authority is required", ErrUnauthorizedChannel)
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
if publicKey == "" {
|
||||
return nil, ErrUnauthorizedChannel
|
||||
}
|
||||
if payloadHeader == "" || signatureHeader == "" {
|
||||
return nil, fmt.Errorf("%w: fabric session authority payload and signature are required together", ErrUnauthorizedChannel)
|
||||
}
|
||||
payloadRaw, err := decodeHeaderJSON(payloadHeader)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%w: invalid fabric session authority payload", ErrUnauthorizedChannel)
|
||||
}
|
||||
signatureRaw, err := decodeHeaderJSON(signatureHeader)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%w: invalid fabric session authority signature", ErrUnauthorizedChannel)
|
||||
}
|
||||
var signature authority.Signature
|
||||
if err := json.Unmarshal(signatureRaw, &signature); err != nil {
|
||||
return nil, fmt.Errorf("%w: invalid fabric session authority signature", ErrUnauthorizedChannel)
|
||||
}
|
||||
if err := authority.VerifyRaw(publicKey, payloadRaw, signature); err != nil {
|
||||
return nil, fmt.Errorf("%w: fabric session authority signature rejected", ErrUnauthorizedChannel)
|
||||
}
|
||||
var payload fabricSessionAuthorityPayload
|
||||
if err := json.Unmarshal(payloadRaw, &payload); err != nil {
|
||||
return nil, fmt.Errorf("%w: invalid fabric session authority payload", ErrUnauthorizedChannel)
|
||||
}
|
||||
if payload.SchemaVersion != "rap.fabric_session_authority.v1" ||
|
||||
payload.ClusterID != s.Local.ClusterID ||
|
||||
payload.TokenHash != fabricSessionTokenHash(token) ||
|
||||
strings.TrimSpace(payload.SessionID) == "" {
|
||||
return nil, fmt.Errorf("%w: fabric session authority payload mismatch", ErrUnauthorizedChannel)
|
||||
}
|
||||
if payload.SelectedEntryNodeID != "" && s.Local.NodeID != "" && payload.SelectedEntryNodeID != s.Local.NodeID {
|
||||
return nil, fmt.Errorf("%w: fabric session entry node mismatch", ErrUnauthorizedChannel)
|
||||
}
|
||||
if !payload.ExpiresAt.IsZero() && !payload.ExpiresAt.After(time.Now().UTC()) {
|
||||
return nil, fmt.Errorf("%w: fabric session lease expired", ErrUnauthorizedChannel)
|
||||
}
|
||||
return &payload, nil
|
||||
}
|
||||
|
||||
func (s Server) logFabricSession(entry FabricSessionEventLogEntry) {
|
||||
if s.FabricSessionLogger != nil {
|
||||
s.FabricSessionLogger(entry)
|
||||
}
|
||||
}
|
||||
|
||||
func (s Server) handleRemoteWorkspaceAdapterSessionMailbox(w http.ResponseWriter, r *http.Request) {
|
||||
reader, ok := s.RemoteWorkspaceFrameSink.(RemoteWorkspaceFrameSinkSessionMailbox)
|
||||
if !ok {
|
||||
@@ -711,15 +541,15 @@ func parseRemoteWorkspaceAdapterSessionControlPath(path string) (string, bool) {
|
||||
}
|
||||
|
||||
func (s Server) handleVPNPacketIngress(w http.ResponseWriter, r *http.Request) bool {
|
||||
if clusterID, vpnConnectionID, ok := parseVPNClientPacketWebSocketPath(r.URL.Path); ok {
|
||||
s.handleVPNPacketWebSocket(w, r, clusterID, "", vpnConnectionID, false, true, "")
|
||||
if isVPNClientPacketWebSocketPath(r.URL.Path) {
|
||||
http.Error(w, "legacy VPN WebSocket dataplane is removed; use QUIC fabric route", http.StatusGone)
|
||||
return true
|
||||
}
|
||||
clusterID, vpnConnectionID, ok := parseVPNClientPacketPath(r.URL.Path)
|
||||
if !ok {
|
||||
if _, _, ok := parseVPNClientPacketPath(r.URL.Path); !ok {
|
||||
return false
|
||||
}
|
||||
return s.handleVPNPacketHTTP(w, r, clusterID, "", vpnConnectionID, "", false, true, "")
|
||||
http.Error(w, "legacy VPN HTTP dataplane is removed; use QUIC fabric route", http.StatusGone)
|
||||
return true
|
||||
}
|
||||
|
||||
func (s Server) handleFabricServiceChannelRemoteWorkspaceIngress(w http.ResponseWriter, r *http.Request) bool {
|
||||
@@ -728,7 +558,7 @@ func (s Server) handleFabricServiceChannelRemoteWorkspaceIngress(w http.Response
|
||||
return false
|
||||
}
|
||||
if webSocket {
|
||||
http.Error(w, "remote workspace service-channel websocket forwarding is not implemented", http.StatusNotImplemented)
|
||||
http.Error(w, "remote workspace service-channel websocket ingress is removed; use QUIC fabric route", http.StatusGone)
|
||||
return true
|
||||
}
|
||||
decision, valid := s.validateFabricServiceChannelRequest(w, r, clusterID, channelID, resourceID, FabricServiceClassRemoteWorkspace, channelClass)
|
||||
@@ -809,7 +639,7 @@ func (s Server) handleFabricServiceChannelRemoteWorkspaceIngress(w http.Response
|
||||
"channel_id": channelID,
|
||||
"resource_id": resourceID,
|
||||
"data_plane": "validated",
|
||||
"payload_flow": "not_implemented",
|
||||
"payload_flow": "validated_only",
|
||||
})
|
||||
return true
|
||||
}
|
||||
@@ -898,7 +728,7 @@ func validateRemoteWorkspaceFrameBatchProbe(payload []byte, requiredChannelClass
|
||||
return decoded, fmt.Errorf("unsupported remote workspace frame batch schema")
|
||||
}
|
||||
if !decoded.ProbeOnly {
|
||||
return decoded, fmt.Errorf("remote workspace payload forwarding is not implemented")
|
||||
return decoded, fmt.Errorf("remote workspace production payload forwarding is disabled; probe_only required")
|
||||
}
|
||||
if strings.TrimSpace(strings.ToLower(decoded.ServiceClass)) != FabricServiceClassRemoteWorkspace {
|
||||
return decoded, fmt.Errorf("remote workspace frame batch service class mismatch")
|
||||
@@ -952,438 +782,6 @@ func isAllowedRemoteWorkspaceAdapterFrameDirection(channel string, direction str
|
||||
}
|
||||
}
|
||||
|
||||
func (s Server) handleFabricServiceChannelVPNPacketIngress(w http.ResponseWriter, r *http.Request) bool {
|
||||
if clusterID, channelID, vpnConnectionID, ok := parseFabricServiceChannelVPNPacketWebSocketPath(r.URL.Path); ok {
|
||||
decision, valid := s.validateFabricServiceChannelVPNRequest(w, r, clusterID, channelID, vpnConnectionID)
|
||||
if !valid {
|
||||
return true
|
||||
}
|
||||
s.logFabricServiceChannelAccess(r, clusterID, channelID, vpnConnectionID, decision)
|
||||
s.preferVPNPacketIngressRoute(decision.PreferredRouteID)
|
||||
s.handleVPNPacketWebSocket(w, r, clusterID, channelID, vpnConnectionID, decision.ForceBackendFallback, decision.BackendFallbackAllowed(), decision.BackendRelayPolicy)
|
||||
return true
|
||||
}
|
||||
clusterID, channelID, vpnConnectionID, ok := parseFabricServiceChannelVPNPacketPath(r.URL.Path)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
decision, valid := s.validateFabricServiceChannelVPNRequest(w, r, clusterID, channelID, vpnConnectionID)
|
||||
if !valid {
|
||||
return true
|
||||
}
|
||||
w.Header().Set("X-RAP-Service-Channel-Accepted-By", decision.AcceptedBy)
|
||||
s.logFabricServiceChannelAccess(r, clusterID, channelID, vpnConnectionID, decision)
|
||||
s.preferVPNPacketIngressRoute(decision.PreferredRouteID)
|
||||
backendPath := "/api/v1/clusters/" + clusterID + "/vpn-connections/" + vpnConnectionID + "/tunnel/client/packets"
|
||||
return s.handleVPNPacketHTTP(w, r, clusterID, channelID, vpnConnectionID, backendPath, decision.ForceBackendFallback, decision.BackendFallbackAllowed(), decision.BackendRelayPolicy)
|
||||
}
|
||||
|
||||
func (s Server) preferVPNPacketIngressRoute(routeID string) {
|
||||
routeID = strings.TrimSpace(routeID)
|
||||
if routeID == "" || s.VPNPacketIngress == nil {
|
||||
return
|
||||
}
|
||||
if preferred, ok := s.VPNPacketIngress.(VPNPacketIngressRoutePreference); ok {
|
||||
preferred.PreferClientRoute(routeID)
|
||||
}
|
||||
}
|
||||
|
||||
func (s Server) handleVPNPacketHTTP(w http.ResponseWriter, r *http.Request, clusterID string, channelID string, vpnConnectionID string, backendFallbackPath string, forceBackendFallback bool, backendFallbackAllowed bool, backendRelayPolicy string) bool {
|
||||
switch r.Method {
|
||||
case http.MethodPost:
|
||||
body, err := io.ReadAll(http.MaxBytesReader(w, r.Body, MaxProductionVPNPacketPayloadBytes))
|
||||
if err != nil {
|
||||
http.Error(w, "invalid vpn packet payload", http.StatusBadRequest)
|
||||
return true
|
||||
}
|
||||
if r.URL.Query().Get("batch") != "true" && len(body) == 0 {
|
||||
http.Error(w, "empty vpn packet payload", http.StatusBadRequest)
|
||||
return true
|
||||
}
|
||||
packets := [][]byte{body}
|
||||
if r.URL.Query().Get("batch") == "true" {
|
||||
packets, err = decodeVPNIngressPacketBatch(body)
|
||||
if err != nil {
|
||||
http.Error(w, "invalid vpn packet batch", http.StatusBadRequest)
|
||||
return true
|
||||
}
|
||||
}
|
||||
packets = cleanVPNIngressPacketBatch(packets)
|
||||
if len(packets) == 0 {
|
||||
http.Error(w, "empty vpn packet batch", http.StatusBadRequest)
|
||||
return true
|
||||
}
|
||||
if forceBackendFallback {
|
||||
if backendFallbackAllowed && s.proxyVPNPacketIngressToBackendPath(w, r, body, backendFallbackPath) {
|
||||
return true
|
||||
}
|
||||
s.logFabricServiceChannelViolation(r, clusterID, channelID, vpnConnectionID, backendRelayPolicy, "backend_fallback_blocked_by_policy", ErrRouteNotFound.Error())
|
||||
http.Error(w, ErrRouteNotFound.Error(), vpnIngressStatusCode(ErrRouteNotFound))
|
||||
return true
|
||||
}
|
||||
trafficClass := inferVPNPacketTrafficClass(r.Header.Get("X-RAP-Traffic-Class"), packets)
|
||||
var sendErr error
|
||||
if classIngress, ok := s.VPNPacketIngress.(VPNPacketIngressTrafficClass); ok {
|
||||
sendErr = classIngress.SendClientPacketBatchWithTrafficClass(r.Context(), clusterID, vpnConnectionID, trafficClass, packets)
|
||||
} else {
|
||||
sendErr = s.VPNPacketIngress.SendClientPacketBatch(r.Context(), clusterID, vpnConnectionID, packets)
|
||||
}
|
||||
if sendErr != nil {
|
||||
if backendFallbackAllowed && s.proxyVPNPacketIngressToBackendPath(w, r, body, backendFallbackPath) {
|
||||
return true
|
||||
}
|
||||
s.logFabricServiceChannelViolation(r, clusterID, channelID, vpnConnectionID, backendRelayPolicy, "fabric_route_send_failed_backend_fallback_blocked", sendErr.Error())
|
||||
http.Error(w, sendErr.Error(), vpnIngressStatusCode(sendErr))
|
||||
return true
|
||||
}
|
||||
w.WriteHeader(http.StatusAccepted)
|
||||
return true
|
||||
case http.MethodGet:
|
||||
if forceBackendFallback {
|
||||
if backendFallbackAllowed && s.proxyVPNPacketIngressToBackendPath(w, r, nil, backendFallbackPath) {
|
||||
return true
|
||||
}
|
||||
s.logFabricServiceChannelViolation(r, clusterID, channelID, vpnConnectionID, backendRelayPolicy, "backend_fallback_blocked_by_policy", ErrRouteNotFound.Error())
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
return true
|
||||
}
|
||||
timeout := vpnIngressTimeout(r)
|
||||
packets, err := s.VPNPacketIngress.ReceiveClientPacketBatch(r.Context(), clusterID, vpnConnectionID, timeout)
|
||||
if err != nil {
|
||||
http.Error(w, err.Error(), vpnIngressStatusCode(err))
|
||||
return true
|
||||
}
|
||||
packets = cleanVPNIngressPacketBatch(packets)
|
||||
if len(packets) == 0 {
|
||||
if backendFallbackAllowed && s.proxyVPNPacketIngressToBackendPath(w, r, nil, backendFallbackPath) {
|
||||
return true
|
||||
}
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
return true
|
||||
}
|
||||
if r.URL.Query().Get("batch") == "true" {
|
||||
w.Header().Set("Content-Type", "application/vnd.rap.vpn-packet-batch.v1")
|
||||
_, _ = w.Write(encodeVPNIngressPacketBatch(packets))
|
||||
return true
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/octet-stream")
|
||||
_, _ = w.Write(packets[0])
|
||||
return true
|
||||
default:
|
||||
w.WriteHeader(http.StatusMethodNotAllowed)
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
func (s Server) handleVPNPacketWebSocket(w http.ResponseWriter, r *http.Request, clusterID string, channelID string, vpnConnectionID string, forceBackendFallback bool, backendFallbackAllowed bool, backendRelayPolicy string) {
|
||||
if r.Method != http.MethodGet {
|
||||
w.WriteHeader(http.StatusMethodNotAllowed)
|
||||
return
|
||||
}
|
||||
if s.VPNPacketIngress == nil {
|
||||
http.Error(w, ErrForwardRuntimeUnavailable.Error(), http.StatusServiceUnavailable)
|
||||
return
|
||||
}
|
||||
upgrader := websocket.Upgrader{
|
||||
CheckOrigin: func(_ *http.Request) bool { return true },
|
||||
}
|
||||
conn, err := upgrader.Upgrade(w, r, nil)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
defer conn.Close()
|
||||
conn.SetReadLimit(MaxProductionVPNPacketPayloadBytes)
|
||||
|
||||
ctx, cancel := context.WithCancel(r.Context())
|
||||
defer cancel()
|
||||
trafficClass := r.Header.Get("X-RAP-Traffic-Class")
|
||||
errCh := make(chan error, 2)
|
||||
go func() {
|
||||
errCh <- s.readVPNPacketWebSocket(ctx, conn, clusterID, channelID, vpnConnectionID, trafficClass, forceBackendFallback, backendFallbackAllowed, backendRelayPolicy)
|
||||
}()
|
||||
go func() {
|
||||
errCh <- s.writeVPNPacketWebSocket(ctx, conn, clusterID, channelID, vpnConnectionID, forceBackendFallback, backendFallbackAllowed, backendRelayPolicy)
|
||||
}()
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
case <-errCh:
|
||||
cancel()
|
||||
}
|
||||
}
|
||||
|
||||
func (s Server) readVPNPacketWebSocket(ctx context.Context, conn *websocket.Conn, clusterID string, channelID string, vpnConnectionID string, trafficClass string, forceBackendFallback bool, backendFallbackAllowed bool, backendRelayPolicy string) error {
|
||||
for {
|
||||
messageType, payload, err := conn.ReadMessage()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if messageType != websocket.BinaryMessage {
|
||||
continue
|
||||
}
|
||||
packets, err := decodeVPNIngressPacketBatch(payload)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
packets = cleanVPNIngressPacketBatch(packets)
|
||||
if len(packets) == 0 {
|
||||
continue
|
||||
}
|
||||
if forceBackendFallback {
|
||||
if !backendFallbackAllowed {
|
||||
s.logFabricServiceChannelViolation(nil, clusterID, channelID, vpnConnectionID, backendRelayPolicy, "backend_fallback_blocked_by_policy", ErrRouteNotFound.Error())
|
||||
return ErrRouteNotFound
|
||||
}
|
||||
if proxyErr := s.backendVPNPacketPost(ctx, clusterID, vpnConnectionID, payload); proxyErr != nil {
|
||||
return proxyErr
|
||||
}
|
||||
continue
|
||||
}
|
||||
sendErr := s.sendVPNPacketWebSocketBatch(ctx, clusterID, vpnConnectionID, inferVPNPacketTrafficClass(trafficClass, packets), packets, !backendFallbackAllowed)
|
||||
if sendErr != nil {
|
||||
if !backendFallbackAllowed {
|
||||
s.logFabricServiceChannelViolation(nil, clusterID, channelID, vpnConnectionID, backendRelayPolicy, "fabric_route_send_failed_backend_fallback_blocked", sendErr.Error())
|
||||
if isRetryableVPNPacketIngressError(sendErr) {
|
||||
continue
|
||||
}
|
||||
return sendErr
|
||||
}
|
||||
if proxyErr := s.backendVPNPacketPost(ctx, clusterID, vpnConnectionID, payload); proxyErr != nil {
|
||||
return sendErr
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s Server) sendVPNPacketWebSocketBatch(ctx context.Context, clusterID string, vpnConnectionID string, trafficClass string, packets [][]byte, retryRouteErrors bool) error {
|
||||
const maxAttempts = 6
|
||||
var lastErr error
|
||||
for attempt := 0; attempt < maxAttempts; attempt++ {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
var sendErr error
|
||||
if classIngress, ok := s.VPNPacketIngress.(VPNPacketIngressTrafficClass); ok {
|
||||
sendErr = classIngress.SendClientPacketBatchWithTrafficClass(ctx, clusterID, vpnConnectionID, trafficClass, packets)
|
||||
} else {
|
||||
sendErr = s.VPNPacketIngress.SendClientPacketBatch(ctx, clusterID, vpnConnectionID, packets)
|
||||
}
|
||||
if sendErr == nil {
|
||||
return nil
|
||||
}
|
||||
lastErr = sendErr
|
||||
if !retryRouteErrors || !isRetryableVPNPacketIngressError(sendErr) {
|
||||
return sendErr
|
||||
}
|
||||
timer := time.NewTimer(time.Duration(75+attempt*50) * time.Millisecond)
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
timer.Stop()
|
||||
return ctx.Err()
|
||||
case <-timer.C:
|
||||
}
|
||||
}
|
||||
return lastErr
|
||||
}
|
||||
|
||||
func isRetryableVPNPacketIngressError(err error) bool {
|
||||
return errors.Is(err, ErrRouteNotFound) ||
|
||||
errors.Is(err, ErrForwardRuntimeUnavailable) ||
|
||||
errors.Is(err, ErrForwardPeerUnavailable) ||
|
||||
errors.Is(err, ErrSyntheticPeerUnavailable)
|
||||
}
|
||||
|
||||
func (s Server) receiveVPNPacketWebSocketBatch(ctx context.Context, clusterID string, vpnConnectionID string, timeout time.Duration, retryRouteErrors bool) ([][]byte, error) {
|
||||
const maxAttempts = 4
|
||||
var lastErr error
|
||||
for attempt := 0; attempt < maxAttempts; attempt++ {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
packets, err := s.VPNPacketIngress.ReceiveClientPacketBatch(ctx, clusterID, vpnConnectionID, timeout)
|
||||
if err == nil {
|
||||
return packets, nil
|
||||
}
|
||||
lastErr = err
|
||||
if !retryRouteErrors || !isRetryableVPNPacketIngressError(err) {
|
||||
return nil, err
|
||||
}
|
||||
timer := time.NewTimer(time.Duration(75+attempt*50) * time.Millisecond)
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
timer.Stop()
|
||||
return nil, ctx.Err()
|
||||
case <-timer.C:
|
||||
}
|
||||
}
|
||||
if retryRouteErrors && isRetryableVPNPacketIngressError(lastErr) {
|
||||
return nil, nil
|
||||
}
|
||||
return nil, lastErr
|
||||
}
|
||||
|
||||
func (s Server) writeVPNPacketWebSocket(ctx context.Context, conn *websocket.Conn, clusterID string, channelID string, vpnConnectionID string, forceBackendFallback bool, backendFallbackAllowed bool, backendRelayPolicy string) error {
|
||||
lastPing := time.Now()
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
default:
|
||||
}
|
||||
var packets [][]byte
|
||||
var err error
|
||||
if !forceBackendFallback {
|
||||
packets, err = s.receiveVPNPacketWebSocketBatch(ctx, clusterID, vpnConnectionID, 50*time.Millisecond, !backendFallbackAllowed)
|
||||
}
|
||||
if forceBackendFallback && !backendFallbackAllowed {
|
||||
s.logFabricServiceChannelViolation(nil, clusterID, channelID, vpnConnectionID, backendRelayPolicy, "backend_fallback_blocked_by_policy", ErrRouteNotFound.Error())
|
||||
return ErrRouteNotFound
|
||||
}
|
||||
if err != nil && !backendFallbackAllowed {
|
||||
s.logFabricServiceChannelViolation(nil, clusterID, channelID, vpnConnectionID, backendRelayPolicy, "fabric_route_receive_failed_backend_fallback_blocked", err.Error())
|
||||
return err
|
||||
}
|
||||
if backendFallbackAllowed && (forceBackendFallback || err != nil || len(packets) == 0) {
|
||||
backendPackets, proxyErr := s.backendVPNPacketGet(ctx, clusterID, vpnConnectionID, 50*time.Millisecond)
|
||||
if proxyErr != nil && err != nil {
|
||||
return err
|
||||
}
|
||||
if len(backendPackets) > 0 {
|
||||
packets = backendPackets
|
||||
}
|
||||
}
|
||||
if len(packets) > 0 {
|
||||
if err := conn.SetWriteDeadline(time.Now().Add(5 * time.Second)); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := conn.WriteMessage(websocket.BinaryMessage, encodeVPNIngressPacketBatch(packets)); err != nil {
|
||||
return err
|
||||
}
|
||||
continue
|
||||
}
|
||||
if time.Since(lastPing) >= 15*time.Second {
|
||||
if err := conn.SetWriteDeadline(time.Now().Add(5 * time.Second)); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := conn.WriteMessage(websocket.PingMessage, []byte("rap-vpn")); err != nil {
|
||||
return err
|
||||
}
|
||||
lastPing = time.Now()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s Server) backendVPNPacketPost(ctx context.Context, clusterID string, vpnConnectionID string, batchPayload []byte) error {
|
||||
target := strings.TrimRight(strings.TrimSpace(s.BackendProxyBaseURL), "/")
|
||||
if target == "" {
|
||||
return ErrRouteNotFound
|
||||
}
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, target+"/clusters/"+clusterID+"/vpn-connections/"+vpnConnectionID+"/tunnel/client/packets?batch=true", bytes.NewReader(batchPayload))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/octet-stream")
|
||||
req.Header.Set("X-RAP-Entry-Node", s.Local.NodeID)
|
||||
req.Header.Set("X-RAP-Entry-Cluster", s.Local.ClusterID)
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||
return fmt.Errorf("backend vpn packet post failed: status=%d", resp.StatusCode)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s Server) backendVPNPacketGet(ctx context.Context, clusterID string, vpnConnectionID string, timeout time.Duration) ([][]byte, error) {
|
||||
target := strings.TrimRight(strings.TrimSpace(s.BackendProxyBaseURL), "/")
|
||||
if target == "" {
|
||||
return nil, ErrRouteNotFound
|
||||
}
|
||||
if timeout <= 0 {
|
||||
timeout = 50 * time.Millisecond
|
||||
}
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, target+"/clusters/"+clusterID+"/vpn-connections/"+vpnConnectionID+"/tunnel/client/packets?batch=true&timeout_ms="+strconv.FormatInt(timeout.Milliseconds(), 10), nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
req.Header.Set("Accept", "application/vnd.rap.vpn-packet-batch.v1")
|
||||
req.Header.Set("X-RAP-Entry-Node", s.Local.NodeID)
|
||||
req.Header.Set("X-RAP-Entry-Cluster", s.Local.ClusterID)
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode == http.StatusNoContent {
|
||||
return nil, nil
|
||||
}
|
||||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||
return nil, fmt.Errorf("backend vpn packet get failed: status=%d", resp.StatusCode)
|
||||
}
|
||||
body, err := io.ReadAll(io.LimitReader(resp.Body, MaxProductionVPNPacketPayloadBytes))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(body) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
return decodeVPNIngressPacketBatch(body)
|
||||
}
|
||||
|
||||
func (s Server) proxyVPNPacketIngressToBackend(w http.ResponseWriter, r *http.Request, body []byte) bool {
|
||||
return s.proxyVPNPacketIngressToBackendPath(w, r, body, "")
|
||||
}
|
||||
|
||||
func (s Server) proxyVPNPacketIngressToBackendPath(w http.ResponseWriter, r *http.Request, body []byte, backendPath string) bool {
|
||||
if strings.TrimSpace(s.BackendProxyBaseURL) == "" {
|
||||
return false
|
||||
}
|
||||
target, err := url.Parse(s.BackendProxyBaseURL)
|
||||
if err != nil || target.Scheme == "" || target.Host == "" {
|
||||
return false
|
||||
}
|
||||
if strings.EqualFold(target.Host, r.Host) {
|
||||
return false
|
||||
}
|
||||
var reader io.Reader
|
||||
if body != nil {
|
||||
reader = bytes.NewReader(body)
|
||||
}
|
||||
requestURI := r.URL.RequestURI()
|
||||
if backendPath != "" {
|
||||
requestURI = backendPath
|
||||
if r.URL.RawQuery != "" {
|
||||
requestURI += "?" + r.URL.RawQuery
|
||||
}
|
||||
}
|
||||
req, err := http.NewRequestWithContext(r.Context(), r.Method, target.Scheme+"://"+target.Host+requestURI, reader)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
for _, key := range []string{"Accept", "Content-Type"} {
|
||||
if value := r.Header.Get(key); value != "" {
|
||||
req.Header.Set(key, value)
|
||||
}
|
||||
}
|
||||
req.Header.Set("X-RAP-Entry-Node", s.Local.NodeID)
|
||||
req.Header.Set("X-RAP-Entry-Cluster", s.Local.ClusterID)
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
for _, key := range []string{"Content-Type"} {
|
||||
if value := resp.Header.Get(key); value != "" {
|
||||
w.Header().Set(key, value)
|
||||
}
|
||||
}
|
||||
w.WriteHeader(resp.StatusCode)
|
||||
_, _ = io.Copy(w, resp.Body)
|
||||
return true
|
||||
}
|
||||
|
||||
type fabricServiceChannelLeaseAuthorityPayload struct {
|
||||
SchemaVersion string `json:"schema_version"`
|
||||
ChannelID string `json:"channel_id"`
|
||||
@@ -1443,10 +841,6 @@ func (d fabricServiceChannelRequestDecision) BackendFallbackAllowed() bool {
|
||||
return strings.TrimSpace(d.BackendRelayPolicy) != "disabled"
|
||||
}
|
||||
|
||||
func (s Server) validateFabricServiceChannelVPNRequest(w http.ResponseWriter, r *http.Request, clusterID string, channelID string, vpnConnectionID string) (fabricServiceChannelRequestDecision, bool) {
|
||||
return s.validateFabricServiceChannelRequest(w, r, clusterID, channelID, vpnConnectionID, FabricServiceClassVPNPackets, ProductionChannelVPNPacket)
|
||||
}
|
||||
|
||||
func (s Server) validateFabricServiceChannelRequest(w http.ResponseWriter, r *http.Request, clusterID string, channelID string, resourceID string, expectedServiceClass string, defaultChannelClass string) (fabricServiceChannelRequestDecision, bool) {
|
||||
var decision fabricServiceChannelRequestDecision
|
||||
expectedServiceClass = strings.TrimSpace(strings.ToLower(expectedServiceClass))
|
||||
@@ -1485,7 +879,7 @@ func (s Server) validateFabricServiceChannelRequest(w http.ResponseWriter, r *ht
|
||||
http.Error(w, err.Error(), http.StatusForbidden)
|
||||
return decision, false
|
||||
}
|
||||
decision.AcceptedBy = "legacy_unsigned"
|
||||
decision.AcceptedBy = "token_authorized"
|
||||
decision.ServiceClass = serviceClass
|
||||
decision.ChannelClass = channelClass
|
||||
if payload != nil && (payload.Status == "degraded_fallback" || payload.PrimaryRoute.Status == "missing_route_intent") {
|
||||
@@ -1571,30 +965,6 @@ func (s Server) logFabricServiceChannelAccess(r *http.Request, clusterID string,
|
||||
s.FabricServiceChannelLogger(entry)
|
||||
}
|
||||
|
||||
func (s Server) logFabricServiceChannelViolation(r *http.Request, clusterID string, channelID string, resourceID string, backendRelayPolicy string, status string, reason string) {
|
||||
if s.FabricServiceChannelLogger == nil || strings.TrimSpace(channelID) == "" {
|
||||
return
|
||||
}
|
||||
entry := FabricServiceChannelAccessLogEntry{
|
||||
Event: "fabric_service_channel_data_plane_violation",
|
||||
ClusterID: clusterID,
|
||||
ChannelID: channelID,
|
||||
ResourceID: resourceID,
|
||||
LocalNodeID: s.Local.NodeID,
|
||||
BackendRelayPolicy: strings.TrimSpace(backendRelayPolicy),
|
||||
ViolationStatus: strings.TrimSpace(status),
|
||||
ViolationReason: strings.TrimSpace(reason),
|
||||
OccurredAt: time.Now().UTC(),
|
||||
}
|
||||
if r != nil {
|
||||
entry.Method = r.Method
|
||||
if r.URL != nil {
|
||||
entry.Path = r.URL.Path
|
||||
}
|
||||
}
|
||||
s.FabricServiceChannelLogger(entry)
|
||||
}
|
||||
|
||||
func (s Server) verifyFabricServiceChannelLeaseAuthority(r *http.Request, clusterID string, channelID string, resourceID string, serviceClass string, channelClass string, token string) (*fabricServiceChannelLeaseAuthorityPayload, error) {
|
||||
publicKey := strings.TrimSpace(s.ClusterAuthorityPublicKey)
|
||||
payloadHeader := strings.TrimSpace(r.Header.Get("X-RAP-Service-Channel-Authority-Payload"))
|
||||
@@ -1657,15 +1027,15 @@ func validateFabricServiceChannelDataPlaneContract(contract fabricServiceChannel
|
||||
}
|
||||
requiredFlowClass = strings.TrimSpace(strings.ToLower(requiredFlowClass))
|
||||
if contract.SchemaVersion != "rap.fabric_service_channel_data_plane.v1" ||
|
||||
contract.WorkingDataTransport != "fabric_service_channel" ||
|
||||
contract.WorkingDataTransport != "fabric_quic_route" ||
|
||||
contract.SteadyStateTransport != "fabric_route" ||
|
||||
(contract.BackendRelayPolicy != "degraded_fallback_only" && contract.BackendRelayPolicy != "disabled") ||
|
||||
contract.BackendRelayPolicy != "disabled" ||
|
||||
!contract.ServiceNeutral ||
|
||||
!contract.ProtocolAgnostic ||
|
||||
contract.LogicalFlowMode != "multi_flow_isolated" {
|
||||
return fmt.Errorf("%w: unsupported service channel data-plane contract", ErrUnauthorizedChannel)
|
||||
}
|
||||
if contract.Mode != "" && contract.Mode != "fabric_primary" && contract.Mode != "degraded_backend_fallback" {
|
||||
if contract.Mode != "" && contract.Mode != "fabric_primary" && contract.Mode != "fabric_quic_only" {
|
||||
return fmt.Errorf("%w: unsupported service channel data-plane mode", ErrUnauthorizedChannel)
|
||||
}
|
||||
if requiredFlowClass != "" && len(contract.RequiredFlowIsolationClasses) > 0 && !containsString(contract.RequiredFlowIsolationClasses, requiredFlowClass) {
|
||||
@@ -1796,29 +1166,6 @@ func fabricServiceChannelBearerToken(r *http.Request) string {
|
||||
return strings.TrimSpace(r.URL.Query().Get("service_channel_token"))
|
||||
}
|
||||
|
||||
// fabricSessionTokenHash returns the lowercase hex SHA-256 digest of token
// with surrounding whitespace removed.
func fabricSessionTokenHash(token string) string {
	hasher := sha256.New()
	hasher.Write([]byte(strings.TrimSpace(token)))
	return hex.EncodeToString(hasher.Sum(nil))
}
|
||||
|
||||
func fabricSessionBearerToken(r *http.Request) string {
|
||||
if r == nil {
|
||||
return ""
|
||||
}
|
||||
if token := strings.TrimSpace(r.Header.Get("X-RAP-Fabric-Session-Token")); token != "" {
|
||||
return token
|
||||
}
|
||||
auth := strings.TrimSpace(r.Header.Get("Authorization"))
|
||||
if len(auth) > len("Bearer ") && strings.EqualFold(auth[:len("Bearer ")], "Bearer ") {
|
||||
return strings.TrimSpace(auth[len("Bearer "):])
|
||||
}
|
||||
return strings.TrimSpace(r.URL.Query().Get("fabric_session_token"))
|
||||
}
|
||||
|
||||
func isAllowedFabricServiceVPNChannel(channel string) bool {
|
||||
return isAllowedFabricServiceChannelForClass(FabricServiceClassVPNPackets, channel)
|
||||
}
|
||||
|
||||
func isAllowedFabricServiceChannelForClass(serviceClass string, channel string) bool {
|
||||
serviceClass = strings.TrimSpace(strings.ToLower(serviceClass))
|
||||
channel = strings.TrimSpace(strings.ToLower(channel))
|
||||
@@ -1846,25 +1193,6 @@ func containsString(values []string, target string) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// parseFabricServiceChannelVPNPacketWebSocketPath matches paths of the form
//
//	/api/v1/clusters/{cluster}/fabric/service-channels/{channel}/vpn-connections/{connection}/packets/ws
//
// and returns the cluster, channel and VPN connection identifiers. Leading
// and trailing slashes are ignored. The final result is false (with empty
// identifiers) when the path has a different shape or any identifier is
// empty.
func parseFabricServiceChannelVPNPacketWebSocketPath(path string) (string, string, string, bool) {
	// An empty entry marks a position that must hold a non-empty identifier.
	layout := [...]string{
		"api", "v1", "clusters", "",
		"fabric", "service-channels", "",
		"vpn-connections", "",
		"packets", "ws",
	}
	segments := strings.Split(strings.Trim(path, "/"), "/")
	if len(segments) != len(layout) {
		return "", "", "", false
	}
	for position, literal := range layout {
		segment := segments[position]
		if literal == "" {
			if segment == "" {
				return "", "", "", false
			}
			continue
		}
		if segment != literal {
			return "", "", "", false
		}
	}
	clusterID, channelID, vpnConnectionID := segments[3], segments[6], segments[8]
	return clusterID, channelID, vpnConnectionID, true
}
|
||||
|
||||
func parseFabricServiceChannelRemoteWorkspacePath(path string) (string, string, string, string, bool, bool) {
|
||||
parts := strings.Split(strings.Trim(path, "/"), "/")
|
||||
if len(parts) == 11 &&
|
||||
@@ -1897,6 +1225,34 @@ func parseFabricServiceChannelRemoteWorkspacePath(path string) (string, string,
|
||||
return parts[3], parts[6], parts[8], strings.TrimSpace(strings.ToLower(parts[10])), false, true
|
||||
}
|
||||
|
||||
func (s Server) handleFabricServiceChannelVPNPacketIngress(w http.ResponseWriter, r *http.Request) bool {
|
||||
if isFabricServiceChannelVPNPacketWebSocketPath(r.URL.Path) {
|
||||
http.Error(w, "fabric service-channel WebSocket dataplane is removed; use QUIC fabric route", http.StatusGone)
|
||||
return true
|
||||
}
|
||||
if _, _, _, ok := parseFabricServiceChannelVPNPacketPath(r.URL.Path); !ok {
|
||||
return false
|
||||
}
|
||||
http.Error(w, "fabric service-channel HTTP dataplane is removed; use QUIC fabric route", http.StatusGone)
|
||||
return true
|
||||
}
|
||||
|
||||
// isFabricServiceChannelVPNPacketWebSocketPath reports whether path has the
// form
//
//	/api/v1/clusters/{cluster}/fabric/service-channels/{channel}/vpn-connections/{connection}/packets/ws
//
// with non-empty identifiers. Leading and trailing slashes are ignored.
func isFabricServiceChannelVPNPacketWebSocketPath(path string) bool {
	// An empty entry marks a position that must hold a non-empty identifier.
	layout := [...]string{
		"api", "v1", "clusters", "",
		"fabric", "service-channels", "",
		"vpn-connections", "",
		"packets", "ws",
	}
	segments := strings.Split(strings.Trim(path, "/"), "/")
	if len(segments) != len(layout) {
		return false
	}
	for position, literal := range layout {
		segment := segments[position]
		switch {
		case literal == "" && segment == "":
			return false
		case literal != "" && segment != literal:
			return false
		}
	}
	return true
}
|
||||
|
||||
func parseFabricServiceChannelVPNPacketPath(path string) (string, string, string, bool) {
|
||||
parts := strings.Split(strings.Trim(path, "/"), "/")
|
||||
if len(parts) != 10 ||
|
||||
@@ -1915,7 +1271,7 @@ func parseFabricServiceChannelVPNPacketPath(path string) (string, string, string
|
||||
return parts[3], parts[6], parts[8], true
|
||||
}
|
||||
|
||||
func parseVPNClientPacketWebSocketPath(path string) (string, string, bool) {
|
||||
func isVPNClientPacketWebSocketPath(path string) bool {
|
||||
parts := strings.Split(strings.Trim(path, "/"), "/")
|
||||
if len(parts) != 10 ||
|
||||
parts[0] != "api" ||
|
||||
@@ -1926,12 +1282,9 @@ func parseVPNClientPacketWebSocketPath(path string) (string, string, bool) {
|
||||
parts[7] != "client" ||
|
||||
parts[8] != "packets" ||
|
||||
parts[9] != "ws" {
|
||||
return "", "", false
|
||||
return false
|
||||
}
|
||||
if parts[3] == "" || parts[5] == "" {
|
||||
return "", "", false
|
||||
}
|
||||
return parts[3], parts[5], true
|
||||
return parts[3] != "" && parts[5] != ""
|
||||
}
|
||||
|
||||
func parseVPNClientPacketPath(path string) (string, string, bool) {
|
||||
@@ -1952,28 +1305,6 @@ func parseVPNClientPacketPath(path string) (string, string, bool) {
|
||||
return parts[3], parts[5], true
|
||||
}
|
||||
|
||||
func vpnIngressTimeout(r *http.Request) time.Duration {
|
||||
timeoutMs, _ := strconv.Atoi(r.URL.Query().Get("timeout_ms"))
|
||||
if timeoutMs <= 0 {
|
||||
timeoutMs = 25000
|
||||
}
|
||||
if timeoutMs > 30000 {
|
||||
timeoutMs = 30000
|
||||
}
|
||||
return time.Duration(timeoutMs) * time.Millisecond
|
||||
}
|
||||
|
||||
func vpnIngressStatusCode(err error) int {
|
||||
switch err {
|
||||
case ErrForwardRuntimeUnavailable, ErrRouteNotFound, ErrForwardPeerUnavailable:
|
||||
return http.StatusServiceUnavailable
|
||||
case ErrUnauthorizedChannel, ErrClusterMismatch, ErrNodeMismatch:
|
||||
return http.StatusForbidden
|
||||
default:
|
||||
return http.StatusBadGateway
|
||||
}
|
||||
}
|
||||
|
||||
func encodeVPNIngressPacketBatch(packets [][]byte) []byte {
|
||||
packets = cleanVPNIngressPacketBatch(packets)
|
||||
total := 0
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,49 @@
|
||||
package fabricvpn
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestLiveFabricControlRequest(t *testing.T) {
|
||||
cfg := strings.TrimSpace(os.Getenv("RAP_LIVE_FABRIC_CONTROL_CONFIG"))
|
||||
if cfg == "" {
|
||||
t.Skip("set RAP_LIVE_FABRIC_CONTROL_CONFIG to run live fabric control test")
|
||||
}
|
||||
path := strings.TrimSpace(os.Getenv("RAP_LIVE_FABRIC_CONTROL_PATH"))
|
||||
if path == "" {
|
||||
path = "/organizations/?user_id=3fded8a8-f19b-4974-919f-44d34ac5f63d"
|
||||
}
|
||||
method := strings.TrimSpace(os.Getenv("RAP_LIVE_FABRIC_CONTROL_METHOD"))
|
||||
if method == "" {
|
||||
method = "GET"
|
||||
}
|
||||
body := strings.TrimSpace(os.Getenv("RAP_LIVE_FABRIC_CONTROL_BODY"))
|
||||
manager := NewManager()
|
||||
if err := manager.Start(cfg); err != nil {
|
||||
t.Fatalf("start manager: %v", err)
|
||||
}
|
||||
defer manager.Stop()
|
||||
request := map[string]any{"method": method, "path": path}
|
||||
if body != "" {
|
||||
var raw json.RawMessage
|
||||
if err := json.Unmarshal([]byte(body), &raw); err != nil {
|
||||
t.Fatalf("invalid request body: %v", err)
|
||||
}
|
||||
request["body"] = raw
|
||||
}
|
||||
payload, err := json.Marshal(request)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
response, err := manager.ControlRequest(string(payload))
|
||||
if err != nil {
|
||||
t.Fatalf("control request failed: %v", err)
|
||||
}
|
||||
if !strings.Contains(response, "status_code") {
|
||||
t.Fatalf("unexpected control response: %s", response)
|
||||
}
|
||||
t.Log(response)
|
||||
}
|
||||
@@ -243,7 +243,7 @@ func (m *Manager) connect(ctx context.Context, cfg runtimeConfig, cancel context
|
||||
if lastErr == nil {
|
||||
lastErr = fmt.Errorf("no QUIC exit endpoints available")
|
||||
}
|
||||
return lastErr
|
||||
return fmt.Errorf("fabric bootstrap failed after %d endpoint candidates: %w", len(cfg.Endpoints), lastErr)
|
||||
}
|
||||
|
||||
func (m *Manager) protectedQUICDialer() func(context.Context, string, *tls.Config, *quic.Config) (*quic.Conn, error) {
|
||||
@@ -447,11 +447,17 @@ func (m *Manager) ControlRequest(payloadJSON string) (string, error) {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return "", ctx.Err()
|
||||
case err := <-session.Errors():
|
||||
case err, ok := <-session.Errors():
|
||||
if !ok {
|
||||
return "", fmt.Errorf("fabric control error stream closed")
|
||||
}
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
case frame := <-session.Frames():
|
||||
case frame, ok := <-session.Frames():
|
||||
if !ok {
|
||||
return "", fmt.Errorf("fabric control stream closed")
|
||||
}
|
||||
if frame.Type != fabricproto.FrameData || frame.StreamID != mesh.FabricControlForwardQUICStreamID {
|
||||
continue
|
||||
}
|
||||
@@ -460,7 +466,7 @@ func (m *Manager) ControlRequest(payloadJSON string) (string, error) {
|
||||
return "", err
|
||||
}
|
||||
if response.Error != "" {
|
||||
return "", fmt.Errorf(response.Error)
|
||||
return "", fmt.Errorf("%s", response.Error)
|
||||
}
|
||||
return string(response.Payload), nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user