Refactor RDP proxy handling and update related tests

This commit is contained in:
2026-05-17 20:38:35 +03:00
parent 8e9402580f
commit d551e57fd5
172 changed files with 22117 additions and 2509 deletions
@@ -0,0 +1,17 @@
# Build stage: compiles the fabric-loadtest command from the rap-node-agent module.
FROM golang:1.25-bookworm AS build
WORKDIR /src
# go.mod and go.sum are copied and the modules downloaded before the rest of
# the source is copied (presumably so the download stays in its own cached layer).
COPY agents/rap-node-agent/go.mod ./
COPY agents/rap-node-agent/go.sum ./
RUN go mod download
COPY agents/rap-node-agent/ ./
# CGO is disabled and the target is fixed to linux/amd64.
RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o /out/fabric-loadtest ./cmd/fabric-loadtest
# Runtime stage: slim Debian image with CA certificates plus the iproute2,
# iptables, iputils-ping and procps packages installed next to the binary.
FROM debian:bookworm-slim
RUN apt-get update \
&& apt-get install -y --no-install-recommends ca-certificates iproute2 iptables iputils-ping procps \
&& rm -rf /var/lib/apt/lists/*
COPY --from=build /out/fabric-loadtest /usr/local/bin/fabric-loadtest
ENTRYPOINT ["/usr/local/bin/fabric-loadtest"]
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,760 @@
package main
import (
"bytes"
"context"
"strings"
"testing"
"time"
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/mesh"
)
func TestRouteModeCoverageVerdictRequiresMixedModes(t *testing.T) {
report := loadtestReport{
Config: loadtestConfig{
TopologyProfile: "mixed-public-nat-lan-relay",
Targets: []string{"a", "b", "c", "d"},
FailTarget: -1,
ImpairTarget: -1,
},
SuccessfulStreams: 4,
TargetStats: map[string]targetStats{
"a": {RouteModes: map[string]int{string(mesh.FabricRouteLAN): 1}},
"b": {RouteModes: map[string]int{string(mesh.FabricRouteICE): 1}},
"c": {RouteModes: map[string]int{string(mesh.FabricRouteReverse): 1}},
"d": {RouteModes: map[string]int{}},
},
}
reasons := routeModeCoverageVerdictReasons(report)
if len(reasons) != 1 || !strings.Contains(reasons[0], string(mesh.FabricRouteRelay)) {
t.Fatalf("reasons = %v, want missing relay route mode", reasons)
}
report.TargetStats["d"] = targetStats{RouteModes: map[string]int{string(mesh.FabricRouteRelay): 1}}
if reasons := routeModeCoverageVerdictReasons(report); len(reasons) != 0 {
t.Fatalf("reasons = %v, want full coverage pass", reasons)
}
}
// TestLegacyRouteModeVerdictRejectsNonQUICModes checks that the relay,
// outbound_reverse and wss route-mode counts are reported together in a single
// reason, and that the mesh.FabricRoute* modes produce no reasons.
func TestLegacyRouteModeVerdictRejectsNonQUICModes(t *testing.T) {
	legacyModes := map[string]int{
		"direct_quic":      4,
		"relay":            1,
		"outbound_reverse": 2,
		"wss":              3,
	}
	report := loadtestReport{
		TargetStats: map[string]targetStats{
			"a": {RouteModes: legacyModes},
		},
	}

	rejected := legacyRouteModeVerdictReasons(report)
	matched := len(rejected) == 1
	for _, fragment := range []string{"relay:1", "outbound_reverse:2", "wss:3"} {
		// Short-circuits before indexing when the reason count is already wrong.
		matched = matched && strings.Contains(rejected[0], fragment)
	}
	if !matched {
		t.Fatalf("reasons = %v, want legacy route mode failure", rejected)
	}

	quicModes := map[string]int{}
	quicModes[string(mesh.FabricRouteDirect)] = 1
	quicModes[string(mesh.FabricRouteLAN)] = 1
	quicModes[string(mesh.FabricRouteICE)] = 1
	quicModes[string(mesh.FabricRouteReverse)] = 1
	quicModes[string(mesh.FabricRouteRelay)] = 1
	report.TargetStats["a"] = targetStats{RouteModes: quicModes}
	if accepted := legacyRouteModeVerdictReasons(report); len(accepted) > 0 {
		t.Fatalf("reasons = %v, want QUIC modes accepted", accepted)
	}
}
// TestTargetEndpointPolicyVerdictRejectsNonQUICTargets checks that http, ws,
// scheme-less and empty targets are all named in one reason, and that quic://
// targets (including an upper-case, space-padded one) produce no reasons.
func TestTargetEndpointPolicyVerdictRejectsNonQUICTargets(t *testing.T) {
	var report loadtestReport
	report.Config.Targets = []string{
		"quic://a:19443",
		"http://b:19443",
		"ws://c:19443",
		"d:19443",
		"",
	}

	rejected := targetEndpointPolicyVerdictReasons(report)
	matched := len(rejected) == 1
	for _, fragment := range []string{"http://b:19443", "ws://c:19443", "d:19443", "<empty>"} {
		// Short-circuits before indexing when the reason count is already wrong.
		matched = matched && strings.Contains(rejected[0], fragment)
	}
	if !matched {
		t.Fatalf("reasons = %v, want non-QUIC target failure", rejected)
	}

	report.Config.Targets = []string{"quic://a:19443", " QUIC://b:19443 "}
	if accepted := targetEndpointPolicyVerdictReasons(report); len(accepted) > 0 {
		t.Fatalf("reasons = %v, want QUIC targets accepted", accepted)
	}
}
func TestRunClientRejectsNonQUICTargetBeforeDial(t *testing.T) {
_, err := runClient(context.Background(), loadtestConfig{
Targets: []string{"http://127.0.0.1:19443"},
Streams: 1,
Concurrency: 1,
BytesPerStream: 1,
PayloadSize: 1,
})
if err == nil || !strings.Contains(err.Error(), "non_quic_targets=http://127.0.0.1:19443") {
t.Fatalf("err = %v, want non-QUIC target validation error", err)
}
}
// TestFillLoadtestPayloadVariesByStreamAndSequence checks that changing the
// arguments passed to fillLoadtestPayload changes the generated bytes, and
// that a payload is not one byte repeated.
func TestFillLoadtestPayloadVariesByStreamAndSequence(t *testing.T) {
	const size = 128
	first, second, third := make([]byte, size), make([]byte, size), make([]byte, size)

	fillLoadtestPayload(first, 7, 9, 1, 0)
	fillLoadtestPayload(second, 7, 9, 2, int64(len(first)))
	fillLoadtestPayload(third, 8, 10, 1, 0)

	switch {
	case bytes.Equal(first, second):
		t.Fatal("payload did not vary by sequence/offset")
	case bytes.Equal(first, third):
		t.Fatal("payload did not vary by stream")
	case bytes.Count(first, first[:1]) == len(first):
		// Every byte equals the first one.
		t.Fatal("payload collapsed to a constant byte")
	}
}
// TestFillLoadtestPayloadIsDeterministic checks that two calls to
// fillLoadtestPayload with identical arguments fill identical bytes.
func TestFillLoadtestPayloadIsDeterministic(t *testing.T) {
	const size = 128
	buffers := [2][]byte{make([]byte, size), make([]byte, size)}
	for _, buf := range buffers {
		fillLoadtestPayload(buf, 7, 9, 1, 0)
	}
	if bytes.Equal(buffers[0], buffers[1]) {
		return
	}
	t.Fatal("payload is not deterministic")
}
// TestFillLoadtestPayloadHandlesShortFinalChunk checks that a 17-byte buffer
// is not left entirely zeroed by fillLoadtestPayload.
func TestFillLoadtestPayloadHandlesShortFinalChunk(t *testing.T) {
	chunk := make([]byte, 17)
	fillLoadtestPayload(chunk, 7, 9, 3, 256)

	// Any non-zero byte shows the chunk was written.
	for _, b := range chunk {
		if b != 0 {
			return
		}
	}
	t.Fatal("short payload chunk stayed zeroed")
}
// TestVerdictFailsSuccessfulStreamAckMismatch checks that verdict returns
// "fail" with the reason "ack_mismatched_streams=1" when a report records one
// ACK-mismatched stream, even though the stream is counted as successful.
func TestVerdictFailsSuccessfulStreamAckMismatch(t *testing.T) {
	report := loadtestReport{
		Config:               loadtestConfig{FailTarget: -1, ImpairTarget: -1, Concurrency: 1},
		TotalStreams:         1,
		SuccessfulStreams:    1,
		BytesSent:            1024,
		FramesSent:           2,
		AcksReceived:         1,
		AckMismatchedStreams: 1,
		ChannelOpens:         1,
		ChannelCloses:        1,
		RoutePressure:        mesh.FabricRoutePressureSnapshot{AcquiredTotal: 1, ReleasedTotal: 1, MaxActiveTotal: 1},
	}

	outcome, reasons := verdict(report)
	if outcome != "fail" {
		t.Fatalf("verdict = %q, want fail", outcome)
	}
	for _, reason := range reasons {
		if reason == "ack_mismatched_streams=1" {
			return
		}
	}
	t.Fatalf("reasons = %v, want ack mismatch reason", reasons)
}
// TestVerdictFailsAckIntegrityError checks that verdict returns "fail" with
// the reason "ack_integrity_errors=1" when a report records one ACK integrity
// error.
func TestVerdictFailsAckIntegrityError(t *testing.T) {
	report := loadtestReport{
		Config:             loadtestConfig{FailTarget: -1, ImpairTarget: -1, Concurrency: 1},
		TotalStreams:       1,
		FailedStreams:      1,
		BytesSent:          1024,
		FramesSent:         1,
		AcksReceived:       1,
		AckIntegrityErrors: 1,
		ChannelOpens:       1,
		ChannelCloses:      1,
		RoutePressure:      mesh.FabricRoutePressureSnapshot{AcquiredTotal: 1, ReleasedTotal: 1, MaxActiveTotal: 1},
	}

	outcome, reasons := verdict(report)
	if outcome != "fail" {
		t.Fatalf("verdict = %q, want fail", outcome)
	}
	for _, reason := range reasons {
		if reason == "ack_integrity_errors=1" {
			return
		}
	}
	t.Fatalf("reasons = %v, want ack integrity reason", reasons)
}
// TestVerdictFailsBelowMinimumThroughput checks that, with MinThroughputMbps
// set to 100, a ThroughputBps of 99,000,000 fails with a "throughput_bps="
// reason and a ThroughputBps of exactly 100,000,000 passes.
func TestVerdictFailsBelowMinimumThroughput(t *testing.T) {
	report := loadtestReport{
		Config: loadtestConfig{
			FailTarget:        -1,
			ImpairTarget:      -1,
			Concurrency:       1,
			MinThroughputMbps: 100,
		},
		TotalStreams:      1,
		SuccessfulStreams: 1,
		BytesSent:         1024,
		FramesSent:        1,
		AcksReceived:      1,
		ThroughputBps:     99 * 1000 * 1000,
		ChannelOpens:      1,
		ChannelCloses:     1,
		RoutePressure:     mesh.FabricRoutePressureSnapshot{AcquiredTotal: 1, ReleasedTotal: 1, MaxActiveTotal: 1},
	}

	outcome, reasons := verdict(report)
	if outcome != "fail" {
		t.Fatalf("verdict = %q, want fail", outcome)
	}
	sawThroughput := false
	for i := 0; i < len(reasons) && !sawThroughput; i++ {
		sawThroughput = strings.HasPrefix(reasons[i], "throughput_bps=")
	}
	if !sawThroughput {
		t.Fatalf("reasons = %v, want throughput reason", reasons)
	}

	// Exactly at the threshold is expected to pass.
	report.ThroughputBps = 100 * 1000 * 1000
	outcome, reasons = verdict(report)
	if outcome != "pass" {
		t.Fatalf("verdict = %q reasons=%v, want pass at threshold", outcome, reasons)
	}
}
// TestVerdictFailsBelowMinimumChannelChurn checks that, with MinChannelChurn
// set to 1000, a ChannelChurnPerSec of 999 fails with a
// "channel_churn_per_sec=" reason and a value of exactly 1000 passes.
func TestVerdictFailsBelowMinimumChannelChurn(t *testing.T) {
	report := loadtestReport{
		Config: loadtestConfig{
			FailTarget:      -1,
			ImpairTarget:    -1,
			Concurrency:     1,
			MinChannelChurn: 1000,
		},
		TotalStreams:       1,
		SuccessfulStreams:  1,
		BytesSent:          1024,
		FramesSent:         1,
		AcksReceived:       1,
		ChannelOpens:       1,
		ChannelCloses:      1,
		ChannelChurnPerSec: 999,
		RoutePressure:      mesh.FabricRoutePressureSnapshot{AcquiredTotal: 1, ReleasedTotal: 1, MaxActiveTotal: 1},
	}

	outcome, reasons := verdict(report)
	if outcome != "fail" {
		t.Fatalf("verdict = %q, want fail", outcome)
	}
	sawChurn := false
	for i := 0; i < len(reasons) && !sawChurn; i++ {
		sawChurn = strings.HasPrefix(reasons[i], "channel_churn_per_sec=")
	}
	if !sawChurn {
		t.Fatalf("reasons = %v, want channel churn reason", reasons)
	}

	// Exactly at the threshold is expected to pass.
	report.ChannelChurnPerSec = 1000
	outcome, reasons = verdict(report)
	if outcome != "pass" {
		t.Fatalf("verdict = %q reasons=%v, want pass at threshold", outcome, reasons)
	}
}
// TestTargetByteDistributionVerdictDetectsSkew checks that a 2500/500/500/500
// byte split across four equally used targets yields one
// "target_byte_distribution_skew=" reason, and that an even 1000-byte split
// yields none.
func TestTargetByteDistributionVerdictDetectsSkew(t *testing.T) {
	targets := []string{"a", "b", "c", "d"}
	streams := make(map[string]int, len(targets))
	for _, target := range targets {
		streams[target] = 10
	}
	report := loadtestReport{
		Config: loadtestConfig{
			Targets:        targets,
			FailTarget:     -1,
			ImpairTarget:   -1,
			Concurrency:    1,
			BytesPerStream: 100,
		},
		SuccessfulStreams: 40,
		BytesSent:         4000,
		TargetStreams:     streams,
		TargetBytes:       map[string]int64{"a": 2500, "b": 500, "c": 500, "d": 500},
	}

	skewed := targetByteDistributionVerdictReasons(report)
	if !(len(skewed) == 1 && strings.HasPrefix(skewed[0], "target_byte_distribution_skew=")) {
		t.Fatalf("reasons = %v, want byte skew reason", skewed)
	}

	even := make(map[string]int64, len(targets))
	for _, target := range targets {
		even[target] = 1000
	}
	report.TargetBytes = even
	if balanced := targetByteDistributionVerdictReasons(report); len(balanced) > 0 {
		t.Fatalf("reasons = %v, want balanced bytes pass", balanced)
	}
}
// TestDistributionVerdictChecksSurvivingTargetsAfterFailure checks that, with
// FailTarget set to index 0, streams landing on only one of the other three
// targets are reported as "1/3_targets_used", and that an even spread over
// those three targets yields no reasons.
func TestDistributionVerdictChecksSurvivingTargetsAfterFailure(t *testing.T) {
	report := loadtestReport{
		Config: loadtestConfig{
			Targets:      []string{"quic://a:1", "quic://b:1", "quic://c:1", "quic://d:1"},
			FailTarget:   0,
			ImpairTarget: -1,
			Concurrency:  8,
		},
		SuccessfulStreams: 90,
		TargetStreams:     map[string]int{"quic://b:1": 90},
	}

	collapsed := targetDistributionVerdictReasons(report)
	if !(len(collapsed) == 1 && strings.HasPrefix(collapsed[0], "target_distribution_collapsed=1/3_targets_used")) {
		t.Fatalf("reasons = %v, want surviving-target collapse", collapsed)
	}

	spread := map[string]int{}
	for _, target := range []string{"quic://b:1", "quic://c:1", "quic://d:1"} {
		spread[target] = 30
	}
	report.TargetStreams = spread
	if balanced := targetDistributionVerdictReasons(report); len(balanced) > 0 {
		t.Fatalf("reasons = %v, want balanced surviving targets pass", balanced)
	}
}
// TestRoutePressureVerdictChecksSurvivingTargetsAfterFailure checks that, with
// FailTarget set to index 0, route pressure recorded on only one of the other
// three routes is reported as "1/3_targets_used", and that equal pressure on
// those three routes yields no reasons.
func TestRoutePressureVerdictChecksSurvivingTargetsAfterFailure(t *testing.T) {
	targets := []string{"quic://a:1", "quic://b:1", "quic://c:1", "quic://d:1"}

	var report loadtestReport
	report.Config = loadtestConfig{
		Targets:      targets,
		FailTarget:   0,
		ImpairTarget: -1,
		Concurrency:  12,
	}
	report.RoutePressure = mesh.FabricRoutePressureSnapshot{
		MaxActive:      map[string]int{loadtestRouteID(1, targets[1]): 12},
		MaxActiveTotal: 12,
	}

	collapsed := routePressureDistributionVerdictReasons(report)
	if !(len(collapsed) == 1 && strings.HasPrefix(collapsed[0], "route_pressure_distribution_collapsed=1/3_targets_used")) {
		t.Fatalf("reasons = %v, want surviving-route-pressure collapse", collapsed)
	}

	spread := map[string]int{}
	spread[loadtestRouteID(1, targets[1])] = 4
	spread[loadtestRouteID(2, targets[2])] = 4
	spread[loadtestRouteID(3, targets[3])] = 4
	report.RoutePressure.MaxActive = spread
	if balanced := routePressureDistributionVerdictReasons(report); len(balanced) > 0 {
		t.Fatalf("reasons = %v, want balanced surviving route pressure pass", balanced)
	}
}
// TestVerdictFailsOverallAckLatencySLO checks that ACK p95/p99 values one
// millisecond above MaxAckP95Ms/MaxAckP99Ms make verdict return "fail" with
// both an "ack_p95_ms=" and an "ack_p99_ms=" reason.
func TestVerdictFailsOverallAckLatencySLO(t *testing.T) {
	report := loadtestReport{
		Config: loadtestConfig{
			FailTarget:   -1,
			ImpairTarget: -1,
			Concurrency:  1,
			MaxAckP95Ms:  10,
			MaxAckP99Ms:  20,
		},
		TotalStreams:      1,
		SuccessfulStreams: 1,
		BytesSent:         1024,
		FramesSent:        1,
		AcksReceived:      1,
		AckP95Ms:          11,
		AckP99Ms:          21,
		ChannelOpens:      1,
		ChannelCloses:     1,
		RoutePressure:     mesh.FabricRoutePressureSnapshot{AcquiredTotal: 1, ReleasedTotal: 1, MaxActiveTotal: 1},
	}

	outcome, reasons := verdict(report)
	if outcome != "fail" {
		t.Fatalf("verdict = %q, want fail", outcome)
	}
	var sawP95, sawP99 bool
	for _, reason := range reasons {
		switch {
		case strings.HasPrefix(reason, "ack_p95_ms="):
			sawP95 = true
		case strings.HasPrefix(reason, "ack_p99_ms="):
			sawP99 = true
		}
	}
	if !(sawP95 && sawP99) {
		t.Fatalf("reasons = %v, want ACK p95 and p99 reasons", reasons)
	}
}
func TestTargetAckVerdictDetectsSlowHealthyTarget(t *testing.T) {
report := loadtestReport{
Config: loadtestConfig{
Targets: []string{"a", "b"},
FailTarget: -1,
ImpairTarget: -1,
MaxTargetAckMs: 10,
},
TargetStats: map[string]targetStats{
"a": {Streams: 10, MaxAckMs: 4},
"b": {Streams: 10, MaxAckMs: 11},
},
}
reasons := targetAckVerdictReasons(report)
if len(reasons) != 1 || !strings.HasPrefix(reasons[0], "target_ack_ms=b:11>10") {
t.Fatalf("reasons = %v, want slow target ack reason", reasons)
}
report.TargetStats["b"] = targetStats{Streams: 10, MaxAckMs: 10}
if reasons := targetAckVerdictReasons(report); len(reasons) != 0 {
t.Fatalf("reasons = %v, want target ack pass at threshold", reasons)
}
}
// TestVerdictFailsSetupLatencySLO checks that setup p95/p99 latencies one
// millisecond above MaxSetupP95Ms/MaxSetupP99Ms make verdict return "fail"
// with both a "setup_p95_ms=" and a "setup_p99_ms=" reason.
func TestVerdictFailsSetupLatencySLO(t *testing.T) {
	report := loadtestReport{
		Config: loadtestConfig{
			FailTarget:    -1,
			ImpairTarget:  -1,
			Concurrency:   1,
			MaxSetupP95Ms: 10,
			MaxSetupP99Ms: 20,
		},
		TotalStreams:      1,
		SuccessfulStreams: 1,
		BytesSent:         1024,
		FramesSent:        1,
		AcksReceived:      1,
		SetupLatencyP95Ms: 11,
		SetupLatencyP99Ms: 21,
		ChannelOpens:      1,
		ChannelCloses:     1,
		RoutePressure:     mesh.FabricRoutePressureSnapshot{AcquiredTotal: 1, ReleasedTotal: 1, MaxActiveTotal: 1},
	}

	outcome, reasons := verdict(report)
	if outcome != "fail" {
		t.Fatalf("verdict = %q, want fail", outcome)
	}
	var sawP95, sawP99 bool
	for _, reason := range reasons {
		switch {
		case strings.HasPrefix(reason, "setup_p95_ms="):
			sawP95 = true
		case strings.HasPrefix(reason, "setup_p99_ms="):
			sawP99 = true
		}
	}
	if !(sawP95 && sawP99) {
		t.Fatalf("reasons = %v, want setup p95 and p99 reasons", reasons)
	}
}
// TestVerdictFailsRerouteLatencySLO checks that reroute p95/p99 latencies one
// millisecond above MaxRerouteP95Ms/MaxRerouteP99Ms make verdict return "fail"
// with both a "reroute_p95_ms=" and a "reroute_p99_ms=" reason.
func TestVerdictFailsRerouteLatencySLO(t *testing.T) {
	report := loadtestReport{
		Config: loadtestConfig{
			FailTarget:      -1,
			ImpairTarget:    -1,
			Concurrency:     1,
			MaxRerouteP95Ms: 10,
			MaxRerouteP99Ms: 20,
		},
		TotalStreams:        1,
		SuccessfulStreams:   1,
		BytesSent:           1024,
		FramesSent:          1,
		AcksReceived:        1,
		RerouteLatencyP95Ms: 11,
		RerouteLatencyP99Ms: 21,
		ChannelOpens:        1,
		ChannelCloses:       1,
		RoutePressure:       mesh.FabricRoutePressureSnapshot{AcquiredTotal: 1, ReleasedTotal: 1, MaxActiveTotal: 1},
	}

	outcome, reasons := verdict(report)
	if outcome != "fail" {
		t.Fatalf("verdict = %q, want fail", outcome)
	}
	var sawP95, sawP99 bool
	for _, reason := range reasons {
		switch {
		case strings.HasPrefix(reason, "reroute_p95_ms="):
			sawP95 = true
		case strings.HasPrefix(reason, "reroute_p99_ms="):
			sawP99 = true
		}
	}
	if !(sawP95 && sawP99) {
		t.Fatalf("reasons = %v, want reroute p95 and p99 reasons", reasons)
	}
}
// TestShouldQuarantineTarget checks which failure reasons shouldQuarantineTarget
// accepts: timeout, refused, reset and no-route reasons must quarantine, while
// a checksum mismatch and "context deadline exceeded" must not.
func TestShouldQuarantineTarget(t *testing.T) {
	for _, reason := range []string{
		"ack timeout or session closed",
		"deadline exceeded",
		"connection refused",
		"connection reset by peer",
		"no route to host",
	} {
		if shouldQuarantineTarget(reason) {
			continue
		}
		t.Fatalf("shouldQuarantineTarget(%q) = false, want true", reason)
	}

	exempt := []struct {
		reason  string
		failure string
	}{
		{"ack payload checksum mismatch", "checksum mismatch should not quarantine a target"},
		{"context deadline exceeded", "context deadline should not quarantine a target"},
	}
	for _, tc := range exempt {
		if shouldQuarantineTarget(tc.reason) {
			t.Fatal(tc.failure)
		}
	}
}
// TestSpreadStartDistributesQuarantinedSlot checks that, with target "a"
// marked degraded, stream indexes 0, 4, ..., 36 are placed on each of the
// remaining targets "b", "c" and "d" at least once.
func TestSpreadStartDistributesQuarantinedSlot(t *testing.T) {
	targets := []string{"a", "b", "c", "d"}
	health := newTargetHealthTracker()
	health.MarkDegraded("a", "connection refused", time.Minute)

	counts := make(map[string]int)
	const rounds = 10 // indexes 0, 4, ..., 36 with four targets
	for round := 0; round < rounds; round++ {
		index := round * len(targets)
		initial, spread := loadtestSpreadStart(index, len(targets))
		chosen := loadtestPreferredTargetIndex(targets, initial, spread, health, -1)
		counts[targets[chosen]]++
	}

	for _, survivor := range targets[1:] {
		if counts[survivor] == 0 {
			t.Fatalf("counts = %v, want degraded slot spread across surviving targets", counts)
		}
	}
}
// TestSpreadUsableTargetDistributesRetries checks that, with target "a" marked
// degraded, 90 consecutive cohorts are split exactly 30/30/30 across "b", "c"
// and "d" by loadtestSpreadUsableTargetIndex.
func TestSpreadUsableTargetDistributesRetries(t *testing.T) {
	targets := []string{"a", "b", "c", "d"}
	health := newTargetHealthTracker()
	health.MarkDegraded("a", "connection refused", time.Minute)

	counts := make(map[string]int)
	cohort := 0
	for cohort < 90 {
		chosen := loadtestSpreadUsableTargetIndex(targets, cohort, health, 0)
		counts[targets[chosen]]++
		cohort++
	}

	for _, survivor := range targets[1:] {
		if counts[survivor] != 30 {
			t.Fatalf("counts = %v, want retry load spread evenly across surviving targets", counts)
		}
	}
}
// TestLoadtestLogicalStreamIDAvoidsReservedTransportStreams checks that
// loadtestLogicalStreamID never returns either reserved forward stream ID and
// always returns a value of at least 10,000 for the sampled indexes.
func TestLoadtestLogicalStreamIDAvoidsReservedTransportStreams(t *testing.T) {
	samples := []int{-1, 0, 1, 999, 1000, 10_000}
	for _, index := range samples {
		streamID := loadtestLogicalStreamID(index)
		switch streamID {
		case mesh.ProductionForwardQUICStreamID, mesh.SyntheticForwardQUICStreamID:
			t.Fatalf("loadtestLogicalStreamID(%d) = %d, collides with reserved transport stream", index, streamID)
		}
		if !(streamID >= 10_000) {
			t.Fatalf("loadtestLogicalStreamID(%d) = %d, want loadtest stream range", index, streamID)
		}
	}
}
func TestLatencyAwareTargetIndexKeepsSlowWANFromOwningPool(t *testing.T) {
targets := []string{"lan-a", "lan-b", "wan"}
health := newTargetHealthTracker()
health.RecordProbes([]targetProbeResult{
{Target: "lan-a", RTTMs: 4, Usable: true},
{Target: "lan-b", RTTMs: 5, Usable: true},
{Target: "wan", RTTMs: 400, Usable: true},
})
counts := map[string]int{}
for index := 0; index < 300; index++ {
targetIndex := loadtestSpreadUsableTargetIndex(targets, index, health, -1)
counts[targets[targetIndex]]++
}
if counts["wan"] == 0 {
t.Fatalf("counts = %v, want slow WAN to stay represented", counts)
}
if counts["wan"] >= counts["lan-a"] || counts["wan"] >= counts["lan-b"] {
t.Fatalf("counts = %v, want latency-aware placement to prefer LAN capacity", counts)
}
}
func TestLatencyAwarePreferredTargetUsesAbsolutePlacementOrdinal(t *testing.T) {
targets := []string{"lan-a", "lan-b", "lan-c", "wan"}
health := newTargetHealthTracker()
health.RecordProbes([]targetProbeResult{
{Target: "lan-a", RTTMs: 4, Usable: true},
{Target: "lan-b", RTTMs: 4, Usable: true},
{Target: "lan-c", RTTMs: 4, Usable: true},
{Target: "wan", RTTMs: 400, Usable: true},
})
counts := map[string]int{}
for index := 0; index < 500; index++ {
preferred, spread := loadtestSpreadStart(index, len(targets))
targetIndex := loadtestPreferredTargetIndex(targets, preferred, spread, health, -1)
counts[targets[targetIndex]]++
}
if len(counts) < len(targets) {
t.Fatalf("counts = %v, want every probed target represented", counts)
}
if counts["wan"] >= counts["lan-a"] || counts["wan"] >= counts["lan-b"] || counts["wan"] >= counts["lan-c"] {
t.Fatalf("counts = %v, want slow WAN weighted below LAN targets", counts)
}
}
// TestHeterogeneousProbeRTTRelaxesEqualDistributionVerdict checks that a
// heavily skewed split between a 4 ms LAN target and a 400 ms WAN target is
// tolerated by the stream, byte and route-pressure distribution verdicts.
func TestHeterogeneousProbeRTTRelaxesEqualDistributionVerdict(t *testing.T) {
	report := loadtestReport{
		Config: loadtestConfig{
			Targets: []string{"lan", "wan"},
			// Left unset, both indexes default to 0. Elsewhere in this file
			// FailTarget: 0 marks the first target as failed and -1 means
			// "none", so the defaults would leave "wan" as the only surviving
			// target and the verdicts could pass without the RTT relaxation
			// being exercised.
			FailTarget:   -1,
			ImpairTarget: -1,
			Concurrency:  64,
		},
		SuccessfulStreams: 100,
		BytesSent:         100 * 1024,
		TargetStreams: map[string]int{
			"lan": 96,
			"wan": 4,
		},
		TargetBytes: map[string]int64{
			"lan": 96 * 1024,
			"wan": 4 * 1024,
		},
		TargetProbes: []targetProbeResult{
			{Target: "lan", RTTMs: 4, Usable: true},
			{Target: "wan", RTTMs: 400, Usable: true},
		},
		RoutePressure: mesh.FabricRoutePressureSnapshot{
			MaxActive: map[string]int{
				loadtestRouteID(0, "lan"): 32,
				loadtestRouteID(1, "wan"): 1,
			},
			MaxActiveTotal: 32,
		},
	}

	if reasons := targetDistributionVerdictReasons(report); len(reasons) != 0 {
		t.Fatalf("targetDistributionVerdictReasons = %v, want heterogeneous RTT tolerated", reasons)
	}
	if reasons := targetByteDistributionVerdictReasons(report); len(reasons) != 0 {
		t.Fatalf("targetByteDistributionVerdictReasons = %v, want heterogeneous RTT tolerated", reasons)
	}
	if reasons := routePressureDistributionVerdictReasons(report); len(reasons) != 0 {
		t.Fatalf("routePressureDistributionVerdictReasons = %v, want heterogeneous RTT tolerated", reasons)
	}
}
// TestTargetHealthQuarantineExpiresButSnapshotKeepsObservation checks that a
// degraded mark stops being reported by IsDegraded once its quarantine window
// has elapsed, while Snapshot still returns the recorded reason.
func TestTargetHealthQuarantineExpiresButSnapshotKeepsObservation(t *testing.T) {
	// The window has to outlast the gap between MarkDegraded and the first
	// IsDegraded call. A one-nanosecond window can already be over by then on
	// a high-resolution clock, which makes the first assertion timing-dependent.
	const quarantine = 50 * time.Millisecond

	health := newTargetHealthTracker()
	health.MarkDegraded("a", "ack timeout", quarantine)
	if !health.IsDegraded("a") {
		t.Fatal("target should be degraded immediately")
	}

	// Poll with a generous deadline instead of relying on one fixed sleep.
	deadline := time.Now().Add(2 * time.Second)
	for health.IsDegraded("a") {
		if time.Now().After(deadline) {
			t.Fatal("target quarantine did not expire")
		}
		time.Sleep(5 * time.Millisecond)
	}

	snapshot := health.Snapshot()
	if snapshot["a"] != "ack timeout" {
		t.Fatalf("snapshot = %v, want historical degraded observation", snapshot)
	}
}
// TestRoutePressureDistributionVerdictDetectsCollapse checks that route
// pressure recorded on only one of four targets yields a single
// "route_pressure_distribution_collapsed=" reason.
func TestRoutePressureDistributionVerdictDetectsCollapse(t *testing.T) {
	var report loadtestReport
	report.Config = loadtestConfig{
		Targets:      []string{"a", "b", "c", "d"},
		FailTarget:   -1,
		ImpairTarget: -1,
		Concurrency:  16,
	}
	report.RoutePressure = mesh.FabricRoutePressureSnapshot{
		MaxActive:      map[string]int{loadtestRouteID(0, "a"): 16},
		MaxActiveTotal: 16,
	}

	collapsed := routePressureDistributionVerdictReasons(report)
	if len(collapsed) == 1 && strings.HasPrefix(collapsed[0], "route_pressure_distribution_collapsed=") {
		return
	}
	t.Fatalf("reasons = %v, want collapsed route pressure reason", collapsed)
}
func TestRoutePressureDistributionVerdictDetectsSkew(t *testing.T) {
report := loadtestReport{
Config: loadtestConfig{
Targets: []string{"a", "b", "c", "d"},
FailTarget: -1,
ImpairTarget: -1,
Concurrency: 16,
},
RoutePressure: mesh.FabricRoutePressureSnapshot{
MaxActive: map[string]int{
loadtestRouteID(0, "a"): 14,
loadtestRouteID(1, "b"): 2,
loadtestRouteID(2, "c"): 2,
loadtestRouteID(3, "d"): 2,
},
MaxActiveTotal: 16,
},
}
reasons := routePressureDistributionVerdictReasons(report)
if len(reasons) != 1 || !strings.HasPrefix(reasons[0], "route_pressure_distribution_skew=") {
t.Fatalf("reasons = %v, want route pressure skew reason", reasons)
}
report.RoutePressure.MaxActive = map[string]int{
loadtestRouteID(0, "a"): 6,
loadtestRouteID(1, "b"): 6,
loadtestRouteID(2, "c"): 5,
loadtestRouteID(3, "d"): 5,
}
if reasons := routePressureDistributionVerdictReasons(report); len(reasons) != 0 {
t.Fatalf("reasons = %v, want balanced route pressure pass", reasons)
}
}
@@ -0,0 +1,199 @@
package main
import (
"context"
"crypto/sha256"
"encoding/base64"
"encoding/hex"
"encoding/json"
"errors"
"flag"
"fmt"
"os"
"strings"
"time"
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/mesh"
)
// smokeOutput is the JSON report that writeOutput prints for one smoke run.
type smokeOutput struct {
// OK is true only when the send returned no error and the result was accepted.
OK bool `json:"ok"`
// Endpoint is the value of the -endpoint flag.
Endpoint string `json:"endpoint"`
// EntryNodeID is the envelope's current hop node ID.
EntryNodeID string `json:"entry_node_id"`
// NextHopID is the envelope's next hop node ID.
NextHopID string `json:"next_hop_node_id"`
// RouteID is the envelope's route ID.
RouteID string `json:"route_id"`
// ElapsedMS is the time spent sending the envelope and waiting for the response, in milliseconds.
ElapsedMS int64 `json:"elapsed_ms"`
// Result is the forward result returned by sendProductionEnvelope.
Result mesh.ProductionForwardResult `json:"result"`
// Error carries a validation or send error message; omitted when empty.
Error string `json:"error,omitempty"`
// EnvelopePath is the parsed -route-path value; omitted when empty.
EnvelopePath []string `json:"envelope_path,omitempty"`
}
// productionForwardResponse is the JSON shape decoded from the payload of the
// response frame in sendProductionEnvelope.
type productionForwardResponse struct {
// Result is returned to the caller when Error is blank.
Result mesh.ProductionForwardResult `json:"result,omitempty"`
// Error, when non-blank after trimming, is turned into the returned error.
Error string `json:"error,omitempty"`
}
// main builds one production envelope from the command-line flags, sends it to
// the entry node over the QUIC fabric transport, and prints a smokeOutput JSON
// report to stdout.
//
// Exit status: 2 when required flags are missing or the payload is invalid,
// 1 when the send fails or the entry node does not accept the envelope, and 0
// when the envelope was accepted.
func main() {
	var (
		endpoint      = flag.String("endpoint", "", "QUIC fabric endpoint for the entry node, for example quic://host:19131.")
		peerCert      = flag.String("peer-cert-sha256", "", "Expected entry node QUIC TLS certificate SHA-256 fingerprint.")
		clusterID     = flag.String("cluster-id", "", "Cluster ID.")
		routeID       = flag.String("route-id", "", "Configured production route ID.")
		sourceNodeID  = flag.String("source-node-id", "", "Route source node ID.")
		destNodeID    = flag.String("destination-node-id", "", "Route destination node ID.")
		currentNodeID = flag.String("current-hop-node-id", "", "Current hop node ID expected by the entry node.")
		nextHopNodeID = flag.String("next-hop-node-id", "", "Next hop node ID from the entry node.")
		routePath     = flag.String("route-path", "", "Comma-separated route path.")
		channel       = flag.String("channel", mesh.ProductionChannelFabricControl, "Production channel class.")
		timeout       = flag.Duration("timeout", 10*time.Second, "Smoke request timeout.")
		payloadText   = flag.String("payload", `{"kind":"fabric-production-smoke"}`, "JSON payload string.")
		payloadB64    = flag.String("payload-b64", "", "Base64-encoded JSON payload string.")
	)
	flag.Parse()

	if *endpoint == "" || *clusterID == "" || *routeID == "" || *sourceNodeID == "" || *destNodeID == "" || *currentNodeID == "" || *nextHopNodeID == "" {
		writeOutput(smokeOutput{OK: false, Error: "endpoint, cluster-id, route-id, source-node-id, destination-node-id, current-hop-node-id and next-hop-node-id are required"})
		os.Exit(2)
	}
	path := splitRoutePath(*routePath)

	// A non-blank -payload-b64 takes precedence over -payload.
	payloadSource := strings.TrimSpace(*payloadText)
	if strings.TrimSpace(*payloadB64) != "" {
		decoded, err := base64.StdEncoding.DecodeString(strings.TrimSpace(*payloadB64))
		if err != nil {
			writeOutput(smokeOutput{OK: false, Error: "payload-b64 must be valid base64"})
			os.Exit(2)
		}
		payloadSource = string(decoded)
	}
	payload := json.RawMessage(strings.TrimSpace(payloadSource))
	if !json.Valid(payload) {
		writeOutput(smokeOutput{OK: false, Error: "payload must be valid JSON"})
		os.Exit(2)
	}

	now := time.Now().UTC()
	// The message type is fabric control unless the VPN packet channel was requested.
	messageType := mesh.ProductionMessageFabricControl
	if strings.TrimSpace(*channel) == mesh.ProductionChannelVPNPacket {
		messageType = mesh.ProductionMessageVPNPacketBatch
	}
	sum := sha256.Sum256(payload)
	envelope := mesh.ProductionEnvelope{
		FabricProtocolVersion: mesh.ProtocolVersion,
		MessageID:             fmt.Sprintf("fabric-production-smoke-%d", now.UnixNano()),
		RouteID:               strings.TrimSpace(*routeID),
		ClusterID:             strings.TrimSpace(*clusterID),
		SourceNodeID:          strings.TrimSpace(*sourceNodeID),
		DestinationNodeID:     strings.TrimSpace(*destNodeID),
		CurrentHopNodeID:      strings.TrimSpace(*currentNodeID),
		NextHopNodeID:         strings.TrimSpace(*nextHopNodeID),
		RoutePath:             path,
		ChannelClass:          strings.TrimSpace(*channel),
		MessageType:           messageType,
		TTL:                   8,
		HopCount:              0,
		CreatedAt:             now,
		ExpiresAt:             now.Add(time.Minute),
		PayloadLength:         len(payload),
		PayloadHash:           hex.EncodeToString(sum[:]),
		Payload:               payload,
	}

	transport := mesh.NewQUICFabricTransport(nil)
	ctx, cancel := context.WithTimeout(context.Background(), *timeout)
	started := time.Now()
	result, err := sendProductionEnvelope(ctx, transport, mesh.FabricTransportTarget{
		EndpointID:     "fabric-production-smoke-entry",
		PeerID:         envelope.CurrentHopNodeID,
		Endpoint:       strings.TrimSpace(*endpoint),
		Transport:      "quic",
		PeerCertSHA256: strings.TrimSpace(*peerCert),
		Timeout:        *timeout,
		InboundBuffer:  8,
		ErrorBuffer:    4,
	}, envelope)
	// Cancel explicitly: os.Exit below does not run deferred functions.
	cancel()

	output := smokeOutput{
		OK:           err == nil && result.Accepted,
		Endpoint:     *endpoint,
		EntryNodeID:  envelope.CurrentHopNodeID,
		NextHopID:    envelope.NextHopNodeID,
		RouteID:      envelope.RouteID,
		ElapsedMS:    time.Since(started).Milliseconds(),
		Result:       result,
		EnvelopePath: path,
	}
	if err != nil {
		output.Error = err.Error()
	}
	writeOutput(output)
	// A rejected envelope is a failed smoke run even when the transport
	// succeeded, so the exit status follows the reported OK flag.
	if !output.OK {
		os.Exit(1)
	}
}
// sendProductionEnvelope connects to target, sends envelope as a single
// reliable data frame on the production forward stream, and waits for the
// matching response frame.
//
// It returns the decoded forward result, or an error when the connect, marshal
// or send step fails, the context ends, the session reports an error, the
// frame channel closes before a response arrives, the response cannot be
// decoded, or the response carries a non-blank error string.
func sendProductionEnvelope(ctx context.Context, transport *mesh.QUICFabricTransport, target mesh.FabricTransportTarget, envelope mesh.ProductionEnvelope) (mesh.ProductionForwardResult, error) {
	session, err := transport.Connect(ctx, target)
	if err != nil {
		return mesh.ProductionForwardResult{}, err
	}
	defer session.Close()

	payload, err := json.Marshal(envelope)
	if err != nil {
		return mesh.ProductionForwardResult{}, err
	}
	if err := session.Send(ctx, fabricproto.Frame{
		Type:         fabricproto.FrameData,
		TrafficClass: fabricproto.TrafficClassReliable,
		StreamID:     mesh.ProductionForwardQUICStreamID,
		Sequence:     1,
		Payload:      payload,
	}); err != nil {
		return mesh.ProductionForwardResult{}, err
	}

	frames := session.Frames()
	sessionErrs := session.Errors()
	for {
		select {
		case <-ctx.Done():
			return mesh.ProductionForwardResult{}, ctx.Err()
		case err, ok := <-sessionErrs:
			if !ok {
				// A closed channel is always ready and would make this loop
				// spin; a nil channel is never selected.
				sessionErrs = nil
				continue
			}
			if err != nil {
				return mesh.ProductionForwardResult{}, err
			}
		case frame, ok := <-frames:
			if !ok {
				// No response can arrive any more. Prefer a pending session
				// error over the generic message when one is available.
				select {
				case err, ok := <-sessionErrs:
					if ok && err != nil {
						return mesh.ProductionForwardResult{}, err
					}
				default:
				}
				return mesh.ProductionForwardResult{}, errors.New("fabric session closed before production forward response")
			}
			// Ignore frames that are not the response to the frame sent above.
			if frame.Type != fabricproto.FrameData || frame.StreamID != mesh.ProductionForwardQUICStreamID || frame.Sequence != 1 {
				continue
			}
			var response productionForwardResponse
			if err := json.Unmarshal(frame.Payload, &response); err != nil {
				return mesh.ProductionForwardResult{}, err
			}
			if strings.TrimSpace(response.Error) != "" {
				return mesh.ProductionForwardResult{}, errors.New(response.Error)
			}
			return response.Result, nil
		}
	}
}
// splitRoutePath parses a comma-separated route path into its hops. Each hop
// is trimmed of surrounding whitespace and blank hops are dropped. A blank
// input returns nil; an input made only of separators returns an empty,
// non-nil slice.
func splitRoutePath(value string) []string {
	trimmed := strings.TrimSpace(value)
	if len(trimmed) == 0 {
		return nil
	}
	hops := make([]string, 0, strings.Count(trimmed, ",")+1)
	for rest, more := trimmed, true; more; {
		var hop string
		hop, rest, more = strings.Cut(rest, ",")
		if hop = strings.TrimSpace(hop); hop != "" {
			hops = append(hops, hop)
		}
	}
	return hops
}
// writeOutput prints output to stdout as indented JSON followed by a newline.
// If marshalling fails, it reports the error on stderr and prints nothing to
// stdout.
func writeOutput(output smokeOutput) {
	encoded, marshalErr := json.MarshalIndent(output, "", " ")
	if marshalErr == nil {
		fmt.Fprintln(os.Stdout, string(encoded))
		return
	}
	fmt.Fprintf(os.Stderr, "marshal smoke output: %v\n", marshalErr)
}
@@ -28,6 +28,18 @@ type smokeNode struct {
server *httptest.Server
}
// smokeSyntheticTransport sends synthetic envelopes to peers looked up in a
// node-ID-to-base-URL map.
type smokeSyntheticTransport struct {
// peers maps a node ID to the base URL passed to mesh.NewClient.
peers map[string]string
}
// SendSynthetic forwards envelope to the peer registered for nextNodeID using
// a mesh client built from that peer's base URL. It returns
// mesh.ErrSyntheticPeerUnavailable when no non-empty base URL is registered.
func (t smokeSyntheticTransport) SendSynthetic(ctx context.Context, nextNodeID string, envelope mesh.SyntheticEnvelope) (mesh.SyntheticEnvelope, error) {
	if peerURL := t.peers[nextNodeID]; peerURL != "" {
		return mesh.NewClient(peerURL).SendSynthetic(ctx, envelope)
	}
	return mesh.SyntheticEnvelope{}, mesh.ErrSyntheticPeerUnavailable
}
type smokeReport struct {
Stage string `json:"stage"`
ProductionForwarding bool `json:"production_forwarding"`
@@ -433,7 +445,7 @@ func writeSmokeScopedConfig(local mesh.PeerIdentity, peers map[string]string, ro
func newSmokeNode(local mesh.PeerIdentity) *smokeNode {
node := &smokeNode{Local: local}
node.server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
mesh.Server{Local: node.Local, SyntheticRuntime: node.Runtime, FabricSessionEnabled: true}.Handler().ServeHTTP(w, r)
mesh.Server{Local: node.Local, SyntheticRuntime: node.Runtime, FabricSessionEnabled: true, FabricSessionWebSocketEnabled: true}.Handler().ServeHTTP(w, r)
}))
node.URL = node.server.URL
return node
@@ -454,7 +466,7 @@ func smokeRuntime(local mesh.PeerIdentity, routes []mesh.SyntheticRoute, peers m
mesh.SyntheticChannelFabricControl,
mesh.SyntheticChannelRouteControl,
},
Transport: mesh.NewHTTPPeerTransport(peers),
Transport: smokeSyntheticTransport{peers: peers},
})
}
@@ -217,7 +217,7 @@ func runInstallLinux(ctx context.Context, args []string) error {
fs.BoolVar(&cfg.RuntimeConfig.WorkloadSupervisionEnabled, "workload-supervision-enabled", getenvBool("RAP_WORKLOAD_SUPERVISION_ENABLED", false), "Enable node-agent workload status reporting.")
fs.BoolVar(&cfg.RuntimeConfig.MeshSyntheticRuntimeEnabled, "mesh-synthetic-runtime-enabled", getenvBool("RAP_MESH_SYNTHETIC_RUNTIME_ENABLED", true), "Enable synthetic mesh runtime.")
fs.BoolVar(&cfg.RuntimeConfig.MeshProductionForwardingEnabled, "mesh-production-forwarding-enabled", getenvBool("RAP_MESH_PRODUCTION_FORWARDING_ENABLED", false), "Enable production forwarding gate; runtime still fail-closed if unavailable.")
fs.BoolVar(&cfg.RuntimeConfig.MeshFabricSessionEnabled, "mesh-fabric-session-enabled", getenvBool("RAP_MESH_FABRIC_SESSION_ENABLED", false), "Enable authenticated fabric session WebSocket endpoint.")
fs.BoolVar(&cfg.RuntimeConfig.MeshFabricSessionEnabled, "mesh-fabric-session-enabled", getenvBool("RAP_MESH_FABRIC_SESSION_ENABLED", false), "Enable authenticated fabric session endpoint.")
fs.BoolVar(&cfg.RuntimeConfig.VPNFabricSessionTransportEnabled, "vpn-fabric-session-transport-enabled", getenvBool("RAP_VPN_FABRIC_SESSION_TRANSPORT_ENABLED", false), "Route VPN packet transport over persistent fabric sessions.")
fs.BoolVar(&cfg.RuntimeConfig.MeshQUICFabricEnabled, "mesh-quic-fabric-enabled", getenvBool("RAP_MESH_QUIC_FABRIC_ENABLED", false), "Enable QUIC/UDP fabric listener.")
fs.StringVar(&cfg.RuntimeConfig.MeshQUICFabricListenAddr, "mesh-quic-fabric-listen-addr", getenv("RAP_MESH_QUIC_FABRIC_LISTEN_ADDR", ""), "QUIC/UDP fabric listen address.")
@@ -230,7 +230,7 @@ func runInstallLinux(ctx context.Context, args []string) error {
fs.IntVar(&cfg.RuntimeConfig.MeshListenAutoPortEnd, "mesh-listen-auto-port-end", getenvInt("RAP_MESH_LISTEN_AUTO_PORT_END", 19231), "Last port used when mesh listen port mode is auto.")
fs.StringVar(&cfg.RuntimeConfig.MeshAdvertiseEndpoint, "mesh-advertise-endpoint", getenv("RAP_MESH_ADVERTISE_ENDPOINT", ""), "Advertised mesh endpoint.")
fs.StringVar(&cfg.RuntimeConfig.MeshAdvertiseEndpointsJSON, "mesh-advertise-endpoints-json", getenv("RAP_MESH_ADVERTISE_ENDPOINTS_JSON", ""), "Advertised endpoint candidates JSON.")
fs.StringVar(&cfg.RuntimeConfig.MeshAdvertiseTransport, "mesh-advertise-transport", getenv("RAP_MESH_ADVERTISE_TRANSPORT", "direct_http"), "Advertised transport.")
fs.StringVar(&cfg.RuntimeConfig.MeshAdvertiseTransport, "mesh-advertise-transport", getenv("RAP_MESH_ADVERTISE_TRANSPORT", "quic"), "Advertised transport.")
fs.StringVar(&cfg.RuntimeConfig.MeshConnectivityMode, "mesh-connectivity-mode", getenv("RAP_MESH_CONNECTIVITY_MODE", "outbound_only"), "Connectivity mode hint.")
fs.StringVar(&cfg.RuntimeConfig.MeshNATType, "mesh-nat-type", getenv("RAP_MESH_NAT_TYPE", "unknown"), "NAT type hint.")
fs.StringVar(&cfg.RuntimeConfig.MeshRegion, "mesh-region", getenv("RAP_MESH_REGION", "linux"), "Region/site hint.")
@@ -305,7 +305,7 @@ func runInstallWindows(ctx context.Context, args []string) error {
fs.BoolVar(&cfg.RuntimeConfig.WorkloadSupervisionEnabled, "workload-supervision-enabled", getenvBool("RAP_WORKLOAD_SUPERVISION_ENABLED", false), "Enable node-agent workload status reporting.")
fs.BoolVar(&cfg.RuntimeConfig.MeshSyntheticRuntimeEnabled, "mesh-synthetic-runtime-enabled", getenvBool("RAP_MESH_SYNTHETIC_RUNTIME_ENABLED", true), "Enable synthetic mesh runtime.")
fs.BoolVar(&cfg.RuntimeConfig.MeshProductionForwardingEnabled, "mesh-production-forwarding-enabled", getenvBool("RAP_MESH_PRODUCTION_FORWARDING_ENABLED", false), "Enable production forwarding gate; runtime still fail-closed if unavailable.")
fs.BoolVar(&cfg.RuntimeConfig.MeshFabricSessionEnabled, "mesh-fabric-session-enabled", getenvBool("RAP_MESH_FABRIC_SESSION_ENABLED", false), "Enable authenticated fabric session WebSocket endpoint.")
fs.BoolVar(&cfg.RuntimeConfig.MeshFabricSessionEnabled, "mesh-fabric-session-enabled", getenvBool("RAP_MESH_FABRIC_SESSION_ENABLED", false), "Enable authenticated fabric session endpoint.")
fs.BoolVar(&cfg.RuntimeConfig.VPNFabricSessionTransportEnabled, "vpn-fabric-session-transport-enabled", getenvBool("RAP_VPN_FABRIC_SESSION_TRANSPORT_ENABLED", false), "Route VPN packet transport over persistent fabric sessions.")
fs.BoolVar(&cfg.RuntimeConfig.MeshQUICFabricEnabled, "mesh-quic-fabric-enabled", getenvBool("RAP_MESH_QUIC_FABRIC_ENABLED", false), "Enable QUIC/UDP fabric listener.")
fs.StringVar(&cfg.RuntimeConfig.MeshQUICFabricListenAddr, "mesh-quic-fabric-listen-addr", getenv("RAP_MESH_QUIC_FABRIC_LISTEN_ADDR", ""), "QUIC/UDP fabric listen address.")
@@ -318,7 +318,7 @@ func runInstallWindows(ctx context.Context, args []string) error {
fs.IntVar(&cfg.RuntimeConfig.MeshListenAutoPortEnd, "mesh-listen-auto-port-end", getenvInt("RAP_MESH_LISTEN_AUTO_PORT_END", 19231), "Last port used when mesh listen port mode is auto.")
fs.StringVar(&cfg.RuntimeConfig.MeshAdvertiseEndpoint, "mesh-advertise-endpoint", getenv("RAP_MESH_ADVERTISE_ENDPOINT", ""), "Advertised mesh endpoint.")
fs.StringVar(&cfg.RuntimeConfig.MeshAdvertiseEndpointsJSON, "mesh-advertise-endpoints-json", getenv("RAP_MESH_ADVERTISE_ENDPOINTS_JSON", ""), "Advertised endpoint candidates JSON.")
fs.StringVar(&cfg.RuntimeConfig.MeshAdvertiseTransport, "mesh-advertise-transport", getenv("RAP_MESH_ADVERTISE_TRANSPORT", "direct_http"), "Advertised transport.")
fs.StringVar(&cfg.RuntimeConfig.MeshAdvertiseTransport, "mesh-advertise-transport", getenv("RAP_MESH_ADVERTISE_TRANSPORT", "quic"), "Advertised transport.")
fs.StringVar(&cfg.RuntimeConfig.MeshConnectivityMode, "mesh-connectivity-mode", getenv("RAP_MESH_CONNECTIVITY_MODE", "outbound_only"), "Connectivity mode hint.")
fs.StringVar(&cfg.RuntimeConfig.MeshNATType, "mesh-nat-type", getenv("RAP_MESH_NAT_TYPE", "unknown"), "NAT type hint.")
fs.StringVar(&cfg.RuntimeConfig.MeshRegion, "mesh-region", getenv("RAP_MESH_REGION", "windows"), "Region/site hint.")
@@ -799,7 +799,7 @@ func parseInstall(args []string) (installCommandConfig, error) {
fs.BoolVar(&cfg.WorkloadSupervisionEnabled, "workload-supervision-enabled", getenvBool("RAP_WORKLOAD_SUPERVISION_ENABLED", false), "Enable node-agent workload status reporting.")
fs.BoolVar(&cfg.MeshSyntheticRuntimeEnabled, "mesh-synthetic-runtime-enabled", getenvBool("RAP_MESH_SYNTHETIC_RUNTIME_ENABLED", false), "Enable synthetic mesh runtime.")
fs.BoolVar(&cfg.MeshProductionForwardingEnabled, "mesh-production-forwarding-enabled", getenvBool("RAP_MESH_PRODUCTION_FORWARDING_ENABLED", false), "Enable production forwarding gate; runtime still fail-closed if unavailable.")
fs.BoolVar(&cfg.MeshFabricSessionEnabled, "mesh-fabric-session-enabled", getenvBool("RAP_MESH_FABRIC_SESSION_ENABLED", false), "Enable authenticated fabric session WebSocket endpoint.")
fs.BoolVar(&cfg.MeshFabricSessionEnabled, "mesh-fabric-session-enabled", getenvBool("RAP_MESH_FABRIC_SESSION_ENABLED", false), "Enable authenticated fabric session endpoint.")
fs.BoolVar(&cfg.VPNFabricSessionTransportEnabled, "vpn-fabric-session-transport-enabled", getenvBool("RAP_VPN_FABRIC_SESSION_TRANSPORT_ENABLED", false), "Route VPN packet transport over persistent fabric sessions.")
fs.BoolVar(&cfg.MeshQUICFabricEnabled, "mesh-quic-fabric-enabled", getenvBool("RAP_MESH_QUIC_FABRIC_ENABLED", false), "Enable QUIC/UDP fabric listener.")
fs.StringVar(&cfg.MeshQUICFabricListenAddr, "mesh-quic-fabric-listen-addr", getenv("RAP_MESH_QUIC_FABRIC_LISTEN_ADDR", ""), "QUIC/UDP fabric listen address.")
@@ -812,7 +812,7 @@ func parseInstall(args []string) (installCommandConfig, error) {
fs.IntVar(&cfg.MeshListenAutoPortEnd, "mesh-listen-auto-port-end", getenvInt("RAP_MESH_LISTEN_AUTO_PORT_END", 0), "Last port used when mesh listen port mode is auto.")
fs.StringVar(&cfg.MeshAdvertiseEndpoint, "mesh-advertise-endpoint", getenv("RAP_MESH_ADVERTISE_ENDPOINT", ""), "Advertised mesh endpoint.")
fs.StringVar(&cfg.MeshAdvertiseEndpointsJSON, "mesh-advertise-endpoints-json", getenv("RAP_MESH_ADVERTISE_ENDPOINTS_JSON", ""), "Advertised endpoint candidates JSON.")
fs.StringVar(&cfg.MeshAdvertiseTransport, "mesh-advertise-transport", getenv("RAP_MESH_ADVERTISE_TRANSPORT", ""), "Advertised transport.")
fs.StringVar(&cfg.MeshAdvertiseTransport, "mesh-advertise-transport", getenv("RAP_MESH_ADVERTISE_TRANSPORT", "quic"), "Advertised transport.")
fs.StringVar(&cfg.MeshConnectivityMode, "mesh-connectivity-mode", getenv("RAP_MESH_CONNECTIVITY_MODE", ""), "Connectivity mode hint.")
fs.StringVar(&cfg.MeshNATType, "mesh-nat-type", getenv("RAP_MESH_NAT_TYPE", ""), "NAT type hint.")
fs.StringVar(&cfg.MeshRegion, "mesh-region", getenv("RAP_MESH_REGION", ""), "Region/site hint.")
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+12 -6
View File
@@ -2,15 +2,21 @@ module github.com/example/remote-access-platform/agents/rap-node-agent
go 1.25.5
require golang.zx2c4.com/wireguard v0.0.0-20250521234502-f333402bd9cb
require (
github.com/gorilla/websocket v1.5.3
github.com/quic-go/quic-go v0.59.1
golang.zx2c4.com/wireguard v0.0.0-20250521234502-f333402bd9cb
)
require (
github.com/gorilla/websocket v1.5.3 // indirect
github.com/quic-go/quic-go v0.59.1 // indirect
golang.org/x/crypto v0.50.0 // indirect
golang.org/x/net v0.53.0 // indirect
golang.org/x/sys v0.43.0 // indirect
golang.org/x/crypto v0.51.0 // indirect
golang.org/x/mobile v0.0.0-20260514233045-7de0a8fa7f4d // indirect
golang.org/x/mod v0.36.0 // indirect
golang.org/x/net v0.54.0 // indirect
golang.org/x/sync v0.20.0 // indirect
golang.org/x/sys v0.44.0 // indirect
golang.org/x/time v0.15.0 // indirect
golang.org/x/tools v0.45.0 // indirect
golang.zx2c4.com/wintun v0.0.0-20230126152724-0fa3db229ce2 // indirect
gvisor.dev/gvisor v0.0.0-20260505022556-2306ef3db943 // indirect
)
+24 -6
View File
@@ -1,20 +1,38 @@
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/quic-go/quic-go v0.59.1 h1:0Gmua0HW1Tv7ANR7hUYwRyD0MG5OJfgvYSZasGZzBic=
github.com/quic-go/quic-go v0.59.1/go.mod h1:upnsH4Ju1YkqpLXC305eW3yDZ4NfnNbmQRCMWS58IKU=
golang.org/x/crypto v0.50.0 h1:zO47/JPrL6vsNkINmLoo/PH1gcxpls50DNogFvB5ZGI=
golang.org/x/crypto v0.50.0/go.mod h1:3muZ7vA7PBCE6xgPX7nkzzjiUq87kRItoJQM1Yo8S+Q=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
go.uber.org/mock v0.5.2 h1:LbtPTcP8A5k9WPXj54PPPbjcI4Y6lhyOZXn+VS7wNko=
go.uber.org/mock v0.5.2/go.mod h1:wLlUxC2vVTPTaE3UD51E0BGOAElKrILxhVSDYQLld5o=
golang.org/x/crypto v0.51.0 h1:IBPXwPfKxY7cWQZ38ZCIRPI50YLeevDLlLnyC5wRGTI=
golang.org/x/crypto v0.51.0/go.mod h1:8AdwkbraGNABw2kOX6YFPs3WM22XqI4EXEd8g+x7Oc8=
golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa h1:FRnLl4eNAQl8hwxVVC17teOw8kdjVDVAiFMtgUdTSRQ=
golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa/go.mod h1:zk2irFbV9DP96SEBUUAy67IdHUaZuSnrz1n472HUCLE=
golang.org/x/net v0.53.0 h1:d+qAbo5L0orcWAr0a9JweQpjXF19LMXJE8Ey7hwOdUA=
golang.org/x/net v0.53.0/go.mod h1:JvMuJH7rrdiCfbeHoo3fCQU24Lf5JJwT9W3sJFulfgs=
golang.org/x/sys v0.43.0 h1:Rlag2XtaFTxp19wS8MXlJwTvoh8ArU6ezoyFsMyCTNI=
golang.org/x/sys v0.43.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
golang.org/x/mobile v0.0.0-20260514233045-7de0a8fa7f4d h1:XNPSUMmnREiyj6HdYfJjTJVQIC5c1b3+qV7mbxUjzwk=
golang.org/x/mobile v0.0.0-20260514233045-7de0a8fa7f4d/go.mod h1:ltIbhcRzKgwHa4ZxKJeiv0nyzcXUUYCqMyO0Y+vPmXw=
golang.org/x/mod v0.36.0 h1:JJjpVx6myfUsUdAzZuOSTTmRE0PfZeNWzzvKrP7amb4=
golang.org/x/mod v0.36.0/go.mod h1:moc6ELqsWcOw5Ef3xVprK5ul/MvtVvkIXLziUOICjUQ=
golang.org/x/net v0.54.0 h1:2zJIZAxAHV/OHCDTCOHAYehQzLfSXuf/5SoL/Dv6w/w=
golang.org/x/net v0.54.0/go.mod h1:Sj4oj8jK6XmHpBZU/zWHw3BV3abl4Kvi+Ut7cQcY+cQ=
golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4=
golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0=
golang.org/x/sys v0.44.0 h1:ildZl3J4uzeKP07r2F++Op7E9B29JRUy+a27EibtBTQ=
golang.org/x/sys v0.44.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
golang.org/x/time v0.15.0 h1:bbrp8t3bGUeFOx08pvsMYRTCVSMk89u4tKbNOZbp88U=
golang.org/x/time v0.15.0/go.mod h1:Y4YMaQmXwGQZoFaVFk4YpCt4FLQMYKZe9oeV/f4MSno=
golang.org/x/tools v0.45.0 h1:18qN3FAooORvApf5XjCXgsuayZOEtXf6JK18I3+ONa8=
golang.org/x/tools v0.45.0/go.mod h1:LuUGqqaXcXMEFEruIVJVm5mgDD8vww/z/SR1gQ4uE/0=
golang.zx2c4.com/wintun v0.0.0-20230126152724-0fa3db229ce2 h1:B82qJJgjvYKsXS9jeunTOisW56dUokqW/FOteYJJ/yg=
golang.zx2c4.com/wintun v0.0.0-20230126152724-0fa3db229ce2/go.mod h1:deeaetjYA+DHMHg+sMSMI58GrEteJUUzzw7en6TJQcI=
golang.zx2c4.com/wireguard v0.0.0-20250521234502-f333402bd9cb h1:whnFRlWMcXI9d+ZbWg+4sHnLp52d5yiIPUxMBSt4X9A=
golang.zx2c4.com/wireguard v0.0.0-20250521234502-f333402bd9cb/go.mod h1:rpwXGsirqLqN2L0JDJQlwOboGHmptD5ZD6T2VmcqhTw=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gvisor.dev/gvisor v0.0.0-20260505022556-2306ef3db943 h1:YUPk0vGbex2+Jk7XXIgLIPG6oEAD9ml0x7wd6i/bmA4=
gvisor.dev/gvisor v0.0.0-20260505022556-2306ef3db943/go.mod h1:xQ2PWgHmWJA/Ph4i1q1jBm39BKhc3W0DXqWoDSyuBOY=
@@ -7,7 +7,7 @@ import (
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/state"
)
const Version = "0.2.280-fabricsession"
const Version = "0.2.309-latencyaware"
func EnrollmentPayload(clusterID, joinToken string, identity state.Identity) client.EnrollRequest {
return client.EnrollRequest{
@@ -38,9 +38,12 @@ func EnrollmentPayload(clusterID, joinToken string, identity state.Identity) cli
"vpn_local_gateway_shortcut": false,
"vpn_farm_owned_dataplane": true,
"fabric_data_session_v1": true,
"fabric_session_websocket_smoke": true,
"fabric_session_quic_smoke": true,
"vpn_backend_relay_fallback": false,
"fabric_service_channel_required": true,
"web_ingress_workload_contract": "rap.web_ingress.workload_contract.v1",
"web_ingress_real_listener_gate": "RAP_WEB_INGRESS_RUNTIME_ENABLED",
"web_ingress_runtime_enabled": false,
"external_backend_entry_proxy": true,
},
ReportedFacts: map[string]any{
@@ -67,9 +70,12 @@ func HeartbeatPayload() client.HeartbeatRequest {
"vpn_local_gateway_shortcut": false,
"vpn_farm_owned_dataplane": true,
"fabric_data_session_v1": true,
"fabric_session_websocket_smoke": true,
"fabric_session_quic_smoke": true,
"vpn_backend_relay_fallback": false,
"fabric_service_channel_required": true,
"web_ingress_workload_contract": "rap.web_ingress.workload_contract.v1",
"web_ingress_real_listener_gate": "RAP_WEB_INGRESS_RUNTIME_ENABLED",
"web_ingress_runtime_enabled": false,
"external_backend_entry_proxy": true,
},
ServiceStates: map[string]any{
@@ -14,6 +14,8 @@ import (
// Schema identifiers and the signature algorithm name used by the
// cluster-authority verification helpers.
const (
AuthoritySchemaVersion = "rap.cluster_authority.v1"
// SignatureSchemaVersion is the schema_version VerifyRaw requires on a Signature.
SignatureSchemaVersion = "rap.cluster_authority.signature.v1"
// QuorumSchemaVersion is the schema_version VerifyQuorumRaw requires on a QuorumDescriptor.
QuorumSchemaVersion = "rap.cluster_authority.quorum.v1"
// QuorumEnvelopeVersion is the schema_version VerifyQuorumRaw requires on a QuorumEnvelope.
QuorumEnvelopeVersion = "rap.cluster_authority.quorum_envelope.v1"
AlgorithmEd25519 = "ed25519"
)
@@ -30,6 +32,34 @@ type Signature struct {
Signature string `json:"signature"`
}
// QuorumMember describes one key holder listed in a QuorumDescriptor.
//
// VerifyQuorumRaw looks members up by key fingerprint (it can derive the
// fingerprint from PublicKey when PublicKeyFingerprint is blank) and hands
// PublicKey to VerifyRaw as its publicKeyB64 argument. Scopes is consulted by
// memberAllowsScope, where the entry "*" matches any required scope. NodeID and
// Role are carried in the JSON but are not read by VerifyQuorumRaw.
type QuorumMember struct {
NodeID string `json:"node_id,omitempty"`
Role string `json:"role,omitempty"`
PublicKey string `json:"public_key"`
PublicKeyFingerprint string `json:"public_key_fingerprint"`
Scopes []string `json:"scopes,omitempty"`
}
// QuorumDescriptor lists the members of a cluster-authority quorum for one
// cluster and epoch, together with Threshold, the number of distinct member
// signatures required.
//
// VerifyQuorumRaw requires SchemaVersion to equal QuorumSchemaVersion and
// compares the hash of this struct's JSON encoding (see QuorumDescriptorHash)
// with QuorumEnvelope.QuorumSHA256, so any change to a field changes that hash.
type QuorumDescriptor struct {
SchemaVersion string `json:"schema_version"`
ClusterID string `json:"cluster_id"`
Epoch string `json:"epoch"`
Threshold int `json:"threshold"`
Members []QuorumMember `json:"members"`
}
// QuorumEnvelope carries the signatures collected over one payload, plus the
// hashes that bind the envelope to that payload (PayloadSHA256) and to a
// specific QuorumDescriptor (QuorumSHA256).
//
// VerifyQuorumRaw requires SchemaVersion to equal QuorumEnvelopeVersion and
// ClusterID/Epoch to match the descriptor. Threshold can only raise the
// descriptor's threshold, never lower it. AllowedScopes and DecisionReason are
// carried in the JSON but are not examined by VerifyQuorumRaw.
type QuorumEnvelope struct {
SchemaVersion string `json:"schema_version"`
ClusterID string `json:"cluster_id"`
Epoch string `json:"epoch"`
Threshold int `json:"threshold"`
PayloadSHA256 string `json:"payload_sha256"`
QuorumSHA256 string `json:"quorum_sha256"`
Signatures []Signature `json:"signatures"`
AllowedScopes []string `json:"allowed_scopes,omitempty"`
DecisionReason string `json:"decision_reason,omitempty"`
}
func VerifyRaw(publicKeyB64 string, payload json.RawMessage, signature Signature) error {
if signature.SchemaVersion != SignatureSchemaVersion {
return fmt.Errorf("%w: schema_version must be %s", ErrInvalidSignature, SignatureSchemaVersion)
@@ -58,6 +88,86 @@ func VerifyRaw(publicKeyB64 string, payload json.RawMessage, signature Signature
return nil
}
// VerifyQuorumRaw checks that payload carries enough valid signatures from
// members of descriptor to satisfy the quorum threshold.
//
// The checks run in this order:
//   - descriptor and envelope schema versions must be the expected constants,
//     and their cluster ID and epoch must be non-blank and equal;
//   - the effective threshold is the larger of descriptor.Threshold and
//     envelope.Threshold and must lie in [1, len(descriptor.Members)];
//   - envelope.PayloadSHA256 must equal HashRaw(payload) and
//     envelope.QuorumSHA256 must equal the hash of the marshalled descriptor;
//   - every member's fingerprint is derived from its public key, a declared
//     fingerprint that disagrees with the key is rejected, and no fingerprint
//     may appear twice;
//   - every distinct signature must come from a member, satisfy requiredScope
//     (when non-empty) and pass VerifyRaw; repeated signatures from the same
//     fingerprint are counted once.
//
// Rejections raised directly by this function wrap ErrInvalidSignature; errors
// returned by HashRaw, decodePublicKey and VerifyRaw are passed through
// unchanged.
func VerifyQuorumRaw(descriptor QuorumDescriptor, payload json.RawMessage, envelope QuorumEnvelope, requiredScope string) error {
	if descriptor.SchemaVersion != QuorumSchemaVersion {
		return fmt.Errorf("%w: quorum schema_version must be %s", ErrInvalidSignature, QuorumSchemaVersion)
	}
	if envelope.SchemaVersion != QuorumEnvelopeVersion {
		return fmt.Errorf("%w: quorum envelope schema_version must be %s", ErrInvalidSignature, QuorumEnvelopeVersion)
	}
	if strings.TrimSpace(descriptor.ClusterID) == "" || descriptor.ClusterID != envelope.ClusterID {
		return fmt.Errorf("%w: quorum cluster mismatch", ErrInvalidSignature)
	}
	if strings.TrimSpace(descriptor.Epoch) == "" || descriptor.Epoch != envelope.Epoch {
		return fmt.Errorf("%w: quorum epoch mismatch", ErrInvalidSignature)
	}

	// The envelope may demand more signatures than the descriptor, never fewer.
	threshold := descriptor.Threshold
	if envelope.Threshold > threshold {
		threshold = envelope.Threshold
	}
	if threshold <= 0 || threshold > len(descriptor.Members) {
		return fmt.Errorf("%w: invalid quorum threshold", ErrInvalidSignature)
	}

	payloadHash, err := HashRaw(payload)
	if err != nil {
		return err
	}
	if envelope.PayloadSHA256 != payloadHash {
		return fmt.Errorf("%w: quorum payload hash mismatch", ErrInvalidSignature)
	}
	descriptorHash, err := HashRaw(mustMarshalQuorumDescriptor(descriptor))
	if err != nil {
		return err
	}
	if envelope.QuorumSHA256 != descriptorHash {
		return fmt.Errorf("%w: quorum descriptor hash mismatch", ErrInvalidSignature)
	}

	members := make(map[string]QuorumMember, len(descriptor.Members))
	for _, member := range descriptor.Members {
		// Always derive the fingerprint from the key itself. Trusting the
		// declared value would let one key be listed under several
		// fingerprints and slip past the duplicate-member check below.
		publicKey, err := decodePublicKey(member.PublicKey)
		if err != nil {
			return err
		}
		fingerprint := Fingerprint(publicKey)
		if declared := strings.TrimSpace(member.PublicKeyFingerprint); declared != "" && declared != fingerprint {
			return fmt.Errorf("%w: quorum member fingerprint mismatch", ErrInvalidSignature)
		}
		if _, exists := members[fingerprint]; exists {
			return fmt.Errorf("%w: duplicate quorum member", ErrInvalidSignature)
		}
		member.PublicKeyFingerprint = fingerprint
		members[fingerprint] = member
	}

	seen := make(map[string]bool, len(envelope.Signatures))
	valid := 0
	for _, signature := range envelope.Signatures {
		fingerprint := strings.TrimSpace(signature.KeyFingerprint)
		if seen[fingerprint] {
			// A signer counts once no matter how many signatures it supplies.
			continue
		}
		member, ok := members[fingerprint]
		if !ok {
			return fmt.Errorf("%w: quorum signer is not a member", ErrInvalidSignature)
		}
		if requiredScope != "" && !memberAllowsScope(member, requiredScope) {
			return fmt.Errorf("%w: quorum signer scope mismatch", ErrInvalidSignature)
		}
		if err := VerifyRaw(member.PublicKey, payload, signature); err != nil {
			return err
		}
		seen[fingerprint] = true
		valid++
	}
	if valid < threshold {
		return fmt.Errorf("%w: quorum threshold not met", ErrInvalidSignature)
	}
	return nil
}
// QuorumDescriptorHash returns HashRaw applied to the JSON encoding of
// descriptor. This is the value VerifyQuorumRaw compares against
// QuorumEnvelope.QuorumSHA256.
func QuorumDescriptorHash(descriptor QuorumDescriptor) (string, error) {
	encoded := mustMarshalQuorumDescriptor(descriptor)
	return HashRaw(encoded)
}
func Fingerprint(publicKey ed25519.PublicKey) string {
sum := sha256.Sum256(publicKey)
return "rap-ca-ed25519-" + hex.EncodeToString(sum[:16])
@@ -72,6 +182,28 @@ func HashRaw(raw json.RawMessage) (string, error) {
return hex.EncodeToString(sum[:]), nil
}
// mustMarshalQuorumDescriptor JSON-encodes descriptor. Despite the "must"
// prefix it does not panic: a marshalling failure yields a nil message, which
// callers pass on to HashRaw.
func mustMarshalQuorumDescriptor(descriptor QuorumDescriptor) json.RawMessage {
	if encoded, err := json.Marshal(descriptor); err == nil {
		return encoded
	}
	return nil
}
// memberAllowsScope reports whether member may sign for requiredScope.
// A blank required scope is always allowed. Otherwise one of the member's
// scopes, compared after trimming whitespace, must equal the required scope or
// be the wildcard "*".
func memberAllowsScope(member QuorumMember, requiredScope string) bool {
	wanted := strings.TrimSpace(requiredScope)
	if wanted == "" {
		return true
	}
	for i := range member.Scopes {
		switch strings.TrimSpace(member.Scopes[i]) {
		case "*", wanted:
			return true
		}
	}
	return false
}
func CanonicalJSON(raw json.RawMessage) ([]byte, error) {
if len(raw) == 0 {
return nil, fmt.Errorf("%w: empty payload", ErrInvalidPayload)
@@ -5,6 +5,7 @@ import (
"encoding/base64"
"encoding/json"
"errors"
"fmt"
"testing"
)
@@ -50,3 +51,114 @@ func TestVerifyRawRejectsTamperedPayload(t *testing.T) {
t.Fatalf("err = %v, want ErrInvalidSignature", err)
}
}
// TestVerifyQuorumRawAcceptsThreshold checks that two valid member signatures
// satisfy a 2-of-3 quorum.
func TestVerifyQuorumRawAcceptsThreshold(t *testing.T) {
	payload := json.RawMessage(`{"schema_version":"rap.node_update_plan_authority.v1","cluster_id":"cluster-1","action":"update"}`)
	descriptor, privateKeys := testQuorumDescriptor(t, 3, 2)

	payloadHash, hashErr := HashRaw(payload)
	if hashErr != nil {
		t.Fatalf("payload hash: %v", hashErr)
	}
	quorumHash, hashErr := QuorumDescriptorHash(descriptor)
	if hashErr != nil {
		t.Fatalf("quorum hash: %v", hashErr)
	}

	// Sign with the first two of the three members.
	signatures := make([]Signature, 0, 2)
	for _, key := range privateKeys[:2] {
		signatures = append(signatures, signTestPayload(t, payload, key))
	}

	envelope := QuorumEnvelope{
		SchemaVersion: QuorumEnvelopeVersion,
		ClusterID:     "cluster-1",
		Epoch:         "epoch-1",
		Threshold:     2,
		PayloadSHA256: payloadHash,
		QuorumSHA256:  quorumHash,
		Signatures:    signatures,
	}
	verifyErr := VerifyQuorumRaw(descriptor, payload, envelope, "update-authority")
	if verifyErr != nil {
		t.Fatalf("VerifyQuorumRaw: %v", verifyErr)
	}
}
// TestVerifyQuorumRawRejectsBelowThreshold checks that a single valid
// signature is rejected with ErrInvalidSignature when the quorum requires two.
func TestVerifyQuorumRawRejectsBelowThreshold(t *testing.T) {
	payload := json.RawMessage(`{"schema_version":"rap.node_update_plan_authority.v1","cluster_id":"cluster-1","action":"update"}`)
	descriptor, privateKeys := testQuorumDescriptor(t, 3, 2)

	// Fail fast on fixture errors so a broken setup is reported as such
	// instead of being mistaken for a verification outcome.
	payloadHash, err := HashRaw(payload)
	if err != nil {
		t.Fatalf("payload hash: %v", err)
	}
	quorumHash, err := QuorumDescriptorHash(descriptor)
	if err != nil {
		t.Fatalf("quorum hash: %v", err)
	}

	envelope := QuorumEnvelope{
		SchemaVersion: QuorumEnvelopeVersion,
		ClusterID:     "cluster-1",
		Epoch:         "epoch-1",
		Threshold:     2,
		PayloadSHA256: payloadHash,
		QuorumSHA256:  quorumHash,
		Signatures:    []Signature{signTestPayload(t, payload, privateKeys[0])},
	}
	if err := VerifyQuorumRaw(descriptor, payload, envelope, "update-authority"); !errors.Is(err, ErrInvalidSignature) {
		t.Fatalf("err = %v, want ErrInvalidSignature", err)
	}
}
// TestVerifyQuorumRawRejectsTamperedDescriptor checks that lowering the
// descriptor threshold after its hash was taken makes verification fail with
// ErrInvalidSignature, even though enough signatures are present.
func TestVerifyQuorumRawRejectsTamperedDescriptor(t *testing.T) {
	payload := json.RawMessage(`{"schema_version":"rap.node_update_plan_authority.v1","cluster_id":"cluster-1","action":"update"}`)
	descriptor, privateKeys := testQuorumDescriptor(t, 3, 2)

	// Fail fast on fixture errors so a broken setup is reported as such
	// instead of being mistaken for a verification outcome.
	payloadHash, err := HashRaw(payload)
	if err != nil {
		t.Fatalf("payload hash: %v", err)
	}
	quorumHash, err := QuorumDescriptorHash(descriptor)
	if err != nil {
		t.Fatalf("quorum hash: %v", err)
	}

	// Tamper after hashing: the envelope still carries the original hash.
	descriptor.Threshold = 1

	envelope := QuorumEnvelope{
		SchemaVersion: QuorumEnvelopeVersion,
		ClusterID:     "cluster-1",
		Epoch:         "epoch-1",
		Threshold:     2,
		PayloadSHA256: payloadHash,
		QuorumSHA256:  quorumHash,
		Signatures: []Signature{
			signTestPayload(t, payload, privateKeys[0]),
			signTestPayload(t, payload, privateKeys[1]),
		},
	}
	if err := VerifyQuorumRaw(descriptor, payload, envelope, "update-authority"); !errors.Is(err, ErrInvalidSignature) {
		t.Fatalf("err = %v, want ErrInvalidSignature", err)
	}
}
// testQuorumDescriptor builds a descriptor for cluster "cluster-1", epoch
// "epoch-1", with the given number of freshly generated Ed25519 members and
// the given threshold. It returns the descriptor together with the members'
// private keys in member order. Every member is scoped to "update-authority".
func testQuorumDescriptor(t *testing.T, members int, threshold int) (QuorumDescriptor, []ed25519.PrivateKey) {
	t.Helper()

	var roster []QuorumMember
	privateKeys := make([]ed25519.PrivateKey, 0, members)
	for n := 1; n <= members; n++ {
		pub, priv, err := ed25519.GenerateKey(nil)
		if err != nil {
			t.Fatalf("GenerateKey: %v", err)
		}
		roster = append(roster, QuorumMember{
			NodeID:               fmt.Sprintf("authority-%d", n),
			Role:                 "update-authority",
			PublicKey:            base64.StdEncoding.EncodeToString(pub),
			PublicKeyFingerprint: Fingerprint(pub),
			Scopes:               []string{"update-authority"},
		})
		privateKeys = append(privateKeys, priv)
	}

	return QuorumDescriptor{
		SchemaVersion: QuorumSchemaVersion,
		ClusterID:     "cluster-1",
		Epoch:         "epoch-1",
		Threshold:     threshold,
		Members:       roster,
	}, privateKeys
}
// signTestPayload signs the canonical JSON form of payload with privateKey and
// returns a Signature whose key fingerprint is derived from the matching
// public key.
func signTestPayload(t *testing.T, payload json.RawMessage, privateKey ed25519.PrivateKey) Signature {
	t.Helper()

	message, err := CanonicalJSON(payload)
	if err != nil {
		t.Fatalf("CanonicalJSON: %v", err)
	}

	signer := privateKey.Public().(ed25519.PublicKey)
	rawSignature := ed25519.Sign(privateKey, message)

	var signed Signature
	signed.SchemaVersion = SignatureSchemaVersion
	signed.Algorithm = AlgorithmEd25519
	signed.KeyFingerprint = Fingerprint(signer)
	signed.Signature = base64.StdEncoding.EncodeToString(rawSignature)
	return signed
}
@@ -9,6 +9,7 @@ import (
"io"
"net/http"
"net/url"
"strings"
"time"
)
@@ -17,6 +18,17 @@ type Client struct {
httpClient *http.Client
}
// RawControlRequest describes a request that Client.RawControl sends to the
// backend. Method is trimmed and upper-cased, defaulting to GET when blank.
// Path must begin with "/" and is appended to the client's base URL. Body, when
// non-empty and not the literal null, is sent with Content-Type
// application/json.
type RawControlRequest struct {
Method string `json:"method"`
Path string `json:"path"`
Body json.RawMessage `json:"body,omitempty"`
}
// RawControlResponse is what Client.RawControl returns for a 2xx backend
// reply: the HTTP status code and the response body bytes (RawControl buffers
// at most 2 MiB of body).
type RawControlResponse struct {
StatusCode int `json:"status_code"`
Body json.RawMessage `json:"body,omitempty"`
}
type EnrollRequest struct {
ClusterID string `json:"cluster_id"`
JoinToken string `json:"join_token"`
@@ -46,14 +58,15 @@ type EnrollmentBootstrapResponse struct {
}
type NodeBootstrap struct {
NodeID string `json:"node_id"`
ClusterID string `json:"cluster_id"`
IdentityStatus string `json:"identity_status"`
Certificate map[string]any `json:"certificate"`
HeartbeatEndpoint string `json:"heartbeat_endpoint"`
ClusterAuthority *ClusterAuthorityDescriptor `json:"cluster_authority,omitempty"`
AuthorityPayload json.RawMessage `json:"authority_payload,omitempty"`
AuthoritySignature *ClusterSignature `json:"authority_signature,omitempty"`
NodeID string `json:"node_id"`
ClusterID string `json:"cluster_id"`
IdentityStatus string `json:"identity_status"`
Certificate map[string]any `json:"certificate"`
HeartbeatEndpoint string `json:"heartbeat_endpoint"`
ClusterAuthority *ClusterAuthorityDescriptor `json:"cluster_authority,omitempty"`
ClusterAuthorityQuorum json.RawMessage `json:"cluster_authority_quorum,omitempty"`
AuthorityPayload json.RawMessage `json:"authority_payload,omitempty"`
AuthoritySignature *ClusterSignature `json:"authority_signature,omitempty"`
}
type HeartbeatRequest struct {
@@ -123,6 +136,7 @@ type NodeUpdatePlan struct {
Artifact *ReleaseArtifact `json:"artifact,omitempty"`
AuthorityPayload json.RawMessage `json:"authority_payload,omitempty"`
AuthoritySignature *ClusterSignature `json:"authority_signature,omitempty"`
AuthorityQuorum *QuorumEnvelope `json:"authority_quorum,omitempty"`
ProductionForwarding bool `json:"production_forwarding"`
}
@@ -293,6 +307,26 @@ type SyntheticMeshConfig struct {
ProductionForwarding bool `json:"production_forwarding"`
}
// AdminRuntimeProjectionRequest is the JSON body that
// Client.AdminRuntimeProjection posts to the backend's
// /clusters/{cluster}/nodes/{node}/admin-runtime/projection endpoint.
// NOTE(review): the field names suggest this describes an inbound HTTP request
// (method, path, query, host) being projected - confirm against the caller.
type AdminRuntimeProjectionRequest struct {
SchemaVersion string `json:"schema_version"`
Method string `json:"method"`
Path string `json:"path"`
Query string `json:"query,omitempty"`
Host string `json:"host,omitempty"`
Scope string `json:"scope"`
ServiceClass string `json:"service_class"`
ObservedAt string `json:"observed_at"`
}
// AdminRuntimeProjectionResponse is the backend reply decoded by
// Client.AdminRuntimeProjection. Body is kept as raw JSON and is not
// interpreted by the client.
type AdminRuntimeProjectionResponse struct {
SchemaVersion string `json:"schema_version"`
Status string `json:"status"`
Reason string `json:"reason,omitempty"`
StatusCode int `json:"status_code"`
Headers map[string]string `json:"headers,omitempty"`
Body json.RawMessage `json:"body,omitempty"`
}
func (c *SyntheticMeshConfig) UnmarshalJSON(data []byte) error {
type syntheticMeshConfigAlias SyntheticMeshConfig
var decoded syntheticMeshConfigAlias
@@ -448,6 +482,18 @@ type ClusterSignature struct {
SignedAt time.Time `json:"signed_at"`
}
// QuorumEnvelope is the client-side wire form of a quorum signature envelope;
// NodeUpdatePlan.AuthorityQuorum holds a pointer to one. It records the
// cluster, epoch and threshold, the payload and quorum-descriptor hashes, and
// the collected ClusterSignature values.
type QuorumEnvelope struct {
SchemaVersion string `json:"schema_version"`
ClusterID string `json:"cluster_id"`
Epoch string `json:"epoch"`
Threshold int `json:"threshold"`
PayloadSHA256 string `json:"payload_sha256"`
QuorumSHA256 string `json:"quorum_sha256"`
Signatures []ClusterSignature `json:"signatures"`
AllowedScopes []string `json:"allowed_scopes,omitempty"`
DecisionReason string `json:"decision_reason,omitempty"`
}
type PeerDirectoryEntry struct {
NodeID string `json:"node_id"`
RouteIDs []string `json:"route_ids,omitempty"`
@@ -744,6 +790,50 @@ func (c *Client) SyntheticMeshConfig(ctx context.Context, clusterID, nodeID stri
return response.Config, nil
}
// AdminRuntimeProjection posts request to the backend's admin-runtime
// projection endpoint for the given cluster and node and returns the decoded
// response. On error the zero AdminRuntimeProjectionResponse is returned.
func (c *Client) AdminRuntimeProjection(ctx context.Context, clusterID, nodeID string, request AdminRuntimeProjectionRequest) (AdminRuntimeProjectionResponse, error) {
	// Escape the identifiers so a "/", "?" or similar character in an ID
	// cannot alter the request path.
	path := fmt.Sprintf("/clusters/%s/nodes/%s/admin-runtime/projection", url.PathEscape(clusterID), url.PathEscape(nodeID))

	var response AdminRuntimeProjectionResponse
	if err := c.postJSON(ctx, path, request, &response); err != nil {
		return AdminRuntimeProjectionResponse{}, err
	}
	return response, nil
}
// RawControl sends an arbitrary request to the backend and returns the status
// code and raw body of a successful (2xx) reply.
//
// The method defaults to GET when blank. The path must begin with "/" and is
// appended to the client's base URL. A non-empty body other than the literal
// null is sent as application/json. A non-2xx status is reported as an error
// that includes the response body. A response body larger than 2 MiB is also
// reported as an error rather than being returned truncated.
func (c *Client) RawControl(ctx context.Context, request RawControlRequest) (RawControlResponse, error) {
	// maxResponseBytes caps how much of the backend response is buffered.
	const maxResponseBytes = 2 * 1024 * 1024

	method := strings.ToUpper(strings.TrimSpace(request.Method))
	if method == "" {
		method = http.MethodGet
	}
	path := strings.TrimSpace(request.Path)
	if !strings.HasPrefix(path, "/") {
		return RawControlResponse{}, fmt.Errorf("control path must be relative")
	}
	var body io.Reader
	if len(request.Body) > 0 && string(request.Body) != "null" {
		body = bytes.NewReader(request.Body)
	}
	httpReq, err := http.NewRequestWithContext(ctx, method, c.baseURL+path, body)
	if err != nil {
		return RawControlResponse{}, err
	}
	if body != nil {
		httpReq.Header.Set("Content-Type", "application/json")
	}
	httpResp, err := c.httpClient.Do(httpReq)
	if err != nil {
		return RawControlResponse{}, err
	}
	defer httpResp.Body.Close()

	// Read one byte past the cap so an oversized response can be told apart
	// from one that is exactly at the limit.
	payload, err := io.ReadAll(io.LimitReader(httpResp.Body, maxResponseBytes+1))
	if err != nil {
		return RawControlResponse{}, err
	}
	oversized := len(payload) > maxResponseBytes
	if oversized {
		payload = payload[:maxResponseBytes]
	}
	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
		return RawControlResponse{}, fmt.Errorf("backend returned status %d: %s", httpResp.StatusCode, string(payload))
	}
	if oversized {
		// A truncated body would be handed to callers as if it were complete JSON.
		return RawControlResponse{}, fmt.Errorf("backend response exceeds %d bytes", maxResponseBytes)
	}
	return RawControlResponse{StatusCode: httpResp.StatusCode, Body: json.RawMessage(payload)}, nil
}
func (c *Client) getJSON(ctx context.Context, path string, response any) error {
httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, c.baseURL+path, nil)
if err != nil {
@@ -21,6 +21,11 @@ type Config struct {
NodeName string
StateDir string
WorkloadSupervisionEnabled bool
WebIngressRuntimeEnabled bool
WebIngressSigningPrivateKey string
WebIngressSigningKeyID string
WebIngressTrustedKeysJSON string
WebIngressRuntimeServiceClasses string
HeartbeatInterval time.Duration
EnrollmentPollInterval time.Duration
EnrollmentPollTimeout time.Duration
@@ -43,6 +48,12 @@ type Config struct {
MeshAdvertiseTransport string
MeshConnectivityMode string
MeshNATType string
MeshLocalSegmentID string
MeshNATGroupID string
MeshSTUNReflexiveEndpoint string
MeshSTUNServer string
MeshRelayNodeID string
MeshRelayEndpoint string
MeshRegion string
MeshSyntheticConfigPath string
MeshPeerEndpointsJSON string
@@ -68,9 +79,14 @@ func Load(args []string, env map[string]string) (Config, error) {
fs.StringVar(&cfg.NodeName, "node-name", getEnv(env, "RAP_NODE_NAME", hostnameOrDefault()), "Node display name.")
fs.StringVar(&cfg.StateDir, "state-dir", getEnv(env, "RAP_NODE_STATE_DIR", defaultStateDir), "Local node-agent state directory.")
fs.BoolVar(&cfg.WorkloadSupervisionEnabled, "workload-supervision-enabled", getEnvBool(env, "RAP_WORKLOAD_SUPERVISION_ENABLED", false), "Enable desired workload polling and status reporting. Disabled by default while service runtime is not implemented.")
fs.BoolVar(&cfg.WebIngressRuntimeEnabled, "web-ingress-runtime-enabled", getEnvBool(env, "RAP_WEB_INGRESS_RUNTIME_ENABLED", false), "Enable the future real 80/443 web ingress listener runtime. Disabled by default; contract probe remains safe without it.")
fs.StringVar(&cfg.WebIngressSigningPrivateKey, "web-ingress-signing-private-key", getEnv(env, "RAP_WEB_INGRESS_SIGNING_PRIVATE_KEY", ""), "Base64 Ed25519 private key used to sign web ingress fabric envelopes. Empty keeps signing disabled.")
fs.StringVar(&cfg.WebIngressSigningKeyID, "web-ingress-signing-key-id", getEnv(env, "RAP_WEB_INGRESS_SIGNING_KEY_ID", ""), "Optional key id for web ingress envelope signatures.")
fs.StringVar(&cfg.WebIngressTrustedKeysJSON, "web-ingress-trusted-keys-json", getEnv(env, "RAP_WEB_INGRESS_TRUSTED_KEYS_JSON", ""), "JSON map or array of trusted Ed25519 public keys for web ingress runtime receiver.")
fs.StringVar(&cfg.WebIngressRuntimeServiceClasses, "web-ingress-runtime-service-classes", getEnv(env, "RAP_WEB_INGRESS_RUNTIME_SERVICE_CLASSES", ""), "Optional comma-separated allow-list of web ingress runtime service classes accepted by this node.")
fs.BoolVar(&cfg.MeshSyntheticRuntimeEnabled, "mesh-synthetic-runtime-enabled", getEnvBool(env, "RAP_MESH_SYNTHETIC_RUNTIME_ENABLED", false), "Enable C17A synthetic fabric probe runtime. Disabled by default.")
fs.BoolVar(&cfg.MeshProductionForwardingEnabled, "mesh-production-forwarding-enabled", getEnvBool(env, "RAP_MESH_PRODUCTION_FORWARDING_ENABLED", false), "Enable production fabric-control direct next-hop forwarding gate. Disabled by default.")
fs.BoolVar(&cfg.MeshFabricSessionEnabled, "mesh-fabric-session-enabled", getEnvBool(env, "RAP_MESH_FABRIC_SESSION_ENABLED", false), "Enable authenticated fabric session WebSocket endpoint. Disabled by default.")
fs.BoolVar(&cfg.MeshFabricSessionEnabled, "mesh-fabric-session-enabled", getEnvBool(env, "RAP_MESH_FABRIC_SESSION_ENABLED", false), "Enable authenticated fabric session endpoint. Disabled by default.")
fs.BoolVar(&cfg.VPNFabricSessionTransportEnabled, "vpn-fabric-session-transport-enabled", getEnvBool(env, "RAP_VPN_FABRIC_SESSION_TRANSPORT_ENABLED", false), "Route VPN packet transport over persistent fabric session when explicitly enabled. Disabled by default.")
fs.BoolVar(&cfg.MeshQUICFabricEnabled, "mesh-quic-fabric-enabled", getEnvBool(env, "RAP_MESH_QUIC_FABRIC_ENABLED", false), "Enable QUIC/UDP fabric listener. Disabled by default.")
fs.StringVar(&cfg.MeshQUICFabricListenAddr, "mesh-quic-fabric-listen-addr", getEnv(env, "RAP_MESH_QUIC_FABRIC_LISTEN_ADDR", ""), "Listen address for QUIC/UDP fabric endpoint, for example :19443.")
@@ -84,9 +100,15 @@ func Load(args []string, env map[string]string) (Config, error) {
fs.IntVar(&cfg.MeshListenAutoPortEnd, "mesh-listen-auto-port-end", getEnvInt(env, "RAP_MESH_LISTEN_AUTO_PORT_END", 19231), "Last port used when mesh listen port mode is auto.")
fs.StringVar(&cfg.MeshAdvertiseEndpoint, "mesh-advertise-endpoint", getEnv(env, "RAP_MESH_ADVERTISE_ENDPOINT", ""), "Advertised mesh endpoint reported to the Control Plane. Empty disables endpoint reporting.")
fs.StringVar(&cfg.MeshAdvertiseEndpointsJSON, "mesh-advertise-endpoints-json", getEnv(env, "RAP_MESH_ADVERTISE_ENDPOINTS_JSON", ""), "JSON array of advertised mesh endpoint candidates, including private/corporate endpoints.")
fs.StringVar(&cfg.MeshAdvertiseTransport, "mesh-advertise-transport", getEnv(env, "RAP_MESH_ADVERTISE_TRANSPORT", "direct_tcp_tls"), "Transport label for the advertised mesh endpoint.")
fs.StringVar(&cfg.MeshAdvertiseTransport, "mesh-advertise-transport", getEnv(env, "RAP_MESH_ADVERTISE_TRANSPORT", "quic"), "Transport label for the advertised mesh endpoint.")
fs.StringVar(&cfg.MeshConnectivityMode, "mesh-connectivity-mode", getEnv(env, "RAP_MESH_CONNECTIVITY_MODE", "direct"), "Connectivity mode reported with the advertised mesh endpoint.")
fs.StringVar(&cfg.MeshNATType, "mesh-nat-type", getEnv(env, "RAP_MESH_NAT_TYPE", "unknown"), "NAT type hint reported with the advertised mesh endpoint.")
fs.StringVar(&cfg.MeshLocalSegmentID, "mesh-local-segment-id", getEnv(env, "RAP_MESH_LOCAL_SEGMENT_ID", ""), "Optional local LAN/site segment ID advertised with QUIC endpoint candidates.")
fs.StringVar(&cfg.MeshNATGroupID, "mesh-nat-group-id", getEnv(env, "RAP_MESH_NAT_GROUP_ID", ""), "Optional NAT group ID advertised with QUIC endpoint candidates.")
fs.StringVar(&cfg.MeshSTUNReflexiveEndpoint, "mesh-stun-reflexive-endpoint", getEnv(env, "RAP_MESH_STUN_REFLEXIVE_ENDPOINT", ""), "Optional STUN-discovered reflexive QUIC endpoint, for example quic://203.0.113.10:19443.")
fs.StringVar(&cfg.MeshSTUNServer, "mesh-stun-server", getEnv(env, "RAP_MESH_STUN_SERVER", ""), "Optional STUN server name used to discover the reflexive endpoint.")
fs.StringVar(&cfg.MeshRelayNodeID, "mesh-relay-node-id", getEnv(env, "RAP_MESH_RELAY_NODE_ID", ""), "Optional relay node ID for relay-required QUIC fallback candidates.")
fs.StringVar(&cfg.MeshRelayEndpoint, "mesh-relay-endpoint", getEnv(env, "RAP_MESH_RELAY_ENDPOINT", ""), "Optional relay QUIC endpoint for relay-required fallback candidates.")
fs.StringVar(&cfg.MeshRegion, "mesh-region", getEnv(env, "RAP_MESH_REGION", ""), "Optional region/site hint for the advertised mesh endpoint.")
fs.StringVar(&cfg.MeshSyntheticConfigPath, "mesh-synthetic-config", getEnv(env, "RAP_MESH_SYNTHETIC_CONFIG", ""), "Path to scoped synthetic mesh config snapshot. Preferred over debug JSON env.")
fs.StringVar(&cfg.MeshPeerEndpointsJSON, "mesh-peer-endpoints-json", getEnv(env, "RAP_MESH_PEER_ENDPOINTS_JSON", ""), "JSON object mapping peer node_id to synthetic mesh endpoint URL.")
@@ -129,12 +151,27 @@ func Load(args []string, env map[string]string) (Config, error) {
cfg.MeshAdvertiseEndpoint = strings.TrimRight(strings.TrimSpace(cfg.MeshAdvertiseEndpoint), "/")
cfg.MeshAdvertiseEndpointsJSON = strings.TrimSpace(cfg.MeshAdvertiseEndpointsJSON)
cfg.MeshAdvertiseTransport = strings.TrimSpace(cfg.MeshAdvertiseTransport)
if cfg.MeshAdvertiseTransport == "" {
cfg.MeshAdvertiseTransport = "quic"
}
cfg.MeshAdvertiseTransport = normalizeLegacyAdvertiseTransport(cfg.MeshAdvertiseTransport)
cfg.MeshAdvertiseEndpoint = normalizeLegacyEndpointSchemeToQUIC(cfg.MeshAdvertiseEndpoint)
cfg.MeshConnectivityMode = strings.TrimSpace(cfg.MeshConnectivityMode)
cfg.MeshNATType = strings.TrimSpace(cfg.MeshNATType)
cfg.MeshLocalSegmentID = strings.TrimSpace(cfg.MeshLocalSegmentID)
cfg.MeshNATGroupID = strings.TrimSpace(cfg.MeshNATGroupID)
cfg.MeshSTUNReflexiveEndpoint = normalizeLegacyEndpointSchemeToQUIC(strings.TrimRight(strings.TrimSpace(cfg.MeshSTUNReflexiveEndpoint), "/"))
cfg.MeshSTUNServer = strings.TrimSpace(cfg.MeshSTUNServer)
cfg.MeshRelayNodeID = strings.TrimSpace(cfg.MeshRelayNodeID)
cfg.MeshRelayEndpoint = normalizeLegacyEndpointSchemeToQUIC(strings.TrimRight(strings.TrimSpace(cfg.MeshRelayEndpoint), "/"))
cfg.MeshRegion = strings.TrimSpace(cfg.MeshRegion)
cfg.MeshSyntheticConfigPath = strings.TrimSpace(cfg.MeshSyntheticConfigPath)
cfg.MeshPeerEndpointsJSON = strings.TrimSpace(cfg.MeshPeerEndpointsJSON)
cfg.MeshSyntheticRoutesJSON = strings.TrimSpace(cfg.MeshSyntheticRoutesJSON)
cfg.WebIngressSigningPrivateKey = strings.TrimSpace(cfg.WebIngressSigningPrivateKey)
cfg.WebIngressSigningKeyID = strings.TrimSpace(cfg.WebIngressSigningKeyID)
cfg.WebIngressTrustedKeysJSON = strings.TrimSpace(cfg.WebIngressTrustedKeysJSON)
cfg.WebIngressRuntimeServiceClasses = strings.TrimSpace(cfg.WebIngressRuntimeServiceClasses)
cfg.RemoteWorkspaceRealAdapterCommand = strings.TrimSpace(cfg.RemoteWorkspaceRealAdapterCommand)
cfg.RemoteWorkspaceRealAdapterArgsJSON = strings.TrimSpace(cfg.RemoteWorkspaceRealAdapterArgsJSON)
cfg.RemoteWorkspaceRealAdapterWorkDir = strings.TrimSpace(cfg.RemoteWorkspaceRealAdapterWorkDir)
@@ -176,9 +213,62 @@ func Load(args []string, env map[string]string) (Config, error) {
if cfg.MeshListenAutoPortStart > cfg.MeshListenAutoPortEnd {
return Config{}, errors.New("mesh listen auto port start must be less than or equal to end")
}
if !isQUICAdvertiseTransport(cfg.MeshAdvertiseTransport) {
return Config{}, errors.New("mesh advertise transport must be a QUIC transport label")
}
if hasLegacyEndpointScheme(cfg.MeshAdvertiseEndpoint) {
return Config{}, errors.New("mesh advertise endpoint must be a QUIC endpoint")
}
if cfg.MeshSTUNReflexiveEndpoint != "" && hasLegacyEndpointScheme(cfg.MeshSTUNReflexiveEndpoint) {
return Config{}, errors.New("mesh STUN reflexive endpoint must be a QUIC endpoint")
}
if cfg.MeshRelayEndpoint != "" && hasLegacyEndpointScheme(cfg.MeshRelayEndpoint) {
return Config{}, errors.New("mesh relay endpoint must be a QUIC endpoint")
}
return cfg, nil
}
// isQUICAdvertiseTransport reports whether label, once surrounding whitespace
// is trimmed and the text is lower-cased, is one of the accepted QUIC
// transport labels.
func isQUICAdvertiseTransport(label string) bool {
	normalized := strings.ToLower(strings.TrimSpace(label))
	accepted := [...]string{
		"quic",
		"direct_quic",
		"udp_quic",
		"quic_udp",
		"lan_quic",
		"reverse_quic",
		"relay_quic",
		"ice_quic",
	}
	for _, candidate := range accepted {
		if normalized == candidate {
			return true
		}
	}
	return false
}
// normalizeLegacyAdvertiseTransport maps legacy (non-QUIC) transport labels to
// their QUIC replacement. Matching is case-insensitive and ignores surrounding
// whitespace. Labels that are not in the legacy table are returned trimmed but
// otherwise unchanged (their case is preserved).
func normalizeLegacyAdvertiseTransport(label string) string {
	trimmed := strings.TrimSpace(label)
	key := strings.ToLower(trimmed)

	migrations := []struct {
		replacement string
		legacy      []string
	}{
		{"direct_quic", []string{"direct_http", "direct_https", "direct_tcp_tls", "http", "https", "ws", "wss", "websocket"}},
		{"reverse_quic", []string{"outbound_reverse", "reverse", "reverse_outbound"}},
		{"relay_quic", []string{"relay", "relay_control"}},
	}
	for _, group := range migrations {
		for _, old := range group.legacy {
			if key == old {
				return group.replacement
			}
		}
	}
	return trimmed
}
// normalizeLegacyEndpointSchemeToQUIC trims surrounding whitespace and trailing
// slashes from endpoint and, when it starts with an http, https, ws or wss
// scheme (matched case-insensitively), swaps that scheme for "quic://". The
// remainder of the endpoint keeps its original case. Endpoints without one of
// those schemes are returned trimmed.
func normalizeLegacyEndpointSchemeToQUIC(endpoint string) string {
	trimmed := strings.TrimRight(strings.TrimSpace(endpoint), "/")
	folded := strings.ToLower(trimmed)

	// The four schemes are mutually exclusive prefixes, so at most one case fires.
	schemeLen := 0
	switch {
	case strings.HasPrefix(folded, "http://"):
		schemeLen = len("http://")
	case strings.HasPrefix(folded, "https://"):
		schemeLen = len("https://")
	case strings.HasPrefix(folded, "ws://"):
		schemeLen = len("ws://")
	case strings.HasPrefix(folded, "wss://"):
		schemeLen = len("wss://")
	}
	if schemeLen == 0 {
		return trimmed
	}
	return "quic://" + trimmed[schemeLen:]
}
// hasLegacyEndpointScheme reports whether endpoint, ignoring case and
// surrounding whitespace, begins with an http, https, ws or wss scheme.
func hasLegacyEndpointScheme(endpoint string) bool {
	candidate := strings.ToLower(strings.TrimSpace(endpoint))
	for _, scheme := range []string{"http://", "https://", "ws://", "wss://"} {
		if strings.HasPrefix(candidate, scheme) {
			return true
		}
	}
	return false
}
func readEnv() map[string]string {
out := map[string]string{}
for _, pair := range os.Environ() {
@@ -15,6 +15,11 @@ func TestLoadConfigFromEnvAndArgs(t *testing.T) {
"RAP_NODE_NAME": "node-a",
"RAP_NODE_STATE_DIR": "/tmp/rap-node",
"RAP_WORKLOAD_SUPERVISION_ENABLED": "true",
"RAP_WEB_INGRESS_RUNTIME_ENABLED": "true",
"RAP_WEB_INGRESS_SIGNING_PRIVATE_KEY": " private-key-b64 ",
"RAP_WEB_INGRESS_SIGNING_KEY_ID": " web-key-1 ",
"RAP_WEB_INGRESS_TRUSTED_KEYS_JSON": ` {"web-key-1":"public-key-b64"} `,
"RAP_WEB_INGRESS_RUNTIME_SERVICE_CLASSES": " platform_admin, cluster_admin ",
"RAP_HEARTBEAT_INTERVAL_SECONDS": "7",
"RAP_ENROLLMENT_POLL_INTERVAL_SECONDS": "3",
"RAP_ENROLLMENT_POLL_TIMEOUT_SECONDS": "30",
@@ -32,11 +37,17 @@ func TestLoadConfigFromEnvAndArgs(t *testing.T) {
"RAP_MESH_LISTEN_PORT_MODE": "auto",
"RAP_MESH_LISTEN_AUTO_PORT_START": "19010",
"RAP_MESH_LISTEN_AUTO_PORT_END": "19020",
"RAP_MESH_ADVERTISE_ENDPOINT": "https://node-a.example.test:443/",
"RAP_MESH_ADVERTISE_ENDPOINT": "quic://node-a.example.test:19443/",
"RAP_MESH_ADVERTISE_ENDPOINTS_JSON": `[{"endpoint_id":"node-a-lan","address":"10.10.0.20:19001"}]`,
"RAP_MESH_ADVERTISE_TRANSPORT": "wss",
"RAP_MESH_ADVERTISE_TRANSPORT": "direct_quic",
"RAP_MESH_CONNECTIVITY_MODE": "outbound_only",
"RAP_MESH_NAT_TYPE": "symmetric",
"RAP_MESH_LOCAL_SEGMENT_ID": "site-a",
"RAP_MESH_NAT_GROUP_ID": "nat-a",
"RAP_MESH_STUN_REFLEXIVE_ENDPOINT": "quic://203.0.113.20:19443/",
"RAP_MESH_STUN_SERVER": "stun.example.test:3478",
"RAP_MESH_RELAY_NODE_ID": "node-r",
"RAP_MESH_RELAY_ENDPOINT": "quic://node-r.example.test:19443/",
"RAP_MESH_REGION": "eu",
"RAP_MESH_SYNTHETIC_CONFIG": "/tmp/rap-node/mesh-synthetic.json",
"RAP_MESH_PEER_ENDPOINTS_JSON": `{"node-b":"http://127.0.0.1:19002"}`,
@@ -67,6 +78,15 @@ func TestLoadConfigFromEnvAndArgs(t *testing.T) {
if !cfg.WorkloadSupervisionEnabled {
t.Fatal("WorkloadSupervisionEnabled = false, want true")
}
if !cfg.WebIngressRuntimeEnabled {
t.Fatal("WebIngressRuntimeEnabled = false, want true")
}
if cfg.WebIngressSigningPrivateKey != "private-key-b64" ||
cfg.WebIngressSigningKeyID != "web-key-1" ||
cfg.WebIngressTrustedKeysJSON != `{"web-key-1":"public-key-b64"}` ||
cfg.WebIngressRuntimeServiceClasses != "platform_admin, cluster_admin" {
t.Fatalf("unexpected web ingress key config: %+v", cfg)
}
if !cfg.MeshSyntheticRuntimeEnabled {
t.Fatal("MeshSyntheticRuntimeEnabled = false, want true")
}
@@ -100,11 +120,17 @@ func TestLoadConfigFromEnvAndArgs(t *testing.T) {
if cfg.MeshListenPortMode != "auto" || cfg.MeshListenAutoPortStart != 19010 || cfg.MeshListenAutoPortEnd != 19020 {
t.Fatalf("unexpected mesh listen port config: %+v", cfg)
}
if cfg.MeshAdvertiseEndpoint != "https://node-a.example.test:443" ||
if cfg.MeshAdvertiseEndpoint != "quic://node-a.example.test:19443" ||
cfg.MeshAdvertiseEndpointsJSON == "" ||
cfg.MeshAdvertiseTransport != "wss" ||
cfg.MeshAdvertiseTransport != "direct_quic" ||
cfg.MeshConnectivityMode != "outbound_only" ||
cfg.MeshNATType != "symmetric" ||
cfg.MeshLocalSegmentID != "site-a" ||
cfg.MeshNATGroupID != "nat-a" ||
cfg.MeshSTUNReflexiveEndpoint != "quic://203.0.113.20:19443" ||
cfg.MeshSTUNServer != "stun.example.test:3478" ||
cfg.MeshRelayNodeID != "node-r" ||
cfg.MeshRelayEndpoint != "quic://node-r.example.test:19443" ||
cfg.MeshRegion != "eu" {
t.Fatalf("unexpected mesh advertise config: %+v", cfg)
}
@@ -139,6 +165,9 @@ func TestLoadConfigDefaultsEnrollmentPollingToNoTimeout(t *testing.T) {
cfg.RemoteWorkspaceRealAdapterWorkDir != "" {
t.Fatalf("real adapter config should default disabled and empty: %+v", cfg)
}
if cfg.WebIngressRuntimeEnabled {
t.Fatalf("web ingress runtime should default disabled: %+v", cfg)
}
}
func TestLoadConfigRejectsNegativeProductionObservationSinkCapacity(t *testing.T) {
@@ -162,3 +191,33 @@ func TestLoadConfigRejectsTooLargeProductionObservationSinkCapacity(t *testing.T
t.Fatal("Load returned nil error for too-large sink capacity")
}
}
// TestLoadConfigNormalizesLegacyMeshAdvertiseTransport checks that Load accepts
// the legacy "wss" transport label and reports it as "direct_quic".
func TestLoadConfigNormalizesLegacyMeshAdvertiseTransport(t *testing.T) {
	env := map[string]string{
		"RAP_BACKEND_URL":              "http://backend/api/v1",
		"RAP_NODE_NAME":                "node-a",
		"RAP_MESH_ADVERTISE_ENDPOINT":  "quic://node-a.example.test:19443",
		"RAP_MESH_ADVERTISE_TRANSPORT": "wss",
	}

	cfg, err := Load(nil, env)
	if err != nil {
		t.Fatalf("Load returned error for legacy mesh advertise transport migration: %v", err)
	}
	if got := cfg.MeshAdvertiseTransport; got != "direct_quic" {
		t.Fatalf("transport = %q, want direct_quic", got)
	}
}
// TestLoadConfigNormalizesLegacyMeshAdvertiseEndpointScheme checks that Load
// rewrites an https:// advertise endpoint to the quic:// scheme instead of
// rejecting it.
func TestLoadConfigNormalizesLegacyMeshAdvertiseEndpointScheme(t *testing.T) {
	env := map[string]string{
		"RAP_BACKEND_URL":              "http://backend/api/v1",
		"RAP_NODE_NAME":                "node-a",
		"RAP_MESH_ADVERTISE_ENDPOINT":  "https://node-a.example.test:443",
		"RAP_MESH_ADVERTISE_TRANSPORT": "direct_quic",
	}

	cfg, err := Load(nil, env)
	if err != nil {
		t.Fatalf("Load returned error for legacy mesh advertise endpoint migration: %v", err)
	}
	if got := cfg.MeshAdvertiseEndpoint; got != "quic://node-a.example.test:443" {
		t.Fatalf("endpoint = %q, want quic scheme", got)
	}
}
@@ -1,6 +1,9 @@
package fabricproto
import "errors"
import (
"crypto/sha256"
"errors"
)
var (
ErrUnsupportedSessionFrame = errors.New("unsupported fabric session frame")
@@ -62,6 +65,7 @@ func (s *Session) HandleFrame(frame Frame) (SessionEvent, []Frame, error) {
TrafficClass: frame.TrafficClass,
StreamID: frame.StreamID,
Sequence: frame.Sequence,
Payload: DataAckPayload(frame.Payload),
}}, nil
case FrameAck:
if err := s.Ack(frame.StreamID, frame.Sequence); err != nil {
@@ -103,6 +107,11 @@ func (s *Session) HandleFrame(frame Frame) (SessionEvent, []Frame, error) {
}
}
// DataAckPayload returns the SHA-256 digest of payload. HandleFrame places this
// digest in the Payload of the ack frame it emits for a received data frame.
func DataAckPayload(payload []byte) []byte {
	hasher := sha256.New()
	// hash.Hash.Write is documented to never return an error.
	hasher.Write(payload)
	return hasher.Sum(nil)
}
func (s *Session) handleDataFrame(frame Frame) (SessionEvent, error) {
s.mu.Lock()
defer s.mu.Unlock()
@@ -1,6 +1,7 @@
package fabricproto
import (
"bytes"
"errors"
"testing"
)
@@ -36,6 +37,9 @@ func TestHandleFrameOpensStreamAndReceivesData(t *testing.T) {
if len(responses) != 1 || responses[0].Type != FrameAck || responses[0].StreamID != 7 || responses[0].Sequence != 11 {
t.Fatalf("responses = %+v, want ack for stream 7 seq 11", responses)
}
if !bytes.Equal(responses[0].Payload, DataAckPayload([]byte("rdp-input"))) {
t.Fatalf("ack checksum = %x, want sha256 payload checksum", responses[0].Payload)
}
snapshot := session.Snapshot()
if snapshot.FramesReceived != 1 || snapshot.Streams[7].Received != 1 {
t.Fatalf("received metrics = %+v stream=%+v", snapshot, snapshot.Streams[7])
@@ -136,6 +136,12 @@ func (cfg RuntimeConfig) ValidateInstall() error {
if cfg.MeshListenAutoPortStart > 0 && cfg.MeshListenAutoPortEnd > 0 && cfg.MeshListenAutoPortStart > cfg.MeshListenAutoPortEnd {
return errors.New("mesh listen auto port start must be less than or equal to end")
}
if cfg.MeshAdvertiseTransport != "" && !isQUICAdvertiseTransport(cfg.MeshAdvertiseTransport) {
return errors.New("mesh advertise transport must be a QUIC transport label")
}
if hasLegacyEndpointScheme(cfg.MeshAdvertiseEndpoint) {
return errors.New("mesh advertise endpoint must be a QUIC endpoint")
}
if cfg.ProductionObservationSinkCap < 0 {
return errors.New("production observation sink capacity must not be negative")
}
@@ -153,3 +159,20 @@ func firstNonEmpty(value, fallback string) string {
}
return strings.TrimSpace(value)
}
// isQUICAdvertiseTransport reports whether label names a QUIC transport. The
// comparison ignores case and surrounding whitespace.
func isQUICAdvertiseTransport(label string) bool {
	switch key := strings.ToLower(strings.TrimSpace(label)); key {
	case "quic", "quic_udp", "udp_quic":
		return true
	case "direct_quic", "lan_quic", "reverse_quic", "relay_quic", "ice_quic":
		return true
	}
	return false
}
// hasLegacyEndpointScheme reports whether endpoint starts with one of the
// non-QUIC schemes http://, https://, ws:// or wss://. Case and surrounding
// whitespace are ignored.
func hasLegacyEndpointScheme(endpoint string) bool {
	lowered := strings.ToLower(strings.TrimSpace(endpoint))
	switch {
	case strings.HasPrefix(lowered, "http://"),
		strings.HasPrefix(lowered, "https://"),
		strings.HasPrefix(lowered, "ws://"),
		strings.HasPrefix(lowered, "wss://"):
		return true
	default:
		return false
	}
}
@@ -73,7 +73,8 @@ func TestDockerRunArgsBuildNodeRuntimePlacement(t *testing.T) {
VPNFabricQUICMaxStreamsPerConn: 24,
VPNFabricQUICIdleTTLSeconds: 120,
MeshListenAddr: ":19131",
MeshAdvertiseEndpoint: "http://10.0.0.11:19131/",
MeshAdvertiseEndpoint: "quic://10.0.0.11:19443/",
MeshAdvertiseTransport: "direct_quic",
MeshConnectivityMode: "private_lan",
})
@@ -94,7 +95,8 @@ func TestDockerRunArgsBuildNodeRuntimePlacement(t *testing.T) {
"RAP_VPN_FABRIC_QUIC_MAX_STREAMS_PER_CONN=24",
"RAP_VPN_FABRIC_QUIC_IDLE_TTL_SECONDS=120",
"RAP_MESH_LISTEN_ADDR=:19131",
"RAP_MESH_ADVERTISE_ENDPOINT=http://10.0.0.11:19131",
"RAP_MESH_ADVERTISE_ENDPOINT=quic://10.0.0.11:19443",
"RAP_MESH_ADVERTISE_TRANSPORT=direct_quic",
"RAP_MESH_CONNECTIVITY_MODE=private_lan",
"rap-node-agent:test",
} {
@@ -384,3 +386,35 @@ func TestValidateRequiresJoinTokenUnlessReplacingExistingState(t *testing.T) {
t.Fatalf("replace update should allow missing join token: %v", err)
}
}
// TestValidateRejectsLegacyMeshAdvertiseTransport checks that ValidateInstall
// refuses a runtime config whose advertise transport is the legacy "wss" label.
func TestValidateRejectsLegacyMeshAdvertiseTransport(t *testing.T) {
	cfg := RuntimeConfig{
		BackendURL:               "http://control/api/v1",
		ClusterID:                "cluster-1",
		JoinToken:                "join-secret",
		NodeName:                 "node-a",
		MeshAdvertiseEndpoint:    "quic://10.0.0.11:19443",
		MeshAdvertiseTransport:   "wss",
		MeshQUICFabricEnabled:    true,
		MeshQUICFabricListenAddr: ":19443",
	}

	err := cfg.ValidateInstall()
	if err != nil && strings.Contains(err.Error(), "QUIC transport") {
		return
	}
	t.Fatalf("expected QUIC transport validation error, got %v", err)
}
// TestValidateRejectsLegacyMeshAdvertiseEndpointScheme checks that
// ValidateInstall refuses a runtime config whose advertise endpoint still uses
// an http:// scheme.
func TestValidateRejectsLegacyMeshAdvertiseEndpointScheme(t *testing.T) {
	cfg := RuntimeConfig{
		BackendURL:               "http://control/api/v1",
		ClusterID:                "cluster-1",
		JoinToken:                "join-secret",
		NodeName:                 "node-a",
		MeshAdvertiseEndpoint:    "http://10.0.0.11:19131",
		MeshAdvertiseTransport:   "direct_quic",
		MeshQUICFabricEnabled:    true,
		MeshQUICFabricListenAddr: ":19443",
	}

	err := cfg.ValidateInstall()
	if err != nil && strings.Contains(err.Error(), "QUIC endpoint") {
		return
	}
	t.Fatalf("expected QUIC endpoint validation error, got %v", err)
}
@@ -16,6 +16,7 @@ import (
"strings"
"time"
clusterauth "github.com/example/remote-access-platform/agents/rap-node-agent/internal/authority"
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/state"
)
@@ -104,22 +105,37 @@ type NodeUpdatePlanResponse struct {
}
type NodeUpdatePlan struct {
SchemaVersion string `json:"schema_version"`
ClusterID string `json:"cluster_id"`
NodeID string `json:"node_id"`
Product string `json:"product"`
CurrentVersion string `json:"current_version,omitempty"`
Action string `json:"action"`
Reason string `json:"reason"`
TargetVersion string `json:"target_version,omitempty"`
Channel string `json:"channel,omitempty"`
Strategy string `json:"strategy,omitempty"`
RollbackAllowed bool `json:"rollback_allowed"`
HealthWindowSec int `json:"health_window_seconds,omitempty"`
Artifact *ReleaseArtifact `json:"artifact,omitempty"`
AuthorityPayload json.RawMessage `json:"authority_payload,omitempty"`
AuthoritySignature json.RawMessage `json:"authority_signature,omitempty"`
ProductionForwarding bool `json:"production_forwarding"`
SchemaVersion string `json:"schema_version"`
ClusterID string `json:"cluster_id"`
NodeID string `json:"node_id"`
Product string `json:"product"`
CurrentVersion string `json:"current_version,omitempty"`
Action string `json:"action"`
Reason string `json:"reason"`
TargetVersion string `json:"target_version,omitempty"`
Channel string `json:"channel,omitempty"`
Strategy string `json:"strategy,omitempty"`
RollbackAllowed bool `json:"rollback_allowed"`
HealthWindowSec int `json:"health_window_seconds,omitempty"`
Artifact *ReleaseArtifact `json:"artifact,omitempty"`
AuthorityPayload json.RawMessage `json:"authority_payload,omitempty"`
AuthoritySignature json.RawMessage `json:"authority_signature,omitempty"`
AuthorityQuorum *clusterauth.QuorumEnvelope `json:"authority_quorum,omitempty"`
ProductionForwarding bool `json:"production_forwarding"`
}
// nodeUpdatePlanAuthorityPayload is the decoded form of
// NodeUpdatePlan.AuthorityPayload. verifyNodeUpdatePlanAuthorityPayload
// unmarshals the signed payload into this struct and compares its fields with
// the corresponding fields of the plan that carried it.
type nodeUpdatePlanAuthorityPayload struct {
// SchemaVersion is expected to be "rap.node_update_plan_authority.v1".
SchemaVersion string `json:"schema_version"`
ClusterID string `json:"cluster_id"`
NodeID string `json:"node_id"`
Product string `json:"product"`
CurrentVersion string `json:"current_version,omitempty"`
Action string `json:"action"`
TargetVersion string `json:"target_version,omitempty"`
// ArtifactSHA256 and ArtifactURL are compared with the plan artifact's
// SHA256 and URL by verifyNodeUpdatePlanAuthorityPayload.
ArtifactSHA256 string `json:"artifact_sha256,omitempty"`
ArtifactURL string `json:"artifact_url,omitempty"`
// ControlPlaneOnly is decoded but has no counterpart field on NodeUpdatePlan
// to compare against.
ControlPlaneOnly bool `json:"control_plane_only"`
ProductionForwarding bool `json:"production_forwarding"`
}
type ReleaseArtifact struct {
@@ -516,9 +532,87 @@ func FetchNodeUpdatePlan(ctx context.Context, req UpdateRequest) (NodeUpdatePlan
if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
return NodeUpdatePlan{}, err
}
if err := verifyNodeUpdatePlanAuthority(req, out.Plan); err != nil {
return NodeUpdatePlan{}, err
}
return out.Plan, nil
}
// verifyNodeUpdatePlanAuthority authenticates plan against the cluster
// authority pinned in the node state referenced by req.
//
// When pinnedUpdatePlanAuthority reports no pinned authority the plan is
// accepted without any check. When the pinned identity carries a quorum
// descriptor, the plan must include an authority payload and a quorum envelope
// that clusterauth.VerifyQuorumRaw accepts for the "update-authority" scope.
// Otherwise the plan must include an authority payload and a single signature
// that matches the pinned fingerprint (when one is pinned) and verifies against
// the pinned public key. In both signed cases the payload contents are then
// compared with the plan by verifyNodeUpdatePlanAuthorityPayload.
func verifyNodeUpdatePlanAuthority(req UpdateRequest, plan NodeUpdatePlan) error {
	pinned, hasPin := pinnedUpdatePlanAuthority(req)
	if !hasPin {
		return nil
	}

	if len(pinned.ClusterAuthorityQuorum) == 0 {
		// Single-authority path: one signature over the payload.
		if len(plan.AuthorityPayload) == 0 || len(plan.AuthoritySignature) == 0 {
			return errors.New("update plan authority signature is required by pinned cluster authority")
		}
		var sig clusterauth.Signature
		if err := json.Unmarshal(plan.AuthoritySignature, &sig); err != nil {
			return fmt.Errorf("invalid update plan authority signature: %w", err)
		}
		if want := pinned.ClusterAuthorityFingerprint; want != "" && sig.KeyFingerprint != want {
			return errors.New("update plan authority fingerprint mismatch")
		}
		if err := clusterauth.VerifyRaw(pinned.ClusterAuthorityPublicKey, plan.AuthorityPayload, sig); err != nil {
			return fmt.Errorf("update plan authority signature rejected: %w", err)
		}
		return verifyNodeUpdatePlanAuthorityPayload(plan)
	}

	// Quorum path: the envelope must satisfy the pinned quorum descriptor.
	if plan.AuthorityQuorum == nil {
		return errors.New("update plan quorum authority is required by pinned cluster quorum")
	}
	var quorum clusterauth.QuorumDescriptor
	if err := json.Unmarshal(pinned.ClusterAuthorityQuorum, &quorum); err != nil {
		return fmt.Errorf("invalid pinned cluster authority quorum: %w", err)
	}
	if len(plan.AuthorityPayload) == 0 {
		return errors.New("update plan authority payload is required by pinned cluster quorum")
	}
	if err := clusterauth.VerifyQuorumRaw(quorum, plan.AuthorityPayload, *plan.AuthorityQuorum, "update-authority"); err != nil {
		return fmt.Errorf("update plan quorum authority rejected: %w", err)
	}
	return verifyNodeUpdatePlanAuthorityPayload(plan)
}
// verifyNodeUpdatePlanAuthorityPayload decodes plan.AuthorityPayload and checks
// that every field it binds agrees with the (unauthenticated) plan fields the
// caller is about to act on.
//
// It returns an error when the payload is not valid JSON, when the schema
// version is not "rap.node_update_plan_authority.v1", or when the cluster,
// node, product, versions, action, production-forwarding flag or artifact
// SHA256/URL differ between payload and plan.
func verifyNodeUpdatePlanAuthorityPayload(plan NodeUpdatePlan) error {
	var payload nodeUpdatePlanAuthorityPayload
	if err := json.Unmarshal(plan.AuthorityPayload, &payload); err != nil {
		return fmt.Errorf("invalid update plan authority payload: %w", err)
	}
	if payload.SchemaVersion != "rap.node_update_plan_authority.v1" ||
		payload.ClusterID != plan.ClusterID ||
		payload.NodeID != plan.NodeID ||
		payload.Product != plan.Product ||
		payload.CurrentVersion != plan.CurrentVersion ||
		payload.Action != plan.Action ||
		payload.TargetVersion != plan.TargetVersion ||
		payload.ProductionForwarding != plan.ProductionForwarding {
		return errors.New("update plan authority payload mismatch")
	}

	// Compare the artifact binding in both directions: a plan whose artifact
	// was stripped must not pass when the signed payload binds one, and a plan
	// carrying an artifact must match what was signed.
	var planSHA256, planURL string
	if plan.Artifact != nil {
		planSHA256, planURL = plan.Artifact.SHA256, plan.Artifact.URL
	}
	if payload.ArtifactSHA256 != planSHA256 || payload.ArtifactURL != planURL {
		return errors.New("update plan artifact authority payload mismatch")
	}
	return nil
}
// pinnedUpdatePlanAuthority loads the node identity stored under req.StateDir
// and returns it when it pins a cluster authority, i.e. when it carries either
// a cluster authority public key or a cluster authority quorum descriptor.
//
// The boolean is false when no state directory is configured, when the
// identity file cannot be loaded, or when the identity pins neither a public
// key nor a quorum.
//
// NOTE(review): a load error is treated the same as "nothing pinned", so the
// caller skips verification in that case — presumably to support nodes without
// saved state; confirm that failing open on an unreadable file is intended.
func pinnedUpdatePlanAuthority(req UpdateRequest) (state.Identity, bool) {
	stateDir := strings.TrimSpace(req.StateDir)
	if stateDir == "" {
		return state.Identity{}, false
	}
	identity, err := state.Load(filepath.Join(stateDir, state.FileName))
	if err != nil {
		return state.Identity{}, false
	}
	// A quorum-only identity (no single public key) is still a pin; treating it
	// as unpinned would silently skip quorum verification of update plans.
	hasPublicKey := strings.TrimSpace(identity.ClusterAuthorityPublicKey) != ""
	hasQuorum := len(identity.ClusterAuthorityQuorum) > 0
	if !hasPublicKey && !hasQuorum {
		return state.Identity{}, false
	}
	return identity, true
}
func resolveUpdateRequest(req UpdateRequest) (UpdateRequest, error) {
req = req.Normalize()
if err := req.Validate(); err != nil {
@@ -2,6 +2,9 @@ package hostagent
import (
"context"
"crypto/ed25519"
cryptorand "crypto/rand"
"encoding/base64"
"encoding/json"
"fmt"
"net/http"
@@ -12,6 +15,7 @@ import (
"testing"
"time"
clusterauth "github.com/example/remote-access-platform/agents/rap-node-agent/internal/authority"
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/state"
)
@@ -21,6 +25,101 @@ type updateRunner struct {
inspectJSON string
}
// writePinnedAuthorityIdentity generates a fresh Ed25519 authority key pair,
// saves a node identity pinning that key (public key and fingerprint) into a
// temporary state directory, and returns the directory with the key pair.
func writePinnedAuthorityIdentity(t *testing.T) (string, ed25519.PublicKey, ed25519.PrivateKey) {
	t.Helper()

	pub, priv, err := ed25519.GenerateKey(cryptorand.Reader)
	if err != nil {
		t.Fatalf("generate authority key: %v", err)
	}

	stateDir := t.TempDir()
	statePath := filepath.Join(stateDir, state.FileName)
	err = state.Save(statePath, state.Identity{
		NodeID:                      "node-1",
		ClusterID:                   "cluster-1",
		NodeName:                    "node-a",
		IdentityStatus:              "active",
		ClusterAuthorityPublicKey:   base64.StdEncoding.EncodeToString(pub),
		ClusterAuthorityFingerprint: clusterauth.Fingerprint(pub),
	})
	if err != nil {
		t.Fatalf("save identity: %v", err)
	}
	return stateDir, pub, priv
}
// writePinnedQuorumIdentity builds a 2-of-3 "update-authority" quorum
// descriptor from freshly generated Ed25519 keys, saves a node identity that
// pins the marshalled descriptor into a temporary state directory, and returns
// the directory, the descriptor and the members' private keys in member order.
func writePinnedQuorumIdentity(t *testing.T) (string, clusterauth.QuorumDescriptor, []ed25519.PrivateKey) {
	t.Helper()

	const memberCount = 3
	quorum := clusterauth.QuorumDescriptor{
		SchemaVersion: clusterauth.QuorumSchemaVersion,
		ClusterID:     "cluster-1",
		Epoch:         "epoch-1",
		Threshold:     2,
	}
	signers := make([]ed25519.PrivateKey, 0, memberCount)
	for ordinal := 1; ordinal <= memberCount; ordinal++ {
		pub, priv, err := ed25519.GenerateKey(cryptorand.Reader)
		if err != nil {
			t.Fatalf("generate authority key: %v", err)
		}
		member := clusterauth.QuorumMember{
			NodeID:               fmt.Sprintf("authority-%d", ordinal),
			Role:                 "update-authority",
			PublicKey:            base64.StdEncoding.EncodeToString(pub),
			PublicKeyFingerprint: clusterauth.Fingerprint(pub),
			Scopes:               []string{"update-authority"},
		}
		quorum.Members = append(quorum.Members, member)
		signers = append(signers, priv)
	}

	encodedQuorum, err := json.Marshal(quorum)
	if err != nil {
		t.Fatalf("marshal quorum: %v", err)
	}

	stateDir := t.TempDir()
	pinned := state.Identity{
		NodeID:                 "node-1",
		ClusterID:              "cluster-1",
		NodeName:               "node-a",
		IdentityStatus:         "active",
		ClusterAuthorityQuorum: encodedQuorum,
	}
	if err := state.Save(filepath.Join(stateDir, state.FileName), pinned); err != nil {
		t.Fatalf("save identity: %v", err)
	}
	return stateDir, quorum, signers
}
// signedAuthorityPayload marshals payload to JSON and signs its canonical form
// with privateKey. It returns the non-canonical marshalled bytes together with
// a signature record whose fingerprint is derived from publicKey.
func signedAuthorityPayload(t *testing.T, publicKey ed25519.PublicKey, privateKey ed25519.PrivateKey, payload any) (json.RawMessage, clusterauth.Signature) {
	t.Helper()

	encoded, err := json.Marshal(payload)
	if err != nil {
		t.Fatalf("marshal payload: %v", err)
	}
	signingInput, err := clusterauth.CanonicalJSON(encoded)
	if err != nil {
		t.Fatalf("canonical payload: %v", err)
	}

	rawSignature := ed25519.Sign(privateKey, signingInput)
	record := clusterauth.Signature{
		SchemaVersion:  clusterauth.SignatureSchemaVersion,
		Algorithm:      clusterauth.AlgorithmEd25519,
		KeyFingerprint: clusterauth.Fingerprint(publicKey),
		Signature:      base64.StdEncoding.EncodeToString(rawSignature),
	}
	return encoded, record
}
// signHostAgentPayload signs the canonical JSON form of payload with privateKey
// and returns a signature record whose fingerprint is derived from the public
// half of that same key.
func signHostAgentPayload(t *testing.T, payload json.RawMessage, privateKey ed25519.PrivateKey) clusterauth.Signature {
	t.Helper()

	signingInput, err := clusterauth.CanonicalJSON(payload)
	if err != nil {
		t.Fatalf("canonical payload: %v", err)
	}

	signerKey := privateKey.Public().(ed25519.PublicKey)
	rawSignature := ed25519.Sign(privateKey, signingInput)
	return clusterauth.Signature{
		SchemaVersion:  clusterauth.SignatureSchemaVersion,
		Algorithm:      clusterauth.AlgorithmEd25519,
		KeyFingerprint: clusterauth.Fingerprint(signerKey),
		Signature:      base64.StdEncoding.EncodeToString(rawSignature),
	}
}
func TestArtifactURLsForBackendResolvesControlPlaneRelativeDownloads(t *testing.T) {
urls := artifactURLsForBackend(ReleaseArtifact{
URL: "/downloads/rap-node-agent-0.2.92.tar",
@@ -41,6 +140,161 @@ func TestArtifactURLsForBackendResolvesControlPlaneRelativeDownloads(t *testing.
}
}
// TestFetchNodeUpdatePlanRejectsUnsignedPlanWithPinnedAuthority checks that a
// plan without authority payload/signature is refused once the node state pins
// a cluster authority key.
func TestFetchNodeUpdatePlanRejectsUnsignedPlanWithPinnedAuthority(t *testing.T) {
	stateDir, _, _ := writePinnedAuthorityIdentity(t)

	unsignedPlan := map[string]any{
		"schema_version":        "rap.node_update_plan.v1",
		"cluster_id":            "cluster-1",
		"node_id":               "node-1",
		"product":               "rap-node-agent",
		"current_version":       "0.1.0",
		"action":                "none",
		"reason":                "already_current",
		"production_forwarding": false,
	}
	respond := func(w http.ResponseWriter, _ *http.Request) {
		// Encode errors are irrelevant here; the assertion below fails anyway
		// if the response is unusable.
		_ = json.NewEncoder(w).Encode(map[string]any{
			"node_update_plan": unsignedPlan,
		})
	}
	server := httptest.NewServer(http.HandlerFunc(respond))
	defer server.Close()

	request := UpdateRequest{
		BackendURL:     server.URL,
		ClusterID:      "cluster-1",
		NodeID:         "node-1",
		StateDir:       stateDir,
		CurrentVersion: "0.1.0",
		OS:             "linux",
		Arch:           "amd64",
		InstallType:    "docker",
	}
	_, err := FetchNodeUpdatePlan(context.Background(), request)
	if err != nil && strings.Contains(err.Error(), "authority signature is required") {
		return
	}
	t.Fatalf("expected pinned authority rejection, got %v", err)
}
// TestFetchNodeUpdatePlanAcceptsSignedPlanWithPinnedAuthority checks that a
// plan whose authority payload is signed by the pinned cluster authority key,
// and whose payload fields match the plan, is returned to the caller.
func TestFetchNodeUpdatePlanAcceptsSignedPlanWithPinnedAuthority(t *testing.T) {
	stateDir, publicKey, privateKey := writePinnedAuthorityIdentity(t)

	authorityPayload := map[string]any{
		"schema_version":        "rap.node_update_plan_authority.v1",
		"cluster_id":            "cluster-1",
		"node_id":               "node-1",
		"product":               "rap-node-agent",
		"current_version":       "0.1.0",
		"action":                "none",
		"target_version":        "",
		"artifact_sha256":       "",
		"control_plane_only":    true,
		"production_forwarding": false,
	}
	rawPayload, signature := signedAuthorityPayload(t, publicKey, privateKey, authorityPayload)

	signedPlan := map[string]any{
		"schema_version":        "rap.node_update_plan.v1",
		"cluster_id":            "cluster-1",
		"node_id":               "node-1",
		"product":               "rap-node-agent",
		"current_version":       "0.1.0",
		"action":                "none",
		"reason":                "already_current",
		"production_forwarding": false,
		"authority_payload":     json.RawMessage(rawPayload),
		"authority_signature":   signature,
	}
	respond := func(w http.ResponseWriter, _ *http.Request) {
		_ = json.NewEncoder(w).Encode(map[string]any{"node_update_plan": signedPlan})
	}
	server := httptest.NewServer(http.HandlerFunc(respond))
	defer server.Close()

	request := UpdateRequest{
		BackendURL:     server.URL,
		ClusterID:      "cluster-1",
		NodeID:         "node-1",
		StateDir:       stateDir,
		CurrentVersion: "0.1.0",
		OS:             "linux",
		Arch:           "amd64",
		InstallType:    "docker",
	}
	got, err := FetchNodeUpdatePlan(context.Background(), request)
	if err != nil {
		t.Fatalf("fetch signed plan: %v", err)
	}
	if !(got.Action == "none" && got.Reason == "already_current") {
		t.Fatalf("unexpected plan: %+v", got)
	}
}
// TestFetchNodeUpdatePlanAcceptsQuorumSignedPlan serves a plan whose authority
// payload is wrapped in a quorum envelope signed by two of the three pinned
// quorum members and expects FetchNodeUpdatePlan to return it.
func TestFetchNodeUpdatePlanAcceptsQuorumSignedPlan(t *testing.T) {
	stateDir, descriptor, privateKeys := writePinnedQuorumIdentity(t)

	authorityPayload := map[string]any{
		"schema_version":        "rap.node_update_plan_authority.v1",
		"cluster_id":            "cluster-1",
		"node_id":               "node-1",
		"product":               "rap-node-agent",
		"current_version":       "0.1.0",
		"action":                "none",
		"target_version":        "",
		"artifact_sha256":       "",
		"control_plane_only":    true,
		"production_forwarding": false,
	}
	rawPayload, err := json.Marshal(authorityPayload)
	if err != nil {
		t.Fatalf("marshal payload: %v", err)
	}
	payloadHash, err := clusterauth.HashRaw(rawPayload)
	if err != nil {
		t.Fatalf("payload hash: %v", err)
	}
	quorumHash, err := clusterauth.QuorumDescriptorHash(descriptor)
	if err != nil {
		t.Fatalf("quorum hash: %v", err)
	}

	// Two signatures satisfy the descriptor's threshold of 2.
	envelope := clusterauth.QuorumEnvelope{
		SchemaVersion: clusterauth.QuorumEnvelopeVersion,
		ClusterID:     "cluster-1",
		Epoch:         "epoch-1",
		Threshold:     2,
		PayloadSHA256: payloadHash,
		QuorumSHA256:  quorumHash,
		Signatures: []clusterauth.Signature{
			signHostAgentPayload(t, rawPayload, privateKeys[0]),
			signHostAgentPayload(t, rawPayload, privateKeys[1]),
		},
	}
	quorumPlan := map[string]any{
		"schema_version":        "rap.node_update_plan.v1",
		"cluster_id":            "cluster-1",
		"node_id":               "node-1",
		"product":               "rap-node-agent",
		"current_version":       "0.1.0",
		"action":                "none",
		"reason":                "already_current",
		"production_forwarding": false,
		"authority_payload":     json.RawMessage(rawPayload),
		"authority_quorum":      envelope,
	}

	respond := func(w http.ResponseWriter, _ *http.Request) {
		_ = json.NewEncoder(w).Encode(map[string]any{"node_update_plan": quorumPlan})
	}
	server := httptest.NewServer(http.HandlerFunc(respond))
	defer server.Close()

	request := UpdateRequest{
		BackendURL:     server.URL,
		ClusterID:      "cluster-1",
		NodeID:         "node-1",
		StateDir:       stateDir,
		CurrentVersion: "0.1.0",
		OS:             "linux",
		Arch:           "amd64",
		InstallType:    "docker",
	}
	got, err := FetchNodeUpdatePlan(context.Background(), request)
	if err != nil {
		t.Fatalf("fetch quorum plan: %v", err)
	}
	if action := got.Action; action != "none" {
		t.Fatalf("unexpected plan: %+v", got)
	}
}
func (r *updateRunner) Run(_ context.Context, name string, args ...string) (string, error) {
r.calls = append(r.calls, append([]string{name}, args...))
if len(args) >= 2 && args[0] == "inspect" && args[1] == "--format" {
@@ -11,8 +11,9 @@ import (
func TestClientFabricSessionFrameRoundTrip(t *testing.T) {
server := httptest.NewServer(Server{
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
FabricSessionEnabled: true,
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
FabricSessionEnabled: true,
FabricSessionWebSocketEnabled: true,
}.Handler())
defer server.Close()
@@ -37,8 +38,9 @@ func TestClientFabricSessionFrameRoundTrip(t *testing.T) {
func TestClientFabricSessionPersistentRoundTrips(t *testing.T) {
server := httptest.NewServer(Server{
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
FabricSessionEnabled: true,
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
FabricSessionEnabled: true,
FabricSessionWebSocketEnabled: true,
}.Handler())
defer server.Close()
@@ -80,8 +82,9 @@ func TestClientFabricSessionPersistentRoundTrips(t *testing.T) {
func TestClientFabricSessionPersistentDataAcks(t *testing.T) {
server := httptest.NewServer(Server{
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
FabricSessionEnabled: true,
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
FabricSessionEnabled: true,
FabricSessionWebSocketEnabled: true,
}.Handler())
defer server.Close()
@@ -135,8 +138,9 @@ func TestClientFabricSessionPersistentDataAcks(t *testing.T) {
func TestClientFabricSessionPumpMovesIndependentFrames(t *testing.T) {
server := httptest.NewServer(Server{
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
FabricSessionEnabled: true,
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
FabricSessionEnabled: true,
FabricSessionWebSocketEnabled: true,
}.Handler())
defer server.Close()
@@ -202,8 +206,9 @@ func TestClientFabricSessionPumpMovesIndependentFrames(t *testing.T) {
func TestClientFabricSessionReportsRejectedStatus(t *testing.T) {
server := httptest.NewServer(Server{
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
FabricSessionEnabled: true,
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
FabricSessionEnabled: true,
FabricSessionWebSocketEnabled: true,
}.Handler())
defer server.Close()
@@ -72,6 +72,10 @@ const (
MaxProductionEnvelopePayloadBytes = 4096
MaxProductionVPNPacketPayloadBytes = 256 * 1024
MaxProductionEnvelopeFutureSkew = time.Minute
ProductionForwardQUICStreamID = 1
WebIngressForwardQUICStreamID = 2
FabricControlForwardQUICStreamID = 3
SyntheticForwardQUICStreamID = 1001
)
type PeerIdentity struct {
@@ -47,6 +47,9 @@ func RankPeerEndpointCandidates(candidates []PeerEndpointCandidate, opts Endpoin
}
out := make([]ScoredPeerEndpointCandidate, 0, len(candidates))
for _, candidate := range candidates {
if endpointHasUnspecifiedHost(candidate.Address) {
continue
}
out = append(out, scorePeerEndpointCandidate(candidate, opts))
}
sort.SliceStable(out, func(i, j int) bool {
@@ -68,25 +71,25 @@ func scorePeerEndpointCandidate(candidate PeerEndpointCandidate, opts EndpointCa
score := 100
reasons := []string{"base"}
switch candidate.Transport {
switch strings.ToLower(strings.TrimSpace(candidate.Transport)) {
case "quic", "direct_quic", "udp_quic", "quic_udp":
score += 45
reasons = append(reasons, "transport:quic")
case "direct_tcp_tls", "direct_http", "direct_https":
score += 35
reasons = append(reasons, "transport:direct")
case "wss":
score += 25
reasons = append(reasons, "transport:wss")
case "outbound_reverse":
score += 10
reasons = append(reasons, "transport:outbound_reverse")
case "relay":
case "lan_quic":
score += 42
reasons = append(reasons, "transport:lan_quic")
case "ice_quic":
score += 38
reasons = append(reasons, "transport:ice_quic")
case "reverse_quic":
score += 15
reasons = append(reasons, "transport:reverse_quic")
case "relay_quic":
score += 5
reasons = append(reasons, "transport:relay")
reasons = append(reasons, "transport:relay_quic")
default:
score -= 100
reasons = append(reasons, "transport:unknown")
reasons = append(reasons, "transport:non_quic_rejected")
}
switch candidate.Reachability {
@@ -173,7 +176,8 @@ func scorePeerEndpointCandidate(candidate PeerEndpointCandidate, opts EndpointCa
score += 8
reasons = append(reasons, "channel:control-direct")
}
if candidate.Transport == "relay" {
transport := strings.ToLower(strings.TrimSpace(candidate.Transport))
if transport == "relay" || transport == "relay_quic" {
score -= 8
reasons = append(reasons, "channel:control-relay-penalty")
}
@@ -234,14 +238,20 @@ func scoreEndpointCandidateObservation(observation EndpointCandidateHealthObserv
}
switch {
case observation.LastLatencyMs > 0 && observation.LastLatencyMs <= 50:
score += 18
score += 24
reasons = append(reasons, "latency:low")
case observation.LastLatencyMs > 0 && observation.LastLatencyMs <= 150:
score += 8
reasons = append(reasons, "latency:moderate")
case observation.LastLatencyMs > 0:
score -= 10
case observation.LastLatencyMs > 0 && observation.LastLatencyMs <= 300:
score -= 12
reasons = append(reasons, "latency:high")
case observation.LastLatencyMs > 0 && observation.LastLatencyMs <= 750:
score -= 32
reasons = append(reasons, "latency:very_high")
case observation.LastLatencyMs > 0:
score -= 60
reasons = append(reasons, "latency:extreme")
}
if observation.ReliabilityScore > 0 {
switch {
@@ -13,7 +13,7 @@ func TestRankPeerEndpointCandidatesPrefersDirectFreshPublicPath(t *testing.T) {
{
EndpointID: "node-b-relay",
NodeID: "node-b",
Transport: "relay",
Transport: "relay_quic",
Address: "relay.example.test/node-b",
Reachability: "relay",
NATType: "symmetric",
@@ -25,8 +25,8 @@ func TestRankPeerEndpointCandidatesPrefersDirectFreshPublicPath(t *testing.T) {
{
EndpointID: "node-b-public",
NodeID: "node-b",
Transport: "direct_tcp_tls",
Address: "203.0.113.20:443",
Transport: "direct_quic",
Address: "quic://203.0.113.20:19443",
Reachability: "public",
NATType: "none",
ConnectivityMode: "direct",
@@ -38,8 +38,8 @@ func TestRankPeerEndpointCandidatesPrefersDirectFreshPublicPath(t *testing.T) {
{
EndpointID: "node-b-private-stale",
NodeID: "node-b",
Transport: "wss",
Address: "10.0.0.5:443",
Transport: "lan_quic",
Address: "quic://10.0.0.5:19443",
Reachability: "private",
NATType: "restricted",
ConnectivityMode: "direct",
@@ -74,8 +74,8 @@ func TestRankPeerEndpointCandidatesUsesDeterministicTieBreak(t *testing.T) {
{
EndpointID: "endpoint-b",
NodeID: "node-b",
Transport: "direct_tcp_tls",
Address: "203.0.113.21:443",
Transport: "direct_quic",
Address: "quic://203.0.113.21:19443",
Reachability: "public",
NATType: "none",
ConnectivityMode: "direct",
@@ -84,8 +84,8 @@ func TestRankPeerEndpointCandidatesUsesDeterministicTieBreak(t *testing.T) {
{
EndpointID: "endpoint-a",
NodeID: "node-b",
Transport: "direct_tcp_tls",
Address: "203.0.113.20:443",
Transport: "direct_quic",
Address: "quic://203.0.113.20:19443",
Reachability: "public",
NATType: "none",
ConnectivityMode: "direct",
@@ -103,10 +103,10 @@ func TestRankPeerEndpointCandidatesPrefersQUICFastPath(t *testing.T) {
now := time.Date(2026, 5, 16, 12, 0, 0, 0, time.UTC)
candidates := []PeerEndpointCandidate{
{
EndpointID: "node-b-wss",
EndpointID: "node-b-relay",
NodeID: "node-b",
Transport: "wss",
Address: "wss://node-b.example.test",
Transport: "relay_quic",
Address: "quic://relay.example.test:19443",
Reachability: "public",
NATType: "none",
ConnectivityMode: "direct",
@@ -138,14 +138,44 @@ func TestRankPeerEndpointCandidatesPrefersQUICFastPath(t *testing.T) {
}
}
// TestRankPeerEndpointCandidatesDropsUnspecifiedQUICEndpoint ranks one
// candidate bound to the unspecified host ([::]) next to a routable public
// candidate and expects only the routable one to come back.
func TestRankPeerEndpointCandidatesDropsUnspecifiedQUICEndpoint(t *testing.T) {
	unspecified := PeerEndpointCandidate{
		EndpointID:       "node-b-unspecified",
		NodeID:           "node-b",
		Transport:        "direct_quic",
		Address:          "quic://[::]:19131",
		Reachability:     "public",
		NATType:          "none",
		ConnectivityMode: "direct",
		Priority:         1,
	}
	routable := PeerEndpointCandidate{
		EndpointID:       "node-b-public",
		NodeID:           "node-b",
		Transport:        "direct_quic",
		Address:          "quic://203.0.113.20:19131",
		Reachability:     "public",
		NATType:          "none",
		ConnectivityMode: "direct",
		Priority:         10,
	}

	ranked := RankPeerEndpointCandidates(
		[]PeerEndpointCandidate{unspecified, routable},
		EndpointCandidateScoreOptions{},
	)

	// Exactly one survivor, and it must be the routable endpoint.
	if len(ranked) == 1 && ranked[0].Candidate.EndpointID == "node-b-public" {
		return
	}
	t.Fatalf("unspecified endpoint was not dropped: %+v", ranked)
}
func TestRankPeerEndpointCandidatesPrefersCorporatePrivateEndpoint(t *testing.T) {
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
candidates := []PeerEndpointCandidate{
{
EndpointID: "node-b-public",
NodeID: "node-b",
Transport: "direct_tcp_tls",
Address: "203.0.113.20:443",
Transport: "direct_quic",
Address: "quic://203.0.113.20:19443",
Reachability: "public",
NATType: "none",
ConnectivityMode: "direct",
@@ -155,8 +185,8 @@ func TestRankPeerEndpointCandidatesPrefersCorporatePrivateEndpoint(t *testing.T)
{
EndpointID: "node-b-corp-lan",
NodeID: "node-b",
Transport: "direct_tcp_tls",
Address: "10.24.10.20:19001",
Transport: "lan_quic",
Address: "quic://10.24.10.20:19443",
Reachability: "private",
NATType: "none",
ConnectivityMode: "direct",
@@ -184,7 +214,7 @@ func TestRankPeerEndpointCandidatesDoesNotDropRelayRequiredFallback(t *testing.T
{
EndpointID: "node-b-outbound",
NodeID: "node-b",
Transport: "outbound_reverse",
Transport: "reverse_quic",
Address: "node-b.reverse.local",
Reachability: "outbound_only",
NATType: "symmetric",
@@ -194,7 +224,7 @@ func TestRankPeerEndpointCandidatesDoesNotDropRelayRequiredFallback(t *testing.T
{
EndpointID: "node-b-relay",
NodeID: "node-b",
Transport: "relay",
Transport: "relay_quic",
Address: "relay.example.test/node-b",
Reachability: "relay",
NATType: "blocked",
@@ -222,18 +252,18 @@ func TestRankPeerEndpointCandidatesUsesHealthObservationOverlay(t *testing.T) {
{
EndpointID: "node-b-direct",
NodeID: "node-b",
Transport: "direct_tcp_tls",
Address: "203.0.113.20:443",
Transport: "direct_quic",
Address: "quic://203.0.113.20:19443",
Reachability: "public",
NATType: "none",
ConnectivityMode: "direct",
Priority: 10,
},
{
EndpointID: "node-b-wss",
EndpointID: "node-b-ice",
NodeID: "node-b",
Transport: "wss",
Address: "node-b.example.test",
Transport: "ice_quic",
Address: "quic://node-b.example.test:19443",
Reachability: "public",
NATType: "restricted",
ConnectivityMode: "direct",
@@ -253,8 +283,8 @@ func TestRankPeerEndpointCandidatesUsesHealthObservationOverlay(t *testing.T) {
ReliabilityScore: 50,
ObservedAt: now.Add(-time.Minute),
},
"node-b-wss": {
EndpointID: "node-b-wss",
"node-b-ice": {
EndpointID: "node-b-ice",
LastLatencyMs: 35,
SuccessCount: 8,
ReliabilityScore: 95,
@@ -262,8 +292,8 @@ func TestRankPeerEndpointCandidatesUsesHealthObservationOverlay(t *testing.T) {
},
},
})
if ranked[0].Candidate.EndpointID != "node-b-wss" {
t.Fatalf("top endpoint = %q, want node-b-wss: %+v", ranked[0].Candidate.EndpointID, ranked)
if ranked[0].Candidate.EndpointID != "node-b-ice" {
t.Fatalf("top endpoint = %q, want node-b-ice: %+v", ranked[0].Candidate.EndpointID, ranked)
}
if !containsReason(ranked[0].Reasons, "latency:low") || !containsReason(ranked[0].Reasons, "reliability:high") {
t.Fatalf("top reasons missing health hints: %+v", ranked[0].Reasons)
@@ -279,8 +309,8 @@ func TestRankPeerEndpointCandidatesTreatsStaleObservationAsPenalty(t *testing.T)
{
EndpointID: "node-b-direct",
NodeID: "node-b",
Transport: "direct_tcp_tls",
Address: "203.0.113.20:443",
Transport: "direct_quic",
Address: "quic://203.0.113.20:19443",
Reachability: "public",
NATType: "none",
ConnectivityMode: "direct",
@@ -321,10 +351,10 @@ func TestRankPeerEndpointCandidatesDoesNotRewardZeroLatencyFailure(t *testing.T)
LastVerifiedAt: &now,
},
{
EndpointID: "node-b-wss",
EndpointID: "node-b-ice",
NodeID: "node-b",
Transport: "wss",
Address: "https://node-b.example.test:443",
Transport: "ice_quic",
Address: "quic://node-b.example.test:19444",
Reachability: "public",
ConnectivityMode: "direct",
Priority: 10,
@@ -345,14 +375,81 @@ func TestRankPeerEndpointCandidatesDoesNotRewardZeroLatencyFailure(t *testing.T)
},
MaxObservationAge: time.Minute,
})
if ranked[0].Candidate.EndpointID != "node-b-wss" {
t.Fatalf("top endpoint = %q, want wss after repeated quic failures: %+v", ranked[0].Candidate.EndpointID, ranked)
if ranked[0].Candidate.EndpointID != "node-b-ice" {
t.Fatalf("top endpoint = %q, want ice_quic after repeated direct QUIC failures: %+v", ranked[0].Candidate.EndpointID, ranked)
}
if containsReason(ranked[1].Reasons, "latency:moderate") {
t.Fatalf("zero latency failure was rewarded as moderate latency: %+v", ranked[1].Reasons)
}
}
// TestRankPeerEndpointCandidatesPenalizesSevereLatencyGradient ranks three
// freshly verified candidates whose observed latencies are 4 ms, 420 ms and
// 900 ms and expects them to be ordered lan, wan, relay, with the two slow
// candidates tagged "latency:very_high" and "latency:extreme" respectively.
func TestRankPeerEndpointCandidatesPenalizesSevereLatencyGradient(t *testing.T) {
	now := time.Date(2026, 5, 17, 6, 0, 0, 0, time.UTC)
	candidates := []PeerEndpointCandidate{
		{
			EndpointID:       "node-b-lan",
			NodeID:           "node-b",
			Transport:        "direct_quic",
			Address:          "quic://10.0.0.2:19443",
			Reachability:     "private",
			ConnectivityMode: "direct",
			LastVerifiedAt:   &now,
		},
		{
			EndpointID:       "node-b-wan",
			NodeID:           "node-b",
			Transport:        "direct_quic",
			Address:          "quic://203.0.113.20:19443",
			Reachability:     "public",
			ConnectivityMode: "direct",
			LastVerifiedAt:   &now,
		},
		{
			EndpointID:       "node-b-bad-relay",
			NodeID:           "node-b",
			Transport:        "relay_quic",
			Address:          "quic://relay.example.test:19443",
			Reachability:     "relay",
			ConnectivityMode: "relay_required",
			LastVerifiedAt:   &now,
		},
	}
	ranked := RankPeerEndpointCandidates(candidates, EndpointCandidateScoreOptions{
		Now:                now,
		MaxVerificationAge: time.Minute,
		MaxObservationAge:  time.Minute,
		Observations: map[string]EndpointCandidateHealthObservation{
			"node-b-lan": {
				EndpointID:       "node-b-lan",
				LastLatencyMs:    4,
				ReliabilityScore: 95,
				ObservedAt:       now,
			},
			"node-b-wan": {
				EndpointID:       "node-b-wan",
				LastLatencyMs:    420,
				ReliabilityScore: 95,
				ObservedAt:       now,
			},
			"node-b-bad-relay": {
				EndpointID:       "node-b-bad-relay",
				LastLatencyMs:    900,
				ReliabilityScore: 95,
				ObservedAt:       now,
			},
		},
	})
	// Guard the indexing below: a ranking that drops a candidate should fail
	// with a readable message instead of an index-out-of-range panic.
	if len(ranked) != len(candidates) {
		t.Fatalf("ranked %d endpoints, want %d: %+v", len(ranked), len(candidates), ranked)
	}
	if ranked[0].Candidate.EndpointID != "node-b-lan" || ranked[1].Candidate.EndpointID != "node-b-wan" || ranked[2].Candidate.EndpointID != "node-b-bad-relay" {
		t.Fatalf("ranked endpoints = %+v, want lan, wan, bad relay", ranked)
	}
	if !containsReason(ranked[1].Reasons, "latency:very_high") {
		t.Fatalf("wan reasons = %+v, want latency:very_high", ranked[1].Reasons)
	}
	if !containsReason(ranked[2].Reasons, "latency:extreme") {
		t.Fatalf("relay reasons = %+v, want latency:extreme", ranked[2].Reasons)
	}
}
func TestRankPeerEndpointCandidatesTreatsCapacityAsSoftPressure(t *testing.T) {
now := time.Date(2026, 5, 16, 12, 0, 0, 0, time.UTC)
ranked := RankPeerEndpointCandidates([]PeerEndpointCandidate{
@@ -0,0 +1,217 @@
package mesh
import (
"errors"
"strings"
"time"
)
// FabricChannelRouteEventType labels what a router call did to a channel's
// route assignment.
type FabricChannelRouteEventType string
const (
// FabricChannelRouteEventNone is reported by ObserveChannel when the channel
// stays on its current route.
FabricChannelRouteEventNone FabricChannelRouteEventType = ""
// FabricChannelRouteEventOpened is reported by OpenChannel when a channel is
// first bound to a route.
FabricChannelRouteEventOpened FabricChannelRouteEventType = "opened"
// FabricChannelRouteEventReroute is reported by ObserveChannel when the
// channel was moved to a different route.
FabricChannelRouteEventReroute FabricChannelRouteEventType = "reroute"
)
// ErrFabricRouteRerouteSuppressed is a sentinel describing a reroute that was
// suppressed.
// NOTE(review): nothing in this file returns it; the hysteresis check in
// shouldReroute signals suppression by returning false. Confirm it is used by
// callers elsewhere in the package.
var ErrFabricRouteRerouteSuppressed = errors.New("fabric route reroute suppressed")
// FabricChannelRouterConfig tunes when FabricChannelRouter moves a channel to
// another route. Unset values are filled in by
// normalizeFabricChannelRouterConfig.
type FabricChannelRouterConfig struct {
// SchedulerConfig is handed to NewFabricRouteScheduler. Its
// ProjectedChannelCost inherits the router-level value when it is <= 0.
SchedulerConfig FabricRouteSchedulerConfig
// MaxAckLatencyMs triggers a reroute when an observation's AckLatencyMs
// exceeds it. Values <= 0 disable the latency trigger.
MaxAckLatencyMs int64
// MaxRoutePressure triggers a reroute when the current route's pressure
// percentage exceeds it. Values <= 0 are normalized to 90.
MaxRoutePressure int
// MinRerouteInterval suppresses every reroute (including failure-driven
// ones) until this much time has passed since the channel's LastReroute.
// Values <= 0 disable the suppression.
MinRerouteInterval time.Duration
// ProjectedChannelCost is passed to fabricRoutePressurePercent when
// evaluating route pressure. Values <= 0 are normalized to 1.
ProjectedChannelCost int
}
// FabricChannelRouter binds channels to routes chosen by its Scheduler and
// decides, from per-channel observations, when a channel should be moved to
// an alternative route. Its methods use value receivers and return the
// updated channel rather than mutating the caller's copy.
type FabricChannelRouter struct {
// Config holds the reroute thresholds; it is re-normalized on use so a
// router built as a struct literal still gets defaults.
Config FabricChannelRouterConfig
// Scheduler picks the route for new channels and for reroutes.
Scheduler FabricRouteScheduler
}
// FabricChannelObservation is one report about a channel's behaviour on its
// current route, fed to FabricChannelRouter.ObserveChannel.
type FabricChannelObservation struct {
// ChannelID and RouteID identify what was observed. ObserveChannel itself
// evaluates the route recorded on the channel, not this RouteID.
ChannelID string
RouteID string
// AckLatencyMs is compared against the router's MaxAckLatencyMs.
AckLatencyMs int64
// Failed requests a reroute (still subject to MinRerouteInterval).
Failed bool
// The byte and frame counters are deltas; ObserveChannel adds them to the
// channel's running totals.
BytesSent uint64
BytesRecv uint64
FramesSent uint64
FramesRecv uint64
// Reason, when not blank, is used verbatim as the reroute event's reason;
// otherwise the router derives one from the thresholds.
Reason string
// ObservedAt defaults to the ObserveChannel call time when zero.
ObservedAt time.Time
}
// FabricChannelRouteEvent describes the outcome of an OpenChannel or
// ObserveChannel call.
type FabricChannelRouteEvent struct {
// Type says whether the channel was opened, rerouted, or left unchanged.
Type FabricChannelRouteEventType
// Reason is the scheduler's reason for an open, or the reroute reason.
// It is left empty on FabricChannelRouteEventNone events.
Reason string
// PreviousRoute is the route the channel was on before a reroute. It is the
// zero value for open events, and also when the old route ID is no longer
// present in the route set.
PreviousRoute FabricRoute
// NextRoute is the route the channel is bound to after the call (set for
// open and reroute events).
NextRoute FabricRoute
// Choice is the scheduler's full decision behind NextRoute.
Choice FabricRouteChoice
// Observation is the observation that was evaluated (ObserveChannel only).
Observation FabricChannelObservation
// Channel is a snapshot of the channel after the call.
Channel FabricChannel
// OccurredAt is the effective "now" used by the call.
OccurredAt time.Time
}
// NewFabricChannelRouter returns a router whose Config has defaults applied
// and whose Scheduler is built from the normalized scheduler settings.
func NewFabricChannelRouter(cfg FabricChannelRouterConfig) FabricChannelRouter {
	normalized := normalizeFabricChannelRouterConfig(cfg)

	var router FabricChannelRouter
	router.Config = normalized
	router.Scheduler = NewFabricRouteScheduler(normalized.SchedulerConfig)
	return router
}
// OpenChannel asks the scheduler for a route in routeSet and returns a channel
// in the FabricChannelOpen state bound to it, together with an "opened" event
// carrying the scheduler's choice. A zero now is replaced by the current UTC
// time. Scheduler errors are returned unchanged with zero-value results.
func (r FabricChannelRouter) OpenChannel(spec FabricChannelSpec, routeSet FabricRouteSet, now time.Time) (FabricChannel, FabricChannelRouteEvent, error) {
	openedAt := now
	if openedAt.IsZero() {
		openedAt = time.Now().UTC()
	}

	picked, err := r.Scheduler.ChooseRoute(spec, routeSet, openedAt)
	if err != nil {
		return FabricChannel{}, FabricChannelRouteEvent{}, err
	}

	var opened FabricChannel
	opened.Spec = spec
	opened.State = FabricChannelOpen
	opened.RouteID = picked.Route.RouteID
	opened.TargetNode = picked.Route.DestinationNodeID
	opened.OpenedAt = openedAt

	return opened, FabricChannelRouteEvent{
		Type:       FabricChannelRouteEventOpened,
		Reason:     picked.Reason,
		NextRoute:  picked.Route,
		Choice:     picked,
		Channel:    opened,
		OccurredAt: openedAt,
	}, nil
}
// ObserveChannel folds an observation into the channel's counters and, when
// the router's thresholds say so, moves the channel to an alternative route.
//
// It returns the updated channel plus an event: FabricChannelRouteEventNone
// when the channel stays put, FabricChannelRouteEventReroute when it moved.
// If a reroute is warranted but no alternative can be chosen, the channel
// (with counters already updated) is returned with a zero event and the
// scheduler's error. A zero now is replaced by the current UTC time, and a
// zero observation.ObservedAt is replaced by now.
func (r FabricChannelRouter) ObserveChannel(channel FabricChannel, routeSet FabricRouteSet, observation FabricChannelObservation, now time.Time) (FabricChannel, FabricChannelRouteEvent, error) {
	if now.IsZero() {
		now = time.Now().UTC()
	}
	if observation.ObservedAt.IsZero() {
		observation.ObservedAt = now
	}

	// Traffic counters are accumulated regardless of the routing outcome.
	channel.BytesSent += observation.BytesSent
	channel.BytesRecv += observation.BytesRecv
	channel.FramesSent += observation.FramesSent
	channel.FramesRecv += observation.FramesRecv
	if channel.State == "" {
		channel.State = FabricChannelOpen
	}

	if !r.shouldReroute(channel, observation, routeSet, now) {
		unchanged := FabricChannelRouteEvent{
			Type:        FabricChannelRouteEventNone,
			Observation: observation,
			Channel:     channel,
			OccurredAt:  now,
		}
		return channel, unchanged, nil
	}

	// The old route may have vanished from the set; a zero route is fine here.
	oldRoute, _ := findFabricRoute(routeSet, channel.RouteID)
	picked, err := r.chooseAlternativeRoute(channel.Spec, routeSet, channel.RouteID, now)
	if err != nil {
		return channel, FabricChannelRouteEvent{}, err
	}

	channel.RouteID = picked.Route.RouteID
	channel.TargetNode = picked.Route.DestinationNodeID
	channel.LastReroute = now
	channel.RerouteCount++

	// A caller-supplied reason wins; otherwise derive one from the thresholds.
	why := observation.Reason
	if strings.TrimSpace(why) == "" {
		why = rerouteReason(r.Config, observation, oldRoute)
	}

	return channel, FabricChannelRouteEvent{
		Type:          FabricChannelRouteEventReroute,
		Reason:        why,
		PreviousRoute: oldRoute,
		NextRoute:     picked.Route,
		Choice:        picked,
		Observation:   observation,
		Channel:       channel,
		OccurredAt:    now,
	}, nil
}
// shouldReroute reports whether the observation warrants moving the channel.
// The hysteresis window is checked first and vetoes everything, including
// failures; after that a failure, an ack latency above MaxAckLatencyMs, or a
// current-route pressure above MaxRoutePressure each trigger a reroute.
func (r FabricChannelRouter) shouldReroute(channel FabricChannel, observation FabricChannelObservation, routeSet FabricRouteSet, now time.Time) bool {
	cfg := normalizeFabricChannelRouterConfig(r.Config)

	insideHysteresis := cfg.MinRerouteInterval > 0 &&
		!channel.LastReroute.IsZero() &&
		now.Sub(channel.LastReroute) < cfg.MinRerouteInterval

	switch {
	case insideHysteresis:
		return false
	case observation.Failed:
		return true
	case cfg.MaxAckLatencyMs > 0 && observation.AckLatencyMs > cfg.MaxAckLatencyMs:
		return true
	case cfg.MaxRoutePressure <= 0:
		return false
	}

	// Pressure can only be judged when the current route is still in the set.
	current, found := findFabricRoute(routeSet, channel.RouteID)
	return found && fabricRoutePressurePercent(current, cfg.ProjectedChannelCost) > cfg.MaxRoutePressure
}
// chooseAlternativeRoute lets the scheduler pick among every route in routeSet
// except currentRouteID. It returns ErrFabricRouteNotFound when no other route
// exists.
func (r FabricChannelRouter) chooseAlternativeRoute(spec FabricChannelSpec, routeSet FabricRouteSet, currentRouteID string, now time.Time) (FabricRouteChoice, error) {
	all := flattenFabricRouteSet(routeSet)
	others := make([]FabricRoute, 0, len(all))
	for i := range all {
		if all[i].RouteID != currentRouteID {
			others = append(others, all[i])
		}
	}
	if len(others) == 0 {
		return FabricRouteChoice{}, ErrFabricRouteNotFound
	}
	reduced := routeSetFromRoutes(routeSet, others)
	return r.Scheduler.ChooseRoute(spec, reduced, now)
}
// normalizeFabricChannelRouterConfig returns cfg with defaults applied:
// ProjectedChannelCost becomes 1 when not positive, the scheduler's
// ProjectedChannelCost inherits the router-level value when not positive, and
// MaxRoutePressure becomes 90 when not positive.
func normalizeFabricChannelRouterConfig(cfg FabricChannelRouterConfig) FabricChannelRouterConfig {
	const (
		defaultChannelCost   = 1
		defaultRoutePressure = 90
	)
	if cfg.ProjectedChannelCost < 1 {
		cfg.ProjectedChannelCost = defaultChannelCost
	}
	// The scheduler default is resolved after the router-level one so it
	// inherits the already-normalized cost.
	if cfg.SchedulerConfig.ProjectedChannelCost < 1 {
		cfg.SchedulerConfig.ProjectedChannelCost = cfg.ProjectedChannelCost
	}
	if cfg.MaxRoutePressure < 1 {
		cfg.MaxRoutePressure = defaultRoutePressure
	}
	return cfg
}
// rerouteReason names the first threshold that explains a reroute, checked in
// the order failure, ack latency, route pressure. It falls back to
// "route_degraded" when none of them matches.
func rerouteReason(cfg FabricChannelRouterConfig, observation FabricChannelObservation, route FabricRoute) string {
	cfg = normalizeFabricChannelRouterConfig(cfg)
	if observation.Failed {
		return "route_failure"
	}
	if cfg.MaxAckLatencyMs > 0 && observation.AckLatencyMs > cfg.MaxAckLatencyMs {
		return "ack_latency_threshold"
	}
	if cfg.MaxRoutePressure > 0 && fabricRoutePressurePercent(route, cfg.ProjectedChannelCost) > cfg.MaxRoutePressure {
		return "route_capacity_pressure"
	}
	return "route_degraded"
}
// findFabricRoute looks up routeID (with surrounding whitespace trimmed) among
// all routes of routeSet. The boolean is false for a blank ID or when no
// route carries that ID.
func findFabricRoute(routeSet FabricRouteSet, routeID string) (FabricRoute, bool) {
	wanted := strings.TrimSpace(routeID)
	if wanted != "" {
		candidates := flattenFabricRouteSet(routeSet)
		for i := range candidates {
			if candidates[i].RouteID == wanted {
				return candidates[i], true
			}
		}
	}
	return FabricRoute{}, false
}
// routeSetFromRoutes builds a route set that keeps the template's target kind
// and ID, uses the first route as Primary and the remaining ones as
// WarmStandby. With no routes only the target fields are populated.
func routeSetFromRoutes(template FabricRouteSet, routes []FabricRoute) FabricRouteSet {
	rebuilt := FabricRouteSet{TargetKind: template.TargetKind, TargetID: template.TargetID}
	if len(routes) > 0 {
		rebuilt.Primary = routes[0]
		if standby := routes[1:]; len(standby) > 0 {
			rebuilt.WarmStandby = append(rebuilt.WarmStandby, standby...)
		}
	}
	return rebuilt
}
@@ -0,0 +1,151 @@
package mesh
import (
"testing"
"time"
)
// TestFabricChannelRouterOpensOnBestRoute opens a channel over a route set
// whose warm standby has a lower base latency than the primary and expects
// the channel to land on the standby.
func TestFabricChannelRouterOpensOnBestRoute(t *testing.T) {
	router := NewFabricChannelRouter(FabricChannelRouterConfig{})
	now := time.Now()
	spec := testFabricChannelSpec(FabricChannelTargetNode, "node-b")
	routes := FabricRouteSet{
		TargetKind:  FabricChannelTargetNode,
		TargetID:    "node-b",
		Primary:     testFabricRoute("route-slow", "node-b", 80, 100, 0, true),
		WarmStandby: []FabricRoute{testFabricRoute("route-fast", "node-b", 15, 100, 0, true)},
	}

	opened, openEvent, err := router.OpenChannel(spec, routes, now)
	if err != nil {
		t.Fatalf("open channel: %v", err)
	}

	onFastRoute := opened.RouteID == "route-fast" && opened.State == FabricChannelOpen
	if !onFastRoute {
		t.Fatalf("channel = %+v, want route-fast open", opened)
	}
	eventMatches := openEvent.Type == FabricChannelRouteEventOpened && openEvent.NextRoute.RouteID == "route-fast"
	if !eventMatches {
		t.Fatalf("event = %+v", openEvent)
	}
}
// TestFabricChannelRouterReroutesOnSlowAck reports a 120 ms ack against a
// 30 ms threshold and expects the channel to move to the standby route with
// the "ack_latency_threshold" reason while keeping the observed counters.
func TestFabricChannelRouterReroutesOnSlowAck(t *testing.T) {
	now := time.Now()
	router := NewFabricChannelRouter(FabricChannelRouterConfig{MaxAckLatencyMs: 30})

	routes := FabricRouteSet{
		TargetKind:  FabricChannelTargetNode,
		TargetID:    "node-b",
		Primary:     testFabricRoute("route-primary", "node-b", 10, 100, 0, true),
		WarmStandby: []FabricRoute{testFabricRoute("route-standby", "node-b", 20, 100, 0, true)},
	}
	existing := FabricChannel{
		Spec:     testFabricChannelSpec(FabricChannelTargetNode, "node-b"),
		State:    FabricChannelOpen,
		RouteID:  "route-primary",
		OpenedAt: now.Add(-time.Minute),
	}
	slowAck := FabricChannelObservation{
		ChannelID:    existing.Spec.ChannelID,
		RouteID:      existing.RouteID,
		AckLatencyMs: 120,
		BytesSent:    4096,
		FramesSent:   4,
	}

	moved, routeEvent, err := router.ObserveChannel(existing, routes, slowAck, now)
	if err != nil {
		t.Fatalf("observe channel: %v", err)
	}

	if !(routeEvent.Type == FabricChannelRouteEventReroute && routeEvent.Reason == "ack_latency_threshold") {
		t.Fatalf("event = %+v", routeEvent)
	}
	countersKept := moved.BytesSent == 4096 && moved.FramesSent == 4
	if !(moved.RouteID == "route-standby" && moved.RerouteCount == 1 && countersKept) {
		t.Fatalf("updated = %+v", moved)
	}
}
// TestFabricChannelRouterReroutesPoolTargetOnFailure reports a failure on a
// pool-targeted channel and expects it to move from the node-b route to the
// node-c route, updating the channel's target node accordingly.
func TestFabricChannelRouterReroutesPoolTargetOnFailure(t *testing.T) {
	now := time.Now()
	router := NewFabricChannelRouter(FabricChannelRouterConfig{})

	poolRoutes := FabricRouteSet{
		TargetKind:  FabricChannelTargetPool,
		TargetID:    "pool-egress",
		Primary:     testFabricPoolRoute("route-node-b", "node-b", 10, true),
		WarmStandby: []FabricRoute{testFabricPoolRoute("route-node-c", "node-c", 20, true)},
	}
	existing := FabricChannel{
		Spec:       testFabricChannelSpec(FabricChannelTargetPool, "pool-egress"),
		State:      FabricChannelOpen,
		RouteID:    "route-node-b",
		TargetNode: "node-b",
		OpenedAt:   now.Add(-time.Minute),
	}
	failure := FabricChannelObservation{
		ChannelID: existing.Spec.ChannelID,
		RouteID:   existing.RouteID,
		Failed:    true,
		Reason:    "target_failed",
	}

	moved, routeEvent, err := router.ObserveChannel(existing, poolRoutes, failure, now)
	if err != nil {
		t.Fatalf("observe channel: %v", err)
	}

	wentFromBToC := routeEvent.PreviousRoute.RouteID == "route-node-b" && routeEvent.NextRoute.RouteID == "route-node-c"
	if !(routeEvent.Type == FabricChannelRouteEventReroute && wentFromBToC) {
		t.Fatalf("event = %+v", routeEvent)
	}
	if !(moved.TargetNode == "node-c" && moved.RouteID == "route-node-c") {
		t.Fatalf("updated = %+v", moved)
	}
}
// TestFabricChannelRouterSuppressesRerouteInsideHysteresis reports a slow ack
// ten seconds after the last reroute with a one-minute MinRerouteInterval and
// expects the channel to stay on its current route.
func TestFabricChannelRouterSuppressesRerouteInsideHysteresis(t *testing.T) {
	now := time.Now()
	router := NewFabricChannelRouter(FabricChannelRouterConfig{
		MaxAckLatencyMs:    30,
		MinRerouteInterval: time.Minute,
	})

	recentlyRerouted := FabricChannel{
		Spec:        testFabricChannelSpec(FabricChannelTargetNode, "node-b"),
		State:       FabricChannelOpen,
		RouteID:     "route-primary",
		LastReroute: now.Add(-10 * time.Second),
	}
	routes := FabricRouteSet{
		TargetKind:  FabricChannelTargetNode,
		TargetID:    "node-b",
		Primary:     testFabricRoute("route-primary", "node-b", 10, 100, 0, true),
		WarmStandby: []FabricRoute{testFabricRoute("route-standby", "node-b", 20, 100, 0, true)},
	}

	updated, event, err := router.ObserveChannel(recentlyRerouted, routes, FabricChannelObservation{AckLatencyMs: 120}, now)
	if err != nil {
		t.Fatalf("observe channel: %v", err)
	}
	stayedPut := event.Type == FabricChannelRouteEventNone && updated.RouteID == "route-primary"
	if !stayedPut {
		t.Fatalf("event=%+v updated=%+v", event, updated)
	}
}
// testFabricChannelSpec returns a channel spec with fixed channel, cluster and
// source-node IDs for the given target kind and target ID.
func testFabricChannelSpec(kind FabricChannelTargetKind, targetID string) FabricChannelSpec {
	var spec FabricChannelSpec
	spec.ChannelID = "channel-1"
	spec.ClusterID = "cluster-1"
	spec.SourceNodeID = "node-a"
	spec.TargetKind = kind
	spec.TargetID = targetID
	return spec
}
// testFabricRoute returns a two-hop route from node-a to destination in
// cluster-1 with the given latency, capacity, active-channel count and
// health flag.
func testFabricRoute(routeID string, destination string, latency int, capacity int, active int, healthy bool) FabricRoute {
	var route FabricRoute
	route.RouteID = routeID
	route.ClusterID = "cluster-1"
	route.SourceNodeID = "node-a"
	route.DestinationNodeID = destination
	route.Hops = []FabricRouteHop{
		{NodeID: "node-a"},
		{NodeID: destination},
	}
	route.BaseLatencyMs = latency
	route.Capacity = capacity
	route.ActiveChannels = active
	route.Healthy = healthy
	return route
}
// testFabricPoolRoute returns an idle route with capacity 100 that belongs to
// the "pool-egress" pool.
func testFabricPoolRoute(routeID string, destination string, latency int, healthy bool) FabricRoute {
	const (
		capacity = 100
		active   = 0
	)
	poolRoute := testFabricRoute(routeID, destination, latency, capacity, active, healthy)
	poolRoute.PoolID = "pool-egress"
	return poolRoute
}
@@ -0,0 +1,487 @@
package mesh
import (
"context"
"fmt"
"strings"
"time"
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
)
// FabricChannelRuntimeConfig configures a FabricChannelRuntime. Defaults are
// applied by NewFabricChannelRuntime, not when the runtime is built as a
// struct literal.
type FabricChannelRuntimeConfig struct {
// RouterConfig is passed to NewFabricChannelRouter.
RouterConfig FabricChannelRouterConfig
// StreamID is the stream opened on each session; defaults to 2 when zero.
StreamID uint64
// TrafficClass is sent with the open-stream frame; defaults to
// fabricproto.TrafficClassBulk when zero.
TrafficClass fabricproto.TrafficClass
// Timeout defaults to 30 seconds when not positive.
// NOTE(review): its consumer is outside the visible part of this file.
Timeout time.Duration
// MaxPayload is the largest accepted payload in bytes; defaults to
// fabricproto.DefaultMaxPayload when not positive.
MaxPayload int
// RouteHealthTTL is passed to NewFabricRouteHealthTracker.
RouteHealthTTL time.Duration
}
// FabricChannelRuntime sends channel payloads over a FabricTransport,
// choosing and re-choosing routes through its Router while tracking route
// pressure and route health.
type FabricChannelRuntime struct {
// Transport dials sessions to route targets; the Send* methods return
// ErrForwardRuntimeUnavailable when it is nil.
Transport FabricTransport
// Router opens channels and decides on reroutes.
Router FabricChannelRouter
// Pressure tracks in-use routes; when nil, pressure tracking is skipped.
Pressure *FabricRoutePressureTracker
// Health records route successes and failures; when nil, health tracking
// is skipped.
Health *FabricRouteHealthTracker
// Config holds the stream and payload settings.
Config FabricChannelRuntimeConfig
}
// FabricChannelRuntimeResult summarizes one Send* call. It is also returned,
// partially filled, alongside most errors.
type FabricChannelRuntimeResult struct {
// Channel is the channel state after the last routing decision.
Channel FabricChannel
// Byte, frame and ack totals for the call.
BytesSent uint64
BytesRecv uint64
FramesSent uint64
FramesRecv uint64
AcksReceived uint64
// RouteEvents starts with the "opened" event, followed by one entry per
// reroute.
RouteEvents []FabricChannelRouteEvent
// RouteAttempts lists the route IDs in the order a connect was attempted.
RouteAttempts []string
// MigrationEvents counts the reroutes recorded during the call.
MigrationEvents int
// RoutePressure and RouteHealth are snapshots taken when the call
// completes successfully.
RoutePressure FabricRoutePressureSnapshot
RouteHealth FabricRouteHealthSnapshot
}
// FabricChannelRequestResponseResult is the outcome of SendRequestResponse:
// the embedded runtime result plus the response bytes.
type FabricChannelRequestResponseResult struct {
FabricChannelRuntimeResult
// ResponsePayload is the response received for the request.
ResponsePayload []byte
}
// NewFabricChannelRuntime builds a runtime on top of transport with fresh
// pressure and health trackers. Unset config values get defaults: stream ID 2,
// bulk traffic class, a 30 second timeout and fabricproto.DefaultMaxPayload.
func NewFabricChannelRuntime(transport FabricTransport, cfg FabricChannelRuntimeConfig) *FabricChannelRuntime {
	const (
		defaultStreamID = 2
		defaultTimeout  = 30 * time.Second
	)
	if cfg.StreamID == 0 {
		cfg.StreamID = defaultStreamID
	}
	if cfg.TrafficClass == 0 {
		cfg.TrafficClass = fabricproto.TrafficClassBulk
	}
	if cfg.Timeout <= 0 {
		cfg.Timeout = defaultTimeout
	}
	if cfg.MaxPayload <= 0 {
		cfg.MaxPayload = fabricproto.DefaultMaxPayload
	}

	runtime := new(FabricChannelRuntime)
	runtime.Transport = transport
	runtime.Router = NewFabricChannelRouter(cfg.RouterConfig)
	runtime.Pressure = NewFabricRoutePressureTracker()
	runtime.Health = NewFabricRouteHealthTracker(cfg.RouteHealthTTL)
	runtime.Config = cfg
	return runtime
}
// SendReliable opens a channel for spec over routeSet and sends payloads in
// order, reconnecting on another route whenever the router reroutes the
// channel (after a failed connect or a migration reported by sendOnSession).
//
// It returns ErrForwardRuntimeUnavailable when the runtime or its transport is
// nil, the scheduler's error when no route can be chosen, ctx.Err() when ctx
// is done before a (re)connect attempt, and otherwise the first unrecoverable
// transport or routing error. On error the result gathered so far is returned
// alongside it; the pressure and health snapshots are only filled in on
// success.
func (r *FabricChannelRuntime) SendReliable(ctx context.Context, spec FabricChannelSpec, routeSet FabricRouteSet, payloads [][]byte) (FabricChannelRuntimeResult, error) {
	if r == nil || r.Transport == nil {
		return FabricChannelRuntimeResult{}, ErrForwardRuntimeUnavailable
	}
	now := time.Now().UTC()
	routeSet = r.routeSetForScheduling(routeSet)
	channel, event, err := r.Router.OpenChannel(spec, routeSet, now)
	if err != nil {
		return FabricChannelRuntimeResult{}, err
	}
	result := FabricChannelRuntimeResult{Channel: channel, RouteEvents: []FabricChannelRouteEvent{event}}
	sequence := uint64(0)
	index := 0
	for index < len(payloads) {
		// A cancelled or expired context makes every dial fail, which would
		// otherwise be recorded as route failures and keep the reroute loop
		// spinning between routes. Stop here instead.
		if ctxErr := ctx.Err(); ctxErr != nil {
			return result, ctxErr
		}
		// Refresh health and pressure before each attempt so the route lookup
		// sees the current state.
		routeSet = r.routeSetForScheduling(routeSet)
		route, ok := findFabricRoute(routeSet, channel.RouteID)
		if !ok {
			return result, ErrFabricRouteNotFound
		}
		result.RouteAttempts = append(result.RouteAttempts, route.RouteID)
		target, err := FabricTransportTargetForRoute(route)
		if err != nil {
			return result, err
		}
		releaseRoute := r.acquireRoute(route.RouteID)
		session, err := r.Transport.Connect(ctx, target)
		if err != nil {
			releaseRoute()
			r.markRouteFailure(route.RouteID, err)
			observedAt := time.Now().UTC()
			updated, event, rerouteErr := r.Router.ObserveChannel(channel, routeSet, FabricChannelObservation{
				ChannelID:  spec.ChannelID,
				RouteID:    route.RouteID,
				Failed:     true,
				Reason:     "connect_failed",
				ObservedAt: observedAt,
			}, observedAt)
			channel = updated
			result.Channel = channel
			if event.Type == FabricChannelRouteEventReroute {
				result.RouteEvents = append(result.RouteEvents, event)
				result.MigrationEvents++
				continue
			}
			if rerouteErr != nil {
				return result, rerouteErr
			}
			// The router declined to reroute; surface the connect error.
			return result, err
		}
		// sendOnSession advances index and sequence through the pointers and
		// reports whether the channel migrated to another route mid-stream.
		migrated, sendErr := r.sendOnSession(ctx, session, &channel, routeSet, route, payloads, &index, &sequence, &result)
		// Best-effort close: the session is finished with either way and a
		// close error would not change the outcome.
		_ = session.Close()
		releaseRoute()
		result.Channel = channel
		if sendErr != nil {
			return result, sendErr
		}
		if !migrated {
			break
		}
	}
	result.Channel = channel
	result.RoutePressure = r.snapshotRoutePressure()
	result.RouteHealth = r.snapshotRouteHealth()
	return result, nil
}
// SendRequestResponse opens a channel for spec over routeSet, sends payload as
// a single request and returns the response. A failed connect or failed
// exchange is reported to the router; if the router reroutes the channel the
// request is retried on the new route.
//
// It returns ErrForwardRuntimeUnavailable when the runtime or its transport is
// nil, an error wrapping fabricproto.ErrInvalidPayloadLen when payload exceeds
// Config.MaxPayload, ctx.Err() when ctx is done before a (re)connect attempt,
// and otherwise the first unrecoverable transport or routing error. On error
// the result gathered so far is returned alongside it.
func (r *FabricChannelRuntime) SendRequestResponse(ctx context.Context, spec FabricChannelSpec, routeSet FabricRouteSet, payload []byte) (FabricChannelRequestResponseResult, error) {
	if r == nil || r.Transport == nil {
		return FabricChannelRequestResponseResult{}, ErrForwardRuntimeUnavailable
	}
	if len(payload) > r.Config.MaxPayload {
		return FabricChannelRequestResponseResult{}, fmt.Errorf("%w: %d > %d", fabricproto.ErrInvalidPayloadLen, len(payload), r.Config.MaxPayload)
	}
	now := time.Now().UTC()
	routeSet = r.routeSetForScheduling(routeSet)
	channel, event, err := r.Router.OpenChannel(spec, routeSet, now)
	if err != nil {
		return FabricChannelRequestResponseResult{}, err
	}
	result := FabricChannelRequestResponseResult{
		FabricChannelRuntimeResult: FabricChannelRuntimeResult{Channel: channel, RouteEvents: []FabricChannelRouteEvent{event}},
	}
	sequence := uint64(1)
	for {
		// This loop has no other bound than the router running out of
		// alternatives. A cancelled or expired context makes every dial fail,
		// so stop here rather than recording bogus route failures and
		// bouncing between routes.
		if ctxErr := ctx.Err(); ctxErr != nil {
			return result, ctxErr
		}
		// Refresh health and pressure before each attempt.
		routeSet = r.routeSetForScheduling(routeSet)
		route, ok := findFabricRoute(routeSet, channel.RouteID)
		if !ok {
			return result, ErrFabricRouteNotFound
		}
		result.RouteAttempts = append(result.RouteAttempts, route.RouteID)
		target, err := FabricTransportTargetForRoute(route)
		if err != nil {
			return result, err
		}
		releaseRoute := r.acquireRoute(route.RouteID)
		session, err := r.Transport.Connect(ctx, target)
		if err != nil {
			releaseRoute()
			r.markRouteFailure(route.RouteID, err)
			observedAt := time.Now().UTC()
			updated, routeEvent, rerouteErr := r.Router.ObserveChannel(channel, routeSet, FabricChannelObservation{
				ChannelID:  spec.ChannelID,
				RouteID:    route.RouteID,
				Failed:     true,
				Reason:     "connect_failed",
				ObservedAt: observedAt,
			}, observedAt)
			channel = updated
			result.Channel = channel
			if routeEvent.Type == FabricChannelRouteEventReroute {
				result.RouteEvents = append(result.RouteEvents, routeEvent)
				result.MigrationEvents++
				continue
			}
			if rerouteErr != nil {
				return result, rerouteErr
			}
			// The router declined to reroute; surface the connect error.
			return result, err
		}
		response, ackMs, sendErr := r.sendRequestResponseOnSession(ctx, session, route.RouteID, spec.ChannelID, payload, sequence)
		// Best-effort close: the session is single-use and a close error would
		// not change the outcome of the exchange.
		_ = session.Close()
		releaseRoute()
		result.Channel = channel
		if sendErr == nil {
			r.markRouteSuccess(route.RouteID)
			result.BytesSent += uint64(len(payload))
			result.FramesSent++
			result.BytesRecv += uint64(len(response))
			result.FramesRecv++
			result.AcksReceived++
			// The exchange is complete, so record its outcome before the
			// router gets a chance to fail: a reroute error below must not
			// discard a response that was already received.
			result.ResponsePayload = response
			observedAt := time.Now().UTC()
			updated, routeEvent, observeErr := r.Router.ObserveChannel(channel, routeSet, FabricChannelObservation{
				ChannelID:    spec.ChannelID,
				RouteID:      route.RouteID,
				AckLatencyMs: ackMs,
				BytesSent:    uint64(len(payload)),
				FramesSent:   1,
				BytesRecv:    uint64(len(response)),
				FramesRecv:   1,
				ObservedAt:   observedAt,
			}, observedAt)
			channel = updated
			result.Channel = channel
			result.RoutePressure = r.snapshotRoutePressure()
			result.RouteHealth = r.snapshotRouteHealth()
			if observeErr != nil {
				// The router wanted to move the channel (slow ack or route
				// pressure) but found no alternative. The error is still
				// reported, with the response attached to the result.
				return result, observeErr
			}
			if routeEvent.Type == FabricChannelRouteEventReroute {
				result.RouteEvents = append(result.RouteEvents, routeEvent)
				result.MigrationEvents++
			}
			return result, nil
		}
		r.markRouteFailure(route.RouteID, sendErr)
		observedAt := time.Now().UTC()
		updated, routeEvent, rerouteErr := r.Router.ObserveChannel(channel, routeSet, FabricChannelObservation{
			ChannelID:  spec.ChannelID,
			RouteID:    route.RouteID,
			Failed:     true,
			Reason:     "response_failed",
			ObservedAt: observedAt,
		}, observedAt)
		channel = updated
		result.Channel = channel
		if routeEvent.Type == FabricChannelRouteEventReroute {
			result.RouteEvents = append(result.RouteEvents, routeEvent)
			result.MigrationEvents++
			continue
		}
		if rerouteErr != nil {
			return result, rerouteErr
		}
		// The router declined to reroute; surface the exchange error.
		return result, sendErr
	}
}
// routeSetForScheduling returns the route set handed to the scheduler: the
// health tracker's view of routeSet (when a tracker is configured) with the
// pressure tracker's view applied on top. It is safe on a nil receiver.
func (r *FabricChannelRuntime) routeSetForScheduling(routeSet FabricRouteSet) FabricRouteSet {
	if r == nil || r.Health == nil {
		return r.routeSetWithActiveChannels(routeSet)
	}
	withHealth := r.Health.Apply(routeSet, time.Now().UTC())
	return r.routeSetWithActiveChannels(withHealth)
}
// routeSetWithActiveChannels passes routeSet through the pressure tracker when
// one is configured; otherwise it returns routeSet unchanged.
func (r *FabricChannelRuntime) routeSetWithActiveChannels(routeSet FabricRouteSet) FabricRouteSet {
	if r != nil && r.Pressure != nil {
		return r.Pressure.Apply(routeSet)
	}
	return routeSet
}
// acquireRoute registers use of routeID with the pressure tracker and returns
// the release function the tracker hands back. Without a tracker (or on a nil
// receiver) it returns a no-op so callers can always invoke the result.
func (r *FabricChannelRuntime) acquireRoute(routeID string) func() {
	if r != nil && r.Pressure != nil {
		return r.Pressure.Acquire(routeID)
	}
	noop := func() {}
	return noop
}
// snapshotRoutePressure returns the pressure tracker's current snapshot, or a
// zero-value snapshot when no tracker is configured.
func (r *FabricChannelRuntime) snapshotRoutePressure() FabricRoutePressureSnapshot {
	if r != nil && r.Pressure != nil {
		return r.Pressure.SnapshotPressure()
	}
	return FabricRoutePressureSnapshot{}
}
// snapshotRouteHealth returns the health tracker's snapshot taken at the
// current UTC time, or a zero-value snapshot when no tracker is configured.
func (r *FabricChannelRuntime) snapshotRouteHealth() FabricRouteHealthSnapshot {
	if r != nil && r.Health != nil {
		return r.Health.Snapshot(time.Now().UTC())
	}
	return FabricRouteHealthSnapshot{}
}
// markRouteFailure reports a failure of routeID to the health tracker, using
// err's message as the reason. It does nothing when err is nil or when no
// health tracker is configured.
func (r *FabricChannelRuntime) markRouteFailure(routeID string, err error) {
	if r != nil && r.Health != nil && err != nil {
		r.Health.MarkFailure(routeID, err.Error(), time.Now().UTC())
	}
}
// markRouteSuccess reports a successful use of routeID to the health tracker,
// if one is configured.
func (r *FabricChannelRuntime) markRouteSuccess(routeID string) {
	if r != nil && r.Health != nil {
		r.Health.MarkSuccess(routeID)
	}
}
// sendOnSession streams payloads[*index:] over an already-connected session,
// one data frame at a time, waiting for the matching ack before sending the
// next frame.
//
// index and sequence are advanced in place so that a caller can resume on a
// different session after a migration; result and *channel are updated with
// the counters and router observations gathered along the way.
//
// Return values:
//   - (true, nil): the router rerouted the channel (after a failed ack or after
//     a successful observation); the caller should continue on the new route.
//   - (false, nil): every remaining payload was sent and acknowledged.
//   - (false, err): sending failed, a payload exceeded cfg.MaxPayload, the
//     router returned an error, or an ack failed without a reroute.
func (r *FabricChannelRuntime) sendOnSession(ctx context.Context, session FabricTransportSession, channel *FabricChannel, routeSet FabricRouteSet, route FabricRoute, payloads [][]byte, index *int, sequence *uint64, result *FabricChannelRuntimeResult) (bool, error) {
	cfg := r.Config
	if err := session.Send(ctx, fabricproto.Frame{
		Type:         fabricproto.FrameOpenStream,
		TrafficClass: cfg.TrafficClass,
		StreamID:     cfg.StreamID,
	}); err != nil {
		r.markRouteFailure(route.RouteID, err)
		return false, err
	}
	for *index < len(payloads) {
		payload := payloads[*index]
		if len(payload) > cfg.MaxPayload {
			return false, fmt.Errorf("%w: %d > %d", fabricproto.ErrInvalidPayloadLen, len(payload), cfg.MaxPayload)
		}
		(*sequence)++
		if err := session.Send(ctx, fabricproto.Frame{
			Type:         fabricproto.FrameData,
			TrafficClass: cfg.TrafficClass,
			StreamID:     cfg.StreamID,
			Sequence:     *sequence,
			Payload:      payload,
		}); err != nil {
			r.markRouteFailure(route.RouteID, err)
			return false, err
		}
		ackOK, ackMs := waitForFabricRuntimeAck(ctx, session, cfg.StreamID, *sequence, cfg.Timeout)
		if !ackOK {
			ackErr := fmt.Errorf("ack_failed")
			r.markRouteFailure(route.RouteID, ackErr)
			updated, event, err := r.Router.ObserveChannel(*channel, routeSet, FabricChannelObservation{
				ChannelID:  channel.Spec.ChannelID,
				RouteID:    route.RouteID,
				Failed:     true,
				Reason:     "ack_failed",
				ObservedAt: time.Now().UTC(),
			}, time.Now().UTC())
			*channel = updated
			if event.Type == FabricChannelRouteEventReroute {
				result.RouteEvents = append(result.RouteEvents, event)
				result.MigrationEvents++
				return true, nil
			}
			if err != nil {
				return false, err
			}
			// The payload at *index was never acknowledged and the router did
			// not move the channel. Returning (false, nil) here would look
			// exactly like "all payloads delivered", so surface the ack
			// failure instead (mirrors the request/response path, which falls
			// back to the send error in the same situation).
			return false, ackErr
		}
		r.markRouteSuccess(route.RouteID)
		*index++
		result.BytesSent += uint64(len(payload))
		result.FramesSent++
		result.AcksReceived++
		updated, event, err := r.Router.ObserveChannel(*channel, routeSet, FabricChannelObservation{
			ChannelID:    channel.Spec.ChannelID,
			RouteID:      route.RouteID,
			AckLatencyMs: ackMs,
			BytesSent:    uint64(len(payload)),
			FramesSent:   1,
			ObservedAt:   time.Now().UTC(),
		}, time.Now().UTC())
		*channel = updated
		if err != nil {
			return false, err
		}
		if event.Type == FabricChannelRouteEventReroute {
			// The payload was delivered, but the router wants the rest of the
			// stream on another route (e.g. ack latency over its limit).
			result.RouteEvents = append(result.RouteEvents, event)
			result.MigrationEvents++
			return true, nil
		}
	}
	// Best-effort close: the result is ignored and a background context is
	// used, so the close frame is attempted even if ctx is already done.
	_ = session.Send(context.Background(), fabricproto.Frame{
		Type:         fabricproto.FrameCloseStream,
		TrafficClass: cfg.TrafficClass,
		StreamID:     cfg.StreamID,
	})
	return false, nil
}
// sendRequestResponseOnSession opens the configured stream on session, sends
// payload as a single data frame tagged with sequence, and waits for a data
// frame on the same stream with the same sequence number.
//
// It returns a copy of the response payload and the elapsed time in
// milliseconds measured from just before the data frame was sent. When
// cfg.Timeout is positive the wait is bounded by it; otherwise only ctx bounds
// the wait.
//
// Errors: a Send error, the wait context's error, an error received from
// session.Errors(), or ErrForwardPeerUnavailable when either session channel
// is closed.
//
// Route health is deliberately NOT recorded here: the SendRequestResponse
// caller marks the route failed for every non-nil error this function
// returns, so marking send failures here as well counted a single failure
// twice. routeID and channelID are kept in the signature for compatibility.
func (r *FabricChannelRuntime) sendRequestResponseOnSession(ctx context.Context, session FabricTransportSession, routeID string, channelID string, payload []byte, sequence uint64) ([]byte, int64, error) {
	cfg := r.Config
	if err := session.Send(ctx, fabricproto.Frame{
		Type:         fabricproto.FrameOpenStream,
		TrafficClass: cfg.TrafficClass,
		StreamID:     cfg.StreamID,
	}); err != nil {
		return nil, 0, err
	}
	started := time.Now()
	if err := session.Send(ctx, fabricproto.Frame{
		Type:         fabricproto.FrameData,
		TrafficClass: cfg.TrafficClass,
		StreamID:     cfg.StreamID,
		Sequence:     sequence,
		Payload:      payload,
	}); err != nil {
		return nil, 0, err
	}
	waitCtx := ctx
	if cfg.Timeout > 0 {
		var cancel context.CancelFunc
		waitCtx, cancel = context.WithTimeout(ctx, cfg.Timeout)
		defer cancel()
	}
	for {
		select {
		case <-waitCtx.Done():
			return nil, 0, waitCtx.Err()
		case err, ok := <-session.Errors():
			if !ok {
				return nil, 0, ErrForwardPeerUnavailable
			}
			if err != nil {
				return nil, 0, err
			}
			// A nil error value carries no information; keep waiting.
		case frame, ok := <-session.Frames():
			if !ok {
				return nil, 0, ErrForwardPeerUnavailable
			}
			// Skip anything that is not the response to this request.
			if frame.Type != fabricproto.FrameData || frame.StreamID != cfg.StreamID || frame.Sequence != sequence {
				continue
			}
			// Best-effort close; the response is already in hand.
			_ = session.Send(context.Background(), fabricproto.Frame{
				Type:         fabricproto.FrameCloseStream,
				TrafficClass: cfg.TrafficClass,
				StreamID:     cfg.StreamID,
			})
			return append([]byte(nil), frame.Payload...), time.Since(started).Milliseconds(), nil
		}
	}
}
// FabricTransportTargetForRoute picks the hop of route that the transport
// should dial.
//
// For routes with RelayCount > 0 the first relay-mode hop that has an address
// wins. Otherwise (or when no relay hop has an address) the last hop with an
// address is used. A blank RouteID yields ErrFabricRouteNotFound, as does a
// route with no addressed hop (wrapped with the route ID).
func FabricTransportTargetForRoute(route FabricRoute) (FabricTransportTarget, error) {
	if strings.TrimSpace(route.RouteID) == "" {
		return FabricTransportTarget{}, ErrFabricRouteNotFound
	}
	hops := route.Hops
	if route.RelayCount > 0 {
		for i := range hops {
			if hops[i].Mode == FabricRouteRelay {
				if target, ok := fabricTransportTargetForHop(hops[i]); ok {
					return target, nil
				}
			}
		}
	}
	for i := len(hops); i > 0; i-- {
		target, ok := fabricTransportTargetForHop(hops[i-1])
		if ok {
			return target, nil
		}
	}
	return FabricTransportTarget{}, fmt.Errorf("%w: route %s has no transport endpoint", ErrFabricRouteNotFound, route.RouteID)
}
// fabricTransportTargetForHop converts hop into a transport target. It reports
// false when the hop has no (non-blank) address. The transport name is the
// hop's mode, defaulting to "quic" when the mode is empty.
func fabricTransportTargetForHop(hop FabricRouteHop) (FabricTransportTarget, bool) {
	address := strings.TrimSpace(hop.Address)
	if address == "" {
		return FabricTransportTarget{}, false
	}
	target := FabricTransportTarget{
		EndpointID:     hop.EndpointID,
		PeerID:         strings.TrimSpace(hop.NodeID),
		Endpoint:       address,
		Transport:      string(hop.Mode),
		PeerCertSHA256: strings.TrimSpace(hop.PeerCertSHA256),
	}
	if target.Transport == "" {
		target.Transport = "quic"
	}
	return target, true
}
// waitForFabricRuntimeAck blocks until session delivers an ack frame for
// (streamID, sequence) and returns true with the elapsed milliseconds.
//
// It returns (false, 0) when ctx ends, when the optional timeout (applied only
// if positive) expires, when either session channel is closed, or when a
// non-nil error arrives on session.Errors(). Frames that do not match are
// discarded.
func waitForFabricRuntimeAck(ctx context.Context, session FabricTransportSession, streamID uint64, sequence uint64, timeout time.Duration) (bool, int64) {
	started := time.Now()
	if timeout > 0 {
		bounded, cancel := context.WithTimeout(ctx, timeout)
		defer cancel()
		ctx = bounded
	}
	for {
		select {
		case <-ctx.Done():
			return false, 0
		case err, open := <-session.Errors():
			if err != nil || !open {
				return false, 0
			}
		case frame, open := <-session.Frames():
			switch {
			case !open:
				return false, 0
			case frame.Type == fabricproto.FrameAck && frame.StreamID == streamID && frame.Sequence == sequence:
				return true, time.Since(started).Milliseconds()
			}
		}
	}
}
@@ -0,0 +1,495 @@
package mesh
import (
"context"
"strings"
"sync"
"testing"
"time"
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
)
// TestFabricChannelRuntimeMigratesSlowAckToStandbyRoute sends three payloads
// through a primary route whose acks take 60ms while the router allows 30ms,
// and expects one migration to the standby route with every payload acked and
// exactly one connect per endpoint.
func TestFabricChannelRuntimeMigratesSlowAckToStandbyRoute(t *testing.T) {
	const (
		slowEndpoint = "quic://slow.example.test:19443"
		fastEndpoint = "quic://fast.example.test:19443"
	)
	transport := newFakeFabricRuntimeTransport(map[string]time.Duration{
		slowEndpoint: 60 * time.Millisecond,
		fastEndpoint: 0,
	})
	rt := NewFabricChannelRuntime(transport, FabricChannelRuntimeConfig{
		RouterConfig: FabricChannelRouterConfig{MaxAckLatencyMs: 30},
		StreamID:     9,
	})
	routes := FabricRouteSet{
		TargetKind:  FabricChannelTargetNode,
		TargetID:    "node-b",
		Primary:     testRuntimeRoute("route-slow", "node-b", slowEndpoint, 10),
		WarmStandby: []FabricRoute{testRuntimeRoute("route-fast", "node-b", fastEndpoint, 20)},
	}
	payloads := [][]byte{[]byte("one"), []byte("two"), []byte("three")}

	result, err := rt.SendReliable(context.Background(), testFabricChannelSpec(FabricChannelTargetNode, "node-b"), routes, payloads)
	if err != nil {
		t.Fatalf("send reliable: %v", err)
	}
	if result.MigrationEvents != 1 {
		t.Fatalf("migration events = %d, want 1: %+v", result.MigrationEvents, result.RouteEvents)
	}
	if result.Channel.RouteID != "route-fast" || result.Channel.RerouteCount != 1 {
		t.Fatalf("channel = %+v", result.Channel)
	}
	wantBytes := uint64(len("one") + len("two") + len("three"))
	if result.BytesSent != wantBytes || result.AcksReceived != 3 {
		t.Fatalf("result = %+v", result)
	}
	if got := transport.connectCount(slowEndpoint); got != 1 {
		t.Fatalf("slow connect count = %d, want 1", got)
	}
	if got := transport.connectCount(fastEndpoint); got != 1 {
		t.Fatalf("fast connect count = %d, want 1", got)
	}
	pressure := result.RoutePressure
	if pressure.AcquiredTotal != 2 || pressure.ReleasedTotal != 2 || pressure.MaxActiveTotal == 0 {
		t.Fatalf("route pressure = %+v", result.RoutePressure)
	}
}
// TestFabricChannelRuntimeReroutesOnConnectFailure makes the primary route's
// endpoint refuse connections and expects the payload to be delivered over the
// standby route after a single migration.
func TestFabricChannelRuntimeReroutesOnConnectFailure(t *testing.T) {
	const (
		deadEndpoint = "quic://dead.example.test:19443"
		fastEndpoint = "quic://fast.example.test:19443"
	)
	transport := newFakeFabricRuntimeTransport(map[string]time.Duration{fastEndpoint: 0})
	transport.failConnect[deadEndpoint] = true
	rt := NewFabricChannelRuntime(transport, FabricChannelRuntimeConfig{
		RouterConfig: FabricChannelRouterConfig{MaxAckLatencyMs: 30},
		StreamID:     9,
	})
	routes := FabricRouteSet{
		TargetKind:  FabricChannelTargetNode,
		TargetID:    "node-b",
		Primary:     testRuntimeRoute("route-dead", "node-b", deadEndpoint, 10),
		WarmStandby: []FabricRoute{testRuntimeRoute("route-fast", "node-b", fastEndpoint, 20)},
	}

	result, err := rt.SendReliable(context.Background(), testFabricChannelSpec(FabricChannelTargetNode, "node-b"), routes, [][]byte{[]byte("payload")})
	if err != nil {
		t.Fatalf("send reliable: %v", err)
	}
	migratedOnce := result.MigrationEvents == 1
	onFastRoute := result.Channel.RouteID == "route-fast"
	sentAll := result.BytesSent == uint64(len("payload"))
	if !migratedOnce || !onFastRoute || !sentAll {
		t.Fatalf("result = %+v", result)
	}
}
// TestFabricChannelRuntimeQuarantinesFailedRouteAcrossChannels runs two sends
// on one runtime with RouteHealthTTL set. The first send fails to connect on
// the primary and falls over to the standby; the second must go straight to
// the standby without dialling the failed endpoint again.
func TestFabricChannelRuntimeQuarantinesFailedRouteAcrossChannels(t *testing.T) {
	const (
		deadEndpoint = "quic://dead.example.test:19443"
		fastEndpoint = "quic://fast.example.test:19443"
	)
	transport := newFakeFabricRuntimeTransport(map[string]time.Duration{fastEndpoint: 0})
	transport.failConnect[deadEndpoint] = true
	rt := NewFabricChannelRuntime(transport, FabricChannelRuntimeConfig{
		RouterConfig:   FabricChannelRouterConfig{MaxAckLatencyMs: 30},
		StreamID:       9,
		RouteHealthTTL: time.Minute,
	})
	routes := FabricRouteSet{
		TargetKind:  FabricChannelTargetNode,
		TargetID:    "node-b",
		Primary:     testRuntimeRoute("route-dead", "node-b", deadEndpoint, 10),
		WarmStandby: []FabricRoute{testRuntimeRoute("route-fast", "node-b", fastEndpoint, 20)},
	}

	first, err := rt.SendReliable(context.Background(), testFabricChannelSpec(FabricChannelTargetNode, "node-b"), routes, [][]byte{[]byte("first")})
	if err != nil {
		t.Fatalf("first send reliable: %v", err)
	}
	if first.Channel.RouteID != "route-fast" || first.RouteHealth.Quarantined["route-dead"].Failures != 1 {
		t.Fatalf("first result = %+v", first)
	}

	second, err := rt.SendReliable(context.Background(), testFabricChannelSpec(FabricChannelTargetNode, "node-b"), routes, [][]byte{[]byte("second")})
	if err != nil {
		t.Fatalf("second send reliable: %v", err)
	}
	if second.Channel.RouteID != "route-fast" {
		t.Fatalf("second route = %s, want route-fast", second.Channel.RouteID)
	}
	if got := transport.connectCount(deadEndpoint); got != 1 {
		t.Fatalf("dead connect count = %d, want one attempt before quarantine", got)
	}
	if got := transport.connectCount(fastEndpoint); got != 2 {
		t.Fatalf("fast connect count = %d, want both channels on healthy route", got)
	}
}
// TestFabricChannelRuntimeReroutesOnAckTimeout uses a 10ms runtime timeout
// against a primary route that acks after 100ms, and expects the payload to be
// delivered on the standby route after one migration.
func TestFabricChannelRuntimeReroutesOnAckTimeout(t *testing.T) {
	const (
		slowEndpoint = "quic://slow.example.test:19443"
		fastEndpoint = "quic://fast.example.test:19443"
	)
	transport := newFakeFabricRuntimeTransport(map[string]time.Duration{
		slowEndpoint: 100 * time.Millisecond,
		fastEndpoint: 0,
	})
	rt := NewFabricChannelRuntime(transport, FabricChannelRuntimeConfig{
		RouterConfig: FabricChannelRouterConfig{MaxAckLatencyMs: 30},
		StreamID:     9,
		Timeout:      10 * time.Millisecond,
	})
	routes := FabricRouteSet{
		TargetKind:  FabricChannelTargetNode,
		TargetID:    "node-b",
		Primary:     testRuntimeRoute("route-slow", "node-b", slowEndpoint, 10),
		WarmStandby: []FabricRoute{testRuntimeRoute("route-fast", "node-b", fastEndpoint, 20)},
	}

	result, err := rt.SendReliable(context.Background(), testFabricChannelSpec(FabricChannelTargetNode, "node-b"), routes, [][]byte{[]byte("payload")})
	if err != nil {
		t.Fatalf("send reliable: %v", err)
	}
	migratedOnce := result.MigrationEvents == 1
	onFastRoute := result.Channel.RouteID == "route-fast"
	sentAll := result.BytesSent == uint64(len("payload"))
	if !migratedOnce || !onFastRoute || !sentAll {
		t.Fatalf("result = %+v", result)
	}
}
// TestFabricChannelRuntimeSpreadsConcurrentChannelsBySharedPressure starts one
// send on route-a (whose ack takes 80ms), waits until that connect has
// happened, then starts a second send on the same runtime and expects it to be
// placed on route-b.
func TestFabricChannelRuntimeSpreadsConcurrentChannelsBySharedPressure(t *testing.T) {
	const (
		routeAEndpoint = "quic://route-a.example.test:19443"
		routeBEndpoint = "quic://route-b.example.test:19443"
	)
	transport := newFakeFabricRuntimeTransport(map[string]time.Duration{
		routeAEndpoint: 80 * time.Millisecond,
		routeBEndpoint: 0,
	})
	rt := NewFabricChannelRuntime(transport, FabricChannelRuntimeConfig{StreamID: 9})
	routes := FabricRouteSet{
		TargetKind:  FabricChannelTargetNode,
		TargetID:    "node-b",
		Primary:     testRuntimeRoute("route-a", "node-b", routeAEndpoint, 10),
		WarmStandby: []FabricRoute{testRuntimeRoute("route-b", "node-b", routeBEndpoint, 11)},
	}

	// Buffered so the goroutine can finish even if the test bails out early.
	firstErr := make(chan error, 1)
	go func() {
		_, sendErr := rt.SendReliable(context.Background(), testFabricChannelSpec(FabricChannelTargetNode, "node-b"), routes, [][]byte{[]byte("one")})
		firstErr <- sendErr
	}()
	transport.waitForConnect(t, routeAEndpoint, 1)

	result, err := rt.SendReliable(context.Background(), testFabricChannelSpec(FabricChannelTargetNode, "node-b"), routes, [][]byte{[]byte("two")})
	if err != nil {
		t.Fatalf("second send reliable: %v", err)
	}
	if result.Channel.RouteID != "route-b" {
		t.Fatalf("second route = %s, want route-b", result.Channel.RouteID)
	}
	if got := transport.connectCount(routeBEndpoint); got != 1 {
		t.Fatalf("route-b connect count = %d, want 1", got)
	}
	if err := <-firstErr; err != nil {
		t.Fatalf("first send reliable: %v", err)
	}
}
// TestFabricChannelRuntimeRequestResponseReturnsPayload performs one
// request/response exchange against a pool-targeted route and checks the
// returned payload and the byte/frame/ack counters.
func TestFabricChannelRuntimeRequestResponseReturnsPayload(t *testing.T) {
	const (
		endpoint = "quic://runtime.example.test:19443"
		request  = `{"request":true}`
		response = `{"status":"ok"}`
	)
	transport := newFakeFabricRequestResponseTransport(map[string][]byte{
		endpoint: []byte(response),
	})
	rt := NewFabricChannelRuntime(transport, FabricChannelRuntimeConfig{
		RouterConfig: FabricChannelRouterConfig{MaxAckLatencyMs: 30},
		StreamID:     9,
	})
	routes := FabricRouteSet{
		TargetKind: FabricChannelTargetPool,
		TargetID:   "pool-admin-runtime",
		Primary:    testRuntimePoolRoute("route-runtime", "pool-admin-runtime", "node-runtime", endpoint, 10),
	}
	spec := FabricChannelSpec{
		ChannelID:    "channel-web-1",
		ClusterID:    "cluster-1",
		SourceNodeID: "node-a",
		TargetKind:   FabricChannelTargetPool,
		TargetID:     "pool-admin-runtime",
		TrafficClass: "control",
		CreatedAt:    time.Now().UTC(),
	}

	result, err := rt.SendRequestResponse(context.Background(), spec, routes, []byte(request))
	if err != nil {
		t.Fatalf("request response: %v", err)
	}
	if string(result.ResponsePayload) != response {
		t.Fatalf("response payload = %s", string(result.ResponsePayload))
	}
	routeOK := result.Channel.RouteID == "route-runtime"
	bytesOK := result.BytesSent == uint64(len(request)) && result.BytesRecv == uint64(len(response))
	countsOK := result.FramesSent == 1 && result.FramesRecv == 1 && result.AcksReceived == 1
	if !routeOK || !bytesOK || !countsOK {
		t.Fatalf("result = %+v", result)
	}
}
// TestFabricChannelRuntimeRequestResponseReroutesOnResponseFailure uses a
// primary endpoint that never answers and a 10ms timeout, and expects the
// response to come from the standby route after one migration.
func TestFabricChannelRuntimeRequestResponseReroutesOnResponseFailure(t *testing.T) {
	const (
		slowEndpoint = "quic://slow.example.test:19443"
		fastEndpoint = "quic://fast.example.test:19443"
		response     = `{"status":"ok"}`
	)
	transport := newFakeFabricRequestResponseTransport(map[string][]byte{
		fastEndpoint: []byte(response),
	})
	transport.failResponse[slowEndpoint] = true
	rt := NewFabricChannelRuntime(transport, FabricChannelRuntimeConfig{
		RouterConfig: FabricChannelRouterConfig{MaxAckLatencyMs: 30},
		StreamID:     9,
		Timeout:      10 * time.Millisecond,
	})
	routes := FabricRouteSet{
		TargetKind:  FabricChannelTargetNode,
		TargetID:    "node-runtime",
		Primary:     testRuntimeRoute("route-slow", "node-runtime", slowEndpoint, 10),
		WarmStandby: []FabricRoute{testRuntimeRoute("route-fast", "node-runtime", fastEndpoint, 20)},
	}

	result, err := rt.SendRequestResponse(context.Background(), testFabricChannelSpec(FabricChannelTargetNode, "node-runtime"), routes, []byte(`{"request":true}`))
	if err != nil {
		t.Fatalf("request response: %v", err)
	}
	migratedOnce := result.MigrationEvents == 1
	onFastRoute := result.Channel.RouteID == "route-fast"
	gotResponse := string(result.ResponsePayload) == response
	if !migratedOnce || !onFastRoute || !gotResponse {
		t.Fatalf("result = %+v", result)
	}
}
func TestFabricTransportTargetForRouteUsesLastAddressedHop(t *testing.T) {
target, err := FabricTransportTargetForRoute(FabricRoute{
RouteID: "route-1",
Hops: []FabricRouteHop{
{NodeID: "node-a"},
{NodeID: "node-r", Mode: FabricRouteRelay, EndpointID: "relay-1", Address: "quic://relay.example.test:19443"},
{NodeID: "node-b", Mode: FabricRouteDirect, EndpointID: "node-b-quic", Address: "quic://node-b.example.test:19443"},
},
})
if err != nil {
t.Fatalf("target for route: %v", err)
}
if target.PeerID != "node-b" || target.EndpointID != "node-b-quic" || target.Endpoint != "quic://node-b.example.test:19443" || target.Transport != string(FabricRouteDirect) {
t.Fatalf("target = %+v", target)
}
}
// fakeFabricRequestResponseTransport is a test transport whose sessions answer
// each data frame with a canned response chosen by endpoint.
type fakeFabricRequestResponseTransport struct {
// mu guards the maps below while Connect reads and updates them.
mu sync.Mutex
// responses maps an endpoint to the payload its sessions reply with.
responses map[string][]byte
// failResponse marks endpoints whose sessions never reply.
failResponse map[string]bool
// connects counts Connect calls per endpoint.
connects map[string]int
}
// newFakeFabricRequestResponseTransport builds a fake transport that replies
// with responses[endpoint]; the failResponse and connects maps start empty.
func newFakeFabricRequestResponseTransport(responses map[string][]byte) *fakeFabricRequestResponseTransport {
	transport := new(fakeFabricRequestResponseTransport)
	transport.responses = responses
	transport.failResponse = make(map[string]bool)
	transport.connects = make(map[string]int)
	return transport
}
// Connect records the attempt and returns a session preloaded with a copy of
// the endpoint's canned response and its failResponse flag. It never fails.
func (t *fakeFabricRequestResponseTransport) Connect(_ context.Context, target FabricTransportTarget) (FabricTransportSession, error) {
	session := &fakeFabricRequestResponseSession{
		frames: make(chan fabricproto.Frame, 16),
		errors: make(chan error, 1),
		done:   make(chan struct{}),
	}
	t.mu.Lock()
	defer t.mu.Unlock()
	t.connects[target.Endpoint]++
	session.response = append([]byte(nil), t.responses[target.Endpoint]...)
	session.failResponse = t.failResponse[target.Endpoint]
	return session, nil
}
// Close is a no-op; the fake transport holds no resources.
func (t *fakeFabricRequestResponseTransport) Close() error { return nil }
// fakeFabricRequestResponseSession is the session handed out by
// fakeFabricRequestResponseTransport.
type fakeFabricRequestResponseSession struct {
// response is the payload echoed back for every data frame.
response []byte
// failResponse suppresses all replies when true.
failResponse bool
// frames carries reply frames to the reader.
frames chan fabricproto.Frame
// errors is exposed through Errors(); nothing in this fake writes to it.
errors chan error
// done is closed by Close and unblocks pending reply goroutines.
done chan struct{}
// once makes Close idempotent.
once sync.Once
}
// Send always succeeds. For a data frame (unless failResponse is set) it
// asynchronously queues a data frame carrying a copy of the canned response
// with the request's traffic class, stream ID and sequence; the queued reply
// is abandoned if the session is closed first.
func (s *fakeFabricRequestResponseSession) Send(_ context.Context, frame fabricproto.Frame) error {
	if s.failResponse || frame.Type != fabricproto.FrameData {
		return nil
	}
	reply := fabricproto.Frame{
		Type:         fabricproto.FrameData,
		TrafficClass: frame.TrafficClass,
		StreamID:     frame.StreamID,
		Sequence:     frame.Sequence,
		Payload:      append([]byte(nil), s.response...),
	}
	go func() {
		select {
		case s.frames <- reply:
		case <-s.done:
		}
	}()
	return nil
}
// Frames exposes the channel on which reply frames are delivered.
func (s *fakeFabricRequestResponseSession) Frames() <-chan fabricproto.Frame { return s.frames }
// Errors exposes the session's error channel.
func (s *fakeFabricRequestResponseSession) Errors() <-chan error { return s.errors }
// Close closes the done channel exactly once, however often it is called, and
// always returns nil.
func (s *fakeFabricRequestResponseSession) Close() error {
	closeDone := func() { close(s.done) }
	s.once.Do(closeDone)
	return nil
}
// Closed reports, without blocking, whether Close has been called.
func (s *fakeFabricRequestResponseSession) Closed() bool {
	select {
	case <-s.done:
		return true
	default:
	}
	return false
}
func TestFabricTransportTargetForRouteUsesRelayHopForRelayRoute(t *testing.T) {
target, err := FabricTransportTargetForRoute(FabricRoute{
RouteID: "route-relay",
RelayCount: 1,
Hops: []FabricRouteHop{
{NodeID: "node-a"},
{NodeID: "node-r", Mode: FabricRouteRelay, EndpointID: "relay-1", Address: "quic://relay.example.test:19443", PeerCertSHA256: "relay-cert"},
{NodeID: "node-b", Mode: FabricRouteRelay, EndpointID: "node-b-private", Address: "quic://10.0.0.2:19443", PeerCertSHA256: "node-b-cert"},
},
})
if err != nil {
t.Fatalf("target for relay route: %v", err)
}
if target.PeerID != "node-r" || target.EndpointID != "relay-1" || target.Endpoint != "quic://relay.example.test:19443" || target.PeerCertSHA256 != "relay-cert" {
t.Fatalf("target = %+v", target)
}
}
// fakeFabricRuntimeTransport is a test transport whose sessions acknowledge
// data frames after a per-endpoint delay and whose connects can be made to
// fail per endpoint.
type fakeFabricRuntimeTransport struct {
// mu guards the maps below.
mu sync.Mutex
// delays maps an endpoint to how long its sessions wait before acking.
delays map[string]time.Duration
// failConnect marks endpoints for which Connect returns an error.
failConnect map[string]bool
// connects counts Connect calls per endpoint (failed ones included).
connects map[string]int
}
// newFakeFabricRuntimeTransport builds a fake transport using delays as the
// per-endpoint ack delay; the failConnect and connects maps start empty.
func newFakeFabricRuntimeTransport(delays map[string]time.Duration) *fakeFabricRuntimeTransport {
	transport := new(fakeFabricRuntimeTransport)
	transport.delays = delays
	transport.failConnect = make(map[string]bool)
	transport.connects = make(map[string]int)
	return transport
}
// Connect counts the attempt, then either fails with
// ErrForwardPeerUnavailable (for endpoints listed in failConnect) or returns a
// new session configured with the endpoint's ack delay.
func (t *fakeFabricRuntimeTransport) Connect(_ context.Context, target FabricTransportTarget) (FabricTransportSession, error) {
	endpoint := target.Endpoint

	t.mu.Lock()
	t.connects[endpoint]++
	shouldFail, ackDelay := t.failConnect[endpoint], t.delays[endpoint]
	t.mu.Unlock()

	if shouldFail {
		return nil, ErrForwardPeerUnavailable
	}
	session := &fakeFabricRuntimeSession{
		endpoint: endpoint,
		delay:    ackDelay,
		frames:   make(chan fabricproto.Frame, 64),
		errors:   make(chan error, 1),
		done:     make(chan struct{}),
	}
	return session, nil
}
// Close is a no-op; the fake transport holds no resources.
func (t *fakeFabricRuntimeTransport) Close() error { return nil }
// connectCount returns how many times Connect has been called for endpoint.
func (t *fakeFabricRuntimeTransport) connectCount(endpoint string) int {
	t.mu.Lock()
	count := t.connects[endpoint]
	t.mu.Unlock()
	return count
}
// waitForConnect polls once per millisecond until endpoint has been connected
// to at least count times, failing the test if that has not happened within
// one second.
func (t *fakeFabricRuntimeTransport) waitForConnect(tb testing.TB, endpoint string, count int) {
	tb.Helper()
	deadline := time.Now().Add(time.Second)
	for {
		got := t.connectCount(endpoint)
		if got >= count {
			return
		}
		if time.Now().After(deadline) {
			tb.Fatalf("timed out waiting for %s connect count %d, got %d", endpoint, count, got)
		}
		time.Sleep(time.Millisecond)
	}
}
// fakeFabricRuntimeSession is the session handed out by
// fakeFabricRuntimeTransport; it acks every data frame after delay.
type fakeFabricRuntimeSession struct {
// endpoint is the address this session was opened for.
endpoint string
// delay is how long Send's goroutine sleeps before queueing the ack.
delay time.Duration
// frames carries ack frames to the reader.
frames chan fabricproto.Frame
// errors is exposed through Errors(); nothing in this fake writes to it.
errors chan error
// done is closed by Close and unblocks pending ack goroutines.
done chan struct{}
// once makes Close idempotent.
once sync.Once
}
// Send always succeeds. For a data frame it starts a goroutine that sleeps for
// the session's delay and then queues an ack with the frame's traffic class,
// stream ID and sequence, unless the session has been closed by then. Other
// frame types are ignored.
func (s *fakeFabricRuntimeSession) Send(_ context.Context, frame fabricproto.Frame) error {
	if frame.Type != fabricproto.FrameData {
		return nil
	}
	ack := fabricproto.Frame{
		Type:         fabricproto.FrameAck,
		TrafficClass: frame.TrafficClass,
		StreamID:     frame.StreamID,
		Sequence:     frame.Sequence,
	}
	go func(wait time.Duration) {
		if wait > 0 {
			time.Sleep(wait)
		}
		select {
		case s.frames <- ack:
		case <-s.done:
		}
	}(s.delay)
	return nil
}
// Frames exposes the channel on which ack frames are delivered.
func (s *fakeFabricRuntimeSession) Frames() <-chan fabricproto.Frame { return s.frames }
// Errors exposes the session's error channel.
func (s *fakeFabricRuntimeSession) Errors() <-chan error { return s.errors }
// Close closes the done channel exactly once, however often it is called, and
// always returns nil.
func (s *fakeFabricRuntimeSession) Close() error {
	closeDone := func() { close(s.done) }
	s.once.Do(closeDone)
	return nil
}
// Closed reports, without blocking, whether Close has been called.
func (s *fakeFabricRuntimeSession) Closed() bool {
	select {
	case <-s.done:
		return true
	default:
	}
	return false
}
// testRuntimeRoute builds a route via testFabricRoute and rewrites its final
// hop to dial endpoint in direct mode, using routeID without its "route-"
// prefix as the endpoint ID.
func testRuntimeRoute(routeID string, destination string, endpoint string, latency int) FabricRoute {
	route := testFabricRoute(routeID, destination, latency, 100, 0, true)
	lastHop := &route.Hops[len(route.Hops)-1]
	lastHop.Address = endpoint
	lastHop.EndpointID = strings.TrimPrefix(routeID, "route-")
	lastHop.Mode = FabricRouteDirect
	return route
}
// testRuntimePoolRoute is testRuntimeRoute with the route's PoolID set to
// poolID.
func testRuntimePoolRoute(routeID string, poolID string, destination string, endpoint string, latency int) FabricRoute {
	poolRoute := testRuntimeRoute(routeID, destination, endpoint, latency)
	poolRoute.PoolID = poolID
	return poolRoute
}
@@ -0,0 +1,390 @@
package mesh
import (
"errors"
"sort"
"strings"
"time"
)
// FabricChannelTargetKind says whether a channel (or route set) is addressed
// to a single node or to a pool.
type FabricChannelTargetKind string
// The target kinds accepted by ValidateFabricChannelSpec.
const (
FabricChannelTargetNode FabricChannelTargetKind = "node"
FabricChannelTargetPool FabricChannelTargetKind = "pool"
)
// FabricChannelLifecycleState is the type of FabricChannel.State.
type FabricChannelLifecycleState string
// Lifecycle states a FabricChannel can be in.
const (
FabricChannelOpening FabricChannelLifecycleState = "opening"
FabricChannelOpen FabricChannelLifecycleState = "open"
FabricChannelDraining FabricChannelLifecycleState = "draining"
FabricChannelClosed FabricChannelLifecycleState = "closed"
)
// FabricRouteMode names the transport mode of a route hop (FabricRouteHop.Mode)
// or of an adjacency (FabricAdjacency.Mode).
type FabricRouteMode string
// Known route modes. FabricRouteRelay is the mode that
// FabricTransportTargetForRoute looks for on routes with RelayCount > 0.
const (
FabricRouteDirect FabricRouteMode = "direct_quic"
FabricRouteLAN FabricRouteMode = "lan_quic"
FabricRouteReverse FabricRouteMode = "reverse_quic"
FabricRouteRelay FabricRouteMode = "relay_quic"
FabricRouteICE FabricRouteMode = "ice_quic"
)
// Sentinel errors returned by the channel and route helpers in this package.
var (
// ErrFabricChannelInvalid is returned by ValidateFabricChannelSpec when a
// required field is blank or the target kind is unknown.
ErrFabricChannelInvalid = errors.New("fabric channel request is invalid")
// ErrFabricRouteNotFound is returned when no usable route (or no dialable
// hop on a route) can be found.
ErrFabricRouteNotFound = errors.New("fabric route not found")
)
// FabricChannelSpec describes a channel to be opened. ValidateFabricChannelSpec
// requires ChannelID, ClusterID, SourceNodeID and TargetID to be non-blank and
// TargetKind to be node or pool.
type FabricChannelSpec struct {
ChannelID string
ClusterID string
SourceNodeID string
TargetKind FabricChannelTargetKind
// TargetID is matched against a route's DestinationNodeID (node targets)
// or PoolID (pool targets) when routes are filtered.
TargetID string
TrafficClass string
MinBandwidth int64
// StickyKey is set to the request's ResourceID by
// FabricChannelSpecFromServiceRequest.
StickyKey string
CreatedAt time.Time
// ForbiddenHops lists node IDs; a route containing any of them as a hop is
// rejected by the scheduler.
ForbiddenHops []string
}
// FabricServiceChannelTarget is the target section of a
// FabricServiceChannelRequest. FabricChannelSpecFromServiceRequest reads Kind,
// PoolIDs, SelectedNodeID and NodeIDs from it to derive the channel's target;
// the remaining fields are not consulted by the code visible here.
type FabricServiceChannelTarget struct {
// Kind defaults to pool when empty.
Kind FabricChannelTargetKind
PoolIDs []string
NodeIDs []string
SelectedNodeID string
ServiceRole string
SelectionPolicy string
SingleMemberPool bool
}
// FabricServiceChannelRequest is a service-level request for a channel. It is
// turned into a FabricChannelSpec by FabricChannelSpecFromServiceRequest.
type FabricServiceChannelRequest struct {
SchemaVersion string
// ChannelID becomes the spec's channel ID; ResourceID is used when it is
// blank.
ChannelID string
ClusterID string
OrganizationID string
UserID string
// ResourceID also becomes the spec's StickyKey.
ResourceID string
// SourceNodeID falls back to the local node ID when blank.
SourceNodeID string
SourceRole string
// ServiceClass selects the default traffic class when TrafficClass is
// blank.
ServiceClass string
Target FabricServiceChannelTarget
TrafficClass string
CreatedAt time.Time
}
// FabricChannel is the runtime record of a channel: the spec it was opened
// from, its lifecycle state, the route it is currently bound to, and traffic
// and reroute counters.
type FabricChannel struct {
Spec FabricChannelSpec
State FabricChannelLifecycleState
// RouteID identifies the route the channel currently uses.
RouteID string
TargetNode string
OpenedAt time.Time
LastReroute time.Time
BytesSent uint64
BytesRecv uint64
FramesSent uint64
FramesRecv uint64
RerouteCount uint64
}
// FabricRouteHop is one hop of a FabricRoute.
type FabricRouteHop struct {
// NodeID is compared against FabricChannelSpec.ForbiddenHops.
NodeID string
Mode FabricRouteMode
EndpointID string
// Address is the endpoint to dial; hops with a blank address cannot be
// used as a transport target.
Address string
PeerCertSHA256 string
}
// FabricRoute is a candidate path for a channel together with the metrics the
// scheduler scores it by.
type FabricRoute struct {
RouteID string
// ClusterID, SourceNodeID, DestinationNodeID and PoolID are matched against
// the channel spec; an empty value on the route matches anything.
ClusterID string
SourceNodeID string
DestinationNodeID string
PoolID string
Hops []FabricRouteHop
BaseLatencyMs int
JitterMs int
LossPermille int
// Capacity and ActiveChannels determine the pressure percentage; a
// non-positive Capacity is treated as 100% pressure.
Capacity int
ActiveChannels int
// RelayCount adds a per-relay scoring penalty and, when positive, makes
// the transport dial the first addressed relay hop.
RelayCount int
LastUpdatedAt time.Time
// Healthy must be true for the route to be considered at all.
Healthy bool
// Degraded adds a fixed penalty to the route's score.
Degraded bool
}
// FabricRouteSet groups the candidate routes for one target. The scheduler
// considers Primary (when it has a RouteID), then WarmStandby, then
// ColdFallbacks, and ranks them all together by score.
type FabricRouteSet struct {
TargetKind FabricChannelTargetKind
TargetID string
Primary FabricRoute
WarmStandby []FabricRoute
ColdFallbacks []FabricRoute
}
// FabricAdjacency holds per-link data between two nodes (FromNodeID and
// ToNodeID).
// NOTE(review): this type is not referenced by the code visible in this part
// of the file; field meanings below the node IDs are inferred from their names
// only and should be confirmed against the code that populates them.
type FabricAdjacency struct {
FromNodeID string
ToNodeID string
Mode FabricRouteMode
RTTMs int
JitterMs int
LossPermille int
Capacity int
ActiveChannels int
ThroughputBps int64
PressurePercent int
Healthy bool
PassiveOutbound bool
LocalSegmentID string
NATGroupID string
LastObservedAt time.Time
LastFailureReason string
}
// FabricRouteChoice is the scheduler's verdict for one route.
type FabricRouteChoice struct {
Route FabricRoute
// Score is the weighted cost of the route; lower is better.
Score int
// Reason is a short machine-readable label explaining the score.
Reason string
// PressureBefore and PressureAfter are the route's pressure percentages
// without and with the projected cost of the new channel.
PressureBefore int
PressureAfter int
}
// FabricRouteSchedulerConfig holds the weights and penalties used to score
// routes. Non-positive weights, penalties and ProjectedChannelCost are
// replaced with defaults by normalizeFabricRouteSchedulerConfig.
type FabricRouteSchedulerConfig struct {
LatencyWeight int
JitterWeight int
LossWeight int
PressureWeight int
// HopPenalty is charged per hop, RelayPenalty per relay.
HopPenalty int
RelayPenalty int
DegradedPenalty int
// ProjectedChannelCost is the number of channels added to a route's active
// count when projecting its pressure.
ProjectedChannelCost int
// HardMaxRoutePressure, when positive, excludes routes whose projected
// pressure exceeds it; zero disables the limit.
HardMaxRoutePressure int
}
// FabricRouteScheduler ranks the routes of a FabricRouteSet according to
// Config and picks the best one.
type FabricRouteScheduler struct {
Config FabricRouteSchedulerConfig
}
// NewFabricRouteScheduler returns a scheduler whose configuration is cfg with
// defaults filled in for unset (non-positive) values.
func NewFabricRouteScheduler(cfg FabricRouteSchedulerConfig) FabricRouteScheduler {
	normalized := normalizeFabricRouteSchedulerConfig(cfg)
	return FabricRouteScheduler{Config: normalized}
}
// ChooseRoute validates spec, filters routeSet down to the routes usable for
// it, scores them and returns the best choice.
//
// Routes are ordered by ascending score, then projected pressure, then base
// latency, then route ID. Routes whose projected pressure exceeds a positive
// HardMaxRoutePressure are excluded. It returns the validation error for an
// invalid spec and ErrFabricRouteNotFound when no route survives filtering.
func (s FabricRouteScheduler) ChooseRoute(spec FabricChannelSpec, routeSet FabricRouteSet, now time.Time) (FabricRouteChoice, error) {
	if err := ValidateFabricChannelSpec(spec); err != nil {
		return FabricRouteChoice{}, err
	}
	candidates := flattenFabricRouteSet(routeSet)
	if len(candidates) == 0 {
		return FabricRouteChoice{}, ErrFabricRouteNotFound
	}

	blockedHops := stringSet(spec.ForbiddenHops)
	hardMax := s.Config.HardMaxRoutePressure
	ranked := make([]FabricRouteChoice, 0, len(candidates))
	for _, candidate := range candidates {
		if !fabricRouteUsable(spec, candidate, blockedHops, now) {
			continue
		}
		scored := s.scoreRoute(candidate)
		scored.Route = candidate
		if hardMax > 0 && scored.PressureAfter > hardMax {
			continue
		}
		ranked = append(ranked, scored)
	}
	if len(ranked) == 0 {
		return FabricRouteChoice{}, ErrFabricRouteNotFound
	}

	sort.SliceStable(ranked, func(i, j int) bool {
		a, b := &ranked[i], &ranked[j]
		switch {
		case a.Score != b.Score:
			return a.Score < b.Score
		case a.PressureAfter != b.PressureAfter:
			return a.PressureAfter < b.PressureAfter
		case a.Route.BaseLatencyMs != b.Route.BaseLatencyMs:
			return a.Route.BaseLatencyMs < b.Route.BaseLatencyMs
		default:
			return a.Route.RouteID < b.Route.RouteID
		}
	})
	return ranked[0], nil
}
// ValidateFabricChannelSpec returns ErrFabricChannelInvalid when ChannelID,
// ClusterID, SourceNodeID or TargetID is blank, or when TargetKind is neither
// node nor pool; otherwise it returns nil.
func ValidateFabricChannelSpec(spec FabricChannelSpec) error {
	required := [...]string{spec.ChannelID, spec.ClusterID, spec.SourceNodeID, spec.TargetID}
	for _, value := range required {
		if strings.TrimSpace(value) == "" {
			return ErrFabricChannelInvalid
		}
	}
	if spec.TargetKind == FabricChannelTargetNode || spec.TargetKind == FabricChannelTargetPool {
		return nil
	}
	return ErrFabricChannelInvalid
}
// FabricChannelSpecFromServiceRequest derives a validated FabricChannelSpec
// from a service-level request.
//
// Defaults applied: now falls back to the current UTC time when zero; the
// source node falls back to localNodeID; the target kind falls back to pool;
// the channel ID falls back to the resource ID; the traffic class falls back
// to the default for the request's service class. For pool targets the target
// ID prefers the first pool ID, then the selected node, then the first node
// ID; for node targets the selected node and first node ID take precedence
// over that value. The zero spec and the validation error are returned when
// the result is invalid.
func FabricChannelSpecFromServiceRequest(req FabricServiceChannelRequest, localNodeID string, now time.Time) (FabricChannelSpec, error) {
	if now.IsZero() {
		now = time.Now().UTC()
	}
	kind := req.Target.Kind
	if kind == "" {
		kind = FabricChannelTargetPool
	}
	selectedNode := strings.TrimSpace(req.Target.SelectedNodeID)
	target := firstNonEmpty(firstString(req.Target.PoolIDs), selectedNode, firstString(req.Target.NodeIDs))
	if kind == FabricChannelTargetNode {
		target = firstNonEmpty(selectedNode, firstString(req.Target.NodeIDs), target)
	}
	resourceID := strings.TrimSpace(req.ResourceID)

	spec := FabricChannelSpec{
		ChannelID:    firstNonEmpty(strings.TrimSpace(req.ChannelID), resourceID),
		ClusterID:    strings.TrimSpace(req.ClusterID),
		SourceNodeID: firstNonEmpty(strings.TrimSpace(req.SourceNodeID), strings.TrimSpace(localNodeID)),
		TargetKind:   kind,
		TargetID:     target,
		TrafficClass: firstNonEmpty(strings.TrimSpace(req.TrafficClass), serviceClassDefaultTrafficClass(req.ServiceClass)),
		StickyKey:    resourceID,
		CreatedAt:    now,
	}
	if err := ValidateFabricChannelSpec(spec); err != nil {
		return FabricChannelSpec{}, err
	}
	return spec, nil
}
// serviceClassDefaultTrafficClass maps a service class (compared lower-cased
// and trimmed) to its default traffic class: bulk for VPN packets,
// interactive for remote workspaces, reliable for everything else.
func serviceClassDefaultTrafficClass(serviceClass string) string {
	class := strings.TrimSpace(strings.ToLower(serviceClass))
	if class == FabricServiceClassVPNPackets {
		return FabricServiceChannelBulk
	}
	if class == FabricServiceClassRemoteWorkspace {
		return FabricServiceChannelInteractive
	}
	return FabricServiceChannelReliable
}
// firstString returns the first element of values that is non-empty after
// trimming surrounding whitespace, in its trimmed form, or "" if there is
// none.
func firstString(values []string) string {
	for i := range values {
		if trimmed := strings.TrimSpace(values[i]); trimmed != "" {
			return trimmed
		}
	}
	return ""
}
// scoreRoute computes the weighted cost of route (lower is better) from its
// latency, jitter, loss, projected pressure, hop count and relay count, plus a
// fixed penalty when the route is degraded. The returned choice carries the
// score, a reason label and the pressure before/after projection; its Route
// field is left for the caller to fill in.
func (s FabricRouteScheduler) scoreRoute(route FabricRoute) FabricRouteChoice {
	cfg := normalizeFabricRouteSchedulerConfig(s.Config)
	choice := FabricRouteChoice{
		PressureBefore: fabricRoutePressurePercent(route, 0),
		PressureAfter:  fabricRoutePressurePercent(route, cfg.ProjectedChannelCost),
	}

	choice.Score = route.BaseLatencyMs * cfg.LatencyWeight
	choice.Score += route.JitterMs * cfg.JitterWeight
	choice.Score += route.LossPermille * cfg.LossWeight
	choice.Score += choice.PressureAfter * cfg.PressureWeight
	choice.Score += len(route.Hops) * cfg.HopPenalty
	choice.Score += route.RelayCount * cfg.RelayPenalty
	if route.Degraded {
		choice.Score += cfg.DegradedPenalty
	}

	// The relay label takes precedence over the pressure label.
	switch {
	case route.RelayCount > 0:
		choice.Reason = "relay_fallback_available"
	case choice.PressureAfter >= 90:
		choice.Reason = "capacity_pressure_avoidance"
	default:
		choice.Reason = "latency_load_score"
	}
	return choice
}
// normalizeFabricRouteSchedulerConfig returns cfg with every non-positive
// weight, penalty and projected cost replaced by its default, and a negative
// HardMaxRoutePressure clamped to zero (meaning "no hard limit").
func normalizeFabricRouteSchedulerConfig(cfg FabricRouteSchedulerConfig) FabricRouteSchedulerConfig {
	positiveDefaults := []struct {
		value    *int
		fallback int
	}{
		{&cfg.LatencyWeight, 10},
		{&cfg.JitterWeight, 4},
		{&cfg.LossWeight, 8},
		{&cfg.PressureWeight, 12},
		{&cfg.HopPenalty, 5},
		{&cfg.RelayPenalty, 25},
		{&cfg.DegradedPenalty, 500},
		{&cfg.ProjectedChannelCost, 1},
	}
	for _, entry := range positiveDefaults {
		if *entry.value <= 0 {
			*entry.value = entry.fallback
		}
	}
	if cfg.HardMaxRoutePressure < 0 {
		cfg.HardMaxRoutePressure = 0
	}
	return cfg
}
// flattenFabricRouteSet lists the routes of routeSet in tier order: the
// primary (only when it has a non-blank RouteID), then the warm standbys, then
// the cold fallbacks.
func flattenFabricRouteSet(routeSet FabricRouteSet) []FabricRoute {
	total := 1 + len(routeSet.WarmStandby) + len(routeSet.ColdFallbacks)
	flat := make([]FabricRoute, 0, total)
	if primary := routeSet.Primary; strings.TrimSpace(primary.RouteID) != "" {
		flat = append(flat, primary)
	}
	for _, tier := range [][]FabricRoute{routeSet.WarmStandby, routeSet.ColdFallbacks} {
		flat = append(flat, tier...)
	}
	return flat
}
// fabricRouteUsable reports whether route may carry the channel described by
// spec. A route is rejected when:
//   - it has a blank RouteID or is not healthy;
//   - both route and spec name a cluster and the clusters differ;
//   - the route names a source node other than spec.SourceNodeID;
//   - the route names a destination node (node targets) or pool (pool
//     targets) other than spec.TargetID;
//   - any hop's NodeID appears in forbidden.
//
// The now parameter is accepted but not consulted by this function.
func fabricRouteUsable(spec FabricChannelSpec, route FabricRoute, forbidden map[string]struct{}, now time.Time) bool {
	switch {
	case strings.TrimSpace(route.RouteID) == "", !route.Healthy:
		return false
	case route.ClusterID != "" && spec.ClusterID != "" && route.ClusterID != spec.ClusterID:
		return false
	case route.SourceNodeID != "" && route.SourceNodeID != spec.SourceNodeID:
		return false
	}

	// An empty route-side identifier is treated as a wildcard for its target kind.
	if spec.TargetKind == FabricChannelTargetNode {
		if route.DestinationNodeID != "" && route.DestinationNodeID != spec.TargetID {
			return false
		}
	} else if spec.TargetKind == FabricChannelTargetPool {
		if route.PoolID != "" && route.PoolID != spec.TargetID {
			return false
		}
	}

	for i := range route.Hops {
		if _, blocked := forbidden[route.Hops[i].NodeID]; blocked {
			return false
		}
	}
	return true
}
// fabricRoutePressurePercent returns the route's load as an integer
// percentage of its capacity after adding projected channels to the active
// count. The result is clamped to [0, 100]. A route without positive
// capacity is reported as fully loaded (100).
func fabricRoutePressurePercent(route FabricRoute, projected int) int {
	capacity := route.Capacity
	if capacity <= 0 {
		return 100
	}
	load := route.ActiveChannels + projected
	if load <= 0 {
		return 0
	}
	percent := load * 100 / capacity
	if percent <= 100 {
		return percent
	}
	return 100
}
// stringSet builds a set from values, trimming surrounding whitespace from
// each entry and dropping entries that are blank after trimming. The
// returned map is never nil.
func stringSet(values []string) map[string]struct{} {
	set := make(map[string]struct{}, len(values))
	for i := range values {
		if key := strings.TrimSpace(values[i]); key != "" {
			set[key] = struct{}{}
		}
	}
	return set
}
@@ -0,0 +1,244 @@
package mesh
import (
"errors"
"testing"
"time"
)
func TestFabricRouteSchedulerAvoidsSaturatedShortestRoute(t *testing.T) {
scheduler := NewFabricRouteScheduler(FabricRouteSchedulerConfig{})
spec := FabricChannelSpec{
ChannelID: "channel-1",
ClusterID: "cluster-1",
SourceNodeID: "node-a",
TargetKind: FabricChannelTargetNode,
TargetID: "node-b",
}
choice, err := scheduler.ChooseRoute(spec, FabricRouteSet{
TargetKind: FabricChannelTargetNode,
TargetID: "node-b",
Primary: FabricRoute{
RouteID: "short-saturated",
ClusterID: "cluster-1",
SourceNodeID: "node-a",
DestinationNodeID: "node-b",
Hops: []FabricRouteHop{{NodeID: "node-a"}, {NodeID: "node-b"}},
BaseLatencyMs: 10,
Capacity: 10,
ActiveChannels: 10,
Healthy: true,
},
WarmStandby: []FabricRoute{{
RouteID: "slightly-longer-free",
ClusterID: "cluster-1",
SourceNodeID: "node-a",
DestinationNodeID: "node-b",
Hops: []FabricRouteHop{{NodeID: "node-a"}, {NodeID: "node-r"}, {NodeID: "node-b"}},
BaseLatencyMs: 18,
Capacity: 100,
ActiveChannels: 5,
RelayCount: 1,
Healthy: true,
}},
}, time.Now())
if err != nil {
t.Fatalf("choose route: %v", err)
}
if choice.Route.RouteID != "slightly-longer-free" {
t.Fatalf("route = %q, want slightly-longer-free score=%d pressure=%d", choice.Route.RouteID, choice.Score, choice.PressureAfter)
}
}
func TestFabricChannelSpecFromServiceRequestTargetsPool(t *testing.T) {
spec, err := FabricChannelSpecFromServiceRequest(FabricServiceChannelRequest{
ChannelID: "vpn-1",
ClusterID: "cluster-1",
ResourceID: "vpn-1",
ServiceClass: FabricServiceClassVPNPackets,
Target: FabricServiceChannelTarget{
Kind: FabricChannelTargetPool,
PoolIDs: []string{"home-ipv4"},
ServiceRole: "ipv4-egress",
},
}, "android-node", time.Now())
if err != nil {
t.Fatalf("service request spec: %v", err)
}
if spec.SourceNodeID != "android-node" || spec.TargetKind != FabricChannelTargetPool || spec.TargetID != "home-ipv4" || spec.TrafficClass != FabricServiceChannelBulk {
t.Fatalf("unexpected spec: %+v", spec)
}
}
func TestFabricChannelSpecFromServiceRequestKeepsServiceOutOfEndpointSelection(t *testing.T) {
_, err := FabricChannelSpecFromServiceRequest(FabricServiceChannelRequest{
ChannelID: "rdp-1",
ClusterID: "cluster-1",
ServiceClass: FabricServiceClassRemoteWorkspace,
Target: FabricServiceChannelTarget{
Kind: FabricChannelTargetPool,
ServiceRole: "rdp-gateway",
},
}, "client-node", time.Now())
if !errors.Is(err, ErrFabricChannelInvalid) {
t.Fatalf("err = %v, want invalid without pool/node target id", err)
}
}
func TestFabricRouteSchedulerPoolSkipsFailedEndpoint(t *testing.T) {
scheduler := NewFabricRouteScheduler(FabricRouteSchedulerConfig{})
spec := FabricChannelSpec{
ChannelID: "channel-pool",
ClusterID: "cluster-1",
SourceNodeID: "node-a",
TargetKind: FabricChannelTargetPool,
TargetID: "pool-egress",
}
choice, err := scheduler.ChooseRoute(spec, FabricRouteSet{
TargetKind: FabricChannelTargetPool,
TargetID: "pool-egress",
Primary: FabricRoute{
RouteID: "pool-node-dead",
ClusterID: "cluster-1",
SourceNodeID: "node-a",
DestinationNodeID: "node-b",
PoolID: "pool-egress",
Capacity: 100,
Healthy: false,
},
WarmStandby: []FabricRoute{{
RouteID: "pool-node-live",
ClusterID: "cluster-1",
SourceNodeID: "node-a",
DestinationNodeID: "node-c",
PoolID: "pool-egress",
Hops: []FabricRouteHop{{NodeID: "node-a"}, {NodeID: "node-c"}},
BaseLatencyMs: 25,
Capacity: 100,
Healthy: true,
}},
}, time.Now())
if err != nil {
t.Fatalf("choose route: %v", err)
}
if choice.Route.DestinationNodeID != "node-c" {
t.Fatalf("destination = %q, want node-c", choice.Route.DestinationNodeID)
}
}
func TestFabricRouteSchedulerHonorsForbiddenHops(t *testing.T) {
scheduler := NewFabricRouteScheduler(FabricRouteSchedulerConfig{})
spec := FabricChannelSpec{
ChannelID: "channel-1",
ClusterID: "cluster-1",
SourceNodeID: "node-a",
TargetKind: FabricChannelTargetNode,
TargetID: "node-b",
ForbiddenHops: []string{"node-r"},
}
_, err := scheduler.ChooseRoute(spec, FabricRouteSet{
Primary: FabricRoute{
RouteID: "blocked",
ClusterID: "cluster-1",
SourceNodeID: "node-a",
DestinationNodeID: "node-b",
Hops: []FabricRouteHop{{NodeID: "node-a"}, {NodeID: "node-r"}, {NodeID: "node-b"}},
Capacity: 100,
Healthy: true,
},
}, time.Now())
if !errors.Is(err, ErrFabricRouteNotFound) {
t.Fatalf("err = %v, want ErrFabricRouteNotFound", err)
}
}
func TestFabricRouteSchedulerRejectsRoutesAboveHardPressureLimit(t *testing.T) {
scheduler := NewFabricRouteScheduler(FabricRouteSchedulerConfig{HardMaxRoutePressure: 80})
spec := FabricChannelSpec{
ChannelID: "channel-pressure",
ClusterID: "cluster-1",
SourceNodeID: "node-a",
TargetKind: FabricChannelTargetNode,
TargetID: "node-b",
}
choice, err := scheduler.ChooseRoute(spec, FabricRouteSet{
Primary: FabricRoute{
RouteID: "too-busy",
ClusterID: "cluster-1",
SourceNodeID: "node-a",
DestinationNodeID: "node-b",
Capacity: 10,
ActiveChannels: 9,
Healthy: true,
},
WarmStandby: []FabricRoute{{
RouteID: "admissible",
ClusterID: "cluster-1",
SourceNodeID: "node-a",
DestinationNodeID: "node-b",
Capacity: 10,
ActiveChannels: 5,
Healthy: true,
}},
}, time.Now())
if err != nil {
t.Fatalf("choose route: %v", err)
}
if choice.Route.RouteID != "admissible" {
t.Fatalf("route = %q, want admissible", choice.Route.RouteID)
}
}
func TestFabricRouteSchedulerKeepsHighLatencyRouteAsFallbackUntilFastRouteSaturates(t *testing.T) {
spec := FabricChannelSpec{
ChannelID: "channel-latency-aware",
ClusterID: "cluster-1",
SourceNodeID: "node-a",
TargetKind: FabricChannelTargetPool,
TargetID: "pool-egress",
}
routeSet := FabricRouteSet{
TargetKind: FabricChannelTargetPool,
TargetID: "pool-egress",
Primary: FabricRoute{
RouteID: "lan-fast",
ClusterID: "cluster-1",
SourceNodeID: "node-a",
DestinationNodeID: "node-lan",
PoolID: "pool-egress",
BaseLatencyMs: 4,
Capacity: 100,
ActiveChannels: 85,
Healthy: true,
},
WarmStandby: []FabricRoute{{
RouteID: "wan-slow",
ClusterID: "cluster-1",
SourceNodeID: "node-a",
DestinationNodeID: "node-wan",
PoolID: "pool-egress",
BaseLatencyMs: 420,
Capacity: 100,
ActiveChannels: 0,
Healthy: true,
}},
}
scheduler := NewFabricRouteScheduler(FabricRouteSchedulerConfig{HardMaxRoutePressure: 90})
choice, err := scheduler.ChooseRoute(spec, routeSet, time.Now())
if err != nil {
t.Fatalf("choose route: %v", err)
}
if choice.Route.RouteID != "lan-fast" {
t.Fatalf("route = %q, want fast LAN before hard pressure limit", choice.Route.RouteID)
}
routeSet.Primary.ActiveChannels = 90
choice, err = scheduler.ChooseRoute(spec, routeSet, time.Now())
if err != nil {
t.Fatalf("choose fallback route: %v", err)
}
if choice.Route.RouteID != "wan-slow" {
t.Fatalf("route = %q, want WAN only after LAN reaches hard pressure limit", choice.Route.RouteID)
}
}
@@ -0,0 +1,130 @@
package mesh
import (
"context"
"fmt"
"strings"
"sync/atomic"
"time"
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
)
// FabricOverlayTransportConfig configures a FabricOverlayTransport.
type FabricOverlayTransportConfig struct {
// ClusterID is trimmed and copied into every channel spec built by Send.
ClusterID string
// LocalNodeID is trimmed and used as the source node of every channel spec.
LocalNodeID string
// RouterConfig is passed through to the underlying channel runtime.
RouterConfig FabricChannelRouterConfig
// Timeout is passed to the channel runtime; NewFabricOverlayTransport
// replaces a non-positive value with 30 seconds.
Timeout time.Duration
}
// FabricOverlayTransport sends payload batches to a target by looking up the
// target's route set and delegating to a FabricChannelRuntime.
type FabricOverlayTransport struct {
// Runtime performs the actual send; Send fails when it is nil.
Runtime *FabricChannelRuntime
// RouteSets maps a trimmed target ID to the routes usable for that target.
RouteSets map[string]FabricRouteSet
// Config holds the settings the transport was constructed with.
Config FabricOverlayTransportConfig
// sequence numbers the channel IDs Send generates for requests without one.
sequence atomic.Uint64
}
// FabricOverlayTransportSnapshot bundles the runtime's route pressure and
// route health snapshots, as returned by FabricOverlayTransport.Snapshot.
type FabricOverlayTransportSnapshot struct {
RoutePressure FabricRoutePressureSnapshot `json:"route_pressure"`
RouteHealth FabricRouteHealthSnapshot `json:"route_health,omitempty"`
}
// FabricOverlaySendRequest describes one FabricOverlayTransport.Send call.
type FabricOverlaySendRequest struct {
// ChannelID is optional; Send generates a "fabric-overlay-<n>" ID when blank.
ChannelID string
// TargetKind falls back to the route set's kind, then to the node kind,
// when left empty.
TargetKind FabricChannelTargetKind
// TargetID selects the route set and is required (after trimming).
TargetID string
// TrafficClass defaults to the reliable class when zero.
TrafficClass fabricproto.TrafficClass
// Payloads are handed to the runtime's reliable send unchanged.
Payloads [][]byte
// StickyKey is trimmed and copied into the channel spec.
StickyKey string
}
// NewFabricOverlayTransport builds an overlay transport on top of transport.
//
// A non-positive cfg.Timeout is replaced by 30 seconds, and the adjusted
// config is both passed to the channel runtime and stored on the result.
// routeSets is copied into a fresh map keyed by the trimmed target ID;
// entries whose key is blank after trimming are dropped.
func NewFabricOverlayTransport(transport FabricTransport, routeSets map[string]FabricRouteSet, cfg FabricOverlayTransportConfig) *FabricOverlayTransport {
	if cfg.Timeout <= 0 {
		cfg.Timeout = 30 * time.Second
	}

	overlay := &FabricOverlayTransport{
		Runtime: NewFabricChannelRuntime(transport, FabricChannelRuntimeConfig{
			RouterConfig: cfg.RouterConfig,
			Timeout:      cfg.Timeout,
		}),
		RouteSets: make(map[string]FabricRouteSet, len(routeSets)),
		Config:    cfg,
	}
	for rawID, routeSet := range routeSets {
		id := strings.TrimSpace(rawID)
		if id == "" {
			continue
		}
		overlay.RouteSets[id] = routeSet
	}
	return overlay
}
// Send delivers req.Payloads to the target named by req.TargetID through the
// route set registered for that target.
//
// It returns ErrForwardRuntimeUnavailable when the transport or its runtime
// is nil, ErrFabricChannelInvalid when the target ID is blank, and
// ErrFabricRouteNotFound when no route set is registered for the target.
// Missing request fields are defaulted: the target kind falls back to the
// route set's kind and then to the node kind, a zero traffic class becomes
// the reliable class, and a blank channel ID is replaced by a generated
// "fabric-overlay-<n>" ID.
//
// NOTE(review): the traffic class is written into the shared Runtime.Config
// on every call, so concurrent Sends with different classes appear to race on
// that field — confirm whether callers serialize Send.
func (t *FabricOverlayTransport) Send(ctx context.Context, req FabricOverlaySendRequest) (FabricChannelRuntimeResult, error) {
	var none FabricChannelRuntimeResult
	if t == nil || t.Runtime == nil {
		return none, ErrForwardRuntimeUnavailable
	}

	targetID := strings.TrimSpace(req.TargetID)
	if targetID == "" {
		return none, ErrFabricChannelInvalid
	}
	routeSet, found := t.RouteSets[targetID]
	if !found {
		return none, ErrFabricRouteNotFound
	}

	targetKind := req.TargetKind
	if targetKind == "" {
		if targetKind = routeSet.TargetKind; targetKind == "" {
			targetKind = FabricChannelTargetNode
		}
	}

	trafficClass := req.TrafficClass
	if trafficClass == 0 {
		trafficClass = fabricproto.TrafficClassReliable
	}
	t.Runtime.Config.TrafficClass = trafficClass

	// The sequence counter advances on every call, even when the caller
	// supplied its own channel ID.
	generatedID := fmt.Sprintf("fabric-overlay-%d", t.sequence.Add(1))
	channelID := firstNonEmpty(strings.TrimSpace(req.ChannelID), generatedID)

	spec := FabricChannelSpec{
		ChannelID:    channelID,
		ClusterID:    strings.TrimSpace(t.Config.ClusterID),
		SourceNodeID: strings.TrimSpace(t.Config.LocalNodeID),
		TargetKind:   targetKind,
		TargetID:     targetID,
		TrafficClass: loadFabricTrafficClassName(trafficClass),
		StickyKey:    strings.TrimSpace(req.StickyKey),
		CreatedAt:    time.Now().UTC(),
	}
	return t.Runtime.SendReliable(ctx, spec, routeSet, req.Payloads)
}
// SnapshotPressure returns the runtime's pressure tracker snapshot, or a zero
// snapshot when the transport, its runtime, or the tracker is nil.
func (t *FabricOverlayTransport) SnapshotPressure() FabricRoutePressureSnapshot {
	if t != nil && t.Runtime != nil && t.Runtime.Pressure != nil {
		return t.Runtime.Pressure.SnapshotPressure()
	}
	return FabricRoutePressureSnapshot{}
}
// Snapshot returns the runtime's route pressure and route health snapshots
// together. A nil transport or runtime yields a zero snapshot.
func (t *FabricOverlayTransport) Snapshot() FabricOverlayTransportSnapshot {
	var snapshot FabricOverlayTransportSnapshot
	if t == nil || t.Runtime == nil {
		return snapshot
	}
	snapshot.RoutePressure = t.Runtime.snapshotRoutePressure()
	snapshot.RouteHealth = t.Runtime.snapshotRouteHealth()
	return snapshot
}
// loadFabricTrafficClassName maps a wire traffic class to the lowercase name
// used in channel specs. Classes without a dedicated name are rendered as
// "traffic_class_<n>".
func loadFabricTrafficClassName(trafficClass fabricproto.TrafficClass) string {
	if trafficClass == fabricproto.TrafficClassControl {
		return "control"
	}
	if trafficClass == fabricproto.TrafficClassInteractive {
		return "interactive"
	}
	if trafficClass == fabricproto.TrafficClassBulk {
		return "bulk"
	}
	if trafficClass == fabricproto.TrafficClassReliable {
		return "reliable"
	}
	return fmt.Sprintf("traffic_class_%d", trafficClass)
}
@@ -0,0 +1,49 @@
package mesh
import (
"context"
"testing"
"time"
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
)
func TestFabricOverlayTransportSendsThroughRouteSet(t *testing.T) {
transport := newFakeFabricRuntimeTransport(map[string]time.Duration{
"quic://node-b:19443": 0,
})
overlay := NewFabricOverlayTransport(transport, map[string]FabricRouteSet{
"node-b": {
TargetKind: FabricChannelTargetNode,
TargetID: "node-b",
Primary: FabricRoute{
RouteID: "node-b-direct",
ClusterID: "cluster-1",
SourceNodeID: "node-a",
DestinationNodeID: "node-b",
Hops: []FabricRouteHop{{NodeID: "node-b", Mode: FabricRouteDirect, EndpointID: "node-b-direct", Address: "quic://node-b:19443"}},
Capacity: 100,
Healthy: true,
},
},
}, FabricOverlayTransportConfig{ClusterID: "cluster-1", LocalNodeID: "node-a"})
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
result, err := overlay.Send(ctx, FabricOverlaySendRequest{
TargetID: "node-b",
TrafficClass: fabricproto.TrafficClassReliable,
Payloads: [][]byte{[]byte("payload")},
})
if err != nil {
t.Fatalf("send: %v", err)
}
if result.BytesSent != uint64(len("payload")) || result.AcksReceived != 1 {
t.Fatalf("result = %+v", result)
}
if pressure := overlay.SnapshotPressure(); pressure.ActiveTotal != 0 || pressure.AcquiredTotal != pressure.ReleasedTotal {
t.Fatalf("pressure leak: %+v", pressure)
}
if snapshot := overlay.Snapshot(); snapshot.RoutePressure.AcquiredTotal != 1 || len(snapshot.RouteHealth.Quarantined) != 0 {
t.Fatalf("snapshot = %+v", snapshot)
}
}
@@ -3,28 +3,50 @@ package mesh
import (
"context"
"crypto/tls"
"encoding/json"
"fmt"
"net"
"strings"
"sync"
"time"
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
"github.com/quic-go/quic-go"
)
// QUICFabricServer accepts QUIC fabric connections and dispatches the frames
// read from their streams to the configured forward handlers.
//
// NOTE(review): this span is a rendered diff hunk, so the previous field list
// (listener, logger, done, closeOnce) appears directly before the new one.
// In the new list, reverseMu guards reverseTransport, and the *Handler fields
// are copied from QUICFabricServerConfig when the server is started.
type QUICFabricServer struct {
listener *quic.Listener
logger FabricSessionEventLogger
done chan struct{}
closeOnce sync.Once
listener *quic.Listener
logger FabricSessionEventLogger
reverseMu sync.RWMutex
reverseTransport *QUICFabricTransport
fabricFrameHandler FabricFrameHandler
productionForwardHandler func(context.Context, ProductionEnvelope) (ProductionForwardResult, error)
webIngressForwardHandler func(context.Context, []byte) ([]byte, error)
fabricControlHandler func(context.Context, []byte) ([]byte, error)
syntheticForwardHandler func(context.Context, SyntheticEnvelope) (SyntheticEnvelope, error)
done chan struct{}
closeOnce sync.Once
}
// QUICFabricServerConfig carries the settings for StartQUICFabricServer.
// ListenAddr is required; the handler fields and ReverseTransport are copied
// onto the server as-is and may be nil.
//
// NOTE(review): this span is a rendered diff hunk, so the previous four-field
// list appears directly before the new, extended one.
type QUICFabricServerConfig struct {
ListenAddr string
TLSConfig *tls.Config
QUICConfig *quic.Config
Logger FabricSessionEventLogger
ListenAddr string
TLSConfig *tls.Config
QUICConfig *quic.Config
Logger FabricSessionEventLogger
ReverseTransport *QUICFabricTransport
FabricFrameHandler FabricFrameHandler
ProductionForwardHandler func(context.Context, ProductionEnvelope) (ProductionForwardResult, error)
WebIngressForwardHandler func(context.Context, []byte) ([]byte, error)
FabricControlHandler func(context.Context, []byte) ([]byte, error)
SyntheticForwardHandler func(context.Context, SyntheticEnvelope) (SyntheticEnvelope, error)
}
// FabricFrameSender writes a single fabric frame back to the peer. It is the
// reply channel handed to a FabricFrameHandler.
type FabricFrameSender interface {
SendFrame(context.Context, fabricproto.Frame) error
}
// FabricFrameHandler is an optional hook the server offers each frame to
// after the built-in forward handlers decline it. Returning true marks the
// frame as handled; returning false lets the default session handling run.
// A non-nil error makes the server close the connection.
type FabricFrameHandler func(context.Context, FabricFrameSender, fabricproto.Frame) (bool, error)
func StartQUICFabricServer(ctx context.Context, cfg QUICFabricServerConfig) (*QUICFabricServer, error) {
if cfg.ListenAddr == "" {
return nil, fmt.Errorf("quic fabric listen addr is required")
@@ -42,9 +64,15 @@ func StartQUICFabricServer(ctx context.Context, cfg QUICFabricServerConfig) (*QU
return nil, err
}
server := &QUICFabricServer{
listener: listener,
logger: cfg.Logger,
done: make(chan struct{}),
listener: listener,
logger: cfg.Logger,
reverseTransport: cfg.ReverseTransport,
fabricFrameHandler: cfg.FabricFrameHandler,
productionForwardHandler: cfg.ProductionForwardHandler,
webIngressForwardHandler: cfg.WebIngressForwardHandler,
fabricControlHandler: cfg.FabricControlHandler,
syntheticForwardHandler: cfg.SyntheticForwardHandler,
done: make(chan struct{}),
}
go server.acceptLoop(ctx)
return server, nil
@@ -57,6 +85,15 @@ func (s *QUICFabricServer) Addr() net.Addr {
return s.listener.Addr()
}
// SetReverseTransport replaces the transport that receives reverse-connection
// registrations, under the server's reverse lock. Passing nil disables
// registration. It is a no-op on a nil server.
func (s *QUICFabricServer) SetReverseTransport(transport *QUICFabricTransport) {
	if s == nil {
		return
	}
	s.reverseMu.Lock()
	defer s.reverseMu.Unlock()
	s.reverseTransport = transport
}
func (s *QUICFabricServer) Close() error {
if s == nil {
return nil
@@ -95,6 +132,8 @@ func (s *QUICFabricServer) handleConn(ctx context.Context, conn *quic.Conn) {
func (s *QUICFabricServer) handleStream(ctx context.Context, conn *quic.Conn, stream *quic.Stream) {
session := fabricproto.NewSession(fabricproto.SessionConfig{})
sender := quicStreamFrameSender{stream: stream}
defer func() { _ = stream.Close() }()
s.logFabricSession(FabricSessionEventLogEntry{
Event: "fabric_session_quic_stream_opened",
AcceptedBy: "quic",
@@ -116,6 +155,29 @@ func (s *QUICFabricServer) handleStream(ctx context.Context, conn *quic.Conn, st
if err != nil {
return
}
s.registerReverseHelloFrame(conn, frame)
if s.handleProductionForwardFrame(ctx, stream, frame) {
continue
}
if s.handleWebIngressForwardFrame(ctx, stream, frame) {
continue
}
if s.handleFabricControlForwardFrame(ctx, stream, frame) {
continue
}
if s.handleSyntheticForwardFrame(ctx, conn, stream, frame) {
continue
}
if s.fabricFrameHandler != nil {
handled, err := s.fabricFrameHandler(ctx, sender, frame)
if err != nil {
_ = conn.CloseWithError(2, err.Error())
return
}
if handled {
continue
}
}
event, responses, err := session.HandleFrame(frame)
if err != nil {
_ = conn.CloseWithError(2, err.Error())
@@ -140,6 +202,196 @@ func (s *QUICFabricServer) handleStream(ctx context.Context, conn *quic.Conn, st
}
}
// quicStreamFrameSender adapts a QUIC stream to the FabricFrameSender
// interface.
//
// NOTE(review): mu is a value field and SendFrame has a value receiver, so
// every call locks its own copy of the mutex — confirm whether the sender
// should be used through a pointer instead.
type quicStreamFrameSender struct {
stream *quic.Stream
mu sync.Mutex
}
// SendFrame writes frame to the underlying stream. The write deadline is the
// context's deadline when it has one, otherwise 30 seconds from now. It
// returns an error when the sender has no stream.
//
// NOTE(review): the receiver is a value, so s.mu is a per-call copy and does
// not serialize concurrent writers; the built-in forward handlers also write
// to the stream directly without this lock. Switching to a pointer receiver
// requires the call site to construct and pass a pointer.
func (s quicStreamFrameSender) SendFrame(ctx context.Context, frame fabricproto.Frame) error {
	if s.stream == nil {
		return fmt.Errorf("quic fabric stream is closed")
	}
	s.mu.Lock()
	defer s.mu.Unlock()

	deadline, hasDeadline := ctx.Deadline()
	if !hasDeadline {
		deadline = time.Now().Add(30 * time.Second)
	}
	_ = s.stream.SetWriteDeadline(deadline)
	return fabricproto.WriteFrame(s.stream, frame)
}
// registerReverseHelloFrame inspects frame and, when it is a ping whose
// payload starts with the reverse-hello prefix, registers conn with the
// reverse transport under the peer ID that follows the prefix and logs the
// registration. It does nothing when the server, the reverse transport or
// conn is nil.
//
// NOTE(review): the peer ID is taken from the frame payload as sent by the
// remote side; whether it is checked against the connection's identity is
// not visible here.
func (s *QUICFabricServer) registerReverseHelloFrame(conn *quic.Conn, frame fabricproto.Frame) {
	reverseTransport := s.getReverseTransport()
	if s == nil || reverseTransport == nil || conn == nil {
		return
	}
	if frame.Type != fabricproto.FramePing {
		return
	}
	hello := string(frame.Payload)
	if !strings.HasPrefix(hello, fabricQUICReverseHelloPrefix) {
		return
	}
	peerID := hello[len(fabricQUICReverseHelloPrefix):]
	reverseTransport.RegisterReverseConn(peerID, conn)
	s.logFabricSession(FabricSessionEventLogEntry{
		Event:      "fabric_session_quic_reverse_registered",
		AcceptedBy: "quic_reverse_hello",
		RemoteAddr: conn.RemoteAddr().String(),
		PeerID:     peerID,
	})
}
// quicProductionForwardResponse is the JSON body sent back for a production
// forward frame: either the handler's result or an error message.
// NOTE(review): encoding/json's omitempty has no effect on struct values, so
// Result is presumably always emitted — confirm ProductionForwardResult's kind.
type quicProductionForwardResponse struct {
Result ProductionForwardResult `json:"result,omitempty"`
Error string `json:"error,omitempty"`
}
// quicSyntheticForwardResponse is the JSON body sent back for a synthetic
// forward frame: either the handler's acknowledgement envelope or an error
// message.
type quicSyntheticForwardResponse struct {
Envelope SyntheticEnvelope `json:"envelope,omitempty"`
Error string `json:"error,omitempty"`
}
// quicWebIngressForwardResponse is the JSON body sent back for a web ingress
// forward frame. Payload holds the handler's reply embedded verbatim as raw
// JSON, so it must itself be valid JSON for the response to marshal.
type quicWebIngressForwardResponse struct {
Payload json.RawMessage `json:"payload,omitempty"`
Error string `json:"error,omitempty"`
}
// quicFabricControlForwardResponse is the JSON body sent back for a fabric
// control forward frame. Payload holds the handler's reply embedded verbatim
// as raw JSON, so it must itself be valid JSON for the response to marshal.
type quicFabricControlForwardResponse struct {
Payload json.RawMessage `json:"payload,omitempty"`
Error string `json:"error,omitempty"`
}
// handleProductionForwardFrame handles data frames addressed to the
// production forward stream ID. It reports false for any other frame.
//
// For a matching frame it decodes the payload as a ProductionEnvelope, runs
// the production handler, and writes back a JSON response frame that reuses
// the request's sequence number. A missing handler, an undecodable envelope
// or a handler error is reported in the response's error field. Write errors
// are ignored (best effort).
func (s *QUICFabricServer) handleProductionForwardFrame(ctx context.Context, stream *quic.Stream, frame fabricproto.Frame) bool {
	matches := frame.Type == fabricproto.FrameData && frame.StreamID == ProductionForwardQUICStreamID
	if !matches {
		return false
	}

	var response quicProductionForwardResponse
	switch {
	case s == nil || s.productionForwardHandler == nil:
		response.Error = ErrForwardRuntimeUnavailable.Error()
	default:
		var envelope ProductionEnvelope
		if decodeErr := json.Unmarshal(frame.Payload, &envelope); decodeErr != nil {
			response.Error = "invalid production mesh envelope"
			break
		}
		result, handlerErr := s.productionForwardHandler(ctx, envelope)
		if handlerErr != nil {
			response.Error = handlerErr.Error()
			break
		}
		response.Result = result
	}

	encoded, err := json.Marshal(response)
	if err == nil {
		_ = fabricproto.WriteFrame(stream, fabricproto.Frame{
			Type:         fabricproto.FrameData,
			TrafficClass: fabricproto.TrafficClassReliable,
			StreamID:     ProductionForwardQUICStreamID,
			Sequence:     frame.Sequence,
			Payload:      encoded,
		})
	}
	return true
}
// handleWebIngressForwardFrame handles data frames addressed to the web
// ingress forward stream ID. It reports false for any other frame.
//
// For a matching frame it passes a copy of the payload to the web ingress
// handler and writes back a JSON response frame that reuses the request's
// sequence number. A missing handler or a handler error is reported in the
// response's error field.
//
// The handler's reply is embedded as raw JSON. If it is not valid JSON,
// marshaling the response fails; an error response is sent in that case so
// the peer is not left waiting for a reply that never arrives. Write errors
// are ignored (best effort).
func (s *QUICFabricServer) handleWebIngressForwardFrame(ctx context.Context, stream *quic.Stream, frame fabricproto.Frame) bool {
	if frame.Type != fabricproto.FrameData || frame.StreamID != WebIngressForwardQUICStreamID {
		return false
	}

	response := quicWebIngressForwardResponse{}
	if s == nil || s.webIngressForwardHandler == nil {
		response.Error = ErrForwardRuntimeUnavailable.Error()
	} else if reply, err := s.webIngressForwardHandler(ctx, append([]byte(nil), frame.Payload...)); err != nil {
		response.Error = err.Error()
	} else {
		response.Payload = append(json.RawMessage(nil), reply...)
	}

	payload, err := json.Marshal(response)
	if err != nil {
		// Most likely the handler returned bytes that are not valid JSON.
		// Fall back to a plain error response, which always marshals.
		fallback := quicWebIngressForwardResponse{Error: "invalid web ingress forward response: " + err.Error()}
		if payload, err = json.Marshal(fallback); err != nil {
			return true
		}
	}
	_ = fabricproto.WriteFrame(stream, fabricproto.Frame{
		Type:         fabricproto.FrameData,
		TrafficClass: fabricproto.TrafficClassReliable,
		StreamID:     WebIngressForwardQUICStreamID,
		Sequence:     frame.Sequence,
		Payload:      payload,
	})
	return true
}
// handleFabricControlForwardFrame handles data frames addressed to the fabric
// control forward stream ID. It reports false for any other frame.
//
// For a matching frame it passes a copy of the payload to the fabric control
// handler and writes back a JSON response frame that reuses the request's
// sequence number. A missing handler or a handler error is reported in the
// response's error field.
//
// The handler's reply is embedded as raw JSON. If it is not valid JSON,
// marshaling the response fails; an error response is sent in that case so
// the peer is not left waiting for a reply that never arrives. Write errors
// are ignored (best effort).
func (s *QUICFabricServer) handleFabricControlForwardFrame(ctx context.Context, stream *quic.Stream, frame fabricproto.Frame) bool {
	if frame.Type != fabricproto.FrameData || frame.StreamID != FabricControlForwardQUICStreamID {
		return false
	}

	response := quicFabricControlForwardResponse{}
	if s == nil || s.fabricControlHandler == nil {
		response.Error = ErrForwardRuntimeUnavailable.Error()
	} else if reply, err := s.fabricControlHandler(ctx, append([]byte(nil), frame.Payload...)); err != nil {
		response.Error = err.Error()
	} else {
		response.Payload = append(json.RawMessage(nil), reply...)
	}

	payload, err := json.Marshal(response)
	if err != nil {
		// Most likely the handler returned bytes that are not valid JSON.
		// Fall back to a plain error response, which always marshals.
		fallback := quicFabricControlForwardResponse{Error: "invalid fabric control forward response: " + err.Error()}
		if payload, err = json.Marshal(fallback); err != nil {
			return true
		}
	}
	_ = fabricproto.WriteFrame(stream, fabricproto.Frame{
		Type:         fabricproto.FrameData,
		TrafficClass: fabricproto.TrafficClassReliable,
		StreamID:     FabricControlForwardQUICStreamID,
		Sequence:     frame.Sequence,
		Payload:      payload,
	})
	return true
}
// handleSyntheticForwardFrame handles data frames addressed to the synthetic
// forward stream ID. It reports false for any other frame.
//
// For a matching frame it decodes the payload as a SyntheticEnvelope and runs
// the synthetic handler. On success the connection is also registered as a
// reverse connection for the envelope's sender node, and the handler's
// acknowledgement is returned. A missing handler, an undecodable envelope or
// a handler error is reported in the response's error field. The response
// frame reuses the request's sequence number; write errors are ignored.
func (s *QUICFabricServer) handleSyntheticForwardFrame(ctx context.Context, conn *quic.Conn, stream *quic.Stream, frame fabricproto.Frame) bool {
	matches := frame.Type == fabricproto.FrameData && frame.StreamID == SyntheticForwardQUICStreamID
	if !matches {
		return false
	}

	var response quicSyntheticForwardResponse
	switch {
	case s == nil || s.syntheticForwardHandler == nil:
		response.Error = ErrMeshRuntimeDisabled.Error()
	default:
		var envelope SyntheticEnvelope
		if decodeErr := json.Unmarshal(frame.Payload, &envelope); decodeErr != nil {
			response.Error = "invalid synthetic mesh envelope"
			break
		}
		ack, handlerErr := s.syntheticForwardHandler(ctx, envelope)
		if handlerErr != nil {
			response.Error = handlerErr.Error()
			break
		}
		s.registerReversePeerConn(envelope.From.NodeID, conn)
		response.Envelope = ack
	}

	encoded, err := json.Marshal(response)
	if err == nil {
		_ = fabricproto.WriteFrame(stream, fabricproto.Frame{
			Type:         fabricproto.FrameData,
			TrafficClass: fabricproto.TrafficClassReliable,
			StreamID:     SyntheticForwardQUICStreamID,
			Sequence:     frame.Sequence,
			Payload:      encoded,
		})
	}
	return true
}
// registerReversePeerConn registers conn with the reverse transport under
// peerID. It does nothing when the server, the reverse transport or conn is
// nil.
func (s *QUICFabricServer) registerReversePeerConn(peerID string, conn *quic.Conn) {
	reverseTransport := s.getReverseTransport()
	if s != nil && reverseTransport != nil && conn != nil {
		reverseTransport.RegisterReverseConn(peerID, conn)
	}
}
// getReverseTransport returns the current reverse transport, read under the
// server's reverse read lock. It returns nil for a nil server.
func (s *QUICFabricServer) getReverseTransport() *QUICFabricTransport {
	if s == nil {
		return nil
	}
	s.reverseMu.RLock()
	current := s.reverseTransport
	s.reverseMu.RUnlock()
	return current
}
func (s *QUICFabricServer) logFabricSession(entry FabricSessionEventLogEntry) {
if s != nil && s.logger != nil {
s.logger(entry)
@@ -6,7 +6,9 @@ import (
"crypto/tls"
"crypto/x509"
"encoding/hex"
"encoding/json"
"fmt"
"net"
"sort"
"strings"
"sync"
@@ -17,6 +19,7 @@ import (
)
// fabricQUICNextProto is the protocol identifier for fabric QUIC sessions
// (presumably the TLS ALPN value — confirm against the TLS config builder).
const fabricQUICNextProto = "rap-fabric-data-session-v1"
// fabricQUICReverseHelloPrefix prefixes the payload of the ping frame a
// dialer sends to announce its peer ID; the rest of the payload is the ID.
const fabricQUICReverseHelloPrefix = "rap-fabric-reverse-hello-v1:"
// defaultQUICFabricConnIdleTTL is the IdleTTL assigned by NewQUICFabricTransport.
const defaultQUICFabricConnIdleTTL = 5 * time.Minute
// defaultQUICFabricMaxStreamsPerConn is the MaxStreamsPerConn assigned by
// NewQUICFabricTransport.
const defaultQUICFabricMaxStreamsPerConn = 64
// ErrQUICFabricStreamLimitReached is the sentinel error for a reached stream limit.
const ErrQUICFabricStreamLimitReached = quicFabricError("quic fabric stream limit reached")
@@ -28,17 +31,29 @@ func (e quicFabricError) Error() string {
}
// QUICFabricTransport dials and caches QUIC connections to fabric peers and
// can also reuse connections that peers registered in the reverse direction.
//
// NOTE(review): this span is a rendered diff hunk, so the previous field list
// appears directly before the new one. In the new list, DialAddr is an
// optional dial hook used instead of quic.DialAddr, reverseConns holds the
// registered reverse connections, and the inbound*Handler fields and logger
// are written under mu by the Set* methods.
type QUICFabricTransport struct {
Config *quic.Config
IdleTTL time.Duration
MaxStreamsPerConn int
mu sync.Mutex
conns map[string]*quicFabricConnEntry
stats QUICFabricTransportStats
Config *quic.Config
LocalPeerID string
IdleTTL time.Duration
MaxStreamsPerConn int
DialAddr func(context.Context, string, *tls.Config, *quic.Config) (*quic.Conn, error)
mu sync.Mutex
conns map[string]*quicFabricConnEntry
reverseConns map[string]*quicFabricConnEntry
inboundProductionHandler func(context.Context, ProductionEnvelope) (ProductionForwardResult, error)
inboundWebIngressHandler func(context.Context, []byte) ([]byte, error)
inboundFabricControlHandler func(context.Context, []byte) ([]byte, error)
inboundSyntheticHandler func(context.Context, SyntheticEnvelope) (SyntheticEnvelope, error)
logger FabricSessionEventLogger
stats QUICFabricTransportStats
}
type QUICFabricTransportStats struct {
Opens uint64 `json:"opens"`
Reuses uint64 `json:"reuses"`
ReverseHelloSent uint64 `json:"reverse_hello_sent"`
ReverseHelloFailed uint64 `json:"reverse_hello_failed"`
ReverseRegisters uint64 `json:"reverse_registers"`
ReverseReuses uint64 `json:"reverse_reuses"`
OpenFailures uint64 `json:"open_failures"`
ClosedEvicted uint64 `json:"closed_evicted"`
CloseAllCalls uint64 `json:"close_all_calls"`
@@ -50,6 +65,7 @@ type QUICFabricTransportStats struct {
type QUICFabricTransportSnapshot struct {
SchemaVersion string `json:"schema_version"`
LocalPeerID string `json:"local_peer_id,omitempty"`
ActiveCount int `json:"active_count"`
ActiveStreams int `json:"active_streams"`
MaxStreamsPerConn int `json:"max_streams_per_conn"`
@@ -63,6 +79,7 @@ type QUICFabricConnSnapshot struct {
PeerID string `json:"peer_id,omitempty"`
Endpoint string `json:"endpoint,omitempty"`
CertSHA256 string `json:"cert_sha256,omitempty"`
Direction string `json:"direction,omitempty"`
ActiveStreams int `json:"active_streams"`
MaxStreams int `json:"max_streams"`
CapacityPressurePercent int `json:"capacity_pressure_percent"`
@@ -92,7 +109,41 @@ type quicFabricConnEntry struct {
}
// NewQUICFabricTransport returns a transport that dials with config and uses
// the default idle TTL and per-connection stream limit, with its connection
// maps initialized.
//
// NOTE(review): this span is a rendered diff hunk; the first return line is
// the previous version and the second (which also initializes reverseConns)
// is its replacement.
func NewQUICFabricTransport(config *quic.Config) *QUICFabricTransport {
return &QUICFabricTransport{Config: config, IdleTTL: defaultQUICFabricConnIdleTTL, MaxStreamsPerConn: defaultQUICFabricMaxStreamsPerConn, conns: map[string]*quicFabricConnEntry{}}
return &QUICFabricTransport{Config: config, IdleTTL: defaultQUICFabricConnIdleTTL, MaxStreamsPerConn: defaultQUICFabricMaxStreamsPerConn, conns: map[string]*quicFabricConnEntry{}, reverseConns: map[string]*quicFabricConnEntry{}}
}
// SetInboundHandlers installs the production and synthetic inbound handlers
// and the logger. It delegates to SetInboundHandlersWithWebIngress with a nil
// web ingress handler, so any previously installed web ingress handler is
// cleared.
func (t *QUICFabricTransport) SetInboundHandlers(production func(context.Context, ProductionEnvelope) (ProductionForwardResult, error), synthetic func(context.Context, SyntheticEnvelope) (SyntheticEnvelope, error), logger FabricSessionEventLogger) {
	var noWebIngress func(context.Context, []byte) ([]byte, error)
	t.SetInboundHandlersWithWebIngress(production, noWebIngress, synthetic, logger)
}
// SetInboundHandlersWithWebIngress replaces the production, web ingress and
// synthetic inbound handlers and the logger in one step, under the transport
// lock. Nil arguments clear the corresponding entry. It is a no-op on a nil
// transport.
func (t *QUICFabricTransport) SetInboundHandlersWithWebIngress(production func(context.Context, ProductionEnvelope) (ProductionForwardResult, error), webIngress func(context.Context, []byte) ([]byte, error), synthetic func(context.Context, SyntheticEnvelope) (SyntheticEnvelope, error), logger FabricSessionEventLogger) {
	if t == nil {
		return
	}
	t.mu.Lock()
	defer t.mu.Unlock()
	t.inboundProductionHandler, t.inboundWebIngressHandler = production, webIngress
	t.inboundSyntheticHandler, t.logger = synthetic, logger
}
// SetInboundFabricControlHandler replaces the inbound fabric control handler
// under the transport lock. Passing nil clears it. It is a no-op on a nil
// transport.
func (t *QUICFabricTransport) SetInboundFabricControlHandler(handler func(context.Context, []byte) ([]byte, error)) {
	if t == nil {
		return
	}
	t.mu.Lock()
	defer t.mu.Unlock()
	t.inboundFabricControlHandler = handler
}
// SetLocalPeerID stores the whitespace-trimmed peerID as the transport's
// local peer ID, under the transport lock. It is a no-op on a nil transport.
func (t *QUICFabricTransport) SetLocalPeerID(peerID string) {
	if t == nil {
		return
	}
	trimmed := strings.TrimSpace(peerID)
	t.mu.Lock()
	defer t.mu.Unlock()
	t.LocalPeerID = trimmed
}
func quicTLSConfigForTarget(target FabricTransportTarget) *tls.Config {
@@ -186,9 +237,12 @@ func (t *QUICFabricTransport) connectConn(ctx context.Context, target FabricTran
conn, err := quic.DialAddr(ctx, target.Endpoint, tlsConfig, nil)
return conn, "", true, err
}
if conn, key, ok := t.reverseConnForTarget(target); ok {
return conn, key, false, nil
}
key := quicFabricConnKey(target)
if key == "" {
conn, err := quic.DialAddr(ctx, target.Endpoint, tlsConfig, t.Config)
conn, err := t.dialAddr(ctx, target.Endpoint, tlsConfig)
return conn, "", true, err
}
t.mu.Lock()
@@ -207,7 +261,7 @@ func (t *QUICFabricTransport) connectConn(ctx context.Context, target FabricTran
}
t.mu.Unlock()
conn, err := quic.DialAddr(ctx, target.Endpoint, tlsConfig, t.Config)
conn, err := t.dialAddr(ctx, target.Endpoint, tlsConfig)
if err != nil {
t.mu.Lock()
t.stats.OpenFailures++
@@ -235,16 +289,339 @@ func (t *QUICFabricTransport) connectConn(ctx context.Context, target FabricTran
t.conns[key] = &quicFabricConnEntry{conn: conn, lastUsed: time.Now()}
t.stats.Opens++
t.mu.Unlock()
go t.acceptInboundStreams(context.Background(), conn)
go t.sendReverseHello(context.Background(), conn)
return conn, key, false, nil
}
// dialAddr opens a QUIC connection to endpoint. It uses the transport's
// DialAddr hook when one is set and quic.DialAddr otherwise, passing the
// transport's QUIC config in both cases.
//
// A nil transport dials with a nil QUIC config. Previously the nil check
// guarded only the hook, and the fallback dereferenced the nil receiver.
func (t *QUICFabricTransport) dialAddr(ctx context.Context, endpoint string, tlsConfig *tls.Config) (*quic.Conn, error) {
	if t == nil {
		return quic.DialAddr(ctx, endpoint, tlsConfig, nil)
	}
	if t.DialAddr != nil {
		return t.DialAddr(ctx, endpoint, tlsConfig, t.Config)
	}
	return quic.DialAddr(ctx, endpoint, tlsConfig, t.Config)
}
// DialQUICAddrWithPacketConn dials endpoint (an optional "quic://" prefix is
// stripped) over the caller-supplied packetConn.
//
// The function takes ownership of packetConn: it is closed on every failure
// path and once the returned connection's context is done. A quic.Transport
// does not close a packet conn it did not create itself, so packetConn is
// closed explicitly after the transport. Previously only the address
// resolution failure closed it, leaking the socket in the other cases.
func DialQUICAddrWithPacketConn(ctx context.Context, endpoint string, packetConn net.PacketConn, tlsConfig *tls.Config, config *quic.Config) (*quic.Conn, error) {
	if packetConn == nil {
		return nil, fmt.Errorf("quic packet connection is required")
	}
	addr, err := net.ResolveUDPAddr("udp", strings.TrimPrefix(strings.TrimSpace(endpoint), "quic://"))
	if err != nil {
		_ = packetConn.Close()
		return nil, err
	}

	transport := &quic.Transport{Conn: packetConn}
	// release tears down the transport first, then the socket underneath it.
	release := func() {
		_ = transport.Close()
		_ = packetConn.Close()
	}

	conn, err := transport.Dial(ctx, addr, tlsConfig, config)
	if err != nil {
		release()
		return nil, err
	}
	// Tie the socket's lifetime to the connection's.
	go func() {
		<-conn.Context().Done()
		release()
	}()
	return conn, nil
}
// sendReverseHello announces the local peer ID to the remote side of conn by
// opening a stream and sending a ping frame whose payload is the
// reverse-hello prefix followed by the ID. It then waits for one reply frame,
// which is discarded.
//
// The whole exchange is bounded by a 3-second deadline: it covers opening the
// stream and is also applied to the stream's reads and writes. Previously the
// final read had no deadline, so a peer that never replied kept this
// goroutine and its stream alive until the connection closed.
//
// Failures (no local peer ID, stream open error, write error) increment
// ReverseHelloFailed; a successful write increments ReverseHelloSent.
func (t *QUICFabricTransport) sendReverseHello(ctx context.Context, conn *quic.Conn) {
	if t == nil || conn == nil {
		return
	}
	recordFailure := func() {
		t.mu.Lock()
		t.stats.ReverseHelloFailed++
		t.mu.Unlock()
	}

	localPeerID := t.localPeerID()
	if localPeerID == "" {
		recordFailure()
		return
	}

	helloCtx, cancel := context.WithTimeout(ctx, 3*time.Second)
	defer cancel()
	stream, err := conn.OpenStreamSync(helloCtx)
	if err != nil {
		recordFailure()
		return
	}
	defer func() { _ = stream.Close() }()

	// Bound both the hello write and the reply read by the hello deadline.
	if deadline, ok := helloCtx.Deadline(); ok {
		_ = stream.SetDeadline(deadline)
	}

	hello := fabricproto.Frame{
		Type:     fabricproto.FramePing,
		Sequence: 1,
		Payload:  []byte(fabricQUICReverseHelloPrefix + localPeerID),
	}
	if err := fabricproto.WriteFrame(stream, hello); err != nil {
		recordFailure()
		return
	}
	t.mu.Lock()
	t.stats.ReverseHelloSent++
	t.mu.Unlock()

	// Best effort: the reply content is not needed, so its error is ignored.
	_, _ = fabricproto.ReadFrame(stream, fabricproto.DefaultMaxPayload)
}
// acceptInboundStreams accepts streams that the remote side opens on conn and
// serves each one on its own goroutine. It returns when accepting fails, and
// does nothing for a nil transport or connection.
func (t *QUICFabricTransport) acceptInboundStreams(ctx context.Context, conn *quic.Conn) {
	if t == nil || conn == nil {
		return
	}
	for stream, err := conn.AcceptStream(ctx); err == nil; stream, err = conn.AcceptStream(ctx) {
		go t.handleInboundStream(ctx, conn, stream)
	}
}
// handleInboundStream serves one stream opened by the remote side of a
// connection this transport dialed. It logs the stream's open and close, then
// reads frames until a read fails, the context is done, or the session
// rejects a frame.
//
// Each frame is first checked for a reverse hello, then offered in order to
// the production, web ingress, fabric control and synthetic forward handlers.
// A frame that none of them claims goes to a per-stream fabric session, whose
// event (if any) is logged and whose response frames are written back.
func (t *QUICFabricTransport) handleInboundStream(ctx context.Context, conn *quic.Conn, stream *quic.Stream) {
	session := fabricproto.NewSession(fabricproto.SessionConfig{})
	defer func() { _ = stream.Close() }()

	opened := FabricSessionEventLogEntry{
		Event:      "fabric_session_quic_reverse_stream_opened",
		AcceptedBy: "quic_reverse",
		RemoteAddr: conn.RemoteAddr().String(),
	}
	t.logFabricSession(opened)
	// The closed entry is built here, not when the deferred call runs.
	closed := FabricSessionEventLogEntry{
		Event:      "fabric_session_quic_reverse_stream_closed",
		AcceptedBy: "quic_reverse",
		RemoteAddr: conn.RemoteAddr().String(),
	}
	defer t.logFabricSession(closed)

	for {
		select {
		case <-ctx.Done():
			_ = stream.Close()
			return
		default:
		}

		frame, readErr := fabricproto.ReadFrame(stream, fabricproto.DefaultMaxPayload)
		if readErr != nil {
			return
		}
		t.registerReverseHelloFrame(conn, frame)

		// Short-circuit evaluation keeps the handlers in their fixed order
		// and stops at the first one that claims the frame.
		claimed := t.handleInboundProductionForwardFrame(ctx, stream, frame) ||
			t.handleInboundWebIngressForwardFrame(ctx, stream, frame) ||
			t.handleInboundFabricControlForwardFrame(ctx, stream, frame) ||
			t.handleInboundSyntheticForwardFrame(ctx, stream, frame)
		if claimed {
			continue
		}

		event, responses, sessionErr := session.HandleFrame(frame)
		if sessionErr != nil {
			_ = stream.Close()
			return
		}
		if event.Type != fabricproto.SessionEventNone {
			t.logFabricSession(FabricSessionEventLogEntry{
				Event:        "fabric_session_reverse_event",
				SessionEvent: event.Type,
				StreamID:     event.StreamID,
				Sequence:     event.Sequence,
				TrafficClass: event.TrafficClass,
				AcceptedBy:   "quic_reverse",
				RemoteAddr:   conn.RemoteAddr().String(),
			})
		}
		for i := range responses {
			if writeErr := fabricproto.WriteFrame(stream, responses[i]); writeErr != nil {
				return
			}
		}
	}
}
// registerReverseHelloFrame inspects an inbound frame and, when it is a ping
// whose payload starts with fabricQUICReverseHelloPrefix, registers conn as the
// reverse connection for the peer ID that follows the prefix.
//
// The peer ID is trimmed here and a blank ID is ignored, so the
// "fabric_session_quic_reverse_registered" event is only logged when
// RegisterReverseConn actually stores the connection (it rejects blank IDs),
// and the logged PeerID matches the key the connection is stored under.
//
// NOTE(review): the peer ID comes straight from the frame payload; nothing in
// this function ties it to the connection's authenticated identity — confirm
// that is enforced elsewhere.
func (t *QUICFabricTransport) registerReverseHelloFrame(conn *quic.Conn, frame fabricproto.Frame) {
	if t == nil || conn == nil || frame.Type != fabricproto.FramePing {
		return
	}
	payload := string(frame.Payload)
	if !strings.HasPrefix(payload, fabricQUICReverseHelloPrefix) {
		return
	}
	peerID := strings.TrimSpace(strings.TrimPrefix(payload, fabricQUICReverseHelloPrefix))
	if peerID == "" {
		// Nothing would be registered; do not log a registration that did not happen.
		return
	}
	t.RegisterReverseConn(peerID, conn)
	t.logFabricSession(FabricSessionEventLogEntry{
		Event:      "fabric_session_quic_reverse_registered",
		AcceptedBy: "quic_reverse_hello",
		RemoteAddr: conn.RemoteAddr().String(),
		PeerID:     peerID,
	})
}
// handleInboundProductionForwardFrame answers a production forward request.
//
// It returns false, without touching the stream, unless frame is a data frame
// on ProductionForwardQUICStreamID. Otherwise it decodes the payload as a
// ProductionEnvelope, runs the registered production handler, and writes a
// quicProductionForwardResponse back with the request's sequence number,
// returning true. A missing handler, an undecodable envelope or a handler
// error is reported through the response's Error field. If the response cannot
// be encoded nothing is written.
func (t *QUICFabricTransport) handleInboundProductionForwardFrame(ctx context.Context, stream *quic.Stream, frame fabricproto.Frame) bool {
	if frame.Type != fabricproto.FrameData || frame.StreamID != ProductionForwardQUICStreamID {
		return false
	}

	var (
		response quicProductionForwardResponse
		envelope ProductionEnvelope
	)
	handler, _, _, _, _ := t.inboundHandlers()
	switch {
	case handler == nil:
		response.Error = ErrForwardRuntimeUnavailable.Error()
	case json.Unmarshal(frame.Payload, &envelope) != nil:
		response.Error = "invalid production mesh envelope"
	default:
		result, handlerErr := handler(ctx, envelope)
		if handlerErr != nil {
			response.Error = handlerErr.Error()
		} else {
			response.Result = result
		}
	}

	encoded, err := json.Marshal(response)
	if err != nil {
		return true
	}
	// Best-effort reply: a write failure is not reported to the caller.
	_ = fabricproto.WriteFrame(stream, fabricproto.Frame{
		Type:         fabricproto.FrameData,
		TrafficClass: fabricproto.TrafficClassReliable,
		StreamID:     ProductionForwardQUICStreamID,
		Sequence:     frame.Sequence,
		Payload:      encoded,
	})
	return true
}
// handleInboundWebIngressForwardFrame answers a web-ingress forward request.
//
// It returns false, without touching the stream, unless frame is a data frame
// on WebIngressForwardQUICStreamID. Otherwise it passes a copy of the payload
// to the registered web-ingress handler and writes a
// quicWebIngressForwardResponse back with the request's sequence number,
// returning true. A missing handler or a handler error is reported through the
// response's Error field.
//
// The handler's bytes are embedded as json.RawMessage, so encoding the
// response fails when they are not valid JSON. In that case an error-only
// response is sent instead of silently dropping the reply, so the requester
// does not wait for an answer that never comes.
func (t *QUICFabricTransport) handleInboundWebIngressForwardFrame(ctx context.Context, stream *quic.Stream, frame fabricproto.Frame) bool {
	if frame.Type != fabricproto.FrameData || frame.StreamID != WebIngressForwardQUICStreamID {
		return false
	}
	response := quicWebIngressForwardResponse{}
	_, webIngressHandler, _, _, _ := t.inboundHandlers()
	if webIngressHandler == nil {
		response.Error = ErrForwardRuntimeUnavailable.Error()
	} else if payload, err := webIngressHandler(ctx, append([]byte(nil), frame.Payload...)); err != nil {
		response.Error = err.Error()
	} else {
		response.Payload = append(json.RawMessage(nil), payload...)
	}
	payload, err := json.Marshal(response)
	if err != nil {
		// Fall back to an error-only response, which always encodes.
		payload, err = json.Marshal(quicWebIngressForwardResponse{Error: "encode web ingress forward response: " + err.Error()})
	}
	if err == nil {
		// Best-effort reply: a write failure is not reported to the caller.
		_ = fabricproto.WriteFrame(stream, fabricproto.Frame{
			Type:         fabricproto.FrameData,
			TrafficClass: fabricproto.TrafficClassReliable,
			StreamID:     WebIngressForwardQUICStreamID,
			Sequence:     frame.Sequence,
			Payload:      payload,
		})
	}
	return true
}
// handleInboundFabricControlForwardFrame answers a fabric-control forward
// request.
//
// It returns false, without touching the stream, unless frame is a data frame
// on FabricControlForwardQUICStreamID. Otherwise it passes a copy of the
// payload to the registered fabric-control handler and writes a
// quicFabricControlForwardResponse back with the request's sequence number,
// returning true. A missing handler or a handler error is reported through the
// response's Error field.
//
// The handler's bytes are embedded as json.RawMessage, so encoding the
// response fails when they are not valid JSON. In that case an error-only
// response is sent instead of silently dropping the reply, so the requester
// does not wait for an answer that never comes.
func (t *QUICFabricTransport) handleInboundFabricControlForwardFrame(ctx context.Context, stream *quic.Stream, frame fabricproto.Frame) bool {
	if frame.Type != fabricproto.FrameData || frame.StreamID != FabricControlForwardQUICStreamID {
		return false
	}
	response := quicFabricControlForwardResponse{}
	_, _, fabricControlHandler, _, _ := t.inboundHandlers()
	if fabricControlHandler == nil {
		response.Error = ErrForwardRuntimeUnavailable.Error()
	} else if payload, err := fabricControlHandler(ctx, append([]byte(nil), frame.Payload...)); err != nil {
		response.Error = err.Error()
	} else {
		response.Payload = append(json.RawMessage(nil), payload...)
	}
	payload, err := json.Marshal(response)
	if err != nil {
		// Fall back to an error-only response, which always encodes.
		payload, err = json.Marshal(quicFabricControlForwardResponse{Error: "encode fabric control forward response: " + err.Error()})
	}
	if err == nil {
		// Best-effort reply: a write failure is not reported to the caller.
		_ = fabricproto.WriteFrame(stream, fabricproto.Frame{
			Type:         fabricproto.FrameData,
			TrafficClass: fabricproto.TrafficClassReliable,
			StreamID:     FabricControlForwardQUICStreamID,
			Sequence:     frame.Sequence,
			Payload:      payload,
		})
	}
	return true
}
// handleInboundSyntheticForwardFrame answers a synthetic forward request.
//
// It returns false, without touching the stream, unless frame is a data frame
// on SyntheticForwardQUICStreamID. Otherwise it decodes the payload as a
// SyntheticEnvelope, runs the registered synthetic handler, and writes a
// quicSyntheticForwardResponse carrying the handler's acknowledgement back
// with the request's sequence number, returning true. A missing handler, an
// undecodable envelope or a handler error is reported through the response's
// Error field. If the response cannot be encoded nothing is written.
func (t *QUICFabricTransport) handleInboundSyntheticForwardFrame(ctx context.Context, stream *quic.Stream, frame fabricproto.Frame) bool {
	if frame.Type != fabricproto.FrameData || frame.StreamID != SyntheticForwardQUICStreamID {
		return false
	}

	var (
		response quicSyntheticForwardResponse
		envelope SyntheticEnvelope
	)
	_, _, _, handler, _ := t.inboundHandlers()
	switch {
	case handler == nil:
		response.Error = ErrMeshRuntimeDisabled.Error()
	case json.Unmarshal(frame.Payload, &envelope) != nil:
		response.Error = "invalid synthetic mesh envelope"
	default:
		ack, handlerErr := handler(ctx, envelope)
		if handlerErr != nil {
			response.Error = handlerErr.Error()
		} else {
			response.Envelope = ack
		}
	}

	encoded, err := json.Marshal(response)
	if err != nil {
		return true
	}
	// Best-effort reply: a write failure is not reported to the caller.
	_ = fabricproto.WriteFrame(stream, fabricproto.Frame{
		Type:         fabricproto.FrameData,
		TrafficClass: fabricproto.TrafficClassReliable,
		StreamID:     SyntheticForwardQUICStreamID,
		Sequence:     frame.Sequence,
		Payload:      encoded,
	})
	return true
}
// inboundHandlers returns, in order, the production, web-ingress,
// fabric-control and synthetic inbound handlers plus the session event logger,
// all read under the transport mutex. Every value is nil for a nil transport.
func (t *QUICFabricTransport) inboundHandlers() (func(context.Context, ProductionEnvelope) (ProductionForwardResult, error), func(context.Context, []byte) ([]byte, error), func(context.Context, []byte) ([]byte, error), func(context.Context, SyntheticEnvelope) (SyntheticEnvelope, error), FabricSessionEventLogger) {
	if t == nil {
		return nil, nil, nil, nil, nil
	}
	t.mu.Lock()
	production := t.inboundProductionHandler
	webIngress := t.inboundWebIngressHandler
	fabricControl := t.inboundFabricControlHandler
	synthetic := t.inboundSyntheticHandler
	logger := t.logger
	t.mu.Unlock()
	return production, webIngress, fabricControl, synthetic, logger
}
// localPeerID returns the transport's LocalPeerID with surrounding whitespace
// removed, read under the transport mutex. A nil transport yields "".
func (t *QUICFabricTransport) localPeerID() string {
	if t == nil {
		return ""
	}
	t.mu.Lock()
	id := t.LocalPeerID
	t.mu.Unlock()
	return strings.TrimSpace(id)
}
// logFabricSession passes entry to the configured session event logger, if
// there is one. The logger is fetched through inboundHandlers, so it is read
// under the mutex but invoked after the mutex is released.
func (t *QUICFabricTransport) logFabricSession(entry FabricSessionEventLogEntry) {
	if _, _, _, _, logger := t.inboundHandlers(); logger != nil {
		logger(entry)
	}
}
// RegisterReverseConn records conn as the reverse connection for peerID.
//
// A nil transport, nil connection or blank (after trimming) peer ID is a
// no-op. When a different connection is already registered for the peer and is
// still open, it is closed with "reverse connection replaced" before the new
// one takes its place. When the same connection is registered again, the
// existing entry is kept and only its last-used time is refreshed; replacing
// it would reset the entry's activeStreams count while streams are still open
// on that connection. Every accepted call increments stats.ReverseRegisters.
func (t *QUICFabricTransport) RegisterReverseConn(peerID string, conn *quic.Conn) {
	if t == nil || conn == nil {
		return
	}
	peerID = strings.TrimSpace(peerID)
	if peerID == "" {
		return
	}
	t.mu.Lock()
	defer t.mu.Unlock()
	if t.reverseConns == nil {
		t.reverseConns = map[string]*quicFabricConnEntry{}
	}
	t.stats.ReverseRegisters++
	if existing := t.reverseConns[peerID]; existing != nil && existing.conn != nil {
		if existing.conn == conn {
			// Same connection announced again: keep its stream accounting.
			existing.lastUsed = time.Now()
			return
		}
		select {
		case <-existing.conn.Context().Done():
			// Already closed; nothing to tear down.
		default:
			_ = existing.conn.CloseWithError(0, "reverse connection replaced")
		}
	}
	t.reverseConns[peerID] = &quicFabricConnEntry{conn: conn, lastUsed: time.Now()}
}
// reverseConnForTarget looks up a registered reverse connection for
// target.PeerID.
//
// It reports false when the transport is nil, the peer ID is blank, the
// target's transport does not prefer reverse connections, or no live
// connection is registered. Idle entries are pruned first. An entry whose
// connection context is already done is removed and counted in
// stats.ClosedEvicted. On success the entry's last-used time is refreshed,
// stats.ReverseReuses is incremented, and the connection is returned with its
// reverse connection key.
func (t *QUICFabricTransport) reverseConnForTarget(target FabricTransportTarget) (*quic.Conn, string, bool) {
	if t == nil {
		return nil, "", false
	}
	peerID := strings.TrimSpace(target.PeerID)
	if peerID == "" {
		return nil, "", false
	}
	if !fabricTransportPrefersReverseConn(target.Transport) {
		return nil, "", false
	}

	t.mu.Lock()
	defer t.mu.Unlock()
	t.pruneIdleLocked(time.Now())

	entry := t.reverseConns[peerID]
	if entry == nil || entry.conn == nil {
		return nil, "", false
	}

	closed := false
	select {
	case <-entry.conn.Context().Done():
		closed = true
	default:
	}
	if closed {
		delete(t.reverseConns, peerID)
		t.stats.ClosedEvicted++
		return nil, "", false
	}

	entry.lastUsed = time.Now()
	t.stats.ReverseReuses++
	return entry.conn, quicFabricReverseConnKey(peerID), true
}
func (t *QUICFabricTransport) reserveStream(key string, conn *quic.Conn) error {
if t == nil || key == "" {
return nil
}
t.mu.Lock()
defer t.mu.Unlock()
entry := t.conns[key]
entry := t.connEntryLocked(key)
if entry == nil || entry.conn != conn {
return fmt.Errorf("quic fabric connection is not cached")
}
@@ -267,16 +644,26 @@ func (t *QUICFabricTransport) releaseStream(key string) {
return
}
t.mu.Lock()
if entry := t.conns[key]; entry != nil {
if entry := t.connEntryLocked(key); entry != nil {
if entry.activeStreams > 0 {
entry.activeStreams--
}
entry.lastUsed = time.Now()
t.stats.StreamCloses++
}
t.stats.StreamCloses++
t.mu.Unlock()
}
// connEntryLocked resolves a connection key to its cache entry. Keys that start
// with "reverse\x00" are looked up in reverseConns by the remainder of the
// key; all other keys are looked up in conns. It returns nil for a nil
// transport, an empty key, or an unknown key. The "Locked" suffix follows the
// callers' usage: they invoke it while holding t.mu.
func (t *QUICFabricTransport) connEntryLocked(key string) *quicFabricConnEntry {
	const reversePrefix = "reverse\x00"
	switch {
	case t == nil, key == "":
		return nil
	case strings.HasPrefix(key, reversePrefix):
		return t.reverseConns[key[len(reversePrefix):]]
	default:
		return t.conns[key]
	}
}
func (t *QUICFabricTransport) evictConn(target FabricTransportTarget, conn *quic.Conn) {
if t == nil || conn == nil {
return
@@ -315,6 +702,20 @@ func (t *QUICFabricTransport) pruneIdleLocked(now time.Time) {
t.stats.IdleEvicted++
}
}
for peerID, entry := range t.reverseConns {
if entry == nil || entry.conn == nil {
delete(t.reverseConns, peerID)
continue
}
if !entry.lastUsed.IsZero() && now.Sub(entry.lastUsed) > ttl {
if entry.activeStreams > 0 {
continue
}
_ = entry.conn.CloseWithError(0, "idle reverse")
delete(t.reverseConns, peerID)
t.stats.IdleEvicted++
}
}
}
func quicFabricConnKey(target FabricTransportTarget) string {
@@ -340,6 +741,23 @@ func parseQUICFabricConnKey(key string) (peerID string, endpoint string, certSHA
return peerID, endpoint, certSHA256
}
// quicFabricReverseConnKey builds the connection key for a peer's reverse
// connection: "reverse\x00" followed by the trimmed peer ID. A blank peer ID
// yields the empty key.
func quicFabricReverseConnKey(peerID string) string {
	if trimmed := strings.TrimSpace(peerID); trimmed != "" {
		return "reverse\x00" + trimmed
	}
	return ""
}
// fabricTransportPrefersReverseConn reports whether the transport label, after
// trimming and lowercasing, is "reverse_quic" or "relay_quic".
func fabricTransportPrefersReverseConn(transport string) bool {
	normalized := strings.ToLower(strings.TrimSpace(transport))
	return normalized == "reverse_quic" || normalized == "relay_quic"
}
func (t *QUICFabricTransport) Close() error {
if t == nil {
return nil
@@ -348,12 +766,19 @@ func (t *QUICFabricTransport) Close() error {
t.stats.CloseAllCalls++
conns := t.conns
t.conns = map[string]*quicFabricConnEntry{}
reverseConns := t.reverseConns
t.reverseConns = map[string]*quicFabricConnEntry{}
t.mu.Unlock()
for _, entry := range conns {
if entry != nil && entry.conn != nil {
_ = entry.conn.CloseWithError(0, "closed")
}
}
for _, entry := range reverseConns {
if entry != nil && entry.conn != nil {
_ = entry.conn.CloseWithError(0, "closed")
}
}
return nil
}
@@ -370,6 +795,7 @@ func (t *QUICFabricTransport) Snapshot() QUICFabricTransportSnapshot {
}
snapshot := QUICFabricTransportSnapshot{
SchemaVersion: "rap.quic_fabric_transport.v1",
LocalPeerID: strings.TrimSpace(t.LocalPeerID),
MaxStreamsPerConn: limit,
Stats: t.stats,
}
@@ -391,6 +817,40 @@ func (t *QUICFabricTransport) Snapshot() QUICFabricTransportSnapshot {
PeerID: peerID,
Endpoint: endpoint,
CertSHA256: certSHA256,
Direction: "outbound",
ActiveStreams: entry.activeStreams,
MaxStreams: limit,
Saturated: entry.activeStreams >= limit,
}
if !entry.lastUsed.IsZero() {
connSnapshot.LastUsedUnixSec = entry.lastUsed.UTC().Unix()
}
if limit > 0 {
connSnapshot.CapacityPressurePercent = (entry.activeStreams * 100) / limit
}
snapshot.Connections = append(snapshot.Connections, connSnapshot)
if entry.activeStreams >= limit {
snapshot.SaturatedConnections++
}
}
}
for peerID, entry := range t.reverseConns {
if entry == nil || entry.conn == nil {
delete(t.reverseConns, peerID)
continue
}
select {
case <-entry.conn.Context().Done():
delete(t.reverseConns, peerID)
t.stats.ClosedEvicted++
snapshot.Stats.ClosedEvicted++
default:
snapshot.ActiveCount++
snapshot.ActiveStreams += entry.activeStreams
connSnapshot := QUICFabricConnSnapshot{
PeerID: peerID,
Endpoint: entry.conn.RemoteAddr().String(),
Direction: "reverse",
ActiveStreams: entry.activeStreams,
MaxStreams: limit,
Saturated: entry.activeStreams >= limit,
@@ -462,6 +922,7 @@ func (s *quicFabricSession) Close() error {
s.closeOnce.Do(func() {
close(s.done)
if s.stream != nil {
s.stream.CancelRead(0)
err = s.stream.Close()
}
if s.transport != nil {
@@ -9,6 +9,7 @@ import (
"crypto/x509"
"crypto/x509/pkix"
"encoding/hex"
"encoding/json"
"encoding/pem"
"math/big"
"strings"
@@ -341,6 +342,119 @@ func TestQUICFabricTransportLimitsStreamsPerConnection(t *testing.T) {
defer second.Close()
}
// TestQUICFabricTransportReusesInboundConnectionForReverseStream checks that a
// server-side transport can open a stream back to a client over the connection
// the client dialed: node-a connects to the server, the server's reverse
// transport registers that connection from the reverse hello, and a
// "relay_quic" Connect to node-a then carries a production forward frame that
// node-a's inbound production handler answers.
func TestQUICFabricTransportReusesInboundConnectionForReverseStream(t *testing.T) {
// Server side: the transport that should pick up the inbound connection.
reverseTransport := NewQUICFabricTransport(nil)
defer reverseTransport.Close()
server, err := StartQUICFabricServer(context.Background(), QUICFabricServerConfig{
ListenAddr: "127.0.0.1:0",
TLSConfig: testQUICTLSConfig(t),
ReverseTransport: reverseTransport,
SyntheticForwardHandler: func(_ context.Context, envelope SyntheticEnvelope) (SyntheticEnvelope, error) {
envelope.To, envelope.From = envelope.From, PeerIdentity{ClusterID: envelope.ClusterID, NodeID: "node-r"}
return envelope, nil
},
})
if err != nil {
t.Fatalf("start quic fabric server: %v", err)
}
defer server.Close()
// Client side: node-a, with a production handler that accepts every envelope.
clientTransport := NewQUICFabricTransport(nil)
defer clientTransport.Close()
clientTransport.SetLocalPeerID("node-a")
clientTransport.SetInboundHandlers(func(_ context.Context, envelope ProductionEnvelope) (ProductionForwardResult, error) {
return ProductionForwardResult{
Accepted: true,
Delivered: true,
Forwarded: true,
By: PeerIdentity{ClusterID: envelope.ClusterID, NodeID: "node-a"},
MessageID: envelope.MessageID,
RouteID: envelope.RouteID,
}, nil
}, nil, nil)
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
defer cancel()
session, err := clientTransport.Connect(ctx, FabricTransportTarget{
PeerID: "node-r",
Endpoint: server.Addr().String(),
TLSConfig: &tls.Config{
InsecureSkipVerify: true,
NextProtos: []string{fabricQUICNextProto},
},
Timeout: time.Second,
InboundBuffer: 4,
ErrorBuffer: 4,
})
if err != nil {
t.Fatalf("client connect: %v", err)
}
defer session.Close()
// Poll for up to one second until the server side has registered the connection.
deadline := time.Now().Add(time.Second)
for {
if reverseTransport.Snapshot().Stats.ReverseRegisters > 0 {
break
}
if time.Now().After(deadline) {
t.Fatalf("reverse hello did not register connection: %+v", reverseTransport.Snapshot())
}
time.Sleep(10 * time.Millisecond)
}
// No TLS config is supplied for this target; the test expects the registered
// reverse connection to be used rather than the Endpoint being dialed.
reverseSession, err := reverseTransport.Connect(ctx, FabricTransportTarget{
PeerID: "node-a",
Endpoint: "10.0.0.2:19443",
Transport: "relay_quic",
Timeout: time.Second,
InboundBuffer: 4,
ErrorBuffer: 4,
})
if err != nil {
t.Fatalf("reverse connect: %v", err)
}
defer reverseSession.Close()
productionPayload, err := json.Marshal(ProductionEnvelope{
FabricProtocolVersion: ProtocolVersion,
MessageID: "msg-1",
RouteID: "route-r-a",
ClusterID: "cluster-1",
SourceNodeID: "node-r",
DestinationNodeID: "node-a",
CurrentHopNodeID: "node-a",
NextHopNodeID: "node-a",
ChannelClass: ProductionChannelFabricControl,
MessageType: ProductionMessageFabricControl,
TTL: 4,
CreatedAt: time.Now().UTC(),
ExpiresAt: time.Now().UTC().Add(time.Minute),
PayloadHash: "unused-by-test-handler",
})
if err != nil {
t.Fatalf("marshal production: %v", err)
}
if err := reverseSession.Send(ctx, fabricproto.Frame{Type: fabricproto.FrameData, TrafficClass: fabricproto.TrafficClassReliable, StreamID: ProductionForwardQUICStreamID, Sequence: 2, Payload: productionPayload}); err != nil {
t.Fatalf("send reverse production: %v", err)
}
// The reply must come from node-a's production handler.
select {
case frame := <-reverseSession.Frames():
var response quicProductionForwardResponse
if err := json.Unmarshal(frame.Payload, &response); err != nil {
t.Fatalf("decode response: %v", err)
}
if !response.Result.Accepted || !response.Result.Delivered || response.Result.By.NodeID != "node-a" {
t.Fatalf("response = %+v", response)
}
case err := <-reverseSession.Errors():
t.Fatalf("reverse session error: %v", err)
case <-ctx.Done():
t.Fatal(ctx.Err())
}
// Both the registration and the reuse must show up in the stats.
snapshot := reverseTransport.Snapshot()
if snapshot.Stats.ReverseRegisters == 0 || snapshot.Stats.ReverseReuses == 0 {
t.Fatalf("reverse connection was not registered/reused: %+v", snapshot)
}
}
func TestQUICFabricServerHandlesFabricFrames(t *testing.T) {
var events []FabricSessionEventLogEntry
server, err := StartQUICFabricServer(context.Background(), QUICFabricServerConfig{
@@ -389,6 +503,68 @@ func TestQUICFabricServerHandlesFabricFrames(t *testing.T) {
}
}
// TestQUICFabricServerHandlesWebIngressForwardFrames sends one data frame on
// WebIngressForwardQUICStreamID to a QUIC fabric server and checks that the
// configured web-ingress handler receives the payload unchanged and that its
// return value comes back, wrapped in a quicWebIngressForwardResponse, on the
// same stream ID and sequence number.
//
// The client transport is held in a variable and closed on exit, as the other
// transport tests in this file do, so its cached connection does not outlive
// the test.
func TestQUICFabricServerHandlesWebIngressForwardFrames(t *testing.T) {
	var received []byte
	server, err := StartQUICFabricServer(context.Background(), QUICFabricServerConfig{
		ListenAddr: "127.0.0.1:0",
		TLSConfig:  testQUICTLSConfig(t),
		WebIngressForwardHandler: func(_ context.Context, payload []byte) ([]byte, error) {
			received = append([]byte(nil), payload...)
			return []byte(`{"schema_version":"rap.web_ingress.fabric_runtime_response.v1","status_code":200,"body_b64":"b2s="}`), nil
		},
	})
	if err != nil {
		t.Fatalf("start quic fabric server: %v", err)
	}
	defer server.Close()

	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
	defer cancel()

	transport := NewQUICFabricTransport(nil)
	defer transport.Close()
	session, err := transport.Connect(ctx, FabricTransportTarget{
		Endpoint: server.Addr().String(),
		TLSConfig: &tls.Config{
			InsecureSkipVerify: true,
			NextProtos:         []string{fabricQUICNextProto},
		},
		Timeout:       time.Second,
		InboundBuffer: 4,
		ErrorBuffer:   4,
	})
	if err != nil {
		t.Fatalf("connect quic fabric: %v", err)
	}
	defer session.Close()

	if err := session.Send(ctx, fabricproto.Frame{
		Type:         fabricproto.FrameData,
		TrafficClass: fabricproto.TrafficClassReliable,
		StreamID:     WebIngressForwardQUICStreamID,
		Sequence:     44,
		Payload:      []byte(`{"envelope":true}`),
	}); err != nil {
		t.Fatalf("send web ingress frame: %v", err)
	}

	select {
	case frame := <-session.Frames():
		if frame.Type != fabricproto.FrameData || frame.StreamID != WebIngressForwardQUICStreamID || frame.Sequence != 44 {
			t.Fatalf("frame = %+v", frame)
		}
		var response quicWebIngressForwardResponse
		if err := json.Unmarshal(frame.Payload, &response); err != nil {
			t.Fatalf("decode response: %v", err)
		}
		if string(response.Payload) != `{"schema_version":"rap.web_ingress.fabric_runtime_response.v1","status_code":200,"body_b64":"b2s="}` || response.Error != "" {
			t.Fatalf("response = %+v", response)
		}
	case err := <-session.Errors():
		t.Fatalf("session error: %v", err)
	case <-ctx.Done():
		t.Fatal(ctx.Err())
	}

	// The handler stored its input before replying, so it is set by now.
	if string(received) != `{"envelope":true}` {
		t.Fatalf("received = %s", string(received))
	}
}
func startQUICFabricEchoServer(t *testing.T) *quic.Listener {
t.Helper()
return startQUICFabricEchoServerWithTLS(t, testQUICTLSConfig(t))
@@ -0,0 +1,128 @@
package mesh
import (
"strings"
"sync"
"time"
)
// FabricRouteHealthTracker keeps a quarantine list of routes that recently
// failed, keyed by route ID. MarkFailure adds or refreshes an entry,
// MarkSuccess removes it, and Apply marks quarantined routes in a route set as
// unhealthy and degraded until their retry time has passed.
type FabricRouteHealthTracker struct {
// mu guards routes.
mu sync.Mutex
// QuarantineTTL is how long a route stays quarantined after a failure;
// values <= 0 are treated as 30 seconds by MarkFailure.
QuarantineTTL time.Duration
// routes maps a trimmed route ID to its latest failure record.
routes map[string]FabricRouteHealthEntry
}
// FabricRouteHealthEntry is the failure record kept for one quarantined route.
//
// Note: encoding/json's omitempty option never omits a struct value, so the
// two time.Time fields are always encoded, even when zero.
type FabricRouteHealthEntry struct {
// Reason is the trimmed reason given with the most recent failure.
Reason string `json:"reason,omitempty"`
// Failures counts MarkFailure calls since the entry was last removed.
Failures uint64 `json:"failures"`
// LastFailure is the time of the most recent failure.
LastFailure time.Time `json:"last_failure,omitempty"`
// RetryAfter is LastFailure plus the quarantine TTL; from this instant on
// the route is no longer treated as quarantined.
RetryAfter time.Time `json:"retry_after,omitempty"`
}
// FabricRouteHealthSnapshot is a point-in-time copy of the tracker's
// quarantine list.
type FabricRouteHealthSnapshot struct {
// Quarantined maps route ID to its failure record; Snapshot leaves it nil
// when no route is currently quarantined.
Quarantined map[string]FabricRouteHealthEntry `json:"quarantined,omitempty"`
}
// NewFabricRouteHealthTracker returns an empty tracker that quarantines failed
// routes for ttl. A ttl <= 0 selects the 30-second default.
func NewFabricRouteHealthTracker(ttl time.Duration) *FabricRouteHealthTracker {
	tracker := &FabricRouteHealthTracker{
		QuarantineTTL: 30 * time.Second,
		routes:        make(map[string]FabricRouteHealthEntry),
	}
	if ttl > 0 {
		tracker.QuarantineTTL = ttl
	}
	return tracker
}
// MarkFailure records a failure for routeID and quarantines the route until
// now plus the tracker's QuarantineTTL (30 seconds when the TTL is <= 0).
//
// The route ID and reason are trimmed; a nil tracker or blank route ID is a
// no-op. A zero now means the current UTC time. Repeated failures increment
// the entry's Failures counter and overwrite its reason and timestamps.
func (t *FabricRouteHealthTracker) MarkFailure(routeID string, reason string, now time.Time) {
	id := strings.TrimSpace(routeID)
	if t == nil || id == "" {
		return
	}
	failedAt := now
	if failedAt.IsZero() {
		failedAt = time.Now().UTC()
	}
	quarantine := t.QuarantineTTL
	if quarantine <= 0 {
		quarantine = 30 * time.Second
	}

	t.mu.Lock()
	defer t.mu.Unlock()
	// A zero-value tracker has no map yet.
	if t.routes == nil {
		t.routes = map[string]FabricRouteHealthEntry{}
	}
	entry := t.routes[id]
	entry.Failures++
	entry.Reason = strings.TrimSpace(reason)
	entry.LastFailure = failedAt
	entry.RetryAfter = failedAt.Add(quarantine)
	t.routes[id] = entry
}
// MarkSuccess removes routeID from quarantine, discarding its failure record.
// A nil tracker or blank (after trimming) route ID is a no-op.
func (t *FabricRouteHealthTracker) MarkSuccess(routeID string) {
	if t == nil {
		return
	}
	id := strings.TrimSpace(routeID)
	if id == "" {
		return
	}
	t.mu.Lock()
	defer t.mu.Unlock()
	delete(t.routes, id)
}
// Apply returns routeSet with every currently quarantined route marked
// Healthy=false and Degraded=true. Routes are matched by RouteID.
//
// An entry whose RetryAfter is set and not after now has expired: it is removed
// from the tracker and its route is returned unchanged. A zero now means the
// current UTC time. A nil tracker or an empty quarantine list returns routeSet
// as given.
func (t *FabricRouteHealthTracker) Apply(routeSet FabricRouteSet, now time.Time) FabricRouteSet {
	if t == nil {
		return routeSet
	}
	at := now
	if at.IsZero() {
		at = time.Now().UTC()
	}

	t.mu.Lock()
	defer t.mu.Unlock()
	if len(t.routes) == 0 {
		return routeSet
	}

	quarantine := func(route FabricRoute) FabricRoute {
		entry, tracked := t.routes[route.RouteID]
		switch {
		case !tracked:
			// Never failed, or already cleared.
		case !entry.RetryAfter.IsZero() && !at.Before(entry.RetryAfter):
			// Quarantine has run out; forget the failure.
			delete(t.routes, route.RouteID)
		default:
			route.Healthy = false
			route.Degraded = true
		}
		return route
	}
	return mapFabricRouteSet(routeSet, quarantine)
}
// Snapshot returns a copy of the routes that are still quarantined at now.
// Expired entries are left out of the copy but are not removed from the
// tracker. A zero now means the current UTC time. The result has a nil
// Quarantined map when the tracker is nil or nothing is quarantined.
func (t *FabricRouteHealthTracker) Snapshot(now time.Time) FabricRouteHealthSnapshot {
	var snapshot FabricRouteHealthSnapshot
	if t == nil {
		return snapshot
	}
	at := now
	if at.IsZero() {
		at = time.Now().UTC()
	}

	t.mu.Lock()
	defer t.mu.Unlock()

	active := make(map[string]FabricRouteHealthEntry)
	for id, entry := range t.routes {
		expired := !entry.RetryAfter.IsZero() && !at.Before(entry.RetryAfter)
		if !expired {
			active[id] = entry
		}
	}
	if len(active) > 0 {
		snapshot.Quarantined = active
	}
	return snapshot
}
// mapFabricRouteSet returns routeSet with fn applied to the primary route (only
// when its RouteID is non-blank) and to every warm-standby and cold-fallback
// route, in that order.
//
// The standby and fallback slices are rebuilt rather than updated in place.
// routeSet arrives by value, but its slices still share backing arrays with the
// caller's copy, so writing through them would change the caller's route set
// as well (for example leaving a route marked unhealthy in a reused route set
// after its quarantine expired). Empty or nil slices are passed through
// untouched.
func mapFabricRouteSet(routeSet FabricRouteSet, fn func(FabricRoute) FabricRoute) FabricRouteSet {
	if strings.TrimSpace(routeSet.Primary.RouteID) != "" {
		routeSet.Primary = fn(routeSet.Primary)
	}
	// mapRoutes applies fn to each route into a freshly allocated slice.
	mapRoutes := func(routes []FabricRoute) []FabricRoute {
		if len(routes) == 0 {
			return routes
		}
		mapped := make([]FabricRoute, len(routes))
		for i, route := range routes {
			mapped[i] = fn(route)
		}
		return mapped
	}
	routeSet.WarmStandby = mapRoutes(routeSet.WarmStandby)
	routeSet.ColdFallbacks = mapRoutes(routeSet.ColdFallbacks)
	return routeSet
}
@@ -0,0 +1,322 @@
package mesh
import (
"encoding/json"
"fmt"
"strings"
"time"
)
// Labels the route planner compares against a peer endpoint candidate's
// Reachability and ConnectivityMode fields. The planner trims and lowercases
// the candidate values before comparing.
const (
// Reachability labels. Relay selects the relay route mode and OutboundOnly
// the reverse route mode; Private, OutboundOnly and Relay also change the
// latency estimate used when no observation is available.
FabricCandidateReachabilityPublic = "public"
FabricCandidateReachabilityPrivate = "private"
FabricCandidateReachabilityRelay = "relay"
FabricCandidateReachabilityOutboundOnly = "outbound_only"
// Connectivity labels. RelayRequired selects the relay route mode and
// OutboundOnly the reverse route mode.
FabricConnectivityDirect = "direct"
FabricConnectivityOutboundOnly = "outbound_only"
FabricConnectivityRelayRequired = "relay_required"
)
// FabricRoutePlannerConfig carries the local node's context and the tuning
// inputs used when turning peer endpoint candidates into fabric routes.
type FabricRoutePlannerConfig struct {
// ClusterID is copied (trimmed) into every planned route.
ClusterID string
// LocalNodeID becomes each route's source node and, when non-blank, its first hop.
LocalNodeID string
// LocalSegmentID and LocalNATGroupID are compared, ignoring case, with the
// candidate metadata; a match selects the LAN route mode. Blank values never match.
LocalSegmentID string
LocalNATGroupID string
// DefaultCapacity, RelayCapacity and ReverseCapacity set the route capacity
// per mode. Relay and reverse fall back to DefaultCapacity, and everything
// falls back to 100 when no positive value is configured.
DefaultCapacity int
RelayCapacity int
ReverseCapacity int
// Observations and CapacityPressure are looked up by candidate endpoint ID.
Observations map[string]EndpointCandidateHealthObservation
CapacityPressure map[string]EndpointCandidateCapacityPressure
// Now is the planning time; zero means the current UTC time.
Now time.Time
// MaxObservationAge and MaxCapacityPressureAge are passed to candidate
// ranking; non-positive values default to 30s and 10s respectively.
MaxObservationAge time.Duration
MaxCapacityPressureAge time.Duration
}
// FabricCandidateMetadata is the part of a peer endpoint candidate's JSON
// metadata that the route planner reads.
type FabricCandidateMetadata struct {
// LocalSegmentID and NATGroupID are matched against the planner's local
// segment and NAT group to detect LAN routes.
LocalSegmentID string `json:"local_segment_id,omitempty"`
NATGroupID string `json:"nat_group_id,omitempty"`
// RelayNodeID names the relay hop; ViaNodeID is used when it is blank.
RelayNodeID string `json:"relay_node_id,omitempty"`
// RelayEndpoint is the relay hop's address; a non-blank value also selects
// the relay route mode.
RelayEndpoint string `json:"relay_endpoint,omitempty"`
ViaNodeID string `json:"via_node_id,omitempty"`
// A non-blank STUNServer or ICEFoundation selects the ICE route mode.
STUNServer string `json:"stun_server,omitempty"`
ICEFoundation string `json:"ice_foundation,omitempty"`
}
// FabricRouteSetForPeerEndpointCandidates plans a route set towards
// targetNodeID from its endpoint candidates.
//
// A blank targetNodeID falls back to the first candidate's node ID. Candidates
// are ranked with RankPeerEndpointCandidates, using cfg's observations and
// capacity pressure (maximum ages default to 30s and 10s), converted to routes
// in rank order, and handed to routeSetFromRoutes. Candidates that cannot be
// turned into a route are dropped. With no candidates the result only carries
// the target kind and ID.
func FabricRouteSetForPeerEndpointCandidates(targetNodeID string, candidates []PeerEndpointCandidate, cfg FabricRoutePlannerConfig) FabricRouteSet {
	target := strings.TrimSpace(targetNodeID)
	if target == "" && len(candidates) > 0 {
		target = strings.TrimSpace(candidates[0].NodeID)
	}
	planned := FabricRouteSet{TargetKind: FabricChannelTargetNode, TargetID: target}
	if len(candidates) == 0 {
		return planned
	}

	planTime := cfg.Now
	if planTime.IsZero() {
		planTime = time.Now().UTC()
	}
	scoring := EndpointCandidateScoreOptions{
		Now:                    planTime,
		Observations:           cfg.Observations,
		MaxObservationAge:      firstNonZeroDuration(cfg.MaxObservationAge, 30*time.Second),
		CapacityPressure:       cfg.CapacityPressure,
		MaxCapacityPressureAge: firstNonZeroDuration(cfg.MaxCapacityPressureAge, 10*time.Second),
	}
	ranked := RankPeerEndpointCandidates(candidates, scoring)

	routes := make([]FabricRoute, 0, len(ranked))
	for rank := range ranked {
		if route, ok := fabricRouteForPeerEndpointCandidate(ranked[rank].Candidate, cfg, ranked[rank].Score, rank, planTime); ok {
			routes = append(routes, route)
		}
	}
	return routeSetFromRoutes(planned, routes)
}
// FabricRouteSetsForPeerEndpointCandidates plans a route set for every node in
// candidatesByNode and returns them keyed by trimmed node ID. Blank node IDs
// are skipped, as are nodes whose route set has no primary, warm-standby or
// cold-fallback route.
func FabricRouteSetsForPeerEndpointCandidates(candidatesByNode map[string][]PeerEndpointCandidate, cfg FabricRoutePlannerConfig) map[string]FabricRouteSet {
	planned := make(map[string]FabricRouteSet, len(candidatesByNode))
	for rawNodeID, candidates := range candidatesByNode {
		nodeID := strings.TrimSpace(rawNodeID)
		if nodeID == "" {
			continue
		}
		routeSet := FabricRouteSetForPeerEndpointCandidates(nodeID, candidates, cfg)
		hasPrimary := strings.TrimSpace(routeSet.Primary.RouteID) != ""
		hasAlternates := len(routeSet.WarmStandby) > 0 || len(routeSet.ColdFallbacks) > 0
		if !hasPrimary && !hasAlternates {
			continue
		}
		planned[nodeID] = routeSet
	}
	return planned
}
// fabricRouteForPeerEndpointCandidate converts one ranked candidate into a
// FabricRoute.
//
// It reports false when the candidate lacks an endpoint ID, node ID or address
// (after trimming), uses a transport that is not QUIC-only, or yields no hops.
// The route ID is the candidate's endpoint ID. RelayCount is the number of
// hops in relay mode. The route starts healthy and not degraded; when an
// observation exists for the endpoint, it is healthy if the reliability score
// is 0 or at least 50, and degraded if the last latency is 250 ms or more.
func fabricRouteForPeerEndpointCandidate(candidate PeerEndpointCandidate, cfg FabricRoutePlannerConfig, score int, index int, now time.Time) (FabricRoute, bool) {
	candidate.EndpointID = strings.TrimSpace(candidate.EndpointID)
	candidate.NodeID = strings.TrimSpace(candidate.NodeID)
	candidate.Address = strings.TrimRight(strings.TrimSpace(candidate.Address), "/")
	switch {
	case candidate.EndpointID == "", candidate.NodeID == "", candidate.Address == "":
		return FabricRoute{}, false
	case !isQUICOnlyCandidateTransport(candidate.Transport):
		return FabricRoute{}, false
	}

	metadata := decodeFabricCandidateMetadata(candidate.Metadata)
	mode := fabricRouteModeForPeerEndpointCandidate(candidate, metadata, cfg)
	hops := fabricRouteHopsForCandidate(candidate, metadata, mode, cfg)
	if len(hops) == 0 {
		return FabricRoute{}, false
	}

	relayHops := 0
	for i := range hops {
		if hops[i].Mode == FabricRouteRelay {
			relayHops++
		}
	}

	route := FabricRoute{
		RouteID:           candidate.EndpointID,
		ClusterID:         strings.TrimSpace(cfg.ClusterID),
		SourceNodeID:      strings.TrimSpace(cfg.LocalNodeID),
		DestinationNodeID: candidate.NodeID,
		Hops:              hops,
		BaseLatencyMs:     fabricRouteLatencyFromCandidate(candidate, cfg, score, index),
		Capacity:          fabricRouteCapacityForMode(mode, cfg),
		ActiveChannels:    int(candidatePressureCount(candidate.EndpointID, cfg)),
		RelayCount:        relayHops,
		Healthy:           true,
		Degraded:          false,
		LastUpdatedAt:     now,
	}
	if route.Capacity <= 0 {
		route.Capacity = 100
	}
	if observation, ok := cfg.Observations[candidate.EndpointID]; ok {
		route.Healthy = observation.ReliabilityScore == 0 || observation.ReliabilityScore >= 50
		route.Degraded = observation.LastLatencyMs > 0 && observation.LastLatencyMs >= 250
	}
	return route, true
}
// fabricRouteModeForPeerEndpointCandidate picks the route mode for a candidate.
//
// A transport label that already maps to relay, reverse, ICE or LAN wins
// outright. Otherwise the first matching rule applies:
//  1. same local segment or NAT group as the planner -> LAN
//  2. relay reachability, relay-required connectivity, or a relay endpoint in
//     the metadata -> relay
//  3. outbound-only connectivity or reachability -> reverse
//  4. a STUN server or ICE foundation in the metadata, or a non-empty NAT type -> ICE
//  5. anything else -> direct
func fabricRouteModeForPeerEndpointCandidate(candidate PeerEndpointCandidate, metadata FabricCandidateMetadata, cfg FabricRoutePlannerConfig) FabricRouteMode {
	switch explicit := fabricRouteModeForTransportTarget(FabricTransportTarget{Transport: candidate.Transport}); explicit {
	case FabricRouteRelay, FabricRouteReverse, FabricRouteICE, FabricRouteLAN:
		return explicit
	}

	reachability := strings.ToLower(strings.TrimSpace(candidate.Reachability))
	connectivity := strings.ToLower(strings.TrimSpace(candidate.ConnectivityMode))
	hasRelayEndpoint := strings.TrimSpace(metadata.RelayEndpoint) != ""
	hasICEHints := strings.TrimSpace(metadata.STUNServer) != "" ||
		strings.TrimSpace(metadata.ICEFoundation) != "" ||
		candidate.NATType != ""

	switch {
	case sameLocalSegment(metadata, cfg) || sameNATGroup(metadata, cfg):
		return FabricRouteLAN
	case reachability == FabricCandidateReachabilityRelay || connectivity == FabricConnectivityRelayRequired || hasRelayEndpoint:
		return FabricRouteRelay
	case connectivity == FabricConnectivityOutboundOnly || reachability == FabricCandidateReachabilityOutboundOnly:
		return FabricRouteReverse
	case hasICEHints:
		return FabricRouteICE
	default:
		return FabricRouteDirect
	}
}
// fabricRouteHopsForCandidate builds the hop list for a candidate in the given
// mode; unknown modes yield nil.
//
// The last hop is always the target node at the candidate's address, carrying
// the endpoint ID and any certificate pin from the metadata. When the planner
// has a local node ID it is prepended as the first hop: in direct mode for
// relay routes, in the route's own mode otherwise. Relay routes with a known
// relay node (RelayNodeID, else ViaNodeID) get an extra relay hop in between,
// addressed by the metadata's relay endpoint or, failing that, the candidate
// address.
func fabricRouteHopsForCandidate(candidate PeerEndpointCandidate, metadata FabricCandidateMetadata, mode FabricRouteMode, cfg FabricRoutePlannerConfig) []FabricRouteHop {
	switch mode {
	case FabricRouteRelay, FabricRouteLAN, FabricRouteICE, FabricRouteReverse, FabricRouteDirect:
	default:
		return nil
	}

	localNodeID := strings.TrimSpace(cfg.LocalNodeID)
	endpoint := strings.TrimRight(strings.TrimSpace(candidate.Address), "/")
	targetHop := FabricRouteHop{
		NodeID:         strings.TrimSpace(candidate.NodeID),
		Mode:           mode,
		EndpointID:     candidate.EndpointID,
		Address:        endpoint,
		PeerCertSHA256: candidatePeerCertSHA256(candidate),
	}

	hops := make([]FabricRouteHop, 0, 3)
	if mode != FabricRouteRelay {
		if localNodeID != "" {
			hops = append(hops, FabricRouteHop{NodeID: localNodeID, Mode: mode})
		}
		return append(hops, targetHop)
	}

	// Relay route: the local hop reaches the relay directly.
	if localNodeID != "" {
		hops = append(hops, FabricRouteHop{NodeID: localNodeID, Mode: FabricRouteDirect})
	}
	relayNodeID := firstNonEmpty(strings.TrimSpace(metadata.RelayNodeID), strings.TrimSpace(metadata.ViaNodeID))
	if relayNodeID != "" {
		relayEndpoint := firstNonEmpty(strings.TrimRight(strings.TrimSpace(metadata.RelayEndpoint), "/"), endpoint)
		hops = append(hops, FabricRouteHop{
			NodeID:     relayNodeID,
			Mode:       FabricRouteRelay,
			EndpointID: candidate.EndpointID + ":relay",
			Address:    relayEndpoint,
		})
	}
	return append(hops, targetHop)
}
// isQUICOnlyCandidateTransport reports whether the transport label, after
// trimming and lowercasing, is one of the QUIC spellings ("quic",
// "direct_quic", "udp_quic", "quic_udp") or the string form of the LAN,
// reverse, relay or ICE route mode.
func isQUICOnlyCandidateTransport(transport string) bool {
	normalized := strings.ToLower(strings.TrimSpace(transport))
	switch normalized {
	case "quic", "direct_quic", "udp_quic", "quic_udp":
		return true
	case string(FabricRouteLAN), string(FabricRouteReverse), string(FabricRouteRelay), string(FabricRouteICE):
		return true
	}
	return false
}
// fabricRouteLatencyFromCandidate estimates a route's base latency in
// milliseconds.
//
// A positive observed latency for the endpoint is used as is, clamped to the
// largest int. Otherwise the estimate is a reachability-dependent base
// (private 3, outbound-only 25, relay 40, anything else 10) plus the
// candidate's rank index, plus (100-score)/10 when the score is below 100.
func fabricRouteLatencyFromCandidate(candidate PeerEndpointCandidate, cfg FabricRoutePlannerConfig, score int, index int) int {
	const maxInt = int(^uint(0) >> 1)
	if observation, ok := cfg.Observations[candidate.EndpointID]; ok && observation.LastLatencyMs > 0 {
		if observation.LastLatencyMs > int64(maxInt) {
			return maxInt
		}
		return int(observation.LastLatencyMs)
	}

	floor := 10
	switch strings.ToLower(strings.TrimSpace(candidate.Reachability)) {
	case FabricCandidateReachabilityPrivate:
		floor = 3
	case FabricCandidateReachabilityOutboundOnly:
		floor = 25
	case FabricCandidateReachabilityRelay:
		floor = 40
	}
	estimate := floor + index
	if score < 100 {
		estimate += (100 - score) / 10
	}
	return estimate
}
// fabricRouteCapacityForMode returns the channel capacity for a route mode:
// the relay or reverse capacity for those modes when positive, else the
// default capacity when positive, else 100.
func fabricRouteCapacityForMode(mode FabricRouteMode, cfg FabricRoutePlannerConfig) int {
	modeCapacity := 0
	switch mode {
	case FabricRouteRelay:
		modeCapacity = cfg.RelayCapacity
	case FabricRouteReverse:
		modeCapacity = cfg.ReverseCapacity
	}
	// A zero mode capacity is skipped, leaving the default-then-100 fallback.
	return firstPositiveInt(modeCapacity, cfg.DefaultCapacity, 100)
}
// candidatePressureCount returns the capacity-pressure count recorded for
// endpointID in cfg, or 0 when there is none.
func candidatePressureCount(endpointID string, cfg FabricRoutePlannerConfig) int64 {
	pressure, tracked := cfg.CapacityPressure[endpointID]
	if !tracked {
		return 0
	}
	return pressure.Count
}
// sameLocalSegment reports whether the candidate's segment ID equals the
// planner's local segment ID, ignoring case and surrounding whitespace. A blank
// local segment never matches.
func sameLocalSegment(metadata FabricCandidateMetadata, cfg FabricRoutePlannerConfig) bool {
	want := strings.TrimSpace(cfg.LocalSegmentID)
	return want != "" && strings.EqualFold(strings.TrimSpace(metadata.LocalSegmentID), want)
}
// sameNATGroup reports whether the candidate's NAT group ID equals the
// planner's local NAT group ID, ignoring case and surrounding whitespace. A
// blank local NAT group never matches.
func sameNATGroup(metadata FabricCandidateMetadata, cfg FabricRoutePlannerConfig) bool {
	want := strings.TrimSpace(cfg.LocalNATGroupID)
	return want != "" && strings.EqualFold(strings.TrimSpace(metadata.NATGroupID), want)
}
// decodeFabricCandidateMetadata parses a candidate's raw JSON metadata. Empty
// or malformed input yields the zero value; a partially decoded value is never
// returned.
func decodeFabricCandidateMetadata(raw json.RawMessage) FabricCandidateMetadata {
	var decoded FabricCandidateMetadata
	if len(raw) == 0 {
		return decoded
	}
	if json.Unmarshal(raw, &decoded) != nil {
		return FabricCandidateMetadata{}
	}
	return decoded
}
// candidatePeerCertSHA256 extracts a certificate pin from the candidate's JSON
// metadata: the trimmed "peer_cert_sha256" value or, when that is blank,
// "tls_cert_sha256". Missing or malformed metadata yields "".
func candidatePeerCertSHA256(candidate PeerEndpointCandidate) string {
	if len(candidate.Metadata) == 0 {
		return ""
	}
	var pins struct {
		PeerCertSHA256 string `json:"peer_cert_sha256,omitempty"`
		TLSCertSHA256  string `json:"tls_cert_sha256,omitempty"`
	}
	if json.Unmarshal(candidate.Metadata, &pins) != nil {
		return ""
	}
	peerPin := strings.TrimSpace(pins.PeerCertSHA256)
	tlsPin := strings.TrimSpace(pins.TLSCertSHA256)
	return firstNonEmpty(peerPin, tlsPin)
}
// firstPositiveInt returns the first value greater than zero, or 0 when there
// is none.
func firstPositiveInt(values ...int) int {
	for i := range values {
		if candidate := values[i]; candidate > 0 {
			return candidate
		}
	}
	return 0
}
// firstNonZeroDuration returns the first duration greater than zero, or 0 when
// there is none. Despite the name, negative durations are skipped as well.
func firstNonZeroDuration(values ...time.Duration) time.Duration {
	for i := range values {
		if candidate := values[i]; candidate > 0 {
			return candidate
		}
	}
	return 0
}
// FabricRouteSetForRelayFallback plans a route set from sourceNodeID to
// targetNodeID through a single relay. It synthesizes one relay candidate with
// the endpoint ID "<target>-via-<relay>-relay" (both IDs trimmed), the target
// endpoint as its address and the relay node and endpoint as metadata, then
// runs it through FabricRouteSetForPeerEndpointCandidates. Trailing slashes
// are stripped from both endpoints.
func FabricRouteSetForRelayFallback(clusterID string, sourceNodeID string, targetNodeID string, relayNodeID string, relayEndpoint string, targetEndpoint string) FabricRouteSet {
	relayAddress := strings.TrimRight(strings.TrimSpace(relayEndpoint), "/")
	targetAddress := strings.TrimRight(strings.TrimSpace(targetEndpoint), "/")
	trimmedTarget := strings.TrimSpace(targetNodeID)

	relayMetadata := FabricCandidateMetadata{RelayNodeID: relayNodeID, RelayEndpoint: relayAddress}
	relayCandidate := PeerEndpointCandidate{
		EndpointID:       fmt.Sprintf("%s-via-%s-relay", trimmedTarget, strings.TrimSpace(relayNodeID)),
		NodeID:           trimmedTarget,
		Transport:        string(FabricRouteRelay),
		Address:          targetAddress,
		Reachability:     FabricCandidateReachabilityRelay,
		ConnectivityMode: FabricConnectivityRelayRequired,
		Metadata:         mustMarshalFabricCandidateMetadata(relayMetadata),
	}
	plannerConfig := FabricRoutePlannerConfig{
		ClusterID:   clusterID,
		LocalNodeID: sourceNodeID,
	}
	return FabricRouteSetForPeerEndpointCandidates(targetNodeID, []PeerEndpointCandidate{relayCandidate}, plannerConfig)
}
// mustMarshalFabricCandidateMetadata encodes metadata as JSON. Unlike the usual
// Must-prefixed helpers it does not panic: an encoding error yields a nil
// message.
func mustMarshalFabricCandidateMetadata(metadata FabricCandidateMetadata) json.RawMessage {
	encoded, err := json.Marshal(metadata)
	if err != nil {
		return nil
	}
	return encoded
}
@@ -0,0 +1,187 @@
package mesh
import (
"encoding/json"
"testing"
"time"
)
// TestFabricRouteSetForPeerEndpointCandidatesPrefersLocalLAN offers the
// planner a public candidate and a private candidate whose metadata names the
// planner's own local segment. It expects the private candidate to become the
// primary route with its final hop in LAN mode.
func TestFabricRouteSetForPeerEndpointCandidatesPrefersLocalLAN(t *testing.T) {
	lanMetadata, _ := json.Marshal(FabricCandidateMetadata{LocalSegmentID: "site-a", NATGroupID: "nat-a"})

	publicCandidate := PeerEndpointCandidate{
		EndpointID:       "node-b-public",
		NodeID:           "node-b",
		Transport:        "quic",
		Address:          "quic://203.0.113.10:19443",
		Reachability:     "public",
		ConnectivityMode: "direct",
		Priority:         10,
	}
	lanCandidate := PeerEndpointCandidate{
		EndpointID:       "node-b-lan",
		NodeID:           "node-b",
		Transport:        "quic",
		Address:          "quic://10.10.0.12:19443",
		Reachability:     "private",
		ConnectivityMode: "direct",
		PolicyTags:       []string{"private-lan"},
		Metadata:         lanMetadata,
	}
	plannerConfig := FabricRoutePlannerConfig{
		ClusterID:       "cluster-1",
		LocalNodeID:     "node-a",
		LocalSegmentID:  "site-a",
		DefaultCapacity: 200,
		Now:             time.Unix(100, 0).UTC(),
	}

	routeSet := FabricRouteSetForPeerEndpointCandidates("node-b", []PeerEndpointCandidate{publicCandidate, lanCandidate}, plannerConfig)

	if got := routeSet.Primary.RouteID; got != "node-b-lan" {
		t.Fatalf("primary route = %q, want node-b-lan", got)
	}
	hops := routeSet.Primary.Hops
	if mode := hops[len(hops)-1].Mode; mode != FabricRouteLAN {
		t.Fatalf("primary mode = %q, want lan", mode)
	}
}
// TestFabricRouteSetForPeerEndpointCandidatesBuildsRelayFallback feeds the
// planner one relay-required candidate whose metadata names a relay node. It
// expects that candidate to become the primary route, with a relay count of 2,
// the relay node as the second hop, and the configured relay capacity.
func TestFabricRouteSetForPeerEndpointCandidatesBuildsRelayFallback(t *testing.T) {
	metadata, err := json.Marshal(FabricCandidateMetadata{RelayNodeID: "node-r", RelayEndpoint: "quic://node-r:19443"})
	if err != nil {
		t.Fatalf("marshal candidate metadata: %v", err)
	}
	routeSet := FabricRouteSetForPeerEndpointCandidates("node-b", []PeerEndpointCandidate{{
		EndpointID:       "node-b-relay",
		NodeID:           "node-b",
		Transport:        "quic",
		Address:          "quic://node-b-passive:19443",
		Reachability:     "outbound_only",
		ConnectivityMode: "relay_required",
		NATType:          "symmetric",
		Metadata:         metadata,
	}}, FabricRoutePlannerConfig{
		ClusterID:     "cluster-1",
		LocalNodeID:   "node-a",
		RelayCapacity: 50,
		Now:           time.Unix(100, 0).UTC(),
	})
	if routeSet.Primary.RouteID != "node-b-relay" {
		t.Fatalf("primary route = %q", routeSet.Primary.RouteID)
	}
	if routeSet.Primary.RelayCount != 2 {
		t.Fatalf("relay count = %d, want 2", routeSet.Primary.RelayCount)
	}
	// Guard the index below so a short route fails with a readable message
	// instead of an index-out-of-range panic.
	if len(routeSet.Primary.Hops) < 2 {
		t.Fatalf("hops = %+v, want a relay hop at index 1", routeSet.Primary.Hops)
	}
	if got := routeSet.Primary.Hops[1].NodeID; got != "node-r" {
		t.Fatalf("relay hop = %q, want node-r", got)
	}
	if routeSet.Primary.Capacity != 50 {
		t.Fatalf("capacity = %d, want 50", routeSet.Primary.Capacity)
	}
}
func TestFabricRouteSetForPeerEndpointCandidatesUsesTargetWhenRelayMetadataIsAbsent(t *testing.T) {
routeSet := FabricRouteSetForPeerEndpointCandidates("node-b", []PeerEndpointCandidate{{
EndpointID: "node-b-relay",
NodeID: "node-b",
Transport: "relay_quic",
Address: "quic://node-b:19443",
Reachability: "relay",
ConnectivityMode: "relay_required",
Metadata: json.RawMessage(`{"tls_cert_sha256":"abc123"}`),
}}, FabricRoutePlannerConfig{ClusterID: "cluster-1", LocalNodeID: "node-a"})
if routeSet.Primary.RouteID != "node-b-relay" {
t.Fatalf("primary route = %q", routeSet.Primary.RouteID)
}
if len(routeSet.Primary.Hops) != 2 {
t.Fatalf("hops = %+v, want local + target only", routeSet.Primary.Hops)
}
targetHop := routeSet.Primary.Hops[1]
if targetHop.NodeID != "node-b" || targetHop.Mode != FabricRouteRelay || targetHop.PeerCertSHA256 != "abc123" {
t.Fatalf("target hop = %+v, want relay-mode target with cert", targetHop)
}
}
// TestFabricRouteSetForPeerEndpointCandidatesAcceptsExplicitQUICModes checks,
// one subtest per transport label, that the mode-specific QUIC labels produce
// a route whose final hop has the matching mode and carries the certificate
// fingerprint from the candidate metadata.
func TestFabricRouteSetForPeerEndpointCandidatesAcceptsExplicitQUICModes(t *testing.T) {
	type modeCase struct {
		name      string
		transport string
		wantMode  FabricRouteMode
	}
	cases := []modeCase{
		{name: "lan", transport: "lan_quic", wantMode: FabricRouteLAN},
		{name: "reverse", transport: "reverse_quic", wantMode: FabricRouteReverse},
		{name: "relay", transport: "relay_quic", wantMode: FabricRouteRelay},
		{name: "ice", transport: "ice_quic", wantMode: FabricRouteICE},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			candidate := PeerEndpointCandidate{
				EndpointID:       "node-b-" + tc.name,
				NodeID:           "node-b",
				Transport:        tc.transport,
				Address:          "quic://node-b:19443",
				Reachability:     "private",
				ConnectivityMode: "direct",
				Metadata:         json.RawMessage(`{"tls_cert_sha256":"abc123"}`),
			}
			plannerConfig := FabricRoutePlannerConfig{ClusterID: "cluster-1", LocalNodeID: "node-a"}
			routeSet := FabricRouteSetForPeerEndpointCandidates("node-b", []PeerEndpointCandidate{candidate}, plannerConfig)
			if routeSet.Primary.RouteID == "" {
				t.Fatalf("%s candidate produced empty route set", tc.transport)
			}
			hops := routeSet.Primary.Hops
			lastHop := hops[len(hops)-1]
			if lastHop.Mode != tc.wantMode {
				t.Fatalf("mode = %q, want %q", lastHop.Mode, tc.wantMode)
			}
			if lastHop.PeerCertSHA256 != "abc123" {
				t.Fatalf("peer cert = %q, want abc123", lastHop.PeerCertSHA256)
			}
		})
	}
}
// TestFabricRouteSetForPeerEndpointCandidatesTreatsSameNATGroupAsLAN supplies
// a private candidate whose metadata NAT group equals the planner's local NAT
// group. It expects the final hop of the primary route to be in LAN mode.
func TestFabricRouteSetForPeerEndpointCandidatesTreatsSameNATGroupAsLAN(t *testing.T) {
	metadata, err := json.Marshal(FabricCandidateMetadata{NATGroupID: "nat-a"})
	if err != nil {
		t.Fatalf("marshal candidate metadata: %v", err)
	}
	routeSet := FabricRouteSetForPeerEndpointCandidates("node-b", []PeerEndpointCandidate{{
		EndpointID:       "node-b-nat-lan",
		NodeID:           "node-b",
		Transport:        "quic",
		Address:          "quic://10.44.0.12:19443",
		Reachability:     "private",
		ConnectivityMode: "direct",
		NATType:          "symmetric",
		Metadata:         metadata,
	}}, FabricRoutePlannerConfig{
		ClusterID:       "cluster-1",
		LocalNodeID:     "node-a",
		LocalNATGroupID: "nat-a",
	})
	// Without this guard an empty route would index Hops[-1] and panic rather
	// than report a test failure.
	hops := routeSet.Primary.Hops
	if len(hops) == 0 {
		t.Fatalf("route = %+v, want at least one hop", routeSet.Primary)
	}
	if hops[len(hops)-1].Mode != FabricRouteLAN {
		t.Fatalf("route = %+v, want LAN mode for same NAT group", routeSet.Primary)
	}
}
// TestFabricRouteSetForPeerEndpointCandidatesRejectsNonQUIC plans each legacy
// transport label on its own and expects neither a primary route nor any warm
// standby route to come back.
func TestFabricRouteSetForPeerEndpointCandidatesRejectsNonQUIC(t *testing.T) {
	rejected := []PeerEndpointCandidate{
		{
			EndpointID:       "node-b-http",
			NodeID:           "node-b",
			Transport:        "direct_http",
			Address:          "http://node-b:8080",
			Reachability:     "public",
			ConnectivityMode: "direct",
		},
		{
			EndpointID:       "node-b-legacy-relay",
			NodeID:           "node-b",
			Transport:        "relay",
			Address:          "quic://node-r:19443",
			Reachability:     "relay",
			ConnectivityMode: "relay_required",
		},
		{
			EndpointID:       "node-b-legacy-reverse",
			NodeID:           "node-b",
			Transport:        "outbound_reverse",
			Address:          "quic://node-b:19443",
			Reachability:     "outbound_only",
			ConnectivityMode: "outbound_only",
		},
	}
	for i := range rejected {
		single := []PeerEndpointCandidate{rejected[i]}
		routeSet := FabricRouteSetForPeerEndpointCandidates("node-b", single, FabricRoutePlannerConfig{ClusterID: "cluster-1", LocalNodeID: "node-a"})
		hasPrimary := routeSet.Primary.RouteID != ""
		hasStandby := len(routeSet.WarmStandby) != 0
		if hasPrimary || hasStandby {
			t.Fatalf("non-quic candidate produced route set: %+v", routeSet)
		}
	}
}
@@ -0,0 +1,137 @@
package mesh
import (
"strings"
"sync"
"sync/atomic"
)
// FabricRoutePressureTracker counts, per route ID, how many acquisitions are
// currently outstanding, and keeps running totals and high-water marks.
// Acquire and SnapshotPressure access every field below while holding mu.
type FabricRoutePressureTracker struct {
	mu sync.Mutex
	// active maps a route ID to its number of acquired-but-not-yet-released
	// holds; an entry is deleted when its count would drop to zero.
	active map[string]int
	// maxActive maps a route ID to the highest count active has held for it.
	maxActive map[string]int
	// acquiredTotal is incremented once per recorded Acquire.
	acquiredTotal uint64
	// releasedTotal is incremented once per release func, on its first call.
	releasedTotal uint64
	// maxActiveTotal is the highest sum over active seen right after an Acquire.
	maxActiveTotal int
	// lastAcquiredRoute is the route ID of the most recent Acquire.
	lastAcquiredRoute string
	// lastReleasedRoute is the route ID of the most recent effective release.
	lastReleasedRoute string
}
// FabricRoutePressureSnapshot is the JSON-serializable copy of a
// FabricRoutePressureTracker's state returned by SnapshotPressure.
type FabricRoutePressureSnapshot struct {
	// Active is a copy of the per-route outstanding counts.
	Active map[string]int `json:"active"`
	// MaxActive is a copy of the per-route high-water marks.
	MaxActive map[string]int `json:"max_active"`
	// ActiveTotal is the sum of all counts in Active.
	ActiveTotal int `json:"active_total"`
	// MaxActiveTotal is the tracker's recorded high-water mark for that sum.
	MaxActiveTotal int `json:"max_active_total"`
	// AcquiredTotal and ReleasedTotal are the tracker's lifetime counters.
	AcquiredTotal uint64 `json:"acquired_total"`
	ReleasedTotal uint64 `json:"released_total"`
	// LastAcquiredRoute and LastReleasedRoute are the most recent route IDs
	// seen by an acquire and by a release; both are omitted when empty.
	LastAcquiredRoute string `json:"last_acquired_route,omitempty"`
	LastReleasedRoute string `json:"last_released_route,omitempty"`
}
// NewFabricRoutePressureTracker returns a tracker whose per-route maps are
// already allocated and empty.
func NewFabricRoutePressureTracker() *FabricRoutePressureTracker {
	tracker := new(FabricRoutePressureTracker)
	tracker.active = make(map[string]int)
	tracker.maxActive = make(map[string]int)
	return tracker
}
// Apply returns routeSet with the tracker's current outstanding counts added
// to ActiveChannels of every route (primary, warm standby, cold fallback)
// whose RouteID has a positive count.
//
// The caller's routeSet is never modified: the standby and fallback slices
// are copied before being adjusted, so calling Apply repeatedly on the same
// value does not accumulate pressure. A nil tracker, or one with nothing
// outstanding, returns routeSet unchanged.
func (t *FabricRoutePressureTracker) Apply(routeSet FabricRouteSet) FabricRouteSet {
	if t == nil {
		return routeSet
	}
	active := t.Snapshot()
	if len(active) == 0 {
		return routeSet
	}
	apply := func(route FabricRoute) FabricRoute {
		if count := active[route.RouteID]; count > 0 {
			route.ActiveChannels += count
		}
		return route
	}
	// applyAll adjusts a fresh copy; writing through routes directly would
	// mutate the backing array still owned by the caller.
	applyAll := func(routes []FabricRoute) []FabricRoute {
		if len(routes) == 0 {
			return routes
		}
		adjusted := make([]FabricRoute, len(routes))
		for i := range routes {
			adjusted[i] = apply(routes[i])
		}
		return adjusted
	}
	routeSet.Primary = apply(routeSet.Primary)
	routeSet.WarmStandby = applyAll(routeSet.WarmStandby)
	routeSet.ColdFallbacks = applyAll(routeSet.ColdFallbacks)
	return routeSet
}
// Acquire records one outstanding hold on routeID (whitespace-trimmed) and
// returns the function that releases it. The release function takes effect
// only on its first call; later calls do nothing. A nil tracker or a blank
// route ID records nothing and yields a no-op release function.
func (t *FabricRoutePressureTracker) Acquire(routeID string) func() {
	routeID = strings.TrimSpace(routeID)
	if t == nil || routeID == "" {
		return func() {}
	}

	func() {
		t.mu.Lock()
		defer t.mu.Unlock()
		// Tolerate a zero-value tracker that was not built by the constructor.
		if t.active == nil {
			t.active = map[string]int{}
		}
		if t.maxActive == nil {
			t.maxActive = map[string]int{}
		}
		current := t.active[routeID] + 1
		t.active[routeID] = current
		if current > t.maxActive[routeID] {
			t.maxActive[routeID] = current
		}
		t.acquiredTotal++
		t.lastAcquiredRoute = routeID
		if total := activeTotalLocked(t.active); total > t.maxActiveTotal {
			t.maxActiveTotal = total
		}
	}()

	var released atomic.Bool
	return func() {
		// Only the first caller flips the flag and proceeds.
		if !released.CompareAndSwap(false, true) {
			return
		}
		t.mu.Lock()
		defer t.mu.Unlock()
		if remaining := t.active[routeID] - 1; remaining > 0 {
			t.active[routeID] = remaining
		} else {
			// Drop the entry so idle routes do not linger in the map.
			delete(t.active, routeID)
		}
		t.releasedTotal++
		t.lastReleasedRoute = routeID
	}
}
// Snapshot returns only the per-route outstanding counts from
// SnapshotPressure.
func (t *FabricRoutePressureTracker) Snapshot() map[string]int {
	pressure := t.SnapshotPressure()
	return pressure.Active
}
// SnapshotPressure returns a copy of the tracker's counters taken under its
// lock. The returned maps are fresh copies, so the caller may modify them
// freely. A nil tracker yields the zero snapshot.
func (t *FabricRoutePressureTracker) SnapshotPressure() FabricRoutePressureSnapshot {
	var snapshot FabricRoutePressureSnapshot
	if t == nil {
		return snapshot
	}
	t.mu.Lock()
	defer t.mu.Unlock()

	snapshot.Active = make(map[string]int, len(t.active))
	for routeID, count := range t.active {
		snapshot.Active[routeID] = count
		snapshot.ActiveTotal += count
	}
	snapshot.MaxActive = make(map[string]int, len(t.maxActive))
	for routeID, peak := range t.maxActive {
		snapshot.MaxActive[routeID] = peak
	}
	snapshot.MaxActiveTotal = t.maxActiveTotal
	snapshot.AcquiredTotal = t.acquiredTotal
	snapshot.ReleasedTotal = t.releasedTotal
	snapshot.LastAcquiredRoute = t.lastAcquiredRoute
	snapshot.LastReleasedRoute = t.lastReleasedRoute
	return snapshot
}
// activeTotalLocked sums every count in active. It takes no lock itself; the
// callers in this file invoke it while holding the tracker's mutex.
func activeTotalLocked(active map[string]int) int {
	var sum int
	for routeID := range active {
		sum += active[routeID]
	}
	return sum
}
@@ -0,0 +1,44 @@
package mesh
import "testing"
// TestFabricRoutePressureTrackerAppliesAndReleasesActiveChannels takes two
// holds on route-a and one on route-b, and checks that Apply adds them to the
// matching routes. It then releases everything (one release is called twice)
// and checks the snapshot's totals, high-water marks and last-route fields.
func TestFabricRoutePressureTrackerAppliesAndReleasesActiveChannels(t *testing.T) {
	tracker := NewFabricRoutePressureTracker()
	releaseA := tracker.Acquire("route-a")
	releaseAAgain := tracker.Acquire("route-a")
	releaseB := tracker.Acquire("route-b")

	primary := testFabricRoute("route-a", "node-b", 10, 100, 3, true)
	standby := testFabricRoute("route-b", "node-b", 10, 100, 0, true)
	withPressure := tracker.Apply(FabricRouteSet{
		TargetKind:  FabricChannelTargetNode,
		TargetID:    "node-b",
		Primary:     primary,
		WarmStandby: []FabricRoute{standby},
	})
	if got := withPressure.Primary.ActiveChannels; got != 5 {
		t.Fatalf("primary active = %d, want 5", got)
	}
	if got := withPressure.WarmStandby[0].ActiveChannels; got != 1 {
		t.Fatalf("standby active = %d, want 1", got)
	}

	// releaseA appears twice on purpose: the repeat call must be ignored.
	for _, release := range []func(){releaseA, releaseA, releaseAAgain, releaseB} {
		release()
	}

	snapshot := tracker.SnapshotPressure()
	if inactive := len(snapshot.Active) == 0 && snapshot.ActiveTotal == 0; !inactive {
		t.Fatalf("snapshot after release = %+v, want inactive", snapshot)
	}
	if balanced := snapshot.AcquiredTotal == 3 && snapshot.ReleasedTotal == 3; !balanced {
		t.Fatalf("snapshot totals = %+v, want acquired/released 3", snapshot)
	}
	peaksOK := snapshot.MaxActive["route-a"] == 2 && snapshot.MaxActive["route-b"] == 1 && snapshot.MaxActiveTotal == 3
	if !peaksOK {
		t.Fatalf("snapshot max = %+v", snapshot)
	}
	lastOK := snapshot.LastAcquiredRoute == "route-b" && snapshot.LastReleasedRoute == "route-b"
	if !lastOK {
		t.Fatalf("snapshot last routes = %+v", snapshot)
	}
}
@@ -12,8 +12,9 @@ import (
func TestFabricSessionPeerManagerReusesPeerPump(t *testing.T) {
var opened int
server := httptest.NewServer(Server{
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
FabricSessionEnabled: true,
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
FabricSessionEnabled: true,
FabricSessionWebSocketEnabled: true,
FabricSessionLogger: func(entry FabricSessionEventLogEntry) {
if entry.Event == "fabric_session_websocket_opened" {
opened++
@@ -83,8 +84,9 @@ func TestFabricSessionPeerManagerReusesPeerPump(t *testing.T) {
func TestFabricSessionPeerManagerClosePeerReopens(t *testing.T) {
var opened int
server := httptest.NewServer(Server{
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
FabricSessionEnabled: true,
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
FabricSessionEnabled: true,
FabricSessionWebSocketEnabled: true,
FabricSessionLogger: func(entry FabricSessionEventLogEntry) {
if entry.Event == "fabric_session_websocket_opened" {
opened++
@@ -131,8 +133,9 @@ func TestFabricSessionPeerManagerClosePeerReopens(t *testing.T) {
func TestFabricSessionPeerManagerReopensClosedPump(t *testing.T) {
var opened int
server := httptest.NewServer(Server{
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
FabricSessionEnabled: true,
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
FabricSessionEnabled: true,
FabricSessionWebSocketEnabled: true,
FabricSessionLogger: func(entry FabricSessionEventLogEntry) {
if entry.Event == "fabric_session_websocket_opened" {
opened++
@@ -40,73 +40,22 @@ type FabricTransportTarget struct {
ErrorBuffer int
}
func FabricTransportForTarget(target FabricTransportTarget, websocket *WebSocketFabricTransport, quicTransport *QUICFabricTransport) (FabricTransport, FabricTransportTarget, error) {
func FabricTransportForTarget(target FabricTransportTarget, quicTransport *QUICFabricTransport) (FabricTransport, FabricTransportTarget, error) {
transportLabel := strings.ToLower(strings.TrimSpace(target.Transport))
endpoint := strings.TrimSpace(target.Endpoint)
if strings.HasPrefix(strings.ToLower(endpoint), "quic://") {
transportLabel = "quic"
if transportLabel == "" {
transportLabel = "quic"
}
target.Endpoint = strings.TrimPrefix(endpoint, "quic://")
}
switch transportLabel {
case "quic", "direct_quic", "udp_quic", "quic_udp":
case "quic", "direct_quic", "udp_quic", "quic_udp", "lan_quic", "reverse_quic", "relay_quic", "ice_quic":
if quicTransport == nil {
quicTransport = NewQUICFabricTransport(nil)
}
return quicTransport, target, nil
case "", "websocket", "ws", "wss", "direct_http", "direct_https", "direct_tcp_tls":
if websocket == nil {
websocket = NewWebSocketFabricTransport(nil)
}
return websocket, target, nil
default:
return nil, target, fmt.Errorf("unsupported fabric transport %q", target.Transport)
return nil, target, fmt.Errorf("unsupported fabric transport %q: quic is required", target.Transport)
}
}
// WebSocketFabricTransport is a fabric transport that obtains its sessions
// from a FabricSessionPeerManager.
type WebSocketFabricTransport struct {
	// Manager supplies the peer sessions. Connect creates one when it is nil.
	Manager *FabricSessionPeerManager
}
// NewWebSocketFabricTransport wraps manager in a transport, creating a new
// FabricSessionPeerManager when manager is nil.
func NewWebSocketFabricTransport(manager *FabricSessionPeerManager) *WebSocketFabricTransport {
	transport := &WebSocketFabricTransport{Manager: manager}
	if transport.Manager == nil {
		transport.Manager = NewFabricSessionPeerManager()
	}
	return transport
}
// Connect asks the transport's manager for the session belonging to target,
// translating the transport target into the manager's peer target: dial
// options come from Token/Header/Timeout/MaxPayload and pump options from the
// three buffer sizes. A missing manager is created and stored first.
func (t *WebSocketFabricTransport) Connect(ctx context.Context, target FabricTransportTarget) (FabricTransportSession, error) {
	manager := t.Manager
	if manager == nil {
		manager = NewFabricSessionPeerManager()
		t.Manager = manager
	}

	dialOptions := FabricSessionDialOptions{
		Token:      target.Token,
		Header:     target.Header,
		Timeout:    target.Timeout,
		MaxPayload: target.MaxPayload,
	}
	pumpOptions := FabricSessionPumpOptions{
		OutboundBuffer: target.OutboundBuffer,
		InboundBuffer:  target.InboundBuffer,
		ErrorBuffer:    target.ErrorBuffer,
	}
	return manager.Get(ctx, FabricSessionPeerTarget{
		PeerID:  target.PeerID,
		BaseURL: target.Endpoint,
		Options: dialOptions,
		Pump:    pumpOptions,
	})
}
// Close closes the underlying manager and returns its error. It is a no-op
// returning nil when the transport or its manager is nil.
func (t *WebSocketFabricTransport) Close() error {
	if t != nil && t.Manager != nil {
		return t.Manager.Close()
	}
	return nil
}
// Snapshot returns the manager's snapshot. When the transport or its manager
// is nil it returns a snapshot holding only the schema version string.
func (t *WebSocketFabricTransport) Snapshot() FabricSessionPeerManagerSnapshot {
	if t != nil && t.Manager != nil {
		return t.Manager.Snapshot()
	}
	return FabricSessionPeerManagerSnapshot{SchemaVersion: "rap.fabric_session_peer_manager.v1"}
}
@@ -1,117 +1,27 @@
package mesh
import (
"context"
"net/http/httptest"
"strings"
"testing"
"time"
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
)
func TestWebSocketFabricTransportConnectsAndReusesSession(t *testing.T) {
var opened int
server := httptest.NewServer(Server{
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
FabricSessionEnabled: true,
FabricSessionLogger: func(entry FabricSessionEventLogEntry) {
if entry.Event == "fabric_session_websocket_opened" {
opened++
}
},
}.Handler())
defer server.Close()
transport := NewWebSocketFabricTransport(nil)
defer transport.Close()
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
defer cancel()
target := FabricTransportTarget{
PeerID: "node-a",
Endpoint: server.URL,
Token: "rap_fsn_transport",
Timeout: time.Second,
OutboundBuffer: 4,
InboundBuffer: 4,
ErrorBuffer: 4,
}
first, err := transport.Connect(ctx, target)
if err != nil {
t.Fatalf("first connect: %v", err)
}
second, err := transport.Connect(ctx, target)
if err != nil {
t.Fatalf("second connect: %v", err)
}
if first != second {
t.Fatal("transport did not reuse session")
}
if opened != 1 {
t.Fatalf("opened = %d, want 1", opened)
}
if err := first.Send(ctx, fabricproto.Frame{Type: fabricproto.FramePing, Sequence: 1, Payload: []byte("transport")}); err != nil {
t.Fatalf("send ping: %v", err)
}
select {
case frame := <-first.Frames():
if frame.Type != fabricproto.FramePong || frame.Sequence != 1 || string(frame.Payload) != "transport" {
t.Fatalf("frame = %+v", frame)
func TestFabricTransportRejectsWebSocketTransport(t *testing.T) {
for _, target := range []FabricTransportTarget{
{Transport: "wss", Endpoint: "wss://node-a.example/fabric/session"},
{Transport: "relay", Endpoint: "quic://node-r.example:19443"},
{Transport: "outbound_reverse", Endpoint: "quic://node-b.example:19443"},
} {
_, _, err := FabricTransportForTarget(target, nil)
if err == nil || !strings.Contains(err.Error(), "quic is required") {
t.Fatalf("target = %+v err = %v, want quic-only rejection", target, err)
}
case err := <-first.Errors():
t.Fatalf("session error: %v", err)
case <-ctx.Done():
t.Fatal(ctx.Err())
}
}
func TestWebSocketFabricTransportReopensClosedSession(t *testing.T) {
var opened int
server := httptest.NewServer(Server{
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
FabricSessionEnabled: true,
FabricSessionLogger: func(entry FabricSessionEventLogEntry) {
if entry.Event == "fabric_session_websocket_opened" {
opened++
}
},
}.Handler())
defer server.Close()
transport := NewWebSocketFabricTransport(nil)
defer transport.Close()
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
defer cancel()
target := FabricTransportTarget{
PeerID: "node-a",
Endpoint: server.URL,
Token: "rap_fsn_transport_reopen",
Timeout: time.Second,
}
first, err := transport.Connect(ctx, target)
if err != nil {
t.Fatalf("first connect: %v", err)
}
if err := first.Close(); err != nil {
t.Fatalf("close first session: %v", err)
}
second, err := transport.Connect(ctx, target)
if err != nil {
t.Fatalf("second connect: %v", err)
}
if first == second {
t.Fatal("transport reused closed session")
}
if opened != 2 {
t.Fatalf("opened = %d, want 2", opened)
}
}
func TestFabricTransportForTargetSelectsQUICByScheme(t *testing.T) {
transport, target, err := FabricTransportForTarget(FabricTransportTarget{
Endpoint: "quic://127.0.0.1:4433",
}, nil, nil)
}, nil)
if err != nil {
t.Fatalf("select transport: %v", err)
}
@@ -123,15 +33,12 @@ func TestFabricTransportForTargetSelectsQUICByScheme(t *testing.T) {
}
}
func TestFabricTransportForTargetSelectsWebSocketByDefault(t *testing.T) {
transport, target, err := FabricTransportForTarget(FabricTransportTarget{
func TestFabricTransportForTargetRejectsNonQUICByDefault(t *testing.T) {
_, target, err := FabricTransportForTarget(FabricTransportTarget{
Endpoint: "https://node.example",
}, nil, nil)
if err != nil {
t.Fatalf("select transport: %v", err)
}
if _, ok := transport.(*WebSocketFabricTransport); !ok {
t.Fatalf("transport = %T, want websocket", transport)
}, nil)
if err == nil {
t.Fatal("non-QUIC target unexpectedly selected a transport")
}
if target.Endpoint != "https://node.example" {
t.Fatalf("endpoint = %q", target.Endpoint)
@@ -1,42 +0,0 @@
package mesh
import (
"context"
"net/http"
"strings"
)
// HTTPPeerTransport sends synthetic mesh envelopes to explicitly configured
// peer endpoints. It is intentionally narrow: production forwarding remains
// disabled and only SyntheticRuntime messages use this transport.
type HTTPPeerTransport struct {
	// PeerURLs maps a peer node ID to that peer's base URL. SendSynthetic
	// looks the next hop up here by node ID.
	PeerURLs map[string]string
	// HTTPClient, when non-nil, replaces the HTTP client of the per-call
	// mesh client that SendSynthetic creates.
	HTTPClient *http.Client
}
// NewHTTPPeerTransport builds a transport from a node-ID-to-base-URL map.
// Node IDs are whitespace-trimmed; URLs are trimmed and stripped of trailing
// slashes. Entries whose ID or URL ends up empty are dropped. The input map
// is not retained.
func NewHTTPPeerTransport(peerURLs map[string]string) *HTTPPeerTransport {
	transport := &HTTPPeerTransport{PeerURLs: make(map[string]string, len(peerURLs))}
	for rawID, rawURL := range peerURLs {
		id := strings.TrimSpace(rawID)
		if id == "" {
			continue
		}
		endpoint := strings.TrimRight(strings.TrimSpace(rawURL), "/")
		if endpoint == "" {
			continue
		}
		transport.PeerURLs[id] = endpoint
	}
	return transport
}
// SendSynthetic delivers envelope to the peer registered under nextNodeID and
// returns that peer's reply. It returns ErrSyntheticPeerUnavailable when the
// transport is nil or no non-empty base URL is registered for the node. A
// fresh mesh client is created per call; the transport's HTTPClient, if set,
// is installed on it before sending.
func (t *HTTPPeerTransport) SendSynthetic(ctx context.Context, nextNodeID string, envelope SyntheticEnvelope) (SyntheticEnvelope, error) {
	var baseURL string
	if t != nil {
		baseURL = strings.TrimRight(strings.TrimSpace(t.PeerURLs[nextNodeID]), "/")
	}
	if baseURL == "" {
		return SyntheticEnvelope{}, ErrSyntheticPeerUnavailable
	}
	peer := NewClient(baseURL)
	if override := t.HTTPClient; override != nil {
		peer.HTTPClient = override
	}
	return peer.SendSynthetic(ctx, envelope)
}
@@ -1,130 +0,0 @@
package mesh
import (
"context"
"errors"
"net/http"
"net/http/httptest"
"testing"
"time"
)
// TestHTTPPeerTransportDirectSyntheticProbe starts two live nodes, routes
// node-a directly to node-b, and sends a probe from node-a. It expects a
// probe-ack whose recorded path is node-a, node-b.
func TestHTTPPeerTransportDirectSyntheticProbe(t *testing.T) {
	source := newLiveSyntheticNode(t, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"})
	defer source.Close()
	destination := newLiveSyntheticNode(t, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"})
	defer destination.Close()

	route := liveSyntheticRoute("route-direct", []string{"node-a", "node-b"})
	routes := []SyntheticRoute{route}
	// Only the source needs to know where its next hop lives.
	source.Runtime = newLiveRuntime(source.Local, routes, map[string]string{"node-b": destination.URL})
	destination.Runtime = newLiveRuntime(destination.Local, routes, map[string]string{})

	ack, err := source.Runtime.SendProbe(context.Background(), route.RouteID, SyntheticChannelFabricControl, "probe-live-direct")
	if err != nil {
		t.Fatalf("send live direct probe: %v", err)
	}
	if got := ack.MessageType; got != SyntheticMessageProbeAck {
		t.Fatalf("MessageType = %q, want %q", got, SyntheticMessageProbeAck)
	}
	wantPath := []string{"node-a", "node-b"}
	if gotPath := decodeAckPayload(t, ack).Path; !sameStrings(gotPath, wantPath) {
		t.Fatalf("path = %v, want %v", gotPath, wantPath)
	}
}
// TestHTTPPeerTransportSingleRelaySyntheticProbe starts three live nodes and
// routes node-a to node-b through node-r. A probe sent from node-a must come
// back as a probe-ack whose recorded path is node-a, node-r, node-b.
func TestHTTPPeerTransportSingleRelaySyntheticProbe(t *testing.T) {
	source := newLiveSyntheticNode(t, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"})
	defer source.Close()
	relay := newLiveSyntheticNode(t, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-r"})
	defer relay.Close()
	destination := newLiveSyntheticNode(t, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"})
	defer destination.Close()

	route := liveSyntheticRoute("route-relay", []string{"node-a", "node-r", "node-b"})
	routes := []SyntheticRoute{route}
	// Each node is told only about its own next hop.
	source.Runtime = newLiveRuntime(source.Local, routes, map[string]string{"node-r": relay.URL})
	relay.Runtime = newLiveRuntime(relay.Local, routes, map[string]string{"node-b": destination.URL})
	destination.Runtime = newLiveRuntime(destination.Local, routes, map[string]string{})

	ack, err := source.Runtime.SendProbe(context.Background(), route.RouteID, SyntheticChannelFabricControl, "probe-live-relay")
	if err != nil {
		t.Fatalf("send live relay probe: %v", err)
	}
	if got := ack.MessageType; got != SyntheticMessageProbeAck {
		t.Fatalf("MessageType = %q, want %q", got, SyntheticMessageProbeAck)
	}
	wantPath := []string{"node-a", "node-r", "node-b"}
	if gotPath := decodeAckPayload(t, ack).Path; !sameStrings(gotPath, wantPath) {
		t.Fatalf("path = %v, want %v", gotPath, wantPath)
	}
}
// TestHTTPPeerTransportMissingPeer sends to a node ID that has no registered
// URL and expects ErrSyntheticPeerUnavailable.
func TestHTTPPeerTransportMissingPeer(t *testing.T) {
	noPeers := NewHTTPPeerTransport(map[string]string{})
	if _, err := noPeers.SendSynthetic(context.Background(), "node-missing", SyntheticEnvelope{}); !errors.Is(err, ErrSyntheticPeerUnavailable) {
		t.Fatalf("err = %v, want ErrSyntheticPeerUnavailable", err)
	}
}
// liveSyntheticNode is a test fixture pairing a peer identity with an
// httptest server that serves the mesh handler for that identity.
type liveSyntheticNode struct {
	// Local is the identity the node's Server is built with.
	Local PeerIdentity
	// Runtime is assigned by tests after construction; the HTTP handler reads
	// it on every request.
	Runtime *SyntheticRuntime
	// URL is the base URL of the backing test server.
	URL string
	// server is the backing test server, shut down by Close.
	server *httptest.Server
}
// newLiveSyntheticNode starts an httptest server for local and returns the
// fixture wrapping it. The caller is responsible for calling Close.
func newLiveSyntheticNode(t *testing.T, local PeerIdentity) *liveSyntheticNode {
	t.Helper()
	node := new(liveSyntheticNode)
	node.Local = local
	// A fresh Server is built per request so the handler observes whatever
	// node.Runtime holds at request time, not at construction time.
	serve := func(w http.ResponseWriter, r *http.Request) {
		meshServer := Server{Local: node.Local, SyntheticRuntime: node.Runtime}
		meshServer.Handler().ServeHTTP(w, r)
	}
	node.server = httptest.NewServer(http.HandlerFunc(serve))
	node.URL = node.server.URL
	return node
}
// Close shuts down the node's test server, if one was started.
func (n *liveSyntheticNode) Close() {
	if n.server == nil {
		return
	}
	n.server.Close()
}
// newLiveRuntime returns an enabled SyntheticRuntime for local that knows the
// given routes and reaches its peers over an HTTPPeerTransport built from
// peers (node ID to base URL).
func newLiveRuntime(local PeerIdentity, routes []SyntheticRoute, peers map[string]string) *SyntheticRuntime {
	runtimeConfig := SyntheticRuntimeConfig{
		Enabled:   true,
		Local:     local,
		Routes:    routes,
		Transport: NewHTTPPeerTransport(peers),
	}
	return NewSyntheticRuntime(runtimeConfig)
}
// liveSyntheticRoute builds a cluster-1 route over hops that allows only the
// fabric-control channel and expires one hour from now. The first hop is the
// source and the last hop is the destination, so hops must be non-empty.
func liveSyntheticRoute(routeID string, hops []string) SyntheticRoute {
	first, last := hops[0], hops[len(hops)-1]
	route := SyntheticRoute{
		RouteID:              routeID,
		ClusterID:            "cluster-1",
		SourceNodeID:         first,
		DestinationNodeID:    last,
		Hops:                 hops,
		AllowedChannels:      []string{SyntheticChannelFabricControl},
		MaxTTL:               8,
		MaxHops:              8,
		ExpiresAt:            time.Now().UTC().Add(time.Hour),
		RouteVersion:         "route-v1",
		PolicyVersion:        "policy-v1",
		PeerDirectoryVersion: "peers-v1",
	}
	return route
}
// sameStrings reports whether left and right have the same length and the
// same element at every index. A nil slice and an empty slice compare equal.
func sameStrings(left, right []string) bool {
	if len(left) != len(right) {
		return false
	}
	for i, value := range left {
		if value != right[i] {
			return false
		}
	}
	return true
}
@@ -1,6 +1,7 @@
package mesh
import (
"encoding/json"
"sort"
"strings"
"time"
@@ -53,9 +54,11 @@ type PeerCacheEntry struct {
BestReachability string `json:"best_reachability,omitempty"`
BestConnectivity string `json:"best_connectivity,omitempty"`
BestNATType string `json:"best_nat_type,omitempty"`
BestRegion string `json:"best_region,omitempty"`
BestPolicyTags []string `json:"best_policy_tags,omitempty"`
BestCandidateScore int `json:"best_candidate_score,omitempty"`
BestScoreReasons []string `json:"best_score_reasons,omitempty"`
BestPeerCertSHA256 string `json:"best_peer_cert_sha256,omitempty"`
EndpointCandidates []PeerEndpointCandidate `json:"endpoint_candidates,omitempty"`
RendezvousLeaseID string `json:"rendezvous_lease_id,omitempty"`
RelayNodeID string `json:"relay_node_id,omitempty"`
@@ -132,9 +135,11 @@ func NewPeerCache(cfg PeerCacheConfig) *PeerCache {
entry.BestReachability = scored[0].Candidate.Reachability
entry.BestConnectivity = scored[0].Candidate.ConnectivityMode
entry.BestNATType = scored[0].Candidate.NATType
entry.BestRegion = scored[0].Candidate.Region
entry.BestPolicyTags = append([]string{}, scored[0].Candidate.PolicyTags...)
entry.BestCandidateScore = scored[0].Score
entry.BestScoreReasons = append([]string{}, scored[0].Reasons...)
entry.BestPeerCertSHA256 = candidatePeerCertSHA256(scored[0].Candidate)
entry.bestScore = scored[0].Score
if strings.TrimSpace(scored[0].Candidate.Address) != "" {
entry.Endpoint = strings.TrimSpace(scored[0].Candidate.Address)
@@ -188,6 +193,7 @@ func NewPeerCache(cfg PeerCacheConfig) *PeerCache {
if lease.PeerNodeID != cfg.Local.NodeID {
entry := peerCacheEntry(entries, lease.PeerNodeID)
useLeaseEndpoint := shouldUseRendezvousEndpoint(*entry)
localRelay := lease.RelayNodeID == cfg.Local.NodeID
entry.RendezvousLeaseID = lease.LeaseID
entry.RelayNodeID = lease.RelayNodeID
entry.RelayEndpoint = strings.TrimRight(strings.TrimSpace(lease.RelayEndpoint), "/")
@@ -195,12 +201,21 @@ func NewPeerCache(cfg PeerCacheConfig) *PeerCache {
entry.CandidateCount = maxInt(entry.CandidateCount, 1)
entry.ConnectivityModes = mergeStrings(entry.ConnectivityModes, []string{firstNonEmpty(lease.ConnectivityMode, "relay_required"), "relay_control"})
if useLeaseEndpoint {
entry.BestTransport = firstNonEmpty(lease.Transport, "relay_control")
if localRelay {
entry.BestTransport = "reverse_quic"
} else {
entry.BestTransport = firstNonEmpty(lease.Transport, "relay_quic")
}
entry.BestReachability = "relay"
entry.BestConnectivity = firstNonEmpty(lease.ConnectivityMode, "relay_required")
entry.Endpoint = entry.RelayEndpoint
entry.BestCandidateID = lease.LeaseID
entry.BestCandidateAddr = entry.RelayEndpoint
if !localRelay {
entry.Endpoint = entry.RelayEndpoint
entry.BestCandidateID = lease.LeaseID
entry.BestCandidateAddr = entry.RelayEndpoint
entry.BestPeerCertSHA256 = rendezvousLeasePeerCertSHA256(lease)
} else if strings.TrimSpace(entry.Endpoint) == "" {
entry.Endpoint = firstNonEmpty(entry.BestCandidateAddr, entry.RelayEndpoint)
}
entry.bestScore = maxInt(entry.bestScore, 500)
}
}
@@ -262,6 +277,20 @@ func NewPeerCache(cfg PeerCacheConfig) *PeerCache {
}}
}
// rendezvousLeasePeerCertSHA256 reads a certificate fingerprint out of a
// rendezvous lease's JSON metadata. The "peer_cert_sha256" and
// "tls_cert_sha256" values are whitespace-trimmed and handed to firstNonEmpty
// in that order. An empty string is returned when the metadata is absent or
// cannot be decoded.
func rendezvousLeasePeerCertSHA256(lease PeerRendezvousLease) string {
	raw := lease.Metadata
	if len(raw) == 0 {
		return ""
	}
	var fingerprints struct {
		PeerCertSHA256 string `json:"peer_cert_sha256,omitempty"`
		TLSCertSHA256  string `json:"tls_cert_sha256,omitempty"`
	}
	if json.Unmarshal(raw, &fingerprints) != nil {
		return ""
	}
	peerCert := strings.TrimSpace(fingerprints.PeerCertSHA256)
	tlsCert := strings.TrimSpace(fingerprints.TLSCertSHA256)
	return firstNonEmpty(peerCert, tlsCert)
}
func (c *PeerCache) Snapshot() PeerCacheSnapshot {
if c == nil {
return PeerCacheSnapshot{}
@@ -10,15 +10,15 @@ func TestPeerCacheSelectsAdjacentWarmPeersWithinLimit(t *testing.T) {
cache := NewPeerCache(PeerCacheConfig{
Local: local,
PeerEndpoints: map[string]string{
"node-a": "http://node-a:19000",
"node-r": "http://node-r:19000",
"node-c": "http://node-c:19000",
"node-a": "quic://node-a:19443",
"node-r": "quic://node-r:19443",
"node-c": "quic://node-c:19443",
},
Routes: []SyntheticRoute{
peerCacheRoute("route-1", []string{"node-a", local.NodeID, "node-r", "node-c"}),
},
RecoverySeeds: []PeerRecoverySeed{
{NodeID: "node-seed", Endpoint: "https://seed.example.test", Transport: "direct_tcp_tls", Priority: 10},
{NodeID: "node-seed", Endpoint: "quic://seed.example.test:19443", Transport: "direct_quic", Priority: 10},
},
WarmPeerLimit: 2,
Now: time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC),
@@ -42,7 +42,7 @@ func TestPeerCachePromotesRecoverySeedAfterRoutePeers(t *testing.T) {
peerCacheRoute("route-1", []string{"node-a", local.NodeID, "node-r"}),
},
RecoverySeeds: []PeerRecoverySeed{
{NodeID: "node-seed", Endpoint: "wss://seed.example.test/mesh", Transport: "wss", ConnectivityMode: "direct", Priority: 1},
{NodeID: "node-seed", Endpoint: "quic://seed.example.test:19443", Transport: "direct_quic", ConnectivityMode: "direct", Priority: 1},
},
WarmPeerLimit: 3,
Now: time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC),
@@ -68,7 +68,7 @@ func TestPeerCacheUsesBestEndpointCandidate(t *testing.T) {
{
EndpointID: "node-b-relay",
NodeID: "node-b",
Transport: "relay",
Transport: "relay_quic",
Address: "relay.example.test",
Reachability: "relay",
ConnectivityMode: "relay_required",
@@ -77,8 +77,8 @@ func TestPeerCacheUsesBestEndpointCandidate(t *testing.T) {
{
EndpointID: "node-b-public",
NodeID: "node-b",
Transport: "direct_tcp_tls",
Address: "203.0.113.20:443",
Transport: "direct_quic",
Address: "quic://203.0.113.20:19443",
Reachability: "public",
NATType: "none",
ConnectivityMode: "direct",
@@ -119,10 +119,10 @@ func TestPeerCacheAppliesEndpointHealthObservations(t *testing.T) {
LastVerifiedAt: &now,
},
{
EndpointID: "node-b-wss",
EndpointID: "node-b-ice",
NodeID: "node-b",
Transport: "wss",
Address: "https://node-b.example.test:443",
Transport: "ice_quic",
Address: "quic://node-b.example.test:19444",
Reachability: "public",
NATType: "none",
ConnectivityMode: "direct",
@@ -148,10 +148,10 @@ func TestPeerCacheAppliesEndpointHealthObservations(t *testing.T) {
if !ok {
t.Fatal("node-b missing from cache")
}
if entry.BestCandidateID != "node-b-wss" || entry.Endpoint != "https://node-b.example.test:443" {
if entry.BestCandidateID != "node-b-ice" || entry.Endpoint != "quic://node-b.example.test:19444" {
t.Fatalf("peer cache did not apply endpoint observations: %+v", entry)
}
if !containsString(entry.BestScoreReasons, "transport:wss") {
if !containsString(entry.BestScoreReasons, "transport:ice_quic") {
t.Fatalf("peer cache did not expose score reasons: %+v", entry.BestScoreReasons)
}
}
@@ -161,15 +161,15 @@ func TestPeerCacheUsesPreferredCorporateEndpointAddress(t *testing.T) {
cache := NewPeerCache(PeerCacheConfig{
Local: local,
PeerEndpoints: map[string]string{
"node-b": "https://node-b.public.example.test:443",
"node-b": "quic://node-b.public.example.test:19443",
},
PeerEndpointCandidates: map[string][]PeerEndpointCandidate{
"node-b": {
{
EndpointID: "node-b-public",
NodeID: "node-b",
Transport: "direct_tcp_tls",
Address: "https://node-b.public.example.test:443",
Transport: "direct_quic",
Address: "quic://node-b.public.example.test:19443",
Reachability: "public",
NATType: "none",
ConnectivityMode: "direct",
@@ -179,8 +179,8 @@ func TestPeerCacheUsesPreferredCorporateEndpointAddress(t *testing.T) {
{
EndpointID: "node-b-corp-lan",
NodeID: "node-b",
Transport: "direct_tcp_tls",
Address: "http://10.24.10.20:19001",
Transport: "lan_quic",
Address: "quic://10.24.10.20:19443",
Reachability: "private",
NATType: "none",
ConnectivityMode: "direct",
@@ -199,7 +199,7 @@ func TestPeerCacheUsesPreferredCorporateEndpointAddress(t *testing.T) {
if !ok {
t.Fatal("node-b missing from peer cache")
}
if entry.BestCandidateID != "node-b-corp-lan" || entry.Endpoint != "http://10.24.10.20:19001" {
if entry.BestCandidateID != "node-b-corp-lan" || entry.Endpoint != "quic://10.24.10.20:19443" {
t.Fatalf("peer cache did not choose corp LAN endpoint: %+v", entry)
}
}
@@ -29,6 +29,7 @@ type PeerConnectionIntentPlanConfig struct {
PeerCache PeerCacheSnapshot
RecoveryPlan PeerRecoveryPlan
RendezvousLeases []PeerRendezvousLease
PreferredRegion string
Now time.Time
}
@@ -62,12 +63,14 @@ type PeerConnectionIntent struct {
Reachability string `json:"reachability,omitempty"`
ConnectivityMode string `json:"connectivity_mode,omitempty"`
NATType string `json:"nat_type,omitempty"`
Region string `json:"region,omitempty"`
PolicyTags []string `json:"policy_tags,omitempty"`
RequiresRendezvous bool `json:"requires_rendezvous"`
RendezvousResolved bool `json:"rendezvous_resolved"`
DirectCandidate bool `json:"direct_candidate"`
RelayCandidate bool `json:"relay_candidate"`
BestCandidateID string `json:"best_candidate_id,omitempty"`
BestPeerCertSHA256 string `json:"best_peer_cert_sha256,omitempty"`
RendezvousLeaseID string `json:"rendezvous_lease_id,omitempty"`
RelayNodeID string `json:"relay_node_id,omitempty"`
RelayEndpoint string `json:"relay_endpoint,omitempty"`
@@ -94,33 +97,35 @@ func PlanPeerConnectionIntents(cfg PeerConnectionIntentPlanConfig) PeerConnectio
}
entry := entryByNode[candidate.NodeID]
intent := PeerConnectionIntent{
NodeID: candidate.NodeID,
Action: connectionIntentAction(candidate),
Reason: candidate.Reason,
Endpoint: candidate.Endpoint,
ConnectionState: candidate.ConnectionState,
Transport: firstNonEmpty(candidate.BestTransport, entry.BestTransport),
Reachability: entry.BestReachability,
ConnectivityMode: entry.BestConnectivity,
NATType: entry.BestNATType,
PolicyTags: append([]string{}, entry.BestPolicyTags...),
BestCandidateID: firstNonEmpty(candidate.BestCandidateID, entry.BestCandidateID),
RendezvousLeaseID: entry.RendezvousLeaseID,
RelayNodeID: entry.RelayNodeID,
RelayEndpoint: entry.RelayEndpoint,
RelayCandidate: entry.RelayControl,
ControlPlaneOnly: entry.RelayControl,
RecoverySeed: candidate.RecoverySeed || entry.RecoverySeed,
Priority: candidate.Priority,
GeneratedAt: now,
NodeID: candidate.NodeID,
Action: connectionIntentAction(candidate),
Reason: candidate.Reason,
Endpoint: candidate.Endpoint,
ConnectionState: candidate.ConnectionState,
Transport: firstNonEmpty(candidate.BestTransport, entry.BestTransport),
Reachability: entry.BestReachability,
ConnectivityMode: entry.BestConnectivity,
NATType: entry.BestNATType,
Region: entry.BestRegion,
PolicyTags: append([]string{}, entry.BestPolicyTags...),
BestCandidateID: firstNonEmpty(candidate.BestCandidateID, entry.BestCandidateID),
BestPeerCertSHA256: entry.BestPeerCertSHA256,
RendezvousLeaseID: entry.RendezvousLeaseID,
RelayNodeID: entry.RelayNodeID,
RelayEndpoint: entry.RelayEndpoint,
RelayCandidate: entry.RelayControl,
ControlPlaneOnly: entry.RelayControl,
RecoverySeed: candidate.RecoverySeed || entry.RecoverySeed,
Priority: candidate.Priority,
GeneratedAt: now,
}
mode, requiresRendezvous, directCandidate := classifyPeerTransport(intent)
mode, requiresRendezvous, directCandidate := classifyPeerTransport(intent, cfg.PreferredRegion)
intent.TransportMode = mode
intent.RequiresRendezvous = requiresRendezvous
intent.DirectCandidate = directCandidate
if intent.RequiresRendezvous {
if lease, ok := rendezvousLeaseForPeer(cfg.RendezvousLeases, intent.NodeID, now); ok {
applyRendezvousLease(&intent, lease)
applyRendezvousLease(&intent, lease, cfg.PeerCache.LocalNodeID)
}
}
intents = append(intents, intent)
@@ -185,10 +190,12 @@ func connectionIntentAction(candidate PeerRecoveryCandidate) string {
}
}
func classifyPeerTransport(intent PeerConnectionIntent) (string, bool, bool) {
func classifyPeerTransport(intent PeerConnectionIntent, preferredRegion string) (string, bool, bool) {
transport := strings.ToLower(strings.TrimSpace(intent.Transport))
connectivity := strings.ToLower(strings.TrimSpace(intent.ConnectivityMode))
reachability := strings.ToLower(strings.TrimSpace(intent.Reachability))
region := strings.TrimSpace(intent.Region)
preferredRegion = strings.TrimSpace(preferredRegion)
tags := lowerStringSet(intent.PolicyTags)
if strings.Contains(transport, "relay") || connectivity == "relay_required" || reachability == "relay" {
@@ -201,6 +208,9 @@ func classifyPeerTransport(intent PeerConnectionIntent) (string, bool, bool) {
return PeerTransportModeCorporateLAN, false, true
}
if tags["private-lan"] || reachability == "private" || endpointHasPrivateHost(intent.Endpoint) {
if preferredRegion != "" && region != "" && !strings.EqualFold(region, preferredRegion) {
return PeerTransportModeRelayRequired, true, false
}
return PeerTransportModePrivateLAN, false, true
}
if strings.Contains(transport, "direct") || reachability == "public" || connectivity == "direct" {
@@ -246,9 +256,16 @@ func rendezvousLeaseForPeer(leases []PeerRendezvousLease, peerNodeID string, now
return candidates[0], true
}
func applyRendezvousLease(intent *PeerConnectionIntent, lease PeerRendezvousLease) {
intent.Endpoint = strings.TrimRight(strings.TrimSpace(lease.RelayEndpoint), "/")
intent.Transport = firstNonEmpty(lease.Transport, "relay_control")
func applyRendezvousLease(intent *PeerConnectionIntent, lease PeerRendezvousLease, localNodeID string) {
localRelay := strings.TrimSpace(lease.RelayNodeID) == strings.TrimSpace(localNodeID)
if !localRelay {
intent.Endpoint = strings.TrimRight(strings.TrimSpace(lease.RelayEndpoint), "/")
}
if localRelay {
intent.Transport = "reverse_quic"
} else {
intent.Transport = firstNonEmpty(lease.Transport, "relay_quic")
}
intent.TransportMode = PeerTransportModeRelayControl
intent.RequiresRendezvous = false
intent.RendezvousResolved = true
@@ -256,17 +273,33 @@ func applyRendezvousLease(intent *PeerConnectionIntent, lease PeerRendezvousLeas
intent.RelayCandidate = true
intent.RendezvousLeaseID = lease.LeaseID
intent.RelayNodeID = lease.RelayNodeID
intent.RelayEndpoint = intent.Endpoint
intent.RelayEndpoint = strings.TrimRight(strings.TrimSpace(lease.RelayEndpoint), "/")
intent.ControlPlaneOnly = true
if certSHA256 := rendezvousLeasePeerCertSHA256(lease); certSHA256 != "" && !localRelay {
intent.BestPeerCertSHA256 = certSHA256
}
if lease.ConnectivityMode != "" {
intent.ConnectivityMode = lease.ConnectivityMode
}
}
// endpointHasPrivateHost reports whether the host address that
// endpointHostAddr extracts from rawEndpoint is private, loopback, or
// link-local unicast. It reports false when endpointHostAddr yields no address.
func endpointHasPrivateHost(rawEndpoint string) bool {
	hostIP, parsed := endpointHostAddr(rawEndpoint)
	switch {
	case !parsed:
		return false
	case hostIP.IsPrivate(), hostIP.IsLoopback(), hostIP.IsLinkLocalUnicast():
		return true
	default:
		return false
	}
}
// endpointHasUnspecifiedHost reports whether the host address that
// endpointHostAddr extracts from rawEndpoint is the unspecified address.
// It reports false when endpointHostAddr yields no address.
func endpointHasUnspecifiedHost(rawEndpoint string) bool {
	hostIP, parsed := endpointHostAddr(rawEndpoint)
	if !parsed {
		return false
	}
	return hostIP.IsUnspecified()
}
func endpointHostAddr(rawEndpoint string) (netip.Addr, bool) {
rawEndpoint = strings.TrimSpace(rawEndpoint)
if rawEndpoint == "" {
return false
return netip.Addr{}, false
}
host := rawEndpoint
if parsed, err := url.Parse(rawEndpoint); err == nil && parsed.Host != "" {
@@ -277,9 +310,9 @@ func endpointHasPrivateHost(rawEndpoint string) bool {
}
addr, err := netip.ParseAddr(strings.Trim(host, "[]"))
if err != nil {
return false
return netip.Addr{}, false
}
return addr.IsPrivate() || addr.IsLoopback() || addr.IsLinkLocalUnicast()
return addr, true
}
func lowerStringSet(values []string) map[string]bool {
@@ -1,6 +1,7 @@
package mesh
import (
"encoding/json"
"testing"
"time"
)
@@ -11,8 +12,8 @@ func TestPeerConnectionIntentsClassifyCorporateDirect(t *testing.T) {
PeerCache: PeerCacheSnapshot{Entries: []PeerCacheEntry{
{
NodeID: "node-b",
Endpoint: "http://10.24.10.20:19001",
BestTransport: "direct_tcp_tls",
Endpoint: "quic://10.24.10.20:19443",
BestTransport: "lan_quic",
BestReachability: "private",
BestConnectivity: "direct",
BestPolicyTags: []string{"corp-lan", "same-site"},
@@ -23,7 +24,7 @@ func TestPeerConnectionIntentsClassifyCorporateDirect(t *testing.T) {
Candidates: []PeerRecoveryCandidate{
{
NodeID: "node-b",
Endpoint: "http://10.24.10.20:19001",
Endpoint: "quic://10.24.10.20:19443",
ConnectionState: PeerConnectionReady,
Reason: "maintain_ready",
Priority: 100,
@@ -48,15 +49,15 @@ func TestPeerConnectionIntentsClassifyOutboundAndRelayAsRendezvousRequired(t *te
PeerCache: PeerCacheSnapshot{Entries: []PeerCacheEntry{
{
NodeID: "node-b",
Endpoint: "https://node-b.example.test:443",
BestTransport: "direct_tcp_tls",
Endpoint: "quic://node-b.example.test:19443",
BestTransport: "reverse_quic",
BestReachability: "outbound_only",
BestConnectivity: "outbound_only",
},
{
NodeID: "node-c",
Endpoint: "relay://fabric-relay/node-c",
BestTransport: "relay",
BestTransport: "relay_quic",
BestReachability: "relay",
BestConnectivity: "relay_required",
},
@@ -66,7 +67,7 @@ func TestPeerConnectionIntentsClassifyOutboundAndRelayAsRendezvousRequired(t *te
Candidates: []PeerRecoveryCandidate{
{
NodeID: "node-b",
Endpoint: "https://node-b.example.test:443",
Endpoint: "quic://node-b.example.test:19443",
ConnectionState: PeerConnectionDisconnected,
Reason: "recover_warm",
Priority: 90,
@@ -91,6 +92,42 @@ func TestPeerConnectionIntentsClassifyOutboundAndRelayAsRendezvousRequired(t *te
}
}
func TestPeerConnectionIntentsRequireRendezvousForRemotePrivateRegion(t *testing.T) {
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
plan := PlanPeerConnectionIntents(PeerConnectionIntentPlanConfig{
PreferredRegion: "ifcm",
PeerCache: PeerCacheSnapshot{Entries: []PeerCacheEntry{
{
NodeID: "node-b",
Endpoint: "quic://192.168.200.61:19132",
BestTransport: "direct_quic",
BestReachability: "private",
BestConnectivity: "private_lan",
BestRegion: "docker-test",
},
}},
RecoveryPlan: PeerRecoveryPlan{
Mode: PeerRecoveryModeRecovery,
Candidates: []PeerRecoveryCandidate{{
NodeID: "node-b",
Endpoint: "quic://192.168.200.61:19132",
ConnectionState: PeerConnectionDisconnected,
Reason: "recover_warm",
Priority: 100,
}},
},
Now: now,
})
if plan.IntentCount != 1 || plan.RelayRequiredCount != 1 || plan.RendezvousRequiredCount != 1 {
t.Fatalf("unexpected remote private plan counts: %+v", plan)
}
intent := plan.Intents[0]
if intent.DirectCandidate || !intent.RequiresRendezvous || intent.TransportMode != PeerTransportModeRelayRequired {
t.Fatalf("unexpected remote private intent: %+v", intent)
}
}
func TestPeerConnectionIntentsResolveRendezvousWithRelayLease(t *testing.T) {
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
plan := PlanPeerConnectionIntents(PeerConnectionIntentPlanConfig{
@@ -120,13 +157,14 @@ func TestPeerConnectionIntentsResolveRendezvousWithRelayLease(t *testing.T) {
LeaseID: "lease-node-b-via-node-r",
PeerNodeID: "node-b",
RelayNodeID: "node-r",
RelayEndpoint: "http://node-r:19000",
Transport: "relay_control",
RelayEndpoint: "quic://node-r:19443",
Transport: "relay_quic",
ConnectivityMode: "relay_required",
Priority: 10,
ControlPlaneOnly: true,
IssuedAt: now.Add(-time.Minute),
ExpiresAt: now.Add(time.Minute),
Metadata: peerConnectionIntentLeaseMetadata(t, "abc123"),
},
},
Now: now,
@@ -137,9 +175,10 @@ func TestPeerConnectionIntentsResolveRendezvousWithRelayLease(t *testing.T) {
}
intent := plan.Intents[0]
if intent.TransportMode != PeerTransportModeRelayControl ||
intent.Endpoint != "http://node-r:19000" ||
intent.Endpoint != "quic://node-r:19443" ||
intent.RelayNodeID != "node-r" ||
intent.RendezvousLeaseID != "lease-node-b-via-node-r" ||
intent.BestPeerCertSHA256 != "abc123" ||
!intent.RelayCandidate ||
!intent.RendezvousResolved ||
intent.RequiresRendezvous {
@@ -176,8 +215,8 @@ func TestPeerConnectionIntentsSkipExpiredRendezvousLeaseAndReselect(t *testing.T
LeaseID: "lease-expired-preferred",
PeerNodeID: "node-b",
RelayNodeID: "node-r-old",
RelayEndpoint: "http://node-r-old:19000",
Transport: "relay_control",
RelayEndpoint: "quic://node-r-old:19443",
Transport: "relay_quic",
ConnectivityMode: "relay_required",
Priority: 1,
ControlPlaneOnly: true,
@@ -188,8 +227,8 @@ func TestPeerConnectionIntentsSkipExpiredRendezvousLeaseAndReselect(t *testing.T
LeaseID: "lease-active-reselected",
PeerNodeID: "node-b",
RelayNodeID: "node-r-new",
RelayEndpoint: "http://node-r-new:19000",
Transport: "relay_control",
RelayEndpoint: "quic://node-r-new:19443",
Transport: "relay_quic",
ConnectivityMode: "relay_required",
Priority: 20,
ControlPlaneOnly: true,
@@ -206,20 +245,29 @@ func TestPeerConnectionIntentsSkipExpiredRendezvousLeaseAndReselect(t *testing.T
intent := plan.Intents[0]
if intent.RendezvousLeaseID != "lease-active-reselected" ||
intent.RelayNodeID != "node-r-new" ||
intent.Endpoint != "http://node-r-new:19000" {
intent.Endpoint != "quic://node-r-new:19443" {
t.Fatalf("expired lease was not skipped: %+v", intent)
}
}
// peerConnectionIntentLeaseMetadata returns lease metadata for tests: a JSON
// object with the single key "peer_cert_sha256" set to certSHA256. A marshal
// failure aborts the calling test.
func peerConnectionIntentLeaseMetadata(t *testing.T, certSHA256 string) json.RawMessage {
	t.Helper()
	fields := map[string]string{"peer_cert_sha256": certSHA256}
	encoded, marshalErr := json.Marshal(fields)
	if marshalErr != nil {
		t.Fatalf("marshal metadata: %v", marshalErr)
	}
	return json.RawMessage(encoded)
}
func TestPeerConnectionIntentsClassifyPrivateEndpointWithoutCandidateHints(t *testing.T) {
plan := PlanPeerConnectionIntents(PeerConnectionIntentPlanConfig{
PeerCache: PeerCacheSnapshot{Entries: []PeerCacheEntry{
{NodeID: "node-b", Endpoint: "http://192.168.10.20:19001"},
{NodeID: "node-b", Endpoint: "quic://192.168.10.20:19443"},
}},
RecoveryPlan: PeerRecoveryPlan{Candidates: []PeerRecoveryCandidate{
{
NodeID: "node-b",
Endpoint: "http://192.168.10.20:19001",
Endpoint: "quic://192.168.10.20:19443",
ConnectionState: PeerConnectionDisconnected,
Reason: "recover_peer",
Priority: 10,
@@ -2,6 +2,7 @@ package mesh
import (
"context"
"fmt"
"net/http"
"strings"
"sync"
@@ -25,6 +26,8 @@ type PeerConnectionManagerConfig struct {
Tracker *PeerConnectionTracker
RendezvousLeases []PeerRendezvousLease
HTTPClient *http.Client
QUICTransport *QUICFabricTransport
PreferredRegion string
ProbeTimeout time.Duration
Now func() time.Time
}
@@ -35,6 +38,8 @@ type PeerConnectionManager struct {
tracker *PeerConnectionTracker
rendezvousLeases []PeerRendezvousLease
httpClient *http.Client
quicTransport *QUICFabricTransport
preferredRegion string
probeTimeout time.Duration
now func() time.Time
@@ -101,9 +106,10 @@ type PeerConnectionCandidateProbeResult struct {
}
type peerConnectionProbeTarget struct {
CandidateID string
Endpoint string
Transport string
CandidateID string
Endpoint string
Transport string
PeerCertSHA256 string
}
func NewPeerConnectionManager(cfg PeerConnectionManagerConfig) *PeerConnectionManager {
@@ -132,6 +138,8 @@ func NewPeerConnectionManager(cfg PeerConnectionManagerConfig) *PeerConnectionMa
tracker: cfg.Tracker,
rendezvousLeases: append([]PeerRendezvousLease{}, cfg.RendezvousLeases...),
httpClient: httpClient,
quicTransport: cfg.QUICTransport,
preferredRegion: strings.TrimSpace(cfg.PreferredRegion),
probeTimeout: probeTimeout,
now: now,
}
@@ -155,6 +163,7 @@ func (m *PeerConnectionManager) ProbeOnce(ctx context.Context) PeerConnectionMan
PeerCache: peerSnapshot,
RecoveryPlan: recoveryPlan,
RendezvousLeases: rendezvousLeases,
PreferredRegion: m.preferredRegion,
Now: startedAt,
})
entriesByNode := map[string]PeerCacheEntry{}
@@ -215,6 +224,15 @@ func (m *PeerConnectionManager) UpdatePeerConfig(peerCache *PeerCache, rendezvou
m.rendezvousLeases = append([]PeerRendezvousLease{}, rendezvousLeases...)
}
// UpdateQUICTransport replaces the QUIC fabric transport stored on the manager.
// The field is written while holding m.mu. Calling it on a nil manager is a
// no-op.
func (m *PeerConnectionManager) UpdateQUICTransport(transport *QUICFabricTransport) {
	if m == nil {
		return
	}
	m.mu.Lock()
	m.quicTransport = transport
	m.mu.Unlock()
}
func (m *PeerConnectionManager) peerConfigSnapshot() (*PeerCache, []PeerRendezvousLease) {
if m == nil {
return nil, nil
@@ -242,17 +260,18 @@ func (m *PeerConnectionManager) probeIntent(ctx context.Context, intent PeerConn
StartedAt: startedAt,
}
peer := PeerCacheEntry{
NodeID: intent.NodeID,
Endpoint: intent.Endpoint,
Warm: true,
WarmReason: intent.Reason,
RecoverySeed: intent.RecoverySeed,
BestCandidateID: intent.BestCandidateID,
BestTransport: intent.Transport,
RendezvousLeaseID: intent.RendezvousLeaseID,
RelayNodeID: intent.RelayNodeID,
RelayEndpoint: intent.RelayEndpoint,
RelayControl: intent.RelayCandidate,
NodeID: intent.NodeID,
Endpoint: intent.Endpoint,
Warm: true,
WarmReason: intent.Reason,
RecoverySeed: intent.RecoverySeed,
BestCandidateID: intent.BestCandidateID,
BestTransport: intent.Transport,
RendezvousLeaseID: intent.RendezvousLeaseID,
RelayNodeID: intent.RelayNodeID,
RelayEndpoint: intent.RelayEndpoint,
RelayControl: intent.RelayCandidate,
BestPeerCertSHA256: firstNonEmpty(intent.BestPeerCertSHA256, cacheEntry.BestPeerCertSHA256),
}
if intent.RequiresRendezvous {
result.LinkStatus = PeerConnectionProbeDeferred
@@ -282,13 +301,12 @@ func (m *PeerConnectionManager) probeIntent(ctx context.Context, intent PeerConn
ClusterID: m.local.ClusterID,
NodeID: intent.NodeID,
}
if intent.RelayCandidate && intent.RelayNodeID != "" {
target.NodeID = intent.RelayNodeID
}
target.NodeID = peerConnectionProbeTargetNodeID(intent, m.local.NodeID)
targets := []peerConnectionProbeTarget{{
CandidateID: intent.BestCandidateID,
Endpoint: intent.Endpoint,
Transport: intent.Transport,
CandidateID: intent.BestCandidateID,
Endpoint: intent.Endpoint,
Transport: intent.Transport,
PeerCertSHA256: intent.BestPeerCertSHA256,
}}
if intent.DirectCandidate {
targets = peerConnectionProbeTargets(intent, cacheEntry)
@@ -300,13 +318,14 @@ func (m *PeerConnectionManager) probeIntent(ctx context.Context, intent PeerConn
probePeer.BestCandidateID = strings.TrimSpace(probeTarget.CandidateID)
probePeer.BestCandidateAddr = probePeer.Endpoint
probePeer.BestTransport = strings.TrimSpace(probeTarget.Transport)
probePeer.BestPeerCertSHA256 = firstNonEmpty(probeTarget.PeerCertSHA256, probePeer.BestPeerCertSHA256)
if probePeer.Endpoint == "" {
continue
}
candidateStartedAt := normalizedNow(m.now())
m.tracker.BeginProbe(probePeer, candidateStartedAt)
probeCtx, cancel := context.WithTimeout(ctx, m.probeTimeout)
_, err := NewClient(probePeer.Endpoint).withHTTPClient(m.httpClient).SendHealth(probeCtx, NewHealthMessage(m.local, target))
err := m.probePeerTarget(probeCtx, probePeer, target)
cancel()
completedAt := normalizedNow(m.now())
candidateResult := PeerConnectionCandidateProbeResult{
@@ -354,47 +373,97 @@ func (m *PeerConnectionManager) probeIntent(ctx context.Context, intent PeerConn
return result
}
// peerConnectionProbeTargetNodeID picks the node ID a probe should address.
// An intent flagged as a relay candidate whose relay node ID is non-blank and
// differs from localNodeID (both compared after trimming whitespace) targets
// the relay node. Every other intent, including one whose relay is the local
// node, targets the peer itself.
func peerConnectionProbeTargetNodeID(intent PeerConnectionIntent, localNodeID string) string {
	if !intent.RelayCandidate {
		return intent.NodeID
	}
	relayID := strings.TrimSpace(intent.RelayNodeID)
	if relayID == "" || relayID == strings.TrimSpace(localNodeID) {
		return intent.NodeID
	}
	// The relay ID is returned exactly as stored on the intent; trimming is
	// applied only for the comparisons above.
	return intent.RelayNodeID
}
// probePeerTarget runs a single reachability probe against probePeer on behalf
// of the given target identity.
//
// The endpoint is trimmed of surrounding whitespace and trailing slashes. The
// probe is refused with "non_quic_probe_rejected" when hasLegacyEndpointScheme
// flags the endpoint or when peerConnectionTargetIsQUIC does not accept the
// transport/endpoint pair. Otherwise a fabric carrier is resolved for the
// target, a session is opened, and the session is closed again right away.
//
// It returns the first error from resolving the carrier, connecting, or
// closing the session; nil means the connect/close round trip succeeded.
func (m *PeerConnectionManager) probePeerTarget(ctx context.Context, probePeer PeerCacheEntry, target PeerIdentity) error {
	endpoint := strings.TrimRight(strings.TrimSpace(probePeer.Endpoint), "/")
	transport := strings.TrimSpace(probePeer.BestTransport)

	// The legacy-scheme check runs first, so a flagged endpoint is rejected
	// even when its transport label would otherwise count as QUIC.
	if hasLegacyEndpointScheme(endpoint) || !peerConnectionTargetIsQUIC(transport, endpoint) {
		return fmt.Errorf("non_quic_probe_rejected")
	}

	probeTarget := FabricTransportTarget{
		EndpointID:     probePeer.BestCandidateID,
		PeerID:         target.NodeID,
		Endpoint:       endpoint,
		Transport:      transport,
		Timeout:        m.probeTimeout,
		PeerCertSHA256: strings.TrimSpace(probePeer.BestPeerCertSHA256),
	}
	// NOTE(review): m.quicTransport is read here without holding m.mu, while
	// UpdateQUICTransport writes it under that lock. Confirm whether callers
	// serialize these, or whether the field should be snapshotted under the lock.
	carrier, selectedTarget, err := FabricTransportForTarget(probeTarget, m.quicTransport)
	if err != nil {
		return err
	}
	session, err := carrier.Connect(ctx, selectedTarget)
	if err != nil {
		return err
	}
	// Only reachability matters, so the session is closed immediately.
	return session.Close()
}
func peerConnectionProbeTargets(intent PeerConnectionIntent, cacheEntry PeerCacheEntry) []peerConnectionProbeTarget {
seen := map[string]struct{}{}
out := make([]peerConnectionProbeTarget, 0, len(cacheEntry.EndpointCandidates)+1)
add := func(candidateID, endpoint, transport string) {
add := func(candidateID, endpoint, transport, peerCertSHA256 string) {
endpoint = strings.TrimRight(strings.TrimSpace(endpoint), "/")
if endpoint == "" {
return
}
if endpointHasUnspecifiedHost(endpoint) {
return
}
key := candidateID + "|" + endpoint
if _, ok := seen[key]; ok {
return
}
seen[key] = struct{}{}
out = append(out, peerConnectionProbeTarget{
CandidateID: strings.TrimSpace(candidateID),
Endpoint: endpoint,
Transport: strings.TrimSpace(transport),
CandidateID: strings.TrimSpace(candidateID),
Endpoint: endpoint,
Transport: strings.TrimSpace(transport),
PeerCertSHA256: strings.TrimSpace(peerCertSHA256),
})
}
for _, candidate := range cacheEntry.EndpointCandidates {
if !candidateUsableForDirectProbe(candidate) {
continue
}
add(candidate.EndpointID, candidate.Address, candidate.Transport)
add(candidate.EndpointID, candidate.Address, candidate.Transport, candidatePeerCertSHA256(candidate))
}
add(intent.BestCandidateID, intent.Endpoint, intent.Transport)
add(intent.BestCandidateID, intent.Endpoint, intent.Transport, cacheEntry.BestPeerCertSHA256)
return out
}
// peerConnectionTargetIsQUIC reports whether a probe target counts as QUIC.
// It does when isQUICOnlyCandidateTransport accepts the transport label, or
// when the endpoint, trimmed and lower-cased, starts with "quic://".
func peerConnectionTargetIsQUIC(transport string, endpoint string) bool {
	if isQUICOnlyCandidateTransport(transport) {
		return true
	}
	normalized := strings.ToLower(strings.TrimSpace(endpoint))
	return strings.HasPrefix(normalized, "quic://")
}
func candidateUsableForDirectProbe(candidate PeerEndpointCandidate) bool {
endpoint := strings.TrimSpace(candidate.Address)
if endpoint == "" || strings.HasPrefix(endpoint, "relay://") || strings.HasPrefix(endpoint, "outbound://") {
return false
}
if endpointHasUnspecifiedHost(endpoint) {
return false
}
connectivity := strings.ToLower(strings.TrimSpace(candidate.ConnectivityMode))
reachability := strings.ToLower(strings.TrimSpace(candidate.Reachability))
transport := strings.ToLower(strings.TrimSpace(candidate.Transport))
if connectivity == "outbound_only" || connectivity == "relay_required" || reachability == "outbound_only" || reachability == "relay" {
return false
}
return transport == "" || strings.Contains(transport, "direct") || transport == "wss" || strings.HasPrefix(endpoint, "http://") || strings.HasPrefix(endpoint, "https://")
return transport == "" ||
strings.Contains(transport, "direct_quic") ||
transport == "quic" ||
transport == "lan_quic" ||
transport == "ice_quic" ||
strings.HasPrefix(endpoint, "quic://")
}
func (m *PeerConnectionManager) connectionState(nodeID string) PeerConnectionState {
@@ -2,8 +2,8 @@ package mesh
import (
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"testing"
"time"
)
@@ -11,12 +11,18 @@ import (
func TestPeerConnectionManagerProbesDirectAndDefersRendezvous(t *testing.T) {
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
current := now
server := httptest.NewServer(Server{
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"},
}.Handler())
tlsConfig := testQUICTLSConfig(t)
server, err := StartQUICFabricServer(context.Background(), QUICFabricServerConfig{
ListenAddr: "127.0.0.1:0",
TLSConfig: tlsConfig,
})
if err != nil {
t.Fatalf("start quic fabric server: %v", err)
}
defer server.Close()
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"}
certSHA256 := testQUICCertSHA256(t, tlsConfig)
cache := NewPeerCache(PeerCacheConfig{
Local: local,
PeerEndpointCandidates: map[string][]PeerEndpointCandidate{
@@ -24,19 +30,20 @@ func TestPeerConnectionManagerProbesDirectAndDefersRendezvous(t *testing.T) {
{
EndpointID: "node-b-direct",
NodeID: "node-b",
Transport: "direct_tcp_tls",
Address: server.URL,
Transport: "direct_quic",
Address: "quic://" + server.Addr().String(),
Reachability: "private",
ConnectivityMode: "direct",
PolicyTags: []string{"corp-lan", "same-site"},
Priority: 1,
Metadata: peerConnectionProbeMetadata(t, certSHA256),
},
},
"node-c": {
{
EndpointID: "node-c-relay",
NodeID: "node-c",
Transport: "relay",
Transport: "relay_quic",
Address: "relay://fabric/node-c",
Reachability: "relay",
ConnectivityMode: "relay_required",
@@ -49,10 +56,11 @@ func TestPeerConnectionManagerProbesDirectAndDefersRendezvous(t *testing.T) {
})
tracker := NewPeerConnectionTracker(cache.Snapshot(), now)
manager := NewPeerConnectionManager(PeerConnectionManagerConfig{
Local: local,
PeerCache: cache,
Tracker: tracker,
ProbeTimeout: time.Second,
Local: local,
PeerCache: cache,
Tracker: tracker,
QUICTransport: NewQUICFabricTransport(nil),
ProbeTimeout: time.Second,
Now: func() time.Time {
current = current.Add(10 * time.Millisecond)
return current
@@ -116,24 +124,31 @@ func TestPeerConnectionManagerRecordsFailureAndSuppressesActiveBackoff(t *testin
func TestPeerConnectionManagerProbesRelayControlLease(t *testing.T) {
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
current := now
server := httptest.NewServer(Server{
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-r"},
}.Handler())
tlsConfig := testQUICTLSConfig(t)
server, err := StartQUICFabricServer(context.Background(), QUICFabricServerConfig{
ListenAddr: "127.0.0.1:0",
TLSConfig: tlsConfig,
})
if err != nil {
t.Fatalf("start quic fabric server: %v", err)
}
defer server.Close()
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"}
certSHA256 := testQUICCertSHA256(t, tlsConfig)
leases := []PeerRendezvousLease{
{
LeaseID: "lease-node-b-via-node-r",
PeerNodeID: "node-b",
RelayNodeID: "node-r",
RelayEndpoint: server.URL,
Transport: "relay_control",
RelayEndpoint: "quic://" + server.Addr().String(),
Transport: "relay_quic",
ConnectivityMode: "relay_required",
Priority: 10,
ControlPlaneOnly: true,
IssuedAt: now.Add(-time.Minute),
ExpiresAt: now.Add(time.Minute),
Metadata: peerConnectionProbeMetadata(t, certSHA256),
},
}
cache := NewPeerCache(PeerCacheConfig{
@@ -143,7 +158,7 @@ func TestPeerConnectionManagerProbesRelayControlLease(t *testing.T) {
{
EndpointID: "node-b-relay",
NodeID: "node-b",
Transport: "relay",
Transport: "relay_quic",
Address: "relay://fabric/node-b",
Reachability: "relay",
ConnectivityMode: "relay_required",
@@ -161,6 +176,7 @@ func TestPeerConnectionManagerProbesRelayControlLease(t *testing.T) {
PeerCache: cache,
Tracker: tracker,
RendezvousLeases: leases,
QUICTransport: NewQUICFabricTransport(nil),
ProbeTimeout: time.Second,
Now: func() time.Time {
current = current.Add(10 * time.Millisecond)
@@ -189,15 +205,37 @@ func TestPeerConnectionManagerProbesRelayControlLease(t *testing.T) {
}
}
// TestPeerConnectionProbeTargetKeepsPeerForLocalRelayReverseQUIC checks the
// probe target chosen for a relay-candidate intent: the peer itself when the
// relay is the local node, and the relay node when the relay is remote.
func TestPeerConnectionProbeTargetKeepsPeerForLocalRelayReverseQUIC(t *testing.T) {
	const localNodeID = "node-a"
	// intentVia builds the same reverse-QUIC relay intent for node-b, varying
	// only the relay node ID.
	intentVia := func(relayNodeID string) PeerConnectionIntent {
		return PeerConnectionIntent{
			NodeID:         "node-b",
			RelayCandidate: true,
			RelayNodeID:    relayNodeID,
			Transport:      "reverse_quic",
		}
	}

	if target := peerConnectionProbeTargetNodeID(intentVia(localNodeID), localNodeID); target != "node-b" {
		t.Fatalf("local relay reverse probe target = %q, want peer node-b", target)
	}
	if target := peerConnectionProbeTargetNodeID(intentVia("node-r"), localNodeID); target != "node-r" {
		t.Fatalf("remote relay probe target = %q, want relay node-r", target)
	}
}
func TestPeerConnectionManagerFallsBackAcrossEndpointCandidates(t *testing.T) {
now := time.Date(2026, 4, 30, 12, 0, 0, 0, time.UTC)
current := now
server := httptest.NewServer(Server{
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"},
}.Handler())
tlsConfig := testQUICTLSConfig(t)
server, err := StartQUICFabricServer(context.Background(), QUICFabricServerConfig{
ListenAddr: "127.0.0.1:0",
TLSConfig: tlsConfig,
})
if err != nil {
t.Fatalf("start quic fabric server: %v", err)
}
defer server.Close()
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"}
certSHA256 := testQUICCertSHA256(t, tlsConfig)
cache := NewPeerCache(PeerCacheConfig{
Local: local,
PeerEndpointCandidates: map[string][]PeerEndpointCandidate{
@@ -205,8 +243,8 @@ func TestPeerConnectionManagerFallsBackAcrossEndpointCandidates(t *testing.T) {
{
EndpointID: "node-b-dead",
NodeID: "node-b",
Transport: "direct_http",
Address: "http://127.0.0.1:1",
Transport: "lan_quic",
Address: "quic://127.0.0.1:1",
Reachability: "private",
ConnectivityMode: "private_lan",
Priority: 1,
@@ -214,11 +252,12 @@ func TestPeerConnectionManagerFallsBackAcrossEndpointCandidates(t *testing.T) {
{
EndpointID: "node-b-live",
NodeID: "node-b",
Transport: "direct_http",
Address: server.URL,
Transport: "lan_quic",
Address: "quic://" + server.Addr().String(),
Reachability: "private",
ConnectivityMode: "private_lan",
Priority: 2,
Metadata: peerConnectionProbeMetadata(t, certSHA256),
},
},
},
@@ -227,11 +266,11 @@ func TestPeerConnectionManagerFallsBackAcrossEndpointCandidates(t *testing.T) {
})
tracker := NewPeerConnectionTracker(cache.Snapshot(), now)
manager := NewPeerConnectionManager(PeerConnectionManagerConfig{
Local: local,
PeerCache: cache,
Tracker: tracker,
HTTPClient: &http.Client{Timeout: 100 * time.Millisecond},
ProbeTimeout: 100 * time.Millisecond,
Local: local,
PeerCache: cache,
Tracker: tracker,
QUICTransport: NewQUICFabricTransport(nil),
ProbeTimeout: 100 * time.Millisecond,
Now: func() time.Time {
current = current.Add(10 * time.Millisecond)
return current
@@ -243,7 +282,7 @@ func TestPeerConnectionManagerFallsBackAcrossEndpointCandidates(t *testing.T) {
t.Fatalf("unexpected cycle: %+v", cycle)
}
result := cycle.Results[0]
if result.LinkStatus != PeerConnectionProbeReachable || result.SelectedCandidateID != "node-b-live" || result.SelectedEndpoint != server.URL {
if result.LinkStatus != PeerConnectionProbeReachable || result.SelectedCandidateID != "node-b-live" || result.SelectedEndpoint != "quic://"+server.Addr().String() {
t.Fatalf("fallback did not select live candidate: %+v", result)
}
if len(result.CandidateResults) != 2 ||
@@ -252,7 +291,85 @@ func TestPeerConnectionManagerFallsBackAcrossEndpointCandidates(t *testing.T) {
t.Fatalf("candidate probe trail mismatch: %+v", result.CandidateResults)
}
snapshot := tracker.Snapshot()
if snapshot.Ready != 1 || len(snapshot.Entries) != 1 || snapshot.Entries[0].BestCandidateID != "node-b-live" || snapshot.Entries[0].Endpoint != server.URL {
if snapshot.Ready != 1 || len(snapshot.Entries) != 1 || snapshot.Entries[0].BestCandidateID != "node-b-live" || snapshot.Entries[0].Endpoint != "quic://"+server.Addr().String() {
t.Fatalf("tracker did not retain selected candidate: %+v", snapshot)
}
}
// TestPeerConnectionManagerSkipsUnspecifiedQUICCandidates gives node-b two
// direct QUIC candidates: one bound to the unspecified IPv6 address and one
// pointing at a live local QUIC fabric server. It expects a single probe cycle
// to select the live candidate and to record no probe attempt for the
// unspecified one.
func TestPeerConnectionManagerSkipsUnspecifiedQUICCandidates(t *testing.T) {
	now := time.Date(2026, 5, 17, 6, 0, 0, 0, time.UTC)

	tlsConfig := testQUICTLSConfig(t)
	server, err := StartQUICFabricServer(context.Background(), QUICFabricServerConfig{
		ListenAddr: "127.0.0.1:0",
		TLSConfig:  tlsConfig,
	})
	if err != nil {
		t.Fatalf("start quic fabric server: %v", err)
	}
	defer server.Close()
	liveEndpoint := "quic://" + server.Addr().String()

	local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"}
	certSHA256 := testQUICCertSHA256(t, tlsConfig)

	// Listed first with the better priority, so it would be tried first if it
	// were not filtered out.
	unspecifiedCandidate := PeerEndpointCandidate{
		EndpointID:       "node-b-unspecified-v6",
		NodeID:           "node-b",
		Transport:        "direct_quic",
		Address:          "quic://[::]:19131",
		Reachability:     "public",
		ConnectivityMode: "direct",
		Priority:         1,
	}
	liveCandidate := PeerEndpointCandidate{
		EndpointID:       "node-b-live",
		NodeID:           "node-b",
		Transport:        "direct_quic",
		Address:          liveEndpoint,
		Reachability:     "public",
		ConnectivityMode: "direct",
		Priority:         2,
		Metadata:         peerConnectionProbeMetadata(t, certSHA256),
	}

	cache := NewPeerCache(PeerCacheConfig{
		Local: local,
		PeerEndpointCandidates: map[string][]PeerEndpointCandidate{
			"node-b": {unspecifiedCandidate, liveCandidate},
		},
		WarmPeerLimit: 1,
		Now:           now,
	})
	tracker := NewPeerConnectionTracker(cache.Snapshot(), now)

	// Fake clock: every read advances time by 10ms.
	clock := now
	tick := func() time.Time {
		clock = clock.Add(10 * time.Millisecond)
		return clock
	}
	manager := NewPeerConnectionManager(PeerConnectionManagerConfig{
		Local:         local,
		PeerCache:     cache,
		Tracker:       tracker,
		QUICTransport: NewQUICFabricTransport(nil),
		ProbeTimeout:  time.Second,
		Now:           tick,
	})

	cycle := manager.ProbeOnce(context.Background())
	if !(cycle.Attempted == 1 && cycle.Succeeded == 1 && len(cycle.Results) == 1) {
		t.Fatalf("unexpected cycle: %+v", cycle)
	}

	result := cycle.Results[0]
	if !(result.SelectedCandidateID == "node-b-live" && result.SelectedEndpoint == liveEndpoint) {
		t.Fatalf("manager did not skip unspecified endpoint: %+v", result)
	}
	if !(len(result.CandidateResults) == 1 && result.CandidateResults[0].CandidateID == "node-b-live") {
		t.Fatalf("unspecified endpoint should not be probed: %+v", result.CandidateResults)
	}
}
// peerConnectionProbeMetadata builds the JSON metadata blob for a probe
// candidate: a single "peer_cert_sha256" key carrying certSHA256. The test
// is failed immediately if encoding does not succeed.
func peerConnectionProbeMetadata(t *testing.T, certSHA256 string) json.RawMessage {
	t.Helper()
	fields := map[string]string{"peer_cert_sha256": certSHA256}
	encoded, marshalErr := json.Marshal(fields)
	if marshalErr != nil {
		t.Fatalf("marshal probe metadata: %v", marshalErr)
	}
	return encoded
}
@@ -9,7 +9,7 @@ func TestPeerConnectionTrackerTransitionsReadyAndDegraded(t *testing.T) {
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
tracker := NewPeerConnectionTracker(PeerCacheSnapshot{
Entries: []PeerCacheEntry{
{NodeID: "node-b", Warm: true, WarmReason: "route_adjacent", Endpoint: "http://node-b:19000"},
{NodeID: "node-b", Warm: true, WarmReason: "route_adjacent", Endpoint: "quic://node-b:19443"},
},
}, now)
@@ -76,12 +76,12 @@ func TestPeerRecoveryPlanMaintainsRelayReadyPeersInSteadyMode(t *testing.T) {
Entries: []PeerCacheEntry{
{
NodeID: "node-c",
Endpoint: "http://relay:19001",
Endpoint: "quic://relay:19443",
Warm: true,
WarmReason: "rendezvous_lease",
RendezvousLeaseID: "lease-1",
RelayNodeID: "node-r",
RelayEndpoint: "http://relay:19001",
RelayEndpoint: "quic://relay:19443",
RelayControl: true,
},
},
@@ -121,7 +121,7 @@ func TestPeerRecoveryPlanCapsTargetByConnectablePeers(t *testing.T) {
func recoveryPlanPeer(nodeID string, warm bool, recoverySeed bool, warmReason string) PeerCacheEntry {
return PeerCacheEntry{
NodeID: nodeID,
Endpoint: "http://" + nodeID + ":19001",
Endpoint: "quic://" + nodeID + ":19443",
Warm: warm,
WarmReason: warmReason,
RecoverySeed: recoverySeed,
@@ -2,42 +2,369 @@ package mesh
import (
"context"
"net/http"
"encoding/json"
"fmt"
"strings"
"sync/atomic"
"time"
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
)
// ProductionForwardTransport is the seam used to hand a production envelope to
// the next hop of a route.
type ProductionForwardTransport interface {
// SendProduction forwards envelope to the node identified by nextNodeID and
// returns the forwarding result, or an error when the envelope could not be
// forwarded.
SendProduction(ctx context.Context, nextNodeID string, envelope ProductionEnvelope) (ProductionForwardResult, error)
}
type HTTPProductionForwardTransport struct {
PeerURLs map[string]string
HTTPClient *http.Client
type QUICProductionForwardTransport struct {
Targets map[string]FabricTransportTarget
RouteSets map[string]FabricRouteSet
Transport FabricTransport
Router FabricChannelRouter
Timeout time.Duration
Pressure *FabricRoutePressureTracker
Health *FabricRouteHealthTracker
sequence atomic.Uint64
}
func NewHTTPProductionForwardTransport(peerURLs map[string]string) *HTTPProductionForwardTransport {
normalized := make(map[string]string, len(peerURLs))
for nodeID, baseURL := range peerURLs {
type QUICProductionForwardTransportSnapshot struct {
RoutePressure FabricRoutePressureSnapshot `json:"route_pressure"`
RouteHealth FabricRouteHealthSnapshot `json:"route_health,omitempty"`
}
func NewQUICProductionForwardTransport(targets map[string]FabricTransportTarget, transport *QUICFabricTransport) *QUICProductionForwardTransport {
routeSets := make(map[string]FabricRouteSet, len(targets))
for nodeID, target := range targets {
nodeID = strings.TrimSpace(nodeID)
baseURL = strings.TrimRight(strings.TrimSpace(baseURL), "/")
if nodeID != "" && baseURL != "" {
normalized[nodeID] = baseURL
target.Endpoint = strings.TrimRight(strings.TrimSpace(target.Endpoint), "/")
target.Transport = strings.TrimSpace(target.Transport)
if nodeID != "" && target.Endpoint != "" {
target.PeerID = firstNonEmpty(strings.TrimSpace(target.PeerID), nodeID)
routeSets[nodeID] = FabricRouteSetForTransportTargets("", "", nodeID, []FabricTransportTarget{target})
}
}
return &HTTPProductionForwardTransport{PeerURLs: normalized}
if transport == nil {
transport = NewQUICFabricTransport(nil)
}
return NewQUICProductionForwardTransportFromRouteSets(routeSets, transport)
}
func (t *HTTPProductionForwardTransport) SendProduction(ctx context.Context, nextNodeID string, envelope ProductionEnvelope) (ProductionForwardResult, error) {
if t == nil {
return ProductionForwardResult{}, ErrForwardPeerUnavailable
func NewQUICProductionForwardTransportFromRouteSets(routeSets map[string]FabricRouteSet, transport FabricTransport) *QUICProductionForwardTransport {
normalizedRouteSets := make(map[string]FabricRouteSet, len(routeSets))
targets := make(map[string]FabricTransportTarget, len(routeSets))
for nodeID, routeSet := range routeSets {
nodeID = strings.TrimSpace(nodeID)
if nodeID == "" {
continue
}
normalizedRouteSets[nodeID] = routeSet
if target, err := FabricTransportTargetForRoute(routeSet.Primary); err == nil {
targets[nodeID] = target
}
}
baseURL := strings.TrimRight(strings.TrimSpace(t.PeerURLs[nextNodeID]), "/")
if baseURL == "" {
return ProductionForwardResult{}, ErrForwardPeerUnavailable
if transport == nil {
transport = NewQUICFabricTransport(nil)
}
client := NewClient(baseURL)
if t.HTTPClient != nil {
client.HTTPClient = t.HTTPClient
return &QUICProductionForwardTransport{
Targets: targets,
RouteSets: normalizedRouteSets,
Transport: transport,
Router: NewFabricChannelRouter(FabricChannelRouterConfig{
MaxAckLatencyMs: 2000,
MinRerouteInterval: 50 * time.Millisecond,
}),
Timeout: 30 * time.Second,
Pressure: NewFabricRoutePressureTracker(),
Health: NewFabricRouteHealthTracker(30 * time.Second),
}
return client.SendProduction(ctx, envelope)
}
// SendProduction forwards envelope to nextNodeID over the QUIC fabric.
//
// The route set registered for the (whitespace-trimmed) node ID is used when
// present; otherwise a single-route set is synthesized from the legacy Targets
// entry. ErrForwardPeerUnavailable is returned when the transport is not
// configured or no usable route/target exists for the node. The envelope is
// JSON-encoded and delivered through sendProductionWithRouteSet.
func (t *QUICProductionForwardTransport) SendProduction(ctx context.Context, nextNodeID string, envelope ProductionEnvelope) (ProductionForwardResult, error) {
	var none ProductionForwardResult
	if t == nil || t.Transport == nil {
		return none, ErrForwardPeerUnavailable
	}
	nextNodeID = strings.TrimSpace(nextNodeID)

	routes, known := t.RouteSets[nextNodeID]
	if !known {
		fallback, hasTarget := t.Targets[nextNodeID]
		if !hasTarget || strings.TrimSpace(fallback.Endpoint) == "" {
			return none, ErrForwardPeerUnavailable
		}
		routes = FabricRouteSetForTransportTargets(envelope.ClusterID, envelope.CurrentHopNodeID, nextNodeID, []FabricTransportTarget{fallback})
	}

	// The generated ID is computed unconditionally, so the shared sequence
	// counter advances on every call even when the envelope carries its own
	// message ID.
	generatedID := fmt.Sprintf("production-%d", t.sequence.Add(1))
	spec := FabricChannelSpec{
		ChannelID:    firstNonEmpty(strings.TrimSpace(envelope.MessageID), generatedID),
		ClusterID:    envelope.ClusterID,
		SourceNodeID: firstNonEmpty(productionRouteSetSourceNodeID(routes), envelope.CurrentHopNodeID),
		TargetKind:   FabricChannelTargetNode,
		TargetID:     nextNodeID,
		TrafficClass: FabricServiceChannelReliable,
		CreatedAt:    time.Now().UTC(),
	}

	encoded, err := json.Marshal(envelope)
	if err != nil {
		return none, err
	}
	delivered, err := t.sendProductionWithRouteSet(ctx, spec, routes, encoded)
	if err != nil {
		return none, err
	}
	return delivered, nil
}
// productionRouteSetSourceNodeID returns the first non-blank (trimmed)
// SourceNodeID found among the routes of routeSet, in the order produced by
// flattenFabricRouteSet, or "" when no route names a source node.
func productionRouteSetSourceNodeID(routeSet FabricRouteSet) string {
	for _, candidate := range flattenFabricRouteSet(routeSet) {
		trimmed := strings.TrimSpace(candidate.SourceNodeID)
		if trimmed == "" {
			continue
		}
		return trimmed
	}
	return ""
}
// sendProductionWithRouteSet delivers payload to spec.TargetID over one route
// of routeSet, moving to another route whenever the router answers a failed
// attempt with a reroute event.
//
// Each attempt re-applies health and pressure state to the route set, dials a
// fresh session for the channel's current route, sends the payload, waits up
// to t.Timeout (30s when unset) for the matching response, and closes the
// session. Successes and failures are recorded on the health tracker and
// reported to the router through ObserveChannel.
//
// Caller cancellation is not treated as a route failure: when ctx is done the
// function returns ctx.Err() without quarantining the route or dialing further
// routes. A per-attempt response timeout (derived from t.Timeout) leaves ctx
// itself alive and therefore still counts as a route failure.
func (t *QUICProductionForwardTransport) sendProductionWithRouteSet(ctx context.Context, spec FabricChannelSpec, routeSet FabricRouteSet, payload []byte) (ProductionForwardResult, error) {
	router := t.Router
	if router.Config.MaxRoutePressure == 0 {
		// NOTE(review): a zero MaxRoutePressure appears to be used as the marker
		// for an unconfigured router; confirm NewFabricChannelRouter fills it in.
		router = NewFabricChannelRouter(FabricChannelRouterConfig{MaxAckLatencyMs: 2000, MinRerouteInterval: 50 * time.Millisecond})
	}
	routeSet = t.routeSetForScheduling(routeSet)
	channel, _, err := router.OpenChannel(spec, routeSet, time.Now().UTC())
	if err != nil {
		return ProductionForwardResult{}, err
	}
	timeout := t.Timeout
	if timeout <= 0 {
		timeout = 30 * time.Second
	}
	for {
		// Stop before dialing when the caller has already given up; otherwise
		// every remaining route would be dialed with a dead context.
		if ctxErr := ctx.Err(); ctxErr != nil {
			return ProductionForwardResult{}, ctxErr
		}
		routeSet = t.routeSetForScheduling(routeSet)
		route, ok := findFabricRoute(routeSet, channel.RouteID)
		if !ok {
			return ProductionForwardResult{}, ErrFabricRouteNotFound
		}
		target, err := FabricTransportTargetForRoute(route)
		if err != nil {
			return ProductionForwardResult{}, err
		}
		target.PeerID = firstNonEmpty(strings.TrimSpace(target.PeerID), spec.TargetID)
		target.MaxPayload = fabricproto.DefaultMaxPayload
		releaseRoute := t.acquireProductionRoute(route.RouteID)
		session, err := t.Transport.Connect(ctx, target)
		if err != nil {
			releaseRoute()
			// A dial aborted by caller cancellation says nothing about the
			// route's health, so do not quarantine it.
			if ctxErr := ctx.Err(); ctxErr != nil {
				return ProductionForwardResult{}, ctxErr
			}
			t.markProductionRouteFailure(route.RouteID, err)
			updated, event, rerouteErr := router.ObserveChannel(channel, routeSet, FabricChannelObservation{
				ChannelID:  spec.ChannelID,
				RouteID:    route.RouteID,
				Failed:     true,
				Reason:     "connect_failed",
				ObservedAt: time.Now().UTC(),
			}, time.Now().UTC())
			channel = updated
			if event.Type == FabricChannelRouteEventReroute {
				continue
			}
			if rerouteErr != nil {
				return ProductionForwardResult{}, rerouteErr
			}
			return ProductionForwardResult{}, err
		}
		response, ackMs, err := t.sendProductionOnSession(ctx, session, payload, timeout)
		// Best-effort close: the outcome of the exchange is already known.
		_ = session.Close()
		releaseRoute()
		if err == nil {
			t.markProductionRouteSuccess(route.RouteID)
			// The observation result is intentionally ignored: the send has
			// succeeded and the channel is not reused afterwards.
			_, _, _ = router.ObserveChannel(channel, routeSet, FabricChannelObservation{
				ChannelID:    spec.ChannelID,
				RouteID:      route.RouteID,
				AckLatencyMs: ackMs,
				BytesSent:    uint64(len(payload)),
				FramesSent:   1,
				BytesRecv:    uint64(len(response.Payload)),
				FramesRecv:   1,
				ObservedAt:   time.Now().UTC(),
			}, time.Now().UTC())
			return decodeQUICProductionForwardResponse(response.Payload)
		}
		// Same reasoning as for the dial: only failures observed while the
		// caller's context is still live are attributed to the route.
		if ctxErr := ctx.Err(); ctxErr != nil {
			return ProductionForwardResult{}, ctxErr
		}
		t.markProductionRouteFailure(route.RouteID, err)
		updated, event, rerouteErr := router.ObserveChannel(channel, routeSet, FabricChannelObservation{
			ChannelID:  spec.ChannelID,
			RouteID:    route.RouteID,
			Failed:     true,
			Reason:     "response_failed",
			ObservedAt: time.Now().UTC(),
		}, time.Now().UTC())
		channel = updated
		if event.Type == FabricChannelRouteEventReroute {
			continue
		}
		if rerouteErr != nil {
			return ProductionForwardResult{}, rerouteErr
		}
		return ProductionForwardResult{}, err
	}
}
// routeSetWithActiveChannels returns routeSet with the pressure tracker's
// state applied. The route set is returned untouched when the receiver is nil
// or has no pressure tracker.
func (t *QUICProductionForwardTransport) routeSetWithActiveChannels(routeSet FabricRouteSet) FabricRouteSet {
	if t != nil && t.Pressure != nil {
		return t.Pressure.Apply(routeSet)
	}
	return routeSet
}
// routeSetForScheduling prepares routeSet for a routing decision: the health
// tracker (when present) is applied first using the current UTC time, then the
// pressure tracker via routeSetWithActiveChannels. A nil receiver returns the
// route set unchanged.
func (t *QUICProductionForwardTransport) routeSetForScheduling(routeSet FabricRouteSet) FabricRouteSet {
	if t == nil {
		return routeSet
	}
	if health := t.Health; health != nil {
		routeSet = health.Apply(routeSet, time.Now().UTC())
	}
	return t.routeSetWithActiveChannels(routeSet)
}
// acquireProductionRoute registers an in-flight use of routeID with the
// pressure tracker and returns the function that releases it. Without a
// tracker (or on a nil receiver) the returned release function does nothing.
func (t *QUICProductionForwardTransport) acquireProductionRoute(routeID string) func() {
	if t != nil && t.Pressure != nil {
		return t.Pressure.Acquire(routeID)
	}
	return func() {}
}
// markProductionRouteFailure records err against routeID on the health
// tracker, stamped with the current UTC time. It is a no-op when the receiver
// or its health tracker is nil, or when err is nil.
func (t *QUICProductionForwardTransport) markProductionRouteFailure(routeID string, err error) {
	tracked := t != nil && t.Health != nil
	if tracked && err != nil {
		t.Health.MarkFailure(routeID, err.Error(), time.Now().UTC())
	}
}
// markProductionRouteSuccess reports a successful exchange on routeID to the
// health tracker. It is a no-op when the receiver or its tracker is nil.
func (t *QUICProductionForwardTransport) markProductionRouteSuccess(routeID string) {
	if t != nil && t.Health != nil {
		t.Health.MarkSuccess(routeID)
	}
}
// Snapshot captures the current route pressure and route health state.
// Trackers that are not configured contribute their zero value; a nil receiver
// yields an empty snapshot.
func (t *QUICProductionForwardTransport) Snapshot() QUICProductionForwardTransportSnapshot {
	var snapshot QUICProductionForwardTransportSnapshot
	if t == nil {
		return snapshot
	}
	if t.Pressure != nil {
		snapshot.RoutePressure = t.Pressure.SnapshotPressure()
	}
	if t.Health != nil {
		snapshot.RouteHealth = t.Health.Snapshot(time.Now().UTC())
	}
	return snapshot
}
// sendProductionOnSession writes payload as one reliable data frame on the
// production-forward stream and waits for the reply frame carrying the same
// stream ID and sequence number.
//
// It returns the reply frame and the elapsed wait in milliseconds. The wait is
// bounded by timeout when timeout is positive, and always by ctx. Frames that
// do not match the request are skipped. A closed frame or error channel yields
// ErrForwardPeerUnavailable; a non-nil session error is returned as is.
func (t *QUICProductionForwardTransport) sendProductionOnSession(ctx context.Context, session FabricTransportSession, payload []byte, timeout time.Duration) (fabricproto.Frame, int64, error) {
	var noFrame fabricproto.Frame
	requestSeq := t.sequence.Add(1)
	request := fabricproto.Frame{
		Type:         fabricproto.FrameData,
		TrafficClass: fabricproto.TrafficClassReliable,
		StreamID:     ProductionForwardQUICStreamID,
		Sequence:     requestSeq,
		Payload:      payload,
	}
	if sendErr := session.Send(ctx, request); sendErr != nil {
		return noFrame, 0, sendErr
	}

	waitCtx := ctx
	if timeout > 0 {
		bounded, cancel := context.WithTimeout(ctx, timeout)
		defer cancel()
		waitCtx = bounded
	}

	sentAt := time.Now()
	for {
		select {
		case <-waitCtx.Done():
			return noFrame, 0, waitCtx.Err()
		case sessionErr, open := <-session.Errors():
			switch {
			case !open:
				return noFrame, 0, ErrForwardPeerUnavailable
			case sessionErr != nil:
				return noFrame, 0, sessionErr
			}
		case reply, open := <-session.Frames():
			if !open {
				return noFrame, 0, ErrForwardPeerUnavailable
			}
			isReply := reply.Type == fabricproto.FrameData &&
				reply.StreamID == ProductionForwardQUICStreamID &&
				reply.Sequence == requestSeq
			if isReply {
				return reply, time.Since(sentAt).Milliseconds(), nil
			}
		}
	}
}
// decodeQUICProductionForwardResponse parses the JSON reply of a production
// forward. A reply whose Error field is non-blank is converted into an error
// wrapping ErrForwardPeerUnavailable; otherwise the embedded result is
// returned.
func decodeQUICProductionForwardResponse(payload []byte) (ProductionForwardResult, error) {
	var decoded quicProductionForwardResponse
	err := json.Unmarshal(payload, &decoded)
	switch {
	case err != nil:
		return ProductionForwardResult{}, err
	case strings.TrimSpace(decoded.Error) != "":
		return ProductionForwardResult{}, fmt.Errorf("%w: %s", ErrForwardPeerUnavailable, decoded.Error)
	}
	return decoded.Result, nil
}
// FabricRouteSetForTransportTargets converts an ordered list of transport
// targets into a single-hop route set addressed to targetNodeID.
//
// Targets with a blank endpoint are skipped. The first remaining target
// becomes the primary route and the rest become warm standbys, in input order.
// A route's ID is the target's EndpointID, or "<peer>-quic-<index>" when the
// target has none; its base latency comes from routeLatencyForIndex so earlier
// targets are preferred. Every route starts healthy with capacity 100 and no
// active channels.
func FabricRouteSetForTransportTargets(clusterID string, sourceNodeID string, targetNodeID string, targets []FabricTransportTarget) FabricRouteSet {
	cluster := strings.TrimSpace(clusterID)
	source := strings.TrimSpace(sourceNodeID)
	destination := strings.TrimSpace(targetNodeID)

	result := FabricRouteSet{TargetKind: FabricChannelTargetNode, TargetID: destination}
	candidates := make([]FabricRoute, 0, len(targets))
	for position, target := range targets {
		target.Endpoint = strings.TrimRight(strings.TrimSpace(target.Endpoint), "/")
		if strings.TrimSpace(target.Endpoint) == "" {
			continue
		}
		endpointID := strings.TrimSpace(target.EndpointID)
		peerID := firstNonEmpty(strings.TrimSpace(target.PeerID), destination)
		routeID := endpointID
		if routeID == "" {
			routeID = fmt.Sprintf("%s-quic-%d", peerID, position)
		}
		hop := FabricRouteHop{
			NodeID:         peerID,
			Mode:           fabricRouteModeForTransportTarget(target),
			EndpointID:     endpointID,
			Address:        target.Endpoint,
			PeerCertSHA256: strings.TrimSpace(target.PeerCertSHA256),
		}
		candidates = append(candidates, FabricRoute{
			RouteID:           routeID,
			ClusterID:         cluster,
			SourceNodeID:      source,
			DestinationNodeID: peerID,
			Hops:              []FabricRouteHop{hop},
			BaseLatencyMs:     routeLatencyForIndex(position),
			Capacity:          100,
			ActiveChannels:    0,
			Healthy:           true,
			LastUpdatedAt:     time.Now().UTC(),
		})
	}
	if len(candidates) == 0 {
		return result
	}
	result.Primary = candidates[0]
	if standby := candidates[1:]; len(standby) > 0 {
		result.WarmStandby = append(result.WarmStandby, standby...)
	}
	return result
}
// fabricRouteModeForTransportTarget maps the target's Transport label
// (trimmed, case-insensitive) onto a route mode. Labels equal to the LAN,
// reverse, relay or ICE mode names select that mode; anything else, including
// an empty label, is treated as a direct route.
func fabricRouteModeForTransportTarget(target FabricTransportTarget) FabricRouteMode {
	label := strings.ToLower(strings.TrimSpace(target.Transport))
	recognized := [...]FabricRouteMode{
		FabricRouteLAN,
		FabricRouteReverse,
		FabricRouteRelay,
		FabricRouteICE,
	}
	for _, mode := range recognized {
		if string(mode) == label {
			return mode
		}
	}
	return FabricRouteDirect
}
// routeLatencyForIndex returns the synthetic base latency, in milliseconds,
// for the target at the given position: 10 for the first target (and for any
// non-positive index), plus one per position after that.
func routeLatencyForIndex(index int) int {
	const baseLatencyMs = 10
	if index > 0 {
		return baseLatencyMs + index
	}
	return baseLatencyMs
}
@@ -0,0 +1,339 @@
package mesh
import (
"context"
"encoding/json"
"sync"
"testing"
"time"
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
)
// TestQUICProductionForwardTransportReroutesOnConnectFailure verifies that a
// failed dial on the primary route makes the forwarder retry on the standby
// route, dialing each endpoint exactly once and releasing both acquisitions.
func TestQUICProductionForwardTransportReroutesOnConnectFailure(t *testing.T) {
	const (
		deadEndpoint = "quic://dead.example.test:19443"
		fastEndpoint = "quic://fast.example.test:19443"
	)
	fake := newFakeProductionForwardFabricTransport()
	fake.failConnect[deadEndpoint] = true
	fake.results[fastEndpoint] = ProductionForwardResult{
		Delivered: true,
		MessageID: "message-1",
		RouteID:   "route-1",
	}
	routes := FabricRouteSetForTransportTargets("cluster-a", "node-a", "node-b", []FabricTransportTarget{
		{EndpointID: "dead", PeerID: "node-b", Endpoint: deadEndpoint, Transport: "quic"},
		{EndpointID: "fast", PeerID: "node-b", Endpoint: fastEndpoint, Transport: "quic"},
	})
	forward := NewQUICProductionForwardTransportFromRouteSets(map[string]FabricRouteSet{"node-b": routes}, fake)
	forward.Timeout = time.Second

	result, err := forward.SendProduction(context.Background(), "node-b", testProductionForwardEnvelope("message-1"))
	if err != nil {
		t.Fatalf("send production: %v", err)
	}
	if !result.Delivered || result.MessageID != "message-1" {
		t.Fatalf("result = %+v", result)
	}

	if got := fake.connectCount(deadEndpoint); got != 1 {
		t.Fatalf("dead connect count = %d, want 1", got)
	}
	if got := fake.connectCount(fastEndpoint); got != 1 {
		t.Fatalf("fast connect count = %d, want 1", got)
	}

	snapshot := forward.Snapshot()
	pressure := snapshot.RoutePressure
	if pressure.AcquiredTotal != 2 || pressure.ReleasedTotal != 2 || pressure.MaxActiveTotal == 0 {
		t.Fatalf("route pressure snapshot = %+v", snapshot)
	}
}
// TestQUICProductionForwardTransportQuarantinesFailedRoute sends twice through
// a route set whose primary cannot be dialed and expects the second send to go
// straight to the healthy route, leaving the dead route dialed only once.
func TestQUICProductionForwardTransportQuarantinesFailedRoute(t *testing.T) {
	const (
		deadEndpoint = "quic://dead.example.test:19443"
		fastEndpoint = "quic://fast.example.test:19443"
	)
	fake := newFakeProductionForwardFabricTransport()
	fake.failConnect[deadEndpoint] = true
	fake.results[fastEndpoint] = ProductionForwardResult{Delivered: true, MessageID: "message-1"}
	routes := FabricRouteSetForTransportTargets("cluster-a", "node-a", "node-b", []FabricTransportTarget{
		{EndpointID: "dead", PeerID: "node-b", Endpoint: deadEndpoint, Transport: "quic"},
		{EndpointID: "fast", PeerID: "node-b", Endpoint: fastEndpoint, Transport: "quic"},
	})
	forward := NewQUICProductionForwardTransportFromRouteSets(map[string]FabricRouteSet{"node-b": routes}, fake)
	forward.Timeout = time.Second

	for attempt := 1; attempt <= 2; attempt++ {
		result, err := forward.SendProduction(context.Background(), "node-b", testProductionForwardEnvelope("message-1"))
		if err != nil {
			t.Fatalf("send production #%d: %v", attempt, err)
		}
		if !result.Delivered {
			t.Fatalf("result #%d = %+v", attempt, result)
		}
	}

	if got := fake.connectCount(deadEndpoint); got != 1 {
		t.Fatalf("dead connect count = %d, want quarantine after first failure", got)
	}
	if got := fake.connectCount(fastEndpoint); got != 2 {
		t.Fatalf("fast connect count = %d, want both sends on healthy route", got)
	}

	health := forward.Snapshot().RouteHealth
	if health.Quarantined["dead"].Failures != 1 {
		t.Fatalf("route health snapshot = %+v, want dead route quarantined", health)
	}
}
// TestFabricRouteHealthTrackerExpiresQuarantine marks the primary route as
// failed on a tracker with a one-second TTL and checks that the route is
// reported unhealthy/degraded inside the TTL and restored once it has elapsed.
func TestFabricRouteHealthTrackerExpiresQuarantine(t *testing.T) {
	routeSet := FabricRouteSetForTransportTargets("cluster-a", "node-a", "node-b", []FabricTransportTarget{
		{EndpointID: "dead", PeerID: "node-b", Endpoint: "quic://dead.example.test:19443", Transport: "quic"},
		{EndpointID: "fast", PeerID: "node-b", Endpoint: "quic://fast.example.test:19443", Transport: "quic"},
	})
	tracker := NewFabricRouteHealthTracker(time.Second)
	failedAt := time.Date(2026, 5, 16, 12, 0, 0, 0, time.UTC)
	withinTTL := failedAt.Add(500 * time.Millisecond)
	afterTTL := failedAt.Add(2 * time.Second)

	tracker.MarkFailure("dead", "connect failed", failedAt)

	// Half a second in, the failure is still inside the quarantine window.
	applied := tracker.Apply(routeSet, withinTTL)
	if applied.Primary.Healthy || !applied.Primary.Degraded {
		t.Fatalf("primary after quarantine = %+v, want unhealthy degraded route", applied.Primary)
	}
	if len(tracker.Snapshot(withinTTL).Quarantined) != 1 {
		t.Fatalf("route health snapshot = %+v, want one quarantined route", tracker.Snapshot(withinTTL))
	}

	// Two seconds in, the one-second TTL has expired.
	applied = tracker.Apply(routeSet, afterTTL)
	if !applied.Primary.Healthy || applied.Primary.Degraded {
		t.Fatalf("primary after ttl = %+v, want route restored", applied.Primary)
	}
	if snapshot := tracker.Snapshot(afterTTL); len(snapshot.Quarantined) != 0 {
		t.Fatalf("route health snapshot after ttl = %+v, want empty quarantine", snapshot)
	}
}
func TestQUICProductionForwardTransportReroutesOnResponseTimeout(t *testing.T) {
transport := newFakeProductionForwardFabricTransport()
transport.delays["quic://slow.example.test:19443"] = 100 * time.Millisecond
transport.results["quic://slow.example.test:19443"] = ProductionForwardResult{Delivered: true, MessageID: "message-1"}
transport.results["quic://fast.example.test:19443"] = ProductionForwardResult{Delivered: true, MessageID: "message-1"}
forward := NewQUICProductionForwardTransportFromRouteSets(map[string]FabricRouteSet{
"node-b": FabricRouteSetForTransportTargets("cluster-a", "node-a", "node-b", []FabricTransportTarget{
{EndpointID: "slow", PeerID: "node-b", Endpoint: "quic://slow.example.test:19443", Transport: "quic"},
{EndpointID: "fast", PeerID: "node-b", Endpoint: "quic://fast.example.test:19443", Transport: "quic"},
}),
}, transport)
forward.Timeout = 10 * time.Millisecond
result, err := forward.SendProduction(context.Background(), "node-b", testProductionForwardEnvelope("message-1"))
if err != nil {
t.Fatalf("send production: %v", err)
}
if !result.Delivered || result.MessageID != "message-1" {
t.Fatalf("result = %+v", result)
}
if got := transport.connectCount("quic://slow.example.test:19443"); got != 1 {
t.Fatalf("slow connect count = %d, want 1", got)
}
if got := transport.connectCount("quic://fast.example.test:19443"); got != 1 {
t.Fatalf("fast connect count = %d, want 1", got)
}
}
// TestQUICProductionForwardTransportSchedulesWithRouteSetSourceForForwardedEnvelope
// sends an envelope whose CurrentHopNodeID ("node-c") differs from the source
// node recorded in the route set ("node-b") and expects delivery over the
// single configured route.
func TestQUICProductionForwardTransportSchedulesWithRouteSetSourceForForwardedEnvelope(t *testing.T) {
	const endpoint = "quic://node-c.example.test:19443"
	fake := newFakeProductionForwardFabricTransport()
	fake.results[endpoint] = ProductionForwardResult{Delivered: true, MessageID: "message-forwarded"}
	routes := FabricRouteSetForTransportTargets("cluster-a", "node-b", "node-c", []FabricTransportTarget{
		{EndpointID: "node-c-direct", PeerID: "node-c", Endpoint: endpoint, Transport: "quic"},
	})
	forward := NewQUICProductionForwardTransportFromRouteSets(map[string]FabricRouteSet{"node-c": routes}, fake)
	forward.Timeout = time.Second

	forwarded := testProductionForwardEnvelope("message-forwarded")
	forwarded.ClusterID = "cluster-a"
	forwarded.SourceNodeID = "node-a"
	forwarded.DestinationNodeID = "node-c"
	forwarded.CurrentHopNodeID = "node-c"
	forwarded.NextHopNodeID = "node-c"

	result, err := forward.SendProduction(context.Background(), "node-c", forwarded)
	if err != nil {
		t.Fatalf("send production: %v", err)
	}
	if !result.Delivered || result.MessageID != "message-forwarded" {
		t.Fatalf("result = %+v", result)
	}
	if got := fake.connectCount(endpoint); got != 1 {
		t.Fatalf("connect count = %d, want 1", got)
	}
}
// TestQUICProductionForwardTransportSpreadsConcurrentChannelsByActivePressure
// starts one send on route-a (held open by an 80ms response delay) and, while
// it is still in flight, issues a second send that is expected to be placed on
// route-b. The pressure snapshot must show a peak of one active channel per
// route and two acquisitions in total.
func TestQUICProductionForwardTransportSpreadsConcurrentChannelsByActivePressure(t *testing.T) {
transport := newFakeProductionForwardFabricTransport()
// Only route-a is delayed, so the first send is still pending when the
// second one is scheduled.
transport.delays["quic://route-a.example.test:19443"] = 80 * time.Millisecond
transport.results["quic://route-a.example.test:19443"] = ProductionForwardResult{Delivered: true, MessageID: "message-1"}
transport.results["quic://route-b.example.test:19443"] = ProductionForwardResult{Delivered: true, MessageID: "message-2"}
routeSet := FabricRouteSetForTransportTargets("cluster-a", "node-a", "node-b", []FabricTransportTarget{
{EndpointID: "route-a", PeerID: "node-b", Endpoint: "quic://route-a.example.test:19443", Transport: "quic"},
{EndpointID: "route-b", PeerID: "node-b", Endpoint: "quic://route-b.example.test:19443", Transport: "quic"},
})
routeSet.Primary.Capacity = 100
routeSet.WarmStandby[0].Capacity = 100
forward := NewQUICProductionForwardTransportFromRouteSets(map[string]FabricRouteSet{"node-b": routeSet}, transport)
forward.Timeout = time.Second
// Buffered so the goroutine can finish even if the test has already failed.
firstDone := make(chan error, 1)
go func() {
_, err := forward.SendProduction(context.Background(), "node-b", testProductionForwardEnvelope("message-1"))
firstDone <- err
}()
// Block until the first send has dialed route-a, so the second send is
// scheduled while route-a is in use.
transport.waitForConnect(t, "quic://route-a.example.test:19443", 1)
result, err := forward.SendProduction(context.Background(), "node-b", testProductionForwardEnvelope("message-2"))
if err != nil {
t.Fatalf("second send production: %v", err)
}
if !result.Delivered || result.MessageID != "message-2" {
t.Fatalf("second result = %+v", result)
}
if got := transport.connectCount("quic://route-b.example.test:19443"); got != 1 {
t.Fatalf("route-b connect count = %d, want 1", got)
}
if err := <-firstDone; err != nil {
t.Fatalf("first send production: %v", err)
}
snapshot := forward.Snapshot()
if snapshot.RoutePressure.MaxActive["route-a"] != 1 || snapshot.RoutePressure.MaxActive["route-b"] != 1 || snapshot.RoutePressure.AcquiredTotal != 2 {
t.Fatalf("route pressure snapshot = %+v", snapshot)
}
}
// fakeProductionForwardFabricTransport is an in-memory transport for tests.
// Its behaviour per endpoint is scripted through the maps below, all of which
// are keyed by the target's Endpoint string.
type fakeProductionForwardFabricTransport struct {
// mu guards the maps while Connect, connectCount and waitForConnect read or
// update them.
mu sync.Mutex
// failConnect marks endpoints whose Connect call must fail.
failConnect map[string]bool
// delays is how long a session for the endpoint waits before replying.
delays map[string]time.Duration
// results is the forward result a session for the endpoint replies with.
results map[string]ProductionForwardResult
// connects counts Connect calls per endpoint, including failed ones.
connects map[string]int
}
// newFakeProductionForwardFabricTransport returns a fake transport with all of
// its scripting maps allocated and empty, ready to be filled in by a test.
func newFakeProductionForwardFabricTransport() *fakeProductionForwardFabricTransport {
	fake := new(fakeProductionForwardFabricTransport)
	fake.failConnect = make(map[string]bool)
	fake.delays = make(map[string]time.Duration)
	fake.results = make(map[string]ProductionForwardResult)
	fake.connects = make(map[string]int)
	return fake
}
// Connect counts the dial against the target's endpoint and then either fails
// with ErrForwardPeerUnavailable (when the endpoint is scripted to fail) or
// returns a fake session carrying the endpoint's scripted delay and result.
func (t *fakeProductionForwardFabricTransport) Connect(_ context.Context, target FabricTransportTarget) (FabricTransportSession, error) {
	key := target.Endpoint

	t.mu.Lock()
	t.connects[key]++
	shouldFail, delay, result := t.failConnect[key], t.delays[key], t.results[key]
	t.mu.Unlock()

	if shouldFail {
		return nil, ErrForwardPeerUnavailable
	}
	session := &fakeProductionForwardFabricSession{
		delay:  delay,
		result: result,
		frames: make(chan fabricproto.Frame, 16),
		errors: make(chan error, 1),
		done:   make(chan struct{}),
	}
	return session, nil
}
// Close does nothing and always returns nil; the fake transport holds no
// resources of its own.
func (t *fakeProductionForwardFabricTransport) Close() error {
return nil
}
// connectCount reports how many times Connect has been called for endpoint,
// reading the counter under the transport's mutex.
func (t *fakeProductionForwardFabricTransport) connectCount(endpoint string) int {
	t.mu.Lock()
	count := t.connects[endpoint]
	t.mu.Unlock()
	return count
}
// waitForConnect polls once per millisecond until endpoint has been dialed at
// least count times, failing the test if that has not happened within one
// second.
func (t *fakeProductionForwardFabricTransport) waitForConnect(tb testing.TB, endpoint string, count int) {
	tb.Helper()
	deadline := time.Now().Add(time.Second)
	for {
		got := t.connectCount(endpoint)
		switch {
		case got >= count:
			return
		case time.Now().After(deadline):
			tb.Fatalf("timed out waiting for %s connect count %d, got %d", endpoint, count, got)
		}
		time.Sleep(time.Millisecond)
	}
}
// fakeProductionForwardFabricSession is the session handed out by the fake
// transport. It answers each data frame it is sent with a scripted result.
type fakeProductionForwardFabricSession struct {
// delay is how long Send's responder waits before emitting the reply.
delay time.Duration
// result is wrapped in the reply payload.
result ProductionForwardResult
// frames carries replies to the caller; it is returned by Frames.
frames chan fabricproto.Frame
// errors is returned by Errors.
errors chan error
// done is closed by Close and lets a pending responder give up.
done chan struct{}
// once makes Close safe to call more than once.
once sync.Once
}
// Send answers a data frame with a reply frame that echoes the request's
// traffic class, stream ID and sequence number and carries the scripted
// result as JSON. Frames of any other type are accepted and ignored.
//
// The reply is produced by a background goroutine after the scripted delay;
// the goroutine exits without replying once the session has been closed. An
// error is returned only when the scripted result cannot be encoded, so a
// broken fixture surfaces as a send failure instead of an empty reply payload.
func (s *fakeProductionForwardFabricSession) Send(_ context.Context, frame fabricproto.Frame) error {
	if frame.Type != fabricproto.FrameData {
		return nil
	}
	responsePayload, err := json.Marshal(quicProductionForwardResponse{Result: s.result})
	if err != nil {
		return err
	}
	go func() {
		if s.delay > 0 {
			time.Sleep(s.delay)
		}
		// Either deliver the reply or stop as soon as the session is closed,
		// so the goroutine never outlives an abandoned session.
		select {
		case <-s.done:
		case s.frames <- fabricproto.Frame{
			Type:         fabricproto.FrameData,
			TrafficClass: frame.TrafficClass,
			StreamID:     frame.StreamID,
			Sequence:     frame.Sequence,
			Payload:      responsePayload,
		}:
		}
	}()
	return nil
}
// Frames returns the receive-only view of the channel on which Send's
// responder delivers reply frames.
func (s *fakeProductionForwardFabricSession) Frames() <-chan fabricproto.Frame {
return s.frames
}
// Errors returns the receive-only view of the session's error channel.
func (s *fakeProductionForwardFabricSession) Errors() <-chan error {
return s.errors
}
// Close marks the session as closed by closing its done channel. Repeated
// calls are safe and it always returns nil.
func (s *fakeProductionForwardFabricSession) Close() error {
	signalDone := func() { close(s.done) }
	s.once.Do(signalDone)
	return nil
}
// Closed reports, without blocking, whether Close has been called.
func (s *fakeProductionForwardFabricSession) Closed() bool {
	closed := false
	select {
	case <-s.done:
		closed = true
	default:
	}
	return closed
}
// testProductionForwardEnvelope builds a fabric-control envelope for tests:
// route "route-1" in "cluster-a", travelling from node-a to node-b, created
// now (UTC) and expiring one minute later, with the given message ID.
func testProductionForwardEnvelope(messageID string) ProductionEnvelope {
	createdAt := time.Now().UTC()

	var envelope ProductionEnvelope
	envelope.FabricProtocolVersion = ProtocolVersion
	envelope.MessageID = messageID
	envelope.RouteID = "route-1"
	envelope.ClusterID = "cluster-a"
	envelope.SourceNodeID = "node-a"
	envelope.DestinationNodeID = "node-b"
	envelope.CurrentHopNodeID = "node-a"
	envelope.NextHopNodeID = "node-b"
	envelope.ChannelClass = ProductionChannelFabricControl
	envelope.MessageType = ProductionMessageFabricControl
	envelope.TTL = 8
	envelope.CreatedAt = createdAt
	envelope.ExpiresAt = createdAt.Add(time.Minute)
	return envelope
}
@@ -106,6 +106,9 @@ func (cfg ScopedSyntheticConfig) Validate(local PeerIdentity) error {
if strings.TrimSpace(nodeID) == "" || strings.TrimSpace(endpoint) == "" {
return fmt.Errorf("scoped synthetic mesh config contains empty peer endpoint")
}
if hasLegacyEndpointScheme(endpoint) {
return fmt.Errorf("scoped synthetic mesh config contains non-QUIC peer endpoint")
}
}
for nodeID, candidates := range cfg.PeerEndpointCandidates {
if strings.TrimSpace(nodeID) == "" {
@@ -121,6 +124,9 @@ func (cfg ScopedSyntheticConfig) Validate(local PeerIdentity) error {
strings.TrimSpace(candidate.ConnectivityMode) == "" {
return fmt.Errorf("scoped synthetic mesh config contains invalid peer endpoint candidate")
}
if !isQUICOnlyCandidateTransport(candidate.Transport) || hasLegacyEndpointScheme(candidate.Address) {
return fmt.Errorf("scoped synthetic mesh config contains non-QUIC peer endpoint candidate")
}
}
}
for endpointID, observation := range cfg.PeerEndpointObservations {
@@ -179,6 +185,14 @@ func validatePeerDirectory(entries []PeerDirectoryEntry, localNodeID string) err
return nil
}
// hasLegacyEndpointScheme reports whether endpoint, after trimming surrounding
// whitespace and ignoring case, begins with one of the pre-QUIC URL schemes:
// http://, https://, ws:// or wss://.
func hasLegacyEndpointScheme(endpoint string) bool {
	normalized := strings.ToLower(strings.TrimSpace(endpoint))
	for _, scheme := range [...]string{"http://", "https://", "ws://", "wss://"} {
		if strings.HasPrefix(normalized, scheme) {
			return true
		}
	}
	return false
}
func validateRecoverySeeds(seeds []PeerRecoverySeed) error {
if len(seeds) > 20 {
return fmt.Errorf("scoped synthetic mesh config contains too many recovery seeds")
@@ -191,6 +205,9 @@ func validateRecoverySeeds(seeds []PeerRecoverySeed) error {
strings.TrimSpace(seed.Transport) == "" {
return fmt.Errorf("scoped synthetic mesh config contains invalid recovery seed")
}
if !isQUICOnlyCandidateTransport(seed.Transport) || hasLegacyEndpointScheme(seed.Endpoint) {
return fmt.Errorf("scoped synthetic mesh config contains non-QUIC recovery seed")
}
if _, duplicate := seen[key]; duplicate {
return fmt.Errorf("scoped synthetic mesh config contains duplicate recovery seed")
}
@@ -224,6 +241,9 @@ func validateRendezvousLeases(leases []PeerRendezvousLease, routes []SyntheticRo
(len(lease.Metadata) > 0 && !json.Valid(lease.Metadata)) {
return fmt.Errorf("scoped synthetic mesh config contains invalid rendezvous lease")
}
if !isQUICOnlyCandidateTransport(lease.Transport) || hasLegacyEndpointScheme(lease.RelayEndpoint) {
return fmt.Errorf("scoped synthetic mesh config contains non-QUIC rendezvous lease")
}
if _, duplicate := seen[lease.LeaseID]; duplicate {
return fmt.Errorf("scoped synthetic mesh config contains duplicate rendezvous lease")
}
@@ -18,14 +18,14 @@ func TestLoadScopedSyntheticConfig(t *testing.T) {
ConfigVersion: "config-v1",
PeerDirectoryVersion: "peers-v1",
PolicyVersion: "policy-v1",
PeerEndpoints: map[string]string{"node-b": "http://127.0.0.1:19002"},
PeerEndpoints: map[string]string{"node-b": "quic://127.0.0.1:19443"},
PeerEndpointCandidates: map[string][]PeerEndpointCandidate{
"node-b": {
{
EndpointID: "node-b-public",
NodeID: "node-b",
Transport: "direct_tcp_tls",
Address: "203.0.113.20:443",
Transport: "direct_quic",
Address: "quic://203.0.113.20:19443",
Reachability: "public",
NATType: "restricted",
ConnectivityMode: "direct",
@@ -55,8 +55,8 @@ func TestLoadScopedSyntheticConfig(t *testing.T) {
RecoverySeeds: []PeerRecoverySeed{
{
NodeID: "node-b",
Endpoint: "https://node-b.example.test:443",
Transport: "direct_tcp_tls",
Endpoint: "quic://node-b.example.test:19443",
Transport: "direct_quic",
ConnectivityMode: "direct",
Priority: 10,
},
@@ -66,8 +66,8 @@ func TestLoadScopedSyntheticConfig(t *testing.T) {
LeaseID: "lease-node-b-via-node-r",
PeerNodeID: "node-b",
RelayNodeID: "node-r",
RelayEndpoint: "http://node-r:19000",
Transport: "relay_control",
RelayEndpoint: "quic://node-r:19443",
Transport: "relay_quic",
ConnectivityMode: "relay_required",
RouteIDs: []string{"route-a-b"},
AllowedChannels: []string{"fabric_control", "route_control"},
@@ -158,8 +158,8 @@ func TestLoadScopedSyntheticConfigRejectsInvalidPeerEndpointCandidate(t *testing
{
EndpointID: "node-b-public",
NodeID: "node-c",
Transport: "direct_tcp_tls",
Address: "203.0.113.20:443",
Transport: "direct_quic",
Address: "quic://203.0.113.20:19443",
Reachability: "public",
ConnectivityMode: "direct",
},
@@ -174,6 +174,73 @@ func TestLoadScopedSyntheticConfigRejectsInvalidPeerEndpointCandidate(t *testing
}
}
func TestLoadScopedSyntheticConfigRejectsLegacyPeerEndpoint(t *testing.T) {
path := writeScopedConfig(t, ScopedSyntheticConfig{
SchemaVersion: "c17f.synthetic.v1",
ClusterID: "cluster-1",
LocalNodeID: "node-a",
PeerEndpoints: map[string]string{"node-b": "https://node-b.example.test:443"},
Routes: []SyntheticRoute{liveSyntheticRoute("route-a-b", []string{"node-a", "node-b"})},
})
_, err := LoadScopedSyntheticConfig(path, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"})
if err == nil {
t.Fatal("expected non-QUIC peer endpoint error")
}
}
func TestLoadScopedSyntheticConfigRejectsLegacyPeerEndpointCandidateTransport(t *testing.T) {
path := writeScopedConfig(t, ScopedSyntheticConfig{
SchemaVersion: "c17f.synthetic.v1",
ClusterID: "cluster-1",
LocalNodeID: "node-a",
PeerEndpointCandidates: map[string][]PeerEndpointCandidate{
"node-b": {
{
EndpointID: "node-b-websocket",
NodeID: "node-b",
Transport: "websocket",
Address: "quic://203.0.113.20:19443",
Reachability: "public",
ConnectivityMode: "direct",
},
},
},
Routes: []SyntheticRoute{liveSyntheticRoute("route-a-b", []string{"node-a", "node-b"})},
})
_, err := LoadScopedSyntheticConfig(path, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"})
if err == nil {
t.Fatal("expected non-QUIC peer endpoint candidate error")
}
}
func TestLoadScopedSyntheticConfigRejectsLegacyPeerEndpointCandidateScheme(t *testing.T) {
path := writeScopedConfig(t, ScopedSyntheticConfig{
SchemaVersion: "c17f.synthetic.v1",
ClusterID: "cluster-1",
LocalNodeID: "node-a",
PeerEndpointCandidates: map[string][]PeerEndpointCandidate{
"node-b": {
{
EndpointID: "node-b-https",
NodeID: "node-b",
Transport: "direct_quic",
Address: "https://node-b.example.test:443",
Reachability: "public",
ConnectivityMode: "direct",
},
},
},
Routes: []SyntheticRoute{liveSyntheticRoute("route-a-b", []string{"node-a", "node-b"})},
})
_, err := LoadScopedSyntheticConfig(path, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"})
if err == nil {
t.Fatal("expected non-QUIC peer endpoint candidate error")
}
}
func TestLoadScopedSyntheticConfigRejectsInvalidPeerEndpointObservation(t *testing.T) {
path := writeScopedConfig(t, ScopedSyntheticConfig{
SchemaVersion: "c17f.synthetic.v1",
@@ -217,7 +284,7 @@ func TestLoadScopedSyntheticConfigRejectsInvalidRecoverySeed(t *testing.T) {
ClusterID: "cluster-1",
LocalNodeID: "node-a",
RecoverySeeds: []PeerRecoverySeed{
{NodeID: "node-b", Endpoint: "", Transport: "direct_tcp_tls"},
{NodeID: "node-b", Endpoint: "", Transport: "direct_quic"},
},
Routes: []SyntheticRoute{liveSyntheticRoute("route-a-b", []string{"node-a", "node-b"})},
})
@@ -228,6 +295,23 @@ func TestLoadScopedSyntheticConfigRejectsInvalidRecoverySeed(t *testing.T) {
}
}
func TestLoadScopedSyntheticConfigRejectsLegacyRecoverySeed(t *testing.T) {
path := writeScopedConfig(t, ScopedSyntheticConfig{
SchemaVersion: "c17f.synthetic.v1",
ClusterID: "cluster-1",
LocalNodeID: "node-a",
RecoverySeeds: []PeerRecoverySeed{
{NodeID: "node-b", Endpoint: "https://node-b.example.test:443", Transport: "direct_quic"},
},
Routes: []SyntheticRoute{liveSyntheticRoute("route-a-b", []string{"node-a", "node-b"})},
})
_, err := LoadScopedSyntheticConfig(path, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"})
if err == nil {
t.Fatal("expected non-QUIC recovery seed error")
}
}
func TestLoadScopedSyntheticConfigRejectsInvalidRendezvousLease(t *testing.T) {
path := writeScopedConfig(t, ScopedSyntheticConfig{
SchemaVersion: "c17z12.synthetic.v1",
@@ -238,8 +322,8 @@ func TestLoadScopedSyntheticConfigRejectsInvalidRendezvousLease(t *testing.T) {
LeaseID: "lease-node-b-via-node-r",
PeerNodeID: "node-b",
RelayNodeID: "node-r",
RelayEndpoint: "http://node-r:19000",
Transport: "relay_control",
RelayEndpoint: "quic://node-r:19443",
Transport: "relay_quic",
RouteIDs: []string{"route-a-b"},
ExpiresAt: time.Now().UTC().Add(time.Hour),
},
@@ -253,6 +337,36 @@ func TestLoadScopedSyntheticConfigRejectsInvalidRendezvousLease(t *testing.T) {
}
}
func TestLoadScopedSyntheticConfigRejectsLegacyRendezvousLease(t *testing.T) {
path := writeScopedConfig(t, ScopedSyntheticConfig{
SchemaVersion: "c17z12.synthetic.v1",
ClusterID: "cluster-1",
LocalNodeID: "node-a",
RendezvousLeases: []PeerRendezvousLease{
{
LeaseID: "lease-node-b-via-node-r",
PeerNodeID: "node-b",
RelayNodeID: "node-r",
RelayEndpoint: "https://node-r.example.test:443",
Transport: "relay_quic",
ConnectivityMode: "relay_required",
RouteIDs: []string{"route-a-b"},
AllowedChannels: []string{"fabric_control", "route_control"},
Priority: 10,
ControlPlaneOnly: true,
IssuedAt: time.Now().UTC().Add(-time.Minute),
ExpiresAt: time.Now().UTC().Add(time.Hour),
},
},
Routes: []SyntheticRoute{liveSyntheticRoute("route-a-b", []string{"node-a", "node-r", "node-b"})},
})
_, err := LoadScopedSyntheticConfig(path, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"})
if err == nil {
t.Fatal("expected non-QUIC rendezvous lease error")
}
}
func writeScopedConfig(t *testing.T, cfg ScopedSyntheticConfig) string {
t.Helper()
payload, err := json.Marshal(cfg)
@@ -265,3 +379,32 @@ func writeScopedConfig(t *testing.T, cfg ScopedSyntheticConfig) string {
}
return path
}
// liveSyntheticRoute builds a SyntheticRoute fixture in "cluster-1" that runs
// along hops: the first hop is the source, the last hop is the destination,
// and the route expires one hour from now. Only the fabric-control channel is
// allowed. hops must be non-empty; indexing an empty slice panics.
func liveSyntheticRoute(routeID string, hops []string) SyntheticRoute {
	source, destination := hops[0], hops[len(hops)-1]
	var route SyntheticRoute
	route.RouteID = routeID
	route.ClusterID = "cluster-1"
	route.SourceNodeID = source
	route.DestinationNodeID = destination
	route.Hops = hops
	route.AllowedChannels = []string{SyntheticChannelFabricControl}
	route.MaxTTL = 8
	route.MaxHops = 8
	route.ExpiresAt = time.Now().UTC().Add(time.Hour)
	route.RouteVersion = "route-v1"
	route.PolicyVersion = "policy-v1"
	route.PeerDirectoryVersion = "peers-v1"
	return route
}
// sameStrings reports whether left and right hold the same strings in the
// same order. A nil slice and an empty slice compare equal.
func sameStrings(left, right []string) bool {
	if len(left) != len(right) {
		return false
	}
	for idx, want := range left {
		if right[idx] != want {
			return false
		}
	}
	return true
}
+63 -56
View File
@@ -69,22 +69,24 @@ type VPNPacketIngressRoutePreference interface {
}
type Server struct {
Local PeerIdentity
SyntheticRuntime *SyntheticRuntime
ProductionForwardingEnabled bool
ProductionEnvelopeObserver ProductionEnvelopeObserver
ProductionEnvelopeDelivery ProductionEnvelopeDelivery
ProductionForwardTransport ProductionForwardTransport
ProductionForwardLogger ProductionForwardLogger
FabricServiceChannelLogger FabricServiceChannelAccessLogger
RemoteWorkspaceFrameSink RemoteWorkspaceFrameSink
ProductionRoutes []SyntheticRoute
VPNPacketIngress VPNPacketIngress
BackendProxyBaseURL string
ClusterAuthorityPublicKey string
ServiceChannelIntrospection bool
FabricSessionEnabled bool
FabricSessionLogger FabricSessionEventLogger
Local PeerIdentity
SyntheticRuntime *SyntheticRuntime
ProductionForwardingEnabled bool
ProductionEnvelopeObserver ProductionEnvelopeObserver
ProductionEnvelopeDelivery ProductionEnvelopeDelivery
ProductionForwardTransport ProductionForwardTransport
ProductionForwardLogger ProductionForwardLogger
DisableHTTPDataPlane bool
FabricServiceChannelLogger FabricServiceChannelAccessLogger
RemoteWorkspaceFrameSink RemoteWorkspaceFrameSink
ProductionRoutes []SyntheticRoute
VPNPacketIngress VPNPacketIngress
BackendProxyBaseURL string
ClusterAuthorityPublicKey string
ServiceChannelIntrospection bool
FabricSessionEnabled bool
FabricSessionWebSocketEnabled bool
FabricSessionLogger FabricSessionEventLogger
}
func (s Server) Handler() http.Handler {
@@ -92,7 +94,7 @@ func (s Server) Handler() http.Handler {
mux.HandleFunc("/mesh/v1/health", s.handleHealth)
mux.HandleFunc("/mesh/v1/forward", s.handleForward)
mux.HandleFunc("/mesh/v1/synthetic/probe", s.handleSyntheticProbe)
if s.FabricSessionEnabled {
if s.FabricSessionEnabled && s.FabricSessionWebSocketEnabled {
mux.HandleFunc("/mesh/v1/fabric/session/ws", s.handleFabricSessionWebSocket)
}
if s.RemoteWorkspaceFrameSink != nil {
@@ -198,6 +200,7 @@ type FabricSessionEventLogEntry struct {
Event string `json:"event"`
ClusterID string `json:"cluster_id,omitempty"`
NodeID string `json:"node_id,omitempty"`
PeerID string `json:"peer_id,omitempty"`
AcceptedBy string `json:"accepted_by,omitempty"`
SessionID string `json:"session_id,omitempty"`
SessionEvent fabricproto.SessionEventType `json:"session_event,omitempty"`
@@ -2079,16 +2082,12 @@ func (s Server) handleForward(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusMethodNotAllowed)
return
}
if s.DisableHTTPDataPlane {
http.Error(w, "mesh data-plane forwarding requires QUIC fabric transport", http.StatusGone)
return
}
if !s.ProductionForwardingEnabled {
s.logProductionForward(ProductionForwardLogEntry{
Event: "production_forward_rejected",
ClusterID: s.Local.ClusterID,
LocalNodeID: s.Local.NodeID,
Reason: ErrForwardDisabled.Error(),
StatusCode: http.StatusNotImplemented,
OccurredAt: time.Now().UTC(),
})
http.Error(w, ErrForwardDisabled.Error(), http.StatusNotImplemented)
s.rejectProductionForward(w, ProductionEnvelope{}, ErrForwardDisabled, forwardStatusCode(ErrForwardDisabled))
return
}
var envelope ProductionEnvelope
@@ -2104,54 +2103,57 @@ func (s Server) handleForward(w http.ResponseWriter, r *http.Request) {
http.Error(w, "invalid production mesh envelope", http.StatusBadRequest)
return
}
if err := ValidateProductionEnvelope(s.Local, envelope, time.Now().UTC()); err != nil {
result, err := s.ForwardProduction(r.Context(), envelope)
if err != nil {
s.rejectProductionForward(w, envelope, err, forwardStatusCode(err))
return
}
writeProductionForwardResult(w, result)
}
func (s Server) ForwardProduction(ctx context.Context, envelope ProductionEnvelope) (ProductionForwardResult, error) {
if !s.ProductionForwardingEnabled {
return ProductionForwardResult{}, ErrForwardDisabled
}
if err := ValidateProductionEnvelope(s.Local, envelope, time.Now().UTC()); err != nil {
return ProductionForwardResult{}, err
}
if err := ValidateProductionEnvelopeRouteConfig(s.Local, envelope, s.ProductionRoutes, time.Now().UTC()); err != nil {
s.rejectProductionForward(w, envelope, err, forwardStatusCode(err))
return
return ProductionForwardResult{}, err
}
s.logProductionForward(productionForwardLogEntry("production_forward_accepted", s.Local, envelope, "", 0))
if s.ProductionEnvelopeObserver != nil {
observation := NewProductionEnvelopeObservation(envelope, time.Now().UTC())
if err := observeProductionEnvelope(r.Context(), s.ProductionEnvelopeObserver, observation); err != nil {
if err := observeProductionEnvelope(ctx, s.ProductionEnvelopeObserver, observation); err != nil {
s.logProductionForward(productionForwardLogEntry("production_forward_rejected", s.Local, envelope, ErrForwardObservationFailed.Error(), http.StatusInternalServerError))
http.Error(w, ErrForwardObservationFailed.Error(), http.StatusInternalServerError)
return
return ProductionForwardResult{}, ErrForwardObservationFailed
}
}
if envelope.DestinationNodeID == s.Local.NodeID {
if err := deliverProductionEnvelope(r.Context(), s.ProductionEnvelopeDelivery, envelope); err != nil {
if err := deliverProductionEnvelope(ctx, s.ProductionEnvelopeDelivery, envelope); err != nil {
s.logProductionForward(productionForwardLogEntry("production_forward_rejected", s.Local, envelope, ErrForwardDeliveryFailed.Error(), http.StatusInternalServerError))
http.Error(w, ErrForwardDeliveryFailed.Error(), http.StatusInternalServerError)
return
return ProductionForwardResult{}, ErrForwardDeliveryFailed
}
s.logProductionForward(productionForwardLogEntry("production_forward_delivered", s.Local, envelope, "", http.StatusOK))
writeProductionForwardResult(w, ProductionForwardResult{
return ProductionForwardResult{
Accepted: true,
Delivered: true,
By: s.Local,
MessageID: envelope.MessageID,
RouteID: envelope.RouteID,
})
return
}, nil
}
if envelope.NextHopNodeID == s.Local.NodeID {
s.rejectProductionForward(w, envelope, ErrLoopDetected, forwardStatusCode(ErrLoopDetected))
return
return ProductionForwardResult{}, ErrLoopDetected
}
if len(envelope.RoutePath) == 0 && envelope.NextHopNodeID != envelope.DestinationNodeID {
s.rejectProductionForward(w, envelope, ErrForwardRuntimeUnavailable, http.StatusNotImplemented)
return
return ProductionForwardResult{}, ErrForwardRuntimeUnavailable
}
if s.ProductionForwardTransport == nil {
s.rejectProductionForward(w, envelope, ErrForwardRuntimeUnavailable, http.StatusNotImplemented)
return
return ProductionForwardResult{}, ErrForwardRuntimeUnavailable
}
if envelope.TTL <= 1 {
s.rejectProductionForward(w, envelope, ErrTTLExhausted, forwardStatusCode(ErrTTLExhausted))
return
return ProductionForwardResult{}, ErrTTLExhausted
}
forwarded := envelope
forwarded.CurrentHopNodeID = envelope.NextHopNodeID
@@ -2159,10 +2161,9 @@ func (s Server) handleForward(w http.ResponseWriter, r *http.Request) {
forwarded.TTL = envelope.TTL - 1
forwarded.HopCount = envelope.HopCount + 1
forwarded.VisitedNodeIDs = append(append([]string{}, envelope.VisitedNodeIDs...), s.Local.NodeID)
result, err := s.ProductionForwardTransport.SendProduction(r.Context(), envelope.NextHopNodeID, forwarded)
result, err := s.ProductionForwardTransport.SendProduction(ctx, envelope.NextHopNodeID, forwarded)
if err != nil {
s.rejectProductionForward(w, envelope, err, forwardStatusCode(err))
return
return ProductionForwardResult{}, err
}
s.logProductionForward(productionForwardLogEntry("production_forward_forwarded", s.Local, envelope, "", http.StatusOK))
result.Accepted = true
@@ -2171,7 +2172,7 @@ func (s Server) handleForward(w http.ResponseWriter, r *http.Request) {
result.MessageID = envelope.MessageID
result.RouteID = envelope.RouteID
result.NextNodeID = envelope.NextHopNodeID
writeProductionForwardResult(w, result)
return result, nil
}
func (s Server) rejectProductionForward(w http.ResponseWriter, envelope ProductionEnvelope, err error, statusCode int) {
@@ -2262,6 +2263,10 @@ func (s Server) handleSyntheticProbe(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusMethodNotAllowed)
return
}
if s.DisableHTTPDataPlane {
http.Error(w, "mesh synthetic probes require QUIC fabric transport", http.StatusGone)
return
}
if s.SyntheticRuntime == nil {
http.Error(w, ErrMeshRuntimeDisabled.Error(), http.StatusServiceUnavailable)
return
@@ -2307,17 +2312,19 @@ func syntheticStatusCode(err error) int {
}
func forwardStatusCode(err error) int {
switch err {
case ErrClusterMismatch, ErrNodeMismatch, ErrUnauthorizedChannel, ErrLoopDetected:
switch {
case errors.Is(err, ErrClusterMismatch), errors.Is(err, ErrNodeMismatch), errors.Is(err, ErrUnauthorizedChannel), errors.Is(err, ErrLoopDetected):
return http.StatusForbidden
case ErrRouteExpired, ErrTTLExhausted, ErrInvalidRoutePath, ErrRouteIDRequired:
case errors.Is(err, ErrRouteExpired), errors.Is(err, ErrTTLExhausted), errors.Is(err, ErrInvalidRoutePath), errors.Is(err, ErrRouteIDRequired), errors.Is(err, ErrForwardEnvelopeInvalid):
return http.StatusBadRequest
case ErrForwardRuntimeUnavailable:
case errors.Is(err, ErrForwardRuntimeUnavailable), errors.Is(err, ErrForwardDisabled):
return http.StatusNotImplemented
case ErrRouteNotFound:
case errors.Is(err, ErrRouteNotFound):
return http.StatusNotFound
case ErrForwardPeerUnavailable:
case errors.Is(err, ErrForwardPeerUnavailable):
return http.StatusBadGateway
case errors.Is(err, ErrForwardObservationFailed), errors.Is(err, ErrForwardDeliveryFailed):
return http.StatusInternalServerError
default:
return http.StatusBadRequest
}
@@ -23,6 +23,18 @@ import (
"github.com/gorilla/websocket"
)
// testProductionForwardTransport is a test double that hands production
// envelopes straight to in-memory Server values instead of going over a
// network transport.
type testProductionForwardTransport struct {
	// targets maps a next-hop node ID to the Server that should receive the envelope.
	targets map[string]Server
}
// SendProduction looks up the Server registered for nextNodeID (surrounding
// whitespace ignored) and invokes its ForwardProduction directly. It returns
// ErrForwardPeerUnavailable when no Server is registered under that node ID.
func (t testProductionForwardTransport) SendProduction(ctx context.Context, nextNodeID string, envelope ProductionEnvelope) (ProductionForwardResult, error) {
	if target, found := t.targets[strings.TrimSpace(nextNodeID)]; found {
		return target.ForwardProduction(ctx, envelope)
	}
	return ProductionForwardResult{}, ErrForwardPeerUnavailable
}
func TestMeshHealthAcceptsSameCluster(t *testing.T) {
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"}
server := httptest.NewServer(Server{Local: local}.Handler())
@@ -92,8 +104,9 @@ func TestFabricSessionWebSocketDisabledByDefault(t *testing.T) {
func TestFabricSessionWebSocketPingPongAndEvents(t *testing.T) {
var events []FabricSessionEventLogEntry
server := httptest.NewServer(Server{
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
FabricSessionEnabled: true,
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
FabricSessionEnabled: true,
FabricSessionWebSocketEnabled: true,
FabricSessionLogger: func(entry FabricSessionEventLogEntry) {
events = append(events, entry)
},
@@ -119,8 +132,9 @@ func TestFabricSessionWebSocketPingPongAndEvents(t *testing.T) {
func TestFabricSessionWebSocketOpenStreamDataAck(t *testing.T) {
server := httptest.NewServer(Server{
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
FabricSessionEnabled: true,
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
FabricSessionEnabled: true,
FabricSessionWebSocketEnabled: true,
}.Handler())
defer server.Close()
@@ -151,8 +165,9 @@ func TestFabricSessionWebSocketOpenStreamDataAck(t *testing.T) {
func TestFabricSessionWebSocketRequiresToken(t *testing.T) {
server := httptest.NewServer(Server{
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
FabricSessionEnabled: true,
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
FabricSessionEnabled: true,
FabricSessionWebSocketEnabled: true,
}.Handler())
defer server.Close()
@@ -172,9 +187,10 @@ func TestFabricSessionWebSocketRequiresSignedAuthorityWhenConfigured(t *testing.
t.Fatalf("generate key: %v", err)
}
server := httptest.NewServer(Server{
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
FabricSessionEnabled: true,
ClusterAuthorityPublicKey: base64.StdEncoding.EncodeToString(publicKey),
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
FabricSessionEnabled: true,
FabricSessionWebSocketEnabled: true,
ClusterAuthorityPublicKey: base64.StdEncoding.EncodeToString(publicKey),
}.Handler())
defer server.Close()
@@ -196,9 +212,10 @@ func TestFabricSessionWebSocketAcceptsSignedAuthority(t *testing.T) {
token := "rap_fsn_signedtest"
var events []FabricSessionEventLogEntry
server := httptest.NewServer(Server{
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
FabricSessionEnabled: true,
ClusterAuthorityPublicKey: base64.StdEncoding.EncodeToString(publicKey),
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
FabricSessionEnabled: true,
FabricSessionWebSocketEnabled: true,
ClusterAuthorityPublicKey: base64.StdEncoding.EncodeToString(publicKey),
FabricSessionLogger: func(entry FabricSessionEventLogEntry) {
events = append(events, entry)
},
@@ -360,23 +377,20 @@ func TestMeshForwardingGateDeliversFabricControlAtDestination(t *testing.T) {
func TestMeshForwardingGateForwardsDirectFabricControlToNextHop(t *testing.T) {
nodeC := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-c"}
var deliveredObservation ProductionEnvelopeObservation
serverC := httptest.NewServer(Server{
serverC := Server{
Local: nodeC,
ProductionForwardingEnabled: true,
ProductionEnvelopeObserver: func(_ context.Context, observation ProductionEnvelopeObservation) error {
deliveredObservation = observation
return nil
},
}.Handler())
defer serverC.Close()
}
nodeB := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"}
serverB := httptest.NewServer(Server{
Local: nodeB,
ProductionForwardingEnabled: true,
ProductionForwardTransport: NewHTTPProductionForwardTransport(map[string]string{
nodeC.NodeID: serverC.URL,
}),
ProductionForwardTransport: testProductionForwardTransport{targets: map[string]Server{nodeC.NodeID: serverC}},
}.Handler())
defer serverB.Close()
@@ -414,36 +428,30 @@ func TestMeshForwardingGateForwardsMultiHopFabricControlByRoutePath(t *testing.T
var deliveredObservation ProductionEnvelopeObservation
var nodeREvents []ProductionForwardLogEntry
var nodeBEvents []ProductionForwardLogEntry
serverC := httptest.NewServer(Server{
serverC := Server{
Local: nodeC,
ProductionForwardingEnabled: true,
ProductionEnvelopeObserver: func(_ context.Context, observation ProductionEnvelopeObservation) error {
deliveredObservation = observation
return nil
},
}.Handler())
defer serverC.Close()
}
nodeR := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-r"}
serverR := httptest.NewServer(Server{
serverR := Server{
Local: nodeR,
ProductionForwardingEnabled: true,
ProductionForwardTransport: NewHTTPProductionForwardTransport(map[string]string{
nodeC.NodeID: serverC.URL,
}),
ProductionForwardTransport: testProductionForwardTransport{targets: map[string]Server{nodeC.NodeID: serverC}},
ProductionForwardLogger: func(entry ProductionForwardLogEntry) {
nodeREvents = append(nodeREvents, entry)
},
}.Handler())
defer serverR.Close()
}
nodeB := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"}
serverB := httptest.NewServer(Server{
Local: nodeB,
ProductionForwardingEnabled: true,
ProductionForwardTransport: NewHTTPProductionForwardTransport(map[string]string{
nodeR.NodeID: serverR.URL,
}),
ProductionForwardTransport: testProductionForwardTransport{targets: map[string]Server{nodeR.NodeID: serverR}},
ProductionForwardLogger: func(entry ProductionForwardLogEntry) {
nodeBEvents = append(nodeBEvents, entry)
},
@@ -490,7 +498,7 @@ func TestMeshForwardingGateForwardsConfiguredProductionRoute(t *testing.T) {
nodeC := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-c"}
route := configuredProductionRoute("route-1", []string{"node-a", "node-b", "node-r", nodeC.NodeID})
var deliveredObservation ProductionEnvelopeObservation
serverC := httptest.NewServer(Server{
serverC := Server{
Local: nodeC,
ProductionForwardingEnabled: true,
ProductionRoutes: []SyntheticRoute{route},
@@ -498,28 +506,22 @@ func TestMeshForwardingGateForwardsConfiguredProductionRoute(t *testing.T) {
deliveredObservation = observation
return nil
},
}.Handler())
defer serverC.Close()
}
nodeR := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-r"}
serverR := httptest.NewServer(Server{
serverR := Server{
Local: nodeR,
ProductionForwardingEnabled: true,
ProductionRoutes: []SyntheticRoute{route},
ProductionForwardTransport: NewHTTPProductionForwardTransport(map[string]string{
nodeC.NodeID: serverC.URL,
}),
}.Handler())
defer serverR.Close()
ProductionForwardTransport: testProductionForwardTransport{targets: map[string]Server{nodeC.NodeID: serverC}},
}
nodeB := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"}
serverB := httptest.NewServer(Server{
Local: nodeB,
ProductionForwardingEnabled: true,
ProductionRoutes: []SyntheticRoute{route},
ProductionForwardTransport: NewHTTPProductionForwardTransport(map[string]string{
nodeR.NodeID: serverR.URL,
}),
ProductionForwardTransport: testProductionForwardTransport{targets: map[string]Server{nodeR.NodeID: serverR}},
}.Handler())
defer serverB.Close()
@@ -5016,3 +5018,30 @@ func TestSyntheticEndpointDisabledByDefault(t *testing.T) {
t.Fatalf("status = %d, want %d", resp.StatusCode, http.StatusServiceUnavailable)
}
}
// TestHTTPDataPlaneDisabledRequiresQUIC starts a Server with
// DisableHTTPDataPlane set and checks that POSTs to both the synthetic probe
// endpoint and the production forward endpoint are answered with 410 Gone.
func TestHTTPDataPlaneDisabledRequiresQUIC(t *testing.T) {
	server := httptest.NewServer(Server{
		Local:                PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"},
		SyntheticRuntime:     NewSyntheticRuntime(SyntheticRuntimeConfig{Enabled: true, Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"}}),
		DisableHTTPDataPlane: true,
	}.Handler())
	defer server.Close()

	// Probed in the same order as before: synthetic probe first, then forward.
	endpoints := []struct {
		path          string
		postFailure   string
		statusFailure string
	}{
		{"/mesh/v1/synthetic/probe", "post synthetic probe: %v", "synthetic status = %d, want %d"},
		{"/mesh/v1/forward", "post production forward: %v", "forward status = %d, want %d"},
	}
	for _, endpoint := range endpoints {
		resp, err := http.Post(server.URL+endpoint.path, "application/json", bytes.NewReader([]byte(`{}`)))
		if err != nil {
			t.Fatalf(endpoint.postFailure, err)
		}
		// Deferred on purpose: both bodies are closed when the test returns.
		defer resp.Body.Close()
		if resp.StatusCode != http.StatusGone {
			t.Fatalf(endpoint.statusFailure, resp.StatusCode, http.StatusGone)
		}
	}
}
@@ -0,0 +1,268 @@
package mesh
import (
"context"
"encoding/json"
"fmt"
"strings"
"sync/atomic"
"time"
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
)
// QUICSyntheticTransport sends synthetic envelopes to peer nodes as fabric
// frames over a FabricTransport, choosing and re-choosing routes through a
// FabricChannelRouter.
type QUICSyntheticTransport struct {
	// Targets maps a peer node ID to a single transport target; SendSynthetic
	// falls back to it when RouteSets has no entry for the peer.
	Targets map[string]FabricTransportTarget
	// RouteSets maps a peer node ID to the candidate routes for reaching it.
	RouteSets map[string]FabricRouteSet
	// Transport opens the sessions frames are sent on. SendSynthetic fails
	// with ErrSyntheticPeerUnavailable when it is nil.
	Transport FabricTransport
	// Router opens a channel per send and is told about every attempt's outcome.
	Router FabricChannelRouter
	// Timeout bounds the wait for a response frame; values <= 0 fall back to 10s.
	Timeout time.Duration
	// Pressure, when non-nil, is acquired around each attempt and applied to
	// the route set before scheduling.
	Pressure *FabricRoutePressureTracker
	// Health, when non-nil, records per-route success/failure and is applied
	// to the route set before scheduling.
	Health *FabricRouteHealthTracker
	// sequence feeds both synthetic channel IDs and frame sequence numbers.
	sequence atomic.Uint64
}
// QUICSyntheticTransportSnapshot is the JSON-serialisable view returned by
// QUICSyntheticTransport.Snapshot.
type QUICSyntheticTransportSnapshot struct {
	// RoutePressure is the pressure tracker's snapshot (zero value when no tracker is set).
	RoutePressure FabricRoutePressureSnapshot `json:"route_pressure"`
	// RouteHealth is the health tracker's snapshot (zero value when no tracker is set).
	RouteHealth FabricRouteHealthSnapshot `json:"route_health,omitempty"`
}
// NewQUICSyntheticTransportFromRouteSets builds a QUICSyntheticTransport from
// per-peer route sets.
//
// Keys of routeSets are trimmed; entries whose key is blank are dropped. For
// every remaining peer the route set is kept, and a fallback target is derived
// from its primary route when FabricTransportTargetForRoute succeeds (a
// failure there is ignored and simply leaves the peer without a fallback).
// A nil transport is replaced by NewQUICFabricTransport(nil). The result uses
// a router with a 2000 ms max ack latency and 50 ms minimum reroute interval,
// a 10 s timeout, a fresh pressure tracker, and a health tracker constructed
// with 30 s.
func NewQUICSyntheticTransportFromRouteSets(routeSets map[string]FabricRouteSet, transport FabricTransport) *QUICSyntheticTransport {
	cleanedRouteSets := make(map[string]FabricRouteSet, len(routeSets))
	fallbackTargets := make(map[string]FabricTransportTarget, len(routeSets))
	for rawNodeID, set := range routeSets {
		peerID := strings.TrimSpace(rawNodeID)
		if peerID == "" {
			continue
		}
		cleanedRouteSets[peerID] = set
		primaryTarget, err := FabricTransportTargetForRoute(set.Primary)
		if err != nil {
			// Best effort: the peer stays reachable through its route set.
			continue
		}
		fallbackTargets[peerID] = primaryTarget
	}
	if transport == nil {
		transport = NewQUICFabricTransport(nil)
	}
	routerConfig := FabricChannelRouterConfig{
		MaxAckLatencyMs:    2000,
		MinRerouteInterval: 50 * time.Millisecond,
	}
	return &QUICSyntheticTransport{
		Targets:   fallbackTargets,
		RouteSets: cleanedRouteSets,
		Transport: transport,
		Router:    NewFabricChannelRouter(routerConfig),
		Timeout:   10 * time.Second,
		Pressure:  NewFabricRoutePressureTracker(),
		Health:    NewFabricRouteHealthTracker(30 * time.Second),
	}
}
// SendSynthetic forwards envelope to nextNodeID and returns the envelope the
// peer answered with.
//
// The peer's route set is taken from RouteSets; when none is registered, a
// single-target route set is built from Targets instead. If neither yields a
// usable endpoint, or the receiver or its Transport is nil, the call fails
// with ErrSyntheticPeerUnavailable. The envelope is JSON-encoded and sent on a
// freshly numbered "synthetic-N" reliable channel.
func (t *QUICSyntheticTransport) SendSynthetic(ctx context.Context, nextNodeID string, envelope SyntheticEnvelope) (SyntheticEnvelope, error) {
	if t == nil || t.Transport == nil {
		return SyntheticEnvelope{}, ErrSyntheticPeerUnavailable
	}
	peerID := strings.TrimSpace(nextNodeID)
	routes, known := t.RouteSets[peerID]
	if !known {
		fallback, found := t.Targets[peerID]
		if !(found && strings.TrimSpace(fallback.Endpoint) != "") {
			return SyntheticEnvelope{}, ErrSyntheticPeerUnavailable
		}
		routes = FabricRouteSetForTransportTargets(envelope.ClusterID, envelope.From.NodeID, peerID, []FabricTransportTarget{fallback})
	}
	// The channel ID consumes a sequence number before encoding, so the
	// counter advances even when the envelope cannot be marshalled.
	channelID := fmt.Sprintf("synthetic-%d", t.sequence.Add(1))
	spec := FabricChannelSpec{
		ChannelID:    channelID,
		ClusterID:    envelope.ClusterID,
		SourceNodeID: envelope.From.NodeID,
		TargetKind:   FabricChannelTargetNode,
		TargetID:     peerID,
		TrafficClass: FabricServiceChannelReliable,
		CreatedAt:    time.Now().UTC(),
	}
	encoded, err := json.Marshal(envelope)
	if err != nil {
		return SyntheticEnvelope{}, err
	}
	return t.sendSyntheticWithRouteSet(ctx, spec, routes, encoded)
}
// sendSyntheticWithRouteSet delivers payload to the peer described by spec
// over one of the routes in routeSet and returns the decoded response
// envelope.
//
// A channel is opened on the router, then each attempt:
//  1. re-applies the health and pressure trackers to routeSet,
//  2. resolves the channel's current route to a transport target,
//  3. connects, sends payload and waits up to the configured timeout
//     (10 s when Timeout <= 0) for the matching response frame.
//
// A successful attempt is recorded on the health tracker and reported to the
// router before the response is decoded. A failed attempt (connect or
// response) is recorded as a route failure and reported to the router; if the
// router answers with a reroute event the loop retries on the channel's new
// route. Otherwise the router's error is returned when it has one, or the
// attempt's error wrapped in ErrSyntheticPeerUnavailable.
//
// Rerouting stops once ctx is done: further attempts could not succeed and
// would only mark the remaining healthy routes as failed, so the last
// attempt's error is returned wrapped in ErrSyntheticPeerUnavailable instead.
func (t *QUICSyntheticTransport) sendSyntheticWithRouteSet(ctx context.Context, spec FabricChannelSpec, routeSet FabricRouteSet, payload []byte) (SyntheticEnvelope, error) {
	router := t.Router
	if router.Config.MaxRoutePressure == 0 {
		// NOTE(review): a zero MaxRoutePressure is treated as "Router was not
		// built by NewFabricChannelRouter" — presumably that constructor fills
		// in a non-zero default; confirm. The settings mirror the ones used by
		// NewQUICSyntheticTransportFromRouteSets.
		router = NewFabricChannelRouter(FabricChannelRouterConfig{MaxAckLatencyMs: 2000, MinRerouteInterval: 50 * time.Millisecond})
	}
	routeSet = t.routeSetForScheduling(routeSet)
	channel, _, err := router.OpenChannel(spec, routeSet, time.Now().UTC())
	if err != nil {
		return SyntheticEnvelope{}, err
	}
	timeout := t.Timeout
	if timeout <= 0 {
		timeout = 10 * time.Second
	}

	// recordFailure marks routeID unhealthy, reports the failed attempt to the
	// router and updates channel with the router's answer. It returns
	// retry=true when the loop should try again on the channel's new route;
	// otherwise it returns the error the caller should receive.
	recordFailure := func(routeID, reason string, cause error) (retry bool, failure error) {
		t.markSyntheticRouteFailure(routeID, cause)
		updated, event, rerouteErr := router.ObserveChannel(channel, routeSet, FabricChannelObservation{
			ChannelID:  spec.ChannelID,
			RouteID:    routeID,
			Failed:     true,
			Reason:     reason,
			ObservedAt: time.Now().UTC(),
		}, time.Now().UTC())
		channel = updated
		if event.Type == FabricChannelRouteEventReroute {
			if ctx.Err() != nil {
				// The caller has cancelled or timed out: do not burn (and
				// blame) the remaining routes on attempts that cannot succeed.
				return false, fmt.Errorf("%w: %v", ErrSyntheticPeerUnavailable, cause)
			}
			return true, nil
		}
		if rerouteErr != nil {
			return false, rerouteErr
		}
		return false, fmt.Errorf("%w: %v", ErrSyntheticPeerUnavailable, cause)
	}

	for {
		// Re-apply health/pressure each round so a route that just failed is
		// seen in its updated state.
		routeSet = t.routeSetForScheduling(routeSet)
		route, ok := findFabricRoute(routeSet, channel.RouteID)
		if !ok {
			return SyntheticEnvelope{}, ErrFabricRouteNotFound
		}
		target, err := FabricTransportTargetForRoute(route)
		if err != nil {
			return SyntheticEnvelope{}, err
		}
		target.PeerID = firstNonEmpty(strings.TrimSpace(target.PeerID), spec.TargetID)
		target.MaxPayload = fabricproto.DefaultMaxPayload

		releaseRoute := t.acquireSyntheticRoute(route.RouteID)
		session, err := t.Transport.Connect(ctx, target)
		if err != nil {
			releaseRoute()
			retry, failure := recordFailure(route.RouteID, "connect_failed", err)
			if retry {
				continue
			}
			return SyntheticEnvelope{}, failure
		}

		response, ackMs, err := t.sendSyntheticOnSession(ctx, session, payload, timeout)
		// One session per attempt; a close error cannot change the outcome.
		_ = session.Close()
		releaseRoute()
		if err == nil {
			t.markSyntheticRouteSuccess(route.RouteID)
			// The router's reaction to a success observation is not needed here.
			_, _, _ = router.ObserveChannel(channel, routeSet, FabricChannelObservation{
				ChannelID:    spec.ChannelID,
				RouteID:      route.RouteID,
				AckLatencyMs: ackMs,
				BytesSent:    uint64(len(payload)),
				FramesSent:   1,
				BytesRecv:    uint64(len(response.Payload)),
				FramesRecv:   1,
				ObservedAt:   time.Now().UTC(),
			}, time.Now().UTC())
			return decodeQUICSyntheticForwardResponse(response.Payload)
		}

		retry, failure := recordFailure(route.RouteID, "response_failed", err)
		if retry {
			continue
		}
		return SyntheticEnvelope{}, failure
	}
}
// routeSetForScheduling returns routeSet after passing it through the health
// tracker and then the pressure tracker, skipping whichever is not configured.
// A nil receiver returns routeSet unchanged.
func (t *QUICSyntheticTransport) routeSetForScheduling(routeSet FabricRouteSet) FabricRouteSet {
	if t == nil {
		return routeSet
	}
	if health := t.Health; health != nil {
		routeSet = health.Apply(routeSet, time.Now().UTC())
	}
	if pressure := t.Pressure; pressure != nil {
		routeSet = pressure.Apply(routeSet)
	}
	return routeSet
}
// acquireSyntheticRoute acquires routeID on the pressure tracker and returns
// the tracker's release function. Without a tracker (or with a nil receiver)
// it returns a no-op so callers can always invoke the result.
func (t *QUICSyntheticTransport) acquireSyntheticRoute(routeID string) func() {
	if t != nil && t.Pressure != nil {
		return t.Pressure.Acquire(routeID)
	}
	return func() {}
}
// markSyntheticRouteFailure records err against routeID on the health tracker,
// using err's message as the reason and the current UTC time. It does nothing
// when the receiver, the tracker, or err is nil.
func (t *QUICSyntheticTransport) markSyntheticRouteFailure(routeID string, err error) {
	if t != nil && t.Health != nil && err != nil {
		t.Health.MarkFailure(routeID, err.Error(), time.Now().UTC())
	}
}
// markSyntheticRouteSuccess records a success for routeID on the health
// tracker. It does nothing when the receiver or the tracker is nil.
func (t *QUICSyntheticTransport) markSyntheticRouteSuccess(routeID string) {
	if t != nil && t.Health != nil {
		t.Health.MarkSuccess(routeID)
	}
}
// Snapshot returns the current pressure and health tracker snapshots. Parts
// whose tracker is not configured are left at their zero value, and a nil
// receiver yields an entirely zero snapshot.
func (t *QUICSyntheticTransport) Snapshot() QUICSyntheticTransportSnapshot {
	var snapshot QUICSyntheticTransportSnapshot
	if t == nil {
		return snapshot
	}
	if t.Pressure != nil {
		snapshot.RoutePressure = t.Pressure.SnapshotPressure()
	}
	if t.Health != nil {
		snapshot.RouteHealth = t.Health.Snapshot(time.Now().UTC())
	}
	return snapshot
}
// sendSyntheticOnSession sends payload as one reliable data frame on the
// synthetic-forward stream and waits for the data frame on that stream that
// carries the same sequence number.
//
// It returns the response frame and the milliseconds elapsed between the send
// completing and the response arriving. The wait ends early with the context
// error when ctx is done or, if timeout > 0, when timeout elapses; with the
// session's error when one is reported; and with ErrSyntheticPeerUnavailable
// when either session channel is closed. Frames that do not match are skipped.
func (t *QUICSyntheticTransport) sendSyntheticOnSession(ctx context.Context, session FabricTransportSession, payload []byte, timeout time.Duration) (fabricproto.Frame, int64, error) {
	var none fabricproto.Frame

	wantSequence := t.sequence.Add(1)
	request := fabricproto.Frame{
		Type:         fabricproto.FrameData,
		TrafficClass: fabricproto.TrafficClassReliable,
		StreamID:     SyntheticForwardQUICStreamID,
		Sequence:     wantSequence,
		Payload:      payload,
	}
	if err := session.Send(ctx, request); err != nil {
		return none, 0, err
	}

	waitCtx := ctx
	if timeout > 0 {
		bounded, cancel := context.WithTimeout(ctx, timeout)
		defer cancel()
		waitCtx = bounded
	}

	// Latency is measured from after the send, i.e. it covers only the wait.
	sentAt := time.Now()
	for {
		select {
		case <-waitCtx.Done():
			return none, 0, waitCtx.Err()
		case sessionErr, open := <-session.Errors():
			switch {
			case !open:
				return none, 0, ErrSyntheticPeerUnavailable
			case sessionErr != nil:
				return none, 0, sessionErr
			}
		case reply, open := <-session.Frames():
			if !open {
				return none, 0, ErrSyntheticPeerUnavailable
			}
			if reply.Type == fabricproto.FrameData && reply.StreamID == SyntheticForwardQUICStreamID && reply.Sequence == wantSequence {
				return reply, time.Since(sentAt).Milliseconds(), nil
			}
		}
	}
}
// decodeQUICSyntheticForwardResponse parses a JSON-encoded synthetic forward
// response. It returns the JSON error when payload does not decode, an error
// wrapping ErrSyntheticPeerUnavailable when the response carries a non-blank
// error message, and the contained envelope otherwise.
func decodeQUICSyntheticForwardResponse(payload []byte) (SyntheticEnvelope, error) {
	var decoded quicSyntheticForwardResponse
	err := json.Unmarshal(payload, &decoded)
	if err != nil {
		return SyntheticEnvelope{}, err
	}
	if peerFailed := strings.TrimSpace(decoded.Error) != ""; peerFailed {
		// The message is reported as received, not trimmed.
		return SyntheticEnvelope{}, fmt.Errorf("%w: %s", ErrSyntheticPeerUnavailable, decoded.Error)
	}
	return decoded.Envelope, nil
}
@@ -0,0 +1,223 @@
package mesh
import (
"context"
"crypto/tls"
"encoding/json"
"sync"
"testing"
"time"
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
)
// TestQUICSyntheticTransportReroutesOnConnectFailure gives node-b two
// candidate endpoints, makes connects to the first one fail, and checks that
// SendSynthetic still returns the ack served by the second one after exactly
// one connect attempt per endpoint.
func TestQUICSyntheticTransportReroutesOnConnectFailure(t *testing.T) {
	const (
		deadEndpoint = "quic://dead.example.test:19443"
		fastEndpoint = "quic://fast.example.test:19443"
	)
	fake := newFakeSyntheticFabricTransport()
	fake.failConnect[deadEndpoint] = true
	fake.responses[fastEndpoint] = testSyntheticAckEnvelope("route-1", 1)

	candidates := []FabricTransportTarget{
		{EndpointID: "dead", PeerID: "node-b", Endpoint: deadEndpoint, Transport: "quic"},
		{EndpointID: "fast", PeerID: "node-b", Endpoint: fastEndpoint, Transport: "quic"},
	}
	forward := NewQUICSyntheticTransportFromRouteSets(map[string]FabricRouteSet{
		"node-b": FabricRouteSetForTransportTargets("cluster-a", "node-a", "node-b", candidates),
	}, fake)
	forward.Timeout = time.Second

	ack, err := forward.SendSynthetic(context.Background(), "node-b", testSyntheticEnvelope("route-1", 1))
	if err != nil {
		t.Fatalf("send synthetic: %v", err)
	}
	if !(ack.RouteID == "route-1" && ack.MessageType == SyntheticMessageRouteHealthAck) {
		t.Fatalf("ack = %+v", ack)
	}
	if got := fake.connectCount(deadEndpoint); got != 1 {
		t.Fatalf("dead connect count = %d, want 1", got)
	}
	if got := fake.connectCount(fastEndpoint); got != 1 {
		t.Fatalf("fast connect count = %d, want 1", got)
	}
}
func TestQUICFabricServerHandlesSyntheticFrames(t *testing.T) {
server, err := StartQUICFabricServer(context.Background(), QUICFabricServerConfig{
ListenAddr: "127.0.0.1:0",
TLSConfig: testQUICTLSConfig(t),
SyntheticForwardHandler: func(_ context.Context, envelope SyntheticEnvelope) (SyntheticEnvelope, error) {
return testSyntheticAckEnvelope(envelope.RouteID, envelope.Sequence), nil
},
})
if err != nil {
t.Fatalf("start quic fabric server: %v", err)
}
defer server.Close()
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
defer cancel()
session, err := NewQUICFabricTransport(nil).Connect(ctx, FabricTransportTarget{
Endpoint: server.Addr().String(),
TLSConfig: &tls.Config{
InsecureSkipVerify: true,
NextProtos: []string{fabricQUICNextProto},
},
Timeout: time.Second,
InboundBuffer: 4,
ErrorBuffer: 4,
})
if err != nil {
t.Fatalf("connect: %v", err)
}
defer session.Close()
payload, err := json.Marshal(testSyntheticEnvelope("route-1", 7))
if err != nil {
t.Fatalf("marshal envelope: %v", err)
}
if err := session.Send(ctx, fabricproto.Frame{
Type: fabricproto.FrameData,
TrafficClass: fabricproto.TrafficClassReliable,
StreamID: SyntheticForwardQUICStreamID,
Sequence: 42,
Payload: payload,
}); err != nil {
t.Fatalf("send synthetic frame: %v", err)
}
select {
case frame := <-session.Frames():
if frame.StreamID != SyntheticForwardQUICStreamID || frame.Sequence != 42 {
t.Fatalf("frame = %+v", frame)
}
ack, err := decodeQUICSyntheticForwardResponse(frame.Payload)
if err != nil {
t.Fatalf("decode response: %v", err)
}
if ack.RouteID != "route-1" || ack.MessageType != SyntheticMessageRouteHealthAck || ack.Sequence != 7 {
t.Fatalf("ack = %+v", ack)
}
case err := <-session.Errors():
t.Fatalf("session error: %v", err)
case <-ctx.Done():
t.Fatal(ctx.Err())
}
}
// fakeSyntheticFabricTransport is an in-memory transport double for the tests
// in this file. Its behaviour is scripted per endpoint string: Connect either
// fails or returns a session that replies with the configured envelope.
type fakeSyntheticFabricTransport struct {
// mu is held by Connect and connectCount while they touch the maps below.
mu sync.Mutex
// failConnect marks endpoints for which Connect returns an error.
failConnect map[string]bool
// responses holds the envelope a session for the endpoint replies with.
responses map[string]SyntheticEnvelope
// connects counts Connect calls per endpoint, including failed ones.
connects map[string]int
}
// newFakeSyntheticFabricTransport returns a fake transport whose three maps are
// allocated and empty, so tests can script endpoints by direct assignment.
func newFakeSyntheticFabricTransport() *fakeSyntheticFabricTransport {
	fake := new(fakeSyntheticFabricTransport)
	fake.failConnect = make(map[string]bool)
	fake.responses = make(map[string]SyntheticEnvelope)
	fake.connects = make(map[string]int)
	return fake
}
// Connect records one attempt against target.Endpoint and then either fails
// with ErrSyntheticPeerUnavailable (when the endpoint is flagged in
// failConnect) or returns a new fake session primed with the endpoint's
// configured response envelope. The context is ignored.
func (t *fakeSyntheticFabricTransport) Connect(_ context.Context, target FabricTransportTarget) (FabricTransportSession, error) {
	// Read all scripted state in one critical section.
	shouldFail, canned := func() (bool, SyntheticEnvelope) {
		t.mu.Lock()
		defer t.mu.Unlock()
		t.connects[target.Endpoint]++
		return t.failConnect[target.Endpoint], t.responses[target.Endpoint]
	}()
	if shouldFail {
		return nil, ErrSyntheticPeerUnavailable
	}

	session := &fakeSyntheticFabricSession{response: canned}
	session.frames = make(chan fabricproto.Frame, 16)
	session.errors = make(chan error, 1)
	session.done = make(chan struct{})
	return session, nil
}
// Close is a no-op for the fake transport and always returns nil.
func (t *fakeSyntheticFabricTransport) Close() error {
return nil
}
// connectCount reports how many times Connect has been called for endpoint,
// counting failed attempts as well. Unknown endpoints report zero.
func (t *fakeSyntheticFabricTransport) connectCount(endpoint string) int {
	t.mu.Lock()
	attempts := t.connects[endpoint]
	t.mu.Unlock()
	return attempts
}
// fakeSyntheticFabricSession is the session double handed out by
// fakeSyntheticFabricTransport.Connect. Every data frame sent on it is answered
// with a frame carrying the fixed response envelope.
type fakeSyntheticFabricSession struct {
// response is the envelope wrapped into every reply frame.
response SyntheticEnvelope
// frames delivers reply frames to the reader; exposed through Frames.
frames chan fabricproto.Frame
// errors is exposed through Errors; nothing in this file writes to it.
errors chan error
// done is closed by Close and checked by Send's reply goroutine and Closed.
done chan struct{}
// once makes Close safe to call more than once.
once sync.Once
}
// Send simulates the remote peer: for every data frame it queues a reply frame
// that echoes the request's traffic class, stream ID and sequence and carries
// the session's fixed response envelope as a JSON quicSyntheticForwardResponse.
// Frames of any other type are accepted and ignored. The context is ignored.
//
// It returns the json.Marshal error if the response cannot be encoded, so a
// broken fixture fails loudly instead of producing an empty reply payload.
func (s *fakeSyntheticFabricSession) Send(_ context.Context, frame fabricproto.Frame) error {
	if frame.Type != fabricproto.FrameData {
		return nil
	}
	responsePayload, err := json.Marshal(quicSyntheticForwardResponse{Envelope: s.response})
	if err != nil {
		return err
	}
	reply := fabricproto.Frame{
		Type:         fabricproto.FrameData,
		TrafficClass: frame.TrafficClass,
		StreamID:     frame.StreamID,
		Sequence:     frame.Sequence,
		Payload:      responsePayload,
	}
	// Deliver asynchronously so Send never blocks on a full frames channel;
	// the goroutine exits without delivering once the session is closed.
	go func() {
		select {
		case <-s.done:
		case s.frames <- reply:
		}
	}()
	return nil
}
// Frames returns the receive-only view of the channel on which Send queues
// reply frames.
func (s *fakeSyntheticFabricSession) Frames() <-chan fabricproto.Frame {
return s.frames
}
// Errors returns the receive-only view of the session's error channel.
func (s *fakeSyntheticFabricSession) Errors() <-chan error {
return s.errors
}
// Close marks the session closed by closing the done channel. The sync.Once
// guard makes repeated calls safe; it always returns nil.
func (s *fakeSyntheticFabricSession) Close() error {
s.once.Do(func() {
close(s.done)
})
return nil
}
// Closed reports whether Close has been called, by checking without blocking
// whether the done channel is closed.
func (s *fakeSyntheticFabricSession) Closed() bool {
	select {
	case <-s.done:
		return true
	default:
	}
	return false
}
// testSyntheticEnvelope builds a route-health envelope on the fabric-control
// channel addressed from node-a to node-b in cluster-a, with the given route ID
// and sequence and SentAt set to the current UTC time.
func testSyntheticEnvelope(routeID string, sequence uint64) SyntheticEnvelope {
	sender := PeerIdentity{ClusterID: "cluster-a", NodeID: "node-a"}
	receiver := PeerIdentity{ClusterID: "cluster-a", NodeID: "node-b"}

	envelope := SyntheticEnvelope{
		ProtocolVersion: ProtocolVersion,
		RouteID:         routeID,
		ClusterID:       "cluster-a",
		From:            sender,
		To:              receiver,
		Channel:         SyntheticChannelFabricControl,
		MessageType:     SyntheticMessageRouteHealth,
		TTL:             8,
		HopCount:        1,
		Visited:         []string{"node-a"},
		Sequence:        sequence,
	}
	envelope.SentAt = time.Now().UTC()
	return envelope
}
// testSyntheticAckEnvelope derives the ack for testSyntheticEnvelope: sender
// and receiver are swapped (node-b to node-a), the message type becomes the
// route-health ack, and node-b is appended to the visited list.
func testSyntheticAckEnvelope(routeID string, sequence uint64) SyntheticEnvelope {
	reply := testSyntheticEnvelope(routeID, sequence)
	reply.MessageType = SyntheticMessageRouteHealthAck
	reply.From, reply.To = PeerIdentity{ClusterID: "cluster-a", NodeID: "node-b"}, PeerIdentity{ClusterID: "cluster-a", NodeID: "node-a"}
	reply.Visited = []string{"node-a", "node-b"}
	return reply
}
+17 -11
View File
@@ -13,17 +13,18 @@ import (
const FileName = "identity.json"
type Identity struct {
NodeID string `json:"node_id"`
ClusterID string `json:"cluster_id"`
NodeName string `json:"node_name"`
NodeFingerprint string `json:"node_fingerprint"`
PublicKey string `json:"public_key"`
IdentityStatus string `json:"identity_status"`
PendingJoinRequestID string `json:"pending_join_request_id,omitempty"`
ClusterAuthorityPublicKey string `json:"cluster_authority_public_key,omitempty"`
ClusterAuthorityFingerprint string `json:"cluster_authority_fingerprint,omitempty"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
NodeID string `json:"node_id"`
ClusterID string `json:"cluster_id"`
NodeName string `json:"node_name"`
NodeFingerprint string `json:"node_fingerprint"`
PublicKey string `json:"public_key"`
IdentityStatus string `json:"identity_status"`
PendingJoinRequestID string `json:"pending_join_request_id,omitempty"`
ClusterAuthorityPublicKey string `json:"cluster_authority_public_key,omitempty"`
ClusterAuthorityFingerprint string `json:"cluster_authority_fingerprint,omitempty"`
ClusterAuthorityQuorum json.RawMessage `json:"cluster_authority_quorum,omitempty"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
}
func LoadOrCreate(dir, clusterID, nodeName string) (Identity, error) {
@@ -103,6 +104,10 @@ func MarkApproved(dir string, nodeID, clusterID, status string) (Identity, error
}
// MarkApprovedWithAuthority is the quorum-less form of
// MarkApprovedWithAuthorityAndQuorum: it forwards every argument unchanged and
// passes nil for the authority quorum.
func MarkApprovedWithAuthority(dir string, nodeID, clusterID, status, authorityPublicKey, authorityFingerprint string) (Identity, error) {
return MarkApprovedWithAuthorityAndQuorum(dir, nodeID, clusterID, status, authorityPublicKey, authorityFingerprint, nil)
}
func MarkApprovedWithAuthorityAndQuorum(dir string, nodeID, clusterID, status, authorityPublicKey, authorityFingerprint string, authorityQuorum json.RawMessage) (Identity, error) {
path := filepath.Join(dir, FileName)
identity, err := Load(path)
if err != nil {
@@ -114,6 +119,7 @@ func MarkApprovedWithAuthority(dir string, nodeID, clusterID, status, authorityP
identity.PendingJoinRequestID = ""
identity.ClusterAuthorityPublicKey = authorityPublicKey
identity.ClusterAuthorityFingerprint = authorityFingerprint
identity.ClusterAuthorityQuorum = authorityQuorum
if err := Save(path, identity); err != nil {
return Identity{}, err
}
@@ -2,10 +2,12 @@ package supervisor
import (
"context"
"strconv"
"strings"
"time"
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/client"
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/webingress"
)
type Supervisor interface {
@@ -14,6 +16,8 @@ type Supervisor interface {
type StubSupervisor struct {
Version string
WebIngressRuntimeEnabled bool
WebIngressManager *webingress.Manager
RemoteWorkspaceRealAdapter RemoteWorkspaceRealAdapterConfig
}
@@ -56,6 +60,9 @@ func (s StubSupervisor) applyOne(workload client.DesiredWorkload) client.Workloa
}
if desiredState != "enabled" {
payload["reason"] = "desired_state_not_enabled"
if (serviceType == "public-ingress" || serviceType == "admin-ingress") && s.WebIngressManager != nil {
payload["listener_status"] = s.WebIngressManager.Stop(context.Background())
}
return client.WorkloadStatusRequest{
ReportedState: "stopped",
RuntimeMode: runtimeMode,
@@ -74,6 +81,57 @@ func (s StubSupervisor) applyOne(workload client.DesiredWorkload) client.Workloa
StatusPayload: payload,
}
}
if serviceType == "public-ingress" || serviceType == "admin-ingress" {
contract := s.webIngressContract(serviceType, workload.Config)
for key, value := range contract {
payload[key] = value
}
if contract["contract_valid"] == true {
payload["reason"] = "web_ingress_contract_ready"
payload["execution_mode"] = "contract_probe"
payload["traffic"] = "https_edge_to_fabric_service_channel"
if contract["real_listener_requested"] == true && contract["real_listener_runtime_enabled"] != true {
payload["reason"] = "web_ingress_real_listener_gate_disabled"
payload["traffic"] = "blocked"
return client.WorkloadStatusRequest{
ReportedState: "degraded",
RuntimeMode: runtimeMode,
Version: version,
StatusPayload: payload,
}
}
if contract["real_listener_start_allowed"] == true && s.WebIngressManager != nil {
listenerStatus := s.WebIngressManager.Apply(context.Background(), webIngressListenerConfig(serviceType, workload.Config))
payload["listener_status"] = listenerStatus
payload["ports_opened_by_runtime"] = listenerStatus.Running
payload["ports_opened_by_stub"] = false
if !listenerStatus.HTTPSRunning {
payload["reason"] = "web_ingress_listener_partial"
payload["traffic"] = "blocked"
return client.WorkloadStatusRequest{
ReportedState: "degraded",
RuntimeMode: runtimeMode,
Version: version,
StatusPayload: payload,
}
}
}
return client.WorkloadStatusRequest{
ReportedState: "running",
RuntimeMode: runtimeMode,
Version: version,
StatusPayload: payload,
}
}
payload["reason"] = "web_ingress_contract_invalid"
payload["traffic"] = "blocked"
return client.WorkloadStatusRequest{
ReportedState: "degraded",
RuntimeMode: runtimeMode,
Version: version,
StatusPayload: payload,
}
}
if serviceType == "synthetic.echo" && runtimeMode == "native" {
payload["reason"] = "internal_synthetic_echo_ready"
payload["execution_mode"] = "builtin"
@@ -85,6 +143,23 @@ func (s StubSupervisor) applyOne(workload client.DesiredWorkload) client.Workloa
StatusPayload: payload,
}
}
if (serviceType == "vpn-exit" || serviceType == "ipv4-egress" || serviceType == "vpn-client") && runtimeMode == "native" {
for key, value := range vpnFabricOnlyContract(serviceType, workload.Config) {
payload[key] = value
}
payload["execution_mode"] = "contract_probe"
payload["fabric_transport"] = "quic_only"
payload["fabric_service_channel_required"] = true
payload["backend_relay_fallback"] = false
payload["legacy_protocol_compatibility"] = false
payload["traffic"] = "fabric_service_channel_only"
return client.WorkloadStatusRequest{
ReportedState: "running",
RuntimeMode: runtimeMode,
Version: version,
StatusPayload: payload,
}
}
if serviceType == "rdp-worker" && runtimeMode == "native" && boolConfig(workload.Config, "adapter_contract_probe") {
payload["reason"] = "remote_workspace_adapter_contract_probe_ready"
payload["execution_mode"] = "contract_probe"
@@ -126,6 +201,173 @@ func (s StubSupervisor) applyOne(workload client.DesiredWorkload) client.Workloa
}
}
// vpnFabricOnlyContract builds the status-payload contract for a VPN workload.
//
// The service types "vpn-exit" and "ipv4-egress" are reported with role
// "ipv4-egress" and internet egress enabled; every other service type is
// reported as a "vpn-client" without internet egress. Pool, region, CIDR, DNS
// and policy-source values are read from config, and the nested
// "service_binding" entry comes from vpnServiceBindingContract.
func vpnFabricOnlyContract(serviceType string, config map[string]any) map[string]any {
	isExit := serviceType == "vpn-exit" || serviceType == "ipv4-egress"

	role, reason := "vpn-client", "vpn_client_node_contract_ready"
	if isExit {
		role, reason = "ipv4-egress", "ipv4_egress_contract_ready"
	}

	return map[string]any{
		"schema_version":                  "rap.vpn.fabric_node_contract.v1",
		"reason":                          reason,
		"role":                            role,
		"service_class":                   "vpn_packets",
		"internet_egress":                 isExit,
		"exit_pool_id":                    stringConfig(config, "pool_id", ""),
		"exit_region":                     stringConfig(config, "region", ""),
		"allowed_cidrs":                   stringSliceConfig(config, "allowed_cidrs"),
		"dns_servers":                     stringSliceConfig(config, "dns_servers"),
		"client_policy_source":            stringConfig(config, "client_policy_source", "fabric_access_policy"),
		"android_node_supported":          serviceType == "vpn-client",
		"ipv4_exit_supported":             isExit,
		"fabric_service_channel_required": true,
		"packet_runtime_status":           "fabric_channel_binding_pending_runtime",
		"service_binding":                 vpnServiceBindingContract(serviceType, config),
	}
}
// vpnServiceBindingContract describes the service binding reported for a VPN
// workload. "vpn-exit" and "ipv4-egress" yield an "ipv4_egress" binding built
// from the pool, region, CIDR and DNS settings; any other service type yields
// a "local_ipv4_ingress" binding built from the preferred exit pool and the
// configured TCP/UDP listen ports.
func vpnServiceBindingContract(serviceType string, config map[string]any) map[string]any {
	switch serviceType {
	case "vpn-exit", "ipv4-egress":
		return map[string]any{
			"type":                         "ipv4_egress",
			"accepts_service_class":        "vpn_packets",
			"accepts_from_fabric_only":     true,
			"legacy_protocol_listener":     false,
			"exit_pool_id":                 stringConfig(config, "pool_id", ""),
			"region":                       stringConfig(config, "region", ""),
			"allowed_cidrs":                stringSliceConfig(config, "allowed_cidrs"),
			"dns_servers":                  stringSliceConfig(config, "dns_servers"),
			"internet_egress":              true,
			"requires_host_packet_runtime": true,
		}
	default:
		ingressSources := []string{"android_vpnservice_tun", "linux_tun", "host_service_port"}
		return map[string]any{
			"type":                         "local_ipv4_ingress",
			"accepts_from":                 ingressSources,
			"service_class":                "vpn_packets",
			"exit_selection":               "pool",
			"preferred_exit_pool_id":       stringConfig(config, "exit_pool_id", ""),
			"listen_tcp_ports":             intSliceConfig(config, "listen_tcp_ports"),
			"listen_udp_ports":             intSliceConfig(config, "listen_udp_ports"),
			"tun_required":                 true,
			"route_authority":              "fabric_farm",
			"legacy_protocol_listener":     false,
			"requires_fabric_node_runtime": true,
		}
	}
}
// webIngressListenerConfig translates a workload's config map into the
// webingress.ListenerConfig handed to the web-ingress manager.
//
// Missing keys fall back to TLS mode "terminate", ports 80/443 and listen
// addresses ":80"/":443"; the certificate and key paths default to empty.
//
// scope and tls_mode are whitespace-trimmed here because webIngressContract
// validates and reports the trimmed values. Without the trim a value such as
// " terminate" would pass the contract check while the listener received a
// different, unvalidated string.
func webIngressListenerConfig(serviceType string, config map[string]any) webingress.ListenerConfig {
	return webingress.ListenerConfig{
		RuntimeConfig: webingress.RuntimeConfig{
			ServiceType:    serviceType,
			Scope:          strings.TrimSpace(stringConfig(config, "scope", "")),
			ServiceClasses: stringSliceConfig(config, "service_classes"),
			TLSMode:        strings.TrimSpace(stringConfig(config, "tls_mode", "terminate")),
			HTTPPort:       intConfig(config, "listen_http_port", 80),
			HTTPSPort:      intConfig(config, "listen_https_port", 443),
		},
		HTTPAddr:    stringConfig(config, "listen_http_addr", ":80"),
		HTTPSAddr:   stringConfig(config, "listen_https_addr", ":443"),
		TLSCertFile: stringConfig(config, "tls_cert_file", ""),
		TLSKeyFile:  stringConfig(config, "tls_key_file", ""),
	}
}
// webIngressContract validates a web-ingress workload config and returns the
// contract map that is merged into the workload's status payload.
//
// Checks, in the order their failures are recorded under "missing_checks":
// HTTP port must be 80, HTTPS port must be 443, tls_mode must be "terminate"
// or "passthrough-approved-terminator", scope must be non-blank, at least one
// service class must be given, and each service class must be allowed for
// serviceType. "contract_valid" is true only when no check failed, and
// "real_listener_start_allowed" additionally requires that the config requests
// a real listener and that s.WebIngressRuntimeEnabled is set.
func (s StubSupervisor) webIngressContract(serviceType string, config map[string]any) map[string]any {
	var (
		listenHTTP        = intConfig(config, "listen_http_port", 80)
		listenHTTPS       = intConfig(config, "listen_https_port", 443)
		mode              = strings.TrimSpace(stringConfig(config, "tls_mode", "terminate"))
		classes           = stringSliceConfig(config, "service_classes")
		edgeScope         = strings.TrimSpace(stringConfig(config, "scope", ""))
		listenerRequested = boolConfig(config, "real_listener_enabled")
		permitted         = webIngressAllowedServiceClasses(serviceType)
	)

	// Kept non-nil on purpose so the payload always carries a slice.
	failed := []string{}
	failIf := func(violated bool, check string) {
		if violated {
			failed = append(failed, check)
		}
	}
	failIf(listenHTTP != 80, "listen_http_port_must_be_80")
	failIf(listenHTTPS != 443, "listen_https_port_must_be_443")
	switch mode {
	case "terminate", "passthrough-approved-terminator":
	default:
		failed = append(failed, "tls_mode_invalid")
	}
	failIf(edgeScope == "", "scope_required")
	failIf(len(classes) == 0, "service_classes_required")
	for _, class := range classes {
		failIf(!containsString(permitted, class), "service_class_not_allowed:"+class)
	}
	valid := len(failed) == 0

	return map[string]any{
		"schema_version":                  "rap.web_ingress.workload_contract.v1",
		"contract_valid":                  valid,
		"missing_checks":                  failed,
		"service_edge_only":               true,
		"authority_service":               false,
		"fabric_transport":                "quic_only",
		"http_between_fabric_nodes":       false,
		"listen_http_port":                listenHTTP,
		"listen_https_port":               listenHTTPS,
		"tls_mode":                        mode,
		"scope":                           edgeScope,
		"service_classes":                 classes,
		"allowed_service_classes":         permitted,
		"fabric_service_channel_required": true,
		"runtime_roles_required":          webIngressRuntimeRoles(classes),
		"payload_forwarding":              "contract_only",
		"real_listener_requested":         listenerRequested,
		"real_listener_runtime_enabled":   s.WebIngressRuntimeEnabled,
		"real_listener_start_allowed":     valid && listenerRequested && s.WebIngressRuntimeEnabled,
		"runtime_handler_ready":           valid,
		"runtime_handler_contract":        "rap.web_ingress.runtime_response.v1",
		"runtime_handler_payload_status":  "fabric_service_channel_binding_not_implemented",
		"fabric_envelope_schema":          webingress.FabricServiceChannelEnvelopeSchema,
		"fabric_runtime_response_schema":  "rap.web_ingress.fabric_runtime_response.v1",
		"fabric_envelope_signer":          "ed25519_available",
		"fabric_envelope_sender":          "mesh_request_response_runtime_adapter_available",
		"fabric_quic_stream":              "web_ingress_forward",
		"fabric_quic_stream_id":           2,
		"fabric_runtime_receiver":         "signed_envelope_receiver_available",
		"admin_runtime_dispatcher":        "read_only_manifest_and_health_available",
		"control_api_binding":             "read_only_projection_skeleton_available",
		"runtime_receiver_policy":         "trusted_keys_and_service_class_allow_list",
		"ports_opened_by_stub":            false,
	}
}
// webIngressAllowedServiceClasses returns the service classes an ingress of the
// given type may serve: the two admin classes for "admin-ingress", and the
// organization and user portal classes for every other service type.
func webIngressAllowedServiceClasses(serviceType string) []string {
	switch serviceType {
	case "admin-ingress":
		return []string{"platform_admin", "cluster_admin"}
	default:
		return []string{"organization_portal", "user_portal"}
	}
}
// webIngressRuntimeRoles lists the runtime roles needed to serve the given
// service classes. Each recognised class contributes its own runtime role
// followed by the shared identity, policy and audit roles; unrecognised
// classes contribute nothing. The result is passed through dedupeStrings.
func webIngressRuntimeRoles(serviceClasses []string) []string {
	required := []string{}
	for _, class := range serviceClasses {
		var classRuntime string
		switch class {
		case "platform_admin":
			classRuntime = "global-admin-runtime"
		case "cluster_admin":
			classRuntime = "cluster-admin-runtime"
		case "organization_portal":
			classRuntime = "organization-portal-runtime"
		case "user_portal":
			classRuntime = "user-portal-runtime"
		default:
			continue
		}
		required = append(required, classRuntime, "identity-runtime", "policy-authority", "audit-sink")
	}
	return dedupeStrings(required)
}
func boolConfig(values map[string]any, key string) bool {
if values == nil {
return false
@@ -144,6 +386,157 @@ func boolConfig(values map[string]any, key string) bool {
}
}
// intConfig reads an integer setting from a loosely typed config map.
//
// It returns fallback when values is nil, when key is absent, when the value
// has an unsupported type, when a string value does not parse as a base-10
// integer after trimming whitespace, or when a float64 value cannot be
// represented as an int (NaN, ±Inf, or magnitude outside the int range).
// In-range float64 values are truncated toward zero.
func intConfig(values map[string]any, key string, fallback int) int {
	if values == nil {
		return fallback
	}
	switch value := values[key].(type) {
	case int:
		return value
	case int64:
		return int(value)
	case float64:
		// Converting an unrepresentable float to int succeeds in Go but the
		// result is implementation-dependent, so such values are rejected.
		// minInt is a power of two and therefore exact as a float64; every
		// comparison with NaN is false, so NaN falls through to fallback.
		const (
			maxInt = int(^uint(0) >> 1)
			minInt = -maxInt - 1
		)
		if value >= float64(minInt) && value < -float64(minInt) {
			return int(value)
		}
		return fallback
	case string:
		parsed, err := strconv.Atoi(strings.TrimSpace(value))
		if err != nil {
			return fallback
		}
		return parsed
	default:
		return fallback
	}
}
// stringConfig returns values[key] when it holds a string, exactly as stored
// (no trimming). It returns fallback when values is nil, when key is absent,
// or when the stored value is not a string.
func stringConfig(values map[string]any, key string, fallback string) string {
	// Indexing a nil map or a missing key yields a nil interface, which fails
	// the type assertion just like any non-string value does.
	if text, isString := values[key].(string); isString {
		return text
	}
	return fallback
}
// stringSliceConfig reads a list-of-strings setting from a loosely typed
// config map. It accepts a []string, a []any (non-string elements are
// skipped), or a single comma-separated string. Accepted input is passed
// through dedupeStrings. It returns nil when values is nil, when key is
// absent, or when the value has any other type.
func stringSliceConfig(values map[string]any, key string) []string {
	// Indexing a nil map reports the key as absent.
	raw, present := values[key]
	if !present {
		return nil
	}

	var collected []string
	switch typed := raw.(type) {
	case []string:
		collected = typed
	case []any:
		collected = []string{}
		for _, element := range typed {
			text, isString := element.(string)
			if !isString {
				continue
			}
			collected = append(collected, strings.TrimSpace(text))
		}
	case string:
		collected = strings.Split(typed, ",")
		for i, part := range collected {
			collected[i] = strings.TrimSpace(part)
		}
	default:
		return nil
	}
	return dedupeStrings(collected)
}
// intSliceConfig reads a list of port numbers from a loosely typed config map.
//
// The value may be a []int, a []any whose elements are int, int64, float64 or
// numeric strings, a comma-separated string, or a single scalar of one of
// those element types. The result keeps first-seen order, drops duplicates and
// drops anything outside 1..65535. It returns nil when values is nil or key is
// absent, and an empty non-nil slice when nothing usable was found.
func intSliceConfig(values map[string]any, key string) []int {
	// Indexing a nil map reports the key as absent.
	raw, present := values[key]
	if !present {
		return nil
	}

	// toPositive converts a single element; ok is false when the element has
	// an unsupported type or is not strictly positive.
	toPositive := func(item any) (n int, ok bool) {
		switch v := item.(type) {
		case int:
			return v, v > 0
		case int64:
			return int(v), v > 0
		case float64:
			// Test the sign before converting, as non-positive floats are
			// never converted.
			if v > 0 {
				return int(v), true
			}
		case string:
			parsed := intConfig(map[string]any{"value": v}, "value", 0)
			return parsed, parsed > 0
		}
		return 0, false
	}

	candidates := []int{}
	collect := func(item any) {
		if n, ok := toPositive(item); ok {
			candidates = append(candidates, n)
		}
	}
	switch typed := raw.(type) {
	case []int:
		// Taken as-is; the range filter below discards invalid entries.
		candidates = append(candidates, typed...)
	case []any:
		for _, element := range typed {
			collect(element)
		}
	case string:
		for _, part := range strings.Split(typed, ",") {
			collect(strings.TrimSpace(part))
		}
	default:
		collect(typed)
	}

	ports := make([]int, 0, len(candidates))
	taken := map[int]struct{}{}
	for _, candidate := range candidates {
		if candidate < 1 || candidate > 65535 {
			continue
		}
		if _, duplicate := taken[candidate]; duplicate {
			continue
		}
		taken[candidate] = struct{}{}
		ports = append(ports, candidate)
	}
	return ports
}
// dedupeStrings trims surrounding whitespace from every entry, drops entries
// that end up empty, and removes repeats while keeping first-seen order. The
// result is always a non-nil slice, even for nil or empty input.
func dedupeStrings(values []string) []string {
	unique := make([]string, 0, len(values))
	known := make(map[string]struct{}, len(values))
	for i := range values {
		trimmed := strings.TrimSpace(values[i])
		if trimmed == "" {
			continue
		}
		if _, repeated := known[trimmed]; !repeated {
			known[trimmed] = struct{}{}
			unique = append(unique, trimmed)
		}
	}
	return unique
}
// containsString reports whether needle equals, byte for byte, at least one
// element of values. A nil or empty slice never matches.
func containsString(values []string, needle string) bool {
	for i := 0; i < len(values); i++ {
		if values[i] == needle {
			return true
		}
	}
	return false
}
func remoteWorkspaceAdapterChannels() []map[string]any {
return []map[string]any{
{"name": "input", "direction": "client_to_adapter", "reliability": "reliable_ordered", "priority": "critical", "droppable": true, "may_block_input": false},
@@ -5,6 +5,7 @@ import (
"testing"
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/client"
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/webingress"
)
func TestStubSupervisorReportsDegradedForEnabledWorkload(t *testing.T) {
@@ -73,6 +74,245 @@ func TestStubSupervisorReportsBuiltinFabricServicesRunning(t *testing.T) {
}
}
func TestStubSupervisorReportsVPNFabricOnlyContractsRunning(t *testing.T) {
statuses, err := (StubSupervisor{Version: "test"}).Apply(context.Background(), []client.DesiredWorkload{
{
ServiceType: "ipv4-egress",
DesiredState: "enabled",
RuntimeMode: "native",
Config: map[string]any{
"pool_id": "us-los-angeles-ipv4",
"region": "us-los-angeles",
"allowed_cidrs": []any{"0.0.0.0/0"},
"dns_servers": []any{"192.168.200.210"},
},
},
{
ServiceType: "vpn-client",
DesiredState: "enabled",
RuntimeMode: "native",
Config: map[string]any{
"exit_pool_id": "us-los-angeles-ipv4",
"listen_tcp_ports": []any{443, "8443"},
"listen_udp_ports": "443,51820",
},
},
})
if err != nil {
t.Fatalf("apply desired workload: %v", err)
}
if len(statuses) != 2 {
t.Fatalf("statuses length = %d", len(statuses))
}
for _, status := range statuses {
if status.ReportedState != "running" {
t.Fatalf("ReportedState = %q", status.ReportedState)
}
if status.StatusPayload["execution_mode"] != "contract_probe" {
t.Fatalf("execution_mode = %v", status.StatusPayload["execution_mode"])
}
if status.StatusPayload["fabric_transport"] != "quic_only" {
t.Fatalf("fabric_transport = %v", status.StatusPayload["fabric_transport"])
}
if status.StatusPayload["backend_relay_fallback"] != false {
t.Fatalf("backend_relay_fallback = %v", status.StatusPayload["backend_relay_fallback"])
}
if status.StatusPayload["legacy_protocol_compatibility"] != false {
t.Fatalf("legacy_protocol_compatibility = %v", status.StatusPayload["legacy_protocol_compatibility"])
}
}
if statuses[0].StatusPayload["role"] != "ipv4-egress" || statuses[0].StatusPayload["internet_egress"] != true {
t.Fatalf("ipv4 egress payload = %#v", statuses[0].StatusPayload)
}
if statuses[1].StatusPayload["role"] != "vpn-client" || statuses[1].StatusPayload["android_node_supported"] != true {
t.Fatalf("vpn client payload = %#v", statuses[1].StatusPayload)
}
exitBinding := statuses[0].StatusPayload["service_binding"].(map[string]any)
if exitBinding["type"] != "ipv4_egress" || exitBinding["accepts_from_fabric_only"] != true || exitBinding["exit_pool_id"] != "us-los-angeles-ipv4" {
t.Fatalf("ipv4 egress binding = %#v", exitBinding)
}
clientBinding := statuses[1].StatusPayload["service_binding"].(map[string]any)
if clientBinding["type"] != "local_ipv4_ingress" || clientBinding["preferred_exit_pool_id"] != "us-los-angeles-ipv4" || clientBinding["legacy_protocol_listener"] != false {
t.Fatalf("vpn client binding = %#v", clientBinding)
}
if got := clientBinding["listen_tcp_ports"].([]int); len(got) != 2 || got[0] != 443 || got[1] != 8443 {
t.Fatalf("listen_tcp_ports = %#v", got)
}
if got := clientBinding["listen_udp_ports"].([]int); len(got) != 2 || got[0] != 443 || got[1] != 51820 {
t.Fatalf("listen_udp_ports = %#v", got)
}
}
func TestStubSupervisorReportsWebIngressContractReady(t *testing.T) {
statuses, err := (StubSupervisor{Version: "test"}).Apply(context.Background(), []client.DesiredWorkload{
{
ServiceType: "admin-ingress",
DesiredState: "enabled",
RuntimeMode: "native",
Config: map[string]any{
"listen_http_port": 80,
"listen_https_port": 443,
"tls_mode": "terminate",
"scope": "platform",
"service_classes": []any{"platform_admin", "cluster_admin"},
},
},
})
if err != nil {
t.Fatalf("apply desired workload: %v", err)
}
if statuses[0].ReportedState != "running" {
t.Fatalf("ReportedState = %q", statuses[0].ReportedState)
}
payload := statuses[0].StatusPayload
if payload["reason"] != "web_ingress_contract_ready" ||
payload["fabric_transport"] != "quic_only" ||
payload["http_between_fabric_nodes"] != false ||
payload["authority_service"] != false ||
payload["real_listener_start_allowed"] != false ||
payload["runtime_handler_ready"] != true ||
payload["runtime_handler_payload_status"] != "fabric_service_channel_binding_not_implemented" ||
payload["ports_opened_by_stub"] != false {
t.Fatalf("unexpected payload: %#v", payload)
}
roles, ok := payload["runtime_roles_required"].([]string)
if !ok || !containsString(roles, "global-admin-runtime") || !containsString(roles, "policy-authority") {
t.Fatalf("runtime roles = %#v", payload["runtime_roles_required"])
}
}
func TestStubSupervisorBlocksWebIngressRealListenerWithoutRuntimeGate(t *testing.T) {
statuses, err := (StubSupervisor{Version: "test"}).Apply(context.Background(), []client.DesiredWorkload{
{
ServiceType: "admin-ingress",
DesiredState: "enabled",
RuntimeMode: "native",
Config: map[string]any{
"listen_http_port": 80,
"listen_https_port": 443,
"tls_mode": "terminate",
"scope": "platform",
"service_classes": []any{"platform_admin"},
"real_listener_enabled": true,
},
},
})
if err != nil {
t.Fatalf("apply desired workload: %v", err)
}
if statuses[0].ReportedState != "degraded" {
t.Fatalf("ReportedState = %q", statuses[0].ReportedState)
}
payload := statuses[0].StatusPayload
if payload["reason"] != "web_ingress_real_listener_gate_disabled" ||
payload["real_listener_requested"] != true ||
payload["real_listener_runtime_enabled"] != false ||
payload["real_listener_start_allowed"] != false ||
payload["ports_opened_by_stub"] != false {
t.Fatalf("unexpected payload: %#v", payload)
}
}
func TestStubSupervisorAllowsWebIngressRealListenerGateButDoesNotOpenPorts(t *testing.T) {
statuses, err := (StubSupervisor{Version: "test", WebIngressRuntimeEnabled: true}).Apply(context.Background(), []client.DesiredWorkload{
{
ServiceType: "admin-ingress",
DesiredState: "enabled",
RuntimeMode: "native",
Config: map[string]any{
"listen_http_port": 80,
"listen_https_port": 443,
"tls_mode": "terminate",
"scope": "platform",
"service_classes": []any{"platform_admin"},
"real_listener_enabled": true,
},
},
})
if err != nil {
t.Fatalf("apply desired workload: %v", err)
}
if statuses[0].ReportedState != "running" {
t.Fatalf("ReportedState = %q", statuses[0].ReportedState)
}
payload := statuses[0].StatusPayload
if payload["real_listener_requested"] != true ||
payload["real_listener_runtime_enabled"] != true ||
payload["real_listener_start_allowed"] != true ||
payload["ports_opened_by_stub"] != false {
t.Fatalf("unexpected payload: %#v", payload)
}
}
// TestStubSupervisorStartsWebIngressManagerWhenRealListenerAllowed wires a
// real webingress.Manager into the supervisor with loopback listen addresses
// and no TLS certificate configured. It expects the HTTP listener to run and
// the HTTPS listener not to, which the supervisor reports as a degraded,
// partially started listener.
func TestStubSupervisorStartsWebIngressManagerWhenRealListenerAllowed(t *testing.T) {
	manager := webingress.NewManager()
	workload := client.DesiredWorkload{
		ServiceType:  "admin-ingress",
		DesiredState: "enabled",
		RuntimeMode:  "native",
		Config: map[string]any{
			"listen_http_port":      80,
			"listen_https_port":     443,
			"listen_http_addr":      "127.0.0.1:0",
			"listen_https_addr":     "127.0.0.1:0",
			"tls_mode":              "terminate",
			"scope":                 "platform",
			"service_classes":       []any{"platform_admin"},
			"real_listener_enabled": true,
		},
	}
	supervisor := StubSupervisor{Version: "test", WebIngressRuntimeEnabled: true, WebIngressManager: manager}
	statuses, err := supervisor.Apply(context.Background(), []client.DesiredWorkload{workload})
	if err != nil {
		t.Fatalf("apply desired workload: %v", err)
	}
	if statuses[0].ReportedState != "degraded" {
		t.Fatalf("ReportedState = %q", statuses[0].ReportedState)
	}

	payload := statuses[0].StatusPayload
	listenerStatus, isListenerStatus := payload["listener_status"].(webingress.ListenerStatus)
	if !isListenerStatus {
		t.Fatalf("listener_status = %#v", payload["listener_status"])
	}
	httpUp := listenerStatus.HTTPRunning && listenerStatus.HTTPAddr != ""
	if !httpUp || listenerStatus.HTTPSRunning {
		t.Fatalf("listener status = %+v", listenerStatus)
	}

	expectations := []struct {
		key  string
		want any
	}{
		{"reason", "web_ingress_listener_partial"},
		{"ports_opened_by_runtime", true},
		{"ports_opened_by_stub", false},
	}
	for _, expectation := range expectations {
		if payload[expectation.key] != expectation.want {
			t.Fatalf("payload = %#v", payload)
		}
	}

	// Shut the listener down; the returned status is not needed here.
	_ = manager.Stop(context.Background())
}
func TestStubSupervisorBlocksInvalidWebIngressContract(t *testing.T) {
statuses, err := (StubSupervisor{Version: "test"}).Apply(context.Background(), []client.DesiredWorkload{
{
ServiceType: "public-ingress",
DesiredState: "enabled",
RuntimeMode: "native",
Config: map[string]any{
"listen_http_port": 8080,
"listen_https_port": 443,
"scope": "organization",
"service_classes": []any{"platform_admin"},
},
},
})
if err != nil {
t.Fatalf("apply desired workload: %v", err)
}
if statuses[0].ReportedState != "degraded" {
t.Fatalf("ReportedState = %q", statuses[0].ReportedState)
}
payload := statuses[0].StatusPayload
if payload["reason"] != "web_ingress_contract_invalid" || payload["traffic"] != "blocked" {
t.Fatalf("unexpected payload: %#v", payload)
}
missing, ok := payload["missing_checks"].([]string)
if !ok || !containsString(missing, "listen_http_port_must_be_80") || !containsString(missing, "service_class_not_allowed:platform_admin") {
t.Fatalf("missing checks = %#v", payload["missing_checks"])
}
}
func TestStubSupervisorKeepsUnsupportedEnabledWorkloadDegraded(t *testing.T) {
statuses, err := (StubSupervisor{Version: "test"}).Apply(context.Background(), []client.DesiredWorkload{
{ServiceType: "rdp-worker", DesiredState: "enabled", RuntimeMode: "container"},
@@ -0,0 +1,189 @@
package vpnruntime
import (
"context"
"sync"
"time"
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/mesh"
)
// FabricSessionFrameWriter is the write side of a fabric session as used by
// this package: anything that can send a single fabricproto.Frame.
type FabricSessionFrameWriter interface {
SendFrame(context.Context, fabricproto.Frame) error
}
// FabricSessionPacketPeerRegistry tracks, per VPN connection ID, the fabric
// session peer that most recently sent a VPN packet frame for that connection.
type FabricSessionPacketPeerRegistry struct {
// mu guards peers.
mu sync.RWMutex
// peers is keyed by VPN connection ID.
peers map[string]FabricSessionPacketPeer
}
// FabricSessionPacketPeer is the registry entry for one VPN connection.
type FabricSessionPacketPeer struct {
// VPNConnectionID is the connection ID decoded from the registered frame.
VPNConnectionID string
// Sender is the frame writer supplied with the most recent registration.
Sender FabricSessionFrameWriter
// StreamID is the stream ID of the most recently registered frame.
StreamID uint64
// StreamIDsByTrafficClass accumulates the distinct stream IDs seen so far,
// grouped by traffic class name.
StreamIDsByTrafficClass map[string][]uint64
// RegisteredAt is set once, on the first registration for the connection.
RegisteredAt time.Time
// LastPacketAt is refreshed on every registration.
LastPacketAt time.Time
}
// FabricSessionPacketPeerTransport sends and receives gateway packet batches
// for one VPN connection. The sending peer is looked up in Registry on every
// send; received batches are read from Inbox.
type FabricSessionPacketPeerTransport struct {
// Registry resolves the current peer for VPNConnectionID.
Registry *FabricSessionPacketPeerRegistry
// Inbox is the source of client-to-gateway packet batches.
Inbox *FabricPacketInbox
// VPNConnectionID selects the peer and the inbox queue.
VPNConnectionID string
}
// NewFabricSessionPacketPeerRegistry returns an empty registry with its peer
// map already allocated.
func NewFabricSessionPacketPeerRegistry() *FabricSessionPacketPeerRegistry {
	registry := new(FabricSessionPacketPeerRegistry)
	registry.peers = make(map[string]FabricSessionPacketPeer)
	return registry
}
// RegisterFrame records sender as the current peer for the VPN connection
// named in frame and reports whether a registration took place.
//
// Nothing is registered, and (false, nil) is returned, when the registry or
// sender is nil, when frame is not a data frame, when its stream ID is zero,
// when it does not decode as a VPN packet data frame, or when the decoded
// connection ID is empty. The returned error is always nil and ctx is unused.
//
// On registration the entry's Sender, StreamID and LastPacketAt are
// overwritten, RegisteredAt is set only the first time, and the frame's stream
// ID is added to the entry's per-traffic-class list if it is not there yet.
func (r *FabricSessionPacketPeerRegistry) RegisterFrame(ctx context.Context, sender FabricSessionFrameWriter, frame fabricproto.Frame) (bool, error) {
	switch {
	case r == nil, sender == nil:
		return false, nil
	case frame.Type != fabricproto.FrameData, frame.StreamID == 0:
		return false, nil
	}
	decoded, err := DecodeFabricVPNPacketDataFrame(frame)
	if err != nil || decoded.VPNConnectionID == "" {
		return false, nil
	}
	connectionID := decoded.VPNConnectionID
	seenAt := time.Now().UTC()

	r.mu.Lock()
	defer r.mu.Unlock()

	if r.peers == nil {
		r.peers = map[string]FabricSessionPacketPeer{}
	}
	entry := r.peers[connectionID]
	if entry.RegisteredAt.IsZero() {
		entry.RegisteredAt = seenAt
	}
	entry.VPNConnectionID = connectionID
	entry.Sender = sender
	entry.StreamID = frame.StreamID
	entry.LastPacketAt = seenAt
	if entry.StreamIDsByTrafficClass == nil {
		entry.StreamIDsByTrafficClass = map[string][]uint64{}
	}
	if className := fabricSessionTrafficClassName(frame.TrafficClass); className != "" {
		known := entry.StreamIDsByTrafficClass[className]
		if !containsUint64(known, frame.StreamID) {
			entry.StreamIDsByTrafficClass[className] = append(known, frame.StreamID)
		}
	}
	r.peers[connectionID] = entry
	return true, nil
}
// TransportFor builds a packet transport that replies to the peer currently
// registered for vpnConnectionID, sending gateway-to-client and receiving
// client-to-gateway via inbox.
//
// It returns nil when the registry or inbox is nil, the ID is empty, or no
// usable peer (non-nil sender, non-zero stream ID) is registered.
func (r *FabricSessionPacketPeerRegistry) TransportFor(vpnConnectionID string, inbox *FabricPacketInbox) PacketTransport {
	if r == nil || inbox == nil || vpnConnectionID == "" {
		return nil
	}
	r.mu.RLock()
	peer, ok := r.peers[vpnConnectionID]
	// The peer value is a struct copy, but its StreamIDsByTrafficClass map is
	// still shared with the registry entry that RegisterFrame mutates under the
	// write lock. Clone it before releasing the read lock so the map is never
	// iterated concurrently with a write.
	streamIDsByClass := copyStreamIDsByClass(peer.StreamIDsByTrafficClass)
	r.mu.RUnlock()
	if !ok || peer.Sender == nil || peer.StreamID == 0 {
		return nil
	}
	return &FabricSessionPacketTransport{
		Sender:                  fabricSessionFrameWriterAdapter{writer: peer.Sender},
		Inbox:                   inbox,
		StreamID:                peer.StreamID,
		StreamIDsByTrafficClass: streamIDsByClass,
		VPNConnectionID:         vpnConnectionID,
		SendDirection:           FabricDirectionGatewayToClient,
		ReceiveDirection:        FabricDirectionClientToGateway,
	}
}
// SendGatewayPacketBatch sends packets to the peer currently registered for
// this transport's VPN connection. It returns
// mesh.ErrForwardRuntimeUnavailable when the transport is not fully configured
// or no peer transport can be resolved.
func (t *FabricSessionPacketPeerTransport) SendGatewayPacketBatch(ctx context.Context, packets [][]byte) error {
	configured := t != nil && t.Registry != nil && t.Inbox != nil && t.VPNConnectionID != ""
	if !configured {
		return mesh.ErrForwardRuntimeUnavailable
	}
	// Resolve the peer on every send so the latest registration is used.
	if current := t.Registry.TransportFor(t.VPNConnectionID, t.Inbox); current != nil {
		return current.SendGatewayPacketBatch(ctx, packets)
	}
	return mesh.ErrForwardRuntimeUnavailable
}
// ReceiveGatewayPacketBatch reads client-to-gateway packets for this
// transport's VPN connection from the inbox, using the given timeout. It
// returns mesh.ErrForwardRuntimeUnavailable when the transport has no inbox or
// no connection ID.
func (t *FabricSessionPacketPeerTransport) ReceiveGatewayPacketBatch(ctx context.Context, timeout time.Duration) ([][]byte, error) {
	usable := t != nil && t.Inbox != nil && t.VPNConnectionID != ""
	if usable {
		return t.Inbox.Receive(ctx, t.VPNConnectionID, FabricDirectionClientToGateway, timeout)
	}
	return nil, mesh.ErrForwardRuntimeUnavailable
}
// Snapshot reports the transport name, the bound VPN connection ID and whether
// a peer transport can currently be resolved from the registry. A nil receiver
// yields a snapshot with peer_ready false and no connection ID.
func (t *FabricSessionPacketPeerTransport) Snapshot() map[string]any {
	const transportName = "fabric_session_peer_dynamic"
	if t == nil {
		return map[string]any{
			"transport":  transportName,
			"peer_ready": false,
		}
	}
	peerReady := t.Registry != nil && t.Registry.TransportFor(t.VPNConnectionID, t.Inbox) != nil
	return map[string]any{
		"transport":         transportName,
		"vpn_connection_id": t.VPNConnectionID,
		"peer_ready":        peerReady,
	}
}
// Snapshot returns the number of registered peers under "ready" and one entry
// per peer under "peers". Timestamps are included only when set, formatted as
// RFC 3339 with nanoseconds. A nil registry reports ready 0 and no peer list.
func (r *FabricSessionPacketPeerRegistry) Snapshot() map[string]any {
	if r == nil {
		return map[string]any{"ready": 0}
	}
	r.mu.RLock()
	defer r.mu.RUnlock()

	rows := make([]map[string]any, 0, len(r.peers))
	for _, entry := range r.peers {
		row := map[string]any{
			"vpn_connection_id": entry.VPNConnectionID,
			"stream_id":         entry.StreamID,
		}
		if registered := entry.RegisteredAt; !registered.IsZero() {
			row["registered_at"] = registered.Format(time.RFC3339Nano)
		}
		if last := entry.LastPacketAt; !last.IsZero() {
			row["last_packet_at"] = last.Format(time.RFC3339Nano)
		}
		rows = append(rows, row)
	}
	return map[string]any{
		"ready": len(r.peers),
		"peers": rows,
	}
}
// fabricSessionFrameWriterAdapter exposes a FabricSessionFrameWriter through a
// Send method so it can be used as the Sender of a FabricSessionPacketTransport.
type fabricSessionFrameWriterAdapter struct {
writer FabricSessionFrameWriter
}
// Send forwards frame to the wrapped writer, or returns
// mesh.ErrForwardRuntimeUnavailable when no writer is set.
func (a fabricSessionFrameWriterAdapter) Send(ctx context.Context, frame fabricproto.Frame) error {
	if w := a.writer; w != nil {
		return w.SendFrame(ctx, frame)
	}
	return mesh.ErrForwardRuntimeUnavailable
}
// containsUint64 reports whether value occurs in values.
func containsUint64(values []uint64, value uint64) bool {
	for i := 0; i < len(values); i++ {
		if values[i] == value {
			return true
		}
	}
	return false
}
// copyStreamIDsByClass returns a deep copy of values so the caller can keep it
// without sharing the map or its slices. An empty or nil input yields nil.
func copyStreamIDsByClass(values map[string][]uint64) map[string][]uint64 {
	if len(values) == 0 {
		return nil
	}
	cloned := make(map[string][]uint64, len(values))
	for class, streamIDs := range values {
		// Appending onto a nil slice detaches the copy from the source's backing array.
		var dup []uint64
		dup = append(dup, streamIDs...)
		cloned[class] = dup
	}
	return cloned
}
@@ -130,11 +130,14 @@ func (t *FabricSessionPacketTransport) ReceiveGatewayPacketBatch(ctx context.Con
continue
}
if err != nil {
if packets, receiveErr := t.Inbox.Receive(ctx, t.VPNConnectionID, direction, 100*time.Millisecond); receiveErr != nil || len(packets) > 0 {
return packets, receiveErr
}
return nil, err
}
case frame, ok := <-frames:
if !ok {
return t.Inbox.Receive(ctx, t.VPNConnectionID, direction, 5*time.Millisecond)
return t.Inbox.Receive(ctx, t.VPNConnectionID, direction, 100*time.Millisecond)
}
if frame.Type != fabricproto.FrameData || !t.acceptsStream(frame.StreamID) {
continue
@@ -426,6 +426,59 @@ func TestFabricSessionPacketTransportRunFrameIngressDeliversInbox(t *testing.T)
}
}
// TestFabricSessionPacketPeerTransportSendsReplyToLatestRegisteredPeer
// registers a client-to-gateway frame with the registry, delivers it to the
// inbox, and checks that the peer transport receives the request and sends the
// reply through the registered sender in the gateway-to-client direction.
func TestFabricSessionPacketPeerTransportSendsReplyToLatestRegisteredPeer(t *testing.T) {
inbox := NewFabricPacketInbox(4)
registry := NewFabricSessionPacketPeerRegistry()
sender := &recordingFrameSender{}
frame, err := NewFabricVPNPacketDataFrame(FabricVPNPacketFrameInput{
StreamID: 7,
VPNConnectionID: "vpn-1",
Direction: FabricDirectionClientToGateway,
Packets: [][]byte{[]byte("request")},
})
if err != nil {
t.Fatalf("frame: %v", err)
}
// Registering the frame makes sender the reply path for "vpn-1".
handled, err := registry.RegisterFrame(context.Background(), sender, frame)
if err != nil || !handled {
t.Fatalf("register frame handled=%v err=%v", handled, err)
}
if err := inbox.DeliverFabricSessionFrame(context.Background(), frame); err != nil {
t.Fatalf("deliver frame: %v", err)
}
transport := &FabricSessionPacketPeerTransport{
Registry: registry,
Inbox: inbox,
VPNConnectionID: "vpn-1",
}
requests, err := transport.ReceiveGatewayPacketBatch(context.Background(), time.Second)
if err != nil || len(requests) != 1 || string(requests[0]) != "request" {
t.Fatalf("requests=%q err=%v", requests, err)
}
if err := transport.SendGatewayPacketBatch(context.Background(), [][]byte{[]byte("reply")}); err != nil {
t.Fatalf("send reply: %v", err)
}
// Exactly one frame must have reached the recorded sender.
if len(sender.frames) != 1 {
t.Fatalf("sent frames = %d, want 1", len(sender.frames))
}
payload, err := DecodeFabricVPNPacketDataFrame(sender.frames[0])
if err != nil {
t.Fatalf("decode reply: %v", err)
}
if payload.Direction != FabricDirectionGatewayToClient || string(payload.Packets[0]) != "reply" {
t.Fatalf("reply payload = %+v", payload)
}
}
// recordingFrameSender is a test double that keeps every frame passed to
// SendFrame, in call order.
type recordingFrameSender struct {
frames []fabricproto.Frame
}
// SendFrame appends frame to the recorded list and always succeeds.
func (s *recordingFrameSender) SendFrame(_ context.Context, frame fabricproto.Frame) error {
s.frames = append(s.frames, frame)
return nil
}
func TestFabricSessionPacketTransportReceiveReadsPumpFrames(t *testing.T) {
inbox := NewFabricPacketInbox(4)
receiver := memoryFabricSessionReceiver{
@@ -169,6 +169,9 @@ func (g *Gateway) Snapshot() map[string]any {
out := map[string]any{
"running": running,
"service_role": "ipv4-egress",
"service_class": "vpn_packets",
"adapter_contract": "fabric_channel_to_ipv4_nat",
"transport": g.transportName(),
"poll_timeout_ms": g.PollTimeout.Milliseconds(),
"client_to_gateway_batches": g.clientToGatewayBatches.Load(),
@@ -234,14 +237,7 @@ func (g *Gateway) setStopped(err error) {
func (g *Gateway) normalize() error {
if g.Transport == nil {
if g.API == nil {
return fmt.Errorf("api client or packet transport is required")
}
g.Transport = BackendPacketTransport{
API: g.API,
ClusterID: g.ClusterID,
VPNConnectionID: g.VPNConnectionID,
}
return fmt.Errorf("fabric packet transport is required; backend packet relay fallback is disabled")
}
if g.ClusterID == "" || g.VPNConnectionID == "" {
return fmt.Errorf("cluster id and vpn connection id are required")
@@ -95,6 +95,30 @@ func TestGatewayRunClosesPacketTransportOnRuntimeError(t *testing.T) {
}
}
// TestGatewayNormalizeRejectsBackendPacketRelayFallback checks that normalize
// fails with the exact "fabric packet transport is required" message when no
// Transport is configured.
func TestGatewayNormalizeRejectsBackendPacketRelayFallback(t *testing.T) {
gateway := &Gateway{
API: nil,
ClusterID: "cluster-1",
VPNConnectionID: "vpn-1",
}
err := gateway.normalize()
if err == nil {
t.Fatal("normalize succeeded without a fabric packet transport")
}
// The message is compared verbatim, so wording changes must update this test.
if got, want := err.Error(), "fabric packet transport is required; backend packet relay fallback is disabled"; got != want {
t.Fatalf("normalize error = %q, want %q", got, want)
}
}
// TestGatewaySnapshotReportsIPv4EgressServiceAdapter checks that Snapshot
// exposes the service_role, service_class and adapter_contract identifiers.
func TestGatewaySnapshotReportsIPv4EgressServiceAdapter(t *testing.T) {
gateway := &Gateway{Transport: &recordingGatewayTransport{}, VPNConnectionID: "vpn-1"}
snapshot := gateway.Snapshot()
if snapshot["service_role"] != "ipv4-egress" || snapshot["service_class"] != "vpn_packets" || snapshot["adapter_contract"] != "fabric_channel_to_ipv4_nat" {
t.Fatalf("unexpected gateway service snapshot: %#v", snapshot)
}
}
func TestGatewayUploadPrioritizesTCPControlPackets(t *testing.T) {
transport := &recordingGatewayTransport{}
gateway := &Gateway{Transport: transport, VPNConnectionID: "vpn-1"}
@@ -0,0 +1,190 @@
package webingress
import (
"context"
"encoding/json"
"net/http"
"strings"
"time"
)
// Schema identifiers used by the admin runtime dispatcher and its Control API
// projection client.
const (
	// AdminRuntimeResponseSchema tags JSON bodies produced by the dispatcher itself.
	AdminRuntimeResponseSchema = "rap.web_ingress.admin_runtime_response.v1"
	// ControlAPIProjectionRequestSchema tags requests sent to the projection client.
	ControlAPIProjectionRequestSchema = "rap.web_ingress.control_api_projection_request.v1"
	// ControlAPIProjectionResponseSchema is the schema a projection response must carry.
	ControlAPIProjectionResponseSchema = "rap.web_ingress.control_api_projection_response.v1"
)
// AdminRuntimeDispatcher answers fabric requests for the admin/portal runtime.
type AdminRuntimeDispatcher struct {
// ProjectionClient, when non-nil, serves GET and HEAD requests other than the
// health endpoints.
ProjectionClient ControlAPIProjectionClient
// Now overrides the clock used for observed_at timestamps; time.Now is used
// when it is nil.
Now func() time.Time
}
// ControlAPIProjectionClient resolves a read request into a projection
// response.
type ControlAPIProjectionClient interface {
Project(ctx context.Context, request ControlAPIProjectionRequest) (ControlAPIProjectionResponse, error)
}
// ControlAPIProjectionRequest is the JSON payload handed to a
// ControlAPIProjectionClient. Query and Host are omitted from JSON when empty.
type ControlAPIProjectionRequest struct {
SchemaVersion string `json:"schema_version"`
Method string `json:"method"`
Path string `json:"path"`
Query string `json:"query,omitempty"`
Host string `json:"host,omitempty"`
Scope string `json:"scope"`
ServiceClass string `json:"service_class"`
// ObservedAt is an RFC 3339 (nanosecond) timestamp string.
ObservedAt string `json:"observed_at"`
}
// ControlAPIProjectionResponse is what a ControlAPIProjectionClient returns.
// The dispatcher rejects responses whose SchemaVersion is not
// ControlAPIProjectionResponseSchema.
type ControlAPIProjectionResponse struct {
SchemaVersion string `json:"schema_version"`
Status string `json:"status"`
Reason string `json:"reason,omitempty"`
// StatusCode outside 100-599 is replaced with 200 by the dispatcher.
StatusCode int `json:"status_code"`
// Headers are copied to the fabric response only when they pass the
// dispatcher's response-header filter and have a non-blank value.
Headers map[string]string `json:"headers,omitempty"`
// Body is returned to the caller verbatim.
Body json.RawMessage `json:"body,omitempty"`
}
// AdminRuntimeJSONResponse is the JSON body of responses generated by the
// dispatcher itself (health, manifest, and blocked/error replies).
type AdminRuntimeJSONResponse struct {
SchemaVersion string `json:"schema_version"`
Status string `json:"status"`
Reason string `json:"reason,omitempty"`
Scope string `json:"scope,omitempty"`
ServiceClass string `json:"service_class,omitempty"`
Path string `json:"path,omitempty"`
// Manifest is only populated for ui-manifest responses.
Manifest map[string]any `json:"manifest,omitempty"`
ObservedAt string `json:"observed_at"`
}
// HandleFabricRequest routes a fabric request to a health, projection,
// manifest or blocked response. The returned error is always nil; failures are
// expressed through the response status code and JSON reason.
//
// Rules are evaluated in order:
//  1. scope/service-class pairs that are not allowed get 403;
//  2. GET /healthz and GET /readyz get 200;
//  3. GET/HEAD go to the projection client when one is configured;
//  4. GET on a path ending in /ui-manifest gets the static manifest;
//  5. any other non-GET/HEAD method gets 403;
//  6. everything else gets 501.
//
// An empty method is treated as GET.
func (d AdminRuntimeDispatcher) HandleFabricRequest(ctx context.Context, request FabricRequest) (FabricResponse, error) {
	verb := strings.ToUpper(strings.TrimSpace(request.Method))
	if verb == "" {
		verb = http.MethodGet
	}
	route := normalizeRuntimePath(request.Path)

	scope := strings.TrimSpace(request.Scope)
	class := strings.TrimSpace(request.ServiceClass)
	if !allowedAdminRuntimeScope(scope, class) {
		return d.json(http.StatusForbidden, request, "blocked", "admin_runtime_scope_rejected", nil), nil
	}

	isGet := verb == http.MethodGet
	isRead := isGet || verb == http.MethodHead

	if isGet && (route == "/healthz" || route == "/readyz") {
		return d.json(http.StatusOK, request, "ready", "admin_runtime_ready", nil), nil
	}
	// A configured projection client takes precedence over the static manifest.
	if d.ProjectionClient != nil && isRead {
		return d.project(ctx, request)
	}
	if isGet && (route == "/ui-manifest" || strings.HasSuffix(route, "/ui-manifest")) {
		return d.json(http.StatusOK, request, "ready", "ui_manifest_ready", d.manifest(request)), nil
	}
	if !isRead {
		return d.json(http.StatusForbidden, request, "blocked", "control_api_mutation_binding_not_implemented", nil), nil
	}
	return d.json(http.StatusNotImplemented, request, "blocked", "control_api_projection_binding_not_implemented", nil), nil
}
// allowedAdminRuntimeScope reports whether scope is permitted for
// serviceClass. Unknown service classes are never allowed. Both arguments are
// compared exactly; callers are expected to trim them first.
func allowedAdminRuntimeScope(scope string, serviceClass string) bool {
	if serviceClass == "platform_admin" {
		return scope == "platform"
	}
	if serviceClass == "cluster_admin" {
		return scope == "cluster"
	}
	if serviceClass == "organization_portal" {
		return scope == "organization"
	}
	if serviceClass == "user_portal" {
		// The user portal is reachable from both user and organization scope.
		return scope == "user" || scope == "organization"
	}
	return false
}
// project forwards a read request to the projection client and converts its
// answer into a FabricResponse. The returned error is always nil.
//
// The forwarded method, scope and service class are normalised the same way
// HandleFabricRequest normalises them before making its routing and
// authorisation decision (upper-cased method defaulting to GET, trimmed scope
// and service class), so the projection client sees exactly the values that
// were authorised.
//
// A client error yields 502 "control_api_projection_failed"; a response with
// an unexpected schema yields 502 "control_api_projection_invalid_response".
// Response headers are copied only when safeResponseHeader accepts the key and
// the value is not blank; a status code outside 100-599 is replaced with 200.
func (d AdminRuntimeDispatcher) project(ctx context.Context, request FabricRequest) (FabricResponse, error) {
	method := strings.ToUpper(strings.TrimSpace(request.Method))
	if method == "" {
		// HandleFabricRequest treats an empty method as GET; forward it as such.
		method = http.MethodGet
	}
	response, err := d.ProjectionClient.Project(ctx, ControlAPIProjectionRequest{
		SchemaVersion: ControlAPIProjectionRequestSchema,
		Method:        method,
		Path:          normalizeRuntimePath(request.Path),
		Query:         request.Query,
		Host:          request.Host,
		Scope:         strings.TrimSpace(request.Scope),
		ServiceClass:  strings.TrimSpace(request.ServiceClass),
		ObservedAt:    d.observedAt(),
	})
	if err != nil {
		return d.json(http.StatusBadGateway, request, "blocked", "control_api_projection_failed", nil), nil
	}
	if response.SchemaVersion != ControlAPIProjectionResponseSchema {
		return d.json(http.StatusBadGateway, request, "blocked", "control_api_projection_invalid_response", nil), nil
	}
	headers := http.Header{"Content-Type": []string{"application/json"}}
	for key, value := range response.Headers {
		if safeResponseHeader(key) && strings.TrimSpace(value) != "" {
			headers.Set(key, value)
		}
	}
	statusCode := response.StatusCode
	if statusCode < 100 || statusCode > 599 {
		statusCode = http.StatusOK
	}
	// Copy the body so the response does not alias the client's buffer.
	return FabricResponse{StatusCode: statusCode, Headers: headers, Body: append([]byte(nil), response.Body...)}, nil
}
// json builds a FabricResponse whose body is an AdminRuntimeJSONResponse
// echoing the request's scope, service class and path, with an
// application/json content type. The marshal error is discarded.
func (d AdminRuntimeDispatcher) json(statusCode int, request FabricRequest, status string, reason string, manifest map[string]any) FabricResponse {
	document := AdminRuntimeJSONResponse{
		SchemaVersion: AdminRuntimeResponseSchema,
		Status:        status,
		Reason:        reason,
		Scope:         request.Scope,
		ServiceClass:  request.ServiceClass,
		Path:          request.Path,
		Manifest:      manifest,
		ObservedAt:    d.observedAt(),
	}
	encoded, _ := json.Marshal(document)

	header := make(http.Header, 1)
	header["Content-Type"] = []string{"application/json"}

	var out FabricResponse
	out.StatusCode = statusCode
	out.Headers = header
	out.Body = encoded
	return out
}
// manifest returns the static, read-only UI manifest for the request's
// (trimmed) service class. Unknown classes get a minimal "status" manifest.
// The scope is echoed as received.
func (d AdminRuntimeDispatcher) manifest(request FabricRequest) map[string]any {
	class := strings.TrimSpace(request.ServiceClass)

	var view struct {
		sections []string
		actions  []string
	}
	switch class {
	case "platform_admin":
		view.sections = []string{"clusters", "nodes", "roles", "fabric", "workloads", "audit"}
		view.actions = []string{"read_platform_summary", "read_cluster_summaries", "read_node_status"}
	case "cluster_admin":
		view.sections = []string{"cluster", "nodes", "fabric", "workloads", "audit"}
		view.actions = []string{"read_cluster_summary", "read_node_status"}
	case "organization_portal":
		view.sections = []string{"organization", "sessions", "resources", "audit"}
		view.actions = []string{"read_organization_summary", "read_sessions"}
	case "user_portal":
		view.sections = []string{"profile", "sessions", "resources"}
		view.actions = []string{"read_profile", "read_sessions"}
	default:
		view.sections = []string{"status"}
		view.actions = []string{"read_status"}
	}

	result := make(map[string]any, 7)
	result["schema_version"] = "rap.web_ingress.ui_manifest.v1"
	result["scope"] = request.Scope
	result["service_class"] = class
	result["sections"] = view.sections
	result["allowed_actions"] = view.actions
	result["mutation_enabled"] = false
	result["projection_binding"] = "control_api_not_bound"
	return result
}
// observedAt returns the current UTC time as an RFC 3339 (nanosecond) string,
// taken from d.Now when set and from time.Now otherwise.
func (d AdminRuntimeDispatcher) observedAt() string {
	clock := time.Now
	if d.Now != nil {
		clock = d.Now
	}
	return clock().UTC().Format(time.RFC3339Nano)
}
// normalizeRuntimePath trims surrounding whitespace from path and guarantees a
// leading slash. A blank path becomes "/".
func normalizeRuntimePath(path string) string {
	trimmed := strings.TrimSpace(path)
	switch {
	case trimmed == "":
		return "/"
	case strings.HasPrefix(trimmed, "/"):
		return trimmed
	default:
		return "/" + trimmed
	}
}
@@ -0,0 +1,212 @@
package webingress
import (
"context"
"encoding/json"
"net/http"
"testing"
)
// TestAdminRuntimeDispatcherReturnsHealthAndManifest covers a dispatcher
// without a projection client: GET /readyz answers 200 and GET on a
// .../ui-manifest path answers 200 with the static, mutation-disabled manifest.
func TestAdminRuntimeDispatcherReturnsHealthAndManifest(t *testing.T) {
dispatcher := AdminRuntimeDispatcher{Now: fixedEnvelopeNow}
health, err := dispatcher.HandleFabricRequest(context.Background(), FabricRequest{
Method: http.MethodGet,
Path: "/readyz",
Scope: "platform",
ServiceClass: "platform_admin",
})
if err != nil {
t.Fatalf("health: %v", err)
}
if health.StatusCode != http.StatusOK {
t.Fatalf("health = %+v", health)
}
manifest, err := dispatcher.HandleFabricRequest(context.Background(), FabricRequest{
Method: http.MethodGet,
Path: "/platform-admin/ui-manifest",
Scope: "platform",
ServiceClass: "platform_admin",
})
if err != nil {
t.Fatalf("manifest: %v", err)
}
var payload AdminRuntimeJSONResponse
if err := json.Unmarshal(manifest.Body, &payload); err != nil {
t.Fatalf("decode manifest: %v", err)
}
if manifest.StatusCode != http.StatusOK ||
payload.SchemaVersion != AdminRuntimeResponseSchema ||
payload.Status != "ready" ||
payload.Reason != "ui_manifest_ready" ||
payload.Manifest["schema_version"] != "rap.web_ingress.ui_manifest.v1" ||
payload.Manifest["mutation_enabled"] != false {
t.Fatalf("payload = %+v status=%d", payload, manifest.StatusCode)
}
}
// TestAdminRuntimeDispatcherBlocksMutationsAndUnknownProjection covers a
// dispatcher without a projection client: a POST is answered with 403 and a
// GET on a non-manifest path with 501.
func TestAdminRuntimeDispatcherBlocksMutationsAndUnknownProjection(t *testing.T) {
dispatcher := AdminRuntimeDispatcher{Now: fixedEnvelopeNow}
mutation, err := dispatcher.HandleFabricRequest(context.Background(), FabricRequest{
Method: http.MethodPost,
Path: "/platform-admin/nodes",
Scope: "platform",
ServiceClass: "platform_admin",
})
if err != nil {
t.Fatalf("mutation: %v", err)
}
var mutationPayload AdminRuntimeJSONResponse
if err := json.Unmarshal(mutation.Body, &mutationPayload); err != nil {
t.Fatalf("decode mutation: %v", err)
}
if mutation.StatusCode != http.StatusForbidden || mutationPayload.Reason != "control_api_mutation_binding_not_implemented" {
t.Fatalf("mutation payload = %+v status=%d", mutationPayload, mutation.StatusCode)
}
projection, err := dispatcher.HandleFabricRequest(context.Background(), FabricRequest{
Method: http.MethodGet,
Path: "/platform-admin/nodes",
Scope: "platform",
ServiceClass: "platform_admin",
})
if err != nil {
t.Fatalf("projection: %v", err)
}
var projectionPayload AdminRuntimeJSONResponse
if err := json.Unmarshal(projection.Body, &projectionPayload); err != nil {
t.Fatalf("decode projection: %v", err)
}
if projection.StatusCode != http.StatusNotImplemented || projectionPayload.Reason != "control_api_projection_binding_not_implemented" {
t.Fatalf("projection payload = %+v status=%d", projectionPayload, projection.StatusCode)
}
}
// TestAdminRuntimeDispatcherRejectsInvalidScopeClassPair checks that a
// platform_admin request arriving with organization scope is rejected with 403
// even though a projection client is configured.
func TestAdminRuntimeDispatcherRejectsInvalidScopeClassPair(t *testing.T) {
dispatcher := AdminRuntimeDispatcher{ProjectionClient: &recordingProjectionClient{}, Now: fixedEnvelopeNow}
response, err := dispatcher.HandleFabricRequest(context.Background(), FabricRequest{
Method: http.MethodGet,
Path: "/platform-admin/ui-manifest",
Scope: "organization",
ServiceClass: "platform_admin",
})
if err != nil {
t.Fatalf("projection: %v", err)
}
var payload AdminRuntimeJSONResponse
if err := json.Unmarshal(response.Body, &payload); err != nil {
t.Fatalf("decode response: %v", err)
}
if response.StatusCode != http.StatusForbidden || payload.Reason != "admin_runtime_scope_rejected" {
t.Fatalf("payload = %+v status=%d", payload, response.StatusCode)
}
}
// TestAdminRuntimeDispatcherUsesControlAPIProjectionClientForReadRequests
// checks that a GET is forwarded to the projection client with its path, query,
// scope and service class, that the client's body and status are returned, and
// that the X-RAP-Projection header is passed through while Set-Cookie is not.
func TestAdminRuntimeDispatcherUsesControlAPIProjectionClientForReadRequests(t *testing.T) {
client := &recordingProjectionClient{
response: ControlAPIProjectionResponse{
SchemaVersion: ControlAPIProjectionResponseSchema,
Status: "ready",
StatusCode: http.StatusOK,
Headers: map[string]string{"X-RAP-Projection": "control-api", "Set-Cookie": "blocked"},
Body: json.RawMessage(`{"schema_version":"control.projection.v1","ok":true}`),
},
}
dispatcher := AdminRuntimeDispatcher{ProjectionClient: client, Now: fixedEnvelopeNow}
response, err := dispatcher.HandleFabricRequest(context.Background(), FabricRequest{
Method: http.MethodGet,
Path: "/platform-admin/nodes",
Query: "limit=10",
Host: "admin.example.test",
Scope: "platform",
ServiceClass: "platform_admin",
})
if err != nil {
t.Fatalf("projection: %v", err)
}
if response.StatusCode != http.StatusOK ||
response.Headers.Get("X-RAP-Projection") != "control-api" ||
response.Headers.Get("Set-Cookie") != "" ||
string(response.Body) != `{"schema_version":"control.projection.v1","ok":true}` {
t.Fatalf("response = %+v body=%s", response, string(response.Body))
}
// The recorded request shows what the dispatcher actually forwarded.
if client.request.Path != "/platform-admin/nodes" ||
client.request.Query != "limit=10" ||
client.request.Scope != "platform" ||
client.request.ServiceClass != "platform_admin" {
t.Fatalf("request = %+v", client.request)
}
}
// TestAdminRuntimeDispatcherReportsProjectionClientFailure checks that an
// error from the projection client becomes a 502 response with reason
// "control_api_projection_failed" rather than a returned error.
func TestAdminRuntimeDispatcherReportsProjectionClientFailure(t *testing.T) {
dispatcher := AdminRuntimeDispatcher{ProjectionClient: failingProjectionClient{}, Now: fixedEnvelopeNow}
response, err := dispatcher.HandleFabricRequest(context.Background(), FabricRequest{
Method: http.MethodGet,
Path: "/platform-admin/nodes",
Scope: "platform",
ServiceClass: "platform_admin",
})
if err != nil {
t.Fatalf("projection: %v", err)
}
var payload AdminRuntimeJSONResponse
if err := json.Unmarshal(response.Body, &payload); err != nil {
t.Fatalf("decode response: %v", err)
}
if response.StatusCode != http.StatusBadGateway || payload.Reason != "control_api_projection_failed" {
t.Fatalf("payload = %+v status=%d", payload, response.StatusCode)
}
}
// TestAdminRuntimeDispatcherRejectsInvalidProjectionResponseSchema checks that
// a projection response carrying an unexpected schema version is turned into a
// 502 response with reason "control_api_projection_invalid_response".
func TestAdminRuntimeDispatcherRejectsInvalidProjectionResponseSchema(t *testing.T) {
dispatcher := AdminRuntimeDispatcher{
ProjectionClient: &recordingProjectionClient{
response: ControlAPIProjectionResponse{
SchemaVersion: "wrong.schema",
Status: "ready",
StatusCode: http.StatusOK,
Body: json.RawMessage(`{"ok":true}`),
},
},
Now: fixedEnvelopeNow,
}
response, err := dispatcher.HandleFabricRequest(context.Background(), FabricRequest{
Method: http.MethodGet,
Path: "/platform-admin/nodes",
Scope: "platform",
ServiceClass: "platform_admin",
})
if err != nil {
t.Fatalf("projection: %v", err)
}
var payload AdminRuntimeJSONResponse
if err := json.Unmarshal(response.Body, &payload); err != nil {
t.Fatalf("decode response: %v", err)
}
if response.StatusCode != http.StatusBadGateway || payload.Reason != "control_api_projection_invalid_response" {
t.Fatalf("payload = %+v status=%d", payload, response.StatusCode)
}
}
// recordingProjectionClient is a test double that remembers the last request
// it received and replies with a canned response.
type recordingProjectionClient struct {
request ControlAPIProjectionRequest
response ControlAPIProjectionResponse
}
// Project stores request and returns the configured response with a nil error.
func (c *recordingProjectionClient) Project(_ context.Context, request ControlAPIProjectionRequest) (ControlAPIProjectionResponse, error) {
c.request = request
return c.response, nil
}
// failingProjectionClient is a test double whose Project always fails.
type failingProjectionClient struct{}
// Project returns a zero response and errTestProjectionFailure.
func (failingProjectionClient) Project(context.Context, ControlAPIProjectionRequest) (ControlAPIProjectionResponse, error) {
return ControlAPIProjectionResponse{}, errTestProjectionFailure{}
}
// errTestProjectionFailure is the error returned by failingProjectionClient.
type errTestProjectionFailure struct{}
// Error implements the error interface.
func (errTestProjectionFailure) Error() string { return "projection failed" }
@@ -0,0 +1,151 @@
package webingress
import (
"context"
"encoding/base64"
"encoding/json"
"errors"
"net/http"
"sort"
"strings"
"time"
)
// FabricServiceChannelEnvelopeSchema is the schema_version written into every
// FabricServiceChannelEnvelope built by DefaultFabricBinder.
const FabricServiceChannelEnvelopeSchema = "rap.web_ingress.fabric_service_channel_envelope.v1"
// Sentinel errors returned by DefaultFabricBinder.Forward when the binder or
// the request is missing a mandatory part. Compare with errors.Is.
var (
// ErrFabricEnvelopeSignerRequired: the binder has no Signer.
ErrFabricEnvelopeSignerRequired = errors.New("web ingress fabric envelope signer required")
// ErrFabricEnvelopeSenderRequired: the binder has no Sender.
ErrFabricEnvelopeSenderRequired = errors.New("web ingress fabric envelope sender required")
// ErrFabricEnvelopeScopeRequired: the request scope is blank.
ErrFabricEnvelopeScopeRequired = errors.New("web ingress fabric envelope scope required")
// ErrFabricEnvelopeClassRequired: the request service class is blank.
ErrFabricEnvelopeClassRequired = errors.New("web ingress fabric envelope service class required")
)
// EnvelopeSigner signs the canonical (JSON-encoded) bytes of an envelope.
type EnvelopeSigner interface {
Sign(ctx context.Context, canonical []byte) (FabricEnvelopeSignature, error)
}
// EnvelopeSender delivers a signed envelope and returns the fabric response.
type EnvelopeSender interface {
Send(ctx context.Context, envelope SignedFabricServiceChannelEnvelope) (FabricResponse, error)
}
// DefaultFabricBinder wraps a FabricRequest in an envelope, has it signed and
// hands it to a sender.
type DefaultFabricBinder struct {
// Signer and Sender are both required by Forward.
Signer EnvelopeSigner
Sender EnvelopeSender
// Now overrides the clock used for enveloped_at; time.Now is used when nil.
Now func() time.Time
}
// FabricServiceChannelEnvelope is the payload that gets signed. Forward
// marshals this struct with encoding/json and signs the resulting bytes, so
// the field order and JSON tags below determine the signed byte sequence.
type FabricServiceChannelEnvelope struct {
SchemaVersion string `json:"schema_version"`
// RequestSchema is the schema version of the wrapped FabricRequest.
RequestSchema string `json:"request_schema"`
Method string `json:"method"`
Path string `json:"path"`
Query string `json:"query,omitempty"`
Host string `json:"host"`
ServiceType string `json:"service_type"`
Scope string `json:"scope"`
ServiceClass string `json:"service_class"`
// Headers holds the filtered, canonicalised request headers.
Headers map[string][]string `json:"headers,omitempty"`
// BodyBase64 is the request body in standard base64.
BodyBase64 string `json:"body_b64,omitempty"`
// ObservedAt and EnvelopedAt are RFC 3339 (nanosecond) UTC timestamps.
ObservedAt string `json:"observed_at"`
EnvelopedAt string `json:"enveloped_at"`
}
// FabricEnvelopeSignature is the signature metadata produced by an
// EnvelopeSigner for one envelope.
type FabricEnvelopeSignature struct {
KeyID string `json:"key_id"`
Alg string `json:"alg"`
Signature string `json:"signature"`
SignedAt string `json:"signed_at,omitempty"`
}
// SignedFabricServiceChannelEnvelope pairs an envelope with its signature.
type SignedFabricServiceChannelEnvelope struct {
SchemaVersion string `json:"schema_version"`
Envelope FabricServiceChannelEnvelope `json:"envelope"`
Signature FabricEnvelopeSignature `json:"signature"`
// Canonical holds the exact bytes that were signed; it is excluded from JSON.
Canonical []byte `json:"-"`
}
// Forward wraps request in a FabricServiceChannelEnvelope, signs its JSON
// encoding and passes the signed envelope to the sender.
//
// It returns one of the ErrFabricEnvelope*Required sentinels when the signer,
// sender, scope or service class is missing (checked in that order), and
// otherwise propagates marshal, signer and sender errors unchanged.
func (b DefaultFabricBinder) Forward(ctx context.Context, request FabricRequest) (FabricResponse, error) {
	var none FabricResponse
	switch {
	case b.Signer == nil:
		return none, ErrFabricEnvelopeSignerRequired
	case b.Sender == nil:
		return none, ErrFabricEnvelopeSenderRequired
	case strings.TrimSpace(request.Scope) == "":
		return none, ErrFabricEnvelopeScopeRequired
	case strings.TrimSpace(request.ServiceClass) == "":
		return none, ErrFabricEnvelopeClassRequired
	}

	unsigned := b.envelope(request)
	// These exact bytes are what gets signed and what travels in Canonical.
	signedBytes, marshalErr := json.Marshal(unsigned)
	if marshalErr != nil {
		return none, marshalErr
	}
	sig, signErr := b.Signer.Sign(ctx, signedBytes)
	if signErr != nil {
		return none, signErr
	}

	signed := SignedFabricServiceChannelEnvelope{
		SchemaVersion: SignedFabricServiceChannelEnvelopeSchema,
		Envelope:      unsigned,
		Signature:     sig,
		Canonical:     signedBytes,
	}
	return b.Sender.Send(ctx, signed)
}
// envelope converts request into the envelope that will be signed. Text
// fields other than Path and Query are trimmed, the method is upper-cased,
// headers are filtered and canonicalised, and the body is base64-encoded.
// When the request has no ObservedAt, the envelope time is used for both
// timestamps.
func (b DefaultFabricBinder) envelope(request FabricRequest) FabricServiceChannelEnvelope {
	var envelopedAt time.Time
	if b.Now != nil {
		envelopedAt = b.Now().UTC()
	} else {
		envelopedAt = time.Now().UTC()
	}
	observed := request.ObservedAt.UTC()
	if observed.IsZero() {
		observed = envelopedAt
	}

	trim := strings.TrimSpace
	var out FabricServiceChannelEnvelope
	out.SchemaVersion = FabricServiceChannelEnvelopeSchema
	out.RequestSchema = trim(request.SchemaVersion)
	out.Method = strings.ToUpper(trim(request.Method))
	out.Path = request.Path
	out.Query = request.Query
	out.Host = trim(request.Host)
	out.ServiceType = trim(request.ServiceType)
	out.Scope = trim(request.Scope)
	out.ServiceClass = trim(request.ServiceClass)
	out.Headers = canonicalHeaders(request.Headers)
	out.BodyBase64 = base64.StdEncoding.EncodeToString(request.Body)
	out.ObservedAt = observed.Format(time.RFC3339Nano)
	out.EnvelopedAt = envelopedAt.Format(time.RFC3339Nano)
	return out
}
// canonicalHeaders returns the subset of headers that may be placed in a
// signed envelope, in a deterministic form:
//
//   - keys are trimmed and canonicalised with http.CanonicalHeaderKey;
//   - keys rejected by safeRequestHeader are dropped;
//   - values are trimmed, blank values are dropped, and the remaining values
//     of each key are sorted;
//   - keys left with no values are dropped.
//
// Raw keys that collapse to the same canonical key (for example "x-trace-id"
// and "X-Trace-Id" in a hand-built header map) have their values merged.
// Letting one overwrite the other would make the result depend on map
// iteration order, and therefore make the signed bytes nondeterministic.
//
// It returns nil when nothing remains. The input is not modified.
func canonicalHeaders(headers http.Header) map[string][]string {
	if len(headers) == 0 {
		return nil
	}
	out := make(map[string][]string, len(headers))
	for key, values := range headers {
		canonicalKey := http.CanonicalHeaderKey(strings.TrimSpace(key))
		if canonicalKey == "" || !safeRequestHeader(canonicalKey) {
			continue
		}
		for _, value := range values {
			if value = strings.TrimSpace(value); value != "" {
				// Appending to the map entry merges colliding keys and never
				// aliases the caller's slices.
				out[canonicalKey] = append(out[canonicalKey], value)
			}
		}
	}
	if len(out) == 0 {
		return nil
	}
	// Sort after merging so the order is independent of map iteration order.
	for _, values := range out {
		sort.Strings(values)
	}
	return out
}
@@ -0,0 +1,163 @@
package webingress
import (
"bytes"
"context"
"encoding/json"
"errors"
"net/http"
"testing"
"time"
)
// TestDefaultFabricBinderBuildsSignedEnvelopeAndSendsIt drives Forward with a
// recording signer and sender and checks: the sender's response is returned,
// the signer and sender see the same canonical bytes, the signature is passed
// through, and the canonical envelope has normalised fields, sorted header
// values, and neither the Authorization header nor blank-valued headers.
func TestDefaultFabricBinderBuildsSignedEnvelopeAndSendsIt(t *testing.T) {
signer := &recordingEnvelopeSigner{
signature: FabricEnvelopeSignature{KeyID: "node-key-1", Alg: "ed25519", Signature: "sig-1", SignedAt: "2026-05-17T00:00:02Z"},
}
sender := &recordingEnvelopeSender{
response: FabricResponse{StatusCode: http.StatusAccepted, Body: []byte(`{"accepted":true}`)},
}
binder := DefaultFabricBinder{Signer: signer, Sender: sender, Now: fixedEnvelopeNow}
response, err := binder.Forward(context.Background(), FabricRequest{
SchemaVersion: "rap.web_ingress.fabric_request.v1",
Method: "post",
Path: "/platform-admin/root",
Query: "tab=nodes",
Host: "admin.example.test",
ServiceType: "admin-ingress",
Scope: "platform",
ServiceClass: "platform_admin",
Headers: http.Header{
"X-Trace-Id": []string{"trace-b", "trace-a"},
"Authorization": []string{"Bearer should-not-forward"},
"X-Empty-Header": []string{" "},
},
Body: []byte(`{"hello":"world"}`),
ObservedAt: fixedNow(),
})
if err != nil {
t.Fatalf("Forward failed: %v", err)
}
if response.StatusCode != http.StatusAccepted {
t.Fatalf("response = %+v", response)
}
if len(signer.canonical) == 0 {
t.Fatal("signer did not receive canonical envelope")
}
if !bytes.Equal(sender.envelope.Canonical, signer.canonical) {
t.Fatalf("sender canonical does not match signer canonical")
}
if sender.envelope.SchemaVersion != "rap.web_ingress.signed_fabric_service_channel_envelope.v1" {
t.Fatalf("signed schema = %q", sender.envelope.SchemaVersion)
}
if sender.envelope.Signature.KeyID != "node-key-1" || sender.envelope.Signature.Signature != "sig-1" {
t.Fatalf("signature = %+v", sender.envelope.Signature)
}
// Decode the signed bytes to inspect what was actually covered by the signature.
var canonical FabricServiceChannelEnvelope
if err := json.Unmarshal(signer.canonical, &canonical); err != nil {
t.Fatalf("decode canonical: %v", err)
}
if canonical.SchemaVersion != FabricServiceChannelEnvelopeSchema ||
canonical.RequestSchema != "rap.web_ingress.fabric_request.v1" ||
canonical.Method != http.MethodPost ||
canonical.Scope != "platform" ||
canonical.ServiceClass != "platform_admin" ||
canonical.BodyBase64 != "eyJoZWxsbyI6IndvcmxkIn0=" ||
canonical.ObservedAt != "2026-05-17T00:00:00Z" ||
canonical.EnvelopedAt != "2026-05-17T00:00:01Z" {
t.Fatalf("canonical envelope = %+v", canonical)
}
if got := canonical.Headers["X-Trace-Id"]; len(got) != 2 || got[0] != "trace-a" || got[1] != "trace-b" {
t.Fatalf("canonical headers = %#v", canonical.Headers)
}
if canonical.Headers["Authorization"] != nil || canonical.Headers["X-Empty-Header"] != nil {
t.Fatalf("unsafe/empty headers leaked: %#v", canonical.Headers)
}
}
// TestDefaultFabricBinderRequiresSignerAndSender checks that Forward returns
// the matching sentinel error when the signer or the sender is missing.
func TestDefaultFabricBinderRequiresSignerAndSender(t *testing.T) {
request := FabricRequest{Scope: "platform", ServiceClass: "platform_admin"}
_, err := (DefaultFabricBinder{Sender: &recordingEnvelopeSender{}}).Forward(context.Background(), request)
if !errors.Is(err, ErrFabricEnvelopeSignerRequired) {
t.Fatalf("signer error = %v", err)
}
_, err = (DefaultFabricBinder{Signer: &recordingEnvelopeSigner{}}).Forward(context.Background(), request)
if !errors.Is(err, ErrFabricEnvelopeSenderRequired) {
t.Fatalf("sender error = %v", err)
}
}
// TestDefaultFabricBinderRequiresScopeAndServiceClass checks that Forward
// returns the matching sentinel error when the request lacks a scope or a
// service class.
func TestDefaultFabricBinderRequiresScopeAndServiceClass(t *testing.T) {
binder := DefaultFabricBinder{Signer: &recordingEnvelopeSigner{}, Sender: &recordingEnvelopeSender{}}
_, err := binder.Forward(context.Background(), FabricRequest{ServiceClass: "platform_admin"})
if !errors.Is(err, ErrFabricEnvelopeScopeRequired) {
t.Fatalf("scope error = %v", err)
}
_, err = binder.Forward(context.Background(), FabricRequest{Scope: "platform"})
if !errors.Is(err, ErrFabricEnvelopeClassRequired) {
t.Fatalf("class error = %v", err)
}
}
// TestDefaultFabricBinderPropagatesSignerAndSenderFailures checks that errors
// from the signer and from the sender are returned by Forward such that
// errors.Is still matches them.
func TestDefaultFabricBinderPropagatesSignerAndSenderFailures(t *testing.T) {
signerErr := errors.New("sign failed")
senderErr := errors.New("send failed")
request := FabricRequest{Scope: "platform", ServiceClass: "platform_admin"}
_, err := (DefaultFabricBinder{
Signer: &recordingEnvelopeSigner{err: signerErr},
Sender: &recordingEnvelopeSender{},
}).Forward(context.Background(), request)
if !errors.Is(err, signerErr) {
t.Fatalf("signer error = %v", err)
}
_, err = (DefaultFabricBinder{
Signer: &recordingEnvelopeSigner{},
Sender: &recordingEnvelopeSender{err: senderErr},
}).Forward(context.Background(), request)
if !errors.Is(err, senderErr) {
t.Fatalf("sender error = %v", err)
}
}
// fixedEnvelopeNow is a deterministic clock for tests: it always returns
// 2026-05-17T00:00:01Z.
func fixedEnvelopeNow() time.Time {
	const (
		year   = 2026
		day    = 17
		second = 1
	)
	return time.Date(year, time.May, day, 0, 0, second, 0, time.UTC)
}
// recordingEnvelopeSigner is a test double that keeps a copy of the bytes it
// was asked to sign and returns either a configured error or a signature.
type recordingEnvelopeSigner struct {
canonical []byte
signature FabricEnvelopeSignature
err error
}
// Sign records canonical, then returns s.err if set. Otherwise it returns
// s.signature, first filling in a default test signature when no KeyID was
// configured.
func (s *recordingEnvelopeSigner) Sign(_ context.Context, canonical []byte) (FabricEnvelopeSignature, error) {
s.canonical = append([]byte{}, canonical...)
if s.err != nil {
return FabricEnvelopeSignature{}, s.err
}
if s.signature.KeyID == "" {
s.signature = FabricEnvelopeSignature{KeyID: "test-key", Alg: "ed25519", Signature: "test-signature"}
}
return s.signature, nil
}
// recordingEnvelopeSender is a test double that keeps the last envelope it was
// given and returns either a configured error or a canned response.
type recordingEnvelopeSender struct {
envelope SignedFabricServiceChannelEnvelope
response FabricResponse
err error
}
// Send records envelope, then returns s.err if set or s.response otherwise.
func (s *recordingEnvelopeSender) Send(_ context.Context, envelope SignedFabricServiceChannelEnvelope) (FabricResponse, error) {
s.envelope = envelope
if s.err != nil {
return FabricResponse{}, s.err
}
return s.response, nil
}
@@ -0,0 +1,64 @@
package webingress
import (
"crypto/ed25519"
"encoding/base64"
"encoding/json"
"fmt"
"strings"
)
// TrustedKeyConfig is one element of the array form accepted by
// ParseTrustedKeysJSON. PublicKey is the base64-encoded Ed25519 public key.
type TrustedKeyConfig struct {
KeyID string `json:"key_id"`
PublicKey string `json:"public_key"`
}
// ParseTrustedKeysJSON builds a key resolver from a JSON document in one of
// two shapes:
//
//	{"<key id>": "<base64 public key>", ...}
//	[{"key_id": "<key id>", "public_key": "<base64 public key>"}, ...]
//
// A blank value returns a nil resolver and no error. An empty object ("{}")
// or empty array ("[]") returns an empty, non-nil resolver. Any other input,
// or any entry with a blank key ID or a public key that is not base64 for
// exactly ed25519.PublicKeySize bytes, returns an error wrapping
// ErrFabricEnvelopeSignatureInvalid.
func ParseTrustedKeysJSON(value string) (StaticEnvelopeKeyResolver, error) {
	value = strings.TrimSpace(value)
	if value == "" {
		return nil, nil
	}
	resolver := StaticEnvelopeKeyResolver{}
	var byID map[string]string
	// json.Unmarshal leaves the map nil for JSON null and allocates it for any
	// JSON object, so a non-nil map means the input really was an object -
	// including the empty object, which must not be reported as
	// "must be object or array".
	if err := json.Unmarshal([]byte(value), &byID); err == nil && byID != nil {
		for keyID, publicKeyB64 := range byID {
			if err := resolver.addBase64(keyID, publicKeyB64); err != nil {
				return nil, err
			}
		}
		return resolver, nil
	}
	var list []TrustedKeyConfig
	if err := json.Unmarshal([]byte(value), &list); err != nil {
		return nil, fmt.Errorf("%w: trusted keys json must be object or array", ErrFabricEnvelopeSignatureInvalid)
	}
	for _, item := range list {
		if err := resolver.addBase64(item.KeyID, item.PublicKey); err != nil {
			return nil, err
		}
	}
	return resolver, nil
}
// addBase64 registers one trusted key in the resolver. Both arguments are
// whitespace-trimmed first. The id must be non-empty and the key must decode
// (via decodeEnvelopeBase64) to exactly ed25519.PublicKeySize bytes; otherwise
// an error wrapping ErrFabricEnvelopeSignatureInvalid is returned and the
// resolver is left untouched. The stored key is a private copy.
func (r StaticEnvelopeKeyResolver) addBase64(keyID string, publicKeyB64 string) error {
	id := strings.TrimSpace(keyID)
	if id == "" {
		return fmt.Errorf("%w: trusted key id required", ErrFabricEnvelopeSignatureInvalid)
	}

	raw, err := decodeEnvelopeBase64(strings.TrimSpace(publicKeyB64))
	switch {
	case err != nil:
		return fmt.Errorf("%w: trusted public key must be base64 encoded", ErrFabricEnvelopeSignatureInvalid)
	case len(raw) != ed25519.PublicKeySize:
		return fmt.Errorf("%w: trusted public key must decode to %d bytes", ErrFabricEnvelopeSignatureInvalid, ed25519.PublicKeySize)
	}

	stored := make(ed25519.PublicKey, ed25519.PublicKeySize)
	copy(stored, raw)
	r[id] = stored
	return nil
}
// TrustedKeysJSONForPublicKey renders a single-entry trusted-keys document in
// the object form understood by ParseTrustedKeysJSON: the trimmed key id maps
// to the standard-base64 encoding of publicKey.
func TrustedKeysJSONForPublicKey(keyID string, publicKey ed25519.PublicKey) string {
	entry := make(map[string]string, 1)
	entry[strings.TrimSpace(keyID)] = base64.StdEncoding.EncodeToString(publicKey)
	// The marshal error is deliberately discarded, as before.
	encoded, _ := json.Marshal(entry)
	return string(encoded)
}
@@ -0,0 +1,64 @@
package webingress
import (
"crypto/ed25519"
"crypto/rand"
"encoding/base64"
"errors"
"testing"
)
// TestParseTrustedKeysJSONAcceptsMapAndArray feeds the same freshly generated
// key through both accepted JSON shapes and checks each resulting resolver can
// find it.
func TestParseTrustedKeysJSONAcceptsMapAndArray(t *testing.T) {
	publicKey, _, err := ed25519.GenerateKey(rand.Reader)
	if err != nil {
		t.Fatalf("generate key: %v", err)
	}
	encoded := base64.StdEncoding.EncodeToString(publicKey)

	// Object form: key id -> base64 key.
	objectJSON := `{"key-1":"` + encoded + `"}`
	resolver, err := ParseTrustedKeysJSON(objectJSON)
	if err != nil {
		t.Fatalf("parse map: %v", err)
	}
	got, ok, err := resolver.PublicKey(nil, "key-1")
	if err != nil || !ok || string(got) != string(publicKey) {
		t.Fatalf("map resolver got=%x ok=%t err=%v", got, ok, err)
	}

	// Array form: list of {key_id, public_key}.
	arrayJSON := `[{"key_id":"key-2","public_key":"` + encoded + `"}]`
	resolver, err = ParseTrustedKeysJSON(arrayJSON)
	if err != nil {
		t.Fatalf("parse array: %v", err)
	}
	_, ok, err = resolver.PublicKey(nil, "key-2")
	if err != nil || !ok {
		t.Fatalf("array resolver ok=%t err=%v", ok, err)
	}
}
// TestParseTrustedKeysJSONRejectsInvalidKeys checks that a blank key id, an
// undersized public key and non-JSON input are all reported as
// ErrFabricEnvelopeSignatureInvalid.
func TestParseTrustedKeysJSONRejectsInvalidKeys(t *testing.T) {
	cases := []struct {
		label string
		input string
	}{
		{label: "empty key", input: `{"":"abc"}`},
		{label: "bad public key", input: `{"key-1":"abc"}`},
		{label: "bad json", input: `not-json`},
	}
	for _, tc := range cases {
		_, err := ParseTrustedKeysJSON(tc.input)
		if !errors.Is(err, ErrFabricEnvelopeSignatureInvalid) {
			t.Fatalf("%s err = %v", tc.label, err)
		}
	}
}
func TestTrustedKeysJSONForPublicKey(t *testing.T) {
publicKey, _, err := ed25519.GenerateKey(rand.Reader)
if err != nil {
t.Fatalf("generate key: %v", err)
}
resolver, err := ParseTrustedKeysJSON(TrustedKeysJSONForPublicKey("key-1", publicKey))
if err != nil {
t.Fatalf("parse generated json: %v", err)
}
if _, ok, err := resolver.PublicKey(nil, "key-1"); err != nil || !ok {
t.Fatalf("generated resolver ok=%t err=%v", ok, err)
}
}
@@ -0,0 +1,182 @@
package webingress
import (
"context"
"crypto/tls"
"errors"
"net"
"net/http"
"strings"
"sync"
"time"
)
// ListenerConfig is the input to Manager.Apply: the runtime configuration
// plus the listener addresses and TLS material.
type ListenerConfig struct {
// RuntimeConfig is handed to the Runtime that serves both listeners.
RuntimeConfig
// HTTPAddr is the plain HTTP listen address; Apply uses ":80" when blank.
HTTPAddr string
// HTTPSAddr is the TLS listen address; Apply uses ":443" when blank.
HTTPSAddr string
// TLSCertFile and TLSKeyFile must both be set for the HTTPS listener to start.
TLSCertFile string
TLSKeyFile string
// Binder is passed through to the Runtime.
Binder FabricBinder
}
// ListenerStatus is the JSON-serialisable snapshot reported by Manager.
type ListenerStatus struct {
// SchemaVersion is set to "rap.web_ingress.listener_status.v1" by Manager.
SchemaVersion string `json:"schema_version"`
// Running is true when at least one of the two listeners is up.
Running bool `json:"running"`
HTTPRunning bool `json:"http_running"`
HTTPSRunning bool `json:"https_running"`
// HTTPAddr and HTTPSAddr are the bound listener addresses, set only for
// listeners that started.
HTTPAddr string `json:"http_addr,omitempty"`
HTTPSAddr string `json:"https_addr,omitempty"`
// Reason is one of "started", "partial", "blocked", "stopped" or "not_started".
Reason string `json:"reason,omitempty"`
// Errors lists per-listener failures, prefixed with "http:" or "https:".
Errors []string `json:"errors,omitempty"`
// ObservedAt is a UTC timestamp in RFC 3339 (nanosecond) format.
ObservedAt string `json:"observed_at"`
}
// Manager owns the HTTP and HTTPS listeners of the web ingress and remembers
// the status produced by the last Apply or Stop.
type Manager struct {
// mu is held by Apply, Stop and Status for their whole duration.
mu sync.Mutex
// http and https are the currently running servers, or nil.
http *http.Server
https *http.Server
// status is the result of the most recent Apply or Stop.
status ListenerStatus
// now is the clock used for ObservedAt and passed to the Runtime;
// observedAt falls back to time.Now when it is nil.
now func() time.Time
}
// NewManager returns a Manager with no listeners running and the wall clock
// (time.Now) as its time source.
func NewManager() *Manager {
	manager := new(Manager)
	manager.now = time.Now
	return manager
}
// Apply replaces whatever listeners are running with listeners built from cfg
// and returns (and remembers) the resulting status.
//
// Any existing servers are shut down first, best effort. The HTTP listener is
// always attempted; the HTTPS listener is attempted only when both TLS files
// are configured. Blank addresses default to ":80" and ":443". The reported
// Reason is "started" when nothing failed, "partial" when something failed but
// at least one listener is up, and "blocked" when none is.
func (m *Manager) Apply(ctx context.Context, cfg ListenerConfig) ListenerStatus {
	m.mu.Lock()
	defer m.mu.Unlock()

	// Shutdown errors of the previous generation are intentionally ignored.
	_ = m.stopLocked(ctx)

	runtime := Runtime{Config: cfg.RuntimeConfig, Binder: cfg.Binder, Now: m.now}
	status := ListenerStatus{
		SchemaVersion: "rap.web_ingress.listener_status.v1",
		Reason:        "started",
		ObservedAt:    m.observedAt(),
	}

	if strings.TrimSpace(cfg.HTTPAddr) == "" {
		cfg.HTTPAddr = ":80"
	}
	if strings.TrimSpace(cfg.HTTPSAddr) == "" {
		cfg.HTTPSAddr = ":443"
	}

	var failures []string

	httpServer, httpAddr, httpErr := startHTTPServer(ctx, cfg.HTTPAddr, runtime.HTTPHandler())
	if httpErr != nil {
		failures = append(failures, "http:"+httpErr.Error())
	} else {
		m.http = httpServer
		status.HTTPRunning = true
		status.HTTPAddr = httpAddr
	}

	switch {
	case cfg.TLSCertFile == "" || cfg.TLSKeyFile == "":
		failures = append(failures, "https:tls_cert_file_and_key_file_required")
	default:
		httpsServer, httpsAddr, httpsErr := startHTTPSServer(ctx, cfg.HTTPSAddr, cfg.TLSCertFile, cfg.TLSKeyFile, runtime.HTTPSHandler())
		if httpsErr != nil {
			failures = append(failures, "https:"+httpsErr.Error())
		} else {
			m.https = httpsServer
			status.HTTPSRunning = true
			status.HTTPSAddr = httpsAddr
		}
	}

	status.Running = status.HTTPRunning || status.HTTPSRunning
	switch {
	case len(failures) == 0:
		// Reason stays "started".
	case status.Running:
		status.Errors = failures
		status.Reason = "partial"
	default:
		status.Errors = failures
		status.Reason = "blocked"
	}

	m.status = status
	return status
}
// Stop shuts down any running listeners (best effort, bounded by ctx) and
// records and returns a "stopped" status.
func (m *Manager) Stop(ctx context.Context) ListenerStatus {
	m.mu.Lock()
	defer m.mu.Unlock()

	// Shutdown errors are intentionally ignored; the servers are dropped either way.
	_ = m.stopLocked(ctx)

	stopped := ListenerStatus{
		SchemaVersion: "rap.web_ingress.listener_status.v1",
		Reason:        "stopped",
		ObservedAt:    m.observedAt(),
	}
	m.status = stopped
	return stopped
}
// Status returns the status recorded by the last Apply or Stop. Before either
// has run it returns a fresh "not_started" status, which is not stored.
func (m *Manager) Status() ListenerStatus {
	m.mu.Lock()
	defer m.mu.Unlock()

	if m.status.SchemaVersion != "" {
		return m.status
	}
	return ListenerStatus{
		SchemaVersion: "rap.web_ingress.listener_status.v1",
		Reason:        "not_started",
		ObservedAt:    m.observedAt(),
	}
}
// stopLocked gracefully shuts down the HTTP server and then the HTTPS server,
// clearing each field as it goes, and returns their joined shutdown errors.
// The caller must hold m.mu.
func (m *Manager) stopLocked(ctx context.Context) error {
	var out error
	for _, slot := range []**http.Server{&m.http, &m.https} {
		if *slot == nil {
			continue
		}
		out = errors.Join(out, (*slot).Shutdown(ctx))
		*slot = nil
	}
	return out
}
// observedAt formats the current UTC time as RFC 3339 with nanoseconds, using
// the manager's injected clock when present and time.Now otherwise.
func (m *Manager) observedAt() string {
	clock := m.now
	if clock == nil {
		clock = time.Now
	}
	return clock().UTC().Format(time.RFC3339Nano)
}
// startHTTPServer binds addr, starts serving handler on it in the background
// and returns the server together with the address actually bound (useful
// when addr asks for port 0).
//
// The server is shut down gracefully when ctx is cancelled. The goroutine that
// watches ctx also exits as soon as the serve loop ends, so shutting the
// server down through the returned handle does not leave a goroutine parked
// for the remaining lifetime of ctx.
//
// An error is returned only when the listener cannot be created.
func startHTTPServer(ctx context.Context, addr string, handler http.Handler) (*http.Server, string, error) {
	listener, err := net.Listen("tcp", addr)
	if err != nil {
		return nil, "", err
	}
	server := &http.Server{Handler: handler, ReadHeaderTimeout: 5 * time.Second}

	// served is closed once Serve returns, whatever the reason.
	served := make(chan struct{})
	go func() {
		defer close(served)
		if err := server.Serve(listener); err != nil && !errors.Is(err, http.ErrServerClosed) {
			// Unexpected serve failure: release the listener and connections.
			_ = server.Close()
		}
	}()
	go func() {
		select {
		case <-ctx.Done():
			// ctx is already done, so the shutdown gets a fresh context.
			_ = server.Shutdown(context.Background())
		case <-served:
			// The server stopped by other means; nothing left to watch.
		}
	}()
	return server, listener.Addr().String(), nil
}
// startHTTPSServer loads the certificate/key pair, binds addr and starts
// serving handler over TLS (minimum TLS 1.2) in the background. It returns the
// server and the address actually bound.
//
// The server is shut down gracefully when ctx is cancelled. The goroutine that
// watches ctx also exits as soon as the serve loop ends, so shutting the
// server down through the returned handle does not leave a goroutine parked
// for the remaining lifetime of ctx.
//
// An error is returned when the key pair cannot be loaded or the listener
// cannot be created; nothing is left running in that case.
func startHTTPSServer(ctx context.Context, addr, certFile, keyFile string, handler http.Handler) (*http.Server, string, error) {
	cert, err := tls.LoadX509KeyPair(certFile, keyFile)
	if err != nil {
		return nil, "", err
	}
	listener, err := net.Listen("tcp", addr)
	if err != nil {
		return nil, "", err
	}
	server := &http.Server{
		Handler:           handler,
		ReadHeaderTimeout: 5 * time.Second,
		TLSConfig:         &tls.Config{MinVersion: tls.VersionTLS12, Certificates: []tls.Certificate{cert}},
	}

	// served is closed once ServeTLS returns, whatever the reason.
	served := make(chan struct{})
	go func() {
		defer close(served)
		// The certificate is already in TLSConfig, so no file names are passed.
		if err := server.ServeTLS(listener, "", ""); err != nil && !errors.Is(err, http.ErrServerClosed) {
			// Unexpected serve failure: release the listener and connections.
			_ = server.Close()
		}
	}()
	go func() {
		select {
		case <-ctx.Done():
			// ctx is already done, so the shutdown gets a fresh context.
			_ = server.Shutdown(context.Background())
		case <-served:
			// The server stopped by other means; nothing left to watch.
		}
	}()
	return server, listener.Addr().String(), nil
}
@@ -0,0 +1,105 @@
package webingress
import (
"context"
"crypto/rand"
"crypto/rsa"
"crypto/x509"
"crypto/x509/pkix"
"encoding/pem"
"math/big"
"net/http"
"os"
"path/filepath"
"strings"
"testing"
"time"
)
// TestManagerStartsHTTPRedirectAndStops applies a configuration without TLS
// material and expects: only the HTTP listener up, a "partial" status naming
// the missing TLS files, a 308 answer on the HTTP port, and a clean Stop.
func TestManagerStartsHTTPRedirectAndStops(t *testing.T) {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	manager := NewManager()
	cfg := ListenerConfig{
		RuntimeConfig: RuntimeConfig{ServiceType: "admin-ingress", Scope: "platform", ServiceClasses: []string{"platform_admin"}},
		HTTPAddr:      "127.0.0.1:0",
		HTTPSAddr:     "127.0.0.1:0",
	}
	status := manager.Apply(ctx, cfg)

	httpOnly := status.HTTPRunning && !status.HTTPSRunning && status.Running && status.HTTPAddr != ""
	if !httpOnly {
		t.Fatalf("status = %+v", status)
	}
	partial := status.Reason == "partial" && containsError(status.Errors, "https:tls_cert_file_and_key_file_required")
	if !partial {
		t.Fatalf("status = %+v", status)
	}

	// Do not follow the redirect; the test inspects it.
	noFollow := func(*http.Request, []*http.Request) error { return http.ErrUseLastResponse }
	client := &http.Client{CheckRedirect: noFollow}
	resp, err := client.Get("http://" + status.HTTPAddr + "/cluster-admin")
	if err != nil {
		t.Fatalf("http get: %v", err)
	}
	_ = resp.Body.Close()
	if resp.StatusCode != http.StatusPermanentRedirect {
		t.Fatalf("status = %d", resp.StatusCode)
	}

	stopped := manager.Stop(context.Background())
	if stopped.Running || stopped.Reason != "stopped" {
		t.Fatalf("stopped = %+v", stopped)
	}
}
// TestManagerStartsHTTPSWhenCertificateProvided applies a configuration with a
// self-signed certificate and expects both listeners up with no errors.
func TestManagerStartsHTTPSWhenCertificateProvided(t *testing.T) {
	certFile, keyFile := writeSelfSignedCert(t, t.TempDir())

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	cfg := ListenerConfig{
		RuntimeConfig: RuntimeConfig{ServiceType: "admin-ingress", Scope: "platform", ServiceClasses: []string{"platform_admin"}},
		HTTPAddr:      "127.0.0.1:0",
		HTTPSAddr:     "127.0.0.1:0",
		TLSCertFile:   certFile,
		TLSKeyFile:    keyFile,
	}
	status := NewManager().Apply(ctx, cfg)

	bothUp := status.HTTPRunning && status.HTTPSRunning && status.HTTPAddr != "" && status.HTTPSAddr != ""
	if !bothUp || len(status.Errors) != 0 {
		t.Fatalf("status = %+v", status)
	}
}
// writeSelfSignedCert generates a 2048-bit RSA key and a self-signed
// certificate for "localhost" valid from one hour ago until one hour from
// now, writes them PEM-encoded to dir as cert.pem and key.pem (mode 0600), and
// returns the two paths. Any failure aborts the test.
func writeSelfSignedCert(t *testing.T, dir string) (string, string) {
	t.Helper()

	key, err := rsa.GenerateKey(rand.Reader, 2048)
	if err != nil {
		t.Fatalf("generate key: %v", err)
	}

	template := x509.Certificate{
		SerialNumber: big.NewInt(1),
		Subject:      pkix.Name{CommonName: "localhost"},
		NotBefore:    time.Now().Add(-time.Hour),
		NotAfter:     time.Now().Add(time.Hour),
		KeyUsage:     x509.KeyUsageKeyEncipherment | x509.KeyUsageDigitalSignature,
		DNSNames:     []string{"localhost"},
	}
	// Template doubles as parent: the certificate signs itself.
	der, err := x509.CreateCertificate(rand.Reader, &template, &template, &key.PublicKey, key)
	if err != nil {
		t.Fatalf("create cert: %v", err)
	}

	certBlock := &pem.Block{Type: "CERTIFICATE", Bytes: der}
	keyBlock := &pem.Block{Type: "RSA PRIVATE KEY", Bytes: x509.MarshalPKCS1PrivateKey(key)}

	certFile := filepath.Join(dir, "cert.pem")
	keyFile := filepath.Join(dir, "key.pem")
	if err := os.WriteFile(certFile, pem.EncodeToMemory(certBlock), 0o600); err != nil {
		t.Fatalf("write cert: %v", err)
	}
	if err := os.WriteFile(keyFile, pem.EncodeToMemory(keyBlock), 0o600); err != nil {
		t.Fatalf("write key: %v", err)
	}
	return certFile, keyFile
}
// containsError reports whether any entry of values contains needle as a
// substring (an exact match is the special case of a full-length substring).
func containsError(values []string, needle string) bool {
	for i := range values {
		if strings.Contains(values[i], needle) {
			return true
		}
	}
	return false
}
@@ -0,0 +1,217 @@
package webingress
import (
"context"
"encoding/json"
"errors"
"fmt"
"net/http"
"strings"
"time"
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/mesh"
)
// Sentinel errors returned by MeshEnvelopeSender.
var (
// ErrMeshEnvelopeRuntimeRequired is returned when neither Runtime nor
// ResponseRuntime is set; it also wraps an error reported inside a forward
// response payload.
ErrMeshEnvelopeRuntimeRequired = errors.New("web ingress mesh envelope runtime required")
// ErrMeshEnvelopeRouteRequired is returned when the route set has no primary
// route id, no warm standby and no cold fallback.
ErrMeshEnvelopeRouteRequired = errors.New("web ingress mesh envelope route set required")
// ErrMeshEnvelopeIdentityInvalid is returned when the cluster id, source node
// id or target id is blank.
ErrMeshEnvelopeIdentityInvalid = errors.New("web ingress mesh envelope identity invalid")
)
// FabricChannelReliableRuntime is the one-way delivery dependency of
// MeshEnvelopeSender: it sends payloads for a channel spec over a route set
// and reports delivery statistics.
type FabricChannelReliableRuntime interface {
SendReliable(ctx context.Context, spec mesh.FabricChannelSpec, routeSet mesh.FabricRouteSet, payloads [][]byte) (mesh.FabricChannelRuntimeResult, error)
}
// FabricChannelRequestResponseRuntime is the request/response dependency of
// MeshEnvelopeSender: it sends a single payload and returns a result that
// carries the peer's response payload.
type FabricChannelRequestResponseRuntime interface {
SendRequestResponse(ctx context.Context, spec mesh.FabricChannelSpec, routeSet mesh.FabricRouteSet, payload []byte) (mesh.FabricChannelRequestResponseResult, error)
}
// MeshEnvelopeSender delivers signed service-channel envelopes over the mesh
// fabric. At least one of Runtime and ResponseRuntime must be set.
type MeshEnvelopeSender struct {
// Runtime is used for one-way delivery when ResponseRuntime is nil.
Runtime FabricChannelReliableRuntime
// ResponseRuntime, when set, takes precedence and enables request/response.
ResponseRuntime FabricChannelRequestResponseRuntime
// RouteSet must contain a primary route id, a warm standby or a cold fallback.
RouteSet mesh.FabricRouteSet
// ClusterID, SourceNodeID and TargetID are required (non-blank after trimming).
ClusterID string
SourceNodeID string
// TargetKind defaults to mesh.FabricChannelTargetPool when empty.
TargetKind mesh.FabricChannelTargetKind
TargetID string
// ChannelID, when blank, is generated from the service class and current time.
ChannelID string
// Now overrides the clock; time.Now is used when nil.
Now func() time.Time
}
// MeshEnvelopeDeliveryResponse is the JSON body of the 202 acknowledgement
// built by acceptedDeliveryResponse when the fabric delivered an envelope but
// no runtime HTTP response is available. The counters are copied from the
// fabric runtime result.
type MeshEnvelopeDeliveryResponse struct {
// SchemaVersion is "rap.web_ingress.mesh_envelope_delivery_response.v1".
SchemaVersion string `json:"schema_version"`
// Status is "accepted".
Status string `json:"status"`
ChannelID string `json:"channel_id"`
RouteID string `json:"route_id,omitempty"`
TargetNode string `json:"target_node,omitempty"`
BytesSent uint64 `json:"bytes_sent"`
FramesSent uint64 `json:"frames_sent"`
AcksReceived uint64 `json:"acks_received"`
MigrationEvents int `json:"migration_events"`
}
// Send serialises the signed envelope to JSON and delivers it over the fabric.
//
// With a ResponseRuntime configured, the envelope is sent request/response.
// The reply payload is unwrapped and, if it is a runtime HTTP response, that
// response is returned; any other reply produces a 202 delivery
// acknowledgement. Without a ResponseRuntime the envelope is sent one-way via
// Runtime and a 202 delivery acknowledgement is returned.
//
// It fails with ErrMeshEnvelopeRuntimeRequired when no runtime is set, with
// ErrMeshEnvelopeRouteRequired when the route set is empty, and otherwise
// propagates channel-spec, marshalling, transport and unwrap errors.
func (s MeshEnvelopeSender) Send(ctx context.Context, envelope SignedFabricServiceChannelEnvelope) (FabricResponse, error) {
	var none FabricResponse

	if s.Runtime == nil && s.ResponseRuntime == nil {
		return none, ErrMeshEnvelopeRuntimeRequired
	}
	hasRoute := strings.TrimSpace(s.RouteSet.Primary.RouteID) != "" ||
		len(s.RouteSet.WarmStandby) > 0 ||
		len(s.RouteSet.ColdFallbacks) > 0
	if !hasRoute {
		return none, ErrMeshEnvelopeRouteRequired
	}

	spec, err := s.channelSpec(envelope)
	if err != nil {
		return none, err
	}
	payload, err := json.Marshal(envelope)
	if err != nil {
		return none, err
	}
	routes := s.routeSet(spec)

	// One-way delivery when no request/response runtime is available.
	if s.ResponseRuntime == nil {
		delivered, err := s.Runtime.SendReliable(ctx, spec, routes, [][]byte{payload})
		if err != nil {
			return none, err
		}
		return acceptedDeliveryResponse(spec.ChannelID, delivered)
	}

	exchange, err := s.ResponseRuntime.SendRequestResponse(ctx, spec, routes, payload)
	if err != nil {
		return none, err
	}
	inner, err := unwrapWebIngressForwardResponse(exchange.ResponsePayload)
	if err != nil {
		return none, err
	}
	if httpResponse, ok := decodeRuntimeHTTPResponse(inner); ok {
		return httpResponse, nil
	}
	return acceptedDeliveryResponse(spec.ChannelID, exchange.FabricChannelRuntimeResult)
}
// unwrapWebIngressForwardResponse peels the optional {"payload":…,"error":…}
// wrapper off a forward response.
//
//   - An empty payload, or one that does not decode as the wrapper object, is
//     returned unchanged.
//   - A non-blank "error" yields an error wrapping
//     ErrMeshEnvelopeRuntimeRequired.
//   - A wrapper without "payload" returns the original bytes unchanged.
//   - Otherwise a copy of the inner "payload" JSON is returned.
func unwrapWebIngressForwardResponse(payload []byte) ([]byte, error) {
	if len(payload) == 0 {
		return payload, nil
	}

	var wrapper struct {
		Payload json.RawMessage `json:"payload,omitempty"`
		Error   string          `json:"error,omitempty"`
	}
	if err := json.Unmarshal(payload, &wrapper); err != nil {
		return payload, nil
	}

	if strings.TrimSpace(wrapper.Error) != "" {
		return nil, fmt.Errorf("%w: %s", ErrMeshEnvelopeRuntimeRequired, wrapper.Error)
	}
	if len(wrapper.Payload) == 0 {
		return payload, nil
	}

	inner := make([]byte, len(wrapper.Payload))
	copy(inner, wrapper.Payload)
	return inner, nil
}
// acceptedDeliveryResponse builds the 202 Accepted JSON acknowledgement for an
// envelope that the fabric delivered, copying the route, target node and
// counters from result.
func acceptedDeliveryResponse(channelID string, result mesh.FabricChannelRuntimeResult) (FabricResponse, error) {
	ack := MeshEnvelopeDeliveryResponse{
		SchemaVersion:   "rap.web_ingress.mesh_envelope_delivery_response.v1",
		Status:          "accepted",
		ChannelID:       channelID,
		RouteID:         result.Channel.RouteID,
		TargetNode:      result.Channel.TargetNode,
		BytesSent:       result.BytesSent,
		FramesSent:      result.FramesSent,
		AcksReceived:    result.AcksReceived,
		MigrationEvents: result.MigrationEvents,
	}
	body, err := json.Marshal(ack)
	if err != nil {
		return FabricResponse{}, err
	}

	out := FabricResponse{StatusCode: http.StatusAccepted, Body: body}
	out.Headers = http.Header{"Content-Type": []string{"application/json"}}
	return out, nil
}
// decodeRuntimeHTTPResponse interprets payload as a fabric runtime HTTP
// response (schema FabricRuntimeResponseSchema). The boolean is false when the
// payload is empty, is not JSON, carries a different schema version, or has an
// undecodable body_b64.
//
// body_b64, when present, takes precedence over the plain "body" field.
// Headers rejected by safeResponseHeader are dropped. A status code outside
// 100-599 (including a missing one, which decodes as 0) is replaced with
// 200 OK, the same rule encodeFabricRuntimeResponse applies on the producing
// side, so an invalid code is never handed on to the HTTP layer.
func decodeRuntimeHTTPResponse(payload []byte) (FabricResponse, bool) {
	var response struct {
		SchemaVersion string              `json:"schema_version"`
		StatusCode    int                 `json:"status_code"`
		Headers       map[string][]string `json:"headers,omitempty"`
		BodyBase64    string              `json:"body_b64,omitempty"`
		Body          string              `json:"body,omitempty"`
	}
	if len(payload) == 0 || json.Unmarshal(payload, &response) != nil {
		return FabricResponse{}, false
	}
	if response.SchemaVersion != FabricRuntimeResponseSchema {
		return FabricResponse{}, false
	}

	body := []byte(response.Body)
	if response.BodyBase64 != "" {
		decoded, err := decodeEnvelopeBase64(response.BodyBase64)
		if err != nil {
			return FabricResponse{}, false
		}
		body = decoded
	}

	headers := http.Header{}
	for key, values := range response.Headers {
		if !safeResponseHeader(key) {
			continue
		}
		for _, value := range values {
			headers.Add(key, value)
		}
	}

	// Mirror the encoder's normalisation for peers that omit or garble the code.
	statusCode := response.StatusCode
	if statusCode < 100 || statusCode > 599 {
		statusCode = http.StatusOK
	}
	return FabricResponse{StatusCode: statusCode, Headers: headers, Body: body}, true
}
// channelSpec derives the fabric channel spec for one envelope from the
// sender's identity fields. Cluster, source node and target ids must be
// non-blank after trimming, otherwise ErrMeshEnvelopeIdentityInvalid is
// returned. The target kind defaults to mesh.FabricChannelTargetPool and a
// blank channel id is replaced by a generated one. The spec is checked with
// mesh.ValidateFabricChannelSpec before it is returned.
func (s MeshEnvelopeSender) channelSpec(envelope SignedFabricServiceChannelEnvelope) (mesh.FabricChannelSpec, error) {
	var (
		clusterID    = strings.TrimSpace(s.ClusterID)
		sourceNodeID = strings.TrimSpace(s.SourceNodeID)
		targetID     = strings.TrimSpace(s.TargetID)
	)
	for _, required := range []string{clusterID, sourceNodeID, targetID} {
		if required == "" {
			return mesh.FabricChannelSpec{}, ErrMeshEnvelopeIdentityInvalid
		}
	}

	spec := mesh.FabricChannelSpec{
		ClusterID:    clusterID,
		SourceNodeID: sourceNodeID,
		TargetKind:   s.TargetKind,
		TargetID:     targetID,
		TrafficClass: "control",
		StickyKey:    envelope.Envelope.Scope + ":" + envelope.Envelope.ServiceClass,
	}
	if spec.TargetKind == "" {
		spec.TargetKind = mesh.FabricChannelTargetPool
	}
	spec.ChannelID = strings.TrimSpace(s.ChannelID)
	if spec.ChannelID == "" {
		spec.ChannelID = defaultMeshEnvelopeChannelID(envelope, s.now())
	}
	spec.CreatedAt = s.now()

	if err := mesh.ValidateFabricChannelSpec(spec); err != nil {
		return mesh.FabricChannelSpec{}, err
	}
	return spec, nil
}
// routeSet returns a copy of the configured route set in which a missing
// target kind or blank target id has been filled in from spec.
func (s MeshEnvelopeSender) routeSet(spec mesh.FabricChannelSpec) mesh.FabricRouteSet {
	effective := s.RouteSet
	if effective.TargetKind == "" {
		effective.TargetKind = spec.TargetKind
	}
	if blank := strings.TrimSpace(effective.TargetID) == ""; blank {
		effective.TargetID = spec.TargetID
	}
	return effective
}
// now reports the current time in UTC, taken from the injected Now function
// when one is configured and from time.Now otherwise.
func (s MeshEnvelopeSender) now() time.Time {
	clock := s.Now
	if clock == nil {
		clock = time.Now
	}
	return clock().UTC()
}
// defaultMeshEnvelopeChannelID generates a channel id of the form
// "web-ingress-<service-class>-<unix-nanos>". The service class is trimmed and
// has underscores turned into hyphens; a blank one becomes "web-ingress".
func defaultMeshEnvelopeChannelID(envelope SignedFabricServiceChannelEnvelope, now time.Time) string {
	label := strings.TrimSpace(envelope.Envelope.ServiceClass)
	label = strings.ReplaceAll(label, "_", "-")
	if label == "" {
		label = "web-ingress"
	}
	nanos := now.UnixNano()
	return fmt.Sprintf("web-ingress-%s-%d", label, nanos)
}
@@ -0,0 +1,267 @@
package webingress
import (
"context"
"encoding/json"
"errors"
"net/http"
"testing"
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/mesh"
)
// TestMeshEnvelopeSenderSendsSignedEnvelopeOverReliableFabricRuntime covers
// the one-way path: only Runtime is set, so Send must hand exactly one JSON
// payload to SendReliable and answer with a 202 delivery acknowledgement.
func TestMeshEnvelopeSenderSendsSignedEnvelopeOverReliableFabricRuntime(t *testing.T) {
runtime := &recordingReliableRuntime{
result: mesh.FabricChannelRuntimeResult{
Channel: mesh.FabricChannel{RouteID: "route-fast", TargetNode: "node-runtime"},
BytesSent: 123,
FramesSent: 1,
AcksReceived: 1,
},
}
sender := MeshEnvelopeSender{
Runtime: runtime,
RouteSet: testWebIngressRouteSet(),
ClusterID: "cluster-1",
SourceNodeID: "node-ingress",
TargetKind: mesh.FabricChannelTargetPool,
TargetID: "pool-admin-runtime",
ChannelID: "channel-web-1",
Now: fixedEnvelopeNow,
}
envelope := SignedFabricServiceChannelEnvelope{
SchemaVersion: "rap.web_ingress.signed_fabric_service_channel_envelope.v1",
Envelope: FabricServiceChannelEnvelope{
SchemaVersion: FabricServiceChannelEnvelopeSchema,
Scope: "platform",
ServiceClass: "platform_admin",
},
Signature: FabricEnvelopeSignature{KeyID: "node-key", Alg: "ed25519", Signature: "sig"},
}
response, err := sender.Send(context.Background(), envelope)
if err != nil {
t.Fatalf("send: %v", err)
}
if response.StatusCode != http.StatusAccepted || response.Headers.Get("Content-Type") != "application/json" {
t.Fatalf("response = %+v", response)
}
// The channel spec must reflect the sender's identity and the envelope's
// scope and service class.
if runtime.spec.ChannelID != "channel-web-1" ||
runtime.spec.ClusterID != "cluster-1" ||
runtime.spec.SourceNodeID != "node-ingress" ||
runtime.spec.TargetID != "pool-admin-runtime" ||
runtime.spec.TargetKind != mesh.FabricChannelTargetPool ||
runtime.spec.TrafficClass != "control" ||
runtime.spec.StickyKey != "platform:platform_admin" {
t.Fatalf("spec = %+v", runtime.spec)
}
// The route set's blank target id is filled in from the spec.
if runtime.routeSet.TargetID != "pool-admin-runtime" || len(runtime.payloads) != 1 {
t.Fatalf("route/payload = %+v payloads=%d", runtime.routeSet, len(runtime.payloads))
}
// The delivered payload is the signed envelope, JSON-encoded.
var delivered SignedFabricServiceChannelEnvelope
if err := json.Unmarshal(runtime.payloads[0], &delivered); err != nil {
t.Fatalf("decode delivered envelope: %v", err)
}
if delivered.Signature.Signature != "sig" || delivered.Envelope.ServiceClass != "platform_admin" {
t.Fatalf("delivered = %+v", delivered)
}
// The response body echoes the fabric runtime result.
var body MeshEnvelopeDeliveryResponse
if err := json.Unmarshal(response.Body, &body); err != nil {
t.Fatalf("decode response: %v", err)
}
if body.SchemaVersion != "rap.web_ingress.mesh_envelope_delivery_response.v1" ||
body.Status != "accepted" ||
body.RouteID != "route-fast" ||
body.AcksReceived != 1 {
t.Fatalf("body = %+v", body)
}
}
// TestMeshEnvelopeSenderReturnsRuntimeHTTPResponse covers the
// request/response path: the runtime replies with a wrapped runtime HTTP
// response, which Send must unwrap and return, dropping the Set-Cookie header
// and decoding the base64 body.
func TestMeshEnvelopeSenderReturnsRuntimeHTTPResponse(t *testing.T) {
runtime := &recordingRequestResponseRuntime{
result: mesh.FabricChannelRequestResponseResult{
FabricChannelRuntimeResult: mesh.FabricChannelRuntimeResult{
Channel: mesh.FabricChannel{RouteID: "route-runtime", TargetNode: "node-runtime"},
BytesSent: 123,
BytesRecv: 16,
FramesSent: 1,
FramesRecv: 1,
AcksReceived: 1,
},
// body_b64 below is the base64 encoding of {"ok":true}.
ResponsePayload: []byte(`{"payload":{"schema_version":"rap.web_ingress.fabric_runtime_response.v1","status_code":201,"headers":{"X-RAP-Runtime":["ok"],"Set-Cookie":["blocked"]},"body_b64":"eyJvayI6dHJ1ZX0="}}`),
},
}
sender := MeshEnvelopeSender{
ResponseRuntime: runtime,
RouteSet: testWebIngressRouteSet(),
ClusterID: "cluster-1",
SourceNodeID: "node-ingress",
TargetKind: mesh.FabricChannelTargetPool,
TargetID: "pool-admin-runtime",
ChannelID: "channel-web-1",
Now: fixedEnvelopeNow,
}
response, err := sender.Send(context.Background(), SignedFabricServiceChannelEnvelope{
SchemaVersion: "rap.web_ingress.signed_fabric_service_channel_envelope.v1",
Envelope: FabricServiceChannelEnvelope{SchemaVersion: FabricServiceChannelEnvelopeSchema, Scope: "platform", ServiceClass: "platform_admin"},
Signature: FabricEnvelopeSignature{KeyID: "node-key", Alg: "ed25519", Signature: "sig"},
})
if err != nil {
t.Fatalf("send: %v", err)
}
if response.StatusCode != http.StatusCreated || response.Headers.Get("X-RAP-Runtime") != "ok" || response.Headers.Get("Set-Cookie") != "" || string(response.Body) != `{"ok":true}` {
t.Fatalf("response = %+v body=%s", response, string(response.Body))
}
if runtime.spec.ChannelID != "channel-web-1" || len(runtime.payload) == 0 {
t.Fatalf("runtime spec=%+v payload=%s", runtime.spec, string(runtime.payload))
}
}
func TestMeshEnvelopeSenderReportsWrappedRuntimeError(t *testing.T) {
sender := MeshEnvelopeSender{
ResponseRuntime: &recordingRequestResponseRuntime{
result: mesh.FabricChannelRequestResponseResult{ResponsePayload: []byte(`{"error":"runtime unavailable"}`)},
},
RouteSet: testWebIngressRouteSet(),
ClusterID: "cluster-1",
SourceNodeID: "node-ingress",
TargetID: "pool-admin-runtime",
ChannelID: "channel-web-1",
}
_, err := sender.Send(context.Background(), SignedFabricServiceChannelEnvelope{
Envelope: FabricServiceChannelEnvelope{Scope: "platform", ServiceClass: "platform_admin"},
})
if !errors.Is(err, ErrMeshEnvelopeRuntimeRequired) {
t.Fatalf("err = %v", err)
}
}
func TestMeshEnvelopeSenderFallsBackToDeliveryAckForNonHTTPRuntimePayload(t *testing.T) {
runtime := &recordingRequestResponseRuntime{
result: mesh.FabricChannelRequestResponseResult{
FabricChannelRuntimeResult: mesh.FabricChannelRuntimeResult{
Channel: mesh.FabricChannel{RouteID: "route-runtime", TargetNode: "node-runtime"},
BytesSent: 123,
FramesSent: 1,
AcksReceived: 1,
},
ResponsePayload: []byte(`{"not":"http"}`),
},
}
sender := MeshEnvelopeSender{
ResponseRuntime: runtime,
RouteSet: testWebIngressRouteSet(),
ClusterID: "cluster-1",
SourceNodeID: "node-ingress",
TargetID: "pool-admin-runtime",
ChannelID: "channel-web-1",
}
response, err := sender.Send(context.Background(), SignedFabricServiceChannelEnvelope{
Envelope: FabricServiceChannelEnvelope{Scope: "platform", ServiceClass: "platform_admin"},
})
if err != nil {
t.Fatalf("send: %v", err)
}
if response.StatusCode != http.StatusAccepted {
t.Fatalf("response = %+v", response)
}
var body MeshEnvelopeDeliveryResponse
if err := json.Unmarshal(response.Body, &body); err != nil {
t.Fatalf("decode response: %v", err)
}
if body.Status != "accepted" || body.RouteID != "route-runtime" {
t.Fatalf("body = %+v", body)
}
}
func TestMeshEnvelopeSenderReportsRuntimeRouteAndIdentityErrors(t *testing.T) {
_, err := (MeshEnvelopeSender{}).Send(context.Background(), SignedFabricServiceChannelEnvelope{})
if !errors.Is(err, ErrMeshEnvelopeRuntimeRequired) {
t.Fatalf("runtime error = %v", err)
}
_, err = (MeshEnvelopeSender{
Runtime: &recordingReliableRuntime{},
ClusterID: "cluster-1",
SourceNodeID: "node-ingress",
TargetID: "pool-admin-runtime",
}).Send(context.Background(), SignedFabricServiceChannelEnvelope{})
if !errors.Is(err, ErrMeshEnvelopeRouteRequired) {
t.Fatalf("route error = %v", err)
}
_, err = (MeshEnvelopeSender{
Runtime: &recordingReliableRuntime{},
RouteSet: testWebIngressRouteSet(),
}).Send(context.Background(), SignedFabricServiceChannelEnvelope{})
if !errors.Is(err, ErrMeshEnvelopeIdentityInvalid) {
t.Fatalf("identity error = %v", err)
}
}
// TestMeshEnvelopeSenderPropagatesReliableRuntimeFailure checks that an error
// from SendReliable is returned to the caller of Send.
func TestMeshEnvelopeSenderPropagatesReliableRuntimeFailure(t *testing.T) {
	sendErr := errors.New("send failed")
	sender := MeshEnvelopeSender{
		Runtime:      &recordingReliableRuntime{err: sendErr},
		RouteSet:     testWebIngressRouteSet(),
		ClusterID:    "cluster-1",
		SourceNodeID: "node-ingress",
		TargetID:     "pool-admin-runtime",
	}

	_, err := sender.Send(context.Background(), SignedFabricServiceChannelEnvelope{})
	if !errors.Is(err, sendErr) {
		t.Fatalf("send error = %v", err)
	}
}
// recordingReliableRuntime is a test double for FabricChannelReliableRuntime:
// it records the arguments of the last SendReliable call and replays a
// configured result or error.
type recordingReliableRuntime struct {
// spec, routeSet and payloads are the arguments of the most recent call.
spec mesh.FabricChannelSpec
routeSet mesh.FabricRouteSet
payloads [][]byte
// result is returned when err is nil.
result mesh.FabricChannelRuntimeResult
err error
}
// recordingRequestResponseRuntime is a test double for
// FabricChannelRequestResponseRuntime: it records the arguments of the last
// SendRequestResponse call and replays a configured result or error.
type recordingRequestResponseRuntime struct {
// spec, routeSet and payload are the arguments of the most recent call.
spec mesh.FabricChannelSpec
routeSet mesh.FabricRouteSet
payload []byte
// result is returned when err is nil.
result mesh.FabricChannelRequestResponseResult
err error
}
// SendRequestResponse records its arguments and replays the configured
// outcome: the stored result, or a zero result plus the stored error.
func (r *recordingRequestResponseRuntime) SendRequestResponse(_ context.Context, spec mesh.FabricChannelSpec, routeSet mesh.FabricRouteSet, payload []byte) (mesh.FabricChannelRequestResponseResult, error) {
	r.spec, r.routeSet, r.payload = spec, routeSet, payload
	if r.err == nil {
		return r.result, nil
	}
	return mesh.FabricChannelRequestResponseResult{}, r.err
}
// SendReliable records its arguments and replays the configured outcome: the
// stored result, or a zero result plus the stored error.
func (r *recordingReliableRuntime) SendReliable(_ context.Context, spec mesh.FabricChannelSpec, routeSet mesh.FabricRouteSet, payloads [][]byte) (mesh.FabricChannelRuntimeResult, error) {
	r.spec, r.routeSet, r.payloads = spec, routeSet, payloads
	if r.err == nil {
		return r.result, nil
	}
	return mesh.FabricChannelRuntimeResult{}, r.err
}
// testWebIngressRouteSet returns a route set holding a single healthy primary
// route from node-ingress to node-runtime. Target kind and id are left unset.
func testWebIngressRouteSet() mesh.FabricRouteSet {
	primary := mesh.FabricRoute{
		RouteID:           "route-fast",
		ClusterID:         "cluster-1",
		SourceNodeID:      "node-ingress",
		DestinationNodeID: "node-runtime",
		PoolID:            "pool-admin-runtime",
		Healthy:           true,
		Capacity:          100,
	}
	var routes mesh.FabricRouteSet
	routes.Primary = primary
	return routes
}
@@ -0,0 +1,219 @@
package webingress
import (
"context"
"crypto/ed25519"
"encoding/base64"
"encoding/json"
"errors"
"fmt"
"net/http"
"strings"
"time"
)
// Schema identifiers used on the wire between ingress and runtime.
const (
// SignedFabricServiceChannelEnvelopeSchema is the schema_version a signed
// envelope must carry to pass FabricRuntimeReceiver verification.
SignedFabricServiceChannelEnvelopeSchema = "rap.web_ingress.signed_fabric_service_channel_envelope.v1"
// FabricRuntimeResponseSchema is the schema_version of an encoded runtime
// HTTP response.
FabricRuntimeResponseSchema = "rap.web_ingress.fabric_runtime_response.v1"
)
// Sentinel errors reported while parsing trusted keys and receiving envelopes.
var (
// ErrFabricEnvelopeSignatureInvalid covers malformed input: bad JSON, schema
// or contract mismatches, bad key or signature encoding, and a signature that
// does not verify.
ErrFabricEnvelopeSignatureInvalid = errors.New("web ingress fabric envelope signature invalid")
// ErrFabricEnvelopeUnauthorized covers well-formed envelopes that are not
// allowed: wrong scope or service class, timestamp outside the allowed skew,
// or an untrusted signing key.
ErrFabricEnvelopeUnauthorized = errors.New("web ingress fabric envelope unauthorized")
// ErrFabricEnvelopeRuntimeRequired is returned when the receiver has no Handler.
ErrFabricEnvelopeRuntimeRequired = errors.New("web ingress fabric runtime handler required")
)
// EnvelopeKeyResolver looks up the Ed25519 public key registered under a key
// id. The boolean reports whether the key is known; the receiver treats an
// unknown key as untrusted and returns a non-nil error to its caller unchanged.
type EnvelopeKeyResolver interface {
PublicKey(ctx context.Context, keyID string) (ed25519.PublicKey, bool, error)
}
// EnvelopeRuntimeHandler processes a request reconstructed from a verified
// envelope and produces the response to send back.
type EnvelopeRuntimeHandler interface {
HandleFabricRequest(ctx context.Context, request FabricRequest) (FabricResponse, error)
}
// RuntimeHandlerFunc adapts a plain function to EnvelopeRuntimeHandler.
type RuntimeHandlerFunc func(ctx context.Context, request FabricRequest) (FabricResponse, error)
// HandleFabricRequest calls f(ctx, request).
func (f RuntimeHandlerFunc) HandleFabricRequest(ctx context.Context, request FabricRequest) (FabricResponse, error) {
return f(ctx, request)
}
// ReceiverConfig restricts which envelopes a FabricRuntimeReceiver accepts.
type ReceiverConfig struct {
// ServiceType is not consulted by the verification code visible in this file.
// NOTE(review): confirm where, if anywhere, it is used.
ServiceType string
// Scope, when non-blank, must equal the envelope's scope.
Scope string
// ServiceClasses, when non-empty, is the allow-list for the envelope's
// service class.
ServiceClasses []string
// MaxClockSkew bounds how far envelope timestamps may be from now in either
// direction; values <= 0 mean five minutes.
MaxClockSkew time.Duration
}
// FabricRuntimeReceiver verifies signed service-channel envelopes and
// dispatches the requests they carry to Handler.
type FabricRuntimeReceiver struct {
Config ReceiverConfig
// Keys resolves signing keys; verification fails when it is nil.
Keys EnvelopeKeyResolver
// Handler receives verified requests; ReceiveResponse fails when it is nil.
Handler EnvelopeRuntimeHandler
// Now overrides the clock used for the skew check; time.Now is used when nil.
Now func() time.Time
}
// StaticEnvelopeKeyResolver is a fixed, in-memory set of trusted Ed25519
// public keys indexed by key id.
type StaticEnvelopeKeyResolver map[string]ed25519.PublicKey

// PublicKey looks up keyID (whitespace-trimmed) and returns a copy of the
// stored key with true, or nil and false when the id is unknown. The context
// is unused and the error is always nil.
func (r StaticEnvelopeKeyResolver) PublicKey(_ context.Context, keyID string) (ed25519.PublicKey, bool, error) {
	if stored, found := r[strings.TrimSpace(keyID)]; found {
		// Hand out a copy so callers cannot mutate the trusted key.
		return append(ed25519.PublicKey(nil), stored...), true, nil
	}
	return nil, false, nil
}
// Receive runs ReceiveResponse on payload and returns the handler's response
// serialised with encodeFabricRuntimeResponse. Errors from ReceiveResponse are
// returned as is.
func (r FabricRuntimeReceiver) Receive(ctx context.Context, payload []byte) ([]byte, error) {
	response, err := r.ReceiveResponse(ctx, payload)
	if err == nil {
		return encodeFabricRuntimeResponse(response)
	}
	return nil, err
}
// ReceiveResponse decodes payload as a signed envelope, verifies it, rebuilds
// the request it carries and passes that request to Handler.
//
// It returns ErrFabricEnvelopeRuntimeRequired when Handler is nil, an error
// wrapping ErrFabricEnvelopeSignatureInvalid when payload is not valid JSON,
// and otherwise whatever verification, request reconstruction or the handler
// report.
func (r FabricRuntimeReceiver) ReceiveResponse(ctx context.Context, payload []byte) (FabricResponse, error) {
	var none FabricResponse
	if r.Handler == nil {
		return none, ErrFabricEnvelopeRuntimeRequired
	}

	var signed SignedFabricServiceChannelEnvelope
	if decodeErr := json.Unmarshal(payload, &signed); decodeErr != nil {
		return none, fmt.Errorf("%w: invalid signed envelope json", ErrFabricEnvelopeSignatureInvalid)
	}
	if verifyErr := r.verify(ctx, signed); verifyErr != nil {
		return none, verifyErr
	}

	request, err := requestFromEnvelope(signed.Envelope)
	if err != nil {
		return none, err
	}
	return r.Handler.HandleFabricRequest(ctx, request)
}
// verify decides whether a signed envelope may be dispatched. The checks run
// in a fixed order and the first failure is returned:
//
//  1. schema versions and non-blank scope / service class (signature-invalid)
//  2. configured scope and service-class allow-list (unauthorized)
//  3. timestamp skew via verifyClock
//  4. key resolver present, signing key known and of Ed25519 size
//  5. algorithm is "ed25519" and the signature decodes to the Ed25519 size
//  6. Ed25519 verification over json.Marshal of the envelope
//
// Errors wrap ErrFabricEnvelopeSignatureInvalid or
// ErrFabricEnvelopeUnauthorized, except resolver and marshalling errors, which
// are returned unwrapped.
func (r FabricRuntimeReceiver) verify(ctx context.Context, signed SignedFabricServiceChannelEnvelope) error {
if signed.SchemaVersion != SignedFabricServiceChannelEnvelopeSchema {
return fmt.Errorf("%w: signed schema mismatch", ErrFabricEnvelopeSignatureInvalid)
}
if signed.Envelope.SchemaVersion != FabricServiceChannelEnvelopeSchema ||
strings.TrimSpace(signed.Envelope.Scope) == "" ||
strings.TrimSpace(signed.Envelope.ServiceClass) == "" {
return fmt.Errorf("%w: envelope contract invalid", ErrFabricEnvelopeSignatureInvalid)
}
// Scope is enforced only when configured; the envelope value is compared untrimmed.
if scope := strings.TrimSpace(r.Config.Scope); scope != "" && signed.Envelope.Scope != scope {
return fmt.Errorf("%w: scope mismatch", ErrFabricEnvelopeUnauthorized)
}
// An empty allow-list admits every service class.
if len(r.Config.ServiceClasses) > 0 && !contains(r.Config.ServiceClasses, signed.Envelope.ServiceClass) {
return fmt.Errorf("%w: service class not allowed", ErrFabricEnvelopeUnauthorized)
}
if err := r.verifyClock(signed.Envelope); err != nil {
return err
}
if r.Keys == nil {
return fmt.Errorf("%w: key resolver required", ErrFabricEnvelopeSignatureInvalid)
}
keyID := strings.TrimSpace(signed.Signature.KeyID)
publicKey, ok, err := r.Keys.PublicKey(ctx, keyID)
if err != nil {
return err
}
// A key of the wrong length is treated the same as an unknown key.
if !ok || len(publicKey) != ed25519.PublicKeySize {
return fmt.Errorf("%w: signing key not trusted", ErrFabricEnvelopeUnauthorized)
}
if signed.Signature.Alg != "ed25519" {
return fmt.Errorf("%w: algorithm mismatch", ErrFabricEnvelopeSignatureInvalid)
}
signature, err := decodeEnvelopeBase64(signed.Signature.Signature)
if err != nil || len(signature) != ed25519.SignatureSize {
return fmt.Errorf("%w: signature must be base64 ed25519", ErrFabricEnvelopeSignatureInvalid)
}
// The signed message is the re-marshalled envelope.
// NOTE(review): presumably the signer marshals the same struct so the bytes
// match; confirm against the signing side.
canonical, err := json.Marshal(signed.Envelope)
if err != nil {
return err
}
if !ed25519.Verify(publicKey, canonical, signature) {
return ErrFabricEnvelopeSignatureInvalid
}
return nil
}
// verifyClock checks the envelope's ObservedAt and EnvelopedAt timestamps
// against the receiver's clock. Blank timestamps are skipped. A timestamp that
// is not RFC 3339 yields an error wrapping ErrFabricEnvelopeSignatureInvalid;
// one that lies more than the allowed skew (Config.MaxClockSkew, or five
// minutes when that is <= 0) before or after now yields an error wrapping
// ErrFabricEnvelopeUnauthorized.
func (r FabricRuntimeReceiver) verifyClock(envelope FabricServiceChannelEnvelope) error {
	tolerance := r.Config.MaxClockSkew
	if tolerance <= 0 {
		tolerance = 5 * time.Minute
	}

	clock := time.Now
	if r.Now != nil {
		clock = r.Now
	}
	now := clock().UTC()
	earliest, latest := now.Add(-tolerance), now.Add(tolerance)

	stamps := [...]string{envelope.ObservedAt, envelope.EnvelopedAt}
	for i := range stamps {
		raw := stamps[i]
		if strings.TrimSpace(raw) == "" {
			continue
		}
		stamp, err := time.Parse(time.RFC3339Nano, raw)
		if err != nil {
			return fmt.Errorf("%w: invalid envelope timestamp", ErrFabricEnvelopeSignatureInvalid)
		}
		if stamp.After(latest) || stamp.Before(earliest) {
			return fmt.Errorf("%w: envelope timestamp outside skew", ErrFabricEnvelopeUnauthorized)
		}
	}
	return nil
}
// requestFromEnvelope rebuilds the FabricRequest carried by an envelope. The
// body is decoded from standard base64 (an undecodable non-empty body_b64 is
// an error wrapping ErrFabricEnvelopeSignatureInvalid), headers rejected by
// safeRequestHeader are dropped, and an unparsable ObservedAt becomes the zero
// time.
func requestFromEnvelope(envelope FabricServiceChannelEnvelope) (FabricRequest, error) {
	body, decodeErr := base64.StdEncoding.DecodeString(envelope.BodyBase64)
	if envelope.BodyBase64 != "" && decodeErr != nil {
		return FabricRequest{}, fmt.Errorf("%w: invalid body_b64", ErrFabricEnvelopeSignatureInvalid)
	}

	forwarded := http.Header{}
	for name, values := range envelope.Headers {
		if safeRequestHeader(name) {
			for i := range values {
				forwarded.Add(name, values[i])
			}
		}
	}

	// The parse error is ignored on purpose; the result is then the zero time.
	observedAt, _ := time.Parse(time.RFC3339Nano, envelope.ObservedAt)

	request := FabricRequest{
		SchemaVersion: envelope.RequestSchema,
		Method:        envelope.Method,
		Path:          envelope.Path,
		Query:         envelope.Query,
		Host:          envelope.Host,
		ServiceType:   envelope.ServiceType,
		Scope:         envelope.Scope,
		ServiceClass:  envelope.ServiceClass,
		Headers:       forwarded,
		Body:          body,
		ObservedAt:    observedAt,
	}
	return request, nil
}
// encodeFabricRuntimeResponse serializes a handler response into the JSON
// runtime response payload.
//
// Headers rejected by safeResponseHeader and headers without values are
// omitted. Keys are canonicalized with http.CanonicalHeaderKey; values of keys
// that differ only in case are merged under the canonical key instead of
// overwriting each other (the relative order of merged groups follows map
// iteration and is therefore unspecified). A status code outside 100-599 is
// replaced with 200. The body is encoded as standard base64.
func encodeFabricRuntimeResponse(response FabricResponse) ([]byte, error) {
	headers := make(map[string][]string, len(response.Headers))
	for key, values := range response.Headers {
		if len(values) == 0 || !safeResponseHeader(key) {
			continue
		}
		canonical := http.CanonicalHeaderKey(key)
		// append copies the values, so the payload never aliases the caller's slices.
		headers[canonical] = append(headers[canonical], values...)
	}
	if len(headers) == 0 {
		headers = nil
	}

	statusCode := response.StatusCode
	if statusCode < 100 || statusCode > 599 {
		statusCode = http.StatusOK
	}

	payload := struct {
		SchemaVersion string              `json:"schema_version"`
		StatusCode    int                 `json:"status_code"`
		Headers       map[string][]string `json:"headers,omitempty"`
		BodyBase64    string              `json:"body_b64,omitempty"`
	}{
		SchemaVersion: FabricRuntimeResponseSchema,
		StatusCode:    statusCode,
		Headers:       headers,
		BodyBase64:    base64.StdEncoding.EncodeToString(response.Body),
	}
	return json.Marshal(payload)
}
@@ -0,0 +1,194 @@
package webingress
import (
"context"
"crypto/ed25519"
"crypto/rand"
"encoding/base64"
"encoding/json"
"errors"
"net/http"
"testing"
)
// TestFabricRuntimeReceiverVerifiesEnvelopeAndReturnsRuntimeResponse signs a
// valid envelope, passes it through Receive and checks the encoded runtime
// response: schema, status code, forwarded header, stripped Set-Cookie and
// base64 body.
func TestFabricRuntimeReceiverVerifiesEnvelopeAndReturnsRuntimeResponse(t *testing.T) {
	publicKey, privateKey, err := ed25519.GenerateKey(rand.Reader)
	if err != nil {
		t.Fatalf("generate key: %v", err)
	}
	keyID := ed25519EnvelopeKeyID(publicKey)
	receiver := FabricRuntimeReceiver{
		Config: ReceiverConfig{ServiceType: "global-admin-runtime", Scope: "platform", ServiceClasses: []string{"platform_admin"}},
		Keys:   StaticEnvelopeKeyResolver{keyID: publicKey},
		Handler: recordingRuntimeHandler{response: FabricResponse{
			StatusCode: http.StatusCreated,
			Headers:    http.Header{"X-RAP-Runtime": []string{"ok"}, "Set-Cookie": []string{"blocked"}},
			Body:       []byte(`{"ok":true}`),
		}},
		Now: fixedEnvelopeNow,
	}
	payload := signedReceiverEnvelope(t, privateKey, keyID, FabricServiceChannelEnvelope{
		SchemaVersion: FabricServiceChannelEnvelopeSchema,
		RequestSchema: "rap.web_ingress.fabric_request.v1",
		Method:        http.MethodPost,
		Path:          "/platform-admin/root",
		Query:         "tab=nodes",
		Host:          "admin.example.test",
		ServiceType:   "admin-ingress",
		Scope:         "platform",
		ServiceClass:  "platform_admin",
		Headers:       map[string][]string{"X-Trace-Id": {"trace-1"}},
		BodyBase64:    base64.StdEncoding.EncodeToString([]byte(`{"hello":"world"}`)),
		ObservedAt:    "2026-05-17T00:00:00Z",
		EnvelopedAt:   "2026-05-17T00:00:01Z",
	})

	responsePayload, err := receiver.Receive(context.Background(), payload)
	if err != nil {
		t.Fatalf("receive: %v", err)
	}
	var response struct {
		SchemaVersion string              `json:"schema_version"`
		StatusCode    int                 `json:"status_code"`
		Headers       map[string][]string `json:"headers"`
		BodyBase64    string              `json:"body_b64"`
	}
	if err := json.Unmarshal(responsePayload, &response); err != nil {
		t.Fatalf("decode response: %v", err)
	}
	// The length check must come before the index so a missing header fails
	// the test with a readable message instead of panicking.
	if response.SchemaVersion != FabricRuntimeResponseSchema ||
		response.StatusCode != http.StatusCreated ||
		len(response.Headers["X-Rap-Runtime"]) == 0 ||
		response.Headers["X-Rap-Runtime"][0] != "ok" ||
		response.Headers["Set-Cookie"] != nil ||
		response.BodyBase64 != "eyJvayI6dHJ1ZX0=" {
		t.Fatalf("response = %+v", response)
	}
}
// TestFabricRuntimeReceiverRejectsBadSignatureScopeClassAndStaleEnvelope checks
// that Receive rejects a structurally damaged payload, a well-formed payload
// with a forged signature, an envelope with the wrong scope or service class,
// and an envelope whose timestamp is outside the clock skew window.
func TestFabricRuntimeReceiverRejectsBadSignatureScopeClassAndStaleEnvelope(t *testing.T) {
	publicKey, privateKey, err := ed25519.GenerateKey(rand.Reader)
	if err != nil {
		t.Fatalf("generate key: %v", err)
	}
	keyID := ed25519EnvelopeKeyID(publicKey)
	receiver := FabricRuntimeReceiver{
		Config:  ReceiverConfig{Scope: "platform", ServiceClasses: []string{"platform_admin"}},
		Keys:    StaticEnvelopeKeyResolver{keyID: publicKey},
		Handler: recordingRuntimeHandler{},
		Now:     fixedEnvelopeNow,
	}
	base := FabricServiceChannelEnvelope{
		SchemaVersion: FabricServiceChannelEnvelopeSchema,
		RequestSchema: "rap.web_ingress.fabric_request.v1",
		Method:        http.MethodGet,
		Path:          "/platform-admin/root",
		Scope:         "platform",
		ServiceClass:  "platform_admin",
		ObservedAt:    "2026-05-17T00:00:00Z",
		EnvelopedAt:   "2026-05-17T00:00:01Z",
	}

	// Overwriting the second-to-last byte damages the JSON structure itself
	// (that byte is a closing quote or brace), so this case covers a malformed
	// payload rather than a wrong signature.
	malformed := signedReceiverEnvelope(t, privateKey, keyID, base)
	malformed[len(malformed)-2] = 'x'
	if _, err := receiver.Receive(context.Background(), malformed); !errors.Is(err, ErrFabricEnvelopeSignatureInvalid) {
		t.Fatalf("malformed payload err = %v", err)
	}

	// Forge a well-formed payload whose signature bytes no longer match the
	// envelope, so the ed25519 verification path is what rejects it.
	var tampered SignedFabricServiceChannelEnvelope
	if err := json.Unmarshal(signedReceiverEnvelope(t, privateKey, keyID, base), &tampered); err != nil {
		t.Fatalf("decode signed envelope: %v", err)
	}
	rawSignature, err := base64.StdEncoding.DecodeString(tampered.Signature.Signature)
	if err != nil || len(rawSignature) == 0 {
		t.Fatalf("decode signature: %v", err)
	}
	rawSignature[0] ^= 0xff
	tampered.Signature.Signature = base64.StdEncoding.EncodeToString(rawSignature)
	badSignature, err := json.Marshal(tampered)
	if err != nil {
		t.Fatalf("marshal tampered envelope: %v", err)
	}
	if _, err := receiver.Receive(context.Background(), badSignature); !errors.Is(err, ErrFabricEnvelopeSignatureInvalid) {
		t.Fatalf("bad signature err = %v", err)
	}

	wrongScope := base
	wrongScope.Scope = "organization"
	if _, err := receiver.Receive(context.Background(), signedReceiverEnvelope(t, privateKey, keyID, wrongScope)); !errors.Is(err, ErrFabricEnvelopeUnauthorized) {
		t.Fatalf("wrong scope err = %v", err)
	}

	wrongClass := base
	wrongClass.ServiceClass = "cluster_admin"
	if _, err := receiver.Receive(context.Background(), signedReceiverEnvelope(t, privateKey, keyID, wrongClass)); !errors.Is(err, ErrFabricEnvelopeUnauthorized) {
		t.Fatalf("wrong class err = %v", err)
	}

	stale := base
	stale.EnvelopedAt = "2026-05-16T00:00:00Z"
	if _, err := receiver.Receive(context.Background(), signedReceiverEnvelope(t, privateKey, keyID, stale)); !errors.Is(err, ErrFabricEnvelopeUnauthorized) {
		t.Fatalf("stale err = %v", err)
	}
}
// TestFabricRuntimeReceiverRequiresTrustedKeyAndHandler checks the three
// configuration failures of Receive: no handler, no key resolver, and an
// envelope signed by a key the resolver does not know.
func TestFabricRuntimeReceiverRequiresTrustedKeyAndHandler(t *testing.T) {
	publicKey, privateKey, err := ed25519.GenerateKey(rand.Reader)
	if err != nil {
		t.Fatalf("generate key: %v", err)
	}
	keyID := ed25519EnvelopeKeyID(publicKey)
	trustedKeys := StaticEnvelopeKeyResolver{keyID: publicKey}
	ctx := context.Background()

	// The same envelope content is used for the trusted and the untrusted payload.
	envelope := FabricServiceChannelEnvelope{
		SchemaVersion: FabricServiceChannelEnvelopeSchema,
		Scope:         "platform",
		ServiceClass:  "platform_admin",
		ObservedAt:    "2026-05-17T00:00:00Z",
		EnvelopedAt:   "2026-05-17T00:00:01Z",
	}
	payload := signedReceiverEnvelope(t, privateKey, keyID, envelope)

	withoutHandler := FabricRuntimeReceiver{Keys: trustedKeys, Now: fixedEnvelopeNow}
	if _, err = withoutHandler.Receive(ctx, payload); !errors.Is(err, ErrFabricEnvelopeRuntimeRequired) {
		t.Fatalf("handler err = %v", err)
	}

	withoutKeys := FabricRuntimeReceiver{Handler: recordingRuntimeHandler{}, Now: fixedEnvelopeNow}
	if _, err = withoutKeys.Receive(ctx, payload); !errors.Is(err, ErrFabricEnvelopeSignatureInvalid) {
		t.Fatalf("key resolver err = %v", err)
	}

	_, otherPrivateKey, err := ed25519.GenerateKey(rand.Reader)
	if err != nil {
		t.Fatalf("generate other key: %v", err)
	}
	untrusted := signedReceiverEnvelope(t, otherPrivateKey, "other-key", envelope)
	complete := FabricRuntimeReceiver{Keys: trustedKeys, Handler: recordingRuntimeHandler{}, Now: fixedEnvelopeNow}
	if _, err = complete.Receive(ctx, untrusted); !errors.Is(err, ErrFabricEnvelopeUnauthorized) {
		t.Fatalf("untrusted key err = %v", err)
	}
}
// signedReceiverEnvelope marshals the envelope, signs those bytes with
// privateKey and returns the JSON encoding of the resulting signed envelope.
// Any marshalling failure aborts the calling test.
func signedReceiverEnvelope(t *testing.T, privateKey ed25519.PrivateKey, keyID string, envelope FabricServiceChannelEnvelope) []byte {
	t.Helper()

	signedBytes, err := json.Marshal(envelope)
	if err != nil {
		t.Fatalf("marshal envelope: %v", err)
	}
	rawSignature := ed25519.Sign(privateKey, signedBytes)

	signed := SignedFabricServiceChannelEnvelope{
		SchemaVersion: SignedFabricServiceChannelEnvelopeSchema,
		Envelope:      envelope,
		Signature: FabricEnvelopeSignature{
			KeyID:     keyID,
			Alg:       "ed25519",
			Signature: base64.StdEncoding.EncodeToString(rawSignature),
			SignedAt:  "2026-05-17T00:00:01Z",
		},
	}
	encoded, err := json.Marshal(signed)
	if err != nil {
		t.Fatalf("marshal signed envelope: %v", err)
	}
	return encoded
}
// recordingRuntimeHandler is a test handler that returns a canned response or
// error from HandleFabricRequest.
type recordingRuntimeHandler struct {
	request  FabricRequest
	response FabricResponse
	err      error
}

// HandleFabricRequest returns h.err when set; otherwise it returns h.response,
// substituting a 200 response with body {"ready":true} when no status code was
// configured.
//
// NOTE(review): the receiver is a value, so the assignment to h.request only
// changes this call's copy and the caller never observes the recorded request.
// Switching to a pointer receiver would require the tests to pass a pointer as
// Handler.
func (h recordingRuntimeHandler) HandleFabricRequest(_ context.Context, request FabricRequest) (FabricResponse, error) {
	h.request = request
	if h.err != nil {
		return FabricResponse{}, h.err
	}
	if h.response.StatusCode == 0 {
		h.response = FabricResponse{StatusCode: http.StatusOK, Body: []byte(`{"ready":true}`)}
	}
	return h.response, nil
}
@@ -0,0 +1,243 @@
package webingress
import (
"context"
"encoding/json"
"io"
"net/http"
"strings"
"time"
)
// RuntimeConfig holds the static settings of a web ingress Runtime.
type RuntimeConfig struct {
	// ServiceType is reported in JSON responses and copied into forwarded
	// fabric requests.
	ServiceType string
	// Scope is reported in JSON responses and is the fallback scope for
	// service classes without a fixed scope mapping.
	Scope string
	// ServiceClasses is the allow-list checked by the HTTPS handler.
	ServiceClasses []string
	// TLSMode, HTTPPort and HTTPSPort are not read by the handlers in this
	// file; presumably they are consumed by the listener setup — confirm.
	TLSMode   string
	HTTPPort  int
	HTTPSPort int
}
// Runtime builds the HTTP and HTTPS handlers of the web ingress.
type Runtime struct {
	Config RuntimeConfig
	// Binder forwards allowed requests; when nil the HTTPS handler answers
	// 501 Not Implemented.
	Binder FabricBinder
	// Now overrides the clock used for timestamps; time.Now is used when nil.
	Now func() time.Time
}
// FabricBinder forwards a request accepted by the HTTPS handler and returns
// the response to relay to the client. A non-nil error makes the handler
// answer 502 Bad Gateway.
type FabricBinder interface {
	Forward(ctx context.Context, request FabricRequest) (FabricResponse, error)
}
// FabricRequest is the transport-neutral form of an HTTP request. The HTTPS
// handler builds it from an incoming request and passes it to the
// FabricBinder.
type FabricRequest struct {
	SchemaVersion string
	Method        string
	Path          string
	Query         string // raw query string, without the leading '?'
	Host          string
	ServiceType   string
	Scope         string
	ServiceClass  string
	Headers       http.Header // filtered through safeRequestHeader by the HTTPS handler
	Body          []byte
	ObservedAt    time.Time
}
// FabricResponse is the response returned by a FabricBinder. When written to
// the client, a StatusCode outside 100-599 is replaced with 200 and headers
// rejected by safeResponseHeader are dropped.
type FabricResponse struct {
	StatusCode int
	Headers    http.Header
	Body       []byte
}
// Response is the JSON body the runtime writes for health checks and for
// requests it answers itself (blocked, not found, forwarding failure).
type Response struct {
	SchemaVersion string   `json:"schema_version"`
	Status        string   `json:"status"`
	Reason        string   `json:"reason,omitempty"`
	ServiceType   string   `json:"service_type,omitempty"`
	Scope         string   `json:"scope,omitempty"`
	ServiceClass  string   `json:"service_class,omitempty"`
	Allowed       []string `json:"allowed_service_classes,omitempty"`
	ObservedAt    string   `json:"observed_at"` // RFC 3339 timestamp with optional fractional seconds
}
// HTTPHandler returns the handler for the plain-HTTP listener.
//
// ACME challenge paths answer 404 (no challenge backend is configured),
// /healthz and /readyz answer 200, and every other request is redirected with
// 308 Permanent Redirect to the same host, path and query over https. An
// explicit port in the Host header is removed from the redirect target: it is
// the port of the HTTP listener, and reusing it would point the client at
// https://host:<http-port>.
//
// NOTE(review): Config.HTTPSPort is not consulted, so the redirect always
// targets the default HTTPS port.
func (r Runtime) HTTPHandler() http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
		if strings.HasPrefix(req.URL.Path, "/.well-known/acme-challenge/") {
			writeJSON(w, http.StatusNotFound, r.response("not_found", "acme_challenge_backend_not_configured", ""))
			return
		}
		if req.URL.Path == "/healthz" || req.URL.Path == "/readyz" {
			writeJSON(w, http.StatusOK, r.response("ready", "http_redirect_runtime_ready", ""))
			return
		}
		target := "https://" + stripRedirectPort(req.Host) + req.URL.RequestURI()
		w.Header().Set("Location", target)
		w.Header().Set("Cache-Control", "no-store")
		w.WriteHeader(http.StatusPermanentRedirect)
	})
}

// stripRedirectPort removes a trailing ":port" from a Host header value.
// Values without a port, including bracketed or bare IPv6 literals, are
// returned unchanged.
func stripRedirectPort(hostport string) string {
	if strings.HasSuffix(hostport, "]") {
		// Bracketed IPv6 literal without a port, e.g. "[::1]".
		return hostport
	}
	idx := strings.LastIndexByte(hostport, ':')
	if idx < 0 {
		return hostport
	}
	host := hostport[:idx]
	if strings.Contains(host, ":") && !strings.HasPrefix(host, "[") {
		// Unbracketed IPv6 literal: the last colon is part of the address.
		return hostport
	}
	return host
}
// HTTPSHandler returns the handler for the HTTPS listener.
//
// /healthz and /readyz answer 200. For every other request the service class
// is taken from the X-RAP-Service-Class header or, when that is blank, derived
// from the path. The request is answered by the runtime itself with:
//   - 400 when no service class can be determined,
//   - 403 when the class is not in Config.ServiceClasses,
//   - 501 when no Binder is configured,
//   - 413 when the body exceeds 1 MiB,
//   - 400 when the body cannot be read for any other reason,
//   - 502 when the Binder returns an error.
//
// Otherwise the Binder's response is relayed to the client.
func (r Runtime) HTTPSHandler() http.Handler {
	// maxRequestBodyBytes caps the request body buffered before forwarding.
	const maxRequestBodyBytes = 1 << 20

	return http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
		if req.URL.Path == "/healthz" || req.URL.Path == "/readyz" {
			writeJSON(w, http.StatusOK, r.response("ready", "https_runtime_ready", ""))
			return
		}
		serviceClass := strings.TrimSpace(req.Header.Get("X-RAP-Service-Class"))
		if serviceClass == "" {
			serviceClass = serviceClassFromPath(req.URL.Path)
		}
		if serviceClass == "" {
			writeJSON(w, http.StatusBadRequest, r.response("blocked", "service_class_required", ""))
			return
		}
		if !contains(r.Config.ServiceClasses, serviceClass) {
			writeJSON(w, http.StatusForbidden, r.response("blocked", "service_class_not_allowed", serviceClass))
			return
		}
		if r.Binder == nil {
			writeJSON(w, http.StatusNotImplemented, r.response("blocked", "fabric_service_channel_binding_not_implemented", serviceClass))
			return
		}
		scope := scopeForServiceClass(serviceClass, r.Config.Scope)
		body, err := io.ReadAll(http.MaxBytesReader(w, req.Body, maxRequestBodyBytes))
		if err != nil {
			// Only an exceeded size limit is a 413. io.ReadAll returns the
			// reader's error unwrapped, so a type assertion is sufficient.
			if _, tooLarge := err.(*http.MaxBytesError); tooLarge {
				writeJSON(w, http.StatusRequestEntityTooLarge, r.response("blocked", "request_body_too_large", serviceClass))
				return
			}
			writeJSON(w, http.StatusBadRequest, r.response("blocked", "request_body_read_failed", serviceClass))
			return
		}
		now := time.Now().UTC()
		if r.Now != nil {
			now = r.Now().UTC()
		}
		fabricResponse, err := r.Binder.Forward(req.Context(), FabricRequest{
			SchemaVersion: "rap.web_ingress.fabric_request.v1",
			Method:        req.Method,
			Path:          req.URL.Path,
			Query:         req.URL.RawQuery,
			Host:          req.Host,
			ServiceType:   strings.TrimSpace(r.Config.ServiceType),
			Scope:         scope,
			ServiceClass:  serviceClass,
			Headers:       cloneSafeHeaders(req.Header),
			Body:          body,
			ObservedAt:    now,
		})
		if err != nil {
			writeJSON(w, http.StatusBadGateway, r.response("blocked", "fabric_service_channel_forward_failed", serviceClass))
			return
		}
		writeFabricResponse(w, fabricResponse)
	})
}
// response builds the JSON body the runtime writes for requests it answers
// itself. ServiceType and Scope come from the configuration with surrounding
// whitespace removed, Allowed is a copy of the configured service classes, and
// ObservedAt is the current UTC time from r.Now (or time.Now when unset).
func (r Runtime) response(status, reason, serviceClass string) Response {
	clock := time.Now
	if r.Now != nil {
		clock = r.Now
	}

	// Copy so callers cannot mutate the configured allow-list through the payload.
	allowed := make([]string, 0, len(r.Config.ServiceClasses))
	allowed = append(allowed, r.Config.ServiceClasses...)

	var out Response
	out.SchemaVersion = "rap.web_ingress.runtime_response.v1"
	out.Status = status
	out.Reason = reason
	out.ServiceType = strings.TrimSpace(r.Config.ServiceType)
	out.Scope = strings.TrimSpace(r.Config.Scope)
	out.ServiceClass = serviceClass
	out.Allowed = allowed
	out.ObservedAt = clock().UTC().Format(time.RFC3339Nano)
	return out
}
// scopeForServiceClass maps a service class to its fixed scope. Classes
// without a fixed scope resolve to fallback. Surrounding whitespace is ignored
// on both arguments.
func scopeForServiceClass(serviceClass string, fallback string) string {
	fixedScopes := [...]struct{ class, scope string }{
		{"platform_admin", "platform"},
		{"cluster_admin", "cluster"},
		{"organization_portal", "organization"},
		{"user_portal", "user"},
	}
	class := strings.TrimSpace(serviceClass)
	for i := range fixedScopes {
		if fixedScopes[i].class == class {
			return fixedScopes[i].scope
		}
	}
	return strings.TrimSpace(fallback)
}
// serviceClassFromPath derives the service class from the leading part of a
// URL path. The path is lower-cased and stripped of leading and trailing
// slashes before matching; an unrecognized path yields "".
func serviceClassFromPath(path string) string {
	routes := [...]struct{ prefix, class string }{
		{"platform-admin", "platform_admin"},
		{"cluster-admin", "cluster_admin"},
		{"organizations/", "organization_portal"},
		{"users/", "user_portal"},
	}
	normalized := strings.Trim(strings.ToLower(path), "/")
	for i := range routes {
		if strings.HasPrefix(normalized, routes[i].prefix) {
			return routes[i].class
		}
	}
	return ""
}
// writeJSON writes payload as a JSON response with the given status code and
// marks it as non-cacheable.
func writeJSON(w http.ResponseWriter, status int, payload Response) {
	header := w.Header()
	header.Set("Content-Type", "application/json")
	header.Set("Cache-Control", "no-store")
	w.WriteHeader(status)

	// The status line is already sent, so an encoding failure cannot be
	// reported to the client; it is deliberately ignored.
	encoder := json.NewEncoder(w)
	_ = encoder.Encode(payload)
}
// writeFabricResponse relays a binder response to the client. Headers rejected
// by safeResponseHeader are dropped, Cache-Control is forced to no-store, and
// a status code outside 100-599 is replaced with 200.
func writeFabricResponse(w http.ResponseWriter, payload FabricResponse) {
	out := w.Header()
	for name, values := range payload.Headers {
		if safeResponseHeader(name) {
			for i := range values {
				out.Add(name, values[i])
			}
		}
	}
	// Set after copying so an upstream Cache-Control value is overridden.
	out.Set("Cache-Control", "no-store")

	code := http.StatusOK
	if payload.StatusCode >= 100 && payload.StatusCode <= 599 {
		code = payload.StatusCode
	}
	w.WriteHeader(code)
	_, _ = w.Write(payload.Body)
}
// cloneSafeHeaders returns a new header set holding every value of the headers
// accepted by safeRequestHeader. The result is never nil.
func cloneSafeHeaders(headers http.Header) http.Header {
	cloned := make(http.Header)
	for name, values := range headers {
		if safeRequestHeader(name) {
			for i := range values {
				cloned.Add(name, values[i])
			}
		}
	}
	return cloned
}
// safeRequestHeader reports whether a request header may be forwarded.
// Credential-bearing headers (Authorization, Cookie, Set-Cookie and the
// service channel token) are rejected; the comparison ignores case and
// surrounding whitespace.
func safeRequestHeader(key string) bool {
	name := strings.ToLower(strings.TrimSpace(key))
	blocked := name == "authorization" ||
		name == "cookie" ||
		name == "set-cookie" ||
		name == "x-rap-service-channel-token"
	return !blocked
}
// safeResponseHeader reports whether a response header may be relayed to the
// client. Set-Cookie is rejected, as are the hop-by-hop headers that describe
// a single connection and must not be forwarded by an intermediary
// (Connection, Keep-Alive, Proxy-Authenticate, Proxy-Authorization,
// Proxy-Connection, TE, Trailer, Transfer-Encoding, Upgrade). The comparison
// ignores case and surrounding whitespace.
func safeResponseHeader(key string) bool {
	switch strings.ToLower(strings.TrimSpace(key)) {
	case "set-cookie":
		return false
	case "connection",
		"keep-alive",
		"proxy-authenticate",
		"proxy-authorization",
		"proxy-connection",
		"te",
		"trailer",
		"transfer-encoding",
		"upgrade":
		return false
	default:
		return true
	}
}
// contains reports whether needle is an element of values (exact match).
func contains(values []string, needle string) bool {
	for i := 0; i < len(values); i++ {
		if values[i] == needle {
			return true
		}
	}
	return false
}
@@ -0,0 +1,206 @@
package webingress
import (
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"strings"
"testing"
"time"
)
// TestHTTPHandlerRedirectsToHTTPS checks that a plain-HTTP request is answered
// with a 308 redirect to the same host, path and query over https.
func TestHTTPHandlerRedirectsToHTTPS(t *testing.T) {
	handler := Runtime{Config: RuntimeConfig{ServiceType: "admin-ingress", Scope: "platform"}}.HTTPHandler()
	recorder := httptest.NewRecorder()

	handler.ServeHTTP(recorder, httptest.NewRequest(http.MethodGet, "http://admin.example.test/cluster-admin/dashboard?x=1", nil))

	if recorder.Code != http.StatusPermanentRedirect {
		t.Fatalf("status = %d", recorder.Code)
	}
	if location := recorder.Header().Get("Location"); location != "https://admin.example.test/cluster-admin/dashboard?x=1" {
		t.Fatalf("Location = %q", location)
	}
}
func TestHTTPSHandlerBlocksUnknownServiceClass(t *testing.T) {
runtime := Runtime{
Config: RuntimeConfig{
ServiceType: "public-ingress",
Scope: "organization",
ServiceClasses: []string{"organization_portal", "user_portal"},
},
Now: fixedNow,
}
req := httptest.NewRequest(http.MethodGet, "https://org.example.test/platform-admin/root", nil)
rec := httptest.NewRecorder()
runtime.HTTPSHandler().ServeHTTP(rec, req)
if rec.Code != http.StatusForbidden {
t.Fatalf("status = %d body=%s", rec.Code, rec.Body.String())
}
var payload Response
if err := json.Unmarshal(rec.Body.Bytes(), &payload); err != nil {
t.Fatalf("decode response: %v", err)
}
if payload.Reason != "service_class_not_allowed" || payload.ServiceClass != "platform_admin" || payload.Scope != "organization" {
t.Fatalf("payload = %+v", payload)
}
}
func TestHTTPSHandlerRequiresFabricServiceChannelBinding(t *testing.T) {
runtime := Runtime{
Config: RuntimeConfig{
ServiceType: "admin-ingress",
Scope: "platform",
ServiceClasses: []string{"platform_admin", "cluster_admin"},
},
Now: fixedNow,
}
req := httptest.NewRequest(http.MethodPost, "https://admin.example.test/platform-admin/root", nil)
rec := httptest.NewRecorder()
runtime.HTTPSHandler().ServeHTTP(rec, req)
if rec.Code != http.StatusNotImplemented {
t.Fatalf("status = %d body=%s", rec.Code, rec.Body.String())
}
var payload Response
if err := json.Unmarshal(rec.Body.Bytes(), &payload); err != nil {
t.Fatalf("decode response: %v", err)
}
if payload.Reason != "fabric_service_channel_binding_not_implemented" ||
payload.ServiceClass != "platform_admin" ||
payload.ObservedAt != "2026-05-17T00:00:00Z" {
t.Fatalf("payload = %+v", payload)
}
}
func TestHTTPSHandlerForwardsAllowedRequestToBinder(t *testing.T) {
binder := &recordingBinder{
response: FabricResponse{
StatusCode: http.StatusAccepted,
Headers: http.Header{"X-RAP-Result": []string{"accepted"}},
Body: []byte(`{"ok":true}`),
},
}
runtime := Runtime{
Config: RuntimeConfig{
ServiceType: "admin-ingress",
Scope: "platform",
ServiceClasses: []string{"platform_admin", "cluster_admin"},
},
Binder: binder,
Now: fixedNow,
}
req := httptest.NewRequest(http.MethodPost, "https://admin.example.test/platform-admin/root?tab=nodes", strings.NewReader(`{"hello":"world"}`))
req.Header.Set("X-RAP-Service-Class", "platform_admin")
req.Header.Set("Authorization", "Bearer secret")
req.Header.Set("X-Trace-ID", "trace-1")
rec := httptest.NewRecorder()
runtime.HTTPSHandler().ServeHTTP(rec, req)
if rec.Code != http.StatusAccepted {
t.Fatalf("status = %d body=%s", rec.Code, rec.Body.String())
}
if rec.Header().Get("X-RAP-Result") != "accepted" || rec.Body.String() != `{"ok":true}` {
t.Fatalf("unexpected response headers=%v body=%s", rec.Header(), rec.Body.String())
}
if binder.request.ServiceClass != "platform_admin" ||
binder.request.Scope != "platform" ||
binder.request.Path != "/platform-admin/root" ||
binder.request.Query != "tab=nodes" ||
string(binder.request.Body) != `{"hello":"world"}` {
t.Fatalf("request = %+v", binder.request)
}
if binder.request.Headers.Get("Authorization") != "" || binder.request.Headers.Get("X-Trace-ID") != "trace-1" {
t.Fatalf("headers = %#v", binder.request.Headers)
}
}
// TestHTTPSHandlerDerivesFabricScopeFromServiceClass checks that the scope of
// the forwarded request follows the service class derived from the path
// rather than the configured default scope.
func TestHTTPSHandlerDerivesFabricScopeFromServiceClass(t *testing.T) {
	binder := &recordingBinder{response: FabricResponse{StatusCode: http.StatusOK}}
	config := RuntimeConfig{
		ServiceType:    "admin-ingress",
		Scope:          "platform",
		ServiceClasses: []string{"platform_admin", "cluster_admin"},
	}
	handler := Runtime{Config: config, Binder: binder, Now: fixedNow}.HTTPSHandler()
	recorder := httptest.NewRecorder()

	handler.ServeHTTP(recorder, httptest.NewRequest(http.MethodGet, "https://admin.example.test/cluster-admin/ui-manifest", nil))

	if recorder.Code != http.StatusOK {
		t.Fatalf("status = %d body=%s", recorder.Code, recorder.Body.String())
	}
	if got := binder.request; got.ServiceClass != "cluster_admin" || got.Scope != "cluster" {
		t.Fatalf("request = %+v", binder.request)
	}
}
func TestHTTPSHandlerReportsBinderFailure(t *testing.T) {
runtime := Runtime{
Config: RuntimeConfig{ServiceType: "admin-ingress", Scope: "platform", ServiceClasses: []string{"platform_admin"}},
Binder: failingBinder{},
Now: fixedNow,
}
req := httptest.NewRequest(http.MethodPost, "https://admin.example.test/platform-admin/root", nil)
rec := httptest.NewRecorder()
runtime.HTTPSHandler().ServeHTTP(rec, req)
if rec.Code != http.StatusBadGateway {
t.Fatalf("status = %d body=%s", rec.Code, rec.Body.String())
}
var payload Response
if err := json.Unmarshal(rec.Body.Bytes(), &payload); err != nil {
t.Fatalf("decode response: %v", err)
}
if payload.Reason != "fabric_service_channel_forward_failed" {
t.Fatalf("payload = %+v", payload)
}
}
// TestHTTPSHandlerHealth checks that /healthz answers 200 even without
// configured service classes or a binder.
func TestHTTPSHandlerHealth(t *testing.T) {
	handler := Runtime{Config: RuntimeConfig{ServiceType: "admin-ingress", Scope: "platform"}, Now: fixedNow}.HTTPSHandler()
	recorder := httptest.NewRecorder()

	handler.ServeHTTP(recorder, httptest.NewRequest(http.MethodGet, "https://admin.example.test/healthz", nil))

	if code := recorder.Code; code != http.StatusOK {
		t.Fatalf("status = %d body=%s", code, recorder.Body.String())
	}
}
// fixedNow is the deterministic test clock: midnight UTC on 17 May 2026.
func fixedNow() time.Time {
	const year, day = 2026, 17
	return time.Date(year, time.May, day, 0, 0, 0, 0, time.UTC)
}
// recordingBinder is a test FabricBinder that remembers the last forwarded
// request and answers with a canned response.
type recordingBinder struct {
	request  FabricRequest
	response FabricResponse
}

// Forward stores request for later inspection and returns the configured
// response with a nil error. The pointer receiver is what makes the recorded
// request visible to the test.
func (b *recordingBinder) Forward(_ context.Context, request FabricRequest) (FabricResponse, error) {
	b.request = request
	return b.response, nil
}
// failingBinder is a test FabricBinder whose Forward always fails.
type failingBinder struct{}

// Forward returns an empty response and an errTestBinderFailure.
func (failingBinder) Forward(context.Context, FabricRequest) (FabricResponse, error) {
	return FabricResponse{}, errTestBinderFailure{}
}

// errTestBinderFailure is the error returned by failingBinder.
type errTestBinderFailure struct{}

// Error implements the error interface.
func (errTestBinderFailure) Error() string { return "binder failed" }
@@ -0,0 +1,95 @@
package webingress
import (
"context"
"crypto/ed25519"
"crypto/sha256"
"encoding/base64"
"encoding/hex"
"errors"
"fmt"
"strings"
"time"
)
// ErrFabricEnvelopeSigningKeyInvalid is returned (possibly wrapped) when the
// signing key is missing or malformed, or when there is nothing to sign.
var ErrFabricEnvelopeSigningKeyInvalid = errors.New("web ingress fabric envelope signing key invalid")
// Ed25519EnvelopeSigner signs canonical envelope bytes with an Ed25519 private
// key.
type Ed25519EnvelopeSigner struct {
	PrivateKey ed25519.PrivateKey
	// KeyID identifies the key in produced signatures; when blank it is
	// derived from the public key at signing time.
	KeyID string
	// Now overrides the clock used for SignedAt; time.Now is used when nil.
	Now func() time.Time
}
// NewEd25519EnvelopeSigner builds a signer from a base64-encoded Ed25519
// private key. A blank keyID (after trimming whitespace) is replaced with the
// identifier derived from the corresponding public key. Key decoding failures
// are returned as errors wrapping ErrFabricEnvelopeSigningKeyInvalid.
func NewEd25519EnvelopeSigner(privateKeyB64, keyID string) (Ed25519EnvelopeSigner, error) {
	key, err := decodeEd25519PrivateKey(privateKeyB64)
	if err != nil {
		return Ed25519EnvelopeSigner{}, err
	}

	signer := Ed25519EnvelopeSigner{PrivateKey: key, KeyID: strings.TrimSpace(keyID)}
	if signer.KeyID != "" {
		return signer, nil
	}

	derived, ok := key.Public().(ed25519.PublicKey)
	if !ok {
		return Ed25519EnvelopeSigner{}, ErrFabricEnvelopeSigningKeyInvalid
	}
	signer.KeyID = ed25519EnvelopeKeyID(derived)
	return signer, nil
}
// Sign produces an Ed25519 signature over canonical.
//
// It fails with ErrFabricEnvelopeSigningKeyInvalid (possibly wrapped) when the
// private key does not have the Ed25519 private key size or canonical is
// empty. The returned signature carries the signer's key ID (derived from the
// public key when blank), the algorithm name "ed25519", the standard base64
// signature and the UTC signing time.
func (s Ed25519EnvelopeSigner) Sign(_ context.Context, canonical []byte) (FabricEnvelopeSignature, error) {
	switch {
	case len(s.PrivateKey) != ed25519.PrivateKeySize:
		return FabricEnvelopeSignature{}, ErrFabricEnvelopeSigningKeyInvalid
	case len(canonical) == 0:
		return FabricEnvelopeSignature{}, fmt.Errorf("%w: canonical envelope empty", ErrFabricEnvelopeSigningKeyInvalid)
	}

	id := strings.TrimSpace(s.KeyID)
	if id == "" {
		derived, ok := s.PrivateKey.Public().(ed25519.PublicKey)
		if !ok {
			return FabricEnvelopeSignature{}, ErrFabricEnvelopeSigningKeyInvalid
		}
		id = ed25519EnvelopeKeyID(derived)
	}

	clock := time.Now
	if s.Now != nil {
		clock = s.Now
	}
	signedAt := clock().UTC()

	rawSignature := ed25519.Sign(s.PrivateKey, canonical)
	var out FabricEnvelopeSignature
	out.KeyID = id
	out.Alg = "ed25519"
	out.Signature = base64.StdEncoding.EncodeToString(rawSignature)
	out.SignedAt = signedAt.Format(time.RFC3339Nano)
	return out, nil
}
// decodeEd25519PrivateKey decodes a base64 Ed25519 private key, ignoring
// surrounding whitespace. The decoded key must be exactly
// ed25519.PrivateKeySize bytes; any failure is reported as an error wrapping
// ErrFabricEnvelopeSigningKeyInvalid.
func decodeEd25519PrivateKey(value string) (ed25519.PrivateKey, error) {
	raw, err := decodeEnvelopeBase64(strings.TrimSpace(value))
	switch {
	case err != nil:
		return nil, fmt.Errorf("%w: private key must be base64 encoded", ErrFabricEnvelopeSigningKeyInvalid)
	case len(raw) != ed25519.PrivateKeySize:
		return nil, fmt.Errorf("%w: private key must decode to %d bytes", ErrFabricEnvelopeSigningKeyInvalid, ed25519.PrivateKeySize)
	}
	return ed25519.PrivateKey(raw), nil
}
// decodeEnvelopeBase64 decodes value as base64, accepting the standard and the
// URL-safe alphabet, each with or without padding. The encodings are tried in
// the order standard, unpadded standard, URL-safe, unpadded URL-safe.
//
// An empty value is an error. When no encoding matches, the error of the first
// attempt is returned together with a nil slice.
func decodeEnvelopeBase64(value string) ([]byte, error) {
	if value == "" {
		return nil, errors.New("empty base64 value")
	}
	encodings := [...]*base64.Encoding{
		base64.StdEncoding,
		base64.RawStdEncoding,
		base64.URLEncoding, // padded URL-safe input
		base64.RawURLEncoding,
	}
	var firstErr error
	for _, encoding := range encodings {
		decoded, err := encoding.DecodeString(value)
		if err == nil {
			return decoded, nil
		}
		if firstErr == nil {
			firstErr = err
		}
	}
	return nil, firstErr
}
// ed25519EnvelopeKeyID derives the key identifier for a public key: the prefix
// "rap-node-ed25519-" followed by the lower-case hex encoding of the first 16
// bytes of the key's SHA-256 digest.
func ed25519EnvelopeKeyID(publicKey ed25519.PublicKey) string {
	digest := sha256.Sum256(publicKey)
	fingerprint := digest[:16]
	return fmt.Sprintf("rap-node-ed25519-%x", fingerprint)
}
@@ -0,0 +1,80 @@
package webingress
import (
"context"
"crypto/ed25519"
"crypto/rand"
"encoding/base64"
"errors"
"testing"
)
// TestEd25519EnvelopeSignerSignsCanonicalEnvelope checks that a signer built
// without an explicit key ID produces a verifiable signature and fills in the
// derived key ID, algorithm and timestamp.
func TestEd25519EnvelopeSignerSignsCanonicalEnvelope(t *testing.T) {
	publicKey, privateKey, err := ed25519.GenerateKey(rand.Reader)
	if err != nil {
		t.Fatalf("generate key: %v", err)
	}
	signer, err := NewEd25519EnvelopeSigner(base64.StdEncoding.EncodeToString(privateKey), "")
	if err != nil {
		t.Fatalf("new signer: %v", err)
	}
	signer.Now = fixedEnvelopeNow

	message := []byte(`{"schema_version":"test"}`)
	signature, err := signer.Sign(context.Background(), message)
	if err != nil {
		t.Fatalf("sign: %v", err)
	}
	rawSignature, err := base64.StdEncoding.DecodeString(signature.Signature)
	if err != nil {
		t.Fatalf("decode signature: %v", err)
	}
	if verified := ed25519.Verify(publicKey, message, rawSignature); !verified {
		t.Fatal("signature did not verify")
	}
	metadataOK := signature.KeyID == ed25519EnvelopeKeyID(publicKey) &&
		signature.Alg == "ed25519" &&
		signature.SignedAt == "2026-05-17T00:00:01Z"
	if !metadataOK {
		t.Fatalf("signature metadata = %+v", signature)
	}
}
// TestEd25519EnvelopeSignerUsesExplicitKeyID checks that an explicit key ID is
// kept as-is, using an unpadded base64 private key as input.
func TestEd25519EnvelopeSignerUsesExplicitKeyID(t *testing.T) {
	_, privateKey, err := ed25519.GenerateKey(rand.Reader)
	if err != nil {
		t.Fatalf("generate key: %v", err)
	}
	encodedKey := base64.RawStdEncoding.EncodeToString(privateKey)

	signer, err := NewEd25519EnvelopeSigner(encodedKey, "node-explicit")
	if err != nil {
		t.Fatalf("new signer: %v", err)
	}
	signature, err := signer.Sign(context.Background(), []byte(`{}`))
	if err != nil {
		t.Fatalf("sign: %v", err)
	}
	if got := signature.KeyID; got != "node-explicit" {
		t.Fatalf("key id = %q", got)
	}
}
// TestEd25519EnvelopeSignerRejectsInvalidKeyAndPayload checks the three
// failure modes: an undecodable key, a signer without a key, and an empty
// payload. Each must report ErrFabricEnvelopeSigningKeyInvalid.
func TestEd25519EnvelopeSignerRejectsInvalidKeyAndPayload(t *testing.T) {
	ctx := context.Background()

	if _, err := NewEd25519EnvelopeSigner("not-base64", ""); !errors.Is(err, ErrFabricEnvelopeSigningKeyInvalid) {
		t.Fatalf("invalid key error = %v", err)
	}

	var keyless Ed25519EnvelopeSigner
	if _, err := keyless.Sign(ctx, []byte(`{}`)); !errors.Is(err, ErrFabricEnvelopeSigningKeyInvalid) {
		t.Fatalf("missing key error = %v", err)
	}

	_, privateKey, err := ed25519.GenerateKey(rand.Reader)
	if err != nil {
		t.Fatalf("generate key: %v", err)
	}
	keyed := Ed25519EnvelopeSigner{PrivateKey: privateKey}
	if _, err := keyed.Sign(ctx, nil); !errors.Is(err, ErrFabricEnvelopeSigningKeyInvalid) {
		t.Fatalf("empty canonical error = %v", err)
	}
}
@@ -0,0 +1,504 @@
package fabricvpn
import (
"context"
"crypto/tls"
"encoding/json"
"fmt"
"net"
"strings"
"sync"
"sync/atomic"
"syscall"
"time"
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/mesh"
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/vpnruntime"
"github.com/quic-go/quic-go"
)
// endpointConfig describes one candidate endpoint the runtime may dial.
type endpointConfig struct {
	EndpointID string `json:"endpoint_id"` // falls back to Address when empty
	NodeID     string `json:"node_id"`     // falls back to the exit node ID when empty
	Transport  string `json:"transport"`   // falls back to "direct_quic" when empty
	Address    string `json:"address"`
	// PeerCertSHA256 is preferred over TLSCertSHA256 when both are set.
	PeerCertSHA256 string `json:"peer_cert_sha256"`
	TLSCertSHA256  string `json:"tls_cert_sha256"`
	Priority       int    `json:"priority"` // not read in the visible code
}
// runtimeConfig is the JSON configuration accepted by Manager.Start.
type runtimeConfig struct {
	ClusterID       string `json:"cluster_id"`        // required
	LocalNodeID     string `json:"local_node_id"`     // required
	ExitNodeID      string `json:"exit_node_id"`      // defaults to the route bundle's target node
	VPNConnectionID string `json:"vpn_connection_id"` // required
	// Endpoints is the last-resort candidate list; Start overwrites it with
	// the candidates selected from RouteBundle.
	Endpoints             []endpointConfig      `json:"endpoints"`
	RouteBundle           routeBundleConfig     `json:"route_bundle"`
	ServiceChannelRequest serviceChannelRequest `json:"service_channel_request"` // schema version required
	// StreamShards defaults to 4 when not positive and is capped at 32.
	StreamShards int `json:"stream_shards"`
}
// routeBundleConfig is the route bundle section of the runtime configuration.
// Its candidate lists are consulted only when the route lease carries no
// endpoint candidates.
type routeBundleConfig struct {
	SchemaVersion      string           `json:"schema_version"`
	RouteAuthority     string           `json:"route_authority"`
	SelectedTargetNode string           `json:"selected_target_node_id"`
	EndpointCandidates []endpointConfig `json:"endpoint_candidates"`
	TargetCandidates   []endpointConfig `json:"target_candidates"`
	RouteLease         routeLeaseConfig `json:"route_lease"`
}
// routeLeaseConfig is the route lease inside a route bundle. The primary path
// is preferred; warm standby paths are consulted in order when the primary
// path has no endpoint candidates.
type routeLeaseConfig struct {
	SchemaVersion      string           `json:"schema_version"`
	LeaseID            string           `json:"lease_id"`
	SelectedTargetNode string           `json:"selected_target_node"`
	PrimaryPath        routeLeasePath   `json:"primary_path"`
	WarmStandbyPaths   []routeLeasePath `json:"warm_standby_paths"`
	// Multipath and RebuildPolicy are decoded as opaque maps and are not read
	// in the visible code.
	Multipath     map[string]any `json:"multipath"`
	RebuildPolicy map[string]any `json:"rebuild_policy"`
}
// routeLeasePath is one path of a route lease: a target node and the endpoint
// candidates that reach it.
type routeLeasePath struct {
	PathID             string           `json:"path_id"`
	TargetNodeID       string           `json:"target_node_id"`
	Status             string           `json:"status"`
	EndpointCandidates []endpointConfig `json:"endpoint_candidates"`
}
// serviceChannelRequest is the service channel section of the runtime
// configuration. Start rejects a configuration whose SchemaVersion is blank.
type serviceChannelRequest struct {
	SchemaVersion string `json:"schema_version"`
	ChannelID     string `json:"channel_id"`
	ServiceClass  string `json:"service_class"`
	SourceRole    string `json:"source_role"`
}
// SocketProtector is asked to protect the UDP socket of every QUIC dial before
// it is used. Protect receives the socket's file descriptor; returning false
// makes the dial fail with "android vpn socket protect failed".
type SocketProtector interface {
	Protect(fd int64) bool
}
// Manager owns the fabric VPN runtime: the QUIC transport, the session to the
// selected endpoint and the packet transport built on top of it.
type Manager struct {
	// opMu serializes whole operations (Stop, SendPacket).
	opMu sync.Mutex
	// mu guards the connection state fields below.
	mu        sync.Mutex
	cancel    context.CancelFunc
	transport *mesh.QUICFabricTransport
	session   mesh.FabricTransportSession
	packet    *vpnruntime.FabricSessionPacketTransport
	inbox     *vpnruntime.FabricPacketInbox
	cfg       runtimeConfig
	lastErr   string
	endpoint  string // address of the endpoint currently connected to
	protector SocketProtector

	// Traffic counters; presumably updated on the send and receive paths
	// outside this view — confirm.
	uplinkPackets   atomic.Uint64
	uplinkBytes     atomic.Uint64
	downlinkPackets atomic.Uint64
	downlinkBytes   atomic.Uint64
}
// NewManager returns an idle Manager with no connection and no socket
// protector.
func NewManager() *Manager {
	return new(Manager)
}
// SetSocketProtector installs the protector applied to the UDP sockets of
// subsequent QUIC dials. Passing nil removes it.
func (m *Manager) SetSocketProtector(protector SocketProtector) {
	m.mu.Lock()
	defer m.mu.Unlock()
	m.protector = protector
}
// Start parses the JSON runtime configuration, validates it, stops any running
// connection and connects to the first reachable endpoint.
//
// The cluster, local node and VPN connection IDs, the service channel request
// schema version and at least one endpoint candidate are required.
// StreamShards defaults to 4 and is capped at 32. A connection failure is
// recorded via setErr and returned.
func (m *Manager) Start(configJSON string) error {
	cfg := runtimeConfig{}
	if err := json.Unmarshal([]byte(configJSON), &cfg); err != nil {
		return err
	}

	for _, field := range []*string{&cfg.ClusterID, &cfg.LocalNodeID, &cfg.ExitNodeID, &cfg.VPNConnectionID} {
		*field = strings.TrimSpace(*field)
	}
	// Resolve the effective endpoints first; the exit node fallback below and
	// the validation both read the resolved configuration.
	cfg.Endpoints = fabricRuntimeEndpoints(cfg)
	cfg.ExitNodeID = firstNonEmpty(cfg.ExitNodeID, fabricRuntimeTargetNodeID(cfg))

	switch {
	case cfg.ClusterID == "" || cfg.LocalNodeID == "" || cfg.VPNConnectionID == "":
		return fmt.Errorf("cluster, local node and vpn connection id are required")
	case strings.TrimSpace(cfg.ServiceChannelRequest.SchemaVersion) == "":
		return fmt.Errorf("fabric service channel request is required")
	case len(cfg.Endpoints) == 0:
		return fmt.Errorf("fabric route lease has no QUIC candidates")
	}

	switch {
	case cfg.StreamShards <= 0:
		cfg.StreamShards = 4
	case cfg.StreamShards > 32:
		cfg.StreamShards = 32
	}

	m.Stop()
	ctx, cancel := context.WithCancel(context.Background())
	err := m.connect(ctx, cfg, cancel)
	if err == nil {
		return nil
	}
	cancel()
	m.setErr(err)
	return err
}
// fabricRuntimeEndpoints selects the endpoint candidates to dial. The first
// non-empty list wins, in this order: the lease's primary path, its warm
// standby paths (in order), the bundle's endpoint candidates, the bundle's
// target candidates, and finally the top-level Endpoints.
func fabricRuntimeEndpoints(cfg runtimeConfig) []endpointConfig {
	lease := cfg.RouteBundle.RouteLease
	if primary := lease.PrimaryPath.EndpointCandidates; len(primary) > 0 {
		return primary
	}
	for i := range lease.WarmStandbyPaths {
		if standby := lease.WarmStandbyPaths[i].EndpointCandidates; len(standby) > 0 {
			return standby
		}
	}
	bundleLists := [][]endpointConfig{
		cfg.RouteBundle.EndpointCandidates,
		cfg.RouteBundle.TargetCandidates,
	}
	for _, candidates := range bundleLists {
		if len(candidates) > 0 {
			return candidates
		}
	}
	return cfg.Endpoints
}
// fabricRuntimeTargetNodeID returns the target node named by the route bundle:
// the primary path's target, else the lease's selected target, else the
// bundle's selected target (which may be empty).
func fabricRuntimeTargetNodeID(cfg runtimeConfig) string {
	lease := cfg.RouteBundle.RouteLease
	for _, id := range [...]string{lease.PrimaryPath.TargetNodeID, lease.SelectedTargetNode} {
		if id != "" {
			return id
		}
	}
	return cfg.RouteBundle.SelectedTargetNode
}
// connect dials the configured endpoints in order and installs the first one
// that yields a session with open streams as the manager's active connection.
//
// Each endpoint gets a five-second budget covering both the dial and the
// stream setup. On success the transport, session, inbox, configuration,
// cancel function and packet transport are stored under m.mu. When every
// endpoint fails, the transport created for this attempt is closed, since
// nothing else would otherwise release it, and the last error is returned.
func (m *Manager) connect(ctx context.Context, cfg runtimeConfig, cancel context.CancelFunc) error {
	quicTransport := mesh.NewQUICFabricTransport(nil)
	quicTransport.SetLocalPeerID(cfg.LocalNodeID)
	quicTransport.DialAddr = m.protectedQUICDialer()
	inbox := vpnruntime.NewFabricPacketInbox(4096)
	quicTransport.SetInboundHandlers(func(ctx context.Context, envelope mesh.ProductionEnvelope) (mesh.ProductionForwardResult, error) {
		if err := inbox.DeliverProductionEnvelope(ctx, envelope); err != nil {
			return mesh.ProductionForwardResult{}, err
		}
		return mesh.ProductionForwardResult{Delivered: true, MessageID: envelope.MessageID}, nil
	}, nil, nil)

	var lastErr error
	for _, endpoint := range cfg.Endpoints {
		target := mesh.FabricTransportTarget{
			EndpointID:     firstNonEmpty(endpoint.EndpointID, endpoint.Address),
			PeerID:         firstNonEmpty(endpoint.NodeID, cfg.ExitNodeID),
			Endpoint:       endpoint.Address,
			Transport:      firstNonEmpty(endpoint.Transport, "direct_quic"),
			PeerCertSHA256: firstNonEmpty(endpoint.PeerCertSHA256, endpoint.TLSCertSHA256),
			Timeout:        5 * time.Second,
			OutboundBuffer: 512,
			InboundBuffer:  512,
			ErrorBuffer:    32,
		}
		carrier, selected, err := mesh.FabricTransportForTarget(target, quicTransport)
		if err != nil {
			lastErr = err
			continue
		}
		dialCtx, dialCancel := context.WithTimeout(ctx, 5*time.Second)
		session, err := carrier.Connect(dialCtx, selected)
		if err != nil {
			dialCancel()
			lastErr = err
			continue
		}
		streamIDs, streamID, err := openStreams(dialCtx, session, cfg.StreamShards)
		dialCancel()
		if err != nil {
			_ = session.Close()
			lastErr = err
			continue
		}

		m.mu.Lock()
		m.cancel = cancel
		m.transport = quicTransport
		m.session = session
		m.inbox = inbox
		m.cfg = cfg
		m.endpoint = endpoint.Address
		m.lastErr = ""
		m.packet = &vpnruntime.FabricSessionPacketTransport{
			Sender:                  session,
			Receiver:                session,
			Inbox:                   inbox,
			StreamID:                streamID,
			StreamIDsByTrafficClass: streamIDs,
			VPNConnectionID:         cfg.VPNConnectionID,
			SendDirection:           vpnruntime.FabricDirectionClientToGateway,
			ReceiveDirection:        vpnruntime.FabricDirectionGatewayToClient,
		}
		m.mu.Unlock()
		return nil
	}

	// No endpoint worked: the transport was never handed to the manager, so
	// stopLocked will not close it. Release it here; a close error would only
	// mask the dial error that is about to be returned.
	_ = quicTransport.Close()
	if lastErr == nil {
		lastErr = fmt.Errorf("no QUIC exit endpoints available")
	}
	return lastErr
}
// protectedQUICDialer builds the dial function installed on the QUIC transport.
//
// It returns nil when the manager has no socket protector. Otherwise the
// returned function binds a fresh UDP socket on an ephemeral port, hands the
// socket's file descriptor to the protector, and only then dials the endpoint
// over that socket. The socket is closed on every failure that occurs before
// the dial is attempted.
func (m *Manager) protectedQUICDialer() func(context.Context, string, *tls.Config, *quic.Config) (*quic.Conn, error) {
	m.mu.Lock()
	socketProtector := m.protector
	m.mu.Unlock()
	if socketProtector == nil {
		return nil
	}

	return func(ctx context.Context, endpoint string, tlsConfig *tls.Config, config *quic.Config) (*quic.Conn, error) {
		// A '[' anywhere in the endpoint is taken as the sign of a bracketed
		// IPv6 literal; everything else is dialed over IPv4.
		family := "udp4"
		if strings.Contains(endpoint, "[") {
			family = "udp6"
		}

		packetConn, err := net.ListenPacket(family, ":0")
		if err != nil {
			return nil, err
		}

		sysConn, ok := packetConn.(syscall.Conn)
		if !ok {
			_ = packetConn.Close()
			return nil, fmt.Errorf("udp socket does not expose raw connection for vpn protection")
		}
		rawConn, err := sysConn.SyscallConn()
		if err != nil {
			_ = packetConn.Close()
			return nil, err
		}

		// The protector must see the descriptor before any traffic is sent.
		protected := true
		controlErr := rawConn.Control(func(fd uintptr) {
			protected = socketProtector.Protect(int64(fd))
		})
		if controlErr != nil {
			_ = packetConn.Close()
			return nil, controlErr
		}
		if !protected {
			_ = packetConn.Close()
			return nil, fmt.Errorf("android vpn socket protect failed")
		}

		return mesh.DialQUICAddrWithPacketConn(ctx, endpoint, packetConn, tlsConfig, config)
	}
}
// Stop shuts the runtime down. It takes the operation mutex, so it is
// serialized with the other methods that hold opMu (SendPacket, ReceivePacket,
// Reconnect), and then delegates the actual teardown to stopLocked.
func (m *Manager) Stop() {
m.opMu.Lock()
defer m.opMu.Unlock()
m.stopLocked()
}
// stopLocked tears down the current connection state.
//
// Under mu it detaches the cancel function, session and transport and clears
// the packet transport; the detached resources are then released after mu has
// been dropped, in the order cancel, session, transport. Close errors are
// deliberately ignored (best-effort teardown). The inbox, cfg, endpoint and
// lastErr fields are left untouched.
//
// NOTE(review): the "Locked" suffix presumably means the caller holds opMu, as
// Stop does — confirm for any other callers.
func (m *Manager) stopLocked() {
m.mu.Lock()
cancel := m.cancel
session := m.session
transport := m.transport
m.cancel = nil
m.session = nil
m.transport = nil
m.packet = nil
m.mu.Unlock()
// Release the detached resources outside mu.
if cancel != nil {
cancel()
}
if session != nil {
_ = session.Close()
}
if transport != nil {
_ = transport.Close()
}
}
// SendPacket forwards one packet to the gateway as a single-element batch.
//
// An empty packet is a no-op. The call makes sure a connection exists, sends a
// private copy of the packet with a 5 second deadline and, if that send fails,
// records the error, reconnects and retries exactly once. When the reconnect
// itself fails (or leaves no packet transport) the original send error is
// returned; when the retry fails its error is recorded and returned. The
// uplink counters are only advanced after a successful send.
func (m *Manager) SendPacket(packet []byte) error {
	if len(packet) == 0 {
		return nil
	}

	m.opMu.Lock()
	defer m.opMu.Unlock()

	if err := m.ensureConnectedLocked(); err != nil {
		return err
	}

	m.mu.Lock()
	active := m.packet
	m.mu.Unlock()
	if active == nil {
		return fmt.Errorf("fabric vpn runtime is not connected")
	}

	sendCtx, sendCancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer sendCancel()

	sendErr := active.SendGatewayPacketBatch(sendCtx, [][]byte{append([]byte(nil), packet...)})
	if sendErr != nil {
		m.setErr(sendErr)

		// One reconnect attempt; callers see the original failure if it does not help.
		if m.reconnectLocked() != nil {
			return sendErr
		}

		m.mu.Lock()
		active = m.packet
		m.mu.Unlock()
		if active == nil {
			return sendErr
		}

		retryCtx, retryCancel := context.WithTimeout(context.Background(), 5*time.Second)
		defer retryCancel()

		retryErr := active.SendGatewayPacketBatch(retryCtx, [][]byte{append([]byte(nil), packet...)})
		if retryErr != nil {
			m.setErr(retryErr)
			return retryErr
		}
	}

	m.uplinkPackets.Add(1)
	m.uplinkBytes.Add(uint64(len(packet)))
	return nil
}
// ReceivePacket waits for inbound gateway packets and returns a copy of the
// first packet of the received batch.
//
// timeoutMillis is the receive wait in milliseconds; a value <= 0 falls back to
// 100ms. The surrounding context is given one extra second on top of that wait.
// An empty batch yields (nil, nil). On a receive error the error is recorded, a
// reconnect is attempted (its result is ignored) and the original receive error
// is returned. The downlink counters are advanced for the returned packet only.
//
// NOTE(review): only packets[0] is returned; any further packets in the batch
// are dropped here — confirm ReceiveGatewayPacketBatch never returns more than
// one packet, or buffer the remainder.
// NOTE(review): opMu is held for the entire wait, so a concurrent SendPacket
// (which also takes opMu) blocks until this call returns — confirm intended.
func (m *Manager) ReceivePacket(timeoutMillis int) ([]byte, error) {
m.opMu.Lock()
defer m.opMu.Unlock()
if err := m.ensureConnectedLocked(); err != nil {
return nil, err
}
m.mu.Lock()
transport := m.packet
m.mu.Unlock()
if transport == nil {
return nil, fmt.Errorf("fabric vpn runtime is not connected")
}
timeout := time.Duration(timeoutMillis) * time.Millisecond
if timeout <= 0 {
// Non-positive timeouts fall back to a short default wait.
timeout = 100 * time.Millisecond
}
// The context outlives the receive wait by one second.
ctx, cancel := context.WithTimeout(context.Background(), timeout+time.Second)
defer cancel()
packets, err := transport.ReceiveGatewayPacketBatch(ctx, timeout)
if err != nil {
m.setErr(err)
// Best-effort reconnect; the caller still sees the receive error.
_ = m.reconnectLocked()
return nil, err
}
if len(packets) == 0 {
return nil, nil
}
// Copy so the caller does not alias the transport's buffer.
packet := append([]byte(nil), packets[0]...)
m.downlinkPackets.Add(1)
m.downlinkBytes.Add(uint64(len(packet)))
return packet, nil
}
// Reconnect drops the current session and transport and establishes a new
// connection using the stored configuration. It holds opMu for the duration
// and returns whatever reconnectLocked returns.
func (m *Manager) Reconnect() error {
m.opMu.Lock()
defer m.opMu.Unlock()
return m.reconnectLocked()
}
// ensureConnectedLocked makes sure a packet transport is available.
//
// It returns nil when one is already installed, fails with a "stopped" error
// when no cancel function is registered, and otherwise tries to reconnect.
func (m *Manager) ensureConnectedLocked() error {
	m.mu.Lock()
	hasPacketTransport, stopFn := m.packet != nil, m.cancel
	m.mu.Unlock()

	switch {
	case hasPacketTransport:
		return nil
	case stopFn == nil:
		return fmt.Errorf("fabric vpn runtime is stopped")
	default:
		return m.reconnectLocked()
	}
}
// reconnectLocked replaces the current connection with a fresh one.
//
// The existing session and transport are detached under mu and closed (close
// errors are ignored) before anything else happens. If no cancel function is
// registered the runtime counts as stopped and an error is returned without
// dialing. Otherwise connect is invoked with the stored configuration and an
// 8 second deadline; a connect failure is recorded via setErr and returned.
func (m *Manager) reconnectLocked() error {
	m.mu.Lock()
	storedCfg := m.cfg
	staleSession := m.session
	staleTransport := m.transport
	stopFn := m.cancel
	m.session, m.transport, m.packet = nil, nil, nil
	m.mu.Unlock()

	if staleSession != nil {
		_ = staleSession.Close()
	}
	if staleTransport != nil {
		_ = staleTransport.Close()
	}

	if stopFn == nil {
		return fmt.Errorf("fabric vpn runtime is stopped")
	}

	dialCtx, dialCancel := context.WithTimeout(context.Background(), 8*time.Second)
	defer dialCancel()

	err := m.connect(dialCtx, storedCfg, stopFn)
	if err == nil {
		return nil
	}
	m.setErr(err)
	return err
}
// SnapshotJSON renders the manager's current state as a JSON object string.
//
// The connection fields are read under mu; the traffic counters are read
// atomically afterwards. The marshal error is discarded.
func (m *Manager) SnapshotJSON() string {
	m.mu.Lock()
	snapshot := map[string]any{
		"schema_version": "rap.android_fabric_vpn_runtime.v1",
		"connected":      m.packet != nil,
		"endpoint":       m.endpoint,
		"last_error":     m.lastErr,
		"vpn_connection": m.cfg.VPNConnectionID,
		"local_node_id":  m.cfg.LocalNodeID,
		"exit_node_id":   m.cfg.ExitNodeID,
	}
	m.mu.Unlock()

	snapshot["uplink_packets"] = m.uplinkPackets.Load()
	snapshot["uplink_bytes"] = m.uplinkBytes.Load()
	snapshot["downlink_packets"] = m.downlinkPackets.Load()
	snapshot["downlink_bytes"] = m.downlinkBytes.Load()

	encoded, _ := json.Marshal(snapshot)
	return string(encoded)
}
// setErr stores the text of err as the manager's last error. A nil err leaves
// the previously stored text unchanged.
func (m *Manager) setErr(err error) {
	if err != nil {
		m.mu.Lock()
		m.lastErr = err.Error()
		m.mu.Unlock()
	}
}
// openStreams opens the fabric streams used by a VPN session.
//
// For each traffic class (interactive first, then bulk) it sends `shards`
// FrameOpenStream frames on session. Stream IDs are allocated contiguously,
// starting at the current wall-clock time in nanoseconds.
//
// A shards value below one is treated as one: without that, a zero-value
// configuration would open no streams at all and hand back an empty map with a
// primary stream ID of 0 and no error.
//
// It returns the opened stream IDs grouped by traffic-class name, the primary
// stream ID (the first stream opened), and the first Send error encountered.
// Streams already opened before a failure are not closed here.
func openStreams(ctx context.Context, session mesh.FabricTransportSession, shards int) (map[string][]uint64, uint64, error) {
	if shards < 1 {
		shards = 1
	}
	base := uint64(time.Now().UnixNano())
	classes := []struct {
		name         string
		trafficClass fabricproto.TrafficClass
	}{
		{name: vpnruntime.FabricTrafficClassInteractive, trafficClass: fabricproto.TrafficClassInteractive},
		{name: vpnruntime.FabricTrafficClassBulk, trafficClass: fabricproto.TrafficClassBulk},
	}
	out := make(map[string][]uint64, len(classes))
	var primary uint64
	for classIndex, class := range classes {
		for shard := 0; shard < shards; shard++ {
			streamID := base + uint64(classIndex*shards+shard)
			frame := fabricproto.Frame{
				Type:         fabricproto.FrameOpenStream,
				StreamID:     streamID,
				TrafficClass: class.trafficClass,
			}
			if err := session.Send(ctx, frame); err != nil {
				return nil, 0, err
			}
			// The first successfully opened stream becomes the primary one.
			if primary == 0 {
				primary = streamID
			}
			out[class.name] = append(out[class.name], streamID)
		}
	}
	return out, primary, nil
}
// firstNonEmpty returns the first argument that is not blank, with surrounding
// whitespace removed. It returns "" when every argument is empty or
// whitespace-only, or when no arguments are given.
func firstNonEmpty(values ...string) string {
	for i := range values {
		if trimmed := strings.TrimSpace(values[i]); trimmed != "" {
			return trimmed
		}
	}
	return ""
}
@@ -0,0 +1,137 @@
package fabricvpn
import (
"os"
"testing"
)
func TestFabricRuntimeEndpointsPreferRouteBundle(t *testing.T) {
cfg := runtimeConfig{
Endpoints: []endpointConfig{{EndpointID: "legacy", Address: "quic://legacy.example:19131"}},
RouteBundle: routeBundleConfig{
EndpointCandidates: []endpointConfig{{EndpointID: "bundle", Address: "quic://bundle.example:19131"}},
},
}
got := fabricRuntimeEndpoints(cfg)
if len(got) != 1 || got[0].EndpointID != "bundle" {
t.Fatalf("endpoints = %+v, want route bundle endpoint", got)
}
}
func TestFabricRuntimeEndpointsPreferRouteLease(t *testing.T) {
cfg := runtimeConfig{
Endpoints: []endpointConfig{{EndpointID: "legacy", Address: "quic://legacy.example:19131"}},
RouteBundle: routeBundleConfig{
EndpointCandidates: []endpointConfig{{EndpointID: "bundle", Address: "quic://bundle.example:19131"}},
RouteLease: routeLeaseConfig{
SelectedTargetNode: "exit-1",
PrimaryPath: routeLeasePath{
TargetNodeID: "exit-1",
EndpointCandidates: []endpointConfig{{EndpointID: "lease-primary", Address: "quic://lease.example:19131"}},
},
},
},
}
got := fabricRuntimeEndpoints(cfg)
if len(got) != 1 || got[0].EndpointID != "lease-primary" {
t.Fatalf("endpoints = %+v, want route lease primary endpoint", got)
}
if target := fabricRuntimeTargetNodeID(cfg); target != "exit-1" {
t.Fatalf("target = %q, want exit-1", target)
}
}
// TestFabricRuntimeEndpointsFallbackToLegacyEndpoints checks that the legacy
// top-level endpoint list is used when the config carries no route bundle.
func TestFabricRuntimeEndpointsFallbackToLegacyEndpoints(t *testing.T) {
	legacyOnly := runtimeConfig{
		Endpoints: []endpointConfig{
			{EndpointID: "legacy", Address: "quic://legacy.example:19131"},
		},
	}

	resolved := fabricRuntimeEndpoints(legacyOnly)
	if len(resolved) == 1 && resolved[0].EndpointID == "legacy" {
		return
	}
	t.Fatalf("endpoints = %+v, want legacy endpoint fallback", resolved)
}
// TestLiveFabricVPNRuntimeStartsFromRouteLease is an opt-in live test.
//
// It is skipped unless RAP_LIVE_FABRICVPN_CONFIG holds a runtime configuration.
// With a configuration it starts a manager and requires a non-empty snapshot.
// When RAP_LIVE_FABRICVPN_PACKET_PROBE is also set, it sends a DNS query packet
// and polls up to 20 times (500ms each) for a response whose IPv4 header says
// protocol 17 (UDP) and source address 1.1.1.1.
func TestLiveFabricVPNRuntimeStartsFromRouteLease(t *testing.T) {
	raw := os.Getenv("RAP_LIVE_FABRICVPN_CONFIG")
	if raw == "" {
		t.Skip("RAP_LIVE_FABRICVPN_CONFIG is not set")
	}
	manager := NewManager()
	if err := manager.Start(raw); err != nil {
		t.Fatalf("start live fabric vpn runtime: %v", err)
	}
	defer manager.Stop()
	if snapshot := manager.SnapshotJSON(); snapshot == "" {
		t.Fatal("empty live fabric vpn snapshot")
	}
	if os.Getenv("RAP_LIVE_FABRICVPN_PACKET_PROBE") == "" {
		return
	}
	if err := manager.SendPacket(testDNSIPv4Packet()); err != nil {
		t.Fatalf("send live dns packet: %v", err)
	}
	for i := 0; i < 20; i++ {
		packet, err := manager.ReceivePacket(500)
		if err != nil {
			t.Fatalf("receive live dns packet: %v", err)
		}
		if len(packet) == 0 {
			continue
		}
		// Check the length before indexing: a packet shorter than an IPv4
		// header must fail the test cleanly instead of panicking with an
		// index-out-of-range.
		if len(packet) < 20 || packet[9] != 17 || packet[12] != 1 || packet[13] != 1 || packet[14] != 1 || packet[15] != 1 {
			t.Fatalf("unexpected response packet header: %v", packet[:min(20, len(packet))])
		}
		return
	}
	t.Fatal("timed out waiting for live dns response through fabric vpn")
}
// testDNSIPv4Packet builds a raw IPv4/UDP packet carrying a DNS A/IN query for
// example.com, addressed from 10.77.0.2 port 0xcf08 to 1.1.1.1 port 53.
//
// The IPv4 header checksum is filled in; the UDP checksum bytes are left zero.
func testDNSIPv4Packet() []byte {
	const (
		ipHeaderLen  = 20
		udpHeaderLen = 8
	)
	query := []byte{
		0x12, 0x34, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x07, 'e', 'x', 'a',
		'm', 'p', 'l', 'e', 0x03, 'c', 'o', 'm', 0x00,
		0x00, 0x01, 0x00, 0x01,
	}
	udpLen := udpHeaderLen + len(query)
	totalLen := ipHeaderLen + udpLen
	pkt := make([]byte, totalLen)

	// IPv4 header: version 4 / IHL 5, total length, TTL 64, protocol 17 (UDP).
	ip := pkt[:ipHeaderLen]
	ip[0] = 0x45
	ip[2], ip[3] = byte(totalLen>>8), byte(totalLen)
	ip[8] = 64
	ip[9] = 17
	copy(ip[12:16], []byte{10, 77, 0, 2})
	copy(ip[16:20], []byte{1, 1, 1, 1})

	// UDP header: source port, destination port, length; checksum stays zero.
	udp := pkt[ipHeaderLen:]
	udp[0], udp[1] = 0xcf, 0x08
	udp[2], udp[3] = 0x00, 0x35
	udp[4], udp[5] = byte(udpLen>>8), byte(udpLen)
	copy(udp[udpHeaderLen:], query)

	checksum := ipv4HeaderChecksum(ip)
	ip[10], ip[11] = byte(checksum>>8), byte(checksum)
	return pkt
}
// ipv4HeaderChecksum computes the ones'-complement checksum over header, read
// as big-endian 16-bit words. The word at byte offset 10 (the checksum field
// itself) is left out of the sum, and a trailing odd byte is ignored.
func ipv4HeaderChecksum(header []byte) uint16 {
	const checksumOffset = 10

	var acc uint32
	for off := 0; off+1 < len(header); off += 2 {
		if off != checksumOffset {
			acc += uint32(header[off])<<8 | uint32(header[off+1])
		}
	}
	// Fold the carries back into the low 16 bits.
	for acc>>16 != 0 {
		acc = (acc & 0xffff) + (acc >> 16)
	}
	return ^uint16(acc)
}
// min returns the smaller of a and b.
func min(a, b int) int {
	if b <= a {
		return b
	}
	return a
}