Refactor RDP proxy handling and update related tests
This commit is contained in:
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
Binary file not shown.
Binary file not shown.
BIN
Binary file not shown.
Binary file not shown.
BIN
Binary file not shown.
Binary file not shown.
BIN
Binary file not shown.
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,17 @@
|
|||||||
|
# Build stage: compile the fabric-loadtest binary from the rap-node-agent module.
FROM golang:1.25-bookworm AS build

WORKDIR /src

# Copy only the module files first so the dependency download layer is reused
# until go.mod or go.sum changes.
COPY agents/rap-node-agent/go.mod agents/rap-node-agent/go.sum ./
RUN go mod download

# Copy the rest of the module and build a static linux/amd64 binary.
COPY agents/rap-node-agent/ ./
RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o /out/fabric-loadtest ./cmd/fabric-loadtest

# Runtime stage: slim Debian image with CA certificates and networking tools.
FROM debian:bookworm-slim

RUN apt-get update \
    && apt-get install -y --no-install-recommends ca-certificates iproute2 iptables iputils-ping procps \
    && rm -rf /var/lib/apt/lists/*

COPY --from=build /out/fabric-loadtest /usr/local/bin/fabric-loadtest

ENTRYPOINT ["/usr/local/bin/fabric-loadtest"]
|
||||||
|
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,760 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"context"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/mesh"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestRouteModeCoverageVerdictRequiresMixedModes(t *testing.T) {
|
||||||
|
report := loadtestReport{
|
||||||
|
Config: loadtestConfig{
|
||||||
|
TopologyProfile: "mixed-public-nat-lan-relay",
|
||||||
|
Targets: []string{"a", "b", "c", "d"},
|
||||||
|
FailTarget: -1,
|
||||||
|
ImpairTarget: -1,
|
||||||
|
},
|
||||||
|
SuccessfulStreams: 4,
|
||||||
|
TargetStats: map[string]targetStats{
|
||||||
|
"a": {RouteModes: map[string]int{string(mesh.FabricRouteLAN): 1}},
|
||||||
|
"b": {RouteModes: map[string]int{string(mesh.FabricRouteICE): 1}},
|
||||||
|
"c": {RouteModes: map[string]int{string(mesh.FabricRouteReverse): 1}},
|
||||||
|
"d": {RouteModes: map[string]int{}},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
reasons := routeModeCoverageVerdictReasons(report)
|
||||||
|
if len(reasons) != 1 || !strings.Contains(reasons[0], string(mesh.FabricRouteRelay)) {
|
||||||
|
t.Fatalf("reasons = %v, want missing relay route mode", reasons)
|
||||||
|
}
|
||||||
|
|
||||||
|
report.TargetStats["d"] = targetStats{RouteModes: map[string]int{string(mesh.FabricRouteRelay): 1}}
|
||||||
|
if reasons := routeModeCoverageVerdictReasons(report); len(reasons) != 0 {
|
||||||
|
t.Fatalf("reasons = %v, want full coverage pass", reasons)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLegacyRouteModeVerdictRejectsNonQUICModes(t *testing.T) {
|
||||||
|
report := loadtestReport{
|
||||||
|
TargetStats: map[string]targetStats{
|
||||||
|
"a": {RouteModes: map[string]int{
|
||||||
|
"direct_quic": 4,
|
||||||
|
"relay": 1,
|
||||||
|
"outbound_reverse": 2,
|
||||||
|
"wss": 3,
|
||||||
|
}},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
reasons := legacyRouteModeVerdictReasons(report)
|
||||||
|
if len(reasons) != 1 ||
|
||||||
|
!strings.Contains(reasons[0], "relay:1") ||
|
||||||
|
!strings.Contains(reasons[0], "outbound_reverse:2") ||
|
||||||
|
!strings.Contains(reasons[0], "wss:3") {
|
||||||
|
t.Fatalf("reasons = %v, want legacy route mode failure", reasons)
|
||||||
|
}
|
||||||
|
|
||||||
|
report.TargetStats["a"] = targetStats{RouteModes: map[string]int{
|
||||||
|
string(mesh.FabricRouteDirect): 1,
|
||||||
|
string(mesh.FabricRouteLAN): 1,
|
||||||
|
string(mesh.FabricRouteICE): 1,
|
||||||
|
string(mesh.FabricRouteReverse): 1,
|
||||||
|
string(mesh.FabricRouteRelay): 1,
|
||||||
|
}}
|
||||||
|
if reasons := legacyRouteModeVerdictReasons(report); len(reasons) != 0 {
|
||||||
|
t.Fatalf("reasons = %v, want QUIC modes accepted", reasons)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTargetEndpointPolicyVerdictRejectsNonQUICTargets(t *testing.T) {
|
||||||
|
report := loadtestReport{
|
||||||
|
Config: loadtestConfig{
|
||||||
|
Targets: []string{
|
||||||
|
"quic://a:19443",
|
||||||
|
"http://b:19443",
|
||||||
|
"ws://c:19443",
|
||||||
|
"d:19443",
|
||||||
|
"",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
reasons := targetEndpointPolicyVerdictReasons(report)
|
||||||
|
if len(reasons) != 1 ||
|
||||||
|
!strings.Contains(reasons[0], "http://b:19443") ||
|
||||||
|
!strings.Contains(reasons[0], "ws://c:19443") ||
|
||||||
|
!strings.Contains(reasons[0], "d:19443") ||
|
||||||
|
!strings.Contains(reasons[0], "<empty>") {
|
||||||
|
t.Fatalf("reasons = %v, want non-QUIC target failure", reasons)
|
||||||
|
}
|
||||||
|
|
||||||
|
report.Config.Targets = []string{"quic://a:19443", " QUIC://b:19443 "}
|
||||||
|
if reasons := targetEndpointPolicyVerdictReasons(report); len(reasons) != 0 {
|
||||||
|
t.Fatalf("reasons = %v, want QUIC targets accepted", reasons)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRunClientRejectsNonQUICTargetBeforeDial(t *testing.T) {
|
||||||
|
_, err := runClient(context.Background(), loadtestConfig{
|
||||||
|
Targets: []string{"http://127.0.0.1:19443"},
|
||||||
|
Streams: 1,
|
||||||
|
Concurrency: 1,
|
||||||
|
BytesPerStream: 1,
|
||||||
|
PayloadSize: 1,
|
||||||
|
})
|
||||||
|
if err == nil || !strings.Contains(err.Error(), "non_quic_targets=http://127.0.0.1:19443") {
|
||||||
|
t.Fatalf("err = %v, want non-QUIC target validation error", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFillLoadtestPayloadVariesByStreamAndSequence(t *testing.T) {
|
||||||
|
first := make([]byte, 128)
|
||||||
|
second := make([]byte, 128)
|
||||||
|
third := make([]byte, 128)
|
||||||
|
|
||||||
|
fillLoadtestPayload(first, 7, 9, 1, 0)
|
||||||
|
fillLoadtestPayload(second, 7, 9, 2, int64(len(first)))
|
||||||
|
fillLoadtestPayload(third, 8, 10, 1, 0)
|
||||||
|
|
||||||
|
if bytes.Equal(first, second) {
|
||||||
|
t.Fatal("payload did not vary by sequence/offset")
|
||||||
|
}
|
||||||
|
if bytes.Equal(first, third) {
|
||||||
|
t.Fatal("payload did not vary by stream")
|
||||||
|
}
|
||||||
|
if bytes.Count(first, []byte{first[0]}) == len(first) {
|
||||||
|
t.Fatal("payload collapsed to a constant byte")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFillLoadtestPayloadIsDeterministic(t *testing.T) {
|
||||||
|
first := make([]byte, 128)
|
||||||
|
second := make([]byte, 128)
|
||||||
|
|
||||||
|
fillLoadtestPayload(first, 7, 9, 1, 0)
|
||||||
|
fillLoadtestPayload(second, 7, 9, 1, 0)
|
||||||
|
|
||||||
|
if !bytes.Equal(first, second) {
|
||||||
|
t.Fatal("payload is not deterministic")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFillLoadtestPayloadHandlesShortFinalChunk(t *testing.T) {
|
||||||
|
chunk := make([]byte, 17)
|
||||||
|
fillLoadtestPayload(chunk, 7, 9, 3, 256)
|
||||||
|
if bytes.Equal(chunk, make([]byte, len(chunk))) {
|
||||||
|
t.Fatal("short payload chunk stayed zeroed")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestVerdictFailsSuccessfulStreamAckMismatch(t *testing.T) {
|
||||||
|
report := loadtestReport{
|
||||||
|
Config: loadtestConfig{
|
||||||
|
FailTarget: -1,
|
||||||
|
ImpairTarget: -1,
|
||||||
|
Concurrency: 1,
|
||||||
|
},
|
||||||
|
TotalStreams: 1,
|
||||||
|
SuccessfulStreams: 1,
|
||||||
|
BytesSent: 1024,
|
||||||
|
FramesSent: 2,
|
||||||
|
AcksReceived: 1,
|
||||||
|
AckMismatchedStreams: 1,
|
||||||
|
ChannelOpens: 1,
|
||||||
|
ChannelCloses: 1,
|
||||||
|
RoutePressure: mesh.FabricRoutePressureSnapshot{AcquiredTotal: 1, ReleasedTotal: 1, MaxActiveTotal: 1},
|
||||||
|
}
|
||||||
|
|
||||||
|
gotVerdict, reasons := verdict(report)
|
||||||
|
if gotVerdict != "fail" {
|
||||||
|
t.Fatalf("verdict = %q, want fail", gotVerdict)
|
||||||
|
}
|
||||||
|
found := false
|
||||||
|
for _, reason := range reasons {
|
||||||
|
if reason == "ack_mismatched_streams=1" {
|
||||||
|
found = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !found {
|
||||||
|
t.Fatalf("reasons = %v, want ack mismatch reason", reasons)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestVerdictFailsAckIntegrityError(t *testing.T) {
|
||||||
|
report := loadtestReport{
|
||||||
|
Config: loadtestConfig{
|
||||||
|
FailTarget: -1,
|
||||||
|
ImpairTarget: -1,
|
||||||
|
Concurrency: 1,
|
||||||
|
},
|
||||||
|
TotalStreams: 1,
|
||||||
|
FailedStreams: 1,
|
||||||
|
BytesSent: 1024,
|
||||||
|
FramesSent: 1,
|
||||||
|
AcksReceived: 1,
|
||||||
|
AckIntegrityErrors: 1,
|
||||||
|
ChannelOpens: 1,
|
||||||
|
ChannelCloses: 1,
|
||||||
|
RoutePressure: mesh.FabricRoutePressureSnapshot{AcquiredTotal: 1, ReleasedTotal: 1, MaxActiveTotal: 1},
|
||||||
|
}
|
||||||
|
|
||||||
|
gotVerdict, reasons := verdict(report)
|
||||||
|
if gotVerdict != "fail" {
|
||||||
|
t.Fatalf("verdict = %q, want fail", gotVerdict)
|
||||||
|
}
|
||||||
|
found := false
|
||||||
|
for _, reason := range reasons {
|
||||||
|
if reason == "ack_integrity_errors=1" {
|
||||||
|
found = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !found {
|
||||||
|
t.Fatalf("reasons = %v, want ack integrity reason", reasons)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestVerdictFailsBelowMinimumThroughput(t *testing.T) {
|
||||||
|
report := loadtestReport{
|
||||||
|
Config: loadtestConfig{
|
||||||
|
FailTarget: -1,
|
||||||
|
ImpairTarget: -1,
|
||||||
|
Concurrency: 1,
|
||||||
|
MinThroughputMbps: 100,
|
||||||
|
},
|
||||||
|
TotalStreams: 1,
|
||||||
|
SuccessfulStreams: 1,
|
||||||
|
BytesSent: 1024,
|
||||||
|
FramesSent: 1,
|
||||||
|
AcksReceived: 1,
|
||||||
|
ThroughputBps: 99 * 1000 * 1000,
|
||||||
|
ChannelOpens: 1,
|
||||||
|
ChannelCloses: 1,
|
||||||
|
RoutePressure: mesh.FabricRoutePressureSnapshot{AcquiredTotal: 1, ReleasedTotal: 1, MaxActiveTotal: 1},
|
||||||
|
}
|
||||||
|
|
||||||
|
gotVerdict, reasons := verdict(report)
|
||||||
|
if gotVerdict != "fail" {
|
||||||
|
t.Fatalf("verdict = %q, want fail", gotVerdict)
|
||||||
|
}
|
||||||
|
found := false
|
||||||
|
for _, reason := range reasons {
|
||||||
|
if strings.HasPrefix(reason, "throughput_bps=") {
|
||||||
|
found = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !found {
|
||||||
|
t.Fatalf("reasons = %v, want throughput reason", reasons)
|
||||||
|
}
|
||||||
|
|
||||||
|
report.ThroughputBps = 100 * 1000 * 1000
|
||||||
|
if gotVerdict, reasons := verdict(report); gotVerdict != "pass" {
|
||||||
|
t.Fatalf("verdict = %q reasons=%v, want pass at threshold", gotVerdict, reasons)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestVerdictFailsBelowMinimumChannelChurn(t *testing.T) {
|
||||||
|
report := loadtestReport{
|
||||||
|
Config: loadtestConfig{
|
||||||
|
FailTarget: -1,
|
||||||
|
ImpairTarget: -1,
|
||||||
|
Concurrency: 1,
|
||||||
|
MinChannelChurn: 1000,
|
||||||
|
},
|
||||||
|
TotalStreams: 1,
|
||||||
|
SuccessfulStreams: 1,
|
||||||
|
BytesSent: 1024,
|
||||||
|
FramesSent: 1,
|
||||||
|
AcksReceived: 1,
|
||||||
|
ChannelOpens: 1,
|
||||||
|
ChannelCloses: 1,
|
||||||
|
ChannelChurnPerSec: 999,
|
||||||
|
RoutePressure: mesh.FabricRoutePressureSnapshot{AcquiredTotal: 1, ReleasedTotal: 1, MaxActiveTotal: 1},
|
||||||
|
}
|
||||||
|
|
||||||
|
gotVerdict, reasons := verdict(report)
|
||||||
|
if gotVerdict != "fail" {
|
||||||
|
t.Fatalf("verdict = %q, want fail", gotVerdict)
|
||||||
|
}
|
||||||
|
found := false
|
||||||
|
for _, reason := range reasons {
|
||||||
|
if strings.HasPrefix(reason, "channel_churn_per_sec=") {
|
||||||
|
found = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !found {
|
||||||
|
t.Fatalf("reasons = %v, want channel churn reason", reasons)
|
||||||
|
}
|
||||||
|
|
||||||
|
report.ChannelChurnPerSec = 1000
|
||||||
|
if gotVerdict, reasons := verdict(report); gotVerdict != "pass" {
|
||||||
|
t.Fatalf("verdict = %q reasons=%v, want pass at threshold", gotVerdict, reasons)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTargetByteDistributionVerdictDetectsSkew(t *testing.T) {
|
||||||
|
report := loadtestReport{
|
||||||
|
Config: loadtestConfig{
|
||||||
|
Targets: []string{"a", "b", "c", "d"},
|
||||||
|
FailTarget: -1,
|
||||||
|
ImpairTarget: -1,
|
||||||
|
Concurrency: 1,
|
||||||
|
BytesPerStream: 100,
|
||||||
|
},
|
||||||
|
SuccessfulStreams: 40,
|
||||||
|
BytesSent: 4000,
|
||||||
|
TargetStreams: map[string]int{
|
||||||
|
"a": 10,
|
||||||
|
"b": 10,
|
||||||
|
"c": 10,
|
||||||
|
"d": 10,
|
||||||
|
},
|
||||||
|
TargetBytes: map[string]int64{
|
||||||
|
"a": 2500,
|
||||||
|
"b": 500,
|
||||||
|
"c": 500,
|
||||||
|
"d": 500,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
reasons := targetByteDistributionVerdictReasons(report)
|
||||||
|
if len(reasons) != 1 || !strings.HasPrefix(reasons[0], "target_byte_distribution_skew=") {
|
||||||
|
t.Fatalf("reasons = %v, want byte skew reason", reasons)
|
||||||
|
}
|
||||||
|
|
||||||
|
report.TargetBytes = map[string]int64{
|
||||||
|
"a": 1000,
|
||||||
|
"b": 1000,
|
||||||
|
"c": 1000,
|
||||||
|
"d": 1000,
|
||||||
|
}
|
||||||
|
if reasons := targetByteDistributionVerdictReasons(report); len(reasons) != 0 {
|
||||||
|
t.Fatalf("reasons = %v, want balanced bytes pass", reasons)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDistributionVerdictChecksSurvivingTargetsAfterFailure(t *testing.T) {
|
||||||
|
report := loadtestReport{
|
||||||
|
Config: loadtestConfig{
|
||||||
|
Targets: []string{"quic://a:1", "quic://b:1", "quic://c:1", "quic://d:1"},
|
||||||
|
FailTarget: 0,
|
||||||
|
ImpairTarget: -1,
|
||||||
|
Concurrency: 8,
|
||||||
|
},
|
||||||
|
SuccessfulStreams: 90,
|
||||||
|
TargetStreams: map[string]int{
|
||||||
|
"quic://b:1": 90,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
reasons := targetDistributionVerdictReasons(report)
|
||||||
|
if len(reasons) != 1 || !strings.HasPrefix(reasons[0], "target_distribution_collapsed=1/3_targets_used") {
|
||||||
|
t.Fatalf("reasons = %v, want surviving-target collapse", reasons)
|
||||||
|
}
|
||||||
|
|
||||||
|
report.TargetStreams = map[string]int{
|
||||||
|
"quic://b:1": 30,
|
||||||
|
"quic://c:1": 30,
|
||||||
|
"quic://d:1": 30,
|
||||||
|
}
|
||||||
|
if reasons := targetDistributionVerdictReasons(report); len(reasons) != 0 {
|
||||||
|
t.Fatalf("reasons = %v, want balanced surviving targets pass", reasons)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRoutePressureVerdictChecksSurvivingTargetsAfterFailure(t *testing.T) {
|
||||||
|
targets := []string{"quic://a:1", "quic://b:1", "quic://c:1", "quic://d:1"}
|
||||||
|
report := loadtestReport{
|
||||||
|
Config: loadtestConfig{
|
||||||
|
Targets: targets,
|
||||||
|
FailTarget: 0,
|
||||||
|
ImpairTarget: -1,
|
||||||
|
Concurrency: 12,
|
||||||
|
},
|
||||||
|
RoutePressure: mesh.FabricRoutePressureSnapshot{
|
||||||
|
MaxActive: map[string]int{
|
||||||
|
loadtestRouteID(1, targets[1]): 12,
|
||||||
|
},
|
||||||
|
MaxActiveTotal: 12,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
reasons := routePressureDistributionVerdictReasons(report)
|
||||||
|
if len(reasons) != 1 || !strings.HasPrefix(reasons[0], "route_pressure_distribution_collapsed=1/3_targets_used") {
|
||||||
|
t.Fatalf("reasons = %v, want surviving-route-pressure collapse", reasons)
|
||||||
|
}
|
||||||
|
|
||||||
|
report.RoutePressure.MaxActive = map[string]int{
|
||||||
|
loadtestRouteID(1, targets[1]): 4,
|
||||||
|
loadtestRouteID(2, targets[2]): 4,
|
||||||
|
loadtestRouteID(3, targets[3]): 4,
|
||||||
|
}
|
||||||
|
if reasons := routePressureDistributionVerdictReasons(report); len(reasons) != 0 {
|
||||||
|
t.Fatalf("reasons = %v, want balanced surviving route pressure pass", reasons)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestVerdictFailsOverallAckLatencySLO(t *testing.T) {
|
||||||
|
report := loadtestReport{
|
||||||
|
Config: loadtestConfig{
|
||||||
|
FailTarget: -1,
|
||||||
|
ImpairTarget: -1,
|
||||||
|
Concurrency: 1,
|
||||||
|
MaxAckP95Ms: 10,
|
||||||
|
MaxAckP99Ms: 20,
|
||||||
|
},
|
||||||
|
TotalStreams: 1,
|
||||||
|
SuccessfulStreams: 1,
|
||||||
|
BytesSent: 1024,
|
||||||
|
FramesSent: 1,
|
||||||
|
AcksReceived: 1,
|
||||||
|
AckP95Ms: 11,
|
||||||
|
AckP99Ms: 21,
|
||||||
|
ChannelOpens: 1,
|
||||||
|
ChannelCloses: 1,
|
||||||
|
RoutePressure: mesh.FabricRoutePressureSnapshot{AcquiredTotal: 1, ReleasedTotal: 1, MaxActiveTotal: 1},
|
||||||
|
}
|
||||||
|
|
||||||
|
gotVerdict, reasons := verdict(report)
|
||||||
|
if gotVerdict != "fail" {
|
||||||
|
t.Fatalf("verdict = %q, want fail", gotVerdict)
|
||||||
|
}
|
||||||
|
foundP95 := false
|
||||||
|
foundP99 := false
|
||||||
|
for _, reason := range reasons {
|
||||||
|
if strings.HasPrefix(reason, "ack_p95_ms=") {
|
||||||
|
foundP95 = true
|
||||||
|
}
|
||||||
|
if strings.HasPrefix(reason, "ack_p99_ms=") {
|
||||||
|
foundP99 = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !foundP95 || !foundP99 {
|
||||||
|
t.Fatalf("reasons = %v, want ACK p95 and p99 reasons", reasons)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTargetAckVerdictDetectsSlowHealthyTarget(t *testing.T) {
|
||||||
|
report := loadtestReport{
|
||||||
|
Config: loadtestConfig{
|
||||||
|
Targets: []string{"a", "b"},
|
||||||
|
FailTarget: -1,
|
||||||
|
ImpairTarget: -1,
|
||||||
|
MaxTargetAckMs: 10,
|
||||||
|
},
|
||||||
|
TargetStats: map[string]targetStats{
|
||||||
|
"a": {Streams: 10, MaxAckMs: 4},
|
||||||
|
"b": {Streams: 10, MaxAckMs: 11},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
reasons := targetAckVerdictReasons(report)
|
||||||
|
if len(reasons) != 1 || !strings.HasPrefix(reasons[0], "target_ack_ms=b:11>10") {
|
||||||
|
t.Fatalf("reasons = %v, want slow target ack reason", reasons)
|
||||||
|
}
|
||||||
|
|
||||||
|
report.TargetStats["b"] = targetStats{Streams: 10, MaxAckMs: 10}
|
||||||
|
if reasons := targetAckVerdictReasons(report); len(reasons) != 0 {
|
||||||
|
t.Fatalf("reasons = %v, want target ack pass at threshold", reasons)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestVerdictFailsSetupLatencySLO(t *testing.T) {
|
||||||
|
report := loadtestReport{
|
||||||
|
Config: loadtestConfig{
|
||||||
|
FailTarget: -1,
|
||||||
|
ImpairTarget: -1,
|
||||||
|
Concurrency: 1,
|
||||||
|
MaxSetupP95Ms: 10,
|
||||||
|
MaxSetupP99Ms: 20,
|
||||||
|
},
|
||||||
|
TotalStreams: 1,
|
||||||
|
SuccessfulStreams: 1,
|
||||||
|
BytesSent: 1024,
|
||||||
|
FramesSent: 1,
|
||||||
|
AcksReceived: 1,
|
||||||
|
SetupLatencyP95Ms: 11,
|
||||||
|
SetupLatencyP99Ms: 21,
|
||||||
|
ChannelOpens: 1,
|
||||||
|
ChannelCloses: 1,
|
||||||
|
RoutePressure: mesh.FabricRoutePressureSnapshot{AcquiredTotal: 1, ReleasedTotal: 1, MaxActiveTotal: 1},
|
||||||
|
}
|
||||||
|
|
||||||
|
gotVerdict, reasons := verdict(report)
|
||||||
|
if gotVerdict != "fail" {
|
||||||
|
t.Fatalf("verdict = %q, want fail", gotVerdict)
|
||||||
|
}
|
||||||
|
foundP95 := false
|
||||||
|
foundP99 := false
|
||||||
|
for _, reason := range reasons {
|
||||||
|
if strings.HasPrefix(reason, "setup_p95_ms=") {
|
||||||
|
foundP95 = true
|
||||||
|
}
|
||||||
|
if strings.HasPrefix(reason, "setup_p99_ms=") {
|
||||||
|
foundP99 = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !foundP95 || !foundP99 {
|
||||||
|
t.Fatalf("reasons = %v, want setup p95 and p99 reasons", reasons)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestVerdictFailsRerouteLatencySLO(t *testing.T) {
|
||||||
|
report := loadtestReport{
|
||||||
|
Config: loadtestConfig{
|
||||||
|
FailTarget: -1,
|
||||||
|
ImpairTarget: -1,
|
||||||
|
Concurrency: 1,
|
||||||
|
MaxRerouteP95Ms: 10,
|
||||||
|
MaxRerouteP99Ms: 20,
|
||||||
|
},
|
||||||
|
TotalStreams: 1,
|
||||||
|
SuccessfulStreams: 1,
|
||||||
|
BytesSent: 1024,
|
||||||
|
FramesSent: 1,
|
||||||
|
AcksReceived: 1,
|
||||||
|
RerouteLatencyP95Ms: 11,
|
||||||
|
RerouteLatencyP99Ms: 21,
|
||||||
|
ChannelOpens: 1,
|
||||||
|
ChannelCloses: 1,
|
||||||
|
RoutePressure: mesh.FabricRoutePressureSnapshot{AcquiredTotal: 1, ReleasedTotal: 1, MaxActiveTotal: 1},
|
||||||
|
}
|
||||||
|
|
||||||
|
gotVerdict, reasons := verdict(report)
|
||||||
|
if gotVerdict != "fail" {
|
||||||
|
t.Fatalf("verdict = %q, want fail", gotVerdict)
|
||||||
|
}
|
||||||
|
foundP95 := false
|
||||||
|
foundP99 := false
|
||||||
|
for _, reason := range reasons {
|
||||||
|
if strings.HasPrefix(reason, "reroute_p95_ms=") {
|
||||||
|
foundP95 = true
|
||||||
|
}
|
||||||
|
if strings.HasPrefix(reason, "reroute_p99_ms=") {
|
||||||
|
foundP99 = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !foundP95 || !foundP99 {
|
||||||
|
t.Fatalf("reasons = %v, want reroute p95 and p99 reasons", reasons)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestShouldQuarantineTarget(t *testing.T) {
|
||||||
|
quarantined := []string{
|
||||||
|
"ack timeout or session closed",
|
||||||
|
"deadline exceeded",
|
||||||
|
"connection refused",
|
||||||
|
"connection reset by peer",
|
||||||
|
"no route to host",
|
||||||
|
}
|
||||||
|
for _, reason := range quarantined {
|
||||||
|
if !shouldQuarantineTarget(reason) {
|
||||||
|
t.Fatalf("shouldQuarantineTarget(%q) = false, want true", reason)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if shouldQuarantineTarget("ack payload checksum mismatch") {
|
||||||
|
t.Fatal("checksum mismatch should not quarantine a target")
|
||||||
|
}
|
||||||
|
if shouldQuarantineTarget("context deadline exceeded") {
|
||||||
|
t.Fatal("context deadline should not quarantine a target")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSpreadStartDistributesQuarantinedSlot(t *testing.T) {
|
||||||
|
targets := []string{"a", "b", "c", "d"}
|
||||||
|
health := newTargetHealthTracker()
|
||||||
|
health.MarkDegraded("a", "connection refused", time.Minute)
|
||||||
|
counts := map[string]int{}
|
||||||
|
for index := 0; index < 40; index += len(targets) {
|
||||||
|
initial, spread := loadtestSpreadStart(index, len(targets))
|
||||||
|
targetIndex := loadtestPreferredTargetIndex(targets, initial, spread, health, -1)
|
||||||
|
counts[targets[targetIndex]]++
|
||||||
|
}
|
||||||
|
if counts["b"] == 0 || counts["c"] == 0 || counts["d"] == 0 {
|
||||||
|
t.Fatalf("counts = %v, want degraded slot spread across surviving targets", counts)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSpreadUsableTargetDistributesRetries(t *testing.T) {
|
||||||
|
targets := []string{"a", "b", "c", "d"}
|
||||||
|
health := newTargetHealthTracker()
|
||||||
|
health.MarkDegraded("a", "connection refused", time.Minute)
|
||||||
|
counts := map[string]int{}
|
||||||
|
for cohort := 0; cohort < 90; cohort++ {
|
||||||
|
targetIndex := loadtestSpreadUsableTargetIndex(targets, cohort, health, 0)
|
||||||
|
counts[targets[targetIndex]]++
|
||||||
|
}
|
||||||
|
if counts["b"] != 30 || counts["c"] != 30 || counts["d"] != 30 {
|
||||||
|
t.Fatalf("counts = %v, want retry load spread evenly across surviving targets", counts)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLoadtestLogicalStreamIDAvoidsReservedTransportStreams(t *testing.T) {
|
||||||
|
for _, index := range []int{-1, 0, 1, 999, 1000, 10_000} {
|
||||||
|
streamID := loadtestLogicalStreamID(index)
|
||||||
|
if streamID == mesh.ProductionForwardQUICStreamID || streamID == mesh.SyntheticForwardQUICStreamID {
|
||||||
|
t.Fatalf("loadtestLogicalStreamID(%d) = %d, collides with reserved transport stream", index, streamID)
|
||||||
|
}
|
||||||
|
if streamID < 10_000 {
|
||||||
|
t.Fatalf("loadtestLogicalStreamID(%d) = %d, want loadtest stream range", index, streamID)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLatencyAwareTargetIndexKeepsSlowWANFromOwningPool(t *testing.T) {
|
||||||
|
targets := []string{"lan-a", "lan-b", "wan"}
|
||||||
|
health := newTargetHealthTracker()
|
||||||
|
health.RecordProbes([]targetProbeResult{
|
||||||
|
{Target: "lan-a", RTTMs: 4, Usable: true},
|
||||||
|
{Target: "lan-b", RTTMs: 5, Usable: true},
|
||||||
|
{Target: "wan", RTTMs: 400, Usable: true},
|
||||||
|
})
|
||||||
|
counts := map[string]int{}
|
||||||
|
for index := 0; index < 300; index++ {
|
||||||
|
targetIndex := loadtestSpreadUsableTargetIndex(targets, index, health, -1)
|
||||||
|
counts[targets[targetIndex]]++
|
||||||
|
}
|
||||||
|
if counts["wan"] == 0 {
|
||||||
|
t.Fatalf("counts = %v, want slow WAN to stay represented", counts)
|
||||||
|
}
|
||||||
|
if counts["wan"] >= counts["lan-a"] || counts["wan"] >= counts["lan-b"] {
|
||||||
|
t.Fatalf("counts = %v, want latency-aware placement to prefer LAN capacity", counts)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLatencyAwarePreferredTargetUsesAbsolutePlacementOrdinal(t *testing.T) {
|
||||||
|
targets := []string{"lan-a", "lan-b", "lan-c", "wan"}
|
||||||
|
health := newTargetHealthTracker()
|
||||||
|
health.RecordProbes([]targetProbeResult{
|
||||||
|
{Target: "lan-a", RTTMs: 4, Usable: true},
|
||||||
|
{Target: "lan-b", RTTMs: 4, Usable: true},
|
||||||
|
{Target: "lan-c", RTTMs: 4, Usable: true},
|
||||||
|
{Target: "wan", RTTMs: 400, Usable: true},
|
||||||
|
})
|
||||||
|
counts := map[string]int{}
|
||||||
|
for index := 0; index < 500; index++ {
|
||||||
|
preferred, spread := loadtestSpreadStart(index, len(targets))
|
||||||
|
targetIndex := loadtestPreferredTargetIndex(targets, preferred, spread, health, -1)
|
||||||
|
counts[targets[targetIndex]]++
|
||||||
|
}
|
||||||
|
if len(counts) < len(targets) {
|
||||||
|
t.Fatalf("counts = %v, want every probed target represented", counts)
|
||||||
|
}
|
||||||
|
if counts["wan"] >= counts["lan-a"] || counts["wan"] >= counts["lan-b"] || counts["wan"] >= counts["lan-c"] {
|
||||||
|
t.Fatalf("counts = %v, want slow WAN weighted below LAN targets", counts)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestHeterogeneousProbeRTTRelaxesEqualDistributionVerdict(t *testing.T) {
|
||||||
|
report := loadtestReport{
|
||||||
|
Config: loadtestConfig{
|
||||||
|
Targets: []string{"lan", "wan"},
|
||||||
|
Concurrency: 64,
|
||||||
|
},
|
||||||
|
SuccessfulStreams: 100,
|
||||||
|
BytesSent: 100 * 1024,
|
||||||
|
TargetStreams: map[string]int{
|
||||||
|
"lan": 96,
|
||||||
|
"wan": 4,
|
||||||
|
},
|
||||||
|
TargetBytes: map[string]int64{
|
||||||
|
"lan": 96 * 1024,
|
||||||
|
"wan": 4 * 1024,
|
||||||
|
},
|
||||||
|
TargetProbes: []targetProbeResult{
|
||||||
|
{Target: "lan", RTTMs: 4, Usable: true},
|
||||||
|
{Target: "wan", RTTMs: 400, Usable: true},
|
||||||
|
},
|
||||||
|
RoutePressure: mesh.FabricRoutePressureSnapshot{
|
||||||
|
MaxActive: map[string]int{
|
||||||
|
loadtestRouteID(0, "lan"): 32,
|
||||||
|
loadtestRouteID(1, "wan"): 1,
|
||||||
|
},
|
||||||
|
MaxActiveTotal: 32,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
if reasons := targetDistributionVerdictReasons(report); len(reasons) != 0 {
|
||||||
|
t.Fatalf("targetDistributionVerdictReasons = %v, want heterogeneous RTT tolerated", reasons)
|
||||||
|
}
|
||||||
|
if reasons := targetByteDistributionVerdictReasons(report); len(reasons) != 0 {
|
||||||
|
t.Fatalf("targetByteDistributionVerdictReasons = %v, want heterogeneous RTT tolerated", reasons)
|
||||||
|
}
|
||||||
|
if reasons := routePressureDistributionVerdictReasons(report); len(reasons) != 0 {
|
||||||
|
t.Fatalf("routePressureDistributionVerdictReasons = %v, want heterogeneous RTT tolerated", reasons)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTargetHealthQuarantineExpiresButSnapshotKeepsObservation(t *testing.T) {
|
||||||
|
health := newTargetHealthTracker()
|
||||||
|
health.MarkDegraded("a", "ack timeout", time.Nanosecond)
|
||||||
|
if !health.IsDegraded("a") {
|
||||||
|
t.Fatal("target should be degraded immediately")
|
||||||
|
}
|
||||||
|
time.Sleep(time.Millisecond)
|
||||||
|
if health.IsDegraded("a") {
|
||||||
|
t.Fatal("target quarantine did not expire")
|
||||||
|
}
|
||||||
|
snapshot := health.Snapshot()
|
||||||
|
if snapshot["a"] != "ack timeout" {
|
||||||
|
t.Fatalf("snapshot = %v, want historical degraded observation", snapshot)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRoutePressureDistributionVerdictDetectsCollapse(t *testing.T) {
|
||||||
|
report := loadtestReport{
|
||||||
|
Config: loadtestConfig{
|
||||||
|
Targets: []string{"a", "b", "c", "d"},
|
||||||
|
FailTarget: -1,
|
||||||
|
ImpairTarget: -1,
|
||||||
|
Concurrency: 16,
|
||||||
|
},
|
||||||
|
RoutePressure: mesh.FabricRoutePressureSnapshot{
|
||||||
|
MaxActive: map[string]int{
|
||||||
|
loadtestRouteID(0, "a"): 16,
|
||||||
|
},
|
||||||
|
MaxActiveTotal: 16,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
reasons := routePressureDistributionVerdictReasons(report)
|
||||||
|
if len(reasons) != 1 || !strings.HasPrefix(reasons[0], "route_pressure_distribution_collapsed=") {
|
||||||
|
t.Fatalf("reasons = %v, want collapsed route pressure reason", reasons)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRoutePressureDistributionVerdictDetectsSkew(t *testing.T) {
|
||||||
|
report := loadtestReport{
|
||||||
|
Config: loadtestConfig{
|
||||||
|
Targets: []string{"a", "b", "c", "d"},
|
||||||
|
FailTarget: -1,
|
||||||
|
ImpairTarget: -1,
|
||||||
|
Concurrency: 16,
|
||||||
|
},
|
||||||
|
RoutePressure: mesh.FabricRoutePressureSnapshot{
|
||||||
|
MaxActive: map[string]int{
|
||||||
|
loadtestRouteID(0, "a"): 14,
|
||||||
|
loadtestRouteID(1, "b"): 2,
|
||||||
|
loadtestRouteID(2, "c"): 2,
|
||||||
|
loadtestRouteID(3, "d"): 2,
|
||||||
|
},
|
||||||
|
MaxActiveTotal: 16,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
reasons := routePressureDistributionVerdictReasons(report)
|
||||||
|
if len(reasons) != 1 || !strings.HasPrefix(reasons[0], "route_pressure_distribution_skew=") {
|
||||||
|
t.Fatalf("reasons = %v, want route pressure skew reason", reasons)
|
||||||
|
}
|
||||||
|
|
||||||
|
report.RoutePressure.MaxActive = map[string]int{
|
||||||
|
loadtestRouteID(0, "a"): 6,
|
||||||
|
loadtestRouteID(1, "b"): 6,
|
||||||
|
loadtestRouteID(2, "c"): 5,
|
||||||
|
loadtestRouteID(3, "d"): 5,
|
||||||
|
}
|
||||||
|
if reasons := routePressureDistributionVerdictReasons(report); len(reasons) != 0 {
|
||||||
|
t.Fatalf("reasons = %v, want balanced route pressure pass", reasons)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,199 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"crypto/sha256"
|
||||||
|
"encoding/base64"
|
||||||
|
"encoding/hex"
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
|
||||||
|
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/mesh"
|
||||||
|
)
|
||||||
|
|
||||||
|
type smokeOutput struct {
|
||||||
|
OK bool `json:"ok"`
|
||||||
|
Endpoint string `json:"endpoint"`
|
||||||
|
EntryNodeID string `json:"entry_node_id"`
|
||||||
|
NextHopID string `json:"next_hop_node_id"`
|
||||||
|
RouteID string `json:"route_id"`
|
||||||
|
ElapsedMS int64 `json:"elapsed_ms"`
|
||||||
|
Result mesh.ProductionForwardResult `json:"result"`
|
||||||
|
Error string `json:"error,omitempty"`
|
||||||
|
EnvelopePath []string `json:"envelope_path,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type productionForwardResponse struct {
|
||||||
|
Result mesh.ProductionForwardResult `json:"result,omitempty"`
|
||||||
|
Error string `json:"error,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
var (
|
||||||
|
endpoint = flag.String("endpoint", "", "QUIC fabric endpoint for the entry node, for example quic://host:19131.")
|
||||||
|
peerCert = flag.String("peer-cert-sha256", "", "Expected entry node QUIC TLS certificate SHA-256 fingerprint.")
|
||||||
|
clusterID = flag.String("cluster-id", "", "Cluster ID.")
|
||||||
|
routeID = flag.String("route-id", "", "Configured production route ID.")
|
||||||
|
sourceNodeID = flag.String("source-node-id", "", "Route source node ID.")
|
||||||
|
destNodeID = flag.String("destination-node-id", "", "Route destination node ID.")
|
||||||
|
currentNodeID = flag.String("current-hop-node-id", "", "Current hop node ID expected by the entry node.")
|
||||||
|
nextHopNodeID = flag.String("next-hop-node-id", "", "Next hop node ID from the entry node.")
|
||||||
|
routePath = flag.String("route-path", "", "Comma-separated route path.")
|
||||||
|
channel = flag.String("channel", mesh.ProductionChannelFabricControl, "Production channel class.")
|
||||||
|
timeout = flag.Duration("timeout", 10*time.Second, "Smoke request timeout.")
|
||||||
|
payloadText = flag.String("payload", `{"kind":"fabric-production-smoke"}`, "JSON payload string.")
|
||||||
|
payloadB64 = flag.String("payload-b64", "", "Base64-encoded JSON payload string.")
|
||||||
|
)
|
||||||
|
flag.Parse()
|
||||||
|
|
||||||
|
if *endpoint == "" || *clusterID == "" || *routeID == "" || *sourceNodeID == "" || *destNodeID == "" || *currentNodeID == "" || *nextHopNodeID == "" {
|
||||||
|
writeOutput(smokeOutput{OK: false, Error: "endpoint, cluster-id, route-id, source-node-id, destination-node-id, current-hop-node-id and next-hop-node-id are required"})
|
||||||
|
os.Exit(2)
|
||||||
|
}
|
||||||
|
path := splitRoutePath(*routePath)
|
||||||
|
payloadSource := strings.TrimSpace(*payloadText)
|
||||||
|
if strings.TrimSpace(*payloadB64) != "" {
|
||||||
|
decoded, err := base64.StdEncoding.DecodeString(strings.TrimSpace(*payloadB64))
|
||||||
|
if err != nil {
|
||||||
|
writeOutput(smokeOutput{OK: false, Error: "payload-b64 must be valid base64"})
|
||||||
|
os.Exit(2)
|
||||||
|
}
|
||||||
|
payloadSource = string(decoded)
|
||||||
|
}
|
||||||
|
payload := json.RawMessage(strings.TrimSpace(payloadSource))
|
||||||
|
if !json.Valid(payload) {
|
||||||
|
writeOutput(smokeOutput{OK: false, Error: "payload must be valid JSON"})
|
||||||
|
os.Exit(2)
|
||||||
|
}
|
||||||
|
now := time.Now().UTC()
|
||||||
|
messageType := mesh.ProductionMessageFabricControl
|
||||||
|
if strings.TrimSpace(*channel) == mesh.ProductionChannelVPNPacket {
|
||||||
|
messageType = mesh.ProductionMessageVPNPacketBatch
|
||||||
|
}
|
||||||
|
sum := sha256.Sum256(payload)
|
||||||
|
envelope := mesh.ProductionEnvelope{
|
||||||
|
FabricProtocolVersion: mesh.ProtocolVersion,
|
||||||
|
MessageID: fmt.Sprintf("fabric-production-smoke-%d", now.UnixNano()),
|
||||||
|
RouteID: strings.TrimSpace(*routeID),
|
||||||
|
ClusterID: strings.TrimSpace(*clusterID),
|
||||||
|
SourceNodeID: strings.TrimSpace(*sourceNodeID),
|
||||||
|
DestinationNodeID: strings.TrimSpace(*destNodeID),
|
||||||
|
CurrentHopNodeID: strings.TrimSpace(*currentNodeID),
|
||||||
|
NextHopNodeID: strings.TrimSpace(*nextHopNodeID),
|
||||||
|
RoutePath: path,
|
||||||
|
ChannelClass: strings.TrimSpace(*channel),
|
||||||
|
MessageType: messageType,
|
||||||
|
TTL: 8,
|
||||||
|
HopCount: 0,
|
||||||
|
CreatedAt: now,
|
||||||
|
ExpiresAt: now.Add(time.Minute),
|
||||||
|
PayloadLength: len(payload),
|
||||||
|
PayloadHash: hex.EncodeToString(sum[:]),
|
||||||
|
Payload: payload,
|
||||||
|
}
|
||||||
|
|
||||||
|
transport := mesh.NewQUICFabricTransport(nil)
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), *timeout)
|
||||||
|
defer cancel()
|
||||||
|
started := time.Now()
|
||||||
|
result, err := sendProductionEnvelope(ctx, transport, mesh.FabricTransportTarget{
|
||||||
|
EndpointID: "fabric-production-smoke-entry",
|
||||||
|
PeerID: envelope.CurrentHopNodeID,
|
||||||
|
Endpoint: strings.TrimSpace(*endpoint),
|
||||||
|
Transport: "quic",
|
||||||
|
PeerCertSHA256: strings.TrimSpace(*peerCert),
|
||||||
|
Timeout: *timeout,
|
||||||
|
InboundBuffer: 8,
|
||||||
|
ErrorBuffer: 4,
|
||||||
|
}, envelope)
|
||||||
|
output := smokeOutput{
|
||||||
|
OK: err == nil && result.Accepted,
|
||||||
|
Endpoint: *endpoint,
|
||||||
|
EntryNodeID: envelope.CurrentHopNodeID,
|
||||||
|
NextHopID: envelope.NextHopNodeID,
|
||||||
|
RouteID: envelope.RouteID,
|
||||||
|
ElapsedMS: time.Since(started).Milliseconds(),
|
||||||
|
Result: result,
|
||||||
|
EnvelopePath: path,
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
output.Error = err.Error()
|
||||||
|
writeOutput(output)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
writeOutput(output)
|
||||||
|
}
|
||||||
|
|
||||||
|
func sendProductionEnvelope(ctx context.Context, transport *mesh.QUICFabricTransport, target mesh.FabricTransportTarget, envelope mesh.ProductionEnvelope) (mesh.ProductionForwardResult, error) {
|
||||||
|
session, err := transport.Connect(ctx, target)
|
||||||
|
if err != nil {
|
||||||
|
return mesh.ProductionForwardResult{}, err
|
||||||
|
}
|
||||||
|
defer session.Close()
|
||||||
|
payload, err := json.Marshal(envelope)
|
||||||
|
if err != nil {
|
||||||
|
return mesh.ProductionForwardResult{}, err
|
||||||
|
}
|
||||||
|
if err := session.Send(ctx, fabricproto.Frame{
|
||||||
|
Type: fabricproto.FrameData,
|
||||||
|
TrafficClass: fabricproto.TrafficClassReliable,
|
||||||
|
StreamID: mesh.ProductionForwardQUICStreamID,
|
||||||
|
Sequence: 1,
|
||||||
|
Payload: payload,
|
||||||
|
}); err != nil {
|
||||||
|
return mesh.ProductionForwardResult{}, err
|
||||||
|
}
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return mesh.ProductionForwardResult{}, ctx.Err()
|
||||||
|
case err := <-session.Errors():
|
||||||
|
if err != nil {
|
||||||
|
return mesh.ProductionForwardResult{}, err
|
||||||
|
}
|
||||||
|
case frame := <-session.Frames():
|
||||||
|
if frame.Type != fabricproto.FrameData || frame.StreamID != mesh.ProductionForwardQUICStreamID || frame.Sequence != 1 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
var response productionForwardResponse
|
||||||
|
if err := json.Unmarshal(frame.Payload, &response); err != nil {
|
||||||
|
return mesh.ProductionForwardResult{}, err
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(response.Error) != "" {
|
||||||
|
return mesh.ProductionForwardResult{}, errors.New(response.Error)
|
||||||
|
}
|
||||||
|
return response.Result, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// splitRoutePath parses a comma-separated route path into its hop IDs.
//
// Surrounding whitespace is removed from the whole value and from each hop,
// and blank hops are dropped. A blank value yields nil; a value that contains
// only separators yields an empty, non-nil slice.
func splitRoutePath(value string) []string {
	trimmed := strings.TrimSpace(value)
	if len(trimmed) == 0 {
		return nil
	}

	segments := strings.Split(trimmed, ",")
	hops := make([]string, 0, len(segments))
	for i := range segments {
		hop := strings.TrimSpace(segments[i])
		if hop == "" {
			continue
		}
		hops = append(hops, hop)
	}
	return hops
}
|
||||||
|
|
||||||
|
func writeOutput(output smokeOutput) {
|
||||||
|
payload, err := json.MarshalIndent(output, "", " ")
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "marshal smoke output: %v\n", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
fmt.Println(string(payload))
|
||||||
|
}
|
||||||
@@ -28,6 +28,18 @@ type smokeNode struct {
|
|||||||
server *httptest.Server
|
server *httptest.Server
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// smokeSyntheticTransport sends synthetic envelopes to peers by looking up the
// peer's base URL and using a mesh client for that URL.
type smokeSyntheticTransport struct {
	// peers maps a node ID to the base URL used to reach that node.
	peers map[string]string
}
|
||||||
|
|
||||||
|
func (t smokeSyntheticTransport) SendSynthetic(ctx context.Context, nextNodeID string, envelope mesh.SyntheticEnvelope) (mesh.SyntheticEnvelope, error) {
|
||||||
|
baseURL := t.peers[nextNodeID]
|
||||||
|
if baseURL == "" {
|
||||||
|
return mesh.SyntheticEnvelope{}, mesh.ErrSyntheticPeerUnavailable
|
||||||
|
}
|
||||||
|
return mesh.NewClient(baseURL).SendSynthetic(ctx, envelope)
|
||||||
|
}
|
||||||
|
|
||||||
type smokeReport struct {
|
type smokeReport struct {
|
||||||
Stage string `json:"stage"`
|
Stage string `json:"stage"`
|
||||||
ProductionForwarding bool `json:"production_forwarding"`
|
ProductionForwarding bool `json:"production_forwarding"`
|
||||||
@@ -433,7 +445,7 @@ func writeSmokeScopedConfig(local mesh.PeerIdentity, peers map[string]string, ro
|
|||||||
func newSmokeNode(local mesh.PeerIdentity) *smokeNode {
|
func newSmokeNode(local mesh.PeerIdentity) *smokeNode {
|
||||||
node := &smokeNode{Local: local}
|
node := &smokeNode{Local: local}
|
||||||
node.server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
node.server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
mesh.Server{Local: node.Local, SyntheticRuntime: node.Runtime, FabricSessionEnabled: true}.Handler().ServeHTTP(w, r)
|
mesh.Server{Local: node.Local, SyntheticRuntime: node.Runtime, FabricSessionEnabled: true, FabricSessionWebSocketEnabled: true}.Handler().ServeHTTP(w, r)
|
||||||
}))
|
}))
|
||||||
node.URL = node.server.URL
|
node.URL = node.server.URL
|
||||||
return node
|
return node
|
||||||
@@ -454,7 +466,7 @@ func smokeRuntime(local mesh.PeerIdentity, routes []mesh.SyntheticRoute, peers m
|
|||||||
mesh.SyntheticChannelFabricControl,
|
mesh.SyntheticChannelFabricControl,
|
||||||
mesh.SyntheticChannelRouteControl,
|
mesh.SyntheticChannelRouteControl,
|
||||||
},
|
},
|
||||||
Transport: mesh.NewHTTPPeerTransport(peers),
|
Transport: smokeSyntheticTransport{peers: peers},
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -217,7 +217,7 @@ func runInstallLinux(ctx context.Context, args []string) error {
|
|||||||
fs.BoolVar(&cfg.RuntimeConfig.WorkloadSupervisionEnabled, "workload-supervision-enabled", getenvBool("RAP_WORKLOAD_SUPERVISION_ENABLED", false), "Enable node-agent workload status reporting.")
|
fs.BoolVar(&cfg.RuntimeConfig.WorkloadSupervisionEnabled, "workload-supervision-enabled", getenvBool("RAP_WORKLOAD_SUPERVISION_ENABLED", false), "Enable node-agent workload status reporting.")
|
||||||
fs.BoolVar(&cfg.RuntimeConfig.MeshSyntheticRuntimeEnabled, "mesh-synthetic-runtime-enabled", getenvBool("RAP_MESH_SYNTHETIC_RUNTIME_ENABLED", true), "Enable synthetic mesh runtime.")
|
fs.BoolVar(&cfg.RuntimeConfig.MeshSyntheticRuntimeEnabled, "mesh-synthetic-runtime-enabled", getenvBool("RAP_MESH_SYNTHETIC_RUNTIME_ENABLED", true), "Enable synthetic mesh runtime.")
|
||||||
fs.BoolVar(&cfg.RuntimeConfig.MeshProductionForwardingEnabled, "mesh-production-forwarding-enabled", getenvBool("RAP_MESH_PRODUCTION_FORWARDING_ENABLED", false), "Enable production forwarding gate; runtime still fail-closed if unavailable.")
|
fs.BoolVar(&cfg.RuntimeConfig.MeshProductionForwardingEnabled, "mesh-production-forwarding-enabled", getenvBool("RAP_MESH_PRODUCTION_FORWARDING_ENABLED", false), "Enable production forwarding gate; runtime still fail-closed if unavailable.")
|
||||||
fs.BoolVar(&cfg.RuntimeConfig.MeshFabricSessionEnabled, "mesh-fabric-session-enabled", getenvBool("RAP_MESH_FABRIC_SESSION_ENABLED", false), "Enable authenticated fabric session WebSocket endpoint.")
|
fs.BoolVar(&cfg.RuntimeConfig.MeshFabricSessionEnabled, "mesh-fabric-session-enabled", getenvBool("RAP_MESH_FABRIC_SESSION_ENABLED", false), "Enable authenticated fabric session endpoint.")
|
||||||
fs.BoolVar(&cfg.RuntimeConfig.VPNFabricSessionTransportEnabled, "vpn-fabric-session-transport-enabled", getenvBool("RAP_VPN_FABRIC_SESSION_TRANSPORT_ENABLED", false), "Route VPN packet transport over persistent fabric sessions.")
|
fs.BoolVar(&cfg.RuntimeConfig.VPNFabricSessionTransportEnabled, "vpn-fabric-session-transport-enabled", getenvBool("RAP_VPN_FABRIC_SESSION_TRANSPORT_ENABLED", false), "Route VPN packet transport over persistent fabric sessions.")
|
||||||
fs.BoolVar(&cfg.RuntimeConfig.MeshQUICFabricEnabled, "mesh-quic-fabric-enabled", getenvBool("RAP_MESH_QUIC_FABRIC_ENABLED", false), "Enable QUIC/UDP fabric listener.")
|
fs.BoolVar(&cfg.RuntimeConfig.MeshQUICFabricEnabled, "mesh-quic-fabric-enabled", getenvBool("RAP_MESH_QUIC_FABRIC_ENABLED", false), "Enable QUIC/UDP fabric listener.")
|
||||||
fs.StringVar(&cfg.RuntimeConfig.MeshQUICFabricListenAddr, "mesh-quic-fabric-listen-addr", getenv("RAP_MESH_QUIC_FABRIC_LISTEN_ADDR", ""), "QUIC/UDP fabric listen address.")
|
fs.StringVar(&cfg.RuntimeConfig.MeshQUICFabricListenAddr, "mesh-quic-fabric-listen-addr", getenv("RAP_MESH_QUIC_FABRIC_LISTEN_ADDR", ""), "QUIC/UDP fabric listen address.")
|
||||||
@@ -230,7 +230,7 @@ func runInstallLinux(ctx context.Context, args []string) error {
|
|||||||
fs.IntVar(&cfg.RuntimeConfig.MeshListenAutoPortEnd, "mesh-listen-auto-port-end", getenvInt("RAP_MESH_LISTEN_AUTO_PORT_END", 19231), "Last port used when mesh listen port mode is auto.")
|
fs.IntVar(&cfg.RuntimeConfig.MeshListenAutoPortEnd, "mesh-listen-auto-port-end", getenvInt("RAP_MESH_LISTEN_AUTO_PORT_END", 19231), "Last port used when mesh listen port mode is auto.")
|
||||||
fs.StringVar(&cfg.RuntimeConfig.MeshAdvertiseEndpoint, "mesh-advertise-endpoint", getenv("RAP_MESH_ADVERTISE_ENDPOINT", ""), "Advertised mesh endpoint.")
|
fs.StringVar(&cfg.RuntimeConfig.MeshAdvertiseEndpoint, "mesh-advertise-endpoint", getenv("RAP_MESH_ADVERTISE_ENDPOINT", ""), "Advertised mesh endpoint.")
|
||||||
fs.StringVar(&cfg.RuntimeConfig.MeshAdvertiseEndpointsJSON, "mesh-advertise-endpoints-json", getenv("RAP_MESH_ADVERTISE_ENDPOINTS_JSON", ""), "Advertised endpoint candidates JSON.")
|
fs.StringVar(&cfg.RuntimeConfig.MeshAdvertiseEndpointsJSON, "mesh-advertise-endpoints-json", getenv("RAP_MESH_ADVERTISE_ENDPOINTS_JSON", ""), "Advertised endpoint candidates JSON.")
|
||||||
fs.StringVar(&cfg.RuntimeConfig.MeshAdvertiseTransport, "mesh-advertise-transport", getenv("RAP_MESH_ADVERTISE_TRANSPORT", "direct_http"), "Advertised transport.")
|
fs.StringVar(&cfg.RuntimeConfig.MeshAdvertiseTransport, "mesh-advertise-transport", getenv("RAP_MESH_ADVERTISE_TRANSPORT", "quic"), "Advertised transport.")
|
||||||
fs.StringVar(&cfg.RuntimeConfig.MeshConnectivityMode, "mesh-connectivity-mode", getenv("RAP_MESH_CONNECTIVITY_MODE", "outbound_only"), "Connectivity mode hint.")
|
fs.StringVar(&cfg.RuntimeConfig.MeshConnectivityMode, "mesh-connectivity-mode", getenv("RAP_MESH_CONNECTIVITY_MODE", "outbound_only"), "Connectivity mode hint.")
|
||||||
fs.StringVar(&cfg.RuntimeConfig.MeshNATType, "mesh-nat-type", getenv("RAP_MESH_NAT_TYPE", "unknown"), "NAT type hint.")
|
fs.StringVar(&cfg.RuntimeConfig.MeshNATType, "mesh-nat-type", getenv("RAP_MESH_NAT_TYPE", "unknown"), "NAT type hint.")
|
||||||
fs.StringVar(&cfg.RuntimeConfig.MeshRegion, "mesh-region", getenv("RAP_MESH_REGION", "linux"), "Region/site hint.")
|
fs.StringVar(&cfg.RuntimeConfig.MeshRegion, "mesh-region", getenv("RAP_MESH_REGION", "linux"), "Region/site hint.")
|
||||||
@@ -305,7 +305,7 @@ func runInstallWindows(ctx context.Context, args []string) error {
|
|||||||
fs.BoolVar(&cfg.RuntimeConfig.WorkloadSupervisionEnabled, "workload-supervision-enabled", getenvBool("RAP_WORKLOAD_SUPERVISION_ENABLED", false), "Enable node-agent workload status reporting.")
|
fs.BoolVar(&cfg.RuntimeConfig.WorkloadSupervisionEnabled, "workload-supervision-enabled", getenvBool("RAP_WORKLOAD_SUPERVISION_ENABLED", false), "Enable node-agent workload status reporting.")
|
||||||
fs.BoolVar(&cfg.RuntimeConfig.MeshSyntheticRuntimeEnabled, "mesh-synthetic-runtime-enabled", getenvBool("RAP_MESH_SYNTHETIC_RUNTIME_ENABLED", true), "Enable synthetic mesh runtime.")
|
fs.BoolVar(&cfg.RuntimeConfig.MeshSyntheticRuntimeEnabled, "mesh-synthetic-runtime-enabled", getenvBool("RAP_MESH_SYNTHETIC_RUNTIME_ENABLED", true), "Enable synthetic mesh runtime.")
|
||||||
fs.BoolVar(&cfg.RuntimeConfig.MeshProductionForwardingEnabled, "mesh-production-forwarding-enabled", getenvBool("RAP_MESH_PRODUCTION_FORWARDING_ENABLED", false), "Enable production forwarding gate; runtime still fail-closed if unavailable.")
|
fs.BoolVar(&cfg.RuntimeConfig.MeshProductionForwardingEnabled, "mesh-production-forwarding-enabled", getenvBool("RAP_MESH_PRODUCTION_FORWARDING_ENABLED", false), "Enable production forwarding gate; runtime still fail-closed if unavailable.")
|
||||||
fs.BoolVar(&cfg.RuntimeConfig.MeshFabricSessionEnabled, "mesh-fabric-session-enabled", getenvBool("RAP_MESH_FABRIC_SESSION_ENABLED", false), "Enable authenticated fabric session WebSocket endpoint.")
|
fs.BoolVar(&cfg.RuntimeConfig.MeshFabricSessionEnabled, "mesh-fabric-session-enabled", getenvBool("RAP_MESH_FABRIC_SESSION_ENABLED", false), "Enable authenticated fabric session endpoint.")
|
||||||
fs.BoolVar(&cfg.RuntimeConfig.VPNFabricSessionTransportEnabled, "vpn-fabric-session-transport-enabled", getenvBool("RAP_VPN_FABRIC_SESSION_TRANSPORT_ENABLED", false), "Route VPN packet transport over persistent fabric sessions.")
|
fs.BoolVar(&cfg.RuntimeConfig.VPNFabricSessionTransportEnabled, "vpn-fabric-session-transport-enabled", getenvBool("RAP_VPN_FABRIC_SESSION_TRANSPORT_ENABLED", false), "Route VPN packet transport over persistent fabric sessions.")
|
||||||
fs.BoolVar(&cfg.RuntimeConfig.MeshQUICFabricEnabled, "mesh-quic-fabric-enabled", getenvBool("RAP_MESH_QUIC_FABRIC_ENABLED", false), "Enable QUIC/UDP fabric listener.")
|
fs.BoolVar(&cfg.RuntimeConfig.MeshQUICFabricEnabled, "mesh-quic-fabric-enabled", getenvBool("RAP_MESH_QUIC_FABRIC_ENABLED", false), "Enable QUIC/UDP fabric listener.")
|
||||||
fs.StringVar(&cfg.RuntimeConfig.MeshQUICFabricListenAddr, "mesh-quic-fabric-listen-addr", getenv("RAP_MESH_QUIC_FABRIC_LISTEN_ADDR", ""), "QUIC/UDP fabric listen address.")
|
fs.StringVar(&cfg.RuntimeConfig.MeshQUICFabricListenAddr, "mesh-quic-fabric-listen-addr", getenv("RAP_MESH_QUIC_FABRIC_LISTEN_ADDR", ""), "QUIC/UDP fabric listen address.")
|
||||||
@@ -318,7 +318,7 @@ func runInstallWindows(ctx context.Context, args []string) error {
|
|||||||
fs.IntVar(&cfg.RuntimeConfig.MeshListenAutoPortEnd, "mesh-listen-auto-port-end", getenvInt("RAP_MESH_LISTEN_AUTO_PORT_END", 19231), "Last port used when mesh listen port mode is auto.")
|
fs.IntVar(&cfg.RuntimeConfig.MeshListenAutoPortEnd, "mesh-listen-auto-port-end", getenvInt("RAP_MESH_LISTEN_AUTO_PORT_END", 19231), "Last port used when mesh listen port mode is auto.")
|
||||||
fs.StringVar(&cfg.RuntimeConfig.MeshAdvertiseEndpoint, "mesh-advertise-endpoint", getenv("RAP_MESH_ADVERTISE_ENDPOINT", ""), "Advertised mesh endpoint.")
|
fs.StringVar(&cfg.RuntimeConfig.MeshAdvertiseEndpoint, "mesh-advertise-endpoint", getenv("RAP_MESH_ADVERTISE_ENDPOINT", ""), "Advertised mesh endpoint.")
|
||||||
fs.StringVar(&cfg.RuntimeConfig.MeshAdvertiseEndpointsJSON, "mesh-advertise-endpoints-json", getenv("RAP_MESH_ADVERTISE_ENDPOINTS_JSON", ""), "Advertised endpoint candidates JSON.")
|
fs.StringVar(&cfg.RuntimeConfig.MeshAdvertiseEndpointsJSON, "mesh-advertise-endpoints-json", getenv("RAP_MESH_ADVERTISE_ENDPOINTS_JSON", ""), "Advertised endpoint candidates JSON.")
|
||||||
fs.StringVar(&cfg.RuntimeConfig.MeshAdvertiseTransport, "mesh-advertise-transport", getenv("RAP_MESH_ADVERTISE_TRANSPORT", "direct_http"), "Advertised transport.")
|
fs.StringVar(&cfg.RuntimeConfig.MeshAdvertiseTransport, "mesh-advertise-transport", getenv("RAP_MESH_ADVERTISE_TRANSPORT", "quic"), "Advertised transport.")
|
||||||
fs.StringVar(&cfg.RuntimeConfig.MeshConnectivityMode, "mesh-connectivity-mode", getenv("RAP_MESH_CONNECTIVITY_MODE", "outbound_only"), "Connectivity mode hint.")
|
fs.StringVar(&cfg.RuntimeConfig.MeshConnectivityMode, "mesh-connectivity-mode", getenv("RAP_MESH_CONNECTIVITY_MODE", "outbound_only"), "Connectivity mode hint.")
|
||||||
fs.StringVar(&cfg.RuntimeConfig.MeshNATType, "mesh-nat-type", getenv("RAP_MESH_NAT_TYPE", "unknown"), "NAT type hint.")
|
fs.StringVar(&cfg.RuntimeConfig.MeshNATType, "mesh-nat-type", getenv("RAP_MESH_NAT_TYPE", "unknown"), "NAT type hint.")
|
||||||
fs.StringVar(&cfg.RuntimeConfig.MeshRegion, "mesh-region", getenv("RAP_MESH_REGION", "windows"), "Region/site hint.")
|
fs.StringVar(&cfg.RuntimeConfig.MeshRegion, "mesh-region", getenv("RAP_MESH_REGION", "windows"), "Region/site hint.")
|
||||||
@@ -799,7 +799,7 @@ func parseInstall(args []string) (installCommandConfig, error) {
|
|||||||
fs.BoolVar(&cfg.WorkloadSupervisionEnabled, "workload-supervision-enabled", getenvBool("RAP_WORKLOAD_SUPERVISION_ENABLED", false), "Enable node-agent workload status reporting.")
|
fs.BoolVar(&cfg.WorkloadSupervisionEnabled, "workload-supervision-enabled", getenvBool("RAP_WORKLOAD_SUPERVISION_ENABLED", false), "Enable node-agent workload status reporting.")
|
||||||
fs.BoolVar(&cfg.MeshSyntheticRuntimeEnabled, "mesh-synthetic-runtime-enabled", getenvBool("RAP_MESH_SYNTHETIC_RUNTIME_ENABLED", false), "Enable synthetic mesh runtime.")
|
fs.BoolVar(&cfg.MeshSyntheticRuntimeEnabled, "mesh-synthetic-runtime-enabled", getenvBool("RAP_MESH_SYNTHETIC_RUNTIME_ENABLED", false), "Enable synthetic mesh runtime.")
|
||||||
fs.BoolVar(&cfg.MeshProductionForwardingEnabled, "mesh-production-forwarding-enabled", getenvBool("RAP_MESH_PRODUCTION_FORWARDING_ENABLED", false), "Enable production forwarding gate; runtime still fail-closed if unavailable.")
|
fs.BoolVar(&cfg.MeshProductionForwardingEnabled, "mesh-production-forwarding-enabled", getenvBool("RAP_MESH_PRODUCTION_FORWARDING_ENABLED", false), "Enable production forwarding gate; runtime still fail-closed if unavailable.")
|
||||||
fs.BoolVar(&cfg.MeshFabricSessionEnabled, "mesh-fabric-session-enabled", getenvBool("RAP_MESH_FABRIC_SESSION_ENABLED", false), "Enable authenticated fabric session WebSocket endpoint.")
|
fs.BoolVar(&cfg.MeshFabricSessionEnabled, "mesh-fabric-session-enabled", getenvBool("RAP_MESH_FABRIC_SESSION_ENABLED", false), "Enable authenticated fabric session endpoint.")
|
||||||
fs.BoolVar(&cfg.VPNFabricSessionTransportEnabled, "vpn-fabric-session-transport-enabled", getenvBool("RAP_VPN_FABRIC_SESSION_TRANSPORT_ENABLED", false), "Route VPN packet transport over persistent fabric sessions.")
|
fs.BoolVar(&cfg.VPNFabricSessionTransportEnabled, "vpn-fabric-session-transport-enabled", getenvBool("RAP_VPN_FABRIC_SESSION_TRANSPORT_ENABLED", false), "Route VPN packet transport over persistent fabric sessions.")
|
||||||
fs.BoolVar(&cfg.MeshQUICFabricEnabled, "mesh-quic-fabric-enabled", getenvBool("RAP_MESH_QUIC_FABRIC_ENABLED", false), "Enable QUIC/UDP fabric listener.")
|
fs.BoolVar(&cfg.MeshQUICFabricEnabled, "mesh-quic-fabric-enabled", getenvBool("RAP_MESH_QUIC_FABRIC_ENABLED", false), "Enable QUIC/UDP fabric listener.")
|
||||||
fs.StringVar(&cfg.MeshQUICFabricListenAddr, "mesh-quic-fabric-listen-addr", getenv("RAP_MESH_QUIC_FABRIC_LISTEN_ADDR", ""), "QUIC/UDP fabric listen address.")
|
fs.StringVar(&cfg.MeshQUICFabricListenAddr, "mesh-quic-fabric-listen-addr", getenv("RAP_MESH_QUIC_FABRIC_LISTEN_ADDR", ""), "QUIC/UDP fabric listen address.")
|
||||||
@@ -812,7 +812,7 @@ func parseInstall(args []string) (installCommandConfig, error) {
|
|||||||
fs.IntVar(&cfg.MeshListenAutoPortEnd, "mesh-listen-auto-port-end", getenvInt("RAP_MESH_LISTEN_AUTO_PORT_END", 0), "Last port used when mesh listen port mode is auto.")
|
fs.IntVar(&cfg.MeshListenAutoPortEnd, "mesh-listen-auto-port-end", getenvInt("RAP_MESH_LISTEN_AUTO_PORT_END", 0), "Last port used when mesh listen port mode is auto.")
|
||||||
fs.StringVar(&cfg.MeshAdvertiseEndpoint, "mesh-advertise-endpoint", getenv("RAP_MESH_ADVERTISE_ENDPOINT", ""), "Advertised mesh endpoint.")
|
fs.StringVar(&cfg.MeshAdvertiseEndpoint, "mesh-advertise-endpoint", getenv("RAP_MESH_ADVERTISE_ENDPOINT", ""), "Advertised mesh endpoint.")
|
||||||
fs.StringVar(&cfg.MeshAdvertiseEndpointsJSON, "mesh-advertise-endpoints-json", getenv("RAP_MESH_ADVERTISE_ENDPOINTS_JSON", ""), "Advertised endpoint candidates JSON.")
|
fs.StringVar(&cfg.MeshAdvertiseEndpointsJSON, "mesh-advertise-endpoints-json", getenv("RAP_MESH_ADVERTISE_ENDPOINTS_JSON", ""), "Advertised endpoint candidates JSON.")
|
||||||
fs.StringVar(&cfg.MeshAdvertiseTransport, "mesh-advertise-transport", getenv("RAP_MESH_ADVERTISE_TRANSPORT", ""), "Advertised transport.")
|
fs.StringVar(&cfg.MeshAdvertiseTransport, "mesh-advertise-transport", getenv("RAP_MESH_ADVERTISE_TRANSPORT", "quic"), "Advertised transport.")
|
||||||
fs.StringVar(&cfg.MeshConnectivityMode, "mesh-connectivity-mode", getenv("RAP_MESH_CONNECTIVITY_MODE", ""), "Connectivity mode hint.")
|
fs.StringVar(&cfg.MeshConnectivityMode, "mesh-connectivity-mode", getenv("RAP_MESH_CONNECTIVITY_MODE", ""), "Connectivity mode hint.")
|
||||||
fs.StringVar(&cfg.MeshNATType, "mesh-nat-type", getenv("RAP_MESH_NAT_TYPE", ""), "NAT type hint.")
|
fs.StringVar(&cfg.MeshNATType, "mesh-nat-type", getenv("RAP_MESH_NAT_TYPE", ""), "NAT type hint.")
|
||||||
fs.StringVar(&cfg.MeshRegion, "mesh-region", getenv("RAP_MESH_REGION", ""), "Region/site hint.")
|
fs.StringVar(&cfg.MeshRegion, "mesh-region", getenv("RAP_MESH_REGION", ""), "Region/site hint.")
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -2,15 +2,21 @@ module github.com/example/remote-access-platform/agents/rap-node-agent
|
|||||||
|
|
||||||
go 1.25.5
|
go 1.25.5
|
||||||
|
|
||||||
require golang.zx2c4.com/wireguard v0.0.0-20250521234502-f333402bd9cb
|
require (
|
||||||
|
github.com/gorilla/websocket v1.5.3
|
||||||
|
github.com/quic-go/quic-go v0.59.1
|
||||||
|
golang.zx2c4.com/wireguard v0.0.0-20250521234502-f333402bd9cb
|
||||||
|
)
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/gorilla/websocket v1.5.3 // indirect
|
golang.org/x/crypto v0.51.0 // indirect
|
||||||
github.com/quic-go/quic-go v0.59.1 // indirect
|
golang.org/x/mobile v0.0.0-20260514233045-7de0a8fa7f4d // indirect
|
||||||
golang.org/x/crypto v0.50.0 // indirect
|
golang.org/x/mod v0.36.0 // indirect
|
||||||
golang.org/x/net v0.53.0 // indirect
|
golang.org/x/net v0.54.0 // indirect
|
||||||
golang.org/x/sys v0.43.0 // indirect
|
golang.org/x/sync v0.20.0 // indirect
|
||||||
|
golang.org/x/sys v0.44.0 // indirect
|
||||||
golang.org/x/time v0.15.0 // indirect
|
golang.org/x/time v0.15.0 // indirect
|
||||||
|
golang.org/x/tools v0.45.0 // indirect
|
||||||
golang.zx2c4.com/wintun v0.0.0-20230126152724-0fa3db229ce2 // indirect
|
golang.zx2c4.com/wintun v0.0.0-20230126152724-0fa3db229ce2 // indirect
|
||||||
gvisor.dev/gvisor v0.0.0-20260505022556-2306ef3db943 // indirect
|
gvisor.dev/gvisor v0.0.0-20260505022556-2306ef3db943 // indirect
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -1,20 +1,38 @@
|
|||||||
|
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||||
|
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
|
github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
|
||||||
github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
|
github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
|
||||||
|
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||||
|
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||||
github.com/quic-go/quic-go v0.59.1 h1:0Gmua0HW1Tv7ANR7hUYwRyD0MG5OJfgvYSZasGZzBic=
|
github.com/quic-go/quic-go v0.59.1 h1:0Gmua0HW1Tv7ANR7hUYwRyD0MG5OJfgvYSZasGZzBic=
|
||||||
github.com/quic-go/quic-go v0.59.1/go.mod h1:upnsH4Ju1YkqpLXC305eW3yDZ4NfnNbmQRCMWS58IKU=
|
github.com/quic-go/quic-go v0.59.1/go.mod h1:upnsH4Ju1YkqpLXC305eW3yDZ4NfnNbmQRCMWS58IKU=
|
||||||
golang.org/x/crypto v0.50.0 h1:zO47/JPrL6vsNkINmLoo/PH1gcxpls50DNogFvB5ZGI=
|
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
|
||||||
golang.org/x/crypto v0.50.0/go.mod h1:3muZ7vA7PBCE6xgPX7nkzzjiUq87kRItoJQM1Yo8S+Q=
|
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
|
||||||
|
go.uber.org/mock v0.5.2 h1:LbtPTcP8A5k9WPXj54PPPbjcI4Y6lhyOZXn+VS7wNko=
|
||||||
|
go.uber.org/mock v0.5.2/go.mod h1:wLlUxC2vVTPTaE3UD51E0BGOAElKrILxhVSDYQLld5o=
|
||||||
|
golang.org/x/crypto v0.51.0 h1:IBPXwPfKxY7cWQZ38ZCIRPI50YLeevDLlLnyC5wRGTI=
|
||||||
|
golang.org/x/crypto v0.51.0/go.mod h1:8AdwkbraGNABw2kOX6YFPs3WM22XqI4EXEd8g+x7Oc8=
|
||||||
golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa h1:FRnLl4eNAQl8hwxVVC17teOw8kdjVDVAiFMtgUdTSRQ=
|
golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa h1:FRnLl4eNAQl8hwxVVC17teOw8kdjVDVAiFMtgUdTSRQ=
|
||||||
golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa/go.mod h1:zk2irFbV9DP96SEBUUAy67IdHUaZuSnrz1n472HUCLE=
|
golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa/go.mod h1:zk2irFbV9DP96SEBUUAy67IdHUaZuSnrz1n472HUCLE=
|
||||||
golang.org/x/net v0.53.0 h1:d+qAbo5L0orcWAr0a9JweQpjXF19LMXJE8Ey7hwOdUA=
|
golang.org/x/mobile v0.0.0-20260514233045-7de0a8fa7f4d h1:XNPSUMmnREiyj6HdYfJjTJVQIC5c1b3+qV7mbxUjzwk=
|
||||||
golang.org/x/net v0.53.0/go.mod h1:JvMuJH7rrdiCfbeHoo3fCQU24Lf5JJwT9W3sJFulfgs=
|
golang.org/x/mobile v0.0.0-20260514233045-7de0a8fa7f4d/go.mod h1:ltIbhcRzKgwHa4ZxKJeiv0nyzcXUUYCqMyO0Y+vPmXw=
|
||||||
golang.org/x/sys v0.43.0 h1:Rlag2XtaFTxp19wS8MXlJwTvoh8ArU6ezoyFsMyCTNI=
|
golang.org/x/mod v0.36.0 h1:JJjpVx6myfUsUdAzZuOSTTmRE0PfZeNWzzvKrP7amb4=
|
||||||
golang.org/x/sys v0.43.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
|
golang.org/x/mod v0.36.0/go.mod h1:moc6ELqsWcOw5Ef3xVprK5ul/MvtVvkIXLziUOICjUQ=
|
||||||
|
golang.org/x/net v0.54.0 h1:2zJIZAxAHV/OHCDTCOHAYehQzLfSXuf/5SoL/Dv6w/w=
|
||||||
|
golang.org/x/net v0.54.0/go.mod h1:Sj4oj8jK6XmHpBZU/zWHw3BV3abl4Kvi+Ut7cQcY+cQ=
|
||||||
|
golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4=
|
||||||
|
golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0=
|
||||||
|
golang.org/x/sys v0.44.0 h1:ildZl3J4uzeKP07r2F++Op7E9B29JRUy+a27EibtBTQ=
|
||||||
|
golang.org/x/sys v0.44.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
|
||||||
golang.org/x/time v0.15.0 h1:bbrp8t3bGUeFOx08pvsMYRTCVSMk89u4tKbNOZbp88U=
|
golang.org/x/time v0.15.0 h1:bbrp8t3bGUeFOx08pvsMYRTCVSMk89u4tKbNOZbp88U=
|
||||||
golang.org/x/time v0.15.0/go.mod h1:Y4YMaQmXwGQZoFaVFk4YpCt4FLQMYKZe9oeV/f4MSno=
|
golang.org/x/time v0.15.0/go.mod h1:Y4YMaQmXwGQZoFaVFk4YpCt4FLQMYKZe9oeV/f4MSno=
|
||||||
|
golang.org/x/tools v0.45.0 h1:18qN3FAooORvApf5XjCXgsuayZOEtXf6JK18I3+ONa8=
|
||||||
|
golang.org/x/tools v0.45.0/go.mod h1:LuUGqqaXcXMEFEruIVJVm5mgDD8vww/z/SR1gQ4uE/0=
|
||||||
golang.zx2c4.com/wintun v0.0.0-20230126152724-0fa3db229ce2 h1:B82qJJgjvYKsXS9jeunTOisW56dUokqW/FOteYJJ/yg=
|
golang.zx2c4.com/wintun v0.0.0-20230126152724-0fa3db229ce2 h1:B82qJJgjvYKsXS9jeunTOisW56dUokqW/FOteYJJ/yg=
|
||||||
golang.zx2c4.com/wintun v0.0.0-20230126152724-0fa3db229ce2/go.mod h1:deeaetjYA+DHMHg+sMSMI58GrEteJUUzzw7en6TJQcI=
|
golang.zx2c4.com/wintun v0.0.0-20230126152724-0fa3db229ce2/go.mod h1:deeaetjYA+DHMHg+sMSMI58GrEteJUUzzw7en6TJQcI=
|
||||||
golang.zx2c4.com/wireguard v0.0.0-20250521234502-f333402bd9cb h1:whnFRlWMcXI9d+ZbWg+4sHnLp52d5yiIPUxMBSt4X9A=
|
golang.zx2c4.com/wireguard v0.0.0-20250521234502-f333402bd9cb h1:whnFRlWMcXI9d+ZbWg+4sHnLp52d5yiIPUxMBSt4X9A=
|
||||||
golang.zx2c4.com/wireguard v0.0.0-20250521234502-f333402bd9cb/go.mod h1:rpwXGsirqLqN2L0JDJQlwOboGHmptD5ZD6T2VmcqhTw=
|
golang.zx2c4.com/wireguard v0.0.0-20250521234502-f333402bd9cb/go.mod h1:rpwXGsirqLqN2L0JDJQlwOboGHmptD5ZD6T2VmcqhTw=
|
||||||
|
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||||
|
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||||
gvisor.dev/gvisor v0.0.0-20260505022556-2306ef3db943 h1:YUPk0vGbex2+Jk7XXIgLIPG6oEAD9ml0x7wd6i/bmA4=
|
gvisor.dev/gvisor v0.0.0-20260505022556-2306ef3db943 h1:YUPk0vGbex2+Jk7XXIgLIPG6oEAD9ml0x7wd6i/bmA4=
|
||||||
gvisor.dev/gvisor v0.0.0-20260505022556-2306ef3db943/go.mod h1:xQ2PWgHmWJA/Ph4i1q1jBm39BKhc3W0DXqWoDSyuBOY=
|
gvisor.dev/gvisor v0.0.0-20260505022556-2306ef3db943/go.mod h1:xQ2PWgHmWJA/Ph4i1q1jBm39BKhc3W0DXqWoDSyuBOY=
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ import (
|
|||||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/state"
|
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/state"
|
||||||
)
|
)
|
||||||
|
|
||||||
const Version = "0.2.280-fabricsession"
|
const Version = "0.2.309-latencyaware"
|
||||||
|
|
||||||
func EnrollmentPayload(clusterID, joinToken string, identity state.Identity) client.EnrollRequest {
|
func EnrollmentPayload(clusterID, joinToken string, identity state.Identity) client.EnrollRequest {
|
||||||
return client.EnrollRequest{
|
return client.EnrollRequest{
|
||||||
@@ -38,9 +38,12 @@ func EnrollmentPayload(clusterID, joinToken string, identity state.Identity) cli
|
|||||||
"vpn_local_gateway_shortcut": false,
|
"vpn_local_gateway_shortcut": false,
|
||||||
"vpn_farm_owned_dataplane": true,
|
"vpn_farm_owned_dataplane": true,
|
||||||
"fabric_data_session_v1": true,
|
"fabric_data_session_v1": true,
|
||||||
"fabric_session_websocket_smoke": true,
|
"fabric_session_quic_smoke": true,
|
||||||
"vpn_backend_relay_fallback": false,
|
"vpn_backend_relay_fallback": false,
|
||||||
"fabric_service_channel_required": true,
|
"fabric_service_channel_required": true,
|
||||||
|
"web_ingress_workload_contract": "rap.web_ingress.workload_contract.v1",
|
||||||
|
"web_ingress_real_listener_gate": "RAP_WEB_INGRESS_RUNTIME_ENABLED",
|
||||||
|
"web_ingress_runtime_enabled": false,
|
||||||
"external_backend_entry_proxy": true,
|
"external_backend_entry_proxy": true,
|
||||||
},
|
},
|
||||||
ReportedFacts: map[string]any{
|
ReportedFacts: map[string]any{
|
||||||
@@ -67,9 +70,12 @@ func HeartbeatPayload() client.HeartbeatRequest {
|
|||||||
"vpn_local_gateway_shortcut": false,
|
"vpn_local_gateway_shortcut": false,
|
||||||
"vpn_farm_owned_dataplane": true,
|
"vpn_farm_owned_dataplane": true,
|
||||||
"fabric_data_session_v1": true,
|
"fabric_data_session_v1": true,
|
||||||
"fabric_session_websocket_smoke": true,
|
"fabric_session_quic_smoke": true,
|
||||||
"vpn_backend_relay_fallback": false,
|
"vpn_backend_relay_fallback": false,
|
||||||
"fabric_service_channel_required": true,
|
"fabric_service_channel_required": true,
|
||||||
|
"web_ingress_workload_contract": "rap.web_ingress.workload_contract.v1",
|
||||||
|
"web_ingress_real_listener_gate": "RAP_WEB_INGRESS_RUNTIME_ENABLED",
|
||||||
|
"web_ingress_runtime_enabled": false,
|
||||||
"external_backend_entry_proxy": true,
|
"external_backend_entry_proxy": true,
|
||||||
},
|
},
|
||||||
ServiceStates: map[string]any{
|
ServiceStates: map[string]any{
|
||||||
|
|||||||
@@ -14,6 +14,8 @@ import (
|
|||||||
const (
|
const (
|
||||||
AuthoritySchemaVersion = "rap.cluster_authority.v1"
|
AuthoritySchemaVersion = "rap.cluster_authority.v1"
|
||||||
SignatureSchemaVersion = "rap.cluster_authority.signature.v1"
|
SignatureSchemaVersion = "rap.cluster_authority.signature.v1"
|
||||||
|
QuorumSchemaVersion = "rap.cluster_authority.quorum.v1"
|
||||||
|
QuorumEnvelopeVersion = "rap.cluster_authority.quorum_envelope.v1"
|
||||||
AlgorithmEd25519 = "ed25519"
|
AlgorithmEd25519 = "ed25519"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -30,6 +32,34 @@ type Signature struct {
|
|||||||
Signature string `json:"signature"`
|
Signature string `json:"signature"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// QuorumMember is one signer listed in a QuorumDescriptor.
type QuorumMember struct {
	// NodeID optionally names the node associated with this member.
	NodeID string `json:"node_id,omitempty"`
	// Role is an optional label for the member.
	Role string `json:"role,omitempty"`
	// PublicKey is the encoded public key passed to VerifyRaw when this
	// member's signature is checked.
	PublicKey string `json:"public_key"`
	// PublicKeyFingerprint identifies the key; VerifyQuorumRaw derives it from
	// PublicKey when it is blank.
	PublicKeyFingerprint string `json:"public_key_fingerprint"`
	// Scopes lists the scopes the member may sign for; "*" matches any scope.
	Scopes []string `json:"scopes,omitempty"`
}
|
||||||
|
|
||||||
|
type QuorumDescriptor struct {
|
||||||
|
SchemaVersion string `json:"schema_version"`
|
||||||
|
ClusterID string `json:"cluster_id"`
|
||||||
|
Epoch string `json:"epoch"`
|
||||||
|
Threshold int `json:"threshold"`
|
||||||
|
Members []QuorumMember `json:"members"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type QuorumEnvelope struct {
|
||||||
|
SchemaVersion string `json:"schema_version"`
|
||||||
|
ClusterID string `json:"cluster_id"`
|
||||||
|
Epoch string `json:"epoch"`
|
||||||
|
Threshold int `json:"threshold"`
|
||||||
|
PayloadSHA256 string `json:"payload_sha256"`
|
||||||
|
QuorumSHA256 string `json:"quorum_sha256"`
|
||||||
|
Signatures []Signature `json:"signatures"`
|
||||||
|
AllowedScopes []string `json:"allowed_scopes,omitempty"`
|
||||||
|
DecisionReason string `json:"decision_reason,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
func VerifyRaw(publicKeyB64 string, payload json.RawMessage, signature Signature) error {
|
func VerifyRaw(publicKeyB64 string, payload json.RawMessage, signature Signature) error {
|
||||||
if signature.SchemaVersion != SignatureSchemaVersion {
|
if signature.SchemaVersion != SignatureSchemaVersion {
|
||||||
return fmt.Errorf("%w: schema_version must be %s", ErrInvalidSignature, SignatureSchemaVersion)
|
return fmt.Errorf("%w: schema_version must be %s", ErrInvalidSignature, SignatureSchemaVersion)
|
||||||
@@ -58,6 +88,86 @@ func VerifyRaw(publicKeyB64 string, payload json.RawMessage, signature Signature
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func VerifyQuorumRaw(descriptor QuorumDescriptor, payload json.RawMessage, envelope QuorumEnvelope, requiredScope string) error {
|
||||||
|
if descriptor.SchemaVersion != QuorumSchemaVersion {
|
||||||
|
return fmt.Errorf("%w: quorum schema_version must be %s", ErrInvalidSignature, QuorumSchemaVersion)
|
||||||
|
}
|
||||||
|
if envelope.SchemaVersion != QuorumEnvelopeVersion {
|
||||||
|
return fmt.Errorf("%w: quorum envelope schema_version must be %s", ErrInvalidSignature, QuorumEnvelopeVersion)
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(descriptor.ClusterID) == "" || descriptor.ClusterID != envelope.ClusterID {
|
||||||
|
return fmt.Errorf("%w: quorum cluster mismatch", ErrInvalidSignature)
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(descriptor.Epoch) == "" || descriptor.Epoch != envelope.Epoch {
|
||||||
|
return fmt.Errorf("%w: quorum epoch mismatch", ErrInvalidSignature)
|
||||||
|
}
|
||||||
|
threshold := descriptor.Threshold
|
||||||
|
if envelope.Threshold > threshold {
|
||||||
|
threshold = envelope.Threshold
|
||||||
|
}
|
||||||
|
if threshold <= 0 || threshold > len(descriptor.Members) {
|
||||||
|
return fmt.Errorf("%w: invalid quorum threshold", ErrInvalidSignature)
|
||||||
|
}
|
||||||
|
payloadHash, err := HashRaw(payload)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if envelope.PayloadSHA256 != payloadHash {
|
||||||
|
return fmt.Errorf("%w: quorum payload hash mismatch", ErrInvalidSignature)
|
||||||
|
}
|
||||||
|
descriptorHash, err := HashRaw(mustMarshalQuorumDescriptor(descriptor))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if envelope.QuorumSHA256 != descriptorHash {
|
||||||
|
return fmt.Errorf("%w: quorum descriptor hash mismatch", ErrInvalidSignature)
|
||||||
|
}
|
||||||
|
members := map[string]QuorumMember{}
|
||||||
|
for _, member := range descriptor.Members {
|
||||||
|
fingerprint := strings.TrimSpace(member.PublicKeyFingerprint)
|
||||||
|
if fingerprint == "" {
|
||||||
|
publicKey, err := decodePublicKey(member.PublicKey)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
fingerprint = Fingerprint(publicKey)
|
||||||
|
}
|
||||||
|
if _, exists := members[fingerprint]; exists {
|
||||||
|
return fmt.Errorf("%w: duplicate quorum member", ErrInvalidSignature)
|
||||||
|
}
|
||||||
|
member.PublicKeyFingerprint = fingerprint
|
||||||
|
members[fingerprint] = member
|
||||||
|
}
|
||||||
|
seen := map[string]bool{}
|
||||||
|
valid := 0
|
||||||
|
for _, signature := range envelope.Signatures {
|
||||||
|
fingerprint := strings.TrimSpace(signature.KeyFingerprint)
|
||||||
|
if seen[fingerprint] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
member, ok := members[fingerprint]
|
||||||
|
if !ok {
|
||||||
|
return fmt.Errorf("%w: quorum signer is not a member", ErrInvalidSignature)
|
||||||
|
}
|
||||||
|
if requiredScope != "" && !memberAllowsScope(member, requiredScope) {
|
||||||
|
return fmt.Errorf("%w: quorum signer scope mismatch", ErrInvalidSignature)
|
||||||
|
}
|
||||||
|
if err := VerifyRaw(member.PublicKey, payload, signature); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
seen[fingerprint] = true
|
||||||
|
valid++
|
||||||
|
}
|
||||||
|
if valid < threshold {
|
||||||
|
return fmt.Errorf("%w: quorum threshold not met", ErrInvalidSignature)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func QuorumDescriptorHash(descriptor QuorumDescriptor) (string, error) {
|
||||||
|
return HashRaw(mustMarshalQuorumDescriptor(descriptor))
|
||||||
|
}
|
||||||
|
|
||||||
func Fingerprint(publicKey ed25519.PublicKey) string {
|
func Fingerprint(publicKey ed25519.PublicKey) string {
|
||||||
sum := sha256.Sum256(publicKey)
|
sum := sha256.Sum256(publicKey)
|
||||||
return "rap-ca-ed25519-" + hex.EncodeToString(sum[:16])
|
return "rap-ca-ed25519-" + hex.EncodeToString(sum[:16])
|
||||||
@@ -72,6 +182,28 @@ func HashRaw(raw json.RawMessage) (string, error) {
|
|||||||
return hex.EncodeToString(sum[:]), nil
|
return hex.EncodeToString(sum[:]), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func mustMarshalQuorumDescriptor(descriptor QuorumDescriptor) json.RawMessage {
|
||||||
|
raw, err := json.Marshal(descriptor)
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return raw
|
||||||
|
}
|
||||||
|
|
||||||
|
func memberAllowsScope(member QuorumMember, requiredScope string) bool {
|
||||||
|
requiredScope = strings.TrimSpace(requiredScope)
|
||||||
|
if requiredScope == "" {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
for _, scope := range member.Scopes {
|
||||||
|
scope = strings.TrimSpace(scope)
|
||||||
|
if scope == "*" || scope == requiredScope {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
func CanonicalJSON(raw json.RawMessage) ([]byte, error) {
|
func CanonicalJSON(raw json.RawMessage) ([]byte, error) {
|
||||||
if len(raw) == 0 {
|
if len(raw) == 0 {
|
||||||
return nil, fmt.Errorf("%w: empty payload", ErrInvalidPayload)
|
return nil, fmt.Errorf("%w: empty payload", ErrInvalidPayload)
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import (
|
|||||||
"encoding/base64"
|
"encoding/base64"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
|
"fmt"
|
||||||
"testing"
|
"testing"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -50,3 +51,114 @@ func TestVerifyRawRejectsTamperedPayload(t *testing.T) {
|
|||||||
t.Fatalf("err = %v, want ErrInvalidSignature", err)
|
t.Fatalf("err = %v, want ErrInvalidSignature", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestVerifyQuorumRawAcceptsThreshold(t *testing.T) {
|
||||||
|
payload := json.RawMessage(`{"schema_version":"rap.node_update_plan_authority.v1","cluster_id":"cluster-1","action":"update"}`)
|
||||||
|
descriptor, privateKeys := testQuorumDescriptor(t, 3, 2)
|
||||||
|
payloadHash, err := HashRaw(payload)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("payload hash: %v", err)
|
||||||
|
}
|
||||||
|
quorumHash, err := QuorumDescriptorHash(descriptor)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("quorum hash: %v", err)
|
||||||
|
}
|
||||||
|
envelope := QuorumEnvelope{
|
||||||
|
SchemaVersion: QuorumEnvelopeVersion,
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
Epoch: "epoch-1",
|
||||||
|
Threshold: 2,
|
||||||
|
PayloadSHA256: payloadHash,
|
||||||
|
QuorumSHA256: quorumHash,
|
||||||
|
Signatures: []Signature{
|
||||||
|
signTestPayload(t, payload, privateKeys[0]),
|
||||||
|
signTestPayload(t, payload, privateKeys[1]),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
if err := VerifyQuorumRaw(descriptor, payload, envelope, "update-authority"); err != nil {
|
||||||
|
t.Fatalf("VerifyQuorumRaw: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestVerifyQuorumRawRejectsBelowThreshold(t *testing.T) {
|
||||||
|
payload := json.RawMessage(`{"schema_version":"rap.node_update_plan_authority.v1","cluster_id":"cluster-1","action":"update"}`)
|
||||||
|
descriptor, privateKeys := testQuorumDescriptor(t, 3, 2)
|
||||||
|
payloadHash, _ := HashRaw(payload)
|
||||||
|
quorumHash, _ := QuorumDescriptorHash(descriptor)
|
||||||
|
envelope := QuorumEnvelope{
|
||||||
|
SchemaVersion: QuorumEnvelopeVersion,
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
Epoch: "epoch-1",
|
||||||
|
Threshold: 2,
|
||||||
|
PayloadSHA256: payloadHash,
|
||||||
|
QuorumSHA256: quorumHash,
|
||||||
|
Signatures: []Signature{signTestPayload(t, payload, privateKeys[0])},
|
||||||
|
}
|
||||||
|
if err := VerifyQuorumRaw(descriptor, payload, envelope, "update-authority"); !errors.Is(err, ErrInvalidSignature) {
|
||||||
|
t.Fatalf("err = %v, want ErrInvalidSignature", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestVerifyQuorumRawRejectsTamperedDescriptor(t *testing.T) {
|
||||||
|
payload := json.RawMessage(`{"schema_version":"rap.node_update_plan_authority.v1","cluster_id":"cluster-1","action":"update"}`)
|
||||||
|
descriptor, privateKeys := testQuorumDescriptor(t, 3, 2)
|
||||||
|
payloadHash, _ := HashRaw(payload)
|
||||||
|
quorumHash, _ := QuorumDescriptorHash(descriptor)
|
||||||
|
descriptor.Threshold = 1
|
||||||
|
envelope := QuorumEnvelope{
|
||||||
|
SchemaVersion: QuorumEnvelopeVersion,
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
Epoch: "epoch-1",
|
||||||
|
Threshold: 2,
|
||||||
|
PayloadSHA256: payloadHash,
|
||||||
|
QuorumSHA256: quorumHash,
|
||||||
|
Signatures: []Signature{
|
||||||
|
signTestPayload(t, payload, privateKeys[0]),
|
||||||
|
signTestPayload(t, payload, privateKeys[1]),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
if err := VerifyQuorumRaw(descriptor, payload, envelope, "update-authority"); !errors.Is(err, ErrInvalidSignature) {
|
||||||
|
t.Fatalf("err = %v, want ErrInvalidSignature", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func testQuorumDescriptor(t *testing.T, members int, threshold int) (QuorumDescriptor, []ed25519.PrivateKey) {
|
||||||
|
t.Helper()
|
||||||
|
descriptor := QuorumDescriptor{
|
||||||
|
SchemaVersion: QuorumSchemaVersion,
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
Epoch: "epoch-1",
|
||||||
|
Threshold: threshold,
|
||||||
|
}
|
||||||
|
privateKeys := make([]ed25519.PrivateKey, 0, members)
|
||||||
|
for i := 0; i < members; i++ {
|
||||||
|
publicKey, privateKey, err := ed25519.GenerateKey(nil)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("GenerateKey: %v", err)
|
||||||
|
}
|
||||||
|
descriptor.Members = append(descriptor.Members, QuorumMember{
|
||||||
|
NodeID: fmt.Sprintf("authority-%d", i+1),
|
||||||
|
Role: "update-authority",
|
||||||
|
PublicKey: base64.StdEncoding.EncodeToString(publicKey),
|
||||||
|
PublicKeyFingerprint: Fingerprint(publicKey),
|
||||||
|
Scopes: []string{"update-authority"},
|
||||||
|
})
|
||||||
|
privateKeys = append(privateKeys, privateKey)
|
||||||
|
}
|
||||||
|
return descriptor, privateKeys
|
||||||
|
}
|
||||||
|
|
||||||
|
func signTestPayload(t *testing.T, payload json.RawMessage, privateKey ed25519.PrivateKey) Signature {
|
||||||
|
t.Helper()
|
||||||
|
canonical, err := CanonicalJSON(payload)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("CanonicalJSON: %v", err)
|
||||||
|
}
|
||||||
|
publicKey := privateKey.Public().(ed25519.PublicKey)
|
||||||
|
return Signature{
|
||||||
|
SchemaVersion: SignatureSchemaVersion,
|
||||||
|
Algorithm: AlgorithmEd25519,
|
||||||
|
KeyFingerprint: Fingerprint(publicKey),
|
||||||
|
Signature: base64.StdEncoding.EncodeToString(ed25519.Sign(privateKey, canonical)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ import (
|
|||||||
"io"
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/url"
|
"net/url"
|
||||||
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -17,6 +18,17 @@ type Client struct {
|
|||||||
httpClient *http.Client
|
httpClient *http.Client
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// RawControlRequest describes a request that Client.RawControl relays to the
// backend.
type RawControlRequest struct {
	// Method is the HTTP method; RawControl upper-cases it and falls back to
	// GET when it is blank.
	Method string `json:"method"`
	// Path is appended to the client's base URL and must start with "/".
	Path string `json:"path"`
	// Body is an optional JSON request body; empty or literal null sends none.
	Body json.RawMessage `json:"body,omitempty"`
}
|
||||||
|
|
||||||
|
// RawControlResponse is the result returned by Client.RawControl.
type RawControlResponse struct {
	// StatusCode is the HTTP status reported by the backend.
	StatusCode int `json:"status_code"`
	// Body is the response payload exactly as read from the backend.
	Body json.RawMessage `json:"body,omitempty"`
}
|
||||||
|
|
||||||
type EnrollRequest struct {
|
type EnrollRequest struct {
|
||||||
ClusterID string `json:"cluster_id"`
|
ClusterID string `json:"cluster_id"`
|
||||||
JoinToken string `json:"join_token"`
|
JoinToken string `json:"join_token"`
|
||||||
@@ -52,6 +64,7 @@ type NodeBootstrap struct {
|
|||||||
Certificate map[string]any `json:"certificate"`
|
Certificate map[string]any `json:"certificate"`
|
||||||
HeartbeatEndpoint string `json:"heartbeat_endpoint"`
|
HeartbeatEndpoint string `json:"heartbeat_endpoint"`
|
||||||
ClusterAuthority *ClusterAuthorityDescriptor `json:"cluster_authority,omitempty"`
|
ClusterAuthority *ClusterAuthorityDescriptor `json:"cluster_authority,omitempty"`
|
||||||
|
ClusterAuthorityQuorum json.RawMessage `json:"cluster_authority_quorum,omitempty"`
|
||||||
AuthorityPayload json.RawMessage `json:"authority_payload,omitempty"`
|
AuthorityPayload json.RawMessage `json:"authority_payload,omitempty"`
|
||||||
AuthoritySignature *ClusterSignature `json:"authority_signature,omitempty"`
|
AuthoritySignature *ClusterSignature `json:"authority_signature,omitempty"`
|
||||||
}
|
}
|
||||||
@@ -123,6 +136,7 @@ type NodeUpdatePlan struct {
|
|||||||
Artifact *ReleaseArtifact `json:"artifact,omitempty"`
|
Artifact *ReleaseArtifact `json:"artifact,omitempty"`
|
||||||
AuthorityPayload json.RawMessage `json:"authority_payload,omitempty"`
|
AuthorityPayload json.RawMessage `json:"authority_payload,omitempty"`
|
||||||
AuthoritySignature *ClusterSignature `json:"authority_signature,omitempty"`
|
AuthoritySignature *ClusterSignature `json:"authority_signature,omitempty"`
|
||||||
|
AuthorityQuorum *QuorumEnvelope `json:"authority_quorum,omitempty"`
|
||||||
ProductionForwarding bool `json:"production_forwarding"`
|
ProductionForwarding bool `json:"production_forwarding"`
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -293,6 +307,26 @@ type SyntheticMeshConfig struct {
|
|||||||
ProductionForwarding bool `json:"production_forwarding"`
|
ProductionForwarding bool `json:"production_forwarding"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// AdminRuntimeProjectionRequest is the JSON body posted by
// Client.AdminRuntimeProjection. Query and Host are omitted from the encoding
// when empty; all other fields are always present.
type AdminRuntimeProjectionRequest struct {
	SchemaVersion string `json:"schema_version"`
	Method        string `json:"method"`
	Path          string `json:"path"`
	Query         string `json:"query,omitempty"`
	Host          string `json:"host,omitempty"`
	Scope         string `json:"scope"`
	ServiceClass  string `json:"service_class"`
	ObservedAt    string `json:"observed_at"`
}
|
||||||
|
|
||||||
|
// AdminRuntimeProjectionResponse is the JSON document decoded by
// Client.AdminRuntimeProjection. Reason, Headers and Body are omitted from
// the encoding when empty.
type AdminRuntimeProjectionResponse struct {
	SchemaVersion string            `json:"schema_version"`
	Status        string            `json:"status"`
	Reason        string            `json:"reason,omitempty"`
	StatusCode    int               `json:"status_code"`
	Headers       map[string]string `json:"headers,omitempty"`
	Body          json.RawMessage   `json:"body,omitempty"`
}
|
||||||
|
|
||||||
func (c *SyntheticMeshConfig) UnmarshalJSON(data []byte) error {
|
func (c *SyntheticMeshConfig) UnmarshalJSON(data []byte) error {
|
||||||
type syntheticMeshConfigAlias SyntheticMeshConfig
|
type syntheticMeshConfigAlias SyntheticMeshConfig
|
||||||
var decoded syntheticMeshConfigAlias
|
var decoded syntheticMeshConfigAlias
|
||||||
@@ -448,6 +482,18 @@ type ClusterSignature struct {
|
|||||||
SignedAt time.Time `json:"signed_at"`
|
SignedAt time.Time `json:"signed_at"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type QuorumEnvelope struct {
|
||||||
|
SchemaVersion string `json:"schema_version"`
|
||||||
|
ClusterID string `json:"cluster_id"`
|
||||||
|
Epoch string `json:"epoch"`
|
||||||
|
Threshold int `json:"threshold"`
|
||||||
|
PayloadSHA256 string `json:"payload_sha256"`
|
||||||
|
QuorumSHA256 string `json:"quorum_sha256"`
|
||||||
|
Signatures []ClusterSignature `json:"signatures"`
|
||||||
|
AllowedScopes []string `json:"allowed_scopes,omitempty"`
|
||||||
|
DecisionReason string `json:"decision_reason,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
type PeerDirectoryEntry struct {
|
type PeerDirectoryEntry struct {
|
||||||
NodeID string `json:"node_id"`
|
NodeID string `json:"node_id"`
|
||||||
RouteIDs []string `json:"route_ids,omitempty"`
|
RouteIDs []string `json:"route_ids,omitempty"`
|
||||||
@@ -744,6 +790,50 @@ func (c *Client) SyntheticMeshConfig(ctx context.Context, clusterID, nodeID stri
|
|||||||
return response.Config, nil
|
return response.Config, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c *Client) AdminRuntimeProjection(ctx context.Context, clusterID, nodeID string, request AdminRuntimeProjectionRequest) (AdminRuntimeProjectionResponse, error) {
|
||||||
|
var response AdminRuntimeProjectionResponse
|
||||||
|
path := fmt.Sprintf("/clusters/%s/nodes/%s/admin-runtime/projection", clusterID, nodeID)
|
||||||
|
if err := c.postJSON(ctx, path, request, &response); err != nil {
|
||||||
|
return AdminRuntimeProjectionResponse{}, err
|
||||||
|
}
|
||||||
|
return response, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *Client) RawControl(ctx context.Context, request RawControlRequest) (RawControlResponse, error) {
|
||||||
|
method := strings.ToUpper(strings.TrimSpace(request.Method))
|
||||||
|
if method == "" {
|
||||||
|
method = http.MethodGet
|
||||||
|
}
|
||||||
|
path := strings.TrimSpace(request.Path)
|
||||||
|
if !strings.HasPrefix(path, "/") {
|
||||||
|
return RawControlResponse{}, fmt.Errorf("control path must be relative")
|
||||||
|
}
|
||||||
|
var body io.Reader
|
||||||
|
if len(request.Body) > 0 && string(request.Body) != "null" {
|
||||||
|
body = bytes.NewReader(request.Body)
|
||||||
|
}
|
||||||
|
httpReq, err := http.NewRequestWithContext(ctx, method, c.baseURL+path, body)
|
||||||
|
if err != nil {
|
||||||
|
return RawControlResponse{}, err
|
||||||
|
}
|
||||||
|
if body != nil {
|
||||||
|
httpReq.Header.Set("Content-Type", "application/json")
|
||||||
|
}
|
||||||
|
httpResp, err := c.httpClient.Do(httpReq)
|
||||||
|
if err != nil {
|
||||||
|
return RawControlResponse{}, err
|
||||||
|
}
|
||||||
|
defer httpResp.Body.Close()
|
||||||
|
payload, err := io.ReadAll(io.LimitReader(httpResp.Body, 2*1024*1024))
|
||||||
|
if err != nil {
|
||||||
|
return RawControlResponse{}, err
|
||||||
|
}
|
||||||
|
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
|
||||||
|
return RawControlResponse{}, fmt.Errorf("backend returned status %d: %s", httpResp.StatusCode, string(payload))
|
||||||
|
}
|
||||||
|
return RawControlResponse{StatusCode: httpResp.StatusCode, Body: json.RawMessage(payload)}, nil
|
||||||
|
}
|
||||||
|
|
||||||
func (c *Client) getJSON(ctx context.Context, path string, response any) error {
|
func (c *Client) getJSON(ctx context.Context, path string, response any) error {
|
||||||
httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, c.baseURL+path, nil)
|
httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, c.baseURL+path, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -21,6 +21,11 @@ type Config struct {
|
|||||||
NodeName string
|
NodeName string
|
||||||
StateDir string
|
StateDir string
|
||||||
WorkloadSupervisionEnabled bool
|
WorkloadSupervisionEnabled bool
|
||||||
|
WebIngressRuntimeEnabled bool
|
||||||
|
WebIngressSigningPrivateKey string
|
||||||
|
WebIngressSigningKeyID string
|
||||||
|
WebIngressTrustedKeysJSON string
|
||||||
|
WebIngressRuntimeServiceClasses string
|
||||||
HeartbeatInterval time.Duration
|
HeartbeatInterval time.Duration
|
||||||
EnrollmentPollInterval time.Duration
|
EnrollmentPollInterval time.Duration
|
||||||
EnrollmentPollTimeout time.Duration
|
EnrollmentPollTimeout time.Duration
|
||||||
@@ -43,6 +48,12 @@ type Config struct {
|
|||||||
MeshAdvertiseTransport string
|
MeshAdvertiseTransport string
|
||||||
MeshConnectivityMode string
|
MeshConnectivityMode string
|
||||||
MeshNATType string
|
MeshNATType string
|
||||||
|
MeshLocalSegmentID string
|
||||||
|
MeshNATGroupID string
|
||||||
|
MeshSTUNReflexiveEndpoint string
|
||||||
|
MeshSTUNServer string
|
||||||
|
MeshRelayNodeID string
|
||||||
|
MeshRelayEndpoint string
|
||||||
MeshRegion string
|
MeshRegion string
|
||||||
MeshSyntheticConfigPath string
|
MeshSyntheticConfigPath string
|
||||||
MeshPeerEndpointsJSON string
|
MeshPeerEndpointsJSON string
|
||||||
@@ -68,9 +79,14 @@ func Load(args []string, env map[string]string) (Config, error) {
|
|||||||
fs.StringVar(&cfg.NodeName, "node-name", getEnv(env, "RAP_NODE_NAME", hostnameOrDefault()), "Node display name.")
|
fs.StringVar(&cfg.NodeName, "node-name", getEnv(env, "RAP_NODE_NAME", hostnameOrDefault()), "Node display name.")
|
||||||
fs.StringVar(&cfg.StateDir, "state-dir", getEnv(env, "RAP_NODE_STATE_DIR", defaultStateDir), "Local node-agent state directory.")
|
fs.StringVar(&cfg.StateDir, "state-dir", getEnv(env, "RAP_NODE_STATE_DIR", defaultStateDir), "Local node-agent state directory.")
|
||||||
fs.BoolVar(&cfg.WorkloadSupervisionEnabled, "workload-supervision-enabled", getEnvBool(env, "RAP_WORKLOAD_SUPERVISION_ENABLED", false), "Enable desired workload polling and status reporting. Disabled by default while service runtime is not implemented.")
|
fs.BoolVar(&cfg.WorkloadSupervisionEnabled, "workload-supervision-enabled", getEnvBool(env, "RAP_WORKLOAD_SUPERVISION_ENABLED", false), "Enable desired workload polling and status reporting. Disabled by default while service runtime is not implemented.")
|
||||||
|
fs.BoolVar(&cfg.WebIngressRuntimeEnabled, "web-ingress-runtime-enabled", getEnvBool(env, "RAP_WEB_INGRESS_RUNTIME_ENABLED", false), "Enable the future real 80/443 web ingress listener runtime. Disabled by default; contract probe remains safe without it.")
|
||||||
|
fs.StringVar(&cfg.WebIngressSigningPrivateKey, "web-ingress-signing-private-key", getEnv(env, "RAP_WEB_INGRESS_SIGNING_PRIVATE_KEY", ""), "Base64 Ed25519 private key used to sign web ingress fabric envelopes. Empty keeps signing disabled.")
|
||||||
|
fs.StringVar(&cfg.WebIngressSigningKeyID, "web-ingress-signing-key-id", getEnv(env, "RAP_WEB_INGRESS_SIGNING_KEY_ID", ""), "Optional key id for web ingress envelope signatures.")
|
||||||
|
fs.StringVar(&cfg.WebIngressTrustedKeysJSON, "web-ingress-trusted-keys-json", getEnv(env, "RAP_WEB_INGRESS_TRUSTED_KEYS_JSON", ""), "JSON map or array of trusted Ed25519 public keys for web ingress runtime receiver.")
|
||||||
|
fs.StringVar(&cfg.WebIngressRuntimeServiceClasses, "web-ingress-runtime-service-classes", getEnv(env, "RAP_WEB_INGRESS_RUNTIME_SERVICE_CLASSES", ""), "Optional comma-separated allow-list of web ingress runtime service classes accepted by this node.")
|
||||||
fs.BoolVar(&cfg.MeshSyntheticRuntimeEnabled, "mesh-synthetic-runtime-enabled", getEnvBool(env, "RAP_MESH_SYNTHETIC_RUNTIME_ENABLED", false), "Enable C17A synthetic fabric probe runtime. Disabled by default.")
|
fs.BoolVar(&cfg.MeshSyntheticRuntimeEnabled, "mesh-synthetic-runtime-enabled", getEnvBool(env, "RAP_MESH_SYNTHETIC_RUNTIME_ENABLED", false), "Enable C17A synthetic fabric probe runtime. Disabled by default.")
|
||||||
fs.BoolVar(&cfg.MeshProductionForwardingEnabled, "mesh-production-forwarding-enabled", getEnvBool(env, "RAP_MESH_PRODUCTION_FORWARDING_ENABLED", false), "Enable production fabric-control direct next-hop forwarding gate. Disabled by default.")
|
fs.BoolVar(&cfg.MeshProductionForwardingEnabled, "mesh-production-forwarding-enabled", getEnvBool(env, "RAP_MESH_PRODUCTION_FORWARDING_ENABLED", false), "Enable production fabric-control direct next-hop forwarding gate. Disabled by default.")
|
||||||
fs.BoolVar(&cfg.MeshFabricSessionEnabled, "mesh-fabric-session-enabled", getEnvBool(env, "RAP_MESH_FABRIC_SESSION_ENABLED", false), "Enable authenticated fabric session WebSocket endpoint. Disabled by default.")
|
fs.BoolVar(&cfg.MeshFabricSessionEnabled, "mesh-fabric-session-enabled", getEnvBool(env, "RAP_MESH_FABRIC_SESSION_ENABLED", false), "Enable authenticated fabric session endpoint. Disabled by default.")
|
||||||
fs.BoolVar(&cfg.VPNFabricSessionTransportEnabled, "vpn-fabric-session-transport-enabled", getEnvBool(env, "RAP_VPN_FABRIC_SESSION_TRANSPORT_ENABLED", false), "Route VPN packet transport over persistent fabric session when explicitly enabled. Disabled by default.")
|
fs.BoolVar(&cfg.VPNFabricSessionTransportEnabled, "vpn-fabric-session-transport-enabled", getEnvBool(env, "RAP_VPN_FABRIC_SESSION_TRANSPORT_ENABLED", false), "Route VPN packet transport over persistent fabric session when explicitly enabled. Disabled by default.")
|
||||||
fs.BoolVar(&cfg.MeshQUICFabricEnabled, "mesh-quic-fabric-enabled", getEnvBool(env, "RAP_MESH_QUIC_FABRIC_ENABLED", false), "Enable QUIC/UDP fabric listener. Disabled by default.")
|
fs.BoolVar(&cfg.MeshQUICFabricEnabled, "mesh-quic-fabric-enabled", getEnvBool(env, "RAP_MESH_QUIC_FABRIC_ENABLED", false), "Enable QUIC/UDP fabric listener. Disabled by default.")
|
||||||
fs.StringVar(&cfg.MeshQUICFabricListenAddr, "mesh-quic-fabric-listen-addr", getEnv(env, "RAP_MESH_QUIC_FABRIC_LISTEN_ADDR", ""), "Listen address for QUIC/UDP fabric endpoint, for example :19443.")
|
fs.StringVar(&cfg.MeshQUICFabricListenAddr, "mesh-quic-fabric-listen-addr", getEnv(env, "RAP_MESH_QUIC_FABRIC_LISTEN_ADDR", ""), "Listen address for QUIC/UDP fabric endpoint, for example :19443.")
|
||||||
@@ -84,9 +100,15 @@ func Load(args []string, env map[string]string) (Config, error) {
|
|||||||
fs.IntVar(&cfg.MeshListenAutoPortEnd, "mesh-listen-auto-port-end", getEnvInt(env, "RAP_MESH_LISTEN_AUTO_PORT_END", 19231), "Last port used when mesh listen port mode is auto.")
|
fs.IntVar(&cfg.MeshListenAutoPortEnd, "mesh-listen-auto-port-end", getEnvInt(env, "RAP_MESH_LISTEN_AUTO_PORT_END", 19231), "Last port used when mesh listen port mode is auto.")
|
||||||
fs.StringVar(&cfg.MeshAdvertiseEndpoint, "mesh-advertise-endpoint", getEnv(env, "RAP_MESH_ADVERTISE_ENDPOINT", ""), "Advertised mesh endpoint reported to the Control Plane. Empty disables endpoint reporting.")
|
fs.StringVar(&cfg.MeshAdvertiseEndpoint, "mesh-advertise-endpoint", getEnv(env, "RAP_MESH_ADVERTISE_ENDPOINT", ""), "Advertised mesh endpoint reported to the Control Plane. Empty disables endpoint reporting.")
|
||||||
fs.StringVar(&cfg.MeshAdvertiseEndpointsJSON, "mesh-advertise-endpoints-json", getEnv(env, "RAP_MESH_ADVERTISE_ENDPOINTS_JSON", ""), "JSON array of advertised mesh endpoint candidates, including private/corporate endpoints.")
|
fs.StringVar(&cfg.MeshAdvertiseEndpointsJSON, "mesh-advertise-endpoints-json", getEnv(env, "RAP_MESH_ADVERTISE_ENDPOINTS_JSON", ""), "JSON array of advertised mesh endpoint candidates, including private/corporate endpoints.")
|
||||||
fs.StringVar(&cfg.MeshAdvertiseTransport, "mesh-advertise-transport", getEnv(env, "RAP_MESH_ADVERTISE_TRANSPORT", "direct_tcp_tls"), "Transport label for the advertised mesh endpoint.")
|
fs.StringVar(&cfg.MeshAdvertiseTransport, "mesh-advertise-transport", getEnv(env, "RAP_MESH_ADVERTISE_TRANSPORT", "quic"), "Transport label for the advertised mesh endpoint.")
|
||||||
fs.StringVar(&cfg.MeshConnectivityMode, "mesh-connectivity-mode", getEnv(env, "RAP_MESH_CONNECTIVITY_MODE", "direct"), "Connectivity mode reported with the advertised mesh endpoint.")
|
fs.StringVar(&cfg.MeshConnectivityMode, "mesh-connectivity-mode", getEnv(env, "RAP_MESH_CONNECTIVITY_MODE", "direct"), "Connectivity mode reported with the advertised mesh endpoint.")
|
||||||
fs.StringVar(&cfg.MeshNATType, "mesh-nat-type", getEnv(env, "RAP_MESH_NAT_TYPE", "unknown"), "NAT type hint reported with the advertised mesh endpoint.")
|
fs.StringVar(&cfg.MeshNATType, "mesh-nat-type", getEnv(env, "RAP_MESH_NAT_TYPE", "unknown"), "NAT type hint reported with the advertised mesh endpoint.")
|
||||||
|
fs.StringVar(&cfg.MeshLocalSegmentID, "mesh-local-segment-id", getEnv(env, "RAP_MESH_LOCAL_SEGMENT_ID", ""), "Optional local LAN/site segment ID advertised with QUIC endpoint candidates.")
|
||||||
|
fs.StringVar(&cfg.MeshNATGroupID, "mesh-nat-group-id", getEnv(env, "RAP_MESH_NAT_GROUP_ID", ""), "Optional NAT group ID advertised with QUIC endpoint candidates.")
|
||||||
|
fs.StringVar(&cfg.MeshSTUNReflexiveEndpoint, "mesh-stun-reflexive-endpoint", getEnv(env, "RAP_MESH_STUN_REFLEXIVE_ENDPOINT", ""), "Optional STUN-discovered reflexive QUIC endpoint, for example quic://203.0.113.10:19443.")
|
||||||
|
fs.StringVar(&cfg.MeshSTUNServer, "mesh-stun-server", getEnv(env, "RAP_MESH_STUN_SERVER", ""), "Optional STUN server name used to discover the reflexive endpoint.")
|
||||||
|
fs.StringVar(&cfg.MeshRelayNodeID, "mesh-relay-node-id", getEnv(env, "RAP_MESH_RELAY_NODE_ID", ""), "Optional relay node ID for relay-required QUIC fallback candidates.")
|
||||||
|
fs.StringVar(&cfg.MeshRelayEndpoint, "mesh-relay-endpoint", getEnv(env, "RAP_MESH_RELAY_ENDPOINT", ""), "Optional relay QUIC endpoint for relay-required fallback candidates.")
|
||||||
fs.StringVar(&cfg.MeshRegion, "mesh-region", getEnv(env, "RAP_MESH_REGION", ""), "Optional region/site hint for the advertised mesh endpoint.")
|
fs.StringVar(&cfg.MeshRegion, "mesh-region", getEnv(env, "RAP_MESH_REGION", ""), "Optional region/site hint for the advertised mesh endpoint.")
|
||||||
fs.StringVar(&cfg.MeshSyntheticConfigPath, "mesh-synthetic-config", getEnv(env, "RAP_MESH_SYNTHETIC_CONFIG", ""), "Path to scoped synthetic mesh config snapshot. Preferred over debug JSON env.")
|
fs.StringVar(&cfg.MeshSyntheticConfigPath, "mesh-synthetic-config", getEnv(env, "RAP_MESH_SYNTHETIC_CONFIG", ""), "Path to scoped synthetic mesh config snapshot. Preferred over debug JSON env.")
|
||||||
fs.StringVar(&cfg.MeshPeerEndpointsJSON, "mesh-peer-endpoints-json", getEnv(env, "RAP_MESH_PEER_ENDPOINTS_JSON", ""), "JSON object mapping peer node_id to synthetic mesh endpoint URL.")
|
fs.StringVar(&cfg.MeshPeerEndpointsJSON, "mesh-peer-endpoints-json", getEnv(env, "RAP_MESH_PEER_ENDPOINTS_JSON", ""), "JSON object mapping peer node_id to synthetic mesh endpoint URL.")
|
||||||
@@ -129,12 +151,27 @@ func Load(args []string, env map[string]string) (Config, error) {
|
|||||||
cfg.MeshAdvertiseEndpoint = strings.TrimRight(strings.TrimSpace(cfg.MeshAdvertiseEndpoint), "/")
|
cfg.MeshAdvertiseEndpoint = strings.TrimRight(strings.TrimSpace(cfg.MeshAdvertiseEndpoint), "/")
|
||||||
cfg.MeshAdvertiseEndpointsJSON = strings.TrimSpace(cfg.MeshAdvertiseEndpointsJSON)
|
cfg.MeshAdvertiseEndpointsJSON = strings.TrimSpace(cfg.MeshAdvertiseEndpointsJSON)
|
||||||
cfg.MeshAdvertiseTransport = strings.TrimSpace(cfg.MeshAdvertiseTransport)
|
cfg.MeshAdvertiseTransport = strings.TrimSpace(cfg.MeshAdvertiseTransport)
|
||||||
|
if cfg.MeshAdvertiseTransport == "" {
|
||||||
|
cfg.MeshAdvertiseTransport = "quic"
|
||||||
|
}
|
||||||
|
cfg.MeshAdvertiseTransport = normalizeLegacyAdvertiseTransport(cfg.MeshAdvertiseTransport)
|
||||||
|
cfg.MeshAdvertiseEndpoint = normalizeLegacyEndpointSchemeToQUIC(cfg.MeshAdvertiseEndpoint)
|
||||||
cfg.MeshConnectivityMode = strings.TrimSpace(cfg.MeshConnectivityMode)
|
cfg.MeshConnectivityMode = strings.TrimSpace(cfg.MeshConnectivityMode)
|
||||||
cfg.MeshNATType = strings.TrimSpace(cfg.MeshNATType)
|
cfg.MeshNATType = strings.TrimSpace(cfg.MeshNATType)
|
||||||
|
cfg.MeshLocalSegmentID = strings.TrimSpace(cfg.MeshLocalSegmentID)
|
||||||
|
cfg.MeshNATGroupID = strings.TrimSpace(cfg.MeshNATGroupID)
|
||||||
|
cfg.MeshSTUNReflexiveEndpoint = normalizeLegacyEndpointSchemeToQUIC(strings.TrimRight(strings.TrimSpace(cfg.MeshSTUNReflexiveEndpoint), "/"))
|
||||||
|
cfg.MeshSTUNServer = strings.TrimSpace(cfg.MeshSTUNServer)
|
||||||
|
cfg.MeshRelayNodeID = strings.TrimSpace(cfg.MeshRelayNodeID)
|
||||||
|
cfg.MeshRelayEndpoint = normalizeLegacyEndpointSchemeToQUIC(strings.TrimRight(strings.TrimSpace(cfg.MeshRelayEndpoint), "/"))
|
||||||
cfg.MeshRegion = strings.TrimSpace(cfg.MeshRegion)
|
cfg.MeshRegion = strings.TrimSpace(cfg.MeshRegion)
|
||||||
cfg.MeshSyntheticConfigPath = strings.TrimSpace(cfg.MeshSyntheticConfigPath)
|
cfg.MeshSyntheticConfigPath = strings.TrimSpace(cfg.MeshSyntheticConfigPath)
|
||||||
cfg.MeshPeerEndpointsJSON = strings.TrimSpace(cfg.MeshPeerEndpointsJSON)
|
cfg.MeshPeerEndpointsJSON = strings.TrimSpace(cfg.MeshPeerEndpointsJSON)
|
||||||
cfg.MeshSyntheticRoutesJSON = strings.TrimSpace(cfg.MeshSyntheticRoutesJSON)
|
cfg.MeshSyntheticRoutesJSON = strings.TrimSpace(cfg.MeshSyntheticRoutesJSON)
|
||||||
|
cfg.WebIngressSigningPrivateKey = strings.TrimSpace(cfg.WebIngressSigningPrivateKey)
|
||||||
|
cfg.WebIngressSigningKeyID = strings.TrimSpace(cfg.WebIngressSigningKeyID)
|
||||||
|
cfg.WebIngressTrustedKeysJSON = strings.TrimSpace(cfg.WebIngressTrustedKeysJSON)
|
||||||
|
cfg.WebIngressRuntimeServiceClasses = strings.TrimSpace(cfg.WebIngressRuntimeServiceClasses)
|
||||||
cfg.RemoteWorkspaceRealAdapterCommand = strings.TrimSpace(cfg.RemoteWorkspaceRealAdapterCommand)
|
cfg.RemoteWorkspaceRealAdapterCommand = strings.TrimSpace(cfg.RemoteWorkspaceRealAdapterCommand)
|
||||||
cfg.RemoteWorkspaceRealAdapterArgsJSON = strings.TrimSpace(cfg.RemoteWorkspaceRealAdapterArgsJSON)
|
cfg.RemoteWorkspaceRealAdapterArgsJSON = strings.TrimSpace(cfg.RemoteWorkspaceRealAdapterArgsJSON)
|
||||||
cfg.RemoteWorkspaceRealAdapterWorkDir = strings.TrimSpace(cfg.RemoteWorkspaceRealAdapterWorkDir)
|
cfg.RemoteWorkspaceRealAdapterWorkDir = strings.TrimSpace(cfg.RemoteWorkspaceRealAdapterWorkDir)
|
||||||
@@ -176,9 +213,62 @@ func Load(args []string, env map[string]string) (Config, error) {
|
|||||||
if cfg.MeshListenAutoPortStart > cfg.MeshListenAutoPortEnd {
|
if cfg.MeshListenAutoPortStart > cfg.MeshListenAutoPortEnd {
|
||||||
return Config{}, errors.New("mesh listen auto port start must be less than or equal to end")
|
return Config{}, errors.New("mesh listen auto port start must be less than or equal to end")
|
||||||
}
|
}
|
||||||
|
if !isQUICAdvertiseTransport(cfg.MeshAdvertiseTransport) {
|
||||||
|
return Config{}, errors.New("mesh advertise transport must be a QUIC transport label")
|
||||||
|
}
|
||||||
|
if hasLegacyEndpointScheme(cfg.MeshAdvertiseEndpoint) {
|
||||||
|
return Config{}, errors.New("mesh advertise endpoint must be a QUIC endpoint")
|
||||||
|
}
|
||||||
|
if cfg.MeshSTUNReflexiveEndpoint != "" && hasLegacyEndpointScheme(cfg.MeshSTUNReflexiveEndpoint) {
|
||||||
|
return Config{}, errors.New("mesh STUN reflexive endpoint must be a QUIC endpoint")
|
||||||
|
}
|
||||||
|
if cfg.MeshRelayEndpoint != "" && hasLegacyEndpointScheme(cfg.MeshRelayEndpoint) {
|
||||||
|
return Config{}, errors.New("mesh relay endpoint must be a QUIC endpoint")
|
||||||
|
}
|
||||||
return cfg, nil
|
return cfg, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// isQUICAdvertiseTransport reports whether label names one of the accepted
// QUIC transport labels. The comparison ignores surrounding whitespace and
// letter case.
func isQUICAdvertiseTransport(label string) bool {
	normalized := strings.ToLower(strings.TrimSpace(label))
	for _, accepted := range []string{
		"quic",
		"direct_quic",
		"udp_quic",
		"quic_udp",
		"lan_quic",
		"reverse_quic",
		"relay_quic",
		"ice_quic",
	} {
		if normalized == accepted {
			return true
		}
	}
	return false
}
|
||||||
|
|
||||||
|
// normalizeLegacyAdvertiseTransport maps pre-QUIC transport labels onto their
// QUIC replacements. Matching ignores surrounding whitespace and letter case.
// Labels that are not recognised as legacy are returned whitespace-trimmed
// with their original casing.
func normalizeLegacyAdvertiseTransport(label string) string {
	trimmed := strings.TrimSpace(label)
	replacements := map[string]string{
		// Direct HTTP/TLS/WebSocket style transports become direct QUIC.
		"direct_http":    "direct_quic",
		"direct_https":   "direct_quic",
		"direct_tcp_tls": "direct_quic",
		"http":           "direct_quic",
		"https":          "direct_quic",
		"ws":             "direct_quic",
		"wss":            "direct_quic",
		"websocket":      "direct_quic",
		// Reverse (outbound-initiated) transports.
		"outbound_reverse": "reverse_quic",
		"reverse":          "reverse_quic",
		"reverse_outbound": "reverse_quic",
		// Relay transports.
		"relay":         "relay_quic",
		"relay_control": "relay_quic",
	}
	if quicLabel, legacy := replacements[strings.ToLower(trimmed)]; legacy {
		return quicLabel
	}
	return trimmed
}
|
||||||
|
|
||||||
|
// normalizeLegacyEndpointSchemeToQUIC trims surrounding whitespace and
// trailing slashes from endpoint and, when it starts with an http, https, ws
// or wss scheme (matched case-insensitively), swaps that scheme for quic://.
// Anything else is returned trimmed but otherwise untouched.
func normalizeLegacyEndpointSchemeToQUIC(endpoint string) string {
	trimmed := strings.TrimRight(strings.TrimSpace(endpoint), "/")
	lowered := strings.ToLower(trimmed)

	schemeLen := 0
	switch {
	case strings.HasPrefix(lowered, "http://"):
		schemeLen = len("http://")
	case strings.HasPrefix(lowered, "https://"):
		schemeLen = len("https://")
	case strings.HasPrefix(lowered, "ws://"):
		schemeLen = len("ws://")
	case strings.HasPrefix(lowered, "wss://"):
		schemeLen = len("wss://")
	}
	if schemeLen == 0 {
		return trimmed
	}
	// Slice the original (not lower-cased) text so host casing is preserved.
	return "quic://" + trimmed[schemeLen:]
}
|
||||||
|
|
||||||
|
// hasLegacyEndpointScheme reports whether endpoint, ignoring surrounding
// whitespace and letter case, begins with an http, https, ws or wss scheme.
func hasLegacyEndpointScheme(endpoint string) bool {
	candidate := strings.ToLower(strings.TrimSpace(endpoint))
	for _, scheme := range []string{"http://", "https://", "ws://", "wss://"} {
		if strings.HasPrefix(candidate, scheme) {
			return true
		}
	}
	return false
}
|
||||||
|
|
||||||
func readEnv() map[string]string {
|
func readEnv() map[string]string {
|
||||||
out := map[string]string{}
|
out := map[string]string{}
|
||||||
for _, pair := range os.Environ() {
|
for _, pair := range os.Environ() {
|
||||||
|
|||||||
@@ -15,6 +15,11 @@ func TestLoadConfigFromEnvAndArgs(t *testing.T) {
|
|||||||
"RAP_NODE_NAME": "node-a",
|
"RAP_NODE_NAME": "node-a",
|
||||||
"RAP_NODE_STATE_DIR": "/tmp/rap-node",
|
"RAP_NODE_STATE_DIR": "/tmp/rap-node",
|
||||||
"RAP_WORKLOAD_SUPERVISION_ENABLED": "true",
|
"RAP_WORKLOAD_SUPERVISION_ENABLED": "true",
|
||||||
|
"RAP_WEB_INGRESS_RUNTIME_ENABLED": "true",
|
||||||
|
"RAP_WEB_INGRESS_SIGNING_PRIVATE_KEY": " private-key-b64 ",
|
||||||
|
"RAP_WEB_INGRESS_SIGNING_KEY_ID": " web-key-1 ",
|
||||||
|
"RAP_WEB_INGRESS_TRUSTED_KEYS_JSON": ` {"web-key-1":"public-key-b64"} `,
|
||||||
|
"RAP_WEB_INGRESS_RUNTIME_SERVICE_CLASSES": " platform_admin, cluster_admin ",
|
||||||
"RAP_HEARTBEAT_INTERVAL_SECONDS": "7",
|
"RAP_HEARTBEAT_INTERVAL_SECONDS": "7",
|
||||||
"RAP_ENROLLMENT_POLL_INTERVAL_SECONDS": "3",
|
"RAP_ENROLLMENT_POLL_INTERVAL_SECONDS": "3",
|
||||||
"RAP_ENROLLMENT_POLL_TIMEOUT_SECONDS": "30",
|
"RAP_ENROLLMENT_POLL_TIMEOUT_SECONDS": "30",
|
||||||
@@ -32,11 +37,17 @@ func TestLoadConfigFromEnvAndArgs(t *testing.T) {
|
|||||||
"RAP_MESH_LISTEN_PORT_MODE": "auto",
|
"RAP_MESH_LISTEN_PORT_MODE": "auto",
|
||||||
"RAP_MESH_LISTEN_AUTO_PORT_START": "19010",
|
"RAP_MESH_LISTEN_AUTO_PORT_START": "19010",
|
||||||
"RAP_MESH_LISTEN_AUTO_PORT_END": "19020",
|
"RAP_MESH_LISTEN_AUTO_PORT_END": "19020",
|
||||||
"RAP_MESH_ADVERTISE_ENDPOINT": "https://node-a.example.test:443/",
|
"RAP_MESH_ADVERTISE_ENDPOINT": "quic://node-a.example.test:19443/",
|
||||||
"RAP_MESH_ADVERTISE_ENDPOINTS_JSON": `[{"endpoint_id":"node-a-lan","address":"10.10.0.20:19001"}]`,
|
"RAP_MESH_ADVERTISE_ENDPOINTS_JSON": `[{"endpoint_id":"node-a-lan","address":"10.10.0.20:19001"}]`,
|
||||||
"RAP_MESH_ADVERTISE_TRANSPORT": "wss",
|
"RAP_MESH_ADVERTISE_TRANSPORT": "direct_quic",
|
||||||
"RAP_MESH_CONNECTIVITY_MODE": "outbound_only",
|
"RAP_MESH_CONNECTIVITY_MODE": "outbound_only",
|
||||||
"RAP_MESH_NAT_TYPE": "symmetric",
|
"RAP_MESH_NAT_TYPE": "symmetric",
|
||||||
|
"RAP_MESH_LOCAL_SEGMENT_ID": "site-a",
|
||||||
|
"RAP_MESH_NAT_GROUP_ID": "nat-a",
|
||||||
|
"RAP_MESH_STUN_REFLEXIVE_ENDPOINT": "quic://203.0.113.20:19443/",
|
||||||
|
"RAP_MESH_STUN_SERVER": "stun.example.test:3478",
|
||||||
|
"RAP_MESH_RELAY_NODE_ID": "node-r",
|
||||||
|
"RAP_MESH_RELAY_ENDPOINT": "quic://node-r.example.test:19443/",
|
||||||
"RAP_MESH_REGION": "eu",
|
"RAP_MESH_REGION": "eu",
|
||||||
"RAP_MESH_SYNTHETIC_CONFIG": "/tmp/rap-node/mesh-synthetic.json",
|
"RAP_MESH_SYNTHETIC_CONFIG": "/tmp/rap-node/mesh-synthetic.json",
|
||||||
"RAP_MESH_PEER_ENDPOINTS_JSON": `{"node-b":"http://127.0.0.1:19002"}`,
|
"RAP_MESH_PEER_ENDPOINTS_JSON": `{"node-b":"http://127.0.0.1:19002"}`,
|
||||||
@@ -67,6 +78,15 @@ func TestLoadConfigFromEnvAndArgs(t *testing.T) {
|
|||||||
if !cfg.WorkloadSupervisionEnabled {
|
if !cfg.WorkloadSupervisionEnabled {
|
||||||
t.Fatal("WorkloadSupervisionEnabled = false, want true")
|
t.Fatal("WorkloadSupervisionEnabled = false, want true")
|
||||||
}
|
}
|
||||||
|
if !cfg.WebIngressRuntimeEnabled {
|
||||||
|
t.Fatal("WebIngressRuntimeEnabled = false, want true")
|
||||||
|
}
|
||||||
|
if cfg.WebIngressSigningPrivateKey != "private-key-b64" ||
|
||||||
|
cfg.WebIngressSigningKeyID != "web-key-1" ||
|
||||||
|
cfg.WebIngressTrustedKeysJSON != `{"web-key-1":"public-key-b64"}` ||
|
||||||
|
cfg.WebIngressRuntimeServiceClasses != "platform_admin, cluster_admin" {
|
||||||
|
t.Fatalf("unexpected web ingress key config: %+v", cfg)
|
||||||
|
}
|
||||||
if !cfg.MeshSyntheticRuntimeEnabled {
|
if !cfg.MeshSyntheticRuntimeEnabled {
|
||||||
t.Fatal("MeshSyntheticRuntimeEnabled = false, want true")
|
t.Fatal("MeshSyntheticRuntimeEnabled = false, want true")
|
||||||
}
|
}
|
||||||
@@ -100,11 +120,17 @@ func TestLoadConfigFromEnvAndArgs(t *testing.T) {
|
|||||||
if cfg.MeshListenPortMode != "auto" || cfg.MeshListenAutoPortStart != 19010 || cfg.MeshListenAutoPortEnd != 19020 {
|
if cfg.MeshListenPortMode != "auto" || cfg.MeshListenAutoPortStart != 19010 || cfg.MeshListenAutoPortEnd != 19020 {
|
||||||
t.Fatalf("unexpected mesh listen port config: %+v", cfg)
|
t.Fatalf("unexpected mesh listen port config: %+v", cfg)
|
||||||
}
|
}
|
||||||
if cfg.MeshAdvertiseEndpoint != "https://node-a.example.test:443" ||
|
if cfg.MeshAdvertiseEndpoint != "quic://node-a.example.test:19443" ||
|
||||||
cfg.MeshAdvertiseEndpointsJSON == "" ||
|
cfg.MeshAdvertiseEndpointsJSON == "" ||
|
||||||
cfg.MeshAdvertiseTransport != "wss" ||
|
cfg.MeshAdvertiseTransport != "direct_quic" ||
|
||||||
cfg.MeshConnectivityMode != "outbound_only" ||
|
cfg.MeshConnectivityMode != "outbound_only" ||
|
||||||
cfg.MeshNATType != "symmetric" ||
|
cfg.MeshNATType != "symmetric" ||
|
||||||
|
cfg.MeshLocalSegmentID != "site-a" ||
|
||||||
|
cfg.MeshNATGroupID != "nat-a" ||
|
||||||
|
cfg.MeshSTUNReflexiveEndpoint != "quic://203.0.113.20:19443" ||
|
||||||
|
cfg.MeshSTUNServer != "stun.example.test:3478" ||
|
||||||
|
cfg.MeshRelayNodeID != "node-r" ||
|
||||||
|
cfg.MeshRelayEndpoint != "quic://node-r.example.test:19443" ||
|
||||||
cfg.MeshRegion != "eu" {
|
cfg.MeshRegion != "eu" {
|
||||||
t.Fatalf("unexpected mesh advertise config: %+v", cfg)
|
t.Fatalf("unexpected mesh advertise config: %+v", cfg)
|
||||||
}
|
}
|
||||||
@@ -139,6 +165,9 @@ func TestLoadConfigDefaultsEnrollmentPollingToNoTimeout(t *testing.T) {
|
|||||||
cfg.RemoteWorkspaceRealAdapterWorkDir != "" {
|
cfg.RemoteWorkspaceRealAdapterWorkDir != "" {
|
||||||
t.Fatalf("real adapter config should default disabled and empty: %+v", cfg)
|
t.Fatalf("real adapter config should default disabled and empty: %+v", cfg)
|
||||||
}
|
}
|
||||||
|
if cfg.WebIngressRuntimeEnabled {
|
||||||
|
t.Fatalf("web ingress runtime should default disabled: %+v", cfg)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestLoadConfigRejectsNegativeProductionObservationSinkCapacity(t *testing.T) {
|
func TestLoadConfigRejectsNegativeProductionObservationSinkCapacity(t *testing.T) {
|
||||||
@@ -162,3 +191,33 @@ func TestLoadConfigRejectsTooLargeProductionObservationSinkCapacity(t *testing.T
|
|||||||
t.Fatal("Load returned nil error for too-large sink capacity")
|
t.Fatal("Load returned nil error for too-large sink capacity")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestLoadConfigNormalizesLegacyMeshAdvertiseTransport(t *testing.T) {
|
||||||
|
cfg, err := Load(nil, map[string]string{
|
||||||
|
"RAP_BACKEND_URL": "http://backend/api/v1",
|
||||||
|
"RAP_NODE_NAME": "node-a",
|
||||||
|
"RAP_MESH_ADVERTISE_ENDPOINT": "quic://node-a.example.test:19443",
|
||||||
|
"RAP_MESH_ADVERTISE_TRANSPORT": "wss",
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Load returned error for legacy mesh advertise transport migration: %v", err)
|
||||||
|
}
|
||||||
|
if cfg.MeshAdvertiseTransport != "direct_quic" {
|
||||||
|
t.Fatalf("transport = %q, want direct_quic", cfg.MeshAdvertiseTransport)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLoadConfigNormalizesLegacyMeshAdvertiseEndpointScheme(t *testing.T) {
|
||||||
|
cfg, err := Load(nil, map[string]string{
|
||||||
|
"RAP_BACKEND_URL": "http://backend/api/v1",
|
||||||
|
"RAP_NODE_NAME": "node-a",
|
||||||
|
"RAP_MESH_ADVERTISE_ENDPOINT": "https://node-a.example.test:443",
|
||||||
|
"RAP_MESH_ADVERTISE_TRANSPORT": "direct_quic",
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Load returned error for legacy mesh advertise endpoint migration: %v", err)
|
||||||
|
}
|
||||||
|
if cfg.MeshAdvertiseEndpoint != "quic://node-a.example.test:443" {
|
||||||
|
t.Fatalf("endpoint = %q, want quic scheme", cfg.MeshAdvertiseEndpoint)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,6 +1,9 @@
|
|||||||
package fabricproto
|
package fabricproto
|
||||||
|
|
||||||
import "errors"
|
import (
|
||||||
|
"crypto/sha256"
|
||||||
|
"errors"
|
||||||
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
ErrUnsupportedSessionFrame = errors.New("unsupported fabric session frame")
|
ErrUnsupportedSessionFrame = errors.New("unsupported fabric session frame")
|
||||||
@@ -62,6 +65,7 @@ func (s *Session) HandleFrame(frame Frame) (SessionEvent, []Frame, error) {
|
|||||||
TrafficClass: frame.TrafficClass,
|
TrafficClass: frame.TrafficClass,
|
||||||
StreamID: frame.StreamID,
|
StreamID: frame.StreamID,
|
||||||
Sequence: frame.Sequence,
|
Sequence: frame.Sequence,
|
||||||
|
Payload: DataAckPayload(frame.Payload),
|
||||||
}}, nil
|
}}, nil
|
||||||
case FrameAck:
|
case FrameAck:
|
||||||
if err := s.Ack(frame.StreamID, frame.Sequence); err != nil {
|
if err := s.Ack(frame.StreamID, frame.Sequence); err != nil {
|
||||||
@@ -103,6 +107,11 @@ func (s *Session) HandleFrame(frame Frame) (SessionEvent, []Frame, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// DataAckPayload returns the 32-byte SHA-256 digest of payload. HandleFrame
// places this digest in the Payload of the ack frame it emits for a data frame.
func DataAckPayload(payload []byte) []byte {
	digest := sha256.New()
	digest.Write(payload) // hash.Hash.Write never returns an error
	return digest.Sum(nil)
}
|
||||||
|
|
||||||
func (s *Session) handleDataFrame(frame Frame) (SessionEvent, error) {
|
func (s *Session) handleDataFrame(frame Frame) (SessionEvent, error) {
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
defer s.mu.Unlock()
|
defer s.mu.Unlock()
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
package fabricproto
|
package fabricproto
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
"errors"
|
"errors"
|
||||||
"testing"
|
"testing"
|
||||||
)
|
)
|
||||||
@@ -36,6 +37,9 @@ func TestHandleFrameOpensStreamAndReceivesData(t *testing.T) {
|
|||||||
if len(responses) != 1 || responses[0].Type != FrameAck || responses[0].StreamID != 7 || responses[0].Sequence != 11 {
|
if len(responses) != 1 || responses[0].Type != FrameAck || responses[0].StreamID != 7 || responses[0].Sequence != 11 {
|
||||||
t.Fatalf("responses = %+v, want ack for stream 7 seq 11", responses)
|
t.Fatalf("responses = %+v, want ack for stream 7 seq 11", responses)
|
||||||
}
|
}
|
||||||
|
if !bytes.Equal(responses[0].Payload, DataAckPayload([]byte("rdp-input"))) {
|
||||||
|
t.Fatalf("ack checksum = %x, want sha256 payload checksum", responses[0].Payload)
|
||||||
|
}
|
||||||
snapshot := session.Snapshot()
|
snapshot := session.Snapshot()
|
||||||
if snapshot.FramesReceived != 1 || snapshot.Streams[7].Received != 1 {
|
if snapshot.FramesReceived != 1 || snapshot.Streams[7].Received != 1 {
|
||||||
t.Fatalf("received metrics = %+v stream=%+v", snapshot, snapshot.Streams[7])
|
t.Fatalf("received metrics = %+v stream=%+v", snapshot, snapshot.Streams[7])
|
||||||
|
|||||||
@@ -136,6 +136,12 @@ func (cfg RuntimeConfig) ValidateInstall() error {
|
|||||||
if cfg.MeshListenAutoPortStart > 0 && cfg.MeshListenAutoPortEnd > 0 && cfg.MeshListenAutoPortStart > cfg.MeshListenAutoPortEnd {
|
if cfg.MeshListenAutoPortStart > 0 && cfg.MeshListenAutoPortEnd > 0 && cfg.MeshListenAutoPortStart > cfg.MeshListenAutoPortEnd {
|
||||||
return errors.New("mesh listen auto port start must be less than or equal to end")
|
return errors.New("mesh listen auto port start must be less than or equal to end")
|
||||||
}
|
}
|
||||||
|
if cfg.MeshAdvertiseTransport != "" && !isQUICAdvertiseTransport(cfg.MeshAdvertiseTransport) {
|
||||||
|
return errors.New("mesh advertise transport must be a QUIC transport label")
|
||||||
|
}
|
||||||
|
if hasLegacyEndpointScheme(cfg.MeshAdvertiseEndpoint) {
|
||||||
|
return errors.New("mesh advertise endpoint must be a QUIC endpoint")
|
||||||
|
}
|
||||||
if cfg.ProductionObservationSinkCap < 0 {
|
if cfg.ProductionObservationSinkCap < 0 {
|
||||||
return errors.New("production observation sink capacity must not be negative")
|
return errors.New("production observation sink capacity must not be negative")
|
||||||
}
|
}
|
||||||
@@ -153,3 +159,20 @@ func firstNonEmpty(value, fallback string) string {
|
|||||||
}
|
}
|
||||||
return strings.TrimSpace(value)
|
return strings.TrimSpace(value)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// isQUICAdvertiseTransport reports whether label, after trimming whitespace
// and lower-casing, is one of the QUIC transport labels accepted for an
// advertised mesh endpoint.
func isQUICAdvertiseTransport(label string) bool {
	wanted := strings.ToLower(strings.TrimSpace(label))
	quicLabels := [...]string{
		"quic", "direct_quic", "udp_quic", "quic_udp",
		"lan_quic", "reverse_quic", "relay_quic", "ice_quic",
	}
	for i := range quicLabels {
		if quicLabels[i] == wanted {
			return true
		}
	}
	return false
}
|
||||||
|
|
||||||
|
// hasLegacyEndpointScheme reports whether endpoint starts with one of the
// http://, https://, ws:// or wss:// schemes. Surrounding whitespace and
// letter case are ignored.
func hasLegacyEndpointScheme(endpoint string) bool {
	lowered := strings.ToLower(strings.TrimSpace(endpoint))
	switch {
	case strings.HasPrefix(lowered, "http://"),
		strings.HasPrefix(lowered, "https://"),
		strings.HasPrefix(lowered, "ws://"),
		strings.HasPrefix(lowered, "wss://"):
		return true
	}
	return false
}
|
||||||
|
|||||||
@@ -73,7 +73,8 @@ func TestDockerRunArgsBuildNodeRuntimePlacement(t *testing.T) {
|
|||||||
VPNFabricQUICMaxStreamsPerConn: 24,
|
VPNFabricQUICMaxStreamsPerConn: 24,
|
||||||
VPNFabricQUICIdleTTLSeconds: 120,
|
VPNFabricQUICIdleTTLSeconds: 120,
|
||||||
MeshListenAddr: ":19131",
|
MeshListenAddr: ":19131",
|
||||||
MeshAdvertiseEndpoint: "http://10.0.0.11:19131/",
|
MeshAdvertiseEndpoint: "quic://10.0.0.11:19443/",
|
||||||
|
MeshAdvertiseTransport: "direct_quic",
|
||||||
MeshConnectivityMode: "private_lan",
|
MeshConnectivityMode: "private_lan",
|
||||||
})
|
})
|
||||||
|
|
||||||
@@ -94,7 +95,8 @@ func TestDockerRunArgsBuildNodeRuntimePlacement(t *testing.T) {
|
|||||||
"RAP_VPN_FABRIC_QUIC_MAX_STREAMS_PER_CONN=24",
|
"RAP_VPN_FABRIC_QUIC_MAX_STREAMS_PER_CONN=24",
|
||||||
"RAP_VPN_FABRIC_QUIC_IDLE_TTL_SECONDS=120",
|
"RAP_VPN_FABRIC_QUIC_IDLE_TTL_SECONDS=120",
|
||||||
"RAP_MESH_LISTEN_ADDR=:19131",
|
"RAP_MESH_LISTEN_ADDR=:19131",
|
||||||
"RAP_MESH_ADVERTISE_ENDPOINT=http://10.0.0.11:19131",
|
"RAP_MESH_ADVERTISE_ENDPOINT=quic://10.0.0.11:19443",
|
||||||
|
"RAP_MESH_ADVERTISE_TRANSPORT=direct_quic",
|
||||||
"RAP_MESH_CONNECTIVITY_MODE=private_lan",
|
"RAP_MESH_CONNECTIVITY_MODE=private_lan",
|
||||||
"rap-node-agent:test",
|
"rap-node-agent:test",
|
||||||
} {
|
} {
|
||||||
@@ -384,3 +386,35 @@ func TestValidateRequiresJoinTokenUnlessReplacingExistingState(t *testing.T) {
|
|||||||
t.Fatalf("replace update should allow missing join token: %v", err)
|
t.Fatalf("replace update should allow missing join token: %v", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestValidateRejectsLegacyMeshAdvertiseTransport(t *testing.T) {
|
||||||
|
err := RuntimeConfig{
|
||||||
|
BackendURL: "http://control/api/v1",
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
JoinToken: "join-secret",
|
||||||
|
NodeName: "node-a",
|
||||||
|
MeshAdvertiseEndpoint: "quic://10.0.0.11:19443",
|
||||||
|
MeshAdvertiseTransport: "wss",
|
||||||
|
MeshQUICFabricEnabled: true,
|
||||||
|
MeshQUICFabricListenAddr: ":19443",
|
||||||
|
}.ValidateInstall()
|
||||||
|
if err == nil || !strings.Contains(err.Error(), "QUIC transport") {
|
||||||
|
t.Fatalf("expected QUIC transport validation error, got %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestValidateRejectsLegacyMeshAdvertiseEndpointScheme(t *testing.T) {
|
||||||
|
err := RuntimeConfig{
|
||||||
|
BackendURL: "http://control/api/v1",
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
JoinToken: "join-secret",
|
||||||
|
NodeName: "node-a",
|
||||||
|
MeshAdvertiseEndpoint: "http://10.0.0.11:19131",
|
||||||
|
MeshAdvertiseTransport: "direct_quic",
|
||||||
|
MeshQUICFabricEnabled: true,
|
||||||
|
MeshQUICFabricListenAddr: ":19443",
|
||||||
|
}.ValidateInstall()
|
||||||
|
if err == nil || !strings.Contains(err.Error(), "QUIC endpoint") {
|
||||||
|
t.Fatalf("expected QUIC endpoint validation error, got %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
clusterauth "github.com/example/remote-access-platform/agents/rap-node-agent/internal/authority"
|
||||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/state"
|
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/state"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -119,6 +120,21 @@ type NodeUpdatePlan struct {
|
|||||||
Artifact *ReleaseArtifact `json:"artifact,omitempty"`
|
Artifact *ReleaseArtifact `json:"artifact,omitempty"`
|
||||||
AuthorityPayload json.RawMessage `json:"authority_payload,omitempty"`
|
AuthorityPayload json.RawMessage `json:"authority_payload,omitempty"`
|
||||||
AuthoritySignature json.RawMessage `json:"authority_signature,omitempty"`
|
AuthoritySignature json.RawMessage `json:"authority_signature,omitempty"`
|
||||||
|
AuthorityQuorum *clusterauth.QuorumEnvelope `json:"authority_quorum,omitempty"`
|
||||||
|
ProductionForwarding bool `json:"production_forwarding"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// nodeUpdatePlanAuthorityPayload is the JSON shape decoded from
// NodeUpdatePlan.AuthorityPayload by verifyNodeUpdatePlanAuthorityPayload,
// which compares its fields against the corresponding fields of the plan.
type nodeUpdatePlanAuthorityPayload struct {
|
||||||
|
SchemaVersion string `json:"schema_version"`
|
||||||
|
ClusterID string `json:"cluster_id"`
|
||||||
|
NodeID string `json:"node_id"`
|
||||||
|
Product string `json:"product"`
|
||||||
|
CurrentVersion string `json:"current_version,omitempty"`
|
||||||
|
Action string `json:"action"`
|
||||||
|
TargetVersion string `json:"target_version,omitempty"`
|
||||||
|
ArtifactSHA256 string `json:"artifact_sha256,omitempty"`
|
||||||
|
ArtifactURL string `json:"artifact_url,omitempty"`
|
||||||
|
ControlPlaneOnly bool `json:"control_plane_only"`
|
||||||
ProductionForwarding bool `json:"production_forwarding"`
|
ProductionForwarding bool `json:"production_forwarding"`
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -516,9 +532,87 @@ func FetchNodeUpdatePlan(ctx context.Context, req UpdateRequest) (NodeUpdatePlan
|
|||||||
if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
|
if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
|
||||||
return NodeUpdatePlan{}, err
|
return NodeUpdatePlan{}, err
|
||||||
}
|
}
|
||||||
|
if err := verifyNodeUpdatePlanAuthority(req, out.Plan); err != nil {
|
||||||
|
return NodeUpdatePlan{}, err
|
||||||
|
}
|
||||||
return out.Plan, nil
|
return out.Plan, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func verifyNodeUpdatePlanAuthority(req UpdateRequest, plan NodeUpdatePlan) error {
|
||||||
|
identity, ok := pinnedUpdatePlanAuthority(req)
|
||||||
|
if !ok {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if len(identity.ClusterAuthorityQuorum) > 0 {
|
||||||
|
if plan.AuthorityQuorum == nil {
|
||||||
|
return errors.New("update plan quorum authority is required by pinned cluster quorum")
|
||||||
|
}
|
||||||
|
var descriptor clusterauth.QuorumDescriptor
|
||||||
|
if err := json.Unmarshal(identity.ClusterAuthorityQuorum, &descriptor); err != nil {
|
||||||
|
return fmt.Errorf("invalid pinned cluster authority quorum: %w", err)
|
||||||
|
}
|
||||||
|
if len(plan.AuthorityPayload) == 0 {
|
||||||
|
return errors.New("update plan authority payload is required by pinned cluster quorum")
|
||||||
|
}
|
||||||
|
if err := clusterauth.VerifyQuorumRaw(descriptor, plan.AuthorityPayload, *plan.AuthorityQuorum, "update-authority"); err != nil {
|
||||||
|
return fmt.Errorf("update plan quorum authority rejected: %w", err)
|
||||||
|
}
|
||||||
|
return verifyNodeUpdatePlanAuthorityPayload(plan)
|
||||||
|
}
|
||||||
|
if len(plan.AuthorityPayload) == 0 || len(plan.AuthoritySignature) == 0 {
|
||||||
|
return errors.New("update plan authority signature is required by pinned cluster authority")
|
||||||
|
}
|
||||||
|
var signature clusterauth.Signature
|
||||||
|
if err := json.Unmarshal(plan.AuthoritySignature, &signature); err != nil {
|
||||||
|
return fmt.Errorf("invalid update plan authority signature: %w", err)
|
||||||
|
}
|
||||||
|
if identity.ClusterAuthorityFingerprint != "" && signature.KeyFingerprint != identity.ClusterAuthorityFingerprint {
|
||||||
|
return errors.New("update plan authority fingerprint mismatch")
|
||||||
|
}
|
||||||
|
if err := clusterauth.VerifyRaw(identity.ClusterAuthorityPublicKey, plan.AuthorityPayload, signature); err != nil {
|
||||||
|
return fmt.Errorf("update plan authority signature rejected: %w", err)
|
||||||
|
}
|
||||||
|
return verifyNodeUpdatePlanAuthorityPayload(plan)
|
||||||
|
}
|
||||||
|
|
||||||
|
func verifyNodeUpdatePlanAuthorityPayload(plan NodeUpdatePlan) error {
|
||||||
|
var payload nodeUpdatePlanAuthorityPayload
|
||||||
|
if err := json.Unmarshal(plan.AuthorityPayload, &payload); err != nil {
|
||||||
|
return fmt.Errorf("invalid update plan authority payload: %w", err)
|
||||||
|
}
|
||||||
|
if payload.SchemaVersion != "rap.node_update_plan_authority.v1" ||
|
||||||
|
payload.ClusterID != plan.ClusterID ||
|
||||||
|
payload.NodeID != plan.NodeID ||
|
||||||
|
payload.Product != plan.Product ||
|
||||||
|
payload.CurrentVersion != plan.CurrentVersion ||
|
||||||
|
payload.Action != plan.Action ||
|
||||||
|
payload.TargetVersion != plan.TargetVersion ||
|
||||||
|
payload.ProductionForwarding != plan.ProductionForwarding {
|
||||||
|
return errors.New("update plan authority payload mismatch")
|
||||||
|
}
|
||||||
|
if plan.Artifact != nil {
|
||||||
|
if payload.ArtifactSHA256 != plan.Artifact.SHA256 || payload.ArtifactURL != plan.Artifact.URL {
|
||||||
|
return errors.New("update plan artifact authority payload mismatch")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func pinnedUpdatePlanAuthority(req UpdateRequest) (state.Identity, bool) {
|
||||||
|
stateDir := strings.TrimSpace(req.StateDir)
|
||||||
|
if stateDir == "" {
|
||||||
|
return state.Identity{}, false
|
||||||
|
}
|
||||||
|
identity, err := state.Load(filepath.Join(stateDir, state.FileName))
|
||||||
|
if err != nil {
|
||||||
|
return state.Identity{}, false
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(identity.ClusterAuthorityPublicKey) == "" {
|
||||||
|
return state.Identity{}, false
|
||||||
|
}
|
||||||
|
return identity, true
|
||||||
|
}
|
||||||
|
|
||||||
func resolveUpdateRequest(req UpdateRequest) (UpdateRequest, error) {
|
func resolveUpdateRequest(req UpdateRequest) (UpdateRequest, error) {
|
||||||
req = req.Normalize()
|
req = req.Normalize()
|
||||||
if err := req.Validate(); err != nil {
|
if err := req.Validate(); err != nil {
|
||||||
|
|||||||
@@ -2,6 +2,9 @@ package hostagent
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"crypto/ed25519"
|
||||||
|
cryptorand "crypto/rand"
|
||||||
|
"encoding/base64"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"net/http"
|
"net/http"
|
||||||
@@ -12,6 +15,7 @@ import (
|
|||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
clusterauth "github.com/example/remote-access-platform/agents/rap-node-agent/internal/authority"
|
||||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/state"
|
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/state"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -21,6 +25,101 @@ type updateRunner struct {
|
|||||||
inspectJSON string
|
inspectJSON string
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func writePinnedAuthorityIdentity(t *testing.T) (string, ed25519.PublicKey, ed25519.PrivateKey) {
|
||||||
|
t.Helper()
|
||||||
|
publicKey, privateKey, err := ed25519.GenerateKey(cryptorand.Reader)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("generate authority key: %v", err)
|
||||||
|
}
|
||||||
|
dir := t.TempDir()
|
||||||
|
identity := state.Identity{
|
||||||
|
NodeID: "node-1",
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
NodeName: "node-a",
|
||||||
|
IdentityStatus: "active",
|
||||||
|
ClusterAuthorityPublicKey: base64.StdEncoding.EncodeToString(publicKey),
|
||||||
|
ClusterAuthorityFingerprint: clusterauth.Fingerprint(publicKey),
|
||||||
|
}
|
||||||
|
if err := state.Save(filepath.Join(dir, state.FileName), identity); err != nil {
|
||||||
|
t.Fatalf("save identity: %v", err)
|
||||||
|
}
|
||||||
|
return dir, publicKey, privateKey
|
||||||
|
}
|
||||||
|
|
||||||
|
func writePinnedQuorumIdentity(t *testing.T) (string, clusterauth.QuorumDescriptor, []ed25519.PrivateKey) {
|
||||||
|
t.Helper()
|
||||||
|
descriptor := clusterauth.QuorumDescriptor{
|
||||||
|
SchemaVersion: clusterauth.QuorumSchemaVersion,
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
Epoch: "epoch-1",
|
||||||
|
Threshold: 2,
|
||||||
|
}
|
||||||
|
privateKeys := make([]ed25519.PrivateKey, 0, 3)
|
||||||
|
for i := 0; i < 3; i++ {
|
||||||
|
publicKey, privateKey, err := ed25519.GenerateKey(cryptorand.Reader)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("generate authority key: %v", err)
|
||||||
|
}
|
||||||
|
descriptor.Members = append(descriptor.Members, clusterauth.QuorumMember{
|
||||||
|
NodeID: fmt.Sprintf("authority-%d", i+1),
|
||||||
|
Role: "update-authority",
|
||||||
|
PublicKey: base64.StdEncoding.EncodeToString(publicKey),
|
||||||
|
PublicKeyFingerprint: clusterauth.Fingerprint(publicKey),
|
||||||
|
Scopes: []string{"update-authority"},
|
||||||
|
})
|
||||||
|
privateKeys = append(privateKeys, privateKey)
|
||||||
|
}
|
||||||
|
rawQuorum, err := json.Marshal(descriptor)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("marshal quorum: %v", err)
|
||||||
|
}
|
||||||
|
dir := t.TempDir()
|
||||||
|
identity := state.Identity{
|
||||||
|
NodeID: "node-1",
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
NodeName: "node-a",
|
||||||
|
IdentityStatus: "active",
|
||||||
|
ClusterAuthorityQuorum: rawQuorum,
|
||||||
|
}
|
||||||
|
if err := state.Save(filepath.Join(dir, state.FileName), identity); err != nil {
|
||||||
|
t.Fatalf("save identity: %v", err)
|
||||||
|
}
|
||||||
|
return dir, descriptor, privateKeys
|
||||||
|
}
|
||||||
|
|
||||||
|
func signedAuthorityPayload(t *testing.T, publicKey ed25519.PublicKey, privateKey ed25519.PrivateKey, payload any) (json.RawMessage, clusterauth.Signature) {
|
||||||
|
t.Helper()
|
||||||
|
raw, err := json.Marshal(payload)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("marshal payload: %v", err)
|
||||||
|
}
|
||||||
|
canonical, err := clusterauth.CanonicalJSON(raw)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("canonical payload: %v", err)
|
||||||
|
}
|
||||||
|
return raw, clusterauth.Signature{
|
||||||
|
SchemaVersion: clusterauth.SignatureSchemaVersion,
|
||||||
|
Algorithm: clusterauth.AlgorithmEd25519,
|
||||||
|
KeyFingerprint: clusterauth.Fingerprint(publicKey),
|
||||||
|
Signature: base64.StdEncoding.EncodeToString(ed25519.Sign(privateKey, canonical)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func signHostAgentPayload(t *testing.T, payload json.RawMessage, privateKey ed25519.PrivateKey) clusterauth.Signature {
|
||||||
|
t.Helper()
|
||||||
|
canonical, err := clusterauth.CanonicalJSON(payload)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("canonical payload: %v", err)
|
||||||
|
}
|
||||||
|
publicKey := privateKey.Public().(ed25519.PublicKey)
|
||||||
|
return clusterauth.Signature{
|
||||||
|
SchemaVersion: clusterauth.SignatureSchemaVersion,
|
||||||
|
Algorithm: clusterauth.AlgorithmEd25519,
|
||||||
|
KeyFingerprint: clusterauth.Fingerprint(publicKey),
|
||||||
|
Signature: base64.StdEncoding.EncodeToString(ed25519.Sign(privateKey, canonical)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestArtifactURLsForBackendResolvesControlPlaneRelativeDownloads(t *testing.T) {
|
func TestArtifactURLsForBackendResolvesControlPlaneRelativeDownloads(t *testing.T) {
|
||||||
urls := artifactURLsForBackend(ReleaseArtifact{
|
urls := artifactURLsForBackend(ReleaseArtifact{
|
||||||
URL: "/downloads/rap-node-agent-0.2.92.tar",
|
URL: "/downloads/rap-node-agent-0.2.92.tar",
|
||||||
@@ -41,6 +140,161 @@ func TestArtifactURLsForBackendResolvesControlPlaneRelativeDownloads(t *testing.
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestFetchNodeUpdatePlanRejectsUnsignedPlanWithPinnedAuthority(t *testing.T) {
|
||||||
|
stateDir, _, _ := writePinnedAuthorityIdentity(t)
|
||||||
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||||
|
"node_update_plan": map[string]any{
|
||||||
|
"schema_version": "rap.node_update_plan.v1",
|
||||||
|
"cluster_id": "cluster-1",
|
||||||
|
"node_id": "node-1",
|
||||||
|
"product": "rap-node-agent",
|
||||||
|
"current_version": "0.1.0",
|
||||||
|
"action": "none",
|
||||||
|
"reason": "already_current",
|
||||||
|
"production_forwarding": false,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}))
|
||||||
|
defer server.Close()
|
||||||
|
|
||||||
|
_, err := FetchNodeUpdatePlan(context.Background(), UpdateRequest{
|
||||||
|
BackendURL: server.URL,
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
NodeID: "node-1",
|
||||||
|
StateDir: stateDir,
|
||||||
|
CurrentVersion: "0.1.0",
|
||||||
|
OS: "linux",
|
||||||
|
Arch: "amd64",
|
||||||
|
InstallType: "docker",
|
||||||
|
})
|
||||||
|
if err == nil || !strings.Contains(err.Error(), "authority signature is required") {
|
||||||
|
t.Fatalf("expected pinned authority rejection, got %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFetchNodeUpdatePlanAcceptsSignedPlanWithPinnedAuthority(t *testing.T) {
|
||||||
|
stateDir, publicKey, privateKey := writePinnedAuthorityIdentity(t)
|
||||||
|
plan := map[string]any{
|
||||||
|
"schema_version": "rap.node_update_plan.v1",
|
||||||
|
"cluster_id": "cluster-1",
|
||||||
|
"node_id": "node-1",
|
||||||
|
"product": "rap-node-agent",
|
||||||
|
"current_version": "0.1.0",
|
||||||
|
"action": "none",
|
||||||
|
"reason": "already_current",
|
||||||
|
"production_forwarding": false,
|
||||||
|
}
|
||||||
|
payload := map[string]any{
|
||||||
|
"schema_version": "rap.node_update_plan_authority.v1",
|
||||||
|
"cluster_id": "cluster-1",
|
||||||
|
"node_id": "node-1",
|
||||||
|
"product": "rap-node-agent",
|
||||||
|
"current_version": "0.1.0",
|
||||||
|
"action": "none",
|
||||||
|
"target_version": "",
|
||||||
|
"artifact_sha256": "",
|
||||||
|
"control_plane_only": true,
|
||||||
|
"production_forwarding": false,
|
||||||
|
}
|
||||||
|
rawPayload, signature := signedAuthorityPayload(t, publicKey, privateKey, payload)
|
||||||
|
plan["authority_payload"] = json.RawMessage(rawPayload)
|
||||||
|
plan["authority_signature"] = signature
|
||||||
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
_ = json.NewEncoder(w).Encode(map[string]any{"node_update_plan": plan})
|
||||||
|
}))
|
||||||
|
defer server.Close()
|
||||||
|
|
||||||
|
got, err := FetchNodeUpdatePlan(context.Background(), UpdateRequest{
|
||||||
|
BackendURL: server.URL,
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
NodeID: "node-1",
|
||||||
|
StateDir: stateDir,
|
||||||
|
CurrentVersion: "0.1.0",
|
||||||
|
OS: "linux",
|
||||||
|
Arch: "amd64",
|
||||||
|
InstallType: "docker",
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("fetch signed plan: %v", err)
|
||||||
|
}
|
||||||
|
if got.Action != "none" || got.Reason != "already_current" {
|
||||||
|
t.Fatalf("unexpected plan: %+v", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFetchNodeUpdatePlanAcceptsQuorumSignedPlan(t *testing.T) {
|
||||||
|
stateDir, descriptor, privateKeys := writePinnedQuorumIdentity(t)
|
||||||
|
plan := map[string]any{
|
||||||
|
"schema_version": "rap.node_update_plan.v1",
|
||||||
|
"cluster_id": "cluster-1",
|
||||||
|
"node_id": "node-1",
|
||||||
|
"product": "rap-node-agent",
|
||||||
|
"current_version": "0.1.0",
|
||||||
|
"action": "none",
|
||||||
|
"reason": "already_current",
|
||||||
|
"production_forwarding": false,
|
||||||
|
}
|
||||||
|
payload := map[string]any{
|
||||||
|
"schema_version": "rap.node_update_plan_authority.v1",
|
||||||
|
"cluster_id": "cluster-1",
|
||||||
|
"node_id": "node-1",
|
||||||
|
"product": "rap-node-agent",
|
||||||
|
"current_version": "0.1.0",
|
||||||
|
"action": "none",
|
||||||
|
"target_version": "",
|
||||||
|
"artifact_sha256": "",
|
||||||
|
"control_plane_only": true,
|
||||||
|
"production_forwarding": false,
|
||||||
|
}
|
||||||
|
rawPayload, err := json.Marshal(payload)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("marshal payload: %v", err)
|
||||||
|
}
|
||||||
|
payloadHash, err := clusterauth.HashRaw(rawPayload)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("payload hash: %v", err)
|
||||||
|
}
|
||||||
|
quorumHash, err := clusterauth.QuorumDescriptorHash(descriptor)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("quorum hash: %v", err)
|
||||||
|
}
|
||||||
|
plan["authority_payload"] = json.RawMessage(rawPayload)
|
||||||
|
plan["authority_quorum"] = clusterauth.QuorumEnvelope{
|
||||||
|
SchemaVersion: clusterauth.QuorumEnvelopeVersion,
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
Epoch: "epoch-1",
|
||||||
|
Threshold: 2,
|
||||||
|
PayloadSHA256: payloadHash,
|
||||||
|
QuorumSHA256: quorumHash,
|
||||||
|
Signatures: []clusterauth.Signature{
|
||||||
|
signHostAgentPayload(t, rawPayload, privateKeys[0]),
|
||||||
|
signHostAgentPayload(t, rawPayload, privateKeys[1]),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
_ = json.NewEncoder(w).Encode(map[string]any{"node_update_plan": plan})
|
||||||
|
}))
|
||||||
|
defer server.Close()
|
||||||
|
|
||||||
|
got, err := FetchNodeUpdatePlan(context.Background(), UpdateRequest{
|
||||||
|
BackendURL: server.URL,
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
NodeID: "node-1",
|
||||||
|
StateDir: stateDir,
|
||||||
|
CurrentVersion: "0.1.0",
|
||||||
|
OS: "linux",
|
||||||
|
Arch: "amd64",
|
||||||
|
InstallType: "docker",
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("fetch quorum plan: %v", err)
|
||||||
|
}
|
||||||
|
if got.Action != "none" {
|
||||||
|
t.Fatalf("unexpected plan: %+v", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (r *updateRunner) Run(_ context.Context, name string, args ...string) (string, error) {
|
func (r *updateRunner) Run(_ context.Context, name string, args ...string) (string, error) {
|
||||||
r.calls = append(r.calls, append([]string{name}, args...))
|
r.calls = append(r.calls, append([]string{name}, args...))
|
||||||
if len(args) >= 2 && args[0] == "inspect" && args[1] == "--format" {
|
if len(args) >= 2 && args[0] == "inspect" && args[1] == "--format" {
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ func TestClientFabricSessionFrameRoundTrip(t *testing.T) {
|
|||||||
server := httptest.NewServer(Server{
|
server := httptest.NewServer(Server{
|
||||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||||
FabricSessionEnabled: true,
|
FabricSessionEnabled: true,
|
||||||
|
FabricSessionWebSocketEnabled: true,
|
||||||
}.Handler())
|
}.Handler())
|
||||||
defer server.Close()
|
defer server.Close()
|
||||||
|
|
||||||
@@ -39,6 +40,7 @@ func TestClientFabricSessionPersistentRoundTrips(t *testing.T) {
|
|||||||
server := httptest.NewServer(Server{
|
server := httptest.NewServer(Server{
|
||||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||||
FabricSessionEnabled: true,
|
FabricSessionEnabled: true,
|
||||||
|
FabricSessionWebSocketEnabled: true,
|
||||||
}.Handler())
|
}.Handler())
|
||||||
defer server.Close()
|
defer server.Close()
|
||||||
|
|
||||||
@@ -82,6 +84,7 @@ func TestClientFabricSessionPersistentDataAcks(t *testing.T) {
|
|||||||
server := httptest.NewServer(Server{
|
server := httptest.NewServer(Server{
|
||||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||||
FabricSessionEnabled: true,
|
FabricSessionEnabled: true,
|
||||||
|
FabricSessionWebSocketEnabled: true,
|
||||||
}.Handler())
|
}.Handler())
|
||||||
defer server.Close()
|
defer server.Close()
|
||||||
|
|
||||||
@@ -137,6 +140,7 @@ func TestClientFabricSessionPumpMovesIndependentFrames(t *testing.T) {
|
|||||||
server := httptest.NewServer(Server{
|
server := httptest.NewServer(Server{
|
||||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||||
FabricSessionEnabled: true,
|
FabricSessionEnabled: true,
|
||||||
|
FabricSessionWebSocketEnabled: true,
|
||||||
}.Handler())
|
}.Handler())
|
||||||
defer server.Close()
|
defer server.Close()
|
||||||
|
|
||||||
@@ -204,6 +208,7 @@ func TestClientFabricSessionReportsRejectedStatus(t *testing.T) {
|
|||||||
server := httptest.NewServer(Server{
|
server := httptest.NewServer(Server{
|
||||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||||
FabricSessionEnabled: true,
|
FabricSessionEnabled: true,
|
||||||
|
FabricSessionWebSocketEnabled: true,
|
||||||
}.Handler())
|
}.Handler())
|
||||||
defer server.Close()
|
defer server.Close()
|
||||||
|
|
||||||
|
|||||||
@@ -72,6 +72,10 @@ const (
|
|||||||
MaxProductionEnvelopePayloadBytes = 4096
|
MaxProductionEnvelopePayloadBytes = 4096
|
||||||
MaxProductionVPNPacketPayloadBytes = 256 * 1024
|
MaxProductionVPNPacketPayloadBytes = 256 * 1024
|
||||||
MaxProductionEnvelopeFutureSkew = time.Minute
|
MaxProductionEnvelopeFutureSkew = time.Minute
|
||||||
|
ProductionForwardQUICStreamID = 1
|
||||||
|
WebIngressForwardQUICStreamID = 2
|
||||||
|
FabricControlForwardQUICStreamID = 3
|
||||||
|
SyntheticForwardQUICStreamID = 1001
|
||||||
)
|
)
|
||||||
|
|
||||||
type PeerIdentity struct {
|
type PeerIdentity struct {
|
||||||
|
|||||||
@@ -47,6 +47,9 @@ func RankPeerEndpointCandidates(candidates []PeerEndpointCandidate, opts Endpoin
|
|||||||
}
|
}
|
||||||
out := make([]ScoredPeerEndpointCandidate, 0, len(candidates))
|
out := make([]ScoredPeerEndpointCandidate, 0, len(candidates))
|
||||||
for _, candidate := range candidates {
|
for _, candidate := range candidates {
|
||||||
|
if endpointHasUnspecifiedHost(candidate.Address) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
out = append(out, scorePeerEndpointCandidate(candidate, opts))
|
out = append(out, scorePeerEndpointCandidate(candidate, opts))
|
||||||
}
|
}
|
||||||
sort.SliceStable(out, func(i, j int) bool {
|
sort.SliceStable(out, func(i, j int) bool {
|
||||||
@@ -68,25 +71,25 @@ func scorePeerEndpointCandidate(candidate PeerEndpointCandidate, opts EndpointCa
|
|||||||
score := 100
|
score := 100
|
||||||
reasons := []string{"base"}
|
reasons := []string{"base"}
|
||||||
|
|
||||||
switch candidate.Transport {
|
switch strings.ToLower(strings.TrimSpace(candidate.Transport)) {
|
||||||
case "quic", "direct_quic", "udp_quic", "quic_udp":
|
case "quic", "direct_quic", "udp_quic", "quic_udp":
|
||||||
score += 45
|
score += 45
|
||||||
reasons = append(reasons, "transport:quic")
|
reasons = append(reasons, "transport:quic")
|
||||||
case "direct_tcp_tls", "direct_http", "direct_https":
|
case "lan_quic":
|
||||||
score += 35
|
score += 42
|
||||||
reasons = append(reasons, "transport:direct")
|
reasons = append(reasons, "transport:lan_quic")
|
||||||
case "wss":
|
case "ice_quic":
|
||||||
score += 25
|
score += 38
|
||||||
reasons = append(reasons, "transport:wss")
|
reasons = append(reasons, "transport:ice_quic")
|
||||||
case "outbound_reverse":
|
case "reverse_quic":
|
||||||
score += 10
|
score += 15
|
||||||
reasons = append(reasons, "transport:outbound_reverse")
|
reasons = append(reasons, "transport:reverse_quic")
|
||||||
case "relay":
|
case "relay_quic":
|
||||||
score += 5
|
score += 5
|
||||||
reasons = append(reasons, "transport:relay")
|
reasons = append(reasons, "transport:relay_quic")
|
||||||
default:
|
default:
|
||||||
score -= 100
|
score -= 100
|
||||||
reasons = append(reasons, "transport:unknown")
|
reasons = append(reasons, "transport:non_quic_rejected")
|
||||||
}
|
}
|
||||||
|
|
||||||
switch candidate.Reachability {
|
switch candidate.Reachability {
|
||||||
@@ -173,7 +176,8 @@ func scorePeerEndpointCandidate(candidate PeerEndpointCandidate, opts EndpointCa
|
|||||||
score += 8
|
score += 8
|
||||||
reasons = append(reasons, "channel:control-direct")
|
reasons = append(reasons, "channel:control-direct")
|
||||||
}
|
}
|
||||||
if candidate.Transport == "relay" {
|
transport := strings.ToLower(strings.TrimSpace(candidate.Transport))
|
||||||
|
if transport == "relay" || transport == "relay_quic" {
|
||||||
score -= 8
|
score -= 8
|
||||||
reasons = append(reasons, "channel:control-relay-penalty")
|
reasons = append(reasons, "channel:control-relay-penalty")
|
||||||
}
|
}
|
||||||
@@ -234,14 +238,20 @@ func scoreEndpointCandidateObservation(observation EndpointCandidateHealthObserv
|
|||||||
}
|
}
|
||||||
switch {
|
switch {
|
||||||
case observation.LastLatencyMs > 0 && observation.LastLatencyMs <= 50:
|
case observation.LastLatencyMs > 0 && observation.LastLatencyMs <= 50:
|
||||||
score += 18
|
score += 24
|
||||||
reasons = append(reasons, "latency:low")
|
reasons = append(reasons, "latency:low")
|
||||||
case observation.LastLatencyMs > 0 && observation.LastLatencyMs <= 150:
|
case observation.LastLatencyMs > 0 && observation.LastLatencyMs <= 150:
|
||||||
score += 8
|
score += 8
|
||||||
reasons = append(reasons, "latency:moderate")
|
reasons = append(reasons, "latency:moderate")
|
||||||
case observation.LastLatencyMs > 0:
|
case observation.LastLatencyMs > 0 && observation.LastLatencyMs <= 300:
|
||||||
score -= 10
|
score -= 12
|
||||||
reasons = append(reasons, "latency:high")
|
reasons = append(reasons, "latency:high")
|
||||||
|
case observation.LastLatencyMs > 0 && observation.LastLatencyMs <= 750:
|
||||||
|
score -= 32
|
||||||
|
reasons = append(reasons, "latency:very_high")
|
||||||
|
case observation.LastLatencyMs > 0:
|
||||||
|
score -= 60
|
||||||
|
reasons = append(reasons, "latency:extreme")
|
||||||
}
|
}
|
||||||
if observation.ReliabilityScore > 0 {
|
if observation.ReliabilityScore > 0 {
|
||||||
switch {
|
switch {
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ func TestRankPeerEndpointCandidatesPrefersDirectFreshPublicPath(t *testing.T) {
|
|||||||
{
|
{
|
||||||
EndpointID: "node-b-relay",
|
EndpointID: "node-b-relay",
|
||||||
NodeID: "node-b",
|
NodeID: "node-b",
|
||||||
Transport: "relay",
|
Transport: "relay_quic",
|
||||||
Address: "relay.example.test/node-b",
|
Address: "relay.example.test/node-b",
|
||||||
Reachability: "relay",
|
Reachability: "relay",
|
||||||
NATType: "symmetric",
|
NATType: "symmetric",
|
||||||
@@ -25,8 +25,8 @@ func TestRankPeerEndpointCandidatesPrefersDirectFreshPublicPath(t *testing.T) {
|
|||||||
{
|
{
|
||||||
EndpointID: "node-b-public",
|
EndpointID: "node-b-public",
|
||||||
NodeID: "node-b",
|
NodeID: "node-b",
|
||||||
Transport: "direct_tcp_tls",
|
Transport: "direct_quic",
|
||||||
Address: "203.0.113.20:443",
|
Address: "quic://203.0.113.20:19443",
|
||||||
Reachability: "public",
|
Reachability: "public",
|
||||||
NATType: "none",
|
NATType: "none",
|
||||||
ConnectivityMode: "direct",
|
ConnectivityMode: "direct",
|
||||||
@@ -38,8 +38,8 @@ func TestRankPeerEndpointCandidatesPrefersDirectFreshPublicPath(t *testing.T) {
|
|||||||
{
|
{
|
||||||
EndpointID: "node-b-private-stale",
|
EndpointID: "node-b-private-stale",
|
||||||
NodeID: "node-b",
|
NodeID: "node-b",
|
||||||
Transport: "wss",
|
Transport: "lan_quic",
|
||||||
Address: "10.0.0.5:443",
|
Address: "quic://10.0.0.5:19443",
|
||||||
Reachability: "private",
|
Reachability: "private",
|
||||||
NATType: "restricted",
|
NATType: "restricted",
|
||||||
ConnectivityMode: "direct",
|
ConnectivityMode: "direct",
|
||||||
@@ -74,8 +74,8 @@ func TestRankPeerEndpointCandidatesUsesDeterministicTieBreak(t *testing.T) {
|
|||||||
{
|
{
|
||||||
EndpointID: "endpoint-b",
|
EndpointID: "endpoint-b",
|
||||||
NodeID: "node-b",
|
NodeID: "node-b",
|
||||||
Transport: "direct_tcp_tls",
|
Transport: "direct_quic",
|
||||||
Address: "203.0.113.21:443",
|
Address: "quic://203.0.113.21:19443",
|
||||||
Reachability: "public",
|
Reachability: "public",
|
||||||
NATType: "none",
|
NATType: "none",
|
||||||
ConnectivityMode: "direct",
|
ConnectivityMode: "direct",
|
||||||
@@ -84,8 +84,8 @@ func TestRankPeerEndpointCandidatesUsesDeterministicTieBreak(t *testing.T) {
|
|||||||
{
|
{
|
||||||
EndpointID: "endpoint-a",
|
EndpointID: "endpoint-a",
|
||||||
NodeID: "node-b",
|
NodeID: "node-b",
|
||||||
Transport: "direct_tcp_tls",
|
Transport: "direct_quic",
|
||||||
Address: "203.0.113.20:443",
|
Address: "quic://203.0.113.20:19443",
|
||||||
Reachability: "public",
|
Reachability: "public",
|
||||||
NATType: "none",
|
NATType: "none",
|
||||||
ConnectivityMode: "direct",
|
ConnectivityMode: "direct",
|
||||||
@@ -103,10 +103,10 @@ func TestRankPeerEndpointCandidatesPrefersQUICFastPath(t *testing.T) {
|
|||||||
now := time.Date(2026, 5, 16, 12, 0, 0, 0, time.UTC)
|
now := time.Date(2026, 5, 16, 12, 0, 0, 0, time.UTC)
|
||||||
candidates := []PeerEndpointCandidate{
|
candidates := []PeerEndpointCandidate{
|
||||||
{
|
{
|
||||||
EndpointID: "node-b-wss",
|
EndpointID: "node-b-relay",
|
||||||
NodeID: "node-b",
|
NodeID: "node-b",
|
||||||
Transport: "wss",
|
Transport: "relay_quic",
|
||||||
Address: "wss://node-b.example.test",
|
Address: "quic://relay.example.test:19443",
|
||||||
Reachability: "public",
|
Reachability: "public",
|
||||||
NATType: "none",
|
NATType: "none",
|
||||||
ConnectivityMode: "direct",
|
ConnectivityMode: "direct",
|
||||||
@@ -138,14 +138,44 @@ func TestRankPeerEndpointCandidatesPrefersQUICFastPath(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestRankPeerEndpointCandidatesDropsUnspecifiedQUICEndpoint(t *testing.T) {
|
||||||
|
candidates := []PeerEndpointCandidate{
|
||||||
|
{
|
||||||
|
EndpointID: "node-b-unspecified",
|
||||||
|
NodeID: "node-b",
|
||||||
|
Transport: "direct_quic",
|
||||||
|
Address: "quic://[::]:19131",
|
||||||
|
Reachability: "public",
|
||||||
|
NATType: "none",
|
||||||
|
ConnectivityMode: "direct",
|
||||||
|
Priority: 1,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
EndpointID: "node-b-public",
|
||||||
|
NodeID: "node-b",
|
||||||
|
Transport: "direct_quic",
|
||||||
|
Address: "quic://203.0.113.20:19131",
|
||||||
|
Reachability: "public",
|
||||||
|
NATType: "none",
|
||||||
|
ConnectivityMode: "direct",
|
||||||
|
Priority: 10,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
ranked := RankPeerEndpointCandidates(candidates, EndpointCandidateScoreOptions{})
|
||||||
|
if len(ranked) != 1 || ranked[0].Candidate.EndpointID != "node-b-public" {
|
||||||
|
t.Fatalf("unspecified endpoint was not dropped: %+v", ranked)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestRankPeerEndpointCandidatesPrefersCorporatePrivateEndpoint(t *testing.T) {
|
func TestRankPeerEndpointCandidatesPrefersCorporatePrivateEndpoint(t *testing.T) {
|
||||||
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
||||||
candidates := []PeerEndpointCandidate{
|
candidates := []PeerEndpointCandidate{
|
||||||
{
|
{
|
||||||
EndpointID: "node-b-public",
|
EndpointID: "node-b-public",
|
||||||
NodeID: "node-b",
|
NodeID: "node-b",
|
||||||
Transport: "direct_tcp_tls",
|
Transport: "direct_quic",
|
||||||
Address: "203.0.113.20:443",
|
Address: "quic://203.0.113.20:19443",
|
||||||
Reachability: "public",
|
Reachability: "public",
|
||||||
NATType: "none",
|
NATType: "none",
|
||||||
ConnectivityMode: "direct",
|
ConnectivityMode: "direct",
|
||||||
@@ -155,8 +185,8 @@ func TestRankPeerEndpointCandidatesPrefersCorporatePrivateEndpoint(t *testing.T)
|
|||||||
{
|
{
|
||||||
EndpointID: "node-b-corp-lan",
|
EndpointID: "node-b-corp-lan",
|
||||||
NodeID: "node-b",
|
NodeID: "node-b",
|
||||||
Transport: "direct_tcp_tls",
|
Transport: "lan_quic",
|
||||||
Address: "10.24.10.20:19001",
|
Address: "quic://10.24.10.20:19443",
|
||||||
Reachability: "private",
|
Reachability: "private",
|
||||||
NATType: "none",
|
NATType: "none",
|
||||||
ConnectivityMode: "direct",
|
ConnectivityMode: "direct",
|
||||||
@@ -184,7 +214,7 @@ func TestRankPeerEndpointCandidatesDoesNotDropRelayRequiredFallback(t *testing.T
|
|||||||
{
|
{
|
||||||
EndpointID: "node-b-outbound",
|
EndpointID: "node-b-outbound",
|
||||||
NodeID: "node-b",
|
NodeID: "node-b",
|
||||||
Transport: "outbound_reverse",
|
Transport: "reverse_quic",
|
||||||
Address: "node-b.reverse.local",
|
Address: "node-b.reverse.local",
|
||||||
Reachability: "outbound_only",
|
Reachability: "outbound_only",
|
||||||
NATType: "symmetric",
|
NATType: "symmetric",
|
||||||
@@ -194,7 +224,7 @@ func TestRankPeerEndpointCandidatesDoesNotDropRelayRequiredFallback(t *testing.T
|
|||||||
{
|
{
|
||||||
EndpointID: "node-b-relay",
|
EndpointID: "node-b-relay",
|
||||||
NodeID: "node-b",
|
NodeID: "node-b",
|
||||||
Transport: "relay",
|
Transport: "relay_quic",
|
||||||
Address: "relay.example.test/node-b",
|
Address: "relay.example.test/node-b",
|
||||||
Reachability: "relay",
|
Reachability: "relay",
|
||||||
NATType: "blocked",
|
NATType: "blocked",
|
||||||
@@ -222,18 +252,18 @@ func TestRankPeerEndpointCandidatesUsesHealthObservationOverlay(t *testing.T) {
|
|||||||
{
|
{
|
||||||
EndpointID: "node-b-direct",
|
EndpointID: "node-b-direct",
|
||||||
NodeID: "node-b",
|
NodeID: "node-b",
|
||||||
Transport: "direct_tcp_tls",
|
Transport: "direct_quic",
|
||||||
Address: "203.0.113.20:443",
|
Address: "quic://203.0.113.20:19443",
|
||||||
Reachability: "public",
|
Reachability: "public",
|
||||||
NATType: "none",
|
NATType: "none",
|
||||||
ConnectivityMode: "direct",
|
ConnectivityMode: "direct",
|
||||||
Priority: 10,
|
Priority: 10,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
EndpointID: "node-b-wss",
|
EndpointID: "node-b-ice",
|
||||||
NodeID: "node-b",
|
NodeID: "node-b",
|
||||||
Transport: "wss",
|
Transport: "ice_quic",
|
||||||
Address: "node-b.example.test",
|
Address: "quic://node-b.example.test:19443",
|
||||||
Reachability: "public",
|
Reachability: "public",
|
||||||
NATType: "restricted",
|
NATType: "restricted",
|
||||||
ConnectivityMode: "direct",
|
ConnectivityMode: "direct",
|
||||||
@@ -253,8 +283,8 @@ func TestRankPeerEndpointCandidatesUsesHealthObservationOverlay(t *testing.T) {
|
|||||||
ReliabilityScore: 50,
|
ReliabilityScore: 50,
|
||||||
ObservedAt: now.Add(-time.Minute),
|
ObservedAt: now.Add(-time.Minute),
|
||||||
},
|
},
|
||||||
"node-b-wss": {
|
"node-b-ice": {
|
||||||
EndpointID: "node-b-wss",
|
EndpointID: "node-b-ice",
|
||||||
LastLatencyMs: 35,
|
LastLatencyMs: 35,
|
||||||
SuccessCount: 8,
|
SuccessCount: 8,
|
||||||
ReliabilityScore: 95,
|
ReliabilityScore: 95,
|
||||||
@@ -262,8 +292,8 @@ func TestRankPeerEndpointCandidatesUsesHealthObservationOverlay(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
if ranked[0].Candidate.EndpointID != "node-b-wss" {
|
if ranked[0].Candidate.EndpointID != "node-b-ice" {
|
||||||
t.Fatalf("top endpoint = %q, want node-b-wss: %+v", ranked[0].Candidate.EndpointID, ranked)
|
t.Fatalf("top endpoint = %q, want node-b-ice: %+v", ranked[0].Candidate.EndpointID, ranked)
|
||||||
}
|
}
|
||||||
if !containsReason(ranked[0].Reasons, "latency:low") || !containsReason(ranked[0].Reasons, "reliability:high") {
|
if !containsReason(ranked[0].Reasons, "latency:low") || !containsReason(ranked[0].Reasons, "reliability:high") {
|
||||||
t.Fatalf("top reasons missing health hints: %+v", ranked[0].Reasons)
|
t.Fatalf("top reasons missing health hints: %+v", ranked[0].Reasons)
|
||||||
@@ -279,8 +309,8 @@ func TestRankPeerEndpointCandidatesTreatsStaleObservationAsPenalty(t *testing.T)
|
|||||||
{
|
{
|
||||||
EndpointID: "node-b-direct",
|
EndpointID: "node-b-direct",
|
||||||
NodeID: "node-b",
|
NodeID: "node-b",
|
||||||
Transport: "direct_tcp_tls",
|
Transport: "direct_quic",
|
||||||
Address: "203.0.113.20:443",
|
Address: "quic://203.0.113.20:19443",
|
||||||
Reachability: "public",
|
Reachability: "public",
|
||||||
NATType: "none",
|
NATType: "none",
|
||||||
ConnectivityMode: "direct",
|
ConnectivityMode: "direct",
|
||||||
@@ -321,10 +351,10 @@ func TestRankPeerEndpointCandidatesDoesNotRewardZeroLatencyFailure(t *testing.T)
|
|||||||
LastVerifiedAt: &now,
|
LastVerifiedAt: &now,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
EndpointID: "node-b-wss",
|
EndpointID: "node-b-ice",
|
||||||
NodeID: "node-b",
|
NodeID: "node-b",
|
||||||
Transport: "wss",
|
Transport: "ice_quic",
|
||||||
Address: "https://node-b.example.test:443",
|
Address: "quic://node-b.example.test:19444",
|
||||||
Reachability: "public",
|
Reachability: "public",
|
||||||
ConnectivityMode: "direct",
|
ConnectivityMode: "direct",
|
||||||
Priority: 10,
|
Priority: 10,
|
||||||
@@ -345,14 +375,81 @@ func TestRankPeerEndpointCandidatesDoesNotRewardZeroLatencyFailure(t *testing.T)
|
|||||||
},
|
},
|
||||||
MaxObservationAge: time.Minute,
|
MaxObservationAge: time.Minute,
|
||||||
})
|
})
|
||||||
if ranked[0].Candidate.EndpointID != "node-b-wss" {
|
if ranked[0].Candidate.EndpointID != "node-b-ice" {
|
||||||
t.Fatalf("top endpoint = %q, want wss after repeated quic failures: %+v", ranked[0].Candidate.EndpointID, ranked)
|
t.Fatalf("top endpoint = %q, want ice_quic after repeated direct QUIC failures: %+v", ranked[0].Candidate.EndpointID, ranked)
|
||||||
}
|
}
|
||||||
if containsReason(ranked[1].Reasons, "latency:moderate") {
|
if containsReason(ranked[1].Reasons, "latency:moderate") {
|
||||||
t.Fatalf("zero latency failure was rewarded as moderate latency: %+v", ranked[1].Reasons)
|
t.Fatalf("zero latency failure was rewarded as moderate latency: %+v", ranked[1].Reasons)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestRankPeerEndpointCandidatesPenalizesSevereLatencyGradient(t *testing.T) {
|
||||||
|
now := time.Date(2026, 5, 17, 6, 0, 0, 0, time.UTC)
|
||||||
|
candidates := []PeerEndpointCandidate{
|
||||||
|
{
|
||||||
|
EndpointID: "node-b-lan",
|
||||||
|
NodeID: "node-b",
|
||||||
|
Transport: "direct_quic",
|
||||||
|
Address: "quic://10.0.0.2:19443",
|
||||||
|
Reachability: "private",
|
||||||
|
ConnectivityMode: "direct",
|
||||||
|
LastVerifiedAt: &now,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
EndpointID: "node-b-wan",
|
||||||
|
NodeID: "node-b",
|
||||||
|
Transport: "direct_quic",
|
||||||
|
Address: "quic://203.0.113.20:19443",
|
||||||
|
Reachability: "public",
|
||||||
|
ConnectivityMode: "direct",
|
||||||
|
LastVerifiedAt: &now,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
EndpointID: "node-b-bad-relay",
|
||||||
|
NodeID: "node-b",
|
||||||
|
Transport: "relay_quic",
|
||||||
|
Address: "quic://relay.example.test:19443",
|
||||||
|
Reachability: "relay",
|
||||||
|
ConnectivityMode: "relay_required",
|
||||||
|
LastVerifiedAt: &now,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
ranked := RankPeerEndpointCandidates(candidates, EndpointCandidateScoreOptions{
|
||||||
|
Now: now,
|
||||||
|
MaxVerificationAge: time.Minute,
|
||||||
|
MaxObservationAge: time.Minute,
|
||||||
|
Observations: map[string]EndpointCandidateHealthObservation{
|
||||||
|
"node-b-lan": {
|
||||||
|
EndpointID: "node-b-lan",
|
||||||
|
LastLatencyMs: 4,
|
||||||
|
ReliabilityScore: 95,
|
||||||
|
ObservedAt: now,
|
||||||
|
},
|
||||||
|
"node-b-wan": {
|
||||||
|
EndpointID: "node-b-wan",
|
||||||
|
LastLatencyMs: 420,
|
||||||
|
ReliabilityScore: 95,
|
||||||
|
ObservedAt: now,
|
||||||
|
},
|
||||||
|
"node-b-bad-relay": {
|
||||||
|
EndpointID: "node-b-bad-relay",
|
||||||
|
LastLatencyMs: 900,
|
||||||
|
ReliabilityScore: 95,
|
||||||
|
ObservedAt: now,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
if ranked[0].Candidate.EndpointID != "node-b-lan" || ranked[1].Candidate.EndpointID != "node-b-wan" || ranked[2].Candidate.EndpointID != "node-b-bad-relay" {
|
||||||
|
t.Fatalf("ranked endpoints = %+v, want lan, wan, bad relay", ranked)
|
||||||
|
}
|
||||||
|
if !containsReason(ranked[1].Reasons, "latency:very_high") {
|
||||||
|
t.Fatalf("wan reasons = %+v, want latency:very_high", ranked[1].Reasons)
|
||||||
|
}
|
||||||
|
if !containsReason(ranked[2].Reasons, "latency:extreme") {
|
||||||
|
t.Fatalf("relay reasons = %+v, want latency:extreme", ranked[2].Reasons)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestRankPeerEndpointCandidatesTreatsCapacityAsSoftPressure(t *testing.T) {
|
func TestRankPeerEndpointCandidatesTreatsCapacityAsSoftPressure(t *testing.T) {
|
||||||
now := time.Date(2026, 5, 16, 12, 0, 0, 0, time.UTC)
|
now := time.Date(2026, 5, 16, 12, 0, 0, 0, time.UTC)
|
||||||
ranked := RankPeerEndpointCandidates([]PeerEndpointCandidate{
|
ranked := RankPeerEndpointCandidates([]PeerEndpointCandidate{
|
||||||
|
|||||||
@@ -0,0 +1,217 @@
|
|||||||
|
package mesh
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// FabricChannelRouteEventType labels what a FabricChannelRouteEvent reports.
type FabricChannelRouteEventType string

// Route event types emitted by FabricChannelRouter.
const (
	// FabricChannelRouteEventNone means the observation did not change the route.
	FabricChannelRouteEventNone FabricChannelRouteEventType = ""
	// FabricChannelRouteEventOpened is emitted when a channel is first bound to a route.
	FabricChannelRouteEventOpened FabricChannelRouteEventType = "opened"
	// FabricChannelRouteEventReroute is emitted when a channel moves to another route.
	FabricChannelRouteEventReroute FabricChannelRouteEventType = "reroute"
)

// ErrFabricRouteRerouteSuppressed is the sentinel error for a suppressed reroute.
var ErrFabricRouteRerouteSuppressed = errors.New("fabric route reroute suppressed")
|
||||||
|
|
||||||
|
type FabricChannelRouterConfig struct {
|
||||||
|
SchedulerConfig FabricRouteSchedulerConfig
|
||||||
|
MaxAckLatencyMs int64
|
||||||
|
MaxRoutePressure int
|
||||||
|
MinRerouteInterval time.Duration
|
||||||
|
ProjectedChannelCost int
|
||||||
|
}
|
||||||
|
|
||||||
|
type FabricChannelRouter struct {
|
||||||
|
Config FabricChannelRouterConfig
|
||||||
|
Scheduler FabricRouteScheduler
|
||||||
|
}
|
||||||
|
|
||||||
|
// FabricChannelObservation is one measurement reported for a channel on its
// current route. ObserveChannel adds the byte and frame counters onto the
// channel totals.
type FabricChannelObservation struct {
	// ChannelID identifies the observed channel.
	ChannelID string
	// RouteID identifies the route the measurement was taken on.
	RouteID string
	// AckLatencyMs is compared against FabricChannelRouterConfig.MaxAckLatencyMs.
	AckLatencyMs int64
	// Failed marks the observation as a route failure.
	Failed bool
	// BytesSent is added to the channel's BytesSent total.
	BytesSent uint64
	// BytesRecv is added to the channel's BytesRecv total.
	BytesRecv uint64
	// FramesSent is added to the channel's FramesSent total.
	FramesSent uint64
	// FramesRecv is added to the channel's FramesRecv total.
	FramesRecv uint64
	// Reason, when non-blank, becomes the reason on a resulting reroute event.
	Reason string
	// ObservedAt defaults to the observation call time when zero.
	ObservedAt time.Time
}
|
||||||
|
|
||||||
|
type FabricChannelRouteEvent struct {
|
||||||
|
Type FabricChannelRouteEventType
|
||||||
|
Reason string
|
||||||
|
PreviousRoute FabricRoute
|
||||||
|
NextRoute FabricRoute
|
||||||
|
Choice FabricRouteChoice
|
||||||
|
Observation FabricChannelObservation
|
||||||
|
Channel FabricChannel
|
||||||
|
OccurredAt time.Time
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewFabricChannelRouter(cfg FabricChannelRouterConfig) FabricChannelRouter {
|
||||||
|
cfg = normalizeFabricChannelRouterConfig(cfg)
|
||||||
|
return FabricChannelRouter{
|
||||||
|
Config: cfg,
|
||||||
|
Scheduler: NewFabricRouteScheduler(cfg.SchedulerConfig),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r FabricChannelRouter) OpenChannel(spec FabricChannelSpec, routeSet FabricRouteSet, now time.Time) (FabricChannel, FabricChannelRouteEvent, error) {
|
||||||
|
if now.IsZero() {
|
||||||
|
now = time.Now().UTC()
|
||||||
|
}
|
||||||
|
choice, err := r.Scheduler.ChooseRoute(spec, routeSet, now)
|
||||||
|
if err != nil {
|
||||||
|
return FabricChannel{}, FabricChannelRouteEvent{}, err
|
||||||
|
}
|
||||||
|
channel := FabricChannel{
|
||||||
|
Spec: spec,
|
||||||
|
State: FabricChannelOpen,
|
||||||
|
RouteID: choice.Route.RouteID,
|
||||||
|
TargetNode: choice.Route.DestinationNodeID,
|
||||||
|
OpenedAt: now,
|
||||||
|
}
|
||||||
|
event := FabricChannelRouteEvent{
|
||||||
|
Type: FabricChannelRouteEventOpened,
|
||||||
|
Reason: choice.Reason,
|
||||||
|
NextRoute: choice.Route,
|
||||||
|
Choice: choice,
|
||||||
|
Channel: channel,
|
||||||
|
OccurredAt: now,
|
||||||
|
}
|
||||||
|
return channel, event, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r FabricChannelRouter) ObserveChannel(channel FabricChannel, routeSet FabricRouteSet, observation FabricChannelObservation, now time.Time) (FabricChannel, FabricChannelRouteEvent, error) {
|
||||||
|
if now.IsZero() {
|
||||||
|
now = time.Now().UTC()
|
||||||
|
}
|
||||||
|
if observation.ObservedAt.IsZero() {
|
||||||
|
observation.ObservedAt = now
|
||||||
|
}
|
||||||
|
channel.BytesSent += observation.BytesSent
|
||||||
|
channel.BytesRecv += observation.BytesRecv
|
||||||
|
channel.FramesSent += observation.FramesSent
|
||||||
|
channel.FramesRecv += observation.FramesRecv
|
||||||
|
if channel.State == "" {
|
||||||
|
channel.State = FabricChannelOpen
|
||||||
|
}
|
||||||
|
if !r.shouldReroute(channel, observation, routeSet, now) {
|
||||||
|
return channel, FabricChannelRouteEvent{Type: FabricChannelRouteEventNone, Observation: observation, Channel: channel, OccurredAt: now}, nil
|
||||||
|
}
|
||||||
|
previous, _ := findFabricRoute(routeSet, channel.RouteID)
|
||||||
|
choice, err := r.chooseAlternativeRoute(channel.Spec, routeSet, channel.RouteID, now)
|
||||||
|
if err != nil {
|
||||||
|
return channel, FabricChannelRouteEvent{}, err
|
||||||
|
}
|
||||||
|
channel.RouteID = choice.Route.RouteID
|
||||||
|
channel.TargetNode = choice.Route.DestinationNodeID
|
||||||
|
channel.LastReroute = now
|
||||||
|
channel.RerouteCount++
|
||||||
|
reason := observation.Reason
|
||||||
|
if strings.TrimSpace(reason) == "" {
|
||||||
|
reason = rerouteReason(r.Config, observation, previous)
|
||||||
|
}
|
||||||
|
event := FabricChannelRouteEvent{
|
||||||
|
Type: FabricChannelRouteEventReroute,
|
||||||
|
Reason: reason,
|
||||||
|
PreviousRoute: previous,
|
||||||
|
NextRoute: choice.Route,
|
||||||
|
Choice: choice,
|
||||||
|
Observation: observation,
|
||||||
|
Channel: channel,
|
||||||
|
OccurredAt: now,
|
||||||
|
}
|
||||||
|
return channel, event, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r FabricChannelRouter) shouldReroute(channel FabricChannel, observation FabricChannelObservation, routeSet FabricRouteSet, now time.Time) bool {
|
||||||
|
cfg := normalizeFabricChannelRouterConfig(r.Config)
|
||||||
|
if cfg.MinRerouteInterval > 0 && !channel.LastReroute.IsZero() && now.Sub(channel.LastReroute) < cfg.MinRerouteInterval {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if observation.Failed {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if cfg.MaxAckLatencyMs > 0 && observation.AckLatencyMs > cfg.MaxAckLatencyMs {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if cfg.MaxRoutePressure > 0 {
|
||||||
|
if route, ok := findFabricRoute(routeSet, channel.RouteID); ok && fabricRoutePressurePercent(route, cfg.ProjectedChannelCost) > cfg.MaxRoutePressure {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r FabricChannelRouter) chooseAlternativeRoute(spec FabricChannelSpec, routeSet FabricRouteSet, currentRouteID string, now time.Time) (FabricRouteChoice, error) {
|
||||||
|
routes := flattenFabricRouteSet(routeSet)
|
||||||
|
alternatives := make([]FabricRoute, 0, len(routes))
|
||||||
|
for _, route := range routes {
|
||||||
|
if route.RouteID == currentRouteID {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
alternatives = append(alternatives, route)
|
||||||
|
}
|
||||||
|
if len(alternatives) == 0 {
|
||||||
|
return FabricRouteChoice{}, ErrFabricRouteNotFound
|
||||||
|
}
|
||||||
|
return r.Scheduler.ChooseRoute(spec, routeSetFromRoutes(routeSet, alternatives), now)
|
||||||
|
}
|
||||||
|
|
||||||
|
func normalizeFabricChannelRouterConfig(cfg FabricChannelRouterConfig) FabricChannelRouterConfig {
|
||||||
|
if cfg.ProjectedChannelCost <= 0 {
|
||||||
|
cfg.ProjectedChannelCost = 1
|
||||||
|
}
|
||||||
|
if cfg.SchedulerConfig.ProjectedChannelCost <= 0 {
|
||||||
|
cfg.SchedulerConfig.ProjectedChannelCost = cfg.ProjectedChannelCost
|
||||||
|
}
|
||||||
|
if cfg.MaxRoutePressure <= 0 {
|
||||||
|
cfg.MaxRoutePressure = 90
|
||||||
|
}
|
||||||
|
return cfg
|
||||||
|
}
|
||||||
|
|
||||||
|
func rerouteReason(cfg FabricChannelRouterConfig, observation FabricChannelObservation, route FabricRoute) string {
|
||||||
|
cfg = normalizeFabricChannelRouterConfig(cfg)
|
||||||
|
switch {
|
||||||
|
case observation.Failed:
|
||||||
|
return "route_failure"
|
||||||
|
case cfg.MaxAckLatencyMs > 0 && observation.AckLatencyMs > cfg.MaxAckLatencyMs:
|
||||||
|
return "ack_latency_threshold"
|
||||||
|
case cfg.MaxRoutePressure > 0 && fabricRoutePressurePercent(route, cfg.ProjectedChannelCost) > cfg.MaxRoutePressure:
|
||||||
|
return "route_capacity_pressure"
|
||||||
|
default:
|
||||||
|
return "route_degraded"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func findFabricRoute(routeSet FabricRouteSet, routeID string) (FabricRoute, bool) {
|
||||||
|
routeID = strings.TrimSpace(routeID)
|
||||||
|
if routeID == "" {
|
||||||
|
return FabricRoute{}, false
|
||||||
|
}
|
||||||
|
for _, route := range flattenFabricRouteSet(routeSet) {
|
||||||
|
if route.RouteID == routeID {
|
||||||
|
return route, true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return FabricRoute{}, false
|
||||||
|
}
|
||||||
|
|
||||||
|
func routeSetFromRoutes(template FabricRouteSet, routes []FabricRoute) FabricRouteSet {
|
||||||
|
out := FabricRouteSet{TargetKind: template.TargetKind, TargetID: template.TargetID}
|
||||||
|
if len(routes) == 0 {
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
out.Primary = routes[0]
|
||||||
|
if len(routes) > 1 {
|
||||||
|
out.WarmStandby = append(out.WarmStandby, routes[1:]...)
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
@@ -0,0 +1,151 @@
|
|||||||
|
package mesh
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestFabricChannelRouterOpensOnBestRoute(t *testing.T) {
|
||||||
|
router := NewFabricChannelRouter(FabricChannelRouterConfig{})
|
||||||
|
now := time.Now()
|
||||||
|
channel, event, err := router.OpenChannel(testFabricChannelSpec(FabricChannelTargetNode, "node-b"), FabricRouteSet{
|
||||||
|
TargetKind: FabricChannelTargetNode,
|
||||||
|
TargetID: "node-b",
|
||||||
|
Primary: testFabricRoute("route-slow", "node-b", 80, 100, 0, true),
|
||||||
|
WarmStandby: []FabricRoute{
|
||||||
|
testFabricRoute("route-fast", "node-b", 15, 100, 0, true),
|
||||||
|
},
|
||||||
|
}, now)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("open channel: %v", err)
|
||||||
|
}
|
||||||
|
if channel.RouteID != "route-fast" || channel.State != FabricChannelOpen {
|
||||||
|
t.Fatalf("channel = %+v, want route-fast open", channel)
|
||||||
|
}
|
||||||
|
if event.Type != FabricChannelRouteEventOpened || event.NextRoute.RouteID != "route-fast" {
|
||||||
|
t.Fatalf("event = %+v", event)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFabricChannelRouterReroutesOnSlowAck(t *testing.T) {
|
||||||
|
router := NewFabricChannelRouter(FabricChannelRouterConfig{MaxAckLatencyMs: 30})
|
||||||
|
now := time.Now()
|
||||||
|
routeSet := FabricRouteSet{
|
||||||
|
TargetKind: FabricChannelTargetNode,
|
||||||
|
TargetID: "node-b",
|
||||||
|
Primary: testFabricRoute("route-primary", "node-b", 10, 100, 0, true),
|
||||||
|
WarmStandby: []FabricRoute{
|
||||||
|
testFabricRoute("route-standby", "node-b", 20, 100, 0, true),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
channel := FabricChannel{
|
||||||
|
Spec: testFabricChannelSpec(FabricChannelTargetNode, "node-b"),
|
||||||
|
State: FabricChannelOpen,
|
||||||
|
RouteID: "route-primary",
|
||||||
|
OpenedAt: now.Add(-time.Minute),
|
||||||
|
}
|
||||||
|
updated, event, err := router.ObserveChannel(channel, routeSet, FabricChannelObservation{
|
||||||
|
ChannelID: channel.Spec.ChannelID,
|
||||||
|
RouteID: channel.RouteID,
|
||||||
|
AckLatencyMs: 120,
|
||||||
|
BytesSent: 4096,
|
||||||
|
FramesSent: 4,
|
||||||
|
}, now)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("observe channel: %v", err)
|
||||||
|
}
|
||||||
|
if event.Type != FabricChannelRouteEventReroute || event.Reason != "ack_latency_threshold" {
|
||||||
|
t.Fatalf("event = %+v", event)
|
||||||
|
}
|
||||||
|
if updated.RouteID != "route-standby" || updated.RerouteCount != 1 || updated.BytesSent != 4096 || updated.FramesSent != 4 {
|
||||||
|
t.Fatalf("updated = %+v", updated)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFabricChannelRouterReroutesPoolTargetOnFailure(t *testing.T) {
|
||||||
|
router := NewFabricChannelRouter(FabricChannelRouterConfig{})
|
||||||
|
now := time.Now()
|
||||||
|
routeSet := FabricRouteSet{
|
||||||
|
TargetKind: FabricChannelTargetPool,
|
||||||
|
TargetID: "pool-egress",
|
||||||
|
Primary: testFabricPoolRoute("route-node-b", "node-b", 10, true),
|
||||||
|
WarmStandby: []FabricRoute{
|
||||||
|
testFabricPoolRoute("route-node-c", "node-c", 20, true),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
channel := FabricChannel{
|
||||||
|
Spec: testFabricChannelSpec(FabricChannelTargetPool, "pool-egress"),
|
||||||
|
State: FabricChannelOpen,
|
||||||
|
RouteID: "route-node-b",
|
||||||
|
TargetNode: "node-b",
|
||||||
|
OpenedAt: now.Add(-time.Minute),
|
||||||
|
}
|
||||||
|
updated, event, err := router.ObserveChannel(channel, routeSet, FabricChannelObservation{
|
||||||
|
ChannelID: channel.Spec.ChannelID,
|
||||||
|
RouteID: channel.RouteID,
|
||||||
|
Failed: true,
|
||||||
|
Reason: "target_failed",
|
||||||
|
}, now)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("observe channel: %v", err)
|
||||||
|
}
|
||||||
|
if event.Type != FabricChannelRouteEventReroute || event.PreviousRoute.RouteID != "route-node-b" || event.NextRoute.RouteID != "route-node-c" {
|
||||||
|
t.Fatalf("event = %+v", event)
|
||||||
|
}
|
||||||
|
if updated.TargetNode != "node-c" || updated.RouteID != "route-node-c" {
|
||||||
|
t.Fatalf("updated = %+v", updated)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFabricChannelRouterSuppressesRerouteInsideHysteresis(t *testing.T) {
|
||||||
|
router := NewFabricChannelRouter(FabricChannelRouterConfig{MaxAckLatencyMs: 30, MinRerouteInterval: time.Minute})
|
||||||
|
now := time.Now()
|
||||||
|
channel := FabricChannel{
|
||||||
|
Spec: testFabricChannelSpec(FabricChannelTargetNode, "node-b"),
|
||||||
|
State: FabricChannelOpen,
|
||||||
|
RouteID: "route-primary",
|
||||||
|
LastReroute: now.Add(-10 * time.Second),
|
||||||
|
}
|
||||||
|
updated, event, err := router.ObserveChannel(channel, FabricRouteSet{
|
||||||
|
TargetKind: FabricChannelTargetNode,
|
||||||
|
TargetID: "node-b",
|
||||||
|
Primary: testFabricRoute("route-primary", "node-b", 10, 100, 0, true),
|
||||||
|
WarmStandby: []FabricRoute{testFabricRoute("route-standby", "node-b", 20, 100, 0, true)},
|
||||||
|
}, FabricChannelObservation{AckLatencyMs: 120}, now)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("observe channel: %v", err)
|
||||||
|
}
|
||||||
|
if event.Type != FabricChannelRouteEventNone || updated.RouteID != "route-primary" {
|
||||||
|
t.Fatalf("event=%+v updated=%+v", event, updated)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func testFabricChannelSpec(kind FabricChannelTargetKind, targetID string) FabricChannelSpec {
|
||||||
|
return FabricChannelSpec{
|
||||||
|
ChannelID: "channel-1",
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
SourceNodeID: "node-a",
|
||||||
|
TargetKind: kind,
|
||||||
|
TargetID: targetID,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func testFabricRoute(routeID string, destination string, latency int, capacity int, active int, healthy bool) FabricRoute {
|
||||||
|
return FabricRoute{
|
||||||
|
RouteID: routeID,
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
SourceNodeID: "node-a",
|
||||||
|
DestinationNodeID: destination,
|
||||||
|
Hops: []FabricRouteHop{{NodeID: "node-a"}, {NodeID: destination}},
|
||||||
|
BaseLatencyMs: latency,
|
||||||
|
Capacity: capacity,
|
||||||
|
ActiveChannels: active,
|
||||||
|
Healthy: healthy,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func testFabricPoolRoute(routeID string, destination string, latency int, healthy bool) FabricRoute {
|
||||||
|
route := testFabricRoute(routeID, destination, latency, 100, 0, healthy)
|
||||||
|
route.PoolID = "pool-egress"
|
||||||
|
return route
|
||||||
|
}
|
||||||
@@ -0,0 +1,487 @@
|
|||||||
|
package mesh
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
|
||||||
|
)
|
||||||
|
|
||||||
|
type FabricChannelRuntimeConfig struct {
|
||||||
|
RouterConfig FabricChannelRouterConfig
|
||||||
|
StreamID uint64
|
||||||
|
TrafficClass fabricproto.TrafficClass
|
||||||
|
Timeout time.Duration
|
||||||
|
MaxPayload int
|
||||||
|
RouteHealthTTL time.Duration
|
||||||
|
}
|
||||||
|
|
||||||
|
type FabricChannelRuntime struct {
|
||||||
|
Transport FabricTransport
|
||||||
|
Router FabricChannelRouter
|
||||||
|
Pressure *FabricRoutePressureTracker
|
||||||
|
Health *FabricRouteHealthTracker
|
||||||
|
Config FabricChannelRuntimeConfig
|
||||||
|
}
|
||||||
|
|
||||||
|
type FabricChannelRuntimeResult struct {
|
||||||
|
Channel FabricChannel
|
||||||
|
BytesSent uint64
|
||||||
|
BytesRecv uint64
|
||||||
|
FramesSent uint64
|
||||||
|
FramesRecv uint64
|
||||||
|
AcksReceived uint64
|
||||||
|
RouteEvents []FabricChannelRouteEvent
|
||||||
|
RouteAttempts []string
|
||||||
|
MigrationEvents int
|
||||||
|
RoutePressure FabricRoutePressureSnapshot
|
||||||
|
RouteHealth FabricRouteHealthSnapshot
|
||||||
|
}
|
||||||
|
|
||||||
|
type FabricChannelRequestResponseResult struct {
|
||||||
|
FabricChannelRuntimeResult
|
||||||
|
ResponsePayload []byte
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewFabricChannelRuntime(transport FabricTransport, cfg FabricChannelRuntimeConfig) *FabricChannelRuntime {
|
||||||
|
if cfg.StreamID == 0 {
|
||||||
|
cfg.StreamID = 2
|
||||||
|
}
|
||||||
|
if cfg.TrafficClass == 0 {
|
||||||
|
cfg.TrafficClass = fabricproto.TrafficClassBulk
|
||||||
|
}
|
||||||
|
if cfg.Timeout <= 0 {
|
||||||
|
cfg.Timeout = 30 * time.Second
|
||||||
|
}
|
||||||
|
if cfg.MaxPayload <= 0 {
|
||||||
|
cfg.MaxPayload = fabricproto.DefaultMaxPayload
|
||||||
|
}
|
||||||
|
return &FabricChannelRuntime{
|
||||||
|
Transport: transport,
|
||||||
|
Router: NewFabricChannelRouter(cfg.RouterConfig),
|
||||||
|
Pressure: NewFabricRoutePressureTracker(),
|
||||||
|
Health: NewFabricRouteHealthTracker(cfg.RouteHealthTTL),
|
||||||
|
Config: cfg,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *FabricChannelRuntime) SendReliable(ctx context.Context, spec FabricChannelSpec, routeSet FabricRouteSet, payloads [][]byte) (FabricChannelRuntimeResult, error) {
|
||||||
|
if r == nil || r.Transport == nil {
|
||||||
|
return FabricChannelRuntimeResult{}, ErrForwardRuntimeUnavailable
|
||||||
|
}
|
||||||
|
now := time.Now().UTC()
|
||||||
|
routeSet = r.routeSetForScheduling(routeSet)
|
||||||
|
channel, event, err := r.Router.OpenChannel(spec, routeSet, now)
|
||||||
|
if err != nil {
|
||||||
|
return FabricChannelRuntimeResult{}, err
|
||||||
|
}
|
||||||
|
result := FabricChannelRuntimeResult{Channel: channel, RouteEvents: []FabricChannelRouteEvent{event}}
|
||||||
|
sequence := uint64(0)
|
||||||
|
index := 0
|
||||||
|
for index < len(payloads) {
|
||||||
|
routeSet = r.routeSetForScheduling(routeSet)
|
||||||
|
route, ok := findFabricRoute(routeSet, channel.RouteID)
|
||||||
|
if !ok {
|
||||||
|
return result, ErrFabricRouteNotFound
|
||||||
|
}
|
||||||
|
result.RouteAttempts = append(result.RouteAttempts, route.RouteID)
|
||||||
|
target, err := FabricTransportTargetForRoute(route)
|
||||||
|
if err != nil {
|
||||||
|
return result, err
|
||||||
|
}
|
||||||
|
releaseRoute := r.acquireRoute(route.RouteID)
|
||||||
|
session, err := r.Transport.Connect(ctx, target)
|
||||||
|
if err != nil {
|
||||||
|
releaseRoute()
|
||||||
|
r.markRouteFailure(route.RouteID, err)
|
||||||
|
updated, event, rerouteErr := r.Router.ObserveChannel(channel, routeSet, FabricChannelObservation{
|
||||||
|
ChannelID: spec.ChannelID,
|
||||||
|
RouteID: route.RouteID,
|
||||||
|
Failed: true,
|
||||||
|
Reason: "connect_failed",
|
||||||
|
ObservedAt: time.Now().UTC(),
|
||||||
|
}, time.Now().UTC())
|
||||||
|
channel = updated
|
||||||
|
result.Channel = channel
|
||||||
|
if event.Type == FabricChannelRouteEventReroute {
|
||||||
|
result.RouteEvents = append(result.RouteEvents, event)
|
||||||
|
result.MigrationEvents++
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if rerouteErr != nil {
|
||||||
|
return result, rerouteErr
|
||||||
|
}
|
||||||
|
return result, err
|
||||||
|
}
|
||||||
|
migrated, sendErr := r.sendOnSession(ctx, session, &channel, routeSet, route, payloads, &index, &sequence, &result)
|
||||||
|
_ = session.Close()
|
||||||
|
releaseRoute()
|
||||||
|
result.Channel = channel
|
||||||
|
if sendErr != nil {
|
||||||
|
return result, sendErr
|
||||||
|
}
|
||||||
|
if !migrated {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
result.Channel = channel
|
||||||
|
result.RoutePressure = r.snapshotRoutePressure()
|
||||||
|
result.RouteHealth = r.snapshotRouteHealth()
|
||||||
|
return result, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *FabricChannelRuntime) SendRequestResponse(ctx context.Context, spec FabricChannelSpec, routeSet FabricRouteSet, payload []byte) (FabricChannelRequestResponseResult, error) {
|
||||||
|
if r == nil || r.Transport == nil {
|
||||||
|
return FabricChannelRequestResponseResult{}, ErrForwardRuntimeUnavailable
|
||||||
|
}
|
||||||
|
if len(payload) > r.Config.MaxPayload {
|
||||||
|
return FabricChannelRequestResponseResult{}, fmt.Errorf("%w: %d > %d", fabricproto.ErrInvalidPayloadLen, len(payload), r.Config.MaxPayload)
|
||||||
|
}
|
||||||
|
now := time.Now().UTC()
|
||||||
|
routeSet = r.routeSetForScheduling(routeSet)
|
||||||
|
channel, event, err := r.Router.OpenChannel(spec, routeSet, now)
|
||||||
|
if err != nil {
|
||||||
|
return FabricChannelRequestResponseResult{}, err
|
||||||
|
}
|
||||||
|
result := FabricChannelRequestResponseResult{
|
||||||
|
FabricChannelRuntimeResult: FabricChannelRuntimeResult{Channel: channel, RouteEvents: []FabricChannelRouteEvent{event}},
|
||||||
|
}
|
||||||
|
sequence := uint64(1)
|
||||||
|
for {
|
||||||
|
routeSet = r.routeSetForScheduling(routeSet)
|
||||||
|
route, ok := findFabricRoute(routeSet, channel.RouteID)
|
||||||
|
if !ok {
|
||||||
|
return result, ErrFabricRouteNotFound
|
||||||
|
}
|
||||||
|
result.RouteAttempts = append(result.RouteAttempts, route.RouteID)
|
||||||
|
target, err := FabricTransportTargetForRoute(route)
|
||||||
|
if err != nil {
|
||||||
|
return result, err
|
||||||
|
}
|
||||||
|
releaseRoute := r.acquireRoute(route.RouteID)
|
||||||
|
session, err := r.Transport.Connect(ctx, target)
|
||||||
|
if err != nil {
|
||||||
|
releaseRoute()
|
||||||
|
r.markRouteFailure(route.RouteID, err)
|
||||||
|
updated, routeEvent, rerouteErr := r.Router.ObserveChannel(channel, routeSet, FabricChannelObservation{
|
||||||
|
ChannelID: spec.ChannelID,
|
||||||
|
RouteID: route.RouteID,
|
||||||
|
Failed: true,
|
||||||
|
Reason: "connect_failed",
|
||||||
|
ObservedAt: time.Now().UTC(),
|
||||||
|
}, time.Now().UTC())
|
||||||
|
channel = updated
|
||||||
|
result.Channel = channel
|
||||||
|
if routeEvent.Type == FabricChannelRouteEventReroute {
|
||||||
|
result.RouteEvents = append(result.RouteEvents, routeEvent)
|
||||||
|
result.MigrationEvents++
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if rerouteErr != nil {
|
||||||
|
return result, rerouteErr
|
||||||
|
}
|
||||||
|
return result, err
|
||||||
|
}
|
||||||
|
response, ackMs, sendErr := r.sendRequestResponseOnSession(ctx, session, route.RouteID, spec.ChannelID, payload, sequence)
|
||||||
|
_ = session.Close()
|
||||||
|
releaseRoute()
|
||||||
|
result.Channel = channel
|
||||||
|
if sendErr == nil {
|
||||||
|
r.markRouteSuccess(route.RouteID)
|
||||||
|
result.BytesSent += uint64(len(payload))
|
||||||
|
result.FramesSent++
|
||||||
|
result.BytesRecv += uint64(len(response))
|
||||||
|
result.FramesRecv++
|
||||||
|
result.AcksReceived++
|
||||||
|
updated, routeEvent, observeErr := r.Router.ObserveChannel(channel, routeSet, FabricChannelObservation{
|
||||||
|
ChannelID: spec.ChannelID,
|
||||||
|
RouteID: route.RouteID,
|
||||||
|
AckLatencyMs: ackMs,
|
||||||
|
BytesSent: uint64(len(payload)),
|
||||||
|
FramesSent: 1,
|
||||||
|
BytesRecv: uint64(len(response)),
|
||||||
|
FramesRecv: 1,
|
||||||
|
ObservedAt: time.Now().UTC(),
|
||||||
|
}, time.Now().UTC())
|
||||||
|
channel = updated
|
||||||
|
result.Channel = channel
|
||||||
|
if observeErr != nil {
|
||||||
|
return result, observeErr
|
||||||
|
}
|
||||||
|
if routeEvent.Type == FabricChannelRouteEventReroute {
|
||||||
|
result.RouteEvents = append(result.RouteEvents, routeEvent)
|
||||||
|
result.MigrationEvents++
|
||||||
|
}
|
||||||
|
result.ResponsePayload = response
|
||||||
|
result.RoutePressure = r.snapshotRoutePressure()
|
||||||
|
result.RouteHealth = r.snapshotRouteHealth()
|
||||||
|
return result, nil
|
||||||
|
}
|
||||||
|
r.markRouteFailure(route.RouteID, sendErr)
|
||||||
|
updated, routeEvent, rerouteErr := r.Router.ObserveChannel(channel, routeSet, FabricChannelObservation{
|
||||||
|
ChannelID: spec.ChannelID,
|
||||||
|
RouteID: route.RouteID,
|
||||||
|
Failed: true,
|
||||||
|
Reason: "response_failed",
|
||||||
|
ObservedAt: time.Now().UTC(),
|
||||||
|
}, time.Now().UTC())
|
||||||
|
channel = updated
|
||||||
|
result.Channel = channel
|
||||||
|
if routeEvent.Type == FabricChannelRouteEventReroute {
|
||||||
|
result.RouteEvents = append(result.RouteEvents, routeEvent)
|
||||||
|
result.MigrationEvents++
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if rerouteErr != nil {
|
||||||
|
return result, rerouteErr
|
||||||
|
}
|
||||||
|
return result, sendErr
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *FabricChannelRuntime) routeSetForScheduling(routeSet FabricRouteSet) FabricRouteSet {
|
||||||
|
if r != nil && r.Health != nil {
|
||||||
|
routeSet = r.Health.Apply(routeSet, time.Now().UTC())
|
||||||
|
}
|
||||||
|
return r.routeSetWithActiveChannels(routeSet)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *FabricChannelRuntime) routeSetWithActiveChannels(routeSet FabricRouteSet) FabricRouteSet {
|
||||||
|
if r == nil || r.Pressure == nil {
|
||||||
|
return routeSet
|
||||||
|
}
|
||||||
|
return r.Pressure.Apply(routeSet)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *FabricChannelRuntime) acquireRoute(routeID string) func() {
|
||||||
|
if r == nil || r.Pressure == nil {
|
||||||
|
return func() {}
|
||||||
|
}
|
||||||
|
return r.Pressure.Acquire(routeID)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *FabricChannelRuntime) snapshotRoutePressure() FabricRoutePressureSnapshot {
|
||||||
|
if r == nil || r.Pressure == nil {
|
||||||
|
return FabricRoutePressureSnapshot{}
|
||||||
|
}
|
||||||
|
return r.Pressure.SnapshotPressure()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *FabricChannelRuntime) snapshotRouteHealth() FabricRouteHealthSnapshot {
|
||||||
|
if r == nil || r.Health == nil {
|
||||||
|
return FabricRouteHealthSnapshot{}
|
||||||
|
}
|
||||||
|
return r.Health.Snapshot(time.Now().UTC())
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *FabricChannelRuntime) markRouteFailure(routeID string, err error) {
|
||||||
|
if r == nil || r.Health == nil || err == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
r.Health.MarkFailure(routeID, err.Error(), time.Now().UTC())
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *FabricChannelRuntime) markRouteSuccess(routeID string) {
|
||||||
|
if r == nil || r.Health == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
r.Health.MarkSuccess(routeID)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *FabricChannelRuntime) sendOnSession(ctx context.Context, session FabricTransportSession, channel *FabricChannel, routeSet FabricRouteSet, route FabricRoute, payloads [][]byte, index *int, sequence *uint64, result *FabricChannelRuntimeResult) (bool, error) {
|
||||||
|
cfg := r.Config
|
||||||
|
if err := session.Send(ctx, fabricproto.Frame{
|
||||||
|
Type: fabricproto.FrameOpenStream,
|
||||||
|
TrafficClass: cfg.TrafficClass,
|
||||||
|
StreamID: cfg.StreamID,
|
||||||
|
}); err != nil {
|
||||||
|
r.markRouteFailure(route.RouteID, err)
|
||||||
|
return false, err
|
||||||
|
}
|
||||||
|
for *index < len(payloads) {
|
||||||
|
payload := payloads[*index]
|
||||||
|
if len(payload) > cfg.MaxPayload {
|
||||||
|
return false, fmt.Errorf("%w: %d > %d", fabricproto.ErrInvalidPayloadLen, len(payload), cfg.MaxPayload)
|
||||||
|
}
|
||||||
|
(*sequence)++
|
||||||
|
if err := session.Send(ctx, fabricproto.Frame{
|
||||||
|
Type: fabricproto.FrameData,
|
||||||
|
TrafficClass: cfg.TrafficClass,
|
||||||
|
StreamID: cfg.StreamID,
|
||||||
|
Sequence: *sequence,
|
||||||
|
Payload: payload,
|
||||||
|
}); err != nil {
|
||||||
|
r.markRouteFailure(route.RouteID, err)
|
||||||
|
return false, err
|
||||||
|
}
|
||||||
|
ackOK, ackMs := waitForFabricRuntimeAck(ctx, session, cfg.StreamID, *sequence, cfg.Timeout)
|
||||||
|
if !ackOK {
|
||||||
|
r.markRouteFailure(route.RouteID, fmt.Errorf("ack_failed"))
|
||||||
|
updated, event, err := r.Router.ObserveChannel(*channel, routeSet, FabricChannelObservation{
|
||||||
|
ChannelID: channel.Spec.ChannelID,
|
||||||
|
RouteID: route.RouteID,
|
||||||
|
Failed: true,
|
||||||
|
Reason: "ack_failed",
|
||||||
|
ObservedAt: time.Now().UTC(),
|
||||||
|
}, time.Now().UTC())
|
||||||
|
*channel = updated
|
||||||
|
if event.Type == FabricChannelRouteEventReroute {
|
||||||
|
result.RouteEvents = append(result.RouteEvents, event)
|
||||||
|
result.MigrationEvents++
|
||||||
|
return true, nil
|
||||||
|
}
|
||||||
|
return false, err
|
||||||
|
}
|
||||||
|
r.markRouteSuccess(route.RouteID)
|
||||||
|
*index++
|
||||||
|
result.BytesSent += uint64(len(payload))
|
||||||
|
result.FramesSent++
|
||||||
|
result.AcksReceived++
|
||||||
|
updated, event, err := r.Router.ObserveChannel(*channel, routeSet, FabricChannelObservation{
|
||||||
|
ChannelID: channel.Spec.ChannelID,
|
||||||
|
RouteID: route.RouteID,
|
||||||
|
AckLatencyMs: ackMs,
|
||||||
|
BytesSent: uint64(len(payload)),
|
||||||
|
FramesSent: 1,
|
||||||
|
ObservedAt: time.Now().UTC(),
|
||||||
|
}, time.Now().UTC())
|
||||||
|
*channel = updated
|
||||||
|
if err != nil {
|
||||||
|
return false, err
|
||||||
|
}
|
||||||
|
if event.Type == FabricChannelRouteEventReroute {
|
||||||
|
result.RouteEvents = append(result.RouteEvents, event)
|
||||||
|
result.MigrationEvents++
|
||||||
|
return true, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ = session.Send(context.Background(), fabricproto.Frame{
|
||||||
|
Type: fabricproto.FrameCloseStream,
|
||||||
|
TrafficClass: cfg.TrafficClass,
|
||||||
|
StreamID: cfg.StreamID,
|
||||||
|
})
|
||||||
|
return false, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *FabricChannelRuntime) sendRequestResponseOnSession(ctx context.Context, session FabricTransportSession, routeID string, channelID string, payload []byte, sequence uint64) ([]byte, int64, error) {
|
||||||
|
cfg := r.Config
|
||||||
|
if err := session.Send(ctx, fabricproto.Frame{
|
||||||
|
Type: fabricproto.FrameOpenStream,
|
||||||
|
TrafficClass: cfg.TrafficClass,
|
||||||
|
StreamID: cfg.StreamID,
|
||||||
|
}); err != nil {
|
||||||
|
r.markRouteFailure(routeID, err)
|
||||||
|
return nil, 0, err
|
||||||
|
}
|
||||||
|
started := time.Now()
|
||||||
|
if err := session.Send(ctx, fabricproto.Frame{
|
||||||
|
Type: fabricproto.FrameData,
|
||||||
|
TrafficClass: cfg.TrafficClass,
|
||||||
|
StreamID: cfg.StreamID,
|
||||||
|
Sequence: sequence,
|
||||||
|
Payload: payload,
|
||||||
|
}); err != nil {
|
||||||
|
r.markRouteFailure(routeID, err)
|
||||||
|
return nil, 0, err
|
||||||
|
}
|
||||||
|
waitCtx := ctx
|
||||||
|
if cfg.Timeout > 0 {
|
||||||
|
var cancel context.CancelFunc
|
||||||
|
waitCtx, cancel = context.WithTimeout(ctx, cfg.Timeout)
|
||||||
|
defer cancel()
|
||||||
|
}
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-waitCtx.Done():
|
||||||
|
return nil, 0, waitCtx.Err()
|
||||||
|
case err, ok := <-session.Errors():
|
||||||
|
if !ok {
|
||||||
|
return nil, 0, ErrForwardPeerUnavailable
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
return nil, 0, err
|
||||||
|
}
|
||||||
|
case frame, ok := <-session.Frames():
|
||||||
|
if !ok {
|
||||||
|
return nil, 0, ErrForwardPeerUnavailable
|
||||||
|
}
|
||||||
|
if frame.Type != fabricproto.FrameData || frame.StreamID != cfg.StreamID || frame.Sequence != sequence {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
_ = session.Send(context.Background(), fabricproto.Frame{
|
||||||
|
Type: fabricproto.FrameCloseStream,
|
||||||
|
TrafficClass: cfg.TrafficClass,
|
||||||
|
StreamID: cfg.StreamID,
|
||||||
|
})
|
||||||
|
return append([]byte(nil), frame.Payload...), time.Since(started).Milliseconds(), nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func FabricTransportTargetForRoute(route FabricRoute) (FabricTransportTarget, error) {
|
||||||
|
if strings.TrimSpace(route.RouteID) == "" {
|
||||||
|
return FabricTransportTarget{}, ErrFabricRouteNotFound
|
||||||
|
}
|
||||||
|
if route.RelayCount > 0 {
|
||||||
|
for _, hop := range route.Hops {
|
||||||
|
if hop.Mode != FabricRouteRelay {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if target, ok := fabricTransportTargetForHop(hop); ok {
|
||||||
|
return target, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for i := len(route.Hops) - 1; i >= 0; i-- {
|
||||||
|
if target, ok := fabricTransportTargetForHop(route.Hops[i]); ok {
|
||||||
|
return target, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return FabricTransportTarget{}, fmt.Errorf("%w: route %s has no transport endpoint", ErrFabricRouteNotFound, route.RouteID)
|
||||||
|
}
|
||||||
|
|
||||||
|
func fabricTransportTargetForHop(hop FabricRouteHop) (FabricTransportTarget, bool) {
|
||||||
|
endpoint := strings.TrimSpace(hop.Address)
|
||||||
|
if endpoint == "" {
|
||||||
|
return FabricTransportTarget{}, false
|
||||||
|
}
|
||||||
|
transport := string(hop.Mode)
|
||||||
|
if transport == "" {
|
||||||
|
transport = "quic"
|
||||||
|
}
|
||||||
|
return FabricTransportTarget{
|
||||||
|
EndpointID: hop.EndpointID,
|
||||||
|
PeerID: strings.TrimSpace(hop.NodeID),
|
||||||
|
Endpoint: endpoint,
|
||||||
|
Transport: transport,
|
||||||
|
PeerCertSHA256: strings.TrimSpace(hop.PeerCertSHA256),
|
||||||
|
}, true
|
||||||
|
}
|
||||||
|
|
||||||
|
func waitForFabricRuntimeAck(ctx context.Context, session FabricTransportSession, streamID uint64, sequence uint64, timeout time.Duration) (bool, int64) {
|
||||||
|
started := time.Now()
|
||||||
|
if timeout > 0 {
|
||||||
|
var cancel context.CancelFunc
|
||||||
|
ctx, cancel = context.WithTimeout(ctx, timeout)
|
||||||
|
defer cancel()
|
||||||
|
}
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return false, 0
|
||||||
|
case err, ok := <-session.Errors():
|
||||||
|
if !ok || err != nil {
|
||||||
|
return false, 0
|
||||||
|
}
|
||||||
|
case frame, ok := <-session.Frames():
|
||||||
|
if !ok {
|
||||||
|
return false, 0
|
||||||
|
}
|
||||||
|
if frame.Type == fabricproto.FrameAck && frame.StreamID == streamID && frame.Sequence == sequence {
|
||||||
|
return true, time.Since(started).Milliseconds()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,495 @@
|
|||||||
|
package mesh
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestFabricChannelRuntimeMigratesSlowAckToStandbyRoute(t *testing.T) {
|
||||||
|
transport := newFakeFabricRuntimeTransport(map[string]time.Duration{
|
||||||
|
"quic://slow.example.test:19443": 60 * time.Millisecond,
|
||||||
|
"quic://fast.example.test:19443": 0,
|
||||||
|
})
|
||||||
|
runtime := NewFabricChannelRuntime(transport, FabricChannelRuntimeConfig{
|
||||||
|
RouterConfig: FabricChannelRouterConfig{MaxAckLatencyMs: 30},
|
||||||
|
StreamID: 9,
|
||||||
|
})
|
||||||
|
routeSet := FabricRouteSet{
|
||||||
|
TargetKind: FabricChannelTargetNode,
|
||||||
|
TargetID: "node-b",
|
||||||
|
Primary: testRuntimeRoute("route-slow", "node-b", "quic://slow.example.test:19443", 10),
|
||||||
|
WarmStandby: []FabricRoute{
|
||||||
|
testRuntimeRoute("route-fast", "node-b", "quic://fast.example.test:19443", 20),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
result, err := runtime.SendReliable(context.Background(), testFabricChannelSpec(FabricChannelTargetNode, "node-b"), routeSet, [][]byte{
|
||||||
|
[]byte("one"),
|
||||||
|
[]byte("two"),
|
||||||
|
[]byte("three"),
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("send reliable: %v", err)
|
||||||
|
}
|
||||||
|
if result.MigrationEvents != 1 {
|
||||||
|
t.Fatalf("migration events = %d, want 1: %+v", result.MigrationEvents, result.RouteEvents)
|
||||||
|
}
|
||||||
|
if result.Channel.RouteID != "route-fast" || result.Channel.RerouteCount != 1 {
|
||||||
|
t.Fatalf("channel = %+v", result.Channel)
|
||||||
|
}
|
||||||
|
if result.BytesSent != uint64(len("one")+len("two")+len("three")) || result.AcksReceived != 3 {
|
||||||
|
t.Fatalf("result = %+v", result)
|
||||||
|
}
|
||||||
|
if got := transport.connectCount("quic://slow.example.test:19443"); got != 1 {
|
||||||
|
t.Fatalf("slow connect count = %d, want 1", got)
|
||||||
|
}
|
||||||
|
if got := transport.connectCount("quic://fast.example.test:19443"); got != 1 {
|
||||||
|
t.Fatalf("fast connect count = %d, want 1", got)
|
||||||
|
}
|
||||||
|
if result.RoutePressure.AcquiredTotal != 2 || result.RoutePressure.ReleasedTotal != 2 || result.RoutePressure.MaxActiveTotal == 0 {
|
||||||
|
t.Fatalf("route pressure = %+v", result.RoutePressure)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFabricChannelRuntimeReroutesOnConnectFailure(t *testing.T) {
|
||||||
|
transport := newFakeFabricRuntimeTransport(map[string]time.Duration{
|
||||||
|
"quic://fast.example.test:19443": 0,
|
||||||
|
})
|
||||||
|
transport.failConnect["quic://dead.example.test:19443"] = true
|
||||||
|
runtime := NewFabricChannelRuntime(transport, FabricChannelRuntimeConfig{
|
||||||
|
RouterConfig: FabricChannelRouterConfig{MaxAckLatencyMs: 30},
|
||||||
|
StreamID: 9,
|
||||||
|
})
|
||||||
|
routeSet := FabricRouteSet{
|
||||||
|
TargetKind: FabricChannelTargetNode,
|
||||||
|
TargetID: "node-b",
|
||||||
|
Primary: testRuntimeRoute("route-dead", "node-b", "quic://dead.example.test:19443", 10),
|
||||||
|
WarmStandby: []FabricRoute{
|
||||||
|
testRuntimeRoute("route-fast", "node-b", "quic://fast.example.test:19443", 20),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
result, err := runtime.SendReliable(context.Background(), testFabricChannelSpec(FabricChannelTargetNode, "node-b"), routeSet, [][]byte{[]byte("payload")})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("send reliable: %v", err)
|
||||||
|
}
|
||||||
|
if result.MigrationEvents != 1 || result.Channel.RouteID != "route-fast" || result.BytesSent != uint64(len("payload")) {
|
||||||
|
t.Fatalf("result = %+v", result)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFabricChannelRuntimeQuarantinesFailedRouteAcrossChannels(t *testing.T) {
|
||||||
|
transport := newFakeFabricRuntimeTransport(map[string]time.Duration{
|
||||||
|
"quic://fast.example.test:19443": 0,
|
||||||
|
})
|
||||||
|
transport.failConnect["quic://dead.example.test:19443"] = true
|
||||||
|
runtime := NewFabricChannelRuntime(transport, FabricChannelRuntimeConfig{
|
||||||
|
RouterConfig: FabricChannelRouterConfig{MaxAckLatencyMs: 30},
|
||||||
|
StreamID: 9,
|
||||||
|
RouteHealthTTL: time.Minute,
|
||||||
|
})
|
||||||
|
routeSet := FabricRouteSet{
|
||||||
|
TargetKind: FabricChannelTargetNode,
|
||||||
|
TargetID: "node-b",
|
||||||
|
Primary: testRuntimeRoute("route-dead", "node-b", "quic://dead.example.test:19443", 10),
|
||||||
|
WarmStandby: []FabricRoute{
|
||||||
|
testRuntimeRoute("route-fast", "node-b", "quic://fast.example.test:19443", 20),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
first, err := runtime.SendReliable(context.Background(), testFabricChannelSpec(FabricChannelTargetNode, "node-b"), routeSet, [][]byte{[]byte("first")})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("first send reliable: %v", err)
|
||||||
|
}
|
||||||
|
if first.Channel.RouteID != "route-fast" || first.RouteHealth.Quarantined["route-dead"].Failures != 1 {
|
||||||
|
t.Fatalf("first result = %+v", first)
|
||||||
|
}
|
||||||
|
second, err := runtime.SendReliable(context.Background(), testFabricChannelSpec(FabricChannelTargetNode, "node-b"), routeSet, [][]byte{[]byte("second")})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("second send reliable: %v", err)
|
||||||
|
}
|
||||||
|
if second.Channel.RouteID != "route-fast" {
|
||||||
|
t.Fatalf("second route = %s, want route-fast", second.Channel.RouteID)
|
||||||
|
}
|
||||||
|
if got := transport.connectCount("quic://dead.example.test:19443"); got != 1 {
|
||||||
|
t.Fatalf("dead connect count = %d, want one attempt before quarantine", got)
|
||||||
|
}
|
||||||
|
if got := transport.connectCount("quic://fast.example.test:19443"); got != 2 {
|
||||||
|
t.Fatalf("fast connect count = %d, want both channels on healthy route", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFabricChannelRuntimeReroutesOnAckTimeout(t *testing.T) {
|
||||||
|
transport := newFakeFabricRuntimeTransport(map[string]time.Duration{
|
||||||
|
"quic://slow.example.test:19443": 100 * time.Millisecond,
|
||||||
|
"quic://fast.example.test:19443": 0,
|
||||||
|
})
|
||||||
|
runtime := NewFabricChannelRuntime(transport, FabricChannelRuntimeConfig{
|
||||||
|
RouterConfig: FabricChannelRouterConfig{MaxAckLatencyMs: 30},
|
||||||
|
StreamID: 9,
|
||||||
|
Timeout: 10 * time.Millisecond,
|
||||||
|
})
|
||||||
|
routeSet := FabricRouteSet{
|
||||||
|
TargetKind: FabricChannelTargetNode,
|
||||||
|
TargetID: "node-b",
|
||||||
|
Primary: testRuntimeRoute("route-slow", "node-b", "quic://slow.example.test:19443", 10),
|
||||||
|
WarmStandby: []FabricRoute{
|
||||||
|
testRuntimeRoute("route-fast", "node-b", "quic://fast.example.test:19443", 20),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
result, err := runtime.SendReliable(context.Background(), testFabricChannelSpec(FabricChannelTargetNode, "node-b"), routeSet, [][]byte{[]byte("payload")})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("send reliable: %v", err)
|
||||||
|
}
|
||||||
|
if result.MigrationEvents != 1 || result.Channel.RouteID != "route-fast" || result.BytesSent != uint64(len("payload")) {
|
||||||
|
t.Fatalf("result = %+v", result)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFabricChannelRuntimeSpreadsConcurrentChannelsBySharedPressure(t *testing.T) {
|
||||||
|
transport := newFakeFabricRuntimeTransport(map[string]time.Duration{
|
||||||
|
"quic://route-a.example.test:19443": 80 * time.Millisecond,
|
||||||
|
"quic://route-b.example.test:19443": 0,
|
||||||
|
})
|
||||||
|
runtime := NewFabricChannelRuntime(transport, FabricChannelRuntimeConfig{StreamID: 9})
|
||||||
|
routeSet := FabricRouteSet{
|
||||||
|
TargetKind: FabricChannelTargetNode,
|
||||||
|
TargetID: "node-b",
|
||||||
|
Primary: testRuntimeRoute("route-a", "node-b", "quic://route-a.example.test:19443", 10),
|
||||||
|
WarmStandby: []FabricRoute{
|
||||||
|
testRuntimeRoute("route-b", "node-b", "quic://route-b.example.test:19443", 11),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
firstDone := make(chan error, 1)
|
||||||
|
go func() {
|
||||||
|
_, err := runtime.SendReliable(context.Background(), testFabricChannelSpec(FabricChannelTargetNode, "node-b"), routeSet, [][]byte{[]byte("one")})
|
||||||
|
firstDone <- err
|
||||||
|
}()
|
||||||
|
transport.waitForConnect(t, "quic://route-a.example.test:19443", 1)
|
||||||
|
result, err := runtime.SendReliable(context.Background(), testFabricChannelSpec(FabricChannelTargetNode, "node-b"), routeSet, [][]byte{[]byte("two")})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("second send reliable: %v", err)
|
||||||
|
}
|
||||||
|
if result.Channel.RouteID != "route-b" {
|
||||||
|
t.Fatalf("second route = %s, want route-b", result.Channel.RouteID)
|
||||||
|
}
|
||||||
|
if got := transport.connectCount("quic://route-b.example.test:19443"); got != 1 {
|
||||||
|
t.Fatalf("route-b connect count = %d, want 1", got)
|
||||||
|
}
|
||||||
|
if err := <-firstDone; err != nil {
|
||||||
|
t.Fatalf("first send reliable: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFabricChannelRuntimeRequestResponseReturnsPayload(t *testing.T) {
|
||||||
|
transport := newFakeFabricRequestResponseTransport(map[string][]byte{
|
||||||
|
"quic://runtime.example.test:19443": []byte(`{"status":"ok"}`),
|
||||||
|
})
|
||||||
|
runtime := NewFabricChannelRuntime(transport, FabricChannelRuntimeConfig{
|
||||||
|
RouterConfig: FabricChannelRouterConfig{MaxAckLatencyMs: 30},
|
||||||
|
StreamID: 9,
|
||||||
|
})
|
||||||
|
routeSet := FabricRouteSet{
|
||||||
|
TargetKind: FabricChannelTargetPool,
|
||||||
|
TargetID: "pool-admin-runtime",
|
||||||
|
Primary: testRuntimePoolRoute("route-runtime", "pool-admin-runtime", "node-runtime", "quic://runtime.example.test:19443", 10),
|
||||||
|
}
|
||||||
|
|
||||||
|
result, err := runtime.SendRequestResponse(context.Background(), FabricChannelSpec{
|
||||||
|
ChannelID: "channel-web-1",
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
SourceNodeID: "node-a",
|
||||||
|
TargetKind: FabricChannelTargetPool,
|
||||||
|
TargetID: "pool-admin-runtime",
|
||||||
|
TrafficClass: "control",
|
||||||
|
CreatedAt: time.Now().UTC(),
|
||||||
|
}, routeSet, []byte(`{"request":true}`))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("request response: %v", err)
|
||||||
|
}
|
||||||
|
if string(result.ResponsePayload) != `{"status":"ok"}` {
|
||||||
|
t.Fatalf("response payload = %s", string(result.ResponsePayload))
|
||||||
|
}
|
||||||
|
if result.Channel.RouteID != "route-runtime" ||
|
||||||
|
result.BytesSent != uint64(len(`{"request":true}`)) ||
|
||||||
|
result.BytesRecv != uint64(len(`{"status":"ok"}`)) ||
|
||||||
|
result.FramesSent != 1 ||
|
||||||
|
result.FramesRecv != 1 ||
|
||||||
|
result.AcksReceived != 1 {
|
||||||
|
t.Fatalf("result = %+v", result)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFabricChannelRuntimeRequestResponseReroutesOnResponseFailure(t *testing.T) {
|
||||||
|
transport := newFakeFabricRequestResponseTransport(map[string][]byte{
|
||||||
|
"quic://fast.example.test:19443": []byte(`{"status":"ok"}`),
|
||||||
|
})
|
||||||
|
transport.failResponse["quic://slow.example.test:19443"] = true
|
||||||
|
runtime := NewFabricChannelRuntime(transport, FabricChannelRuntimeConfig{
|
||||||
|
RouterConfig: FabricChannelRouterConfig{MaxAckLatencyMs: 30},
|
||||||
|
StreamID: 9,
|
||||||
|
Timeout: 10 * time.Millisecond,
|
||||||
|
})
|
||||||
|
routeSet := FabricRouteSet{
|
||||||
|
TargetKind: FabricChannelTargetNode,
|
||||||
|
TargetID: "node-runtime",
|
||||||
|
Primary: testRuntimeRoute("route-slow", "node-runtime", "quic://slow.example.test:19443", 10),
|
||||||
|
WarmStandby: []FabricRoute{
|
||||||
|
testRuntimeRoute("route-fast", "node-runtime", "quic://fast.example.test:19443", 20),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
result, err := runtime.SendRequestResponse(context.Background(), testFabricChannelSpec(FabricChannelTargetNode, "node-runtime"), routeSet, []byte(`{"request":true}`))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("request response: %v", err)
|
||||||
|
}
|
||||||
|
if result.MigrationEvents != 1 || result.Channel.RouteID != "route-fast" || string(result.ResponsePayload) != `{"status":"ok"}` {
|
||||||
|
t.Fatalf("result = %+v", result)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFabricTransportTargetForRouteUsesLastAddressedHop(t *testing.T) {
|
||||||
|
target, err := FabricTransportTargetForRoute(FabricRoute{
|
||||||
|
RouteID: "route-1",
|
||||||
|
Hops: []FabricRouteHop{
|
||||||
|
{NodeID: "node-a"},
|
||||||
|
{NodeID: "node-r", Mode: FabricRouteRelay, EndpointID: "relay-1", Address: "quic://relay.example.test:19443"},
|
||||||
|
{NodeID: "node-b", Mode: FabricRouteDirect, EndpointID: "node-b-quic", Address: "quic://node-b.example.test:19443"},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("target for route: %v", err)
|
||||||
|
}
|
||||||
|
if target.PeerID != "node-b" || target.EndpointID != "node-b-quic" || target.Endpoint != "quic://node-b.example.test:19443" || target.Transport != string(FabricRouteDirect) {
|
||||||
|
t.Fatalf("target = %+v", target)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// fakeFabricRequestResponseTransport is a test transport whose sessions reply
// to data frames with a canned payload chosen by endpoint.
type fakeFabricRequestResponseTransport struct {
	// mu is held by Connect while it reads and updates the maps below.
	mu sync.Mutex
	// responses maps an endpoint to the payload its sessions reply with.
	responses map[string][]byte
	// failResponse marks endpoints whose sessions never reply.
	failResponse map[string]bool
	// connects counts Connect calls per endpoint.
	connects map[string]int
}
|
||||||
|
|
||||||
|
func newFakeFabricRequestResponseTransport(responses map[string][]byte) *fakeFabricRequestResponseTransport {
|
||||||
|
return &fakeFabricRequestResponseTransport{
|
||||||
|
responses: responses,
|
||||||
|
failResponse: map[string]bool{},
|
||||||
|
connects: map[string]int{},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *fakeFabricRequestResponseTransport) Connect(_ context.Context, target FabricTransportTarget) (FabricTransportSession, error) {
|
||||||
|
endpoint := target.Endpoint
|
||||||
|
t.mu.Lock()
|
||||||
|
t.connects[endpoint]++
|
||||||
|
response := append([]byte(nil), t.responses[endpoint]...)
|
||||||
|
failResponse := t.failResponse[endpoint]
|
||||||
|
t.mu.Unlock()
|
||||||
|
return &fakeFabricRequestResponseSession{
|
||||||
|
response: response,
|
||||||
|
failResponse: failResponse,
|
||||||
|
frames: make(chan fabricproto.Frame, 16),
|
||||||
|
errors: make(chan error, 1),
|
||||||
|
done: make(chan struct{}),
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *fakeFabricRequestResponseTransport) Close() error {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
type fakeFabricRequestResponseSession struct {
|
||||||
|
response []byte
|
||||||
|
failResponse bool
|
||||||
|
frames chan fabricproto.Frame
|
||||||
|
errors chan error
|
||||||
|
done chan struct{}
|
||||||
|
once sync.Once
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *fakeFabricRequestResponseSession) Send(_ context.Context, frame fabricproto.Frame) error {
|
||||||
|
if frame.Type != fabricproto.FrameData || s.failResponse {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
response := append([]byte(nil), s.response...)
|
||||||
|
go func() {
|
||||||
|
select {
|
||||||
|
case <-s.done:
|
||||||
|
case s.frames <- fabricproto.Frame{Type: fabricproto.FrameData, TrafficClass: frame.TrafficClass, StreamID: frame.StreamID, Sequence: frame.Sequence, Payload: response}:
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *fakeFabricRequestResponseSession) Frames() <-chan fabricproto.Frame {
|
||||||
|
return s.frames
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *fakeFabricRequestResponseSession) Errors() <-chan error {
|
||||||
|
return s.errors
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *fakeFabricRequestResponseSession) Close() error {
|
||||||
|
s.once.Do(func() {
|
||||||
|
close(s.done)
|
||||||
|
})
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *fakeFabricRequestResponseSession) Closed() bool {
|
||||||
|
select {
|
||||||
|
case <-s.done:
|
||||||
|
return true
|
||||||
|
default:
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFabricTransportTargetForRouteUsesRelayHopForRelayRoute(t *testing.T) {
|
||||||
|
target, err := FabricTransportTargetForRoute(FabricRoute{
|
||||||
|
RouteID: "route-relay",
|
||||||
|
RelayCount: 1,
|
||||||
|
Hops: []FabricRouteHop{
|
||||||
|
{NodeID: "node-a"},
|
||||||
|
{NodeID: "node-r", Mode: FabricRouteRelay, EndpointID: "relay-1", Address: "quic://relay.example.test:19443", PeerCertSHA256: "relay-cert"},
|
||||||
|
{NodeID: "node-b", Mode: FabricRouteRelay, EndpointID: "node-b-private", Address: "quic://10.0.0.2:19443", PeerCertSHA256: "node-b-cert"},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("target for relay route: %v", err)
|
||||||
|
}
|
||||||
|
if target.PeerID != "node-r" || target.EndpointID != "relay-1" || target.Endpoint != "quic://relay.example.test:19443" || target.PeerCertSHA256 != "relay-cert" {
|
||||||
|
t.Fatalf("target = %+v", target)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// fakeFabricRuntimeTransport is a test transport whose sessions acknowledge
// data frames after a configurable per-endpoint delay.
type fakeFabricRuntimeTransport struct {
	// mu is held by Connect, connectCount and waitForConnect while they access
	// the maps below.
	mu sync.Mutex
	// delays maps an endpoint to the ack delay of its sessions.
	delays map[string]time.Duration
	// failConnect marks endpoints for which Connect returns an error.
	failConnect map[string]bool
	// connects counts Connect calls per endpoint, including failed ones.
	connects map[string]int
}
|
||||||
|
|
||||||
|
func newFakeFabricRuntimeTransport(delays map[string]time.Duration) *fakeFabricRuntimeTransport {
|
||||||
|
return &fakeFabricRuntimeTransport{
|
||||||
|
delays: delays,
|
||||||
|
failConnect: map[string]bool{},
|
||||||
|
connects: map[string]int{},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *fakeFabricRuntimeTransport) Connect(_ context.Context, target FabricTransportTarget) (FabricTransportSession, error) {
|
||||||
|
endpoint := target.Endpoint
|
||||||
|
t.mu.Lock()
|
||||||
|
t.connects[endpoint]++
|
||||||
|
fail := t.failConnect[endpoint]
|
||||||
|
delay := t.delays[endpoint]
|
||||||
|
t.mu.Unlock()
|
||||||
|
if fail {
|
||||||
|
return nil, ErrForwardPeerUnavailable
|
||||||
|
}
|
||||||
|
return &fakeFabricRuntimeSession{
|
||||||
|
endpoint: endpoint,
|
||||||
|
delay: delay,
|
||||||
|
frames: make(chan fabricproto.Frame, 64),
|
||||||
|
errors: make(chan error, 1),
|
||||||
|
done: make(chan struct{}),
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *fakeFabricRuntimeTransport) Close() error {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *fakeFabricRuntimeTransport) connectCount(endpoint string) int {
|
||||||
|
t.mu.Lock()
|
||||||
|
defer t.mu.Unlock()
|
||||||
|
return t.connects[endpoint]
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *fakeFabricRuntimeTransport) waitForConnect(tb testing.TB, endpoint string, count int) {
|
||||||
|
tb.Helper()
|
||||||
|
deadline := time.Now().Add(time.Second)
|
||||||
|
for {
|
||||||
|
t.mu.Lock()
|
||||||
|
got := t.connects[endpoint]
|
||||||
|
t.mu.Unlock()
|
||||||
|
if got >= count {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if time.Now().After(deadline) {
|
||||||
|
tb.Fatalf("timed out waiting for %s connect count %d, got %d", endpoint, count, got)
|
||||||
|
}
|
||||||
|
time.Sleep(time.Millisecond)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type fakeFabricRuntimeSession struct {
|
||||||
|
endpoint string
|
||||||
|
delay time.Duration
|
||||||
|
frames chan fabricproto.Frame
|
||||||
|
errors chan error
|
||||||
|
done chan struct{}
|
||||||
|
once sync.Once
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *fakeFabricRuntimeSession) Send(_ context.Context, frame fabricproto.Frame) error {
|
||||||
|
if frame.Type != fabricproto.FrameData {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
delay := s.delay
|
||||||
|
go func() {
|
||||||
|
if delay > 0 {
|
||||||
|
time.Sleep(delay)
|
||||||
|
}
|
||||||
|
select {
|
||||||
|
case <-s.done:
|
||||||
|
case s.frames <- fabricproto.Frame{Type: fabricproto.FrameAck, TrafficClass: frame.TrafficClass, StreamID: frame.StreamID, Sequence: frame.Sequence}:
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *fakeFabricRuntimeSession) Frames() <-chan fabricproto.Frame {
|
||||||
|
return s.frames
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *fakeFabricRuntimeSession) Errors() <-chan error {
|
||||||
|
return s.errors
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *fakeFabricRuntimeSession) Close() error {
|
||||||
|
s.once.Do(func() {
|
||||||
|
close(s.done)
|
||||||
|
})
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *fakeFabricRuntimeSession) Closed() bool {
|
||||||
|
select {
|
||||||
|
case <-s.done:
|
||||||
|
return true
|
||||||
|
default:
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func testRuntimeRoute(routeID string, destination string, endpoint string, latency int) FabricRoute {
|
||||||
|
route := testFabricRoute(routeID, destination, latency, 100, 0, true)
|
||||||
|
route.Hops[len(route.Hops)-1].Address = endpoint
|
||||||
|
route.Hops[len(route.Hops)-1].EndpointID = strings.TrimPrefix(routeID, "route-")
|
||||||
|
route.Hops[len(route.Hops)-1].Mode = FabricRouteDirect
|
||||||
|
return route
|
||||||
|
}
|
||||||
|
|
||||||
|
func testRuntimePoolRoute(routeID string, poolID string, destination string, endpoint string, latency int) FabricRoute {
|
||||||
|
route := testRuntimeRoute(routeID, destination, endpoint, latency)
|
||||||
|
route.PoolID = poolID
|
||||||
|
return route
|
||||||
|
}
|
||||||
@@ -0,0 +1,390 @@
|
|||||||
|
package mesh
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// FabricChannelTargetKind is the string-typed kind of target carried by
// channel specs and route sets.
type FabricChannelTargetKind string

// Declared channel target kinds.
const (
	FabricChannelTargetNode = FabricChannelTargetKind("node")
	FabricChannelTargetPool = FabricChannelTargetKind("pool")
)
|
||||||
|
|
||||||
|
// FabricChannelLifecycleState is the string-typed lifecycle state stored in
// FabricChannel.State.
type FabricChannelLifecycleState string

// Declared channel lifecycle states.
const (
	FabricChannelOpening  = FabricChannelLifecycleState("opening")
	FabricChannelOpen     = FabricChannelLifecycleState("open")
	FabricChannelDraining = FabricChannelLifecycleState("draining")
	FabricChannelClosed   = FabricChannelLifecycleState("closed")
)
|
||||||
|
|
||||||
|
// FabricRouteMode is the string-typed mode of a route hop or adjacency.
type FabricRouteMode string

// Declared route modes.
const (
	FabricRouteDirect  = FabricRouteMode("direct_quic")
	FabricRouteLAN     = FabricRouteMode("lan_quic")
	FabricRouteReverse = FabricRouteMode("reverse_quic")
	FabricRouteRelay   = FabricRouteMode("relay_quic")
	FabricRouteICE     = FabricRouteMode("ice_quic")
)
|
||||||
|
|
||||||
|
// ErrFabricChannelInvalid is the sentinel error for an invalid fabric channel
// request.
var ErrFabricChannelInvalid = errors.New("fabric channel request is invalid")

// ErrFabricRouteNotFound is the sentinel error for a missing fabric route.
var ErrFabricRouteNotFound = errors.New("fabric route not found")
|
||||||
|
|
||||||
|
type FabricChannelSpec struct {
|
||||||
|
ChannelID string
|
||||||
|
ClusterID string
|
||||||
|
SourceNodeID string
|
||||||
|
TargetKind FabricChannelTargetKind
|
||||||
|
TargetID string
|
||||||
|
TrafficClass string
|
||||||
|
MinBandwidth int64
|
||||||
|
StickyKey string
|
||||||
|
CreatedAt time.Time
|
||||||
|
ForbiddenHops []string
|
||||||
|
}
|
||||||
|
|
||||||
|
type FabricServiceChannelTarget struct {
|
||||||
|
Kind FabricChannelTargetKind
|
||||||
|
PoolIDs []string
|
||||||
|
NodeIDs []string
|
||||||
|
SelectedNodeID string
|
||||||
|
ServiceRole string
|
||||||
|
SelectionPolicy string
|
||||||
|
SingleMemberPool bool
|
||||||
|
}
|
||||||
|
|
||||||
|
type FabricServiceChannelRequest struct {
|
||||||
|
SchemaVersion string
|
||||||
|
ChannelID string
|
||||||
|
ClusterID string
|
||||||
|
OrganizationID string
|
||||||
|
UserID string
|
||||||
|
ResourceID string
|
||||||
|
SourceNodeID string
|
||||||
|
SourceRole string
|
||||||
|
ServiceClass string
|
||||||
|
Target FabricServiceChannelTarget
|
||||||
|
TrafficClass string
|
||||||
|
CreatedAt time.Time
|
||||||
|
}
|
||||||
|
|
||||||
|
type FabricChannel struct {
|
||||||
|
Spec FabricChannelSpec
|
||||||
|
State FabricChannelLifecycleState
|
||||||
|
RouteID string
|
||||||
|
TargetNode string
|
||||||
|
OpenedAt time.Time
|
||||||
|
LastReroute time.Time
|
||||||
|
BytesSent uint64
|
||||||
|
BytesRecv uint64
|
||||||
|
FramesSent uint64
|
||||||
|
FramesRecv uint64
|
||||||
|
RerouteCount uint64
|
||||||
|
}
|
||||||
|
|
||||||
|
type FabricRouteHop struct {
|
||||||
|
NodeID string
|
||||||
|
Mode FabricRouteMode
|
||||||
|
EndpointID string
|
||||||
|
Address string
|
||||||
|
PeerCertSHA256 string
|
||||||
|
}
|
||||||
|
|
||||||
|
type FabricRoute struct {
|
||||||
|
RouteID string
|
||||||
|
ClusterID string
|
||||||
|
SourceNodeID string
|
||||||
|
DestinationNodeID string
|
||||||
|
PoolID string
|
||||||
|
Hops []FabricRouteHop
|
||||||
|
BaseLatencyMs int
|
||||||
|
JitterMs int
|
||||||
|
LossPermille int
|
||||||
|
Capacity int
|
||||||
|
ActiveChannels int
|
||||||
|
RelayCount int
|
||||||
|
LastUpdatedAt time.Time
|
||||||
|
Healthy bool
|
||||||
|
Degraded bool
|
||||||
|
}
|
||||||
|
|
||||||
|
type FabricRouteSet struct {
|
||||||
|
TargetKind FabricChannelTargetKind
|
||||||
|
TargetID string
|
||||||
|
Primary FabricRoute
|
||||||
|
WarmStandby []FabricRoute
|
||||||
|
ColdFallbacks []FabricRoute
|
||||||
|
}
|
||||||
|
|
||||||
|
type FabricAdjacency struct {
|
||||||
|
FromNodeID string
|
||||||
|
ToNodeID string
|
||||||
|
Mode FabricRouteMode
|
||||||
|
RTTMs int
|
||||||
|
JitterMs int
|
||||||
|
LossPermille int
|
||||||
|
Capacity int
|
||||||
|
ActiveChannels int
|
||||||
|
ThroughputBps int64
|
||||||
|
PressurePercent int
|
||||||
|
Healthy bool
|
||||||
|
PassiveOutbound bool
|
||||||
|
LocalSegmentID string
|
||||||
|
NATGroupID string
|
||||||
|
LastObservedAt time.Time
|
||||||
|
LastFailureReason string
|
||||||
|
}
|
||||||
|
|
||||||
|
type FabricRouteChoice struct {
|
||||||
|
Route FabricRoute
|
||||||
|
Score int
|
||||||
|
Reason string
|
||||||
|
PressureBefore int
|
||||||
|
PressureAfter int
|
||||||
|
}
|
||||||
|
|
||||||
|
// FabricRouteSchedulerConfig holds the weights and limits used when scoring
// routes. Non-positive weights, penalties and projected cost are replaced by
// defaults in normalizeFabricRouteSchedulerConfig.
type FabricRouteSchedulerConfig struct {
	// Multipliers for latency (ms), jitter (ms), loss (permille) and
	// projected pressure (percent).
	LatencyWeight, JitterWeight, LossWeight, PressureWeight int
	// HopPenalty is added per hop and RelayPenalty per relay; DegradedPenalty
	// is added once for a degraded route.
	HopPenalty, RelayPenalty, DegradedPenalty int
	// ProjectedChannelCost is the number of channels added to a route's
	// active count when computing the pressure after admission.
	ProjectedChannelCost int
	// HardMaxRoutePressure, when positive, rejects routes whose projected
	// pressure exceeds it. Zero or negative disables the limit.
	HardMaxRoutePressure int
}
|
||||||
|
|
||||||
|
// FabricRouteScheduler picks the best route from a FabricRouteSet.
// scoreRoute normalizes Config on every call, so a zero-value scheduler
// scores with the default weights.
type FabricRouteScheduler struct {
|
||||||
|
// Config holds the scoring weights and the optional hard pressure limit.
Config FabricRouteSchedulerConfig
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewFabricRouteScheduler(cfg FabricRouteSchedulerConfig) FabricRouteScheduler {
|
||||||
|
return FabricRouteScheduler{Config: normalizeFabricRouteSchedulerConfig(cfg)}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s FabricRouteScheduler) ChooseRoute(spec FabricChannelSpec, routeSet FabricRouteSet, now time.Time) (FabricRouteChoice, error) {
|
||||||
|
if err := ValidateFabricChannelSpec(spec); err != nil {
|
||||||
|
return FabricRouteChoice{}, err
|
||||||
|
}
|
||||||
|
routes := flattenFabricRouteSet(routeSet)
|
||||||
|
if len(routes) == 0 {
|
||||||
|
return FabricRouteChoice{}, ErrFabricRouteNotFound
|
||||||
|
}
|
||||||
|
forbidden := stringSet(spec.ForbiddenHops)
|
||||||
|
choices := make([]FabricRouteChoice, 0, len(routes))
|
||||||
|
for _, route := range routes {
|
||||||
|
if !fabricRouteUsable(spec, route, forbidden, now) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
choice := s.scoreRoute(route)
|
||||||
|
if s.Config.HardMaxRoutePressure > 0 && choice.PressureAfter > s.Config.HardMaxRoutePressure {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
choice.Route = route
|
||||||
|
choices = append(choices, choice)
|
||||||
|
}
|
||||||
|
if len(choices) == 0 {
|
||||||
|
return FabricRouteChoice{}, ErrFabricRouteNotFound
|
||||||
|
}
|
||||||
|
sort.SliceStable(choices, func(i, j int) bool {
|
||||||
|
if choices[i].Score != choices[j].Score {
|
||||||
|
return choices[i].Score < choices[j].Score
|
||||||
|
}
|
||||||
|
if choices[i].PressureAfter != choices[j].PressureAfter {
|
||||||
|
return choices[i].PressureAfter < choices[j].PressureAfter
|
||||||
|
}
|
||||||
|
if choices[i].Route.BaseLatencyMs != choices[j].Route.BaseLatencyMs {
|
||||||
|
return choices[i].Route.BaseLatencyMs < choices[j].Route.BaseLatencyMs
|
||||||
|
}
|
||||||
|
return choices[i].Route.RouteID < choices[j].Route.RouteID
|
||||||
|
})
|
||||||
|
return choices[0], nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func ValidateFabricChannelSpec(spec FabricChannelSpec) error {
|
||||||
|
if strings.TrimSpace(spec.ChannelID) == "" || strings.TrimSpace(spec.ClusterID) == "" || strings.TrimSpace(spec.SourceNodeID) == "" || strings.TrimSpace(spec.TargetID) == "" {
|
||||||
|
return ErrFabricChannelInvalid
|
||||||
|
}
|
||||||
|
switch spec.TargetKind {
|
||||||
|
case FabricChannelTargetNode, FabricChannelTargetPool:
|
||||||
|
return nil
|
||||||
|
default:
|
||||||
|
return ErrFabricChannelInvalid
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func FabricChannelSpecFromServiceRequest(req FabricServiceChannelRequest, localNodeID string, now time.Time) (FabricChannelSpec, error) {
|
||||||
|
if now.IsZero() {
|
||||||
|
now = time.Now().UTC()
|
||||||
|
}
|
||||||
|
sourceNodeID := firstNonEmpty(strings.TrimSpace(req.SourceNodeID), strings.TrimSpace(localNodeID))
|
||||||
|
targetKind := req.Target.Kind
|
||||||
|
if targetKind == "" {
|
||||||
|
targetKind = FabricChannelTargetPool
|
||||||
|
}
|
||||||
|
targetID := firstNonEmpty(firstString(req.Target.PoolIDs), strings.TrimSpace(req.Target.SelectedNodeID), firstString(req.Target.NodeIDs))
|
||||||
|
if targetKind == FabricChannelTargetNode {
|
||||||
|
targetID = firstNonEmpty(strings.TrimSpace(req.Target.SelectedNodeID), firstString(req.Target.NodeIDs), targetID)
|
||||||
|
}
|
||||||
|
spec := FabricChannelSpec{
|
||||||
|
ChannelID: firstNonEmpty(strings.TrimSpace(req.ChannelID), strings.TrimSpace(req.ResourceID)),
|
||||||
|
ClusterID: strings.TrimSpace(req.ClusterID),
|
||||||
|
SourceNodeID: sourceNodeID,
|
||||||
|
TargetKind: targetKind,
|
||||||
|
TargetID: targetID,
|
||||||
|
TrafficClass: firstNonEmpty(strings.TrimSpace(req.TrafficClass), serviceClassDefaultTrafficClass(req.ServiceClass)),
|
||||||
|
StickyKey: strings.TrimSpace(req.ResourceID),
|
||||||
|
CreatedAt: now,
|
||||||
|
}
|
||||||
|
if err := ValidateFabricChannelSpec(spec); err != nil {
|
||||||
|
return FabricChannelSpec{}, err
|
||||||
|
}
|
||||||
|
return spec, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func serviceClassDefaultTrafficClass(serviceClass string) string {
|
||||||
|
switch strings.TrimSpace(strings.ToLower(serviceClass)) {
|
||||||
|
case FabricServiceClassVPNPackets:
|
||||||
|
return FabricServiceChannelBulk
|
||||||
|
case FabricServiceClassRemoteWorkspace:
|
||||||
|
return FabricServiceChannelInteractive
|
||||||
|
default:
|
||||||
|
return FabricServiceChannelReliable
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// firstString returns the first entry of values that is non-empty after
// trimming surrounding whitespace, in trimmed form. It returns "" when values
// is nil, empty, or contains only blank entries.
func firstString(values []string) string {
	for _, value := range values {
		// Trim once and reuse the result for both the test and the return.
		if trimmed := strings.TrimSpace(value); trimmed != "" {
			return trimmed
		}
	}
	return ""
}
|
||||||
|
|
||||||
|
func (s FabricRouteScheduler) scoreRoute(route FabricRoute) FabricRouteChoice {
|
||||||
|
cfg := normalizeFabricRouteSchedulerConfig(s.Config)
|
||||||
|
pressureBefore := fabricRoutePressurePercent(route, 0)
|
||||||
|
pressureAfter := fabricRoutePressurePercent(route, cfg.ProjectedChannelCost)
|
||||||
|
score := route.BaseLatencyMs*cfg.LatencyWeight +
|
||||||
|
route.JitterMs*cfg.JitterWeight +
|
||||||
|
route.LossPermille*cfg.LossWeight +
|
||||||
|
pressureAfter*cfg.PressureWeight +
|
||||||
|
len(route.Hops)*cfg.HopPenalty +
|
||||||
|
route.RelayCount*cfg.RelayPenalty
|
||||||
|
if route.Degraded {
|
||||||
|
score += cfg.DegradedPenalty
|
||||||
|
}
|
||||||
|
reason := "latency_load_score"
|
||||||
|
if pressureAfter >= 90 {
|
||||||
|
reason = "capacity_pressure_avoidance"
|
||||||
|
}
|
||||||
|
if route.RelayCount > 0 {
|
||||||
|
reason = "relay_fallback_available"
|
||||||
|
}
|
||||||
|
return FabricRouteChoice{Score: score, Reason: reason, PressureBefore: pressureBefore, PressureAfter: pressureAfter}
|
||||||
|
}
|
||||||
|
|
||||||
|
func normalizeFabricRouteSchedulerConfig(cfg FabricRouteSchedulerConfig) FabricRouteSchedulerConfig {
|
||||||
|
if cfg.LatencyWeight <= 0 {
|
||||||
|
cfg.LatencyWeight = 10
|
||||||
|
}
|
||||||
|
if cfg.JitterWeight <= 0 {
|
||||||
|
cfg.JitterWeight = 4
|
||||||
|
}
|
||||||
|
if cfg.LossWeight <= 0 {
|
||||||
|
cfg.LossWeight = 8
|
||||||
|
}
|
||||||
|
if cfg.PressureWeight <= 0 {
|
||||||
|
cfg.PressureWeight = 12
|
||||||
|
}
|
||||||
|
if cfg.HopPenalty <= 0 {
|
||||||
|
cfg.HopPenalty = 5
|
||||||
|
}
|
||||||
|
if cfg.RelayPenalty <= 0 {
|
||||||
|
cfg.RelayPenalty = 25
|
||||||
|
}
|
||||||
|
if cfg.DegradedPenalty <= 0 {
|
||||||
|
cfg.DegradedPenalty = 500
|
||||||
|
}
|
||||||
|
if cfg.ProjectedChannelCost <= 0 {
|
||||||
|
cfg.ProjectedChannelCost = 1
|
||||||
|
}
|
||||||
|
if cfg.HardMaxRoutePressure < 0 {
|
||||||
|
cfg.HardMaxRoutePressure = 0
|
||||||
|
}
|
||||||
|
return cfg
|
||||||
|
}
|
||||||
|
|
||||||
|
func flattenFabricRouteSet(routeSet FabricRouteSet) []FabricRoute {
|
||||||
|
routes := make([]FabricRoute, 0, 1+len(routeSet.WarmStandby)+len(routeSet.ColdFallbacks))
|
||||||
|
if strings.TrimSpace(routeSet.Primary.RouteID) != "" {
|
||||||
|
routes = append(routes, routeSet.Primary)
|
||||||
|
}
|
||||||
|
routes = append(routes, routeSet.WarmStandby...)
|
||||||
|
routes = append(routes, routeSet.ColdFallbacks...)
|
||||||
|
return routes
|
||||||
|
}
|
||||||
|
|
||||||
|
func fabricRouteUsable(spec FabricChannelSpec, route FabricRoute, forbidden map[string]struct{}, now time.Time) bool {
|
||||||
|
if strings.TrimSpace(route.RouteID) == "" || !route.Healthy {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if route.ClusterID != "" && spec.ClusterID != "" && route.ClusterID != spec.ClusterID {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if route.SourceNodeID != "" && route.SourceNodeID != spec.SourceNodeID {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
switch spec.TargetKind {
|
||||||
|
case FabricChannelTargetNode:
|
||||||
|
if route.DestinationNodeID != "" && route.DestinationNodeID != spec.TargetID {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
case FabricChannelTargetPool:
|
||||||
|
if route.PoolID != "" && route.PoolID != spec.TargetID {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, hop := range route.Hops {
|
||||||
|
if _, blocked := forbidden[hop.NodeID]; blocked {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
func fabricRoutePressurePercent(route FabricRoute, projected int) int {
|
||||||
|
if route.Capacity <= 0 {
|
||||||
|
return 100
|
||||||
|
}
|
||||||
|
active := route.ActiveChannels + projected
|
||||||
|
if active <= 0 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
pressure := (active * 100) / route.Capacity
|
||||||
|
if pressure > 100 {
|
||||||
|
return 100
|
||||||
|
}
|
||||||
|
return pressure
|
||||||
|
}
|
||||||
|
|
||||||
|
// stringSet builds a set from values, trimming surrounding whitespace and
// dropping blank entries. The result is never nil.
func stringSet(values []string) map[string]struct{} {
	set := make(map[string]struct{}, len(values))
	for i := range values {
		trimmed := strings.TrimSpace(values[i])
		if trimmed == "" {
			continue
		}
		set[trimmed] = struct{}{}
	}
	return set
}
|
||||||
@@ -0,0 +1,244 @@
|
|||||||
|
package mesh
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestFabricRouteSchedulerAvoidsSaturatedShortestRoute(t *testing.T) {
|
||||||
|
scheduler := NewFabricRouteScheduler(FabricRouteSchedulerConfig{})
|
||||||
|
spec := FabricChannelSpec{
|
||||||
|
ChannelID: "channel-1",
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
SourceNodeID: "node-a",
|
||||||
|
TargetKind: FabricChannelTargetNode,
|
||||||
|
TargetID: "node-b",
|
||||||
|
}
|
||||||
|
choice, err := scheduler.ChooseRoute(spec, FabricRouteSet{
|
||||||
|
TargetKind: FabricChannelTargetNode,
|
||||||
|
TargetID: "node-b",
|
||||||
|
Primary: FabricRoute{
|
||||||
|
RouteID: "short-saturated",
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
SourceNodeID: "node-a",
|
||||||
|
DestinationNodeID: "node-b",
|
||||||
|
Hops: []FabricRouteHop{{NodeID: "node-a"}, {NodeID: "node-b"}},
|
||||||
|
BaseLatencyMs: 10,
|
||||||
|
Capacity: 10,
|
||||||
|
ActiveChannels: 10,
|
||||||
|
Healthy: true,
|
||||||
|
},
|
||||||
|
WarmStandby: []FabricRoute{{
|
||||||
|
RouteID: "slightly-longer-free",
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
SourceNodeID: "node-a",
|
||||||
|
DestinationNodeID: "node-b",
|
||||||
|
Hops: []FabricRouteHop{{NodeID: "node-a"}, {NodeID: "node-r"}, {NodeID: "node-b"}},
|
||||||
|
BaseLatencyMs: 18,
|
||||||
|
Capacity: 100,
|
||||||
|
ActiveChannels: 5,
|
||||||
|
RelayCount: 1,
|
||||||
|
Healthy: true,
|
||||||
|
}},
|
||||||
|
}, time.Now())
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("choose route: %v", err)
|
||||||
|
}
|
||||||
|
if choice.Route.RouteID != "slightly-longer-free" {
|
||||||
|
t.Fatalf("route = %q, want slightly-longer-free score=%d pressure=%d", choice.Route.RouteID, choice.Score, choice.PressureAfter)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFabricChannelSpecFromServiceRequestTargetsPool(t *testing.T) {
|
||||||
|
spec, err := FabricChannelSpecFromServiceRequest(FabricServiceChannelRequest{
|
||||||
|
ChannelID: "vpn-1",
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
ResourceID: "vpn-1",
|
||||||
|
ServiceClass: FabricServiceClassVPNPackets,
|
||||||
|
Target: FabricServiceChannelTarget{
|
||||||
|
Kind: FabricChannelTargetPool,
|
||||||
|
PoolIDs: []string{"home-ipv4"},
|
||||||
|
ServiceRole: "ipv4-egress",
|
||||||
|
},
|
||||||
|
}, "android-node", time.Now())
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("service request spec: %v", err)
|
||||||
|
}
|
||||||
|
if spec.SourceNodeID != "android-node" || spec.TargetKind != FabricChannelTargetPool || spec.TargetID != "home-ipv4" || spec.TrafficClass != FabricServiceChannelBulk {
|
||||||
|
t.Fatalf("unexpected spec: %+v", spec)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFabricChannelSpecFromServiceRequestKeepsServiceOutOfEndpointSelection(t *testing.T) {
|
||||||
|
_, err := FabricChannelSpecFromServiceRequest(FabricServiceChannelRequest{
|
||||||
|
ChannelID: "rdp-1",
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
ServiceClass: FabricServiceClassRemoteWorkspace,
|
||||||
|
Target: FabricServiceChannelTarget{
|
||||||
|
Kind: FabricChannelTargetPool,
|
||||||
|
ServiceRole: "rdp-gateway",
|
||||||
|
},
|
||||||
|
}, "client-node", time.Now())
|
||||||
|
if !errors.Is(err, ErrFabricChannelInvalid) {
|
||||||
|
t.Fatalf("err = %v, want invalid without pool/node target id", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFabricRouteSchedulerPoolSkipsFailedEndpoint(t *testing.T) {
|
||||||
|
scheduler := NewFabricRouteScheduler(FabricRouteSchedulerConfig{})
|
||||||
|
spec := FabricChannelSpec{
|
||||||
|
ChannelID: "channel-pool",
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
SourceNodeID: "node-a",
|
||||||
|
TargetKind: FabricChannelTargetPool,
|
||||||
|
TargetID: "pool-egress",
|
||||||
|
}
|
||||||
|
choice, err := scheduler.ChooseRoute(spec, FabricRouteSet{
|
||||||
|
TargetKind: FabricChannelTargetPool,
|
||||||
|
TargetID: "pool-egress",
|
||||||
|
Primary: FabricRoute{
|
||||||
|
RouteID: "pool-node-dead",
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
SourceNodeID: "node-a",
|
||||||
|
DestinationNodeID: "node-b",
|
||||||
|
PoolID: "pool-egress",
|
||||||
|
Capacity: 100,
|
||||||
|
Healthy: false,
|
||||||
|
},
|
||||||
|
WarmStandby: []FabricRoute{{
|
||||||
|
RouteID: "pool-node-live",
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
SourceNodeID: "node-a",
|
||||||
|
DestinationNodeID: "node-c",
|
||||||
|
PoolID: "pool-egress",
|
||||||
|
Hops: []FabricRouteHop{{NodeID: "node-a"}, {NodeID: "node-c"}},
|
||||||
|
BaseLatencyMs: 25,
|
||||||
|
Capacity: 100,
|
||||||
|
Healthy: true,
|
||||||
|
}},
|
||||||
|
}, time.Now())
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("choose route: %v", err)
|
||||||
|
}
|
||||||
|
if choice.Route.DestinationNodeID != "node-c" {
|
||||||
|
t.Fatalf("destination = %q, want node-c", choice.Route.DestinationNodeID)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFabricRouteSchedulerHonorsForbiddenHops(t *testing.T) {
|
||||||
|
scheduler := NewFabricRouteScheduler(FabricRouteSchedulerConfig{})
|
||||||
|
spec := FabricChannelSpec{
|
||||||
|
ChannelID: "channel-1",
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
SourceNodeID: "node-a",
|
||||||
|
TargetKind: FabricChannelTargetNode,
|
||||||
|
TargetID: "node-b",
|
||||||
|
ForbiddenHops: []string{"node-r"},
|
||||||
|
}
|
||||||
|
_, err := scheduler.ChooseRoute(spec, FabricRouteSet{
|
||||||
|
Primary: FabricRoute{
|
||||||
|
RouteID: "blocked",
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
SourceNodeID: "node-a",
|
||||||
|
DestinationNodeID: "node-b",
|
||||||
|
Hops: []FabricRouteHop{{NodeID: "node-a"}, {NodeID: "node-r"}, {NodeID: "node-b"}},
|
||||||
|
Capacity: 100,
|
||||||
|
Healthy: true,
|
||||||
|
},
|
||||||
|
}, time.Now())
|
||||||
|
if !errors.Is(err, ErrFabricRouteNotFound) {
|
||||||
|
t.Fatalf("err = %v, want ErrFabricRouteNotFound", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFabricRouteSchedulerRejectsRoutesAboveHardPressureLimit(t *testing.T) {
|
||||||
|
scheduler := NewFabricRouteScheduler(FabricRouteSchedulerConfig{HardMaxRoutePressure: 80})
|
||||||
|
spec := FabricChannelSpec{
|
||||||
|
ChannelID: "channel-pressure",
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
SourceNodeID: "node-a",
|
||||||
|
TargetKind: FabricChannelTargetNode,
|
||||||
|
TargetID: "node-b",
|
||||||
|
}
|
||||||
|
choice, err := scheduler.ChooseRoute(spec, FabricRouteSet{
|
||||||
|
Primary: FabricRoute{
|
||||||
|
RouteID: "too-busy",
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
SourceNodeID: "node-a",
|
||||||
|
DestinationNodeID: "node-b",
|
||||||
|
Capacity: 10,
|
||||||
|
ActiveChannels: 9,
|
||||||
|
Healthy: true,
|
||||||
|
},
|
||||||
|
WarmStandby: []FabricRoute{{
|
||||||
|
RouteID: "admissible",
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
SourceNodeID: "node-a",
|
||||||
|
DestinationNodeID: "node-b",
|
||||||
|
Capacity: 10,
|
||||||
|
ActiveChannels: 5,
|
||||||
|
Healthy: true,
|
||||||
|
}},
|
||||||
|
}, time.Now())
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("choose route: %v", err)
|
||||||
|
}
|
||||||
|
if choice.Route.RouteID != "admissible" {
|
||||||
|
t.Fatalf("route = %q, want admissible", choice.Route.RouteID)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFabricRouteSchedulerKeepsHighLatencyRouteAsFallbackUntilFastRouteSaturates(t *testing.T) {
|
||||||
|
spec := FabricChannelSpec{
|
||||||
|
ChannelID: "channel-latency-aware",
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
SourceNodeID: "node-a",
|
||||||
|
TargetKind: FabricChannelTargetPool,
|
||||||
|
TargetID: "pool-egress",
|
||||||
|
}
|
||||||
|
routeSet := FabricRouteSet{
|
||||||
|
TargetKind: FabricChannelTargetPool,
|
||||||
|
TargetID: "pool-egress",
|
||||||
|
Primary: FabricRoute{
|
||||||
|
RouteID: "lan-fast",
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
SourceNodeID: "node-a",
|
||||||
|
DestinationNodeID: "node-lan",
|
||||||
|
PoolID: "pool-egress",
|
||||||
|
BaseLatencyMs: 4,
|
||||||
|
Capacity: 100,
|
||||||
|
ActiveChannels: 85,
|
||||||
|
Healthy: true,
|
||||||
|
},
|
||||||
|
WarmStandby: []FabricRoute{{
|
||||||
|
RouteID: "wan-slow",
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
SourceNodeID: "node-a",
|
||||||
|
DestinationNodeID: "node-wan",
|
||||||
|
PoolID: "pool-egress",
|
||||||
|
BaseLatencyMs: 420,
|
||||||
|
Capacity: 100,
|
||||||
|
ActiveChannels: 0,
|
||||||
|
Healthy: true,
|
||||||
|
}},
|
||||||
|
}
|
||||||
|
|
||||||
|
scheduler := NewFabricRouteScheduler(FabricRouteSchedulerConfig{HardMaxRoutePressure: 90})
|
||||||
|
choice, err := scheduler.ChooseRoute(spec, routeSet, time.Now())
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("choose route: %v", err)
|
||||||
|
}
|
||||||
|
if choice.Route.RouteID != "lan-fast" {
|
||||||
|
t.Fatalf("route = %q, want fast LAN before hard pressure limit", choice.Route.RouteID)
|
||||||
|
}
|
||||||
|
|
||||||
|
routeSet.Primary.ActiveChannels = 90
|
||||||
|
choice, err = scheduler.ChooseRoute(spec, routeSet, time.Now())
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("choose fallback route: %v", err)
|
||||||
|
}
|
||||||
|
if choice.Route.RouteID != "wan-slow" {
|
||||||
|
t.Fatalf("route = %q, want WAN only after LAN reaches hard pressure limit", choice.Route.RouteID)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,130 @@
|
|||||||
|
package mesh
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
"sync/atomic"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
|
||||||
|
)
|
||||||
|
|
||||||
|
type FabricOverlayTransportConfig struct {
|
||||||
|
ClusterID string
|
||||||
|
LocalNodeID string
|
||||||
|
RouterConfig FabricChannelRouterConfig
|
||||||
|
Timeout time.Duration
|
||||||
|
}
|
||||||
|
|
||||||
|
// FabricOverlayTransport sends payloads to fabric targets by looking up the
// target's route set and delegating to a FabricChannelRuntime.
type FabricOverlayTransport struct {
|
||||||
|
// Runtime performs the actual sends; Send fails when it is nil.
Runtime *FabricChannelRuntime
|
||||||
|
// RouteSets maps a trimmed target ID to its candidate routes.
RouteSets map[string]FabricRouteSet
|
||||||
|
// Config is the configuration captured at construction.
Config FabricOverlayTransportConfig
|
||||||
|
// sequence numbers the generated "fabric-overlay-%d" channel IDs.
sequence atomic.Uint64
|
||||||
|
}
|
||||||
|
|
||||||
|
// FabricOverlayTransportSnapshot is the JSON-serializable result of
// FabricOverlayTransport.Snapshot.
type FabricOverlayTransportSnapshot struct {
|
||||||
|
// RoutePressure is the runtime's route pressure snapshot.
RoutePressure FabricRoutePressureSnapshot `json:"route_pressure"`
|
||||||
|
// RouteHealth is the runtime's route health snapshot.
RouteHealth FabricRouteHealthSnapshot `json:"route_health,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// FabricOverlaySendRequest is the input to FabricOverlayTransport.Send.
type FabricOverlaySendRequest struct {
|
||||||
|
// ChannelID is optional; a "fabric-overlay-%d" ID is generated when blank.
ChannelID string
|
||||||
|
// TargetKind is optional; it falls back to the route set's kind and then
// to a node target.
TargetKind FabricChannelTargetKind
|
||||||
|
// TargetID is required and selects the route set (after trimming).
TargetID string
|
||||||
|
// TrafficClass defaults to the reliable class when zero.
TrafficClass fabricproto.TrafficClass
|
||||||
|
// Payloads are handed to the runtime's reliable send unchanged.
Payloads [][]byte
|
||||||
|
// StickyKey is trimmed and copied onto the channel spec.
StickyKey string
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewFabricOverlayTransport(transport FabricTransport, routeSets map[string]FabricRouteSet, cfg FabricOverlayTransportConfig) *FabricOverlayTransport {
|
||||||
|
if cfg.Timeout <= 0 {
|
||||||
|
cfg.Timeout = 30 * time.Second
|
||||||
|
}
|
||||||
|
runtime := NewFabricChannelRuntime(transport, FabricChannelRuntimeConfig{
|
||||||
|
RouterConfig: cfg.RouterConfig,
|
||||||
|
Timeout: cfg.Timeout,
|
||||||
|
})
|
||||||
|
normalized := make(map[string]FabricRouteSet, len(routeSets))
|
||||||
|
for targetID, routeSet := range routeSets {
|
||||||
|
targetID = strings.TrimSpace(targetID)
|
||||||
|
if targetID != "" {
|
||||||
|
normalized[targetID] = routeSet
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return &FabricOverlayTransport{
|
||||||
|
Runtime: runtime,
|
||||||
|
RouteSets: normalized,
|
||||||
|
Config: cfg,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *FabricOverlayTransport) Send(ctx context.Context, req FabricOverlaySendRequest) (FabricChannelRuntimeResult, error) {
|
||||||
|
if t == nil || t.Runtime == nil {
|
||||||
|
return FabricChannelRuntimeResult{}, ErrForwardRuntimeUnavailable
|
||||||
|
}
|
||||||
|
targetID := strings.TrimSpace(req.TargetID)
|
||||||
|
if targetID == "" {
|
||||||
|
return FabricChannelRuntimeResult{}, ErrFabricChannelInvalid
|
||||||
|
}
|
||||||
|
routeSet, ok := t.RouteSets[targetID]
|
||||||
|
if !ok {
|
||||||
|
return FabricChannelRuntimeResult{}, ErrFabricRouteNotFound
|
||||||
|
}
|
||||||
|
targetKind := req.TargetKind
|
||||||
|
if targetKind == "" {
|
||||||
|
targetKind = routeSet.TargetKind
|
||||||
|
}
|
||||||
|
if targetKind == "" {
|
||||||
|
targetKind = FabricChannelTargetNode
|
||||||
|
}
|
||||||
|
trafficClass := req.TrafficClass
|
||||||
|
if trafficClass == 0 {
|
||||||
|
trafficClass = fabricproto.TrafficClassReliable
|
||||||
|
}
|
||||||
|
t.Runtime.Config.TrafficClass = trafficClass
|
||||||
|
spec := FabricChannelSpec{
|
||||||
|
ChannelID: firstNonEmpty(strings.TrimSpace(req.ChannelID), fmt.Sprintf("fabric-overlay-%d", t.sequence.Add(1))),
|
||||||
|
ClusterID: strings.TrimSpace(t.Config.ClusterID),
|
||||||
|
SourceNodeID: strings.TrimSpace(t.Config.LocalNodeID),
|
||||||
|
TargetKind: targetKind,
|
||||||
|
TargetID: targetID,
|
||||||
|
TrafficClass: loadFabricTrafficClassName(trafficClass),
|
||||||
|
StickyKey: strings.TrimSpace(req.StickyKey),
|
||||||
|
CreatedAt: time.Now().UTC(),
|
||||||
|
}
|
||||||
|
return t.Runtime.SendReliable(ctx, spec, routeSet, req.Payloads)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *FabricOverlayTransport) SnapshotPressure() FabricRoutePressureSnapshot {
|
||||||
|
if t == nil || t.Runtime == nil || t.Runtime.Pressure == nil {
|
||||||
|
return FabricRoutePressureSnapshot{}
|
||||||
|
}
|
||||||
|
return t.Runtime.Pressure.SnapshotPressure()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *FabricOverlayTransport) Snapshot() FabricOverlayTransportSnapshot {
|
||||||
|
if t == nil || t.Runtime == nil {
|
||||||
|
return FabricOverlayTransportSnapshot{}
|
||||||
|
}
|
||||||
|
return FabricOverlayTransportSnapshot{
|
||||||
|
RoutePressure: t.Runtime.snapshotRoutePressure(),
|
||||||
|
RouteHealth: t.Runtime.snapshotRouteHealth(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func loadFabricTrafficClassName(trafficClass fabricproto.TrafficClass) string {
|
||||||
|
switch trafficClass {
|
||||||
|
case fabricproto.TrafficClassControl:
|
||||||
|
return "control"
|
||||||
|
case fabricproto.TrafficClassInteractive:
|
||||||
|
return "interactive"
|
||||||
|
case fabricproto.TrafficClassBulk:
|
||||||
|
return "bulk"
|
||||||
|
case fabricproto.TrafficClassReliable:
|
||||||
|
return "reliable"
|
||||||
|
default:
|
||||||
|
return fmt.Sprintf("traffic_class_%d", trafficClass)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,49 @@
|
|||||||
|
package mesh
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestFabricOverlayTransportSendsThroughRouteSet(t *testing.T) {
|
||||||
|
transport := newFakeFabricRuntimeTransport(map[string]time.Duration{
|
||||||
|
"quic://node-b:19443": 0,
|
||||||
|
})
|
||||||
|
overlay := NewFabricOverlayTransport(transport, map[string]FabricRouteSet{
|
||||||
|
"node-b": {
|
||||||
|
TargetKind: FabricChannelTargetNode,
|
||||||
|
TargetID: "node-b",
|
||||||
|
Primary: FabricRoute{
|
||||||
|
RouteID: "node-b-direct",
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
SourceNodeID: "node-a",
|
||||||
|
DestinationNodeID: "node-b",
|
||||||
|
Hops: []FabricRouteHop{{NodeID: "node-b", Mode: FabricRouteDirect, EndpointID: "node-b-direct", Address: "quic://node-b:19443"}},
|
||||||
|
Capacity: 100,
|
||||||
|
Healthy: true,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}, FabricOverlayTransportConfig{ClusterID: "cluster-1", LocalNodeID: "node-a"})
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
|
||||||
|
defer cancel()
|
||||||
|
result, err := overlay.Send(ctx, FabricOverlaySendRequest{
|
||||||
|
TargetID: "node-b",
|
||||||
|
TrafficClass: fabricproto.TrafficClassReliable,
|
||||||
|
Payloads: [][]byte{[]byte("payload")},
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("send: %v", err)
|
||||||
|
}
|
||||||
|
if result.BytesSent != uint64(len("payload")) || result.AcksReceived != 1 {
|
||||||
|
t.Fatalf("result = %+v", result)
|
||||||
|
}
|
||||||
|
if pressure := overlay.SnapshotPressure(); pressure.ActiveTotal != 0 || pressure.AcquiredTotal != pressure.ReleasedTotal {
|
||||||
|
t.Fatalf("pressure leak: %+v", pressure)
|
||||||
|
}
|
||||||
|
if snapshot := overlay.Snapshot(); snapshot.RoutePressure.AcquiredTotal != 1 || len(snapshot.RouteHealth.Quarantined) != 0 {
|
||||||
|
t.Fatalf("snapshot = %+v", snapshot)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -3,9 +3,12 @@ package mesh
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"crypto/tls"
|
"crypto/tls"
|
||||||
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"net"
|
"net"
|
||||||
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
|
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
|
||||||
"github.com/quic-go/quic-go"
|
"github.com/quic-go/quic-go"
|
||||||
@@ -14,6 +17,13 @@ import (
|
|||||||
type QUICFabricServer struct {
|
type QUICFabricServer struct {
|
||||||
listener *quic.Listener
|
listener *quic.Listener
|
||||||
logger FabricSessionEventLogger
|
logger FabricSessionEventLogger
|
||||||
|
reverseMu sync.RWMutex
|
||||||
|
reverseTransport *QUICFabricTransport
|
||||||
|
fabricFrameHandler FabricFrameHandler
|
||||||
|
productionForwardHandler func(context.Context, ProductionEnvelope) (ProductionForwardResult, error)
|
||||||
|
webIngressForwardHandler func(context.Context, []byte) ([]byte, error)
|
||||||
|
fabricControlHandler func(context.Context, []byte) ([]byte, error)
|
||||||
|
syntheticForwardHandler func(context.Context, SyntheticEnvelope) (SyntheticEnvelope, error)
|
||||||
done chan struct{}
|
done chan struct{}
|
||||||
closeOnce sync.Once
|
closeOnce sync.Once
|
||||||
}
|
}
|
||||||
@@ -23,8 +33,20 @@ type QUICFabricServerConfig struct {
|
|||||||
TLSConfig *tls.Config
|
TLSConfig *tls.Config
|
||||||
QUICConfig *quic.Config
|
QUICConfig *quic.Config
|
||||||
Logger FabricSessionEventLogger
|
Logger FabricSessionEventLogger
|
||||||
|
ReverseTransport *QUICFabricTransport
|
||||||
|
FabricFrameHandler FabricFrameHandler
|
||||||
|
ProductionForwardHandler func(context.Context, ProductionEnvelope) (ProductionForwardResult, error)
|
||||||
|
WebIngressForwardHandler func(context.Context, []byte) ([]byte, error)
|
||||||
|
FabricControlHandler func(context.Context, []byte) ([]byte, error)
|
||||||
|
SyntheticForwardHandler func(context.Context, SyntheticEnvelope) (SyntheticEnvelope, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// FabricFrameSender is the write side handed to a FabricFrameHandler so it
// can send frames in response to the frame it is handling.
type FabricFrameSender interface {
|
||||||
|
// SendFrame sends one frame and reports any failure.
SendFrame(context.Context, fabricproto.Frame) error
|
||||||
|
}
|
||||||
|
|
||||||
|
// FabricFrameHandler processes one incoming frame. The bool result reports
// whether the handler consumed the frame; the error reports a handling
// failure.
type FabricFrameHandler func(context.Context, FabricFrameSender, fabricproto.Frame) (bool, error)
|
||||||
|
|
||||||
func StartQUICFabricServer(ctx context.Context, cfg QUICFabricServerConfig) (*QUICFabricServer, error) {
|
func StartQUICFabricServer(ctx context.Context, cfg QUICFabricServerConfig) (*QUICFabricServer, error) {
|
||||||
if cfg.ListenAddr == "" {
|
if cfg.ListenAddr == "" {
|
||||||
return nil, fmt.Errorf("quic fabric listen addr is required")
|
return nil, fmt.Errorf("quic fabric listen addr is required")
|
||||||
@@ -44,6 +66,12 @@ func StartQUICFabricServer(ctx context.Context, cfg QUICFabricServerConfig) (*QU
|
|||||||
server := &QUICFabricServer{
|
server := &QUICFabricServer{
|
||||||
listener: listener,
|
listener: listener,
|
||||||
logger: cfg.Logger,
|
logger: cfg.Logger,
|
||||||
|
reverseTransport: cfg.ReverseTransport,
|
||||||
|
fabricFrameHandler: cfg.FabricFrameHandler,
|
||||||
|
productionForwardHandler: cfg.ProductionForwardHandler,
|
||||||
|
webIngressForwardHandler: cfg.WebIngressForwardHandler,
|
||||||
|
fabricControlHandler: cfg.FabricControlHandler,
|
||||||
|
syntheticForwardHandler: cfg.SyntheticForwardHandler,
|
||||||
done: make(chan struct{}),
|
done: make(chan struct{}),
|
||||||
}
|
}
|
||||||
go server.acceptLoop(ctx)
|
go server.acceptLoop(ctx)
|
||||||
@@ -57,6 +85,15 @@ func (s *QUICFabricServer) Addr() net.Addr {
|
|||||||
return s.listener.Addr()
|
return s.listener.Addr()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *QUICFabricServer) SetReverseTransport(transport *QUICFabricTransport) {
|
||||||
|
if s == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
s.reverseMu.Lock()
|
||||||
|
s.reverseTransport = transport
|
||||||
|
s.reverseMu.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
func (s *QUICFabricServer) Close() error {
|
func (s *QUICFabricServer) Close() error {
|
||||||
if s == nil {
|
if s == nil {
|
||||||
return nil
|
return nil
|
||||||
@@ -95,6 +132,8 @@ func (s *QUICFabricServer) handleConn(ctx context.Context, conn *quic.Conn) {
|
|||||||
|
|
||||||
func (s *QUICFabricServer) handleStream(ctx context.Context, conn *quic.Conn, stream *quic.Stream) {
|
func (s *QUICFabricServer) handleStream(ctx context.Context, conn *quic.Conn, stream *quic.Stream) {
|
||||||
session := fabricproto.NewSession(fabricproto.SessionConfig{})
|
session := fabricproto.NewSession(fabricproto.SessionConfig{})
|
||||||
|
sender := quicStreamFrameSender{stream: stream}
|
||||||
|
defer func() { _ = stream.Close() }()
|
||||||
s.logFabricSession(FabricSessionEventLogEntry{
|
s.logFabricSession(FabricSessionEventLogEntry{
|
||||||
Event: "fabric_session_quic_stream_opened",
|
Event: "fabric_session_quic_stream_opened",
|
||||||
AcceptedBy: "quic",
|
AcceptedBy: "quic",
|
||||||
@@ -116,6 +155,29 @@ func (s *QUICFabricServer) handleStream(ctx context.Context, conn *quic.Conn, st
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
s.registerReverseHelloFrame(conn, frame)
|
||||||
|
if s.handleProductionForwardFrame(ctx, stream, frame) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if s.handleWebIngressForwardFrame(ctx, stream, frame) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if s.handleFabricControlForwardFrame(ctx, stream, frame) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if s.handleSyntheticForwardFrame(ctx, conn, stream, frame) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if s.fabricFrameHandler != nil {
|
||||||
|
handled, err := s.fabricFrameHandler(ctx, sender, frame)
|
||||||
|
if err != nil {
|
||||||
|
_ = conn.CloseWithError(2, err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if handled {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
event, responses, err := session.HandleFrame(frame)
|
event, responses, err := session.HandleFrame(frame)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
_ = conn.CloseWithError(2, err.Error())
|
_ = conn.CloseWithError(2, err.Error())
|
||||||
@@ -140,6 +202,196 @@ func (s *QUICFabricServer) handleStream(ctx context.Context, conn *quic.Conn, st
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type quicStreamFrameSender struct {
|
||||||
|
stream *quic.Stream
|
||||||
|
mu sync.Mutex
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s quicStreamFrameSender) SendFrame(ctx context.Context, frame fabricproto.Frame) error {
|
||||||
|
if s.stream == nil {
|
||||||
|
return fmt.Errorf("quic fabric stream is closed")
|
||||||
|
}
|
||||||
|
s.mu.Lock()
|
||||||
|
defer s.mu.Unlock()
|
||||||
|
if deadline, ok := ctx.Deadline(); ok {
|
||||||
|
_ = s.stream.SetWriteDeadline(deadline)
|
||||||
|
} else {
|
||||||
|
_ = s.stream.SetWriteDeadline(time.Now().Add(30 * time.Second))
|
||||||
|
}
|
||||||
|
return fabricproto.WriteFrame(s.stream, frame)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *QUICFabricServer) registerReverseHelloFrame(conn *quic.Conn, frame fabricproto.Frame) {
|
||||||
|
reverseTransport := s.getReverseTransport()
|
||||||
|
if s == nil || reverseTransport == nil || conn == nil || frame.Type != fabricproto.FramePing {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
payload := string(frame.Payload)
|
||||||
|
if !strings.HasPrefix(payload, fabricQUICReverseHelloPrefix) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
peerID := strings.TrimPrefix(payload, fabricQUICReverseHelloPrefix)
|
||||||
|
reverseTransport.RegisterReverseConn(peerID, conn)
|
||||||
|
s.logFabricSession(FabricSessionEventLogEntry{
|
||||||
|
Event: "fabric_session_quic_reverse_registered",
|
||||||
|
AcceptedBy: "quic_reverse_hello",
|
||||||
|
RemoteAddr: conn.RemoteAddr().String(),
|
||||||
|
PeerID: peerID,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
type quicProductionForwardResponse struct {
|
||||||
|
Result ProductionForwardResult `json:"result,omitempty"`
|
||||||
|
Error string `json:"error,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type quicSyntheticForwardResponse struct {
|
||||||
|
Envelope SyntheticEnvelope `json:"envelope,omitempty"`
|
||||||
|
Error string `json:"error,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type quicWebIngressForwardResponse struct {
|
||||||
|
Payload json.RawMessage `json:"payload,omitempty"`
|
||||||
|
Error string `json:"error,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type quicFabricControlForwardResponse struct {
|
||||||
|
Payload json.RawMessage `json:"payload,omitempty"`
|
||||||
|
Error string `json:"error,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *QUICFabricServer) handleProductionForwardFrame(ctx context.Context, stream *quic.Stream, frame fabricproto.Frame) bool {
|
||||||
|
if frame.Type != fabricproto.FrameData || frame.StreamID != ProductionForwardQUICStreamID {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
response := quicProductionForwardResponse{}
|
||||||
|
if s == nil || s.productionForwardHandler == nil {
|
||||||
|
response.Error = ErrForwardRuntimeUnavailable.Error()
|
||||||
|
} else {
|
||||||
|
var envelope ProductionEnvelope
|
||||||
|
if err := json.Unmarshal(frame.Payload, &envelope); err != nil {
|
||||||
|
response.Error = "invalid production mesh envelope"
|
||||||
|
} else if result, err := s.productionForwardHandler(ctx, envelope); err != nil {
|
||||||
|
response.Error = err.Error()
|
||||||
|
} else {
|
||||||
|
response.Result = result
|
||||||
|
}
|
||||||
|
}
|
||||||
|
payload, err := json.Marshal(response)
|
||||||
|
if err != nil {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
_ = fabricproto.WriteFrame(stream, fabricproto.Frame{
|
||||||
|
Type: fabricproto.FrameData,
|
||||||
|
TrafficClass: fabricproto.TrafficClassReliable,
|
||||||
|
StreamID: ProductionForwardQUICStreamID,
|
||||||
|
Sequence: frame.Sequence,
|
||||||
|
Payload: payload,
|
||||||
|
})
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *QUICFabricServer) handleWebIngressForwardFrame(ctx context.Context, stream *quic.Stream, frame fabricproto.Frame) bool {
|
||||||
|
if frame.Type != fabricproto.FrameData || frame.StreamID != WebIngressForwardQUICStreamID {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
response := quicWebIngressForwardResponse{}
|
||||||
|
if s == nil || s.webIngressForwardHandler == nil {
|
||||||
|
response.Error = ErrForwardRuntimeUnavailable.Error()
|
||||||
|
} else if payload, err := s.webIngressForwardHandler(ctx, append([]byte(nil), frame.Payload...)); err != nil {
|
||||||
|
response.Error = err.Error()
|
||||||
|
} else {
|
||||||
|
response.Payload = append(json.RawMessage(nil), payload...)
|
||||||
|
}
|
||||||
|
payload, err := json.Marshal(response)
|
||||||
|
if err != nil {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
_ = fabricproto.WriteFrame(stream, fabricproto.Frame{
|
||||||
|
Type: fabricproto.FrameData,
|
||||||
|
TrafficClass: fabricproto.TrafficClassReliable,
|
||||||
|
StreamID: WebIngressForwardQUICStreamID,
|
||||||
|
Sequence: frame.Sequence,
|
||||||
|
Payload: payload,
|
||||||
|
})
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *QUICFabricServer) handleFabricControlForwardFrame(ctx context.Context, stream *quic.Stream, frame fabricproto.Frame) bool {
|
||||||
|
if frame.Type != fabricproto.FrameData || frame.StreamID != FabricControlForwardQUICStreamID {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
response := quicFabricControlForwardResponse{}
|
||||||
|
if s == nil || s.fabricControlHandler == nil {
|
||||||
|
response.Error = ErrForwardRuntimeUnavailable.Error()
|
||||||
|
} else if payload, err := s.fabricControlHandler(ctx, append([]byte(nil), frame.Payload...)); err != nil {
|
||||||
|
response.Error = err.Error()
|
||||||
|
} else {
|
||||||
|
response.Payload = append(json.RawMessage(nil), payload...)
|
||||||
|
}
|
||||||
|
payload, err := json.Marshal(response)
|
||||||
|
if err != nil {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
_ = fabricproto.WriteFrame(stream, fabricproto.Frame{
|
||||||
|
Type: fabricproto.FrameData,
|
||||||
|
TrafficClass: fabricproto.TrafficClassReliable,
|
||||||
|
StreamID: FabricControlForwardQUICStreamID,
|
||||||
|
Sequence: frame.Sequence,
|
||||||
|
Payload: payload,
|
||||||
|
})
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *QUICFabricServer) handleSyntheticForwardFrame(ctx context.Context, conn *quic.Conn, stream *quic.Stream, frame fabricproto.Frame) bool {
|
||||||
|
if frame.Type != fabricproto.FrameData || frame.StreamID != SyntheticForwardQUICStreamID {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
response := quicSyntheticForwardResponse{}
|
||||||
|
if s == nil || s.syntheticForwardHandler == nil {
|
||||||
|
response.Error = ErrMeshRuntimeDisabled.Error()
|
||||||
|
} else {
|
||||||
|
var envelope SyntheticEnvelope
|
||||||
|
if err := json.Unmarshal(frame.Payload, &envelope); err != nil {
|
||||||
|
response.Error = "invalid synthetic mesh envelope"
|
||||||
|
} else if ack, err := s.syntheticForwardHandler(ctx, envelope); err != nil {
|
||||||
|
response.Error = err.Error()
|
||||||
|
} else {
|
||||||
|
s.registerReversePeerConn(envelope.From.NodeID, conn)
|
||||||
|
response.Envelope = ack
|
||||||
|
}
|
||||||
|
}
|
||||||
|
payload, err := json.Marshal(response)
|
||||||
|
if err != nil {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
_ = fabricproto.WriteFrame(stream, fabricproto.Frame{
|
||||||
|
Type: fabricproto.FrameData,
|
||||||
|
TrafficClass: fabricproto.TrafficClassReliable,
|
||||||
|
StreamID: SyntheticForwardQUICStreamID,
|
||||||
|
Sequence: frame.Sequence,
|
||||||
|
Payload: payload,
|
||||||
|
})
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *QUICFabricServer) registerReversePeerConn(peerID string, conn *quic.Conn) {
|
||||||
|
reverseTransport := s.getReverseTransport()
|
||||||
|
if s == nil || reverseTransport == nil || conn == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
reverseTransport.RegisterReverseConn(peerID, conn)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *QUICFabricServer) getReverseTransport() *QUICFabricTransport {
|
||||||
|
if s == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
s.reverseMu.RLock()
|
||||||
|
defer s.reverseMu.RUnlock()
|
||||||
|
return s.reverseTransport
|
||||||
|
}
|
||||||
|
|
||||||
func (s *QUICFabricServer) logFabricSession(entry FabricSessionEventLogEntry) {
|
func (s *QUICFabricServer) logFabricSession(entry FabricSessionEventLogEntry) {
|
||||||
if s != nil && s.logger != nil {
|
if s != nil && s.logger != nil {
|
||||||
s.logger(entry)
|
s.logger(entry)
|
||||||
|
|||||||
@@ -6,7 +6,9 @@ import (
|
|||||||
"crypto/tls"
|
"crypto/tls"
|
||||||
"crypto/x509"
|
"crypto/x509"
|
||||||
"encoding/hex"
|
"encoding/hex"
|
||||||
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"net"
|
||||||
"sort"
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
@@ -17,6 +19,7 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
const fabricQUICNextProto = "rap-fabric-data-session-v1"
|
const fabricQUICNextProto = "rap-fabric-data-session-v1"
|
||||||
|
const fabricQUICReverseHelloPrefix = "rap-fabric-reverse-hello-v1:"
|
||||||
const defaultQUICFabricConnIdleTTL = 5 * time.Minute
|
const defaultQUICFabricConnIdleTTL = 5 * time.Minute
|
||||||
const defaultQUICFabricMaxStreamsPerConn = 64
|
const defaultQUICFabricMaxStreamsPerConn = 64
|
||||||
const ErrQUICFabricStreamLimitReached = quicFabricError("quic fabric stream limit reached")
|
const ErrQUICFabricStreamLimitReached = quicFabricError("quic fabric stream limit reached")
|
||||||
@@ -29,16 +32,28 @@ func (e quicFabricError) Error() string {
|
|||||||
|
|
||||||
type QUICFabricTransport struct {
|
type QUICFabricTransport struct {
|
||||||
Config *quic.Config
|
Config *quic.Config
|
||||||
|
LocalPeerID string
|
||||||
IdleTTL time.Duration
|
IdleTTL time.Duration
|
||||||
MaxStreamsPerConn int
|
MaxStreamsPerConn int
|
||||||
|
DialAddr func(context.Context, string, *tls.Config, *quic.Config) (*quic.Conn, error)
|
||||||
mu sync.Mutex
|
mu sync.Mutex
|
||||||
conns map[string]*quicFabricConnEntry
|
conns map[string]*quicFabricConnEntry
|
||||||
|
reverseConns map[string]*quicFabricConnEntry
|
||||||
|
inboundProductionHandler func(context.Context, ProductionEnvelope) (ProductionForwardResult, error)
|
||||||
|
inboundWebIngressHandler func(context.Context, []byte) ([]byte, error)
|
||||||
|
inboundFabricControlHandler func(context.Context, []byte) ([]byte, error)
|
||||||
|
inboundSyntheticHandler func(context.Context, SyntheticEnvelope) (SyntheticEnvelope, error)
|
||||||
|
logger FabricSessionEventLogger
|
||||||
stats QUICFabricTransportStats
|
stats QUICFabricTransportStats
|
||||||
}
|
}
|
||||||
|
|
||||||
type QUICFabricTransportStats struct {
|
type QUICFabricTransportStats struct {
|
||||||
Opens uint64 `json:"opens"`
|
Opens uint64 `json:"opens"`
|
||||||
Reuses uint64 `json:"reuses"`
|
Reuses uint64 `json:"reuses"`
|
||||||
|
ReverseHelloSent uint64 `json:"reverse_hello_sent"`
|
||||||
|
ReverseHelloFailed uint64 `json:"reverse_hello_failed"`
|
||||||
|
ReverseRegisters uint64 `json:"reverse_registers"`
|
||||||
|
ReverseReuses uint64 `json:"reverse_reuses"`
|
||||||
OpenFailures uint64 `json:"open_failures"`
|
OpenFailures uint64 `json:"open_failures"`
|
||||||
ClosedEvicted uint64 `json:"closed_evicted"`
|
ClosedEvicted uint64 `json:"closed_evicted"`
|
||||||
CloseAllCalls uint64 `json:"close_all_calls"`
|
CloseAllCalls uint64 `json:"close_all_calls"`
|
||||||
@@ -50,6 +65,7 @@ type QUICFabricTransportStats struct {
|
|||||||
|
|
||||||
type QUICFabricTransportSnapshot struct {
|
type QUICFabricTransportSnapshot struct {
|
||||||
SchemaVersion string `json:"schema_version"`
|
SchemaVersion string `json:"schema_version"`
|
||||||
|
LocalPeerID string `json:"local_peer_id,omitempty"`
|
||||||
ActiveCount int `json:"active_count"`
|
ActiveCount int `json:"active_count"`
|
||||||
ActiveStreams int `json:"active_streams"`
|
ActiveStreams int `json:"active_streams"`
|
||||||
MaxStreamsPerConn int `json:"max_streams_per_conn"`
|
MaxStreamsPerConn int `json:"max_streams_per_conn"`
|
||||||
@@ -63,6 +79,7 @@ type QUICFabricConnSnapshot struct {
|
|||||||
PeerID string `json:"peer_id,omitempty"`
|
PeerID string `json:"peer_id,omitempty"`
|
||||||
Endpoint string `json:"endpoint,omitempty"`
|
Endpoint string `json:"endpoint,omitempty"`
|
||||||
CertSHA256 string `json:"cert_sha256,omitempty"`
|
CertSHA256 string `json:"cert_sha256,omitempty"`
|
||||||
|
Direction string `json:"direction,omitempty"`
|
||||||
ActiveStreams int `json:"active_streams"`
|
ActiveStreams int `json:"active_streams"`
|
||||||
MaxStreams int `json:"max_streams"`
|
MaxStreams int `json:"max_streams"`
|
||||||
CapacityPressurePercent int `json:"capacity_pressure_percent"`
|
CapacityPressurePercent int `json:"capacity_pressure_percent"`
|
||||||
@@ -92,7 +109,41 @@ type quicFabricConnEntry struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func NewQUICFabricTransport(config *quic.Config) *QUICFabricTransport {
|
func NewQUICFabricTransport(config *quic.Config) *QUICFabricTransport {
|
||||||
return &QUICFabricTransport{Config: config, IdleTTL: defaultQUICFabricConnIdleTTL, MaxStreamsPerConn: defaultQUICFabricMaxStreamsPerConn, conns: map[string]*quicFabricConnEntry{}}
|
return &QUICFabricTransport{Config: config, IdleTTL: defaultQUICFabricConnIdleTTL, MaxStreamsPerConn: defaultQUICFabricMaxStreamsPerConn, conns: map[string]*quicFabricConnEntry{}, reverseConns: map[string]*quicFabricConnEntry{}}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *QUICFabricTransport) SetInboundHandlers(production func(context.Context, ProductionEnvelope) (ProductionForwardResult, error), synthetic func(context.Context, SyntheticEnvelope) (SyntheticEnvelope, error), logger FabricSessionEventLogger) {
|
||||||
|
t.SetInboundHandlersWithWebIngress(production, nil, synthetic, logger)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *QUICFabricTransport) SetInboundHandlersWithWebIngress(production func(context.Context, ProductionEnvelope) (ProductionForwardResult, error), webIngress func(context.Context, []byte) ([]byte, error), synthetic func(context.Context, SyntheticEnvelope) (SyntheticEnvelope, error), logger FabricSessionEventLogger) {
|
||||||
|
if t == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
t.mu.Lock()
|
||||||
|
t.inboundProductionHandler = production
|
||||||
|
t.inboundWebIngressHandler = webIngress
|
||||||
|
t.inboundSyntheticHandler = synthetic
|
||||||
|
t.logger = logger
|
||||||
|
t.mu.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *QUICFabricTransport) SetInboundFabricControlHandler(handler func(context.Context, []byte) ([]byte, error)) {
|
||||||
|
if t == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
t.mu.Lock()
|
||||||
|
t.inboundFabricControlHandler = handler
|
||||||
|
t.mu.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *QUICFabricTransport) SetLocalPeerID(peerID string) {
|
||||||
|
if t == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
t.mu.Lock()
|
||||||
|
t.LocalPeerID = strings.TrimSpace(peerID)
|
||||||
|
t.mu.Unlock()
|
||||||
}
|
}
|
||||||
|
|
||||||
func quicTLSConfigForTarget(target FabricTransportTarget) *tls.Config {
|
func quicTLSConfigForTarget(target FabricTransportTarget) *tls.Config {
|
||||||
@@ -186,9 +237,12 @@ func (t *QUICFabricTransport) connectConn(ctx context.Context, target FabricTran
|
|||||||
conn, err := quic.DialAddr(ctx, target.Endpoint, tlsConfig, nil)
|
conn, err := quic.DialAddr(ctx, target.Endpoint, tlsConfig, nil)
|
||||||
return conn, "", true, err
|
return conn, "", true, err
|
||||||
}
|
}
|
||||||
|
if conn, key, ok := t.reverseConnForTarget(target); ok {
|
||||||
|
return conn, key, false, nil
|
||||||
|
}
|
||||||
key := quicFabricConnKey(target)
|
key := quicFabricConnKey(target)
|
||||||
if key == "" {
|
if key == "" {
|
||||||
conn, err := quic.DialAddr(ctx, target.Endpoint, tlsConfig, t.Config)
|
conn, err := t.dialAddr(ctx, target.Endpoint, tlsConfig)
|
||||||
return conn, "", true, err
|
return conn, "", true, err
|
||||||
}
|
}
|
||||||
t.mu.Lock()
|
t.mu.Lock()
|
||||||
@@ -207,7 +261,7 @@ func (t *QUICFabricTransport) connectConn(ctx context.Context, target FabricTran
|
|||||||
}
|
}
|
||||||
t.mu.Unlock()
|
t.mu.Unlock()
|
||||||
|
|
||||||
conn, err := quic.DialAddr(ctx, target.Endpoint, tlsConfig, t.Config)
|
conn, err := t.dialAddr(ctx, target.Endpoint, tlsConfig)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.mu.Lock()
|
t.mu.Lock()
|
||||||
t.stats.OpenFailures++
|
t.stats.OpenFailures++
|
||||||
@@ -235,16 +289,339 @@ func (t *QUICFabricTransport) connectConn(ctx context.Context, target FabricTran
|
|||||||
t.conns[key] = &quicFabricConnEntry{conn: conn, lastUsed: time.Now()}
|
t.conns[key] = &quicFabricConnEntry{conn: conn, lastUsed: time.Now()}
|
||||||
t.stats.Opens++
|
t.stats.Opens++
|
||||||
t.mu.Unlock()
|
t.mu.Unlock()
|
||||||
|
go t.acceptInboundStreams(context.Background(), conn)
|
||||||
|
go t.sendReverseHello(context.Background(), conn)
|
||||||
return conn, key, false, nil
|
return conn, key, false, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (t *QUICFabricTransport) dialAddr(ctx context.Context, endpoint string, tlsConfig *tls.Config) (*quic.Conn, error) {
|
||||||
|
if t != nil && t.DialAddr != nil {
|
||||||
|
return t.DialAddr(ctx, endpoint, tlsConfig, t.Config)
|
||||||
|
}
|
||||||
|
return quic.DialAddr(ctx, endpoint, tlsConfig, t.Config)
|
||||||
|
}
|
||||||
|
|
||||||
|
func DialQUICAddrWithPacketConn(ctx context.Context, endpoint string, packetConn net.PacketConn, tlsConfig *tls.Config, config *quic.Config) (*quic.Conn, error) {
|
||||||
|
if packetConn == nil {
|
||||||
|
return nil, fmt.Errorf("quic packet connection is required")
|
||||||
|
}
|
||||||
|
addr, err := net.ResolveUDPAddr("udp", strings.TrimPrefix(strings.TrimSpace(endpoint), "quic://"))
|
||||||
|
if err != nil {
|
||||||
|
_ = packetConn.Close()
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
transport := &quic.Transport{Conn: packetConn}
|
||||||
|
conn, err := transport.Dial(ctx, addr, tlsConfig, config)
|
||||||
|
if err != nil {
|
||||||
|
_ = transport.Close()
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
go func() {
|
||||||
|
<-conn.Context().Done()
|
||||||
|
_ = transport.Close()
|
||||||
|
}()
|
||||||
|
return conn, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *QUICFabricTransport) sendReverseHello(ctx context.Context, conn *quic.Conn) {
|
||||||
|
if t == nil || conn == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
localPeerID := t.localPeerID()
|
||||||
|
if localPeerID == "" {
|
||||||
|
t.mu.Lock()
|
||||||
|
t.stats.ReverseHelloFailed++
|
||||||
|
t.mu.Unlock()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
helloCtx, cancel := context.WithTimeout(ctx, 3*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
stream, err := conn.OpenStreamSync(helloCtx)
|
||||||
|
if err != nil {
|
||||||
|
t.mu.Lock()
|
||||||
|
t.stats.ReverseHelloFailed++
|
||||||
|
t.mu.Unlock()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer func() { _ = stream.Close() }()
|
||||||
|
if err := fabricproto.WriteFrame(stream, fabricproto.Frame{
|
||||||
|
Type: fabricproto.FramePing,
|
||||||
|
Sequence: 1,
|
||||||
|
Payload: []byte(fabricQUICReverseHelloPrefix + localPeerID),
|
||||||
|
}); err != nil {
|
||||||
|
t.mu.Lock()
|
||||||
|
t.stats.ReverseHelloFailed++
|
||||||
|
t.mu.Unlock()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
t.mu.Lock()
|
||||||
|
t.stats.ReverseHelloSent++
|
||||||
|
t.mu.Unlock()
|
||||||
|
_, _ = fabricproto.ReadFrame(stream, fabricproto.DefaultMaxPayload)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *QUICFabricTransport) acceptInboundStreams(ctx context.Context, conn *quic.Conn) {
|
||||||
|
if t == nil || conn == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for {
|
||||||
|
stream, err := conn.AcceptStream(ctx)
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
go t.handleInboundStream(ctx, conn, stream)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *QUICFabricTransport) handleInboundStream(ctx context.Context, conn *quic.Conn, stream *quic.Stream) {
|
||||||
|
session := fabricproto.NewSession(fabricproto.SessionConfig{})
|
||||||
|
defer func() { _ = stream.Close() }()
|
||||||
|
t.logFabricSession(FabricSessionEventLogEntry{
|
||||||
|
Event: "fabric_session_quic_reverse_stream_opened",
|
||||||
|
AcceptedBy: "quic_reverse",
|
||||||
|
RemoteAddr: conn.RemoteAddr().String(),
|
||||||
|
})
|
||||||
|
defer t.logFabricSession(FabricSessionEventLogEntry{
|
||||||
|
Event: "fabric_session_quic_reverse_stream_closed",
|
||||||
|
AcceptedBy: "quic_reverse",
|
||||||
|
RemoteAddr: conn.RemoteAddr().String(),
|
||||||
|
})
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
_ = stream.Close()
|
||||||
|
return
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
frame, err := fabricproto.ReadFrame(stream, fabricproto.DefaultMaxPayload)
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
t.registerReverseHelloFrame(conn, frame)
|
||||||
|
if t.handleInboundProductionForwardFrame(ctx, stream, frame) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if t.handleInboundWebIngressForwardFrame(ctx, stream, frame) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if t.handleInboundFabricControlForwardFrame(ctx, stream, frame) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if t.handleInboundSyntheticForwardFrame(ctx, stream, frame) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
event, responses, err := session.HandleFrame(frame)
|
||||||
|
if err != nil {
|
||||||
|
_ = stream.Close()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if event.Type != fabricproto.SessionEventNone {
|
||||||
|
t.logFabricSession(FabricSessionEventLogEntry{
|
||||||
|
Event: "fabric_session_reverse_event",
|
||||||
|
SessionEvent: event.Type,
|
||||||
|
StreamID: event.StreamID,
|
||||||
|
Sequence: event.Sequence,
|
||||||
|
TrafficClass: event.TrafficClass,
|
||||||
|
AcceptedBy: "quic_reverse",
|
||||||
|
RemoteAddr: conn.RemoteAddr().String(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
for _, response := range responses {
|
||||||
|
if err := fabricproto.WriteFrame(stream, response); err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *QUICFabricTransport) registerReverseHelloFrame(conn *quic.Conn, frame fabricproto.Frame) {
|
||||||
|
if t == nil || conn == nil || frame.Type != fabricproto.FramePing {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
payload := string(frame.Payload)
|
||||||
|
if !strings.HasPrefix(payload, fabricQUICReverseHelloPrefix) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
peerID := strings.TrimPrefix(payload, fabricQUICReverseHelloPrefix)
|
||||||
|
t.RegisterReverseConn(peerID, conn)
|
||||||
|
t.logFabricSession(FabricSessionEventLogEntry{
|
||||||
|
Event: "fabric_session_quic_reverse_registered",
|
||||||
|
AcceptedBy: "quic_reverse_hello",
|
||||||
|
RemoteAddr: conn.RemoteAddr().String(),
|
||||||
|
PeerID: peerID,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *QUICFabricTransport) handleInboundProductionForwardFrame(ctx context.Context, stream *quic.Stream, frame fabricproto.Frame) bool {
|
||||||
|
if frame.Type != fabricproto.FrameData || frame.StreamID != ProductionForwardQUICStreamID {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
response := quicProductionForwardResponse{}
|
||||||
|
productionHandler, _, _, _, _ := t.inboundHandlers()
|
||||||
|
if productionHandler == nil {
|
||||||
|
response.Error = ErrForwardRuntimeUnavailable.Error()
|
||||||
|
} else {
|
||||||
|
var envelope ProductionEnvelope
|
||||||
|
if err := json.Unmarshal(frame.Payload, &envelope); err != nil {
|
||||||
|
response.Error = "invalid production mesh envelope"
|
||||||
|
} else if result, err := productionHandler(ctx, envelope); err != nil {
|
||||||
|
response.Error = err.Error()
|
||||||
|
} else {
|
||||||
|
response.Result = result
|
||||||
|
}
|
||||||
|
}
|
||||||
|
payload, err := json.Marshal(response)
|
||||||
|
if err == nil {
|
||||||
|
_ = fabricproto.WriteFrame(stream, fabricproto.Frame{Type: fabricproto.FrameData, TrafficClass: fabricproto.TrafficClassReliable, StreamID: ProductionForwardQUICStreamID, Sequence: frame.Sequence, Payload: payload})
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *QUICFabricTransport) handleInboundWebIngressForwardFrame(ctx context.Context, stream *quic.Stream, frame fabricproto.Frame) bool {
|
||||||
|
if frame.Type != fabricproto.FrameData || frame.StreamID != WebIngressForwardQUICStreamID {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
response := quicWebIngressForwardResponse{}
|
||||||
|
_, webIngressHandler, _, _, _ := t.inboundHandlers()
|
||||||
|
if webIngressHandler == nil {
|
||||||
|
response.Error = ErrForwardRuntimeUnavailable.Error()
|
||||||
|
} else if payload, err := webIngressHandler(ctx, append([]byte(nil), frame.Payload...)); err != nil {
|
||||||
|
response.Error = err.Error()
|
||||||
|
} else {
|
||||||
|
response.Payload = append(json.RawMessage(nil), payload...)
|
||||||
|
}
|
||||||
|
payload, err := json.Marshal(response)
|
||||||
|
if err == nil {
|
||||||
|
_ = fabricproto.WriteFrame(stream, fabricproto.Frame{Type: fabricproto.FrameData, TrafficClass: fabricproto.TrafficClassReliable, StreamID: WebIngressForwardQUICStreamID, Sequence: frame.Sequence, Payload: payload})
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *QUICFabricTransport) handleInboundFabricControlForwardFrame(ctx context.Context, stream *quic.Stream, frame fabricproto.Frame) bool {
|
||||||
|
if frame.Type != fabricproto.FrameData || frame.StreamID != FabricControlForwardQUICStreamID {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
response := quicFabricControlForwardResponse{}
|
||||||
|
_, _, fabricControlHandler, _, _ := t.inboundHandlers()
|
||||||
|
if fabricControlHandler == nil {
|
||||||
|
response.Error = ErrForwardRuntimeUnavailable.Error()
|
||||||
|
} else if payload, err := fabricControlHandler(ctx, append([]byte(nil), frame.Payload...)); err != nil {
|
||||||
|
response.Error = err.Error()
|
||||||
|
} else {
|
||||||
|
response.Payload = append(json.RawMessage(nil), payload...)
|
||||||
|
}
|
||||||
|
payload, err := json.Marshal(response)
|
||||||
|
if err == nil {
|
||||||
|
_ = fabricproto.WriteFrame(stream, fabricproto.Frame{Type: fabricproto.FrameData, TrafficClass: fabricproto.TrafficClassReliable, StreamID: FabricControlForwardQUICStreamID, Sequence: frame.Sequence, Payload: payload})
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *QUICFabricTransport) handleInboundSyntheticForwardFrame(ctx context.Context, stream *quic.Stream, frame fabricproto.Frame) bool {
|
||||||
|
if frame.Type != fabricproto.FrameData || frame.StreamID != SyntheticForwardQUICStreamID {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
response := quicSyntheticForwardResponse{}
|
||||||
|
_, _, _, syntheticHandler, _ := t.inboundHandlers()
|
||||||
|
if syntheticHandler == nil {
|
||||||
|
response.Error = ErrMeshRuntimeDisabled.Error()
|
||||||
|
} else {
|
||||||
|
var envelope SyntheticEnvelope
|
||||||
|
if err := json.Unmarshal(frame.Payload, &envelope); err != nil {
|
||||||
|
response.Error = "invalid synthetic mesh envelope"
|
||||||
|
} else if ack, err := syntheticHandler(ctx, envelope); err != nil {
|
||||||
|
response.Error = err.Error()
|
||||||
|
} else {
|
||||||
|
response.Envelope = ack
|
||||||
|
}
|
||||||
|
}
|
||||||
|
payload, err := json.Marshal(response)
|
||||||
|
if err == nil {
|
||||||
|
_ = fabricproto.WriteFrame(stream, fabricproto.Frame{Type: fabricproto.FrameData, TrafficClass: fabricproto.TrafficClassReliable, StreamID: SyntheticForwardQUICStreamID, Sequence: frame.Sequence, Payload: payload})
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *QUICFabricTransport) inboundHandlers() (func(context.Context, ProductionEnvelope) (ProductionForwardResult, error), func(context.Context, []byte) ([]byte, error), func(context.Context, []byte) ([]byte, error), func(context.Context, SyntheticEnvelope) (SyntheticEnvelope, error), FabricSessionEventLogger) {
|
||||||
|
if t == nil {
|
||||||
|
return nil, nil, nil, nil, nil
|
||||||
|
}
|
||||||
|
t.mu.Lock()
|
||||||
|
defer t.mu.Unlock()
|
||||||
|
return t.inboundProductionHandler, t.inboundWebIngressHandler, t.inboundFabricControlHandler, t.inboundSyntheticHandler, t.logger
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *QUICFabricTransport) localPeerID() string {
|
||||||
|
if t == nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
t.mu.Lock()
|
||||||
|
defer t.mu.Unlock()
|
||||||
|
return strings.TrimSpace(t.LocalPeerID)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *QUICFabricTransport) logFabricSession(entry FabricSessionEventLogEntry) {
|
||||||
|
_, _, _, _, logger := t.inboundHandlers()
|
||||||
|
if logger != nil {
|
||||||
|
logger(entry)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *QUICFabricTransport) RegisterReverseConn(peerID string, conn *quic.Conn) {
|
||||||
|
if t == nil || conn == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
peerID = strings.TrimSpace(peerID)
|
||||||
|
if peerID == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
t.mu.Lock()
|
||||||
|
defer t.mu.Unlock()
|
||||||
|
if t.reverseConns == nil {
|
||||||
|
t.reverseConns = map[string]*quicFabricConnEntry{}
|
||||||
|
}
|
||||||
|
if existing := t.reverseConns[peerID]; existing != nil && existing.conn != nil && existing.conn != conn {
|
||||||
|
select {
|
||||||
|
case <-existing.conn.Context().Done():
|
||||||
|
default:
|
||||||
|
_ = existing.conn.CloseWithError(0, "reverse connection replaced")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
t.reverseConns[peerID] = &quicFabricConnEntry{conn: conn, lastUsed: time.Now()}
|
||||||
|
t.stats.ReverseRegisters++
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *QUICFabricTransport) reverseConnForTarget(target FabricTransportTarget) (*quic.Conn, string, bool) {
|
||||||
|
peerID := strings.TrimSpace(target.PeerID)
|
||||||
|
if t == nil || peerID == "" || !fabricTransportPrefersReverseConn(target.Transport) {
|
||||||
|
return nil, "", false
|
||||||
|
}
|
||||||
|
t.mu.Lock()
|
||||||
|
defer t.mu.Unlock()
|
||||||
|
t.pruneIdleLocked(time.Now())
|
||||||
|
entry := t.reverseConns[peerID]
|
||||||
|
if entry == nil || entry.conn == nil {
|
||||||
|
return nil, "", false
|
||||||
|
}
|
||||||
|
select {
|
||||||
|
case <-entry.conn.Context().Done():
|
||||||
|
delete(t.reverseConns, peerID)
|
||||||
|
t.stats.ClosedEvicted++
|
||||||
|
return nil, "", false
|
||||||
|
default:
|
||||||
|
entry.lastUsed = time.Now()
|
||||||
|
t.stats.ReverseReuses++
|
||||||
|
return entry.conn, quicFabricReverseConnKey(peerID), true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (t *QUICFabricTransport) reserveStream(key string, conn *quic.Conn) error {
|
func (t *QUICFabricTransport) reserveStream(key string, conn *quic.Conn) error {
|
||||||
if t == nil || key == "" {
|
if t == nil || key == "" {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
t.mu.Lock()
|
t.mu.Lock()
|
||||||
defer t.mu.Unlock()
|
defer t.mu.Unlock()
|
||||||
entry := t.conns[key]
|
entry := t.connEntryLocked(key)
|
||||||
if entry == nil || entry.conn != conn {
|
if entry == nil || entry.conn != conn {
|
||||||
return fmt.Errorf("quic fabric connection is not cached")
|
return fmt.Errorf("quic fabric connection is not cached")
|
||||||
}
|
}
|
||||||
@@ -267,16 +644,26 @@ func (t *QUICFabricTransport) releaseStream(key string) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
t.mu.Lock()
|
t.mu.Lock()
|
||||||
if entry := t.conns[key]; entry != nil {
|
if entry := t.connEntryLocked(key); entry != nil {
|
||||||
if entry.activeStreams > 0 {
|
if entry.activeStreams > 0 {
|
||||||
entry.activeStreams--
|
entry.activeStreams--
|
||||||
}
|
}
|
||||||
entry.lastUsed = time.Now()
|
entry.lastUsed = time.Now()
|
||||||
t.stats.StreamCloses++
|
|
||||||
}
|
}
|
||||||
|
t.stats.StreamCloses++
|
||||||
t.mu.Unlock()
|
t.mu.Unlock()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (t *QUICFabricTransport) connEntryLocked(key string) *quicFabricConnEntry {
|
||||||
|
if t == nil || key == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if strings.HasPrefix(key, "reverse\x00") {
|
||||||
|
return t.reverseConns[strings.TrimPrefix(key, "reverse\x00")]
|
||||||
|
}
|
||||||
|
return t.conns[key]
|
||||||
|
}
|
||||||
|
|
||||||
func (t *QUICFabricTransport) evictConn(target FabricTransportTarget, conn *quic.Conn) {
|
func (t *QUICFabricTransport) evictConn(target FabricTransportTarget, conn *quic.Conn) {
|
||||||
if t == nil || conn == nil {
|
if t == nil || conn == nil {
|
||||||
return
|
return
|
||||||
@@ -315,6 +702,20 @@ func (t *QUICFabricTransport) pruneIdleLocked(now time.Time) {
|
|||||||
t.stats.IdleEvicted++
|
t.stats.IdleEvicted++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
for peerID, entry := range t.reverseConns {
|
||||||
|
if entry == nil || entry.conn == nil {
|
||||||
|
delete(t.reverseConns, peerID)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if !entry.lastUsed.IsZero() && now.Sub(entry.lastUsed) > ttl {
|
||||||
|
if entry.activeStreams > 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
_ = entry.conn.CloseWithError(0, "idle reverse")
|
||||||
|
delete(t.reverseConns, peerID)
|
||||||
|
t.stats.IdleEvicted++
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func quicFabricConnKey(target FabricTransportTarget) string {
|
func quicFabricConnKey(target FabricTransportTarget) string {
|
||||||
@@ -340,6 +741,23 @@ func parseQUICFabricConnKey(key string) (peerID string, endpoint string, certSHA
|
|||||||
return peerID, endpoint, certSHA256
|
return peerID, endpoint, certSHA256
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// quicFabricReverseConnKey builds the cache key for a peer's reverse
// connection: the literal prefix "reverse\x00" followed by the trimmed peer
// ID. A peer ID that is empty after trimming yields the empty key.
func quicFabricReverseConnKey(peerID string) string {
	if trimmed := strings.TrimSpace(peerID); trimmed != "" {
		return "reverse\x00" + trimmed
	}
	return ""
}
|
||||||
|
|
||||||
|
// fabricTransportPrefersReverseConn reports whether the named transport is
// "reverse_quic" or "relay_quic". The comparison ignores case and surrounding
// whitespace.
func fabricTransportPrefersReverseConn(transport string) bool {
	normalized := strings.ToLower(strings.TrimSpace(transport))
	return normalized == "reverse_quic" || normalized == "relay_quic"
}
|
||||||
|
|
||||||
func (t *QUICFabricTransport) Close() error {
|
func (t *QUICFabricTransport) Close() error {
|
||||||
if t == nil {
|
if t == nil {
|
||||||
return nil
|
return nil
|
||||||
@@ -348,12 +766,19 @@ func (t *QUICFabricTransport) Close() error {
|
|||||||
t.stats.CloseAllCalls++
|
t.stats.CloseAllCalls++
|
||||||
conns := t.conns
|
conns := t.conns
|
||||||
t.conns = map[string]*quicFabricConnEntry{}
|
t.conns = map[string]*quicFabricConnEntry{}
|
||||||
|
reverseConns := t.reverseConns
|
||||||
|
t.reverseConns = map[string]*quicFabricConnEntry{}
|
||||||
t.mu.Unlock()
|
t.mu.Unlock()
|
||||||
for _, entry := range conns {
|
for _, entry := range conns {
|
||||||
if entry != nil && entry.conn != nil {
|
if entry != nil && entry.conn != nil {
|
||||||
_ = entry.conn.CloseWithError(0, "closed")
|
_ = entry.conn.CloseWithError(0, "closed")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
for _, entry := range reverseConns {
|
||||||
|
if entry != nil && entry.conn != nil {
|
||||||
|
_ = entry.conn.CloseWithError(0, "closed")
|
||||||
|
}
|
||||||
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -370,6 +795,7 @@ func (t *QUICFabricTransport) Snapshot() QUICFabricTransportSnapshot {
|
|||||||
}
|
}
|
||||||
snapshot := QUICFabricTransportSnapshot{
|
snapshot := QUICFabricTransportSnapshot{
|
||||||
SchemaVersion: "rap.quic_fabric_transport.v1",
|
SchemaVersion: "rap.quic_fabric_transport.v1",
|
||||||
|
LocalPeerID: strings.TrimSpace(t.LocalPeerID),
|
||||||
MaxStreamsPerConn: limit,
|
MaxStreamsPerConn: limit,
|
||||||
Stats: t.stats,
|
Stats: t.stats,
|
||||||
}
|
}
|
||||||
@@ -391,6 +817,40 @@ func (t *QUICFabricTransport) Snapshot() QUICFabricTransportSnapshot {
|
|||||||
PeerID: peerID,
|
PeerID: peerID,
|
||||||
Endpoint: endpoint,
|
Endpoint: endpoint,
|
||||||
CertSHA256: certSHA256,
|
CertSHA256: certSHA256,
|
||||||
|
Direction: "outbound",
|
||||||
|
ActiveStreams: entry.activeStreams,
|
||||||
|
MaxStreams: limit,
|
||||||
|
Saturated: entry.activeStreams >= limit,
|
||||||
|
}
|
||||||
|
if !entry.lastUsed.IsZero() {
|
||||||
|
connSnapshot.LastUsedUnixSec = entry.lastUsed.UTC().Unix()
|
||||||
|
}
|
||||||
|
if limit > 0 {
|
||||||
|
connSnapshot.CapacityPressurePercent = (entry.activeStreams * 100) / limit
|
||||||
|
}
|
||||||
|
snapshot.Connections = append(snapshot.Connections, connSnapshot)
|
||||||
|
if entry.activeStreams >= limit {
|
||||||
|
snapshot.SaturatedConnections++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for peerID, entry := range t.reverseConns {
|
||||||
|
if entry == nil || entry.conn == nil {
|
||||||
|
delete(t.reverseConns, peerID)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
select {
|
||||||
|
case <-entry.conn.Context().Done():
|
||||||
|
delete(t.reverseConns, peerID)
|
||||||
|
t.stats.ClosedEvicted++
|
||||||
|
snapshot.Stats.ClosedEvicted++
|
||||||
|
default:
|
||||||
|
snapshot.ActiveCount++
|
||||||
|
snapshot.ActiveStreams += entry.activeStreams
|
||||||
|
connSnapshot := QUICFabricConnSnapshot{
|
||||||
|
PeerID: peerID,
|
||||||
|
Endpoint: entry.conn.RemoteAddr().String(),
|
||||||
|
Direction: "reverse",
|
||||||
ActiveStreams: entry.activeStreams,
|
ActiveStreams: entry.activeStreams,
|
||||||
MaxStreams: limit,
|
MaxStreams: limit,
|
||||||
Saturated: entry.activeStreams >= limit,
|
Saturated: entry.activeStreams >= limit,
|
||||||
@@ -462,6 +922,7 @@ func (s *quicFabricSession) Close() error {
|
|||||||
s.closeOnce.Do(func() {
|
s.closeOnce.Do(func() {
|
||||||
close(s.done)
|
close(s.done)
|
||||||
if s.stream != nil {
|
if s.stream != nil {
|
||||||
|
s.stream.CancelRead(0)
|
||||||
err = s.stream.Close()
|
err = s.stream.Close()
|
||||||
}
|
}
|
||||||
if s.transport != nil {
|
if s.transport != nil {
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ import (
|
|||||||
"crypto/x509"
|
"crypto/x509"
|
||||||
"crypto/x509/pkix"
|
"crypto/x509/pkix"
|
||||||
"encoding/hex"
|
"encoding/hex"
|
||||||
|
"encoding/json"
|
||||||
"encoding/pem"
|
"encoding/pem"
|
||||||
"math/big"
|
"math/big"
|
||||||
"strings"
|
"strings"
|
||||||
@@ -341,6 +342,119 @@ func TestQUICFabricTransportLimitsStreamsPerConnection(t *testing.T) {
|
|||||||
defer second.Close()
|
defer second.Close()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestQUICFabricTransportReusesInboundConnectionForReverseStream checks that a
// server-side transport can send to a peer over the connection that peer
// dialed in on. A client ("node-a") connects to the server, the test waits for
// the server's reverse transport to report a registration, and then the
// reverse transport connects to "node-a" with the "relay_quic" transport. A
// production frame sent on that session must be answered by the client's
// inbound production handler, and the reverse transport's stats must show
// both a register and a reuse.
func TestQUICFabricTransportReusesInboundConnectionForReverseStream(t *testing.T) {
	reverseTransport := NewQUICFabricTransport(nil)
	defer reverseTransport.Close()
	server, err := StartQUICFabricServer(context.Background(), QUICFabricServerConfig{
		ListenAddr:       "127.0.0.1:0",
		TLSConfig:        testQUICTLSConfig(t),
		ReverseTransport: reverseTransport,
		SyntheticForwardHandler: func(_ context.Context, envelope SyntheticEnvelope) (SyntheticEnvelope, error) {
			envelope.To, envelope.From = envelope.From, PeerIdentity{ClusterID: envelope.ClusterID, NodeID: "node-r"}
			return envelope, nil
		},
	})
	if err != nil {
		t.Fatalf("start quic fabric server: %v", err)
	}
	defer server.Close()

	// The client identifies itself as node-a and answers production envelopes
	// locally, so a response with By.NodeID == "node-a" shows the frame
	// reached the client's handler.
	clientTransport := NewQUICFabricTransport(nil)
	defer clientTransport.Close()
	clientTransport.SetLocalPeerID("node-a")
	clientTransport.SetInboundHandlers(func(_ context.Context, envelope ProductionEnvelope) (ProductionForwardResult, error) {
		return ProductionForwardResult{
			Accepted:  true,
			Delivered: true,
			Forwarded: true,
			By:        PeerIdentity{ClusterID: envelope.ClusterID, NodeID: "node-a"},
			MessageID: envelope.MessageID,
			RouteID:   envelope.RouteID,
		}, nil
	}, nil, nil)

	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
	defer cancel()
	session, err := clientTransport.Connect(ctx, FabricTransportTarget{
		PeerID:   "node-r",
		Endpoint: server.Addr().String(),
		TLSConfig: &tls.Config{
			InsecureSkipVerify: true,
			NextProtos:         []string{fabricQUICNextProto},
		},
		Timeout:       time.Second,
		InboundBuffer: 4,
		ErrorBuffer:   4,
	})
	if err != nil {
		t.Fatalf("client connect: %v", err)
	}
	defer session.Close()

	// Registration happens asynchronously on the server side; poll the
	// snapshot for up to one second before giving up.
	deadline := time.Now().Add(time.Second)
	for {
		if reverseTransport.Snapshot().Stats.ReverseRegisters > 0 {
			break
		}
		if time.Now().After(deadline) {
			t.Fatalf("reverse hello did not register connection: %+v", reverseTransport.Snapshot())
		}
		time.Sleep(10 * time.Millisecond)
	}

	// No TLSConfig is supplied and the endpoint is a placeholder address: the
	// test expects this Connect to be served by the registered reverse
	// connection (asserted via ReverseReuses below).
	reverseSession, err := reverseTransport.Connect(ctx, FabricTransportTarget{
		PeerID:        "node-a",
		Endpoint:      "10.0.0.2:19443",
		Transport:     "relay_quic",
		Timeout:       time.Second,
		InboundBuffer: 4,
		ErrorBuffer:   4,
	})
	if err != nil {
		t.Fatalf("reverse connect: %v", err)
	}
	defer reverseSession.Close()
	productionPayload, err := json.Marshal(ProductionEnvelope{
		FabricProtocolVersion: ProtocolVersion,
		MessageID:             "msg-1",
		RouteID:               "route-r-a",
		ClusterID:             "cluster-1",
		SourceNodeID:          "node-r",
		DestinationNodeID:     "node-a",
		CurrentHopNodeID:      "node-a",
		NextHopNodeID:         "node-a",
		ChannelClass:          ProductionChannelFabricControl,
		MessageType:           ProductionMessageFabricControl,
		TTL:                   4,
		CreatedAt:             time.Now().UTC(),
		ExpiresAt:             time.Now().UTC().Add(time.Minute),
		PayloadHash:           "unused-by-test-handler",
	})
	if err != nil {
		t.Fatalf("marshal production: %v", err)
	}
	if err := reverseSession.Send(ctx, fabricproto.Frame{Type: fabricproto.FrameData, TrafficClass: fabricproto.TrafficClassReliable, StreamID: ProductionForwardQUICStreamID, Sequence: 2, Payload: productionPayload}); err != nil {
		t.Fatalf("send reverse production: %v", err)
	}
	// Wait for whichever comes first: the response frame, a session error, or
	// the overall test timeout.
	select {
	case frame := <-reverseSession.Frames():
		var response quicProductionForwardResponse
		if err := json.Unmarshal(frame.Payload, &response); err != nil {
			t.Fatalf("decode response: %v", err)
		}
		if !response.Result.Accepted || !response.Result.Delivered || response.Result.By.NodeID != "node-a" {
			t.Fatalf("response = %+v", response)
		}
	case err := <-reverseSession.Errors():
		t.Fatalf("reverse session error: %v", err)
	case <-ctx.Done():
		t.Fatal(ctx.Err())
	}
	snapshot := reverseTransport.Snapshot()
	if snapshot.Stats.ReverseRegisters == 0 || snapshot.Stats.ReverseReuses == 0 {
		t.Fatalf("reverse connection was not registered/reused: %+v", snapshot)
	}
}
|
||||||
|
|
||||||
func TestQUICFabricServerHandlesFabricFrames(t *testing.T) {
|
func TestQUICFabricServerHandlesFabricFrames(t *testing.T) {
|
||||||
var events []FabricSessionEventLogEntry
|
var events []FabricSessionEventLogEntry
|
||||||
server, err := StartQUICFabricServer(context.Background(), QUICFabricServerConfig{
|
server, err := StartQUICFabricServer(context.Background(), QUICFabricServerConfig{
|
||||||
@@ -389,6 +503,68 @@ func TestQUICFabricServerHandlesFabricFrames(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestQUICFabricServerHandlesWebIngressForwardFrames(t *testing.T) {
|
||||||
|
var received []byte
|
||||||
|
server, err := StartQUICFabricServer(context.Background(), QUICFabricServerConfig{
|
||||||
|
ListenAddr: "127.0.0.1:0",
|
||||||
|
TLSConfig: testQUICTLSConfig(t),
|
||||||
|
WebIngressForwardHandler: func(_ context.Context, payload []byte) ([]byte, error) {
|
||||||
|
received = append([]byte(nil), payload...)
|
||||||
|
return []byte(`{"schema_version":"rap.web_ingress.fabric_runtime_response.v1","status_code":200,"body_b64":"b2s="}`), nil
|
||||||
|
},
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("start quic fabric server: %v", err)
|
||||||
|
}
|
||||||
|
defer server.Close()
|
||||||
|
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
session, err := NewQUICFabricTransport(nil).Connect(ctx, FabricTransportTarget{
|
||||||
|
Endpoint: server.Addr().String(),
|
||||||
|
TLSConfig: &tls.Config{
|
||||||
|
InsecureSkipVerify: true,
|
||||||
|
NextProtos: []string{fabricQUICNextProto},
|
||||||
|
},
|
||||||
|
Timeout: time.Second,
|
||||||
|
InboundBuffer: 4,
|
||||||
|
ErrorBuffer: 4,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("connect quic fabric: %v", err)
|
||||||
|
}
|
||||||
|
defer session.Close()
|
||||||
|
if err := session.Send(ctx, fabricproto.Frame{
|
||||||
|
Type: fabricproto.FrameData,
|
||||||
|
TrafficClass: fabricproto.TrafficClassReliable,
|
||||||
|
StreamID: WebIngressForwardQUICStreamID,
|
||||||
|
Sequence: 44,
|
||||||
|
Payload: []byte(`{"envelope":true}`),
|
||||||
|
}); err != nil {
|
||||||
|
t.Fatalf("send web ingress frame: %v", err)
|
||||||
|
}
|
||||||
|
select {
|
||||||
|
case frame := <-session.Frames():
|
||||||
|
if frame.Type != fabricproto.FrameData || frame.StreamID != WebIngressForwardQUICStreamID || frame.Sequence != 44 {
|
||||||
|
t.Fatalf("frame = %+v", frame)
|
||||||
|
}
|
||||||
|
var response quicWebIngressForwardResponse
|
||||||
|
if err := json.Unmarshal(frame.Payload, &response); err != nil {
|
||||||
|
t.Fatalf("decode response: %v", err)
|
||||||
|
}
|
||||||
|
if string(response.Payload) != `{"schema_version":"rap.web_ingress.fabric_runtime_response.v1","status_code":200,"body_b64":"b2s="}` || response.Error != "" {
|
||||||
|
t.Fatalf("response = %+v", response)
|
||||||
|
}
|
||||||
|
case err := <-session.Errors():
|
||||||
|
t.Fatalf("session error: %v", err)
|
||||||
|
case <-ctx.Done():
|
||||||
|
t.Fatal(ctx.Err())
|
||||||
|
}
|
||||||
|
if string(received) != `{"envelope":true}` {
|
||||||
|
t.Fatalf("received = %s", string(received))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func startQUICFabricEchoServer(t *testing.T) *quic.Listener {
|
func startQUICFabricEchoServer(t *testing.T) *quic.Listener {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
return startQUICFabricEchoServerWithTLS(t, testQUICTLSConfig(t))
|
return startQUICFabricEchoServerWithTLS(t, testQUICTLSConfig(t))
|
||||||
|
|||||||
@@ -0,0 +1,128 @@
|
|||||||
|
package mesh
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// FabricRouteHealthTracker remembers recent route failures so that failed
// routes can be reported as unhealthy for a limited time.
type FabricRouteHealthTracker struct {
	// mu guards routes.
	mu sync.Mutex
	// QuarantineTTL is how long after a failure a route stays quarantined.
	// MarkFailure substitutes 30 seconds when the value is not positive.
	QuarantineTTL time.Duration
	// routes maps a route ID to its most recent failure record.
	routes map[string]FabricRouteHealthEntry
}
|
||||||
|
|
||||||
|
// FabricRouteHealthEntry is the failure record kept for a single route.
//
// The time fields use the "omitzero" JSON option rather than "omitempty":
// encoding/json never treats a struct value such as time.Time as empty, so
// "omitempty" would still serialize a zero time as "0001-01-01T00:00:00Z".
type FabricRouteHealthEntry struct {
	// Reason is the trimmed description supplied with the latest failure.
	Reason string `json:"reason,omitempty"`
	// Failures counts failures recorded since the entry was created.
	Failures uint64 `json:"failures"`
	// LastFailure is the time of the most recent failure.
	LastFailure time.Time `json:"last_failure,omitzero"`
	// RetryAfter is when the quarantine ends and the route may be used again.
	RetryAfter time.Time `json:"retry_after,omitzero"`
}
|
||||||
|
|
||||||
|
// FabricRouteHealthSnapshot is a point-in-time copy of the tracker state.
type FabricRouteHealthSnapshot struct {
	// Quarantined maps route IDs to their failure records. It is omitted from
	// JSON when empty.
	Quarantined map[string]FabricRouteHealthEntry `json:"quarantined,omitempty"`
}
|
||||||
|
|
||||||
|
func NewFabricRouteHealthTracker(ttl time.Duration) *FabricRouteHealthTracker {
|
||||||
|
if ttl <= 0 {
|
||||||
|
ttl = 30 * time.Second
|
||||||
|
}
|
||||||
|
return &FabricRouteHealthTracker{QuarantineTTL: ttl, routes: map[string]FabricRouteHealthEntry{}}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *FabricRouteHealthTracker) MarkFailure(routeID string, reason string, now time.Time) {
|
||||||
|
routeID = strings.TrimSpace(routeID)
|
||||||
|
if t == nil || routeID == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if now.IsZero() {
|
||||||
|
now = time.Now().UTC()
|
||||||
|
}
|
||||||
|
ttl := t.QuarantineTTL
|
||||||
|
if ttl <= 0 {
|
||||||
|
ttl = 30 * time.Second
|
||||||
|
}
|
||||||
|
t.mu.Lock()
|
||||||
|
entry := t.routes[routeID]
|
||||||
|
entry.Failures++
|
||||||
|
entry.Reason = strings.TrimSpace(reason)
|
||||||
|
entry.LastFailure = now
|
||||||
|
entry.RetryAfter = now.Add(ttl)
|
||||||
|
if t.routes == nil {
|
||||||
|
t.routes = map[string]FabricRouteHealthEntry{}
|
||||||
|
}
|
||||||
|
t.routes[routeID] = entry
|
||||||
|
t.mu.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *FabricRouteHealthTracker) MarkSuccess(routeID string) {
|
||||||
|
routeID = strings.TrimSpace(routeID)
|
||||||
|
if t == nil || routeID == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
t.mu.Lock()
|
||||||
|
delete(t.routes, routeID)
|
||||||
|
t.mu.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *FabricRouteHealthTracker) Apply(routeSet FabricRouteSet, now time.Time) FabricRouteSet {
|
||||||
|
if t == nil {
|
||||||
|
return routeSet
|
||||||
|
}
|
||||||
|
if now.IsZero() {
|
||||||
|
now = time.Now().UTC()
|
||||||
|
}
|
||||||
|
t.mu.Lock()
|
||||||
|
defer t.mu.Unlock()
|
||||||
|
if len(t.routes) == 0 {
|
||||||
|
return routeSet
|
||||||
|
}
|
||||||
|
return mapFabricRouteSet(routeSet, func(route FabricRoute) FabricRoute {
|
||||||
|
entry, ok := t.routes[route.RouteID]
|
||||||
|
if !ok {
|
||||||
|
return route
|
||||||
|
}
|
||||||
|
if !entry.RetryAfter.IsZero() && !now.Before(entry.RetryAfter) {
|
||||||
|
delete(t.routes, route.RouteID)
|
||||||
|
return route
|
||||||
|
}
|
||||||
|
route.Healthy = false
|
||||||
|
route.Degraded = true
|
||||||
|
return route
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *FabricRouteHealthTracker) Snapshot(now time.Time) FabricRouteHealthSnapshot {
|
||||||
|
if t == nil {
|
||||||
|
return FabricRouteHealthSnapshot{}
|
||||||
|
}
|
||||||
|
if now.IsZero() {
|
||||||
|
now = time.Now().UTC()
|
||||||
|
}
|
||||||
|
t.mu.Lock()
|
||||||
|
defer t.mu.Unlock()
|
||||||
|
out := map[string]FabricRouteHealthEntry{}
|
||||||
|
for routeID, entry := range t.routes {
|
||||||
|
if !entry.RetryAfter.IsZero() && !now.Before(entry.RetryAfter) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
out[routeID] = entry
|
||||||
|
}
|
||||||
|
if len(out) == 0 {
|
||||||
|
return FabricRouteHealthSnapshot{}
|
||||||
|
}
|
||||||
|
return FabricRouteHealthSnapshot{Quarantined: out}
|
||||||
|
}
|
||||||
|
|
||||||
|
func mapFabricRouteSet(routeSet FabricRouteSet, fn func(FabricRoute) FabricRoute) FabricRouteSet {
|
||||||
|
if strings.TrimSpace(routeSet.Primary.RouteID) != "" {
|
||||||
|
routeSet.Primary = fn(routeSet.Primary)
|
||||||
|
}
|
||||||
|
for i := range routeSet.WarmStandby {
|
||||||
|
routeSet.WarmStandby[i] = fn(routeSet.WarmStandby[i])
|
||||||
|
}
|
||||||
|
for i := range routeSet.ColdFallbacks {
|
||||||
|
routeSet.ColdFallbacks[i] = fn(routeSet.ColdFallbacks[i])
|
||||||
|
}
|
||||||
|
return routeSet
|
||||||
|
}
|
||||||
@@ -0,0 +1,322 @@
|
|||||||
|
package mesh
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
	// Values compared (case-insensitively) against a peer endpoint
	// candidate's Reachability field by the route planner.
	FabricCandidateReachabilityPublic       = "public"
	FabricCandidateReachabilityPrivate      = "private"
	FabricCandidateReachabilityRelay        = "relay"
	FabricCandidateReachabilityOutboundOnly = "outbound_only"

	// Values compared (case-insensitively) against a peer endpoint
	// candidate's ConnectivityMode field by the route planner.
	FabricConnectivityDirect        = "direct"
	FabricConnectivityOutboundOnly  = "outbound_only"
	FabricConnectivityRelayRequired = "relay_required"
)
|
||||||
|
|
||||||
|
// FabricRoutePlannerConfig carries the local node's identity and the tuning
// inputs used when turning peer endpoint candidates into fabric routes.
type FabricRoutePlannerConfig struct {
	// ClusterID is copied (trimmed) into every generated route.
	ClusterID string
	// LocalNodeID becomes each route's SourceNodeID and, when non-empty, its
	// first hop.
	LocalNodeID string
	// LocalSegmentID is compared with a candidate's local_segment_id
	// metadata to detect same-segment peers.
	LocalSegmentID string
	// LocalNATGroupID is presumably compared with a candidate's nat_group_id
	// metadata — the comparing helper is outside this view; confirm.
	LocalNATGroupID string
	// DefaultCapacity is the route capacity used when no mode-specific value
	// applies; 100 is used when it is not positive.
	DefaultCapacity int
	// RelayCapacity overrides DefaultCapacity for relay routes when positive.
	RelayCapacity int
	// ReverseCapacity overrides DefaultCapacity for reverse routes when positive.
	ReverseCapacity int
	// Observations holds health observations keyed by candidate endpoint ID.
	Observations map[string]EndpointCandidateHealthObservation
	// CapacityPressure holds capacity pressure keyed by candidate endpoint ID.
	CapacityPressure map[string]EndpointCandidateCapacityPressure
	// Now is the planning timestamp; the current UTC time is used when zero.
	Now time.Time
	// MaxObservationAge is passed to candidate ranking; 30 seconds when zero.
	MaxObservationAge time.Duration
	// MaxCapacityPressureAge is passed to candidate ranking; 10 seconds when zero.
	MaxCapacityPressureAge time.Duration
}
|
||||||
|
|
||||||
|
// FabricCandidateMetadata is the optional planner-relevant metadata decoded
// from a peer endpoint candidate's Metadata field.
type FabricCandidateMetadata struct {
	// LocalSegmentID is matched against the planner's LocalSegmentID.
	LocalSegmentID string `json:"local_segment_id,omitempty"`
	// NATGroupID is presumably matched against the planner's LocalNATGroupID
	// — the comparing helper is outside this view; confirm.
	NATGroupID string `json:"nat_group_id,omitempty"`
	// RelayNodeID names the relay hop; ViaNodeID is used when it is empty.
	RelayNodeID string `json:"relay_node_id,omitempty"`
	// RelayEndpoint is the relay hop's address; a non-empty value also makes
	// the planner treat the candidate as a relay route.
	RelayEndpoint string `json:"relay_endpoint,omitempty"`
	// ViaNodeID is the fallback relay node ID.
	ViaNodeID string `json:"via_node_id,omitempty"`
	// STUNServer, when set, makes the planner classify the candidate as ICE.
	STUNServer string `json:"stun_server,omitempty"`
	// ICEFoundation, when set, makes the planner classify the candidate as ICE.
	ICEFoundation string `json:"ice_foundation,omitempty"`
}
|
||||||
|
|
||||||
|
func FabricRouteSetForPeerEndpointCandidates(targetNodeID string, candidates []PeerEndpointCandidate, cfg FabricRoutePlannerConfig) FabricRouteSet {
|
||||||
|
targetNodeID = strings.TrimSpace(targetNodeID)
|
||||||
|
if targetNodeID == "" && len(candidates) > 0 {
|
||||||
|
targetNodeID = strings.TrimSpace(candidates[0].NodeID)
|
||||||
|
}
|
||||||
|
routeSet := FabricRouteSet{TargetKind: FabricChannelTargetNode, TargetID: targetNodeID}
|
||||||
|
if len(candidates) == 0 {
|
||||||
|
return routeSet
|
||||||
|
}
|
||||||
|
now := cfg.Now
|
||||||
|
if now.IsZero() {
|
||||||
|
now = time.Now().UTC()
|
||||||
|
}
|
||||||
|
ranked := RankPeerEndpointCandidates(candidates, EndpointCandidateScoreOptions{
|
||||||
|
Now: now,
|
||||||
|
Observations: cfg.Observations,
|
||||||
|
MaxObservationAge: firstNonZeroDuration(cfg.MaxObservationAge, 30*time.Second),
|
||||||
|
CapacityPressure: cfg.CapacityPressure,
|
||||||
|
MaxCapacityPressureAge: firstNonZeroDuration(cfg.MaxCapacityPressureAge, 10*time.Second),
|
||||||
|
})
|
||||||
|
routes := make([]FabricRoute, 0, len(ranked))
|
||||||
|
for index, scored := range ranked {
|
||||||
|
route, ok := fabricRouteForPeerEndpointCandidate(scored.Candidate, cfg, scored.Score, index, now)
|
||||||
|
if ok {
|
||||||
|
routes = append(routes, route)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return routeSetFromRoutes(routeSet, routes)
|
||||||
|
}
|
||||||
|
|
||||||
|
func FabricRouteSetsForPeerEndpointCandidates(candidatesByNode map[string][]PeerEndpointCandidate, cfg FabricRoutePlannerConfig) map[string]FabricRouteSet {
|
||||||
|
out := make(map[string]FabricRouteSet, len(candidatesByNode))
|
||||||
|
for nodeID, candidates := range candidatesByNode {
|
||||||
|
nodeID = strings.TrimSpace(nodeID)
|
||||||
|
if nodeID == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
routeSet := FabricRouteSetForPeerEndpointCandidates(nodeID, candidates, cfg)
|
||||||
|
if strings.TrimSpace(routeSet.Primary.RouteID) != "" || len(routeSet.WarmStandby) > 0 || len(routeSet.ColdFallbacks) > 0 {
|
||||||
|
out[nodeID] = routeSet
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
func fabricRouteForPeerEndpointCandidate(candidate PeerEndpointCandidate, cfg FabricRoutePlannerConfig, score int, index int, now time.Time) (FabricRoute, bool) {
|
||||||
|
candidate.EndpointID = strings.TrimSpace(candidate.EndpointID)
|
||||||
|
candidate.NodeID = strings.TrimSpace(candidate.NodeID)
|
||||||
|
candidate.Address = strings.TrimRight(strings.TrimSpace(candidate.Address), "/")
|
||||||
|
if candidate.EndpointID == "" || candidate.NodeID == "" || candidate.Address == "" || !isQUICOnlyCandidateTransport(candidate.Transport) {
|
||||||
|
return FabricRoute{}, false
|
||||||
|
}
|
||||||
|
metadata := decodeFabricCandidateMetadata(candidate.Metadata)
|
||||||
|
mode := fabricRouteModeForPeerEndpointCandidate(candidate, metadata, cfg)
|
||||||
|
hops := fabricRouteHopsForCandidate(candidate, metadata, mode, cfg)
|
||||||
|
if len(hops) == 0 {
|
||||||
|
return FabricRoute{}, false
|
||||||
|
}
|
||||||
|
relayCount := 0
|
||||||
|
for _, hop := range hops {
|
||||||
|
if hop.Mode == FabricRouteRelay {
|
||||||
|
relayCount++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
latency := fabricRouteLatencyFromCandidate(candidate, cfg, score, index)
|
||||||
|
capacity := fabricRouteCapacityForMode(mode, cfg)
|
||||||
|
if capacity <= 0 {
|
||||||
|
capacity = 100
|
||||||
|
}
|
||||||
|
healthy := true
|
||||||
|
degraded := false
|
||||||
|
if observation, ok := cfg.Observations[candidate.EndpointID]; ok {
|
||||||
|
healthy = observation.ReliabilityScore == 0 || observation.ReliabilityScore >= 50
|
||||||
|
degraded = observation.LastLatencyMs > 0 && observation.LastLatencyMs >= 250
|
||||||
|
}
|
||||||
|
return FabricRoute{
|
||||||
|
RouteID: candidate.EndpointID,
|
||||||
|
ClusterID: strings.TrimSpace(cfg.ClusterID),
|
||||||
|
SourceNodeID: strings.TrimSpace(cfg.LocalNodeID),
|
||||||
|
DestinationNodeID: candidate.NodeID,
|
||||||
|
Hops: hops,
|
||||||
|
BaseLatencyMs: latency,
|
||||||
|
Capacity: capacity,
|
||||||
|
ActiveChannels: int(candidatePressureCount(candidate.EndpointID, cfg)),
|
||||||
|
RelayCount: relayCount,
|
||||||
|
Healthy: healthy,
|
||||||
|
Degraded: degraded,
|
||||||
|
LastUpdatedAt: now,
|
||||||
|
}, true
|
||||||
|
}
|
||||||
|
|
||||||
|
func fabricRouteModeForPeerEndpointCandidate(candidate PeerEndpointCandidate, metadata FabricCandidateMetadata, cfg FabricRoutePlannerConfig) FabricRouteMode {
|
||||||
|
transportMode := fabricRouteModeForTransportTarget(FabricTransportTarget{Transport: candidate.Transport})
|
||||||
|
if transportMode == FabricRouteRelay || transportMode == FabricRouteReverse || transportMode == FabricRouteICE || transportMode == FabricRouteLAN {
|
||||||
|
return transportMode
|
||||||
|
}
|
||||||
|
reachability := strings.ToLower(strings.TrimSpace(candidate.Reachability))
|
||||||
|
connectivity := strings.ToLower(strings.TrimSpace(candidate.ConnectivityMode))
|
||||||
|
if sameLocalSegment(metadata, cfg) || sameNATGroup(metadata, cfg) {
|
||||||
|
return FabricRouteLAN
|
||||||
|
}
|
||||||
|
if reachability == FabricCandidateReachabilityRelay || connectivity == FabricConnectivityRelayRequired || strings.TrimSpace(metadata.RelayEndpoint) != "" {
|
||||||
|
return FabricRouteRelay
|
||||||
|
}
|
||||||
|
if connectivity == FabricConnectivityOutboundOnly || reachability == FabricCandidateReachabilityOutboundOnly {
|
||||||
|
return FabricRouteReverse
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(metadata.STUNServer) != "" || strings.TrimSpace(metadata.ICEFoundation) != "" || candidate.NATType != "" {
|
||||||
|
return FabricRouteICE
|
||||||
|
}
|
||||||
|
return FabricRouteDirect
|
||||||
|
}
|
||||||
|
|
||||||
|
func fabricRouteHopsForCandidate(candidate PeerEndpointCandidate, metadata FabricCandidateMetadata, mode FabricRouteMode, cfg FabricRoutePlannerConfig) []FabricRouteHop {
|
||||||
|
localNodeID := strings.TrimSpace(cfg.LocalNodeID)
|
||||||
|
targetNodeID := strings.TrimSpace(candidate.NodeID)
|
||||||
|
endpoint := strings.TrimRight(strings.TrimSpace(candidate.Address), "/")
|
||||||
|
switch mode {
|
||||||
|
case FabricRouteRelay:
|
||||||
|
relayNodeID := firstNonEmpty(strings.TrimSpace(metadata.RelayNodeID), strings.TrimSpace(metadata.ViaNodeID))
|
||||||
|
relayEndpoint := firstNonEmpty(strings.TrimRight(strings.TrimSpace(metadata.RelayEndpoint), "/"), endpoint)
|
||||||
|
hops := []FabricRouteHop{}
|
||||||
|
if localNodeID != "" {
|
||||||
|
hops = append(hops, FabricRouteHop{NodeID: localNodeID, Mode: FabricRouteDirect})
|
||||||
|
}
|
||||||
|
if relayNodeID == "" {
|
||||||
|
hops = append(hops, FabricRouteHop{NodeID: targetNodeID, Mode: FabricRouteRelay, EndpointID: candidate.EndpointID, Address: endpoint, PeerCertSHA256: candidatePeerCertSHA256(candidate)})
|
||||||
|
return hops
|
||||||
|
}
|
||||||
|
hops = append(hops,
|
||||||
|
FabricRouteHop{NodeID: relayNodeID, Mode: FabricRouteRelay, EndpointID: candidate.EndpointID + ":relay", Address: relayEndpoint},
|
||||||
|
FabricRouteHop{NodeID: targetNodeID, Mode: FabricRouteRelay, EndpointID: candidate.EndpointID, Address: endpoint, PeerCertSHA256: candidatePeerCertSHA256(candidate)},
|
||||||
|
)
|
||||||
|
return hops
|
||||||
|
case FabricRouteLAN, FabricRouteICE, FabricRouteReverse, FabricRouteDirect:
|
||||||
|
hops := []FabricRouteHop{}
|
||||||
|
if localNodeID != "" {
|
||||||
|
hops = append(hops, FabricRouteHop{NodeID: localNodeID, Mode: mode})
|
||||||
|
}
|
||||||
|
hops = append(hops, FabricRouteHop{NodeID: targetNodeID, Mode: mode, EndpointID: candidate.EndpointID, Address: endpoint, PeerCertSHA256: candidatePeerCertSHA256(candidate)})
|
||||||
|
return hops
|
||||||
|
default:
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func isQUICOnlyCandidateTransport(transport string) bool {
|
||||||
|
switch strings.ToLower(strings.TrimSpace(transport)) {
|
||||||
|
case "quic", "direct_quic", "udp_quic", "quic_udp",
|
||||||
|
string(FabricRouteLAN), string(FabricRouteReverse), string(FabricRouteRelay), string(FabricRouteICE):
|
||||||
|
return true
|
||||||
|
default:
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func fabricRouteLatencyFromCandidate(candidate PeerEndpointCandidate, cfg FabricRoutePlannerConfig, score int, index int) int {
|
||||||
|
if observation, ok := cfg.Observations[candidate.EndpointID]; ok && observation.LastLatencyMs > 0 {
|
||||||
|
if observation.LastLatencyMs > int64(^uint(0)>>1) {
|
||||||
|
return int(^uint(0) >> 1)
|
||||||
|
}
|
||||||
|
return int(observation.LastLatencyMs)
|
||||||
|
}
|
||||||
|
base := 10 + index
|
||||||
|
switch strings.ToLower(strings.TrimSpace(candidate.Reachability)) {
|
||||||
|
case FabricCandidateReachabilityPrivate:
|
||||||
|
base = 3 + index
|
||||||
|
case FabricCandidateReachabilityOutboundOnly:
|
||||||
|
base = 25 + index
|
||||||
|
case FabricCandidateReachabilityRelay:
|
||||||
|
base = 40 + index
|
||||||
|
}
|
||||||
|
if score < 100 {
|
||||||
|
base += (100 - score) / 10
|
||||||
|
}
|
||||||
|
return base
|
||||||
|
}
|
||||||
|
|
||||||
|
func fabricRouteCapacityForMode(mode FabricRouteMode, cfg FabricRoutePlannerConfig) int {
|
||||||
|
switch mode {
|
||||||
|
case FabricRouteRelay:
|
||||||
|
return firstPositiveInt(cfg.RelayCapacity, cfg.DefaultCapacity, 100)
|
||||||
|
case FabricRouteReverse:
|
||||||
|
return firstPositiveInt(cfg.ReverseCapacity, cfg.DefaultCapacity, 100)
|
||||||
|
default:
|
||||||
|
return firstPositiveInt(cfg.DefaultCapacity, 100)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func candidatePressureCount(endpointID string, cfg FabricRoutePlannerConfig) int64 {
|
||||||
|
if pressure, ok := cfg.CapacityPressure[endpointID]; ok {
|
||||||
|
return pressure.Count
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func sameLocalSegment(metadata FabricCandidateMetadata, cfg FabricRoutePlannerConfig) bool {
|
||||||
|
localSegment := strings.TrimSpace(cfg.LocalSegmentID)
|
||||||
|
if localSegment == "" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return strings.EqualFold(strings.TrimSpace(metadata.LocalSegmentID), localSegment)
|
||||||
|
}
|
||||||
|
|
||||||
|
func sameNATGroup(metadata FabricCandidateMetadata, cfg FabricRoutePlannerConfig) bool {
|
||||||
|
localNATGroup := strings.TrimSpace(cfg.LocalNATGroupID)
|
||||||
|
if localNATGroup == "" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return strings.EqualFold(strings.TrimSpace(metadata.NATGroupID), localNATGroup)
|
||||||
|
}
|
||||||
|
|
||||||
|
func decodeFabricCandidateMetadata(raw json.RawMessage) FabricCandidateMetadata {
|
||||||
|
if len(raw) == 0 {
|
||||||
|
return FabricCandidateMetadata{}
|
||||||
|
}
|
||||||
|
var metadata FabricCandidateMetadata
|
||||||
|
if err := json.Unmarshal(raw, &metadata); err != nil {
|
||||||
|
return FabricCandidateMetadata{}
|
||||||
|
}
|
||||||
|
return metadata
|
||||||
|
}
|
||||||
|
|
||||||
|
func candidatePeerCertSHA256(candidate PeerEndpointCandidate) string {
|
||||||
|
var metadata struct {
|
||||||
|
PeerCertSHA256 string `json:"peer_cert_sha256,omitempty"`
|
||||||
|
TLSCertSHA256 string `json:"tls_cert_sha256,omitempty"`
|
||||||
|
}
|
||||||
|
if len(candidate.Metadata) == 0 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
if err := json.Unmarshal(candidate.Metadata, &metadata); err != nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
return firstNonEmpty(strings.TrimSpace(metadata.PeerCertSHA256), strings.TrimSpace(metadata.TLSCertSHA256))
|
||||||
|
}
|
||||||
|
|
||||||
|
// firstPositiveInt returns the first argument that is strictly greater than
// zero, scanning left to right. It returns 0 when no argument qualifies,
// including when called with no arguments.
func firstPositiveInt(values ...int) int {
	for i := 0; i < len(values); i++ {
		if candidate := values[i]; candidate > 0 {
			return candidate
		}
	}
	return 0
}
|
||||||
|
|
||||||
|
// firstNonZeroDuration returns the first duration that is strictly positive,
// scanning left to right. Negative durations are skipped just like zero.
// It returns 0 when no argument qualifies.
func firstNonZeroDuration(values ...time.Duration) time.Duration {
	for i := 0; i < len(values); i++ {
		if candidate := values[i]; candidate > 0 {
			return candidate
		}
	}
	return 0
}
|
||||||
|
|
||||||
|
func FabricRouteSetForRelayFallback(clusterID string, sourceNodeID string, targetNodeID string, relayNodeID string, relayEndpoint string, targetEndpoint string) FabricRouteSet {
|
||||||
|
relayEndpoint = strings.TrimRight(strings.TrimSpace(relayEndpoint), "/")
|
||||||
|
targetEndpoint = strings.TrimRight(strings.TrimSpace(targetEndpoint), "/")
|
||||||
|
candidate := PeerEndpointCandidate{
|
||||||
|
EndpointID: fmt.Sprintf("%s-via-%s-relay", strings.TrimSpace(targetNodeID), strings.TrimSpace(relayNodeID)),
|
||||||
|
NodeID: strings.TrimSpace(targetNodeID),
|
||||||
|
Transport: string(FabricRouteRelay),
|
||||||
|
Address: targetEndpoint,
|
||||||
|
Reachability: FabricCandidateReachabilityRelay,
|
||||||
|
ConnectivityMode: FabricConnectivityRelayRequired,
|
||||||
|
Metadata: mustMarshalFabricCandidateMetadata(FabricCandidateMetadata{RelayNodeID: relayNodeID, RelayEndpoint: relayEndpoint}),
|
||||||
|
}
|
||||||
|
return FabricRouteSetForPeerEndpointCandidates(targetNodeID, []PeerEndpointCandidate{candidate}, FabricRoutePlannerConfig{
|
||||||
|
ClusterID: clusterID,
|
||||||
|
LocalNodeID: sourceNodeID,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func mustMarshalFabricCandidateMetadata(metadata FabricCandidateMetadata) json.RawMessage {
|
||||||
|
raw, _ := json.Marshal(metadata)
|
||||||
|
return raw
|
||||||
|
}
|
||||||
@@ -0,0 +1,187 @@
|
|||||||
|
package mesh
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestFabricRouteSetForPeerEndpointCandidatesPrefersLocalLAN(t *testing.T) {
|
||||||
|
metadata, _ := json.Marshal(FabricCandidateMetadata{LocalSegmentID: "site-a", NATGroupID: "nat-a"})
|
||||||
|
routeSet := FabricRouteSetForPeerEndpointCandidates("node-b", []PeerEndpointCandidate{
|
||||||
|
{
|
||||||
|
EndpointID: "node-b-public",
|
||||||
|
NodeID: "node-b",
|
||||||
|
Transport: "quic",
|
||||||
|
Address: "quic://203.0.113.10:19443",
|
||||||
|
Reachability: "public",
|
||||||
|
ConnectivityMode: "direct",
|
||||||
|
Priority: 10,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
EndpointID: "node-b-lan",
|
||||||
|
NodeID: "node-b",
|
||||||
|
Transport: "quic",
|
||||||
|
Address: "quic://10.10.0.12:19443",
|
||||||
|
Reachability: "private",
|
||||||
|
ConnectivityMode: "direct",
|
||||||
|
PolicyTags: []string{"private-lan"},
|
||||||
|
Metadata: metadata,
|
||||||
|
},
|
||||||
|
}, FabricRoutePlannerConfig{
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
LocalNodeID: "node-a",
|
||||||
|
LocalSegmentID: "site-a",
|
||||||
|
DefaultCapacity: 200,
|
||||||
|
Now: time.Unix(100, 0).UTC(),
|
||||||
|
})
|
||||||
|
if routeSet.Primary.RouteID != "node-b-lan" {
|
||||||
|
t.Fatalf("primary route = %q, want node-b-lan", routeSet.Primary.RouteID)
|
||||||
|
}
|
||||||
|
if routeSet.Primary.Hops[len(routeSet.Primary.Hops)-1].Mode != FabricRouteLAN {
|
||||||
|
t.Fatalf("primary mode = %q, want lan", routeSet.Primary.Hops[len(routeSet.Primary.Hops)-1].Mode)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFabricRouteSetForPeerEndpointCandidatesBuildsRelayFallback(t *testing.T) {
|
||||||
|
metadata, _ := json.Marshal(FabricCandidateMetadata{RelayNodeID: "node-r", RelayEndpoint: "quic://node-r:19443"})
|
||||||
|
routeSet := FabricRouteSetForPeerEndpointCandidates("node-b", []PeerEndpointCandidate{{
|
||||||
|
EndpointID: "node-b-relay",
|
||||||
|
NodeID: "node-b",
|
||||||
|
Transport: "quic",
|
||||||
|
Address: "quic://node-b-passive:19443",
|
||||||
|
Reachability: "outbound_only",
|
||||||
|
ConnectivityMode: "relay_required",
|
||||||
|
NATType: "symmetric",
|
||||||
|
Metadata: metadata,
|
||||||
|
}}, FabricRoutePlannerConfig{
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
LocalNodeID: "node-a",
|
||||||
|
RelayCapacity: 50,
|
||||||
|
Now: time.Unix(100, 0).UTC(),
|
||||||
|
})
|
||||||
|
if routeSet.Primary.RouteID != "node-b-relay" {
|
||||||
|
t.Fatalf("primary route = %q", routeSet.Primary.RouteID)
|
||||||
|
}
|
||||||
|
if routeSet.Primary.RelayCount != 2 {
|
||||||
|
t.Fatalf("relay count = %d, want 2", routeSet.Primary.RelayCount)
|
||||||
|
}
|
||||||
|
if got := routeSet.Primary.Hops[1].NodeID; got != "node-r" {
|
||||||
|
t.Fatalf("relay hop = %q, want node-r", got)
|
||||||
|
}
|
||||||
|
if routeSet.Primary.Capacity != 50 {
|
||||||
|
t.Fatalf("capacity = %d, want 50", routeSet.Primary.Capacity)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFabricRouteSetForPeerEndpointCandidatesUsesTargetWhenRelayMetadataIsAbsent(t *testing.T) {
|
||||||
|
routeSet := FabricRouteSetForPeerEndpointCandidates("node-b", []PeerEndpointCandidate{{
|
||||||
|
EndpointID: "node-b-relay",
|
||||||
|
NodeID: "node-b",
|
||||||
|
Transport: "relay_quic",
|
||||||
|
Address: "quic://node-b:19443",
|
||||||
|
Reachability: "relay",
|
||||||
|
ConnectivityMode: "relay_required",
|
||||||
|
Metadata: json.RawMessage(`{"tls_cert_sha256":"abc123"}`),
|
||||||
|
}}, FabricRoutePlannerConfig{ClusterID: "cluster-1", LocalNodeID: "node-a"})
|
||||||
|
if routeSet.Primary.RouteID != "node-b-relay" {
|
||||||
|
t.Fatalf("primary route = %q", routeSet.Primary.RouteID)
|
||||||
|
}
|
||||||
|
if len(routeSet.Primary.Hops) != 2 {
|
||||||
|
t.Fatalf("hops = %+v, want local + target only", routeSet.Primary.Hops)
|
||||||
|
}
|
||||||
|
targetHop := routeSet.Primary.Hops[1]
|
||||||
|
if targetHop.NodeID != "node-b" || targetHop.Mode != FabricRouteRelay || targetHop.PeerCertSHA256 != "abc123" {
|
||||||
|
t.Fatalf("target hop = %+v, want relay-mode target with cert", targetHop)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFabricRouteSetForPeerEndpointCandidatesAcceptsExplicitQUICModes(t *testing.T) {
|
||||||
|
for _, tc := range []struct {
|
||||||
|
name string
|
||||||
|
transport string
|
||||||
|
wantMode FabricRouteMode
|
||||||
|
}{
|
||||||
|
{name: "lan", transport: "lan_quic", wantMode: FabricRouteLAN},
|
||||||
|
{name: "reverse", transport: "reverse_quic", wantMode: FabricRouteReverse},
|
||||||
|
{name: "relay", transport: "relay_quic", wantMode: FabricRouteRelay},
|
||||||
|
{name: "ice", transport: "ice_quic", wantMode: FabricRouteICE},
|
||||||
|
} {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
routeSet := FabricRouteSetForPeerEndpointCandidates("node-b", []PeerEndpointCandidate{{
|
||||||
|
EndpointID: "node-b-" + tc.name,
|
||||||
|
NodeID: "node-b",
|
||||||
|
Transport: tc.transport,
|
||||||
|
Address: "quic://node-b:19443",
|
||||||
|
Reachability: "private",
|
||||||
|
ConnectivityMode: "direct",
|
||||||
|
Metadata: json.RawMessage(`{"tls_cert_sha256":"abc123"}`),
|
||||||
|
}}, FabricRoutePlannerConfig{ClusterID: "cluster-1", LocalNodeID: "node-a"})
|
||||||
|
if routeSet.Primary.RouteID == "" {
|
||||||
|
t.Fatalf("%s candidate produced empty route set", tc.transport)
|
||||||
|
}
|
||||||
|
hop := routeSet.Primary.Hops[len(routeSet.Primary.Hops)-1]
|
||||||
|
if hop.Mode != tc.wantMode {
|
||||||
|
t.Fatalf("mode = %q, want %q", hop.Mode, tc.wantMode)
|
||||||
|
}
|
||||||
|
if hop.PeerCertSHA256 != "abc123" {
|
||||||
|
t.Fatalf("peer cert = %q, want abc123", hop.PeerCertSHA256)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFabricRouteSetForPeerEndpointCandidatesTreatsSameNATGroupAsLAN(t *testing.T) {
|
||||||
|
metadata, _ := json.Marshal(FabricCandidateMetadata{NATGroupID: "nat-a"})
|
||||||
|
routeSet := FabricRouteSetForPeerEndpointCandidates("node-b", []PeerEndpointCandidate{{
|
||||||
|
EndpointID: "node-b-nat-lan",
|
||||||
|
NodeID: "node-b",
|
||||||
|
Transport: "quic",
|
||||||
|
Address: "quic://10.44.0.12:19443",
|
||||||
|
Reachability: "private",
|
||||||
|
ConnectivityMode: "direct",
|
||||||
|
NATType: "symmetric",
|
||||||
|
Metadata: metadata,
|
||||||
|
}}, FabricRoutePlannerConfig{
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
LocalNodeID: "node-a",
|
||||||
|
LocalNATGroupID: "nat-a",
|
||||||
|
})
|
||||||
|
if routeSet.Primary.Hops[len(routeSet.Primary.Hops)-1].Mode != FabricRouteLAN {
|
||||||
|
t.Fatalf("route = %+v, want LAN mode for same NAT group", routeSet.Primary)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFabricRouteSetForPeerEndpointCandidatesRejectsNonQUIC(t *testing.T) {
|
||||||
|
for _, candidate := range []PeerEndpointCandidate{
|
||||||
|
{
|
||||||
|
EndpointID: "node-b-http",
|
||||||
|
NodeID: "node-b",
|
||||||
|
Transport: "direct_http",
|
||||||
|
Address: "http://node-b:8080",
|
||||||
|
Reachability: "public",
|
||||||
|
ConnectivityMode: "direct",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
EndpointID: "node-b-legacy-relay",
|
||||||
|
NodeID: "node-b",
|
||||||
|
Transport: "relay",
|
||||||
|
Address: "quic://node-r:19443",
|
||||||
|
Reachability: "relay",
|
||||||
|
ConnectivityMode: "relay_required",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
EndpointID: "node-b-legacy-reverse",
|
||||||
|
NodeID: "node-b",
|
||||||
|
Transport: "outbound_reverse",
|
||||||
|
Address: "quic://node-b:19443",
|
||||||
|
Reachability: "outbound_only",
|
||||||
|
ConnectivityMode: "outbound_only",
|
||||||
|
},
|
||||||
|
} {
|
||||||
|
routeSet := FabricRouteSetForPeerEndpointCandidates("node-b", []PeerEndpointCandidate{candidate}, FabricRoutePlannerConfig{ClusterID: "cluster-1", LocalNodeID: "node-a"})
|
||||||
|
if routeSet.Primary.RouteID != "" || len(routeSet.WarmStandby) != 0 {
|
||||||
|
t.Fatalf("non-quic candidate produced route set: %+v", routeSet)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,137 @@
|
|||||||
|
package mesh
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"sync/atomic"
|
||||||
|
)
|
||||||
|
|
||||||
|
// FabricRoutePressureTracker counts how many holders currently have each
// route acquired and keeps running totals and high-water marks. All fields
// are guarded by mu. Because the struct embeds a mutex it must not be copied
// after first use.
type FabricRoutePressureTracker struct {
	mu sync.Mutex

	// active maps route ID to the number of outstanding acquisitions.
	active map[string]int
	// maxActive maps route ID to the highest value active has reached.
	maxActive map[string]int

	// acquiredTotal and releasedTotal count acquisitions and releases.
	acquiredTotal uint64
	releasedTotal uint64
	// maxActiveTotal is the highest sum of all active counts seen so far.
	maxActiveTotal int

	// lastAcquiredRoute and lastReleasedRoute name the most recent routes.
	lastAcquiredRoute string
	lastReleasedRoute string
}
|
||||||
|
|
||||||
|
// FabricRoutePressureSnapshot is a JSON-serializable copy of a
// FabricRoutePressureTracker's counters.
type FabricRoutePressureSnapshot struct {
	// Active and MaxActive are keyed by route ID.
	Active    map[string]int `json:"active"`
	MaxActive map[string]int `json:"max_active"`

	// ActiveTotal is the sum of the Active counts.
	ActiveTotal    int `json:"active_total"`
	MaxActiveTotal int `json:"max_active_total"`

	AcquiredTotal uint64 `json:"acquired_total"`
	ReleasedTotal uint64 `json:"released_total"`

	// The last-route fields are omitted from JSON when empty.
	LastAcquiredRoute string `json:"last_acquired_route,omitempty"`
	LastReleasedRoute string `json:"last_released_route,omitempty"`
}
|
||||||
|
|
||||||
|
func NewFabricRoutePressureTracker() *FabricRoutePressureTracker {
|
||||||
|
return &FabricRoutePressureTracker{
|
||||||
|
active: map[string]int{},
|
||||||
|
maxActive: map[string]int{},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *FabricRoutePressureTracker) Apply(routeSet FabricRouteSet) FabricRouteSet {
|
||||||
|
if t == nil {
|
||||||
|
return routeSet
|
||||||
|
}
|
||||||
|
active := t.Snapshot()
|
||||||
|
if len(active) == 0 {
|
||||||
|
return routeSet
|
||||||
|
}
|
||||||
|
apply := func(route FabricRoute) FabricRoute {
|
||||||
|
if count := active[route.RouteID]; count > 0 {
|
||||||
|
route.ActiveChannels += count
|
||||||
|
}
|
||||||
|
return route
|
||||||
|
}
|
||||||
|
routeSet.Primary = apply(routeSet.Primary)
|
||||||
|
for i := range routeSet.WarmStandby {
|
||||||
|
routeSet.WarmStandby[i] = apply(routeSet.WarmStandby[i])
|
||||||
|
}
|
||||||
|
for i := range routeSet.ColdFallbacks {
|
||||||
|
routeSet.ColdFallbacks[i] = apply(routeSet.ColdFallbacks[i])
|
||||||
|
}
|
||||||
|
return routeSet
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *FabricRoutePressureTracker) Acquire(routeID string) func() {
|
||||||
|
routeID = strings.TrimSpace(routeID)
|
||||||
|
if t == nil || routeID == "" {
|
||||||
|
return func() {}
|
||||||
|
}
|
||||||
|
t.mu.Lock()
|
||||||
|
if t.active == nil {
|
||||||
|
t.active = map[string]int{}
|
||||||
|
}
|
||||||
|
if t.maxActive == nil {
|
||||||
|
t.maxActive = map[string]int{}
|
||||||
|
}
|
||||||
|
t.active[routeID]++
|
||||||
|
if t.active[routeID] > t.maxActive[routeID] {
|
||||||
|
t.maxActive[routeID] = t.active[routeID]
|
||||||
|
}
|
||||||
|
t.acquiredTotal++
|
||||||
|
t.lastAcquiredRoute = routeID
|
||||||
|
if activeTotal := activeTotalLocked(t.active); activeTotal > t.maxActiveTotal {
|
||||||
|
t.maxActiveTotal = activeTotal
|
||||||
|
}
|
||||||
|
t.mu.Unlock()
|
||||||
|
var released atomic.Bool
|
||||||
|
return func() {
|
||||||
|
if released.Swap(true) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
t.mu.Lock()
|
||||||
|
if t.active[routeID] <= 1 {
|
||||||
|
delete(t.active, routeID)
|
||||||
|
} else {
|
||||||
|
t.active[routeID]--
|
||||||
|
}
|
||||||
|
t.releasedTotal++
|
||||||
|
t.lastReleasedRoute = routeID
|
||||||
|
t.mu.Unlock()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *FabricRoutePressureTracker) Snapshot() map[string]int {
|
||||||
|
return t.SnapshotPressure().Active
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *FabricRoutePressureTracker) SnapshotPressure() FabricRoutePressureSnapshot {
|
||||||
|
if t == nil {
|
||||||
|
return FabricRoutePressureSnapshot{}
|
||||||
|
}
|
||||||
|
t.mu.Lock()
|
||||||
|
defer t.mu.Unlock()
|
||||||
|
active := make(map[string]int, len(t.active))
|
||||||
|
for routeID, count := range t.active {
|
||||||
|
active[routeID] = count
|
||||||
|
}
|
||||||
|
maxActive := make(map[string]int, len(t.maxActive))
|
||||||
|
for routeID, count := range t.maxActive {
|
||||||
|
maxActive[routeID] = count
|
||||||
|
}
|
||||||
|
return FabricRoutePressureSnapshot{
|
||||||
|
Active: active,
|
||||||
|
MaxActive: maxActive,
|
||||||
|
ActiveTotal: activeTotalLocked(active),
|
||||||
|
MaxActiveTotal: t.maxActiveTotal,
|
||||||
|
AcquiredTotal: t.acquiredTotal,
|
||||||
|
ReleasedTotal: t.releasedTotal,
|
||||||
|
LastAcquiredRoute: t.lastAcquiredRoute,
|
||||||
|
LastReleasedRoute: t.lastReleasedRoute,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// activeTotalLocked returns the sum of all counts in active. It takes no
// lock itself; within this file it is called either with the tracker's mutex
// held or on a private copy of the map.
func activeTotalLocked(active map[string]int) int {
	var sum int
	for routeID := range active {
		sum += active[routeID]
	}
	return sum
}
|
||||||
@@ -0,0 +1,44 @@
|
|||||||
|
package mesh
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
func TestFabricRoutePressureTrackerAppliesAndReleasesActiveChannels(t *testing.T) {
|
||||||
|
tracker := NewFabricRoutePressureTracker()
|
||||||
|
releaseA := tracker.Acquire("route-a")
|
||||||
|
releaseAAgain := tracker.Acquire("route-a")
|
||||||
|
releaseB := tracker.Acquire("route-b")
|
||||||
|
routeSet := FabricRouteSet{
|
||||||
|
TargetKind: FabricChannelTargetNode,
|
||||||
|
TargetID: "node-b",
|
||||||
|
Primary: testFabricRoute("route-a", "node-b", 10, 100, 3, true),
|
||||||
|
WarmStandby: []FabricRoute{
|
||||||
|
testFabricRoute("route-b", "node-b", 10, 100, 0, true),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
withPressure := tracker.Apply(routeSet)
|
||||||
|
if withPressure.Primary.ActiveChannels != 5 {
|
||||||
|
t.Fatalf("primary active = %d, want 5", withPressure.Primary.ActiveChannels)
|
||||||
|
}
|
||||||
|
if withPressure.WarmStandby[0].ActiveChannels != 1 {
|
||||||
|
t.Fatalf("standby active = %d, want 1", withPressure.WarmStandby[0].ActiveChannels)
|
||||||
|
}
|
||||||
|
|
||||||
|
releaseA()
|
||||||
|
releaseA()
|
||||||
|
releaseAAgain()
|
||||||
|
releaseB()
|
||||||
|
snapshot := tracker.SnapshotPressure()
|
||||||
|
if len(snapshot.Active) != 0 || snapshot.ActiveTotal != 0 {
|
||||||
|
t.Fatalf("snapshot after release = %+v, want inactive", snapshot)
|
||||||
|
}
|
||||||
|
if snapshot.AcquiredTotal != 3 || snapshot.ReleasedTotal != 3 {
|
||||||
|
t.Fatalf("snapshot totals = %+v, want acquired/released 3", snapshot)
|
||||||
|
}
|
||||||
|
if snapshot.MaxActive["route-a"] != 2 || snapshot.MaxActive["route-b"] != 1 || snapshot.MaxActiveTotal != 3 {
|
||||||
|
t.Fatalf("snapshot max = %+v", snapshot)
|
||||||
|
}
|
||||||
|
if snapshot.LastAcquiredRoute != "route-b" || snapshot.LastReleasedRoute != "route-b" {
|
||||||
|
t.Fatalf("snapshot last routes = %+v", snapshot)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -14,6 +14,7 @@ func TestFabricSessionPeerManagerReusesPeerPump(t *testing.T) {
|
|||||||
server := httptest.NewServer(Server{
|
server := httptest.NewServer(Server{
|
||||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||||
FabricSessionEnabled: true,
|
FabricSessionEnabled: true,
|
||||||
|
FabricSessionWebSocketEnabled: true,
|
||||||
FabricSessionLogger: func(entry FabricSessionEventLogEntry) {
|
FabricSessionLogger: func(entry FabricSessionEventLogEntry) {
|
||||||
if entry.Event == "fabric_session_websocket_opened" {
|
if entry.Event == "fabric_session_websocket_opened" {
|
||||||
opened++
|
opened++
|
||||||
@@ -85,6 +86,7 @@ func TestFabricSessionPeerManagerClosePeerReopens(t *testing.T) {
|
|||||||
server := httptest.NewServer(Server{
|
server := httptest.NewServer(Server{
|
||||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||||
FabricSessionEnabled: true,
|
FabricSessionEnabled: true,
|
||||||
|
FabricSessionWebSocketEnabled: true,
|
||||||
FabricSessionLogger: func(entry FabricSessionEventLogEntry) {
|
FabricSessionLogger: func(entry FabricSessionEventLogEntry) {
|
||||||
if entry.Event == "fabric_session_websocket_opened" {
|
if entry.Event == "fabric_session_websocket_opened" {
|
||||||
opened++
|
opened++
|
||||||
@@ -133,6 +135,7 @@ func TestFabricSessionPeerManagerReopensClosedPump(t *testing.T) {
|
|||||||
server := httptest.NewServer(Server{
|
server := httptest.NewServer(Server{
|
||||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||||
FabricSessionEnabled: true,
|
FabricSessionEnabled: true,
|
||||||
|
FabricSessionWebSocketEnabled: true,
|
||||||
FabricSessionLogger: func(entry FabricSessionEventLogEntry) {
|
FabricSessionLogger: func(entry FabricSessionEventLogEntry) {
|
||||||
if entry.Event == "fabric_session_websocket_opened" {
|
if entry.Event == "fabric_session_websocket_opened" {
|
||||||
opened++
|
opened++
|
||||||
|
|||||||
@@ -40,73 +40,22 @@ type FabricTransportTarget struct {
|
|||||||
ErrorBuffer int
|
ErrorBuffer int
|
||||||
}
|
}
|
||||||
|
|
||||||
func FabricTransportForTarget(target FabricTransportTarget, websocket *WebSocketFabricTransport, quicTransport *QUICFabricTransport) (FabricTransport, FabricTransportTarget, error) {
|
func FabricTransportForTarget(target FabricTransportTarget, quicTransport *QUICFabricTransport) (FabricTransport, FabricTransportTarget, error) {
|
||||||
transportLabel := strings.ToLower(strings.TrimSpace(target.Transport))
|
transportLabel := strings.ToLower(strings.TrimSpace(target.Transport))
|
||||||
endpoint := strings.TrimSpace(target.Endpoint)
|
endpoint := strings.TrimSpace(target.Endpoint)
|
||||||
if strings.HasPrefix(strings.ToLower(endpoint), "quic://") {
|
if strings.HasPrefix(strings.ToLower(endpoint), "quic://") {
|
||||||
|
if transportLabel == "" {
|
||||||
transportLabel = "quic"
|
transportLabel = "quic"
|
||||||
|
}
|
||||||
target.Endpoint = strings.TrimPrefix(endpoint, "quic://")
|
target.Endpoint = strings.TrimPrefix(endpoint, "quic://")
|
||||||
}
|
}
|
||||||
switch transportLabel {
|
switch transportLabel {
|
||||||
case "quic", "direct_quic", "udp_quic", "quic_udp":
|
case "quic", "direct_quic", "udp_quic", "quic_udp", "lan_quic", "reverse_quic", "relay_quic", "ice_quic":
|
||||||
if quicTransport == nil {
|
if quicTransport == nil {
|
||||||
quicTransport = NewQUICFabricTransport(nil)
|
quicTransport = NewQUICFabricTransport(nil)
|
||||||
}
|
}
|
||||||
return quicTransport, target, nil
|
return quicTransport, target, nil
|
||||||
case "", "websocket", "ws", "wss", "direct_http", "direct_https", "direct_tcp_tls":
|
|
||||||
if websocket == nil {
|
|
||||||
websocket = NewWebSocketFabricTransport(nil)
|
|
||||||
}
|
|
||||||
return websocket, target, nil
|
|
||||||
default:
|
default:
|
||||||
return nil, target, fmt.Errorf("unsupported fabric transport %q", target.Transport)
|
return nil, target, fmt.Errorf("unsupported fabric transport %q: quic is required", target.Transport)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
type WebSocketFabricTransport struct {
|
|
||||||
Manager *FabricSessionPeerManager
|
|
||||||
}
|
|
||||||
|
|
||||||
func NewWebSocketFabricTransport(manager *FabricSessionPeerManager) *WebSocketFabricTransport {
|
|
||||||
if manager == nil {
|
|
||||||
manager = NewFabricSessionPeerManager()
|
|
||||||
}
|
|
||||||
return &WebSocketFabricTransport{Manager: manager}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *WebSocketFabricTransport) Connect(ctx context.Context, target FabricTransportTarget) (FabricTransportSession, error) {
|
|
||||||
manager := t.Manager
|
|
||||||
if manager == nil {
|
|
||||||
manager = NewFabricSessionPeerManager()
|
|
||||||
t.Manager = manager
|
|
||||||
}
|
|
||||||
return manager.Get(ctx, FabricSessionPeerTarget{
|
|
||||||
PeerID: target.PeerID,
|
|
||||||
BaseURL: target.Endpoint,
|
|
||||||
Options: FabricSessionDialOptions{
|
|
||||||
Token: target.Token,
|
|
||||||
Header: target.Header,
|
|
||||||
Timeout: target.Timeout,
|
|
||||||
MaxPayload: target.MaxPayload,
|
|
||||||
},
|
|
||||||
Pump: FabricSessionPumpOptions{
|
|
||||||
OutboundBuffer: target.OutboundBuffer,
|
|
||||||
InboundBuffer: target.InboundBuffer,
|
|
||||||
ErrorBuffer: target.ErrorBuffer,
|
|
||||||
},
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *WebSocketFabricTransport) Close() error {
|
|
||||||
if t == nil || t.Manager == nil {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
return t.Manager.Close()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *WebSocketFabricTransport) Snapshot() FabricSessionPeerManagerSnapshot {
|
|
||||||
if t == nil || t.Manager == nil {
|
|
||||||
return FabricSessionPeerManagerSnapshot{SchemaVersion: "rap.fabric_session_peer_manager.v1"}
|
|
||||||
}
|
|
||||||
return t.Manager.Snapshot()
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -1,117 +1,27 @@
|
|||||||
package mesh
|
package mesh
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"strings"
|
||||||
"net/http/httptest"
|
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestWebSocketFabricTransportConnectsAndReusesSession(t *testing.T) {
|
func TestFabricTransportRejectsWebSocketTransport(t *testing.T) {
|
||||||
var opened int
|
for _, target := range []FabricTransportTarget{
|
||||||
server := httptest.NewServer(Server{
|
{Transport: "wss", Endpoint: "wss://node-a.example/fabric/session"},
|
||||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
{Transport: "relay", Endpoint: "quic://node-r.example:19443"},
|
||||||
FabricSessionEnabled: true,
|
{Transport: "outbound_reverse", Endpoint: "quic://node-b.example:19443"},
|
||||||
FabricSessionLogger: func(entry FabricSessionEventLogEntry) {
|
} {
|
||||||
if entry.Event == "fabric_session_websocket_opened" {
|
_, _, err := FabricTransportForTarget(target, nil)
|
||||||
opened++
|
if err == nil || !strings.Contains(err.Error(), "quic is required") {
|
||||||
|
t.Fatalf("target = %+v err = %v, want quic-only rejection", target, err)
|
||||||
}
|
}
|
||||||
},
|
|
||||||
}.Handler())
|
|
||||||
defer server.Close()
|
|
||||||
|
|
||||||
transport := NewWebSocketFabricTransport(nil)
|
|
||||||
defer transport.Close()
|
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
|
||||||
defer cancel()
|
|
||||||
target := FabricTransportTarget{
|
|
||||||
PeerID: "node-a",
|
|
||||||
Endpoint: server.URL,
|
|
||||||
Token: "rap_fsn_transport",
|
|
||||||
Timeout: time.Second,
|
|
||||||
OutboundBuffer: 4,
|
|
||||||
InboundBuffer: 4,
|
|
||||||
ErrorBuffer: 4,
|
|
||||||
}
|
|
||||||
|
|
||||||
first, err := transport.Connect(ctx, target)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("first connect: %v", err)
|
|
||||||
}
|
|
||||||
second, err := transport.Connect(ctx, target)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("second connect: %v", err)
|
|
||||||
}
|
|
||||||
if first != second {
|
|
||||||
t.Fatal("transport did not reuse session")
|
|
||||||
}
|
|
||||||
if opened != 1 {
|
|
||||||
t.Fatalf("opened = %d, want 1", opened)
|
|
||||||
}
|
|
||||||
if err := first.Send(ctx, fabricproto.Frame{Type: fabricproto.FramePing, Sequence: 1, Payload: []byte("transport")}); err != nil {
|
|
||||||
t.Fatalf("send ping: %v", err)
|
|
||||||
}
|
|
||||||
select {
|
|
||||||
case frame := <-first.Frames():
|
|
||||||
if frame.Type != fabricproto.FramePong || frame.Sequence != 1 || string(frame.Payload) != "transport" {
|
|
||||||
t.Fatalf("frame = %+v", frame)
|
|
||||||
}
|
|
||||||
case err := <-first.Errors():
|
|
||||||
t.Fatalf("session error: %v", err)
|
|
||||||
case <-ctx.Done():
|
|
||||||
t.Fatal(ctx.Err())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestWebSocketFabricTransportReopensClosedSession(t *testing.T) {
|
|
||||||
var opened int
|
|
||||||
server := httptest.NewServer(Server{
|
|
||||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
|
||||||
FabricSessionEnabled: true,
|
|
||||||
FabricSessionLogger: func(entry FabricSessionEventLogEntry) {
|
|
||||||
if entry.Event == "fabric_session_websocket_opened" {
|
|
||||||
opened++
|
|
||||||
}
|
|
||||||
},
|
|
||||||
}.Handler())
|
|
||||||
defer server.Close()
|
|
||||||
|
|
||||||
transport := NewWebSocketFabricTransport(nil)
|
|
||||||
defer transport.Close()
|
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
|
||||||
defer cancel()
|
|
||||||
target := FabricTransportTarget{
|
|
||||||
PeerID: "node-a",
|
|
||||||
Endpoint: server.URL,
|
|
||||||
Token: "rap_fsn_transport_reopen",
|
|
||||||
Timeout: time.Second,
|
|
||||||
}
|
|
||||||
|
|
||||||
first, err := transport.Connect(ctx, target)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("first connect: %v", err)
|
|
||||||
}
|
|
||||||
if err := first.Close(); err != nil {
|
|
||||||
t.Fatalf("close first session: %v", err)
|
|
||||||
}
|
|
||||||
second, err := transport.Connect(ctx, target)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("second connect: %v", err)
|
|
||||||
}
|
|
||||||
if first == second {
|
|
||||||
t.Fatal("transport reused closed session")
|
|
||||||
}
|
|
||||||
if opened != 2 {
|
|
||||||
t.Fatalf("opened = %d, want 2", opened)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestFabricTransportForTargetSelectsQUICByScheme(t *testing.T) {
|
func TestFabricTransportForTargetSelectsQUICByScheme(t *testing.T) {
|
||||||
transport, target, err := FabricTransportForTarget(FabricTransportTarget{
|
transport, target, err := FabricTransportForTarget(FabricTransportTarget{
|
||||||
Endpoint: "quic://127.0.0.1:4433",
|
Endpoint: "quic://127.0.0.1:4433",
|
||||||
}, nil, nil)
|
}, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("select transport: %v", err)
|
t.Fatalf("select transport: %v", err)
|
||||||
}
|
}
|
||||||
@@ -123,15 +33,12 @@ func TestFabricTransportForTargetSelectsQUICByScheme(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestFabricTransportForTargetSelectsWebSocketByDefault(t *testing.T) {
|
func TestFabricTransportForTargetRejectsNonQUICByDefault(t *testing.T) {
|
||||||
transport, target, err := FabricTransportForTarget(FabricTransportTarget{
|
_, target, err := FabricTransportForTarget(FabricTransportTarget{
|
||||||
Endpoint: "https://node.example",
|
Endpoint: "https://node.example",
|
||||||
}, nil, nil)
|
}, nil)
|
||||||
if err != nil {
|
if err == nil {
|
||||||
t.Fatalf("select transport: %v", err)
|
t.Fatal("non-QUIC target unexpectedly selected a transport")
|
||||||
}
|
|
||||||
if _, ok := transport.(*WebSocketFabricTransport); !ok {
|
|
||||||
t.Fatalf("transport = %T, want websocket", transport)
|
|
||||||
}
|
}
|
||||||
if target.Endpoint != "https://node.example" {
|
if target.Endpoint != "https://node.example" {
|
||||||
t.Fatalf("endpoint = %q", target.Endpoint)
|
t.Fatalf("endpoint = %q", target.Endpoint)
|
||||||
|
|||||||
@@ -1,42 +0,0 @@
|
|||||||
package mesh
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"net/http"
|
|
||||||
"strings"
|
|
||||||
)
|
|
||||||
|
|
||||||
// HTTPPeerTransport sends synthetic mesh envelopes to explicitly configured
|
|
||||||
// peer endpoints. It is intentionally narrow: production forwarding remains
|
|
||||||
// disabled and only SyntheticRuntime messages use this transport.
|
|
||||||
type HTTPPeerTransport struct {
|
|
||||||
PeerURLs map[string]string
|
|
||||||
HTTPClient *http.Client
|
|
||||||
}
|
|
||||||
|
|
||||||
func NewHTTPPeerTransport(peerURLs map[string]string) *HTTPPeerTransport {
|
|
||||||
normalized := make(map[string]string, len(peerURLs))
|
|
||||||
for nodeID, baseURL := range peerURLs {
|
|
||||||
nodeID = strings.TrimSpace(nodeID)
|
|
||||||
baseURL = strings.TrimRight(strings.TrimSpace(baseURL), "/")
|
|
||||||
if nodeID != "" && baseURL != "" {
|
|
||||||
normalized[nodeID] = baseURL
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return &HTTPPeerTransport{PeerURLs: normalized}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *HTTPPeerTransport) SendSynthetic(ctx context.Context, nextNodeID string, envelope SyntheticEnvelope) (SyntheticEnvelope, error) {
|
|
||||||
if t == nil {
|
|
||||||
return SyntheticEnvelope{}, ErrSyntheticPeerUnavailable
|
|
||||||
}
|
|
||||||
baseURL := strings.TrimRight(strings.TrimSpace(t.PeerURLs[nextNodeID]), "/")
|
|
||||||
if baseURL == "" {
|
|
||||||
return SyntheticEnvelope{}, ErrSyntheticPeerUnavailable
|
|
||||||
}
|
|
||||||
client := NewClient(baseURL)
|
|
||||||
if t.HTTPClient != nil {
|
|
||||||
client.HTTPClient = t.HTTPClient
|
|
||||||
}
|
|
||||||
return client.SendSynthetic(ctx, envelope)
|
|
||||||
}
|
|
||||||
@@ -1,130 +0,0 @@
|
|||||||
package mesh
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"errors"
|
|
||||||
"net/http"
|
|
||||||
"net/http/httptest"
|
|
||||||
"testing"
|
|
||||||
"time"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestHTTPPeerTransportDirectSyntheticProbe(t *testing.T) {
|
|
||||||
nodeA := newLiveSyntheticNode(t, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"})
|
|
||||||
defer nodeA.Close()
|
|
||||||
nodeB := newLiveSyntheticNode(t, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"})
|
|
||||||
defer nodeB.Close()
|
|
||||||
|
|
||||||
route := liveSyntheticRoute("route-direct", []string{"node-a", "node-b"})
|
|
||||||
routes := []SyntheticRoute{route}
|
|
||||||
nodeA.Runtime = newLiveRuntime(nodeA.Local, routes, map[string]string{"node-b": nodeB.URL})
|
|
||||||
nodeB.Runtime = newLiveRuntime(nodeB.Local, routes, map[string]string{})
|
|
||||||
|
|
||||||
ack, err := nodeA.Runtime.SendProbe(context.Background(), route.RouteID, SyntheticChannelFabricControl, "probe-live-direct")
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("send live direct probe: %v", err)
|
|
||||||
}
|
|
||||||
if ack.MessageType != SyntheticMessageProbeAck {
|
|
||||||
t.Fatalf("MessageType = %q, want %q", ack.MessageType, SyntheticMessageProbeAck)
|
|
||||||
}
|
|
||||||
payload := decodeAckPayload(t, ack)
|
|
||||||
if got, want := payload.Path, []string{"node-a", "node-b"}; !sameStrings(got, want) {
|
|
||||||
t.Fatalf("path = %v, want %v", got, want)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestHTTPPeerTransportSingleRelaySyntheticProbe(t *testing.T) {
|
|
||||||
nodeA := newLiveSyntheticNode(t, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"})
|
|
||||||
defer nodeA.Close()
|
|
||||||
nodeR := newLiveSyntheticNode(t, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-r"})
|
|
||||||
defer nodeR.Close()
|
|
||||||
nodeB := newLiveSyntheticNode(t, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"})
|
|
||||||
defer nodeB.Close()
|
|
||||||
|
|
||||||
route := liveSyntheticRoute("route-relay", []string{"node-a", "node-r", "node-b"})
|
|
||||||
routes := []SyntheticRoute{route}
|
|
||||||
nodeA.Runtime = newLiveRuntime(nodeA.Local, routes, map[string]string{"node-r": nodeR.URL})
|
|
||||||
nodeR.Runtime = newLiveRuntime(nodeR.Local, routes, map[string]string{"node-b": nodeB.URL})
|
|
||||||
nodeB.Runtime = newLiveRuntime(nodeB.Local, routes, map[string]string{})
|
|
||||||
|
|
||||||
ack, err := nodeA.Runtime.SendProbe(context.Background(), route.RouteID, SyntheticChannelFabricControl, "probe-live-relay")
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("send live relay probe: %v", err)
|
|
||||||
}
|
|
||||||
if ack.MessageType != SyntheticMessageProbeAck {
|
|
||||||
t.Fatalf("MessageType = %q, want %q", ack.MessageType, SyntheticMessageProbeAck)
|
|
||||||
}
|
|
||||||
payload := decodeAckPayload(t, ack)
|
|
||||||
if got, want := payload.Path, []string{"node-a", "node-r", "node-b"}; !sameStrings(got, want) {
|
|
||||||
t.Fatalf("path = %v, want %v", got, want)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestHTTPPeerTransportMissingPeer(t *testing.T) {
|
|
||||||
transport := NewHTTPPeerTransport(map[string]string{})
|
|
||||||
_, err := transport.SendSynthetic(context.Background(), "node-missing", SyntheticEnvelope{})
|
|
||||||
if !errors.Is(err, ErrSyntheticPeerUnavailable) {
|
|
||||||
t.Fatalf("err = %v, want ErrSyntheticPeerUnavailable", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
type liveSyntheticNode struct {
|
|
||||||
Local PeerIdentity
|
|
||||||
Runtime *SyntheticRuntime
|
|
||||||
URL string
|
|
||||||
server *httptest.Server
|
|
||||||
}
|
|
||||||
|
|
||||||
func newLiveSyntheticNode(t *testing.T, local PeerIdentity) *liveSyntheticNode {
|
|
||||||
t.Helper()
|
|
||||||
node := &liveSyntheticNode{Local: local}
|
|
||||||
node.server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
Server{Local: node.Local, SyntheticRuntime: node.Runtime}.Handler().ServeHTTP(w, r)
|
|
||||||
}))
|
|
||||||
node.URL = node.server.URL
|
|
||||||
return node
|
|
||||||
}
|
|
||||||
|
|
||||||
func (n *liveSyntheticNode) Close() {
|
|
||||||
if n.server != nil {
|
|
||||||
n.server.Close()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func newLiveRuntime(local PeerIdentity, routes []SyntheticRoute, peers map[string]string) *SyntheticRuntime {
|
|
||||||
return NewSyntheticRuntime(SyntheticRuntimeConfig{
|
|
||||||
Enabled: true,
|
|
||||||
Local: local,
|
|
||||||
Routes: routes,
|
|
||||||
Transport: NewHTTPPeerTransport(peers),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func liveSyntheticRoute(routeID string, hops []string) SyntheticRoute {
|
|
||||||
return SyntheticRoute{
|
|
||||||
RouteID: routeID,
|
|
||||||
ClusterID: "cluster-1",
|
|
||||||
SourceNodeID: hops[0],
|
|
||||||
DestinationNodeID: hops[len(hops)-1],
|
|
||||||
Hops: hops,
|
|
||||||
AllowedChannels: []string{SyntheticChannelFabricControl},
|
|
||||||
MaxTTL: 8,
|
|
||||||
MaxHops: 8,
|
|
||||||
ExpiresAt: time.Now().UTC().Add(time.Hour),
|
|
||||||
RouteVersion: "route-v1",
|
|
||||||
PolicyVersion: "policy-v1",
|
|
||||||
PeerDirectoryVersion: "peers-v1",
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func sameStrings(left, right []string) bool {
|
|
||||||
if len(left) != len(right) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
for i := range left {
|
|
||||||
if left[i] != right[i] {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
@@ -1,6 +1,7 @@
|
|||||||
package mesh
|
package mesh
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"encoding/json"
|
||||||
"sort"
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
@@ -53,9 +54,11 @@ type PeerCacheEntry struct {
|
|||||||
BestReachability string `json:"best_reachability,omitempty"`
|
BestReachability string `json:"best_reachability,omitempty"`
|
||||||
BestConnectivity string `json:"best_connectivity,omitempty"`
|
BestConnectivity string `json:"best_connectivity,omitempty"`
|
||||||
BestNATType string `json:"best_nat_type,omitempty"`
|
BestNATType string `json:"best_nat_type,omitempty"`
|
||||||
|
BestRegion string `json:"best_region,omitempty"`
|
||||||
BestPolicyTags []string `json:"best_policy_tags,omitempty"`
|
BestPolicyTags []string `json:"best_policy_tags,omitempty"`
|
||||||
BestCandidateScore int `json:"best_candidate_score,omitempty"`
|
BestCandidateScore int `json:"best_candidate_score,omitempty"`
|
||||||
BestScoreReasons []string `json:"best_score_reasons,omitempty"`
|
BestScoreReasons []string `json:"best_score_reasons,omitempty"`
|
||||||
|
BestPeerCertSHA256 string `json:"best_peer_cert_sha256,omitempty"`
|
||||||
EndpointCandidates []PeerEndpointCandidate `json:"endpoint_candidates,omitempty"`
|
EndpointCandidates []PeerEndpointCandidate `json:"endpoint_candidates,omitempty"`
|
||||||
RendezvousLeaseID string `json:"rendezvous_lease_id,omitempty"`
|
RendezvousLeaseID string `json:"rendezvous_lease_id,omitempty"`
|
||||||
RelayNodeID string `json:"relay_node_id,omitempty"`
|
RelayNodeID string `json:"relay_node_id,omitempty"`
|
||||||
@@ -132,9 +135,11 @@ func NewPeerCache(cfg PeerCacheConfig) *PeerCache {
|
|||||||
entry.BestReachability = scored[0].Candidate.Reachability
|
entry.BestReachability = scored[0].Candidate.Reachability
|
||||||
entry.BestConnectivity = scored[0].Candidate.ConnectivityMode
|
entry.BestConnectivity = scored[0].Candidate.ConnectivityMode
|
||||||
entry.BestNATType = scored[0].Candidate.NATType
|
entry.BestNATType = scored[0].Candidate.NATType
|
||||||
|
entry.BestRegion = scored[0].Candidate.Region
|
||||||
entry.BestPolicyTags = append([]string{}, scored[0].Candidate.PolicyTags...)
|
entry.BestPolicyTags = append([]string{}, scored[0].Candidate.PolicyTags...)
|
||||||
entry.BestCandidateScore = scored[0].Score
|
entry.BestCandidateScore = scored[0].Score
|
||||||
entry.BestScoreReasons = append([]string{}, scored[0].Reasons...)
|
entry.BestScoreReasons = append([]string{}, scored[0].Reasons...)
|
||||||
|
entry.BestPeerCertSHA256 = candidatePeerCertSHA256(scored[0].Candidate)
|
||||||
entry.bestScore = scored[0].Score
|
entry.bestScore = scored[0].Score
|
||||||
if strings.TrimSpace(scored[0].Candidate.Address) != "" {
|
if strings.TrimSpace(scored[0].Candidate.Address) != "" {
|
||||||
entry.Endpoint = strings.TrimSpace(scored[0].Candidate.Address)
|
entry.Endpoint = strings.TrimSpace(scored[0].Candidate.Address)
|
||||||
@@ -188,6 +193,7 @@ func NewPeerCache(cfg PeerCacheConfig) *PeerCache {
|
|||||||
if lease.PeerNodeID != cfg.Local.NodeID {
|
if lease.PeerNodeID != cfg.Local.NodeID {
|
||||||
entry := peerCacheEntry(entries, lease.PeerNodeID)
|
entry := peerCacheEntry(entries, lease.PeerNodeID)
|
||||||
useLeaseEndpoint := shouldUseRendezvousEndpoint(*entry)
|
useLeaseEndpoint := shouldUseRendezvousEndpoint(*entry)
|
||||||
|
localRelay := lease.RelayNodeID == cfg.Local.NodeID
|
||||||
entry.RendezvousLeaseID = lease.LeaseID
|
entry.RendezvousLeaseID = lease.LeaseID
|
||||||
entry.RelayNodeID = lease.RelayNodeID
|
entry.RelayNodeID = lease.RelayNodeID
|
||||||
entry.RelayEndpoint = strings.TrimRight(strings.TrimSpace(lease.RelayEndpoint), "/")
|
entry.RelayEndpoint = strings.TrimRight(strings.TrimSpace(lease.RelayEndpoint), "/")
|
||||||
@@ -195,12 +201,21 @@ func NewPeerCache(cfg PeerCacheConfig) *PeerCache {
|
|||||||
entry.CandidateCount = maxInt(entry.CandidateCount, 1)
|
entry.CandidateCount = maxInt(entry.CandidateCount, 1)
|
||||||
entry.ConnectivityModes = mergeStrings(entry.ConnectivityModes, []string{firstNonEmpty(lease.ConnectivityMode, "relay_required"), "relay_control"})
|
entry.ConnectivityModes = mergeStrings(entry.ConnectivityModes, []string{firstNonEmpty(lease.ConnectivityMode, "relay_required"), "relay_control"})
|
||||||
if useLeaseEndpoint {
|
if useLeaseEndpoint {
|
||||||
entry.BestTransport = firstNonEmpty(lease.Transport, "relay_control")
|
if localRelay {
|
||||||
|
entry.BestTransport = "reverse_quic"
|
||||||
|
} else {
|
||||||
|
entry.BestTransport = firstNonEmpty(lease.Transport, "relay_quic")
|
||||||
|
}
|
||||||
entry.BestReachability = "relay"
|
entry.BestReachability = "relay"
|
||||||
entry.BestConnectivity = firstNonEmpty(lease.ConnectivityMode, "relay_required")
|
entry.BestConnectivity = firstNonEmpty(lease.ConnectivityMode, "relay_required")
|
||||||
|
if !localRelay {
|
||||||
entry.Endpoint = entry.RelayEndpoint
|
entry.Endpoint = entry.RelayEndpoint
|
||||||
entry.BestCandidateID = lease.LeaseID
|
entry.BestCandidateID = lease.LeaseID
|
||||||
entry.BestCandidateAddr = entry.RelayEndpoint
|
entry.BestCandidateAddr = entry.RelayEndpoint
|
||||||
|
entry.BestPeerCertSHA256 = rendezvousLeasePeerCertSHA256(lease)
|
||||||
|
} else if strings.TrimSpace(entry.Endpoint) == "" {
|
||||||
|
entry.Endpoint = firstNonEmpty(entry.BestCandidateAddr, entry.RelayEndpoint)
|
||||||
|
}
|
||||||
entry.bestScore = maxInt(entry.bestScore, 500)
|
entry.bestScore = maxInt(entry.bestScore, 500)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -262,6 +277,20 @@ func NewPeerCache(cfg PeerCacheConfig) *PeerCache {
|
|||||||
}}
|
}}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func rendezvousLeasePeerCertSHA256(lease PeerRendezvousLease) string {
|
||||||
|
var metadata struct {
|
||||||
|
PeerCertSHA256 string `json:"peer_cert_sha256,omitempty"`
|
||||||
|
TLSCertSHA256 string `json:"tls_cert_sha256,omitempty"`
|
||||||
|
}
|
||||||
|
if len(lease.Metadata) == 0 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
if err := json.Unmarshal(lease.Metadata, &metadata); err != nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
return firstNonEmpty(strings.TrimSpace(metadata.PeerCertSHA256), strings.TrimSpace(metadata.TLSCertSHA256))
|
||||||
|
}
|
||||||
|
|
||||||
func (c *PeerCache) Snapshot() PeerCacheSnapshot {
|
func (c *PeerCache) Snapshot() PeerCacheSnapshot {
|
||||||
if c == nil {
|
if c == nil {
|
||||||
return PeerCacheSnapshot{}
|
return PeerCacheSnapshot{}
|
||||||
|
|||||||
@@ -10,15 +10,15 @@ func TestPeerCacheSelectsAdjacentWarmPeersWithinLimit(t *testing.T) {
|
|||||||
cache := NewPeerCache(PeerCacheConfig{
|
cache := NewPeerCache(PeerCacheConfig{
|
||||||
Local: local,
|
Local: local,
|
||||||
PeerEndpoints: map[string]string{
|
PeerEndpoints: map[string]string{
|
||||||
"node-a": "http://node-a:19000",
|
"node-a": "quic://node-a:19443",
|
||||||
"node-r": "http://node-r:19000",
|
"node-r": "quic://node-r:19443",
|
||||||
"node-c": "http://node-c:19000",
|
"node-c": "quic://node-c:19443",
|
||||||
},
|
},
|
||||||
Routes: []SyntheticRoute{
|
Routes: []SyntheticRoute{
|
||||||
peerCacheRoute("route-1", []string{"node-a", local.NodeID, "node-r", "node-c"}),
|
peerCacheRoute("route-1", []string{"node-a", local.NodeID, "node-r", "node-c"}),
|
||||||
},
|
},
|
||||||
RecoverySeeds: []PeerRecoverySeed{
|
RecoverySeeds: []PeerRecoverySeed{
|
||||||
{NodeID: "node-seed", Endpoint: "https://seed.example.test", Transport: "direct_tcp_tls", Priority: 10},
|
{NodeID: "node-seed", Endpoint: "quic://seed.example.test:19443", Transport: "direct_quic", Priority: 10},
|
||||||
},
|
},
|
||||||
WarmPeerLimit: 2,
|
WarmPeerLimit: 2,
|
||||||
Now: time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC),
|
Now: time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC),
|
||||||
@@ -42,7 +42,7 @@ func TestPeerCachePromotesRecoverySeedAfterRoutePeers(t *testing.T) {
|
|||||||
peerCacheRoute("route-1", []string{"node-a", local.NodeID, "node-r"}),
|
peerCacheRoute("route-1", []string{"node-a", local.NodeID, "node-r"}),
|
||||||
},
|
},
|
||||||
RecoverySeeds: []PeerRecoverySeed{
|
RecoverySeeds: []PeerRecoverySeed{
|
||||||
{NodeID: "node-seed", Endpoint: "wss://seed.example.test/mesh", Transport: "wss", ConnectivityMode: "direct", Priority: 1},
|
{NodeID: "node-seed", Endpoint: "quic://seed.example.test:19443", Transport: "direct_quic", ConnectivityMode: "direct", Priority: 1},
|
||||||
},
|
},
|
||||||
WarmPeerLimit: 3,
|
WarmPeerLimit: 3,
|
||||||
Now: time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC),
|
Now: time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC),
|
||||||
@@ -68,7 +68,7 @@ func TestPeerCacheUsesBestEndpointCandidate(t *testing.T) {
|
|||||||
{
|
{
|
||||||
EndpointID: "node-b-relay",
|
EndpointID: "node-b-relay",
|
||||||
NodeID: "node-b",
|
NodeID: "node-b",
|
||||||
Transport: "relay",
|
Transport: "relay_quic",
|
||||||
Address: "relay.example.test",
|
Address: "relay.example.test",
|
||||||
Reachability: "relay",
|
Reachability: "relay",
|
||||||
ConnectivityMode: "relay_required",
|
ConnectivityMode: "relay_required",
|
||||||
@@ -77,8 +77,8 @@ func TestPeerCacheUsesBestEndpointCandidate(t *testing.T) {
|
|||||||
{
|
{
|
||||||
EndpointID: "node-b-public",
|
EndpointID: "node-b-public",
|
||||||
NodeID: "node-b",
|
NodeID: "node-b",
|
||||||
Transport: "direct_tcp_tls",
|
Transport: "direct_quic",
|
||||||
Address: "203.0.113.20:443",
|
Address: "quic://203.0.113.20:19443",
|
||||||
Reachability: "public",
|
Reachability: "public",
|
||||||
NATType: "none",
|
NATType: "none",
|
||||||
ConnectivityMode: "direct",
|
ConnectivityMode: "direct",
|
||||||
@@ -119,10 +119,10 @@ func TestPeerCacheAppliesEndpointHealthObservations(t *testing.T) {
|
|||||||
LastVerifiedAt: &now,
|
LastVerifiedAt: &now,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
EndpointID: "node-b-wss",
|
EndpointID: "node-b-ice",
|
||||||
NodeID: "node-b",
|
NodeID: "node-b",
|
||||||
Transport: "wss",
|
Transport: "ice_quic",
|
||||||
Address: "https://node-b.example.test:443",
|
Address: "quic://node-b.example.test:19444",
|
||||||
Reachability: "public",
|
Reachability: "public",
|
||||||
NATType: "none",
|
NATType: "none",
|
||||||
ConnectivityMode: "direct",
|
ConnectivityMode: "direct",
|
||||||
@@ -148,10 +148,10 @@ func TestPeerCacheAppliesEndpointHealthObservations(t *testing.T) {
|
|||||||
if !ok {
|
if !ok {
|
||||||
t.Fatal("node-b missing from cache")
|
t.Fatal("node-b missing from cache")
|
||||||
}
|
}
|
||||||
if entry.BestCandidateID != "node-b-wss" || entry.Endpoint != "https://node-b.example.test:443" {
|
if entry.BestCandidateID != "node-b-ice" || entry.Endpoint != "quic://node-b.example.test:19444" {
|
||||||
t.Fatalf("peer cache did not apply endpoint observations: %+v", entry)
|
t.Fatalf("peer cache did not apply endpoint observations: %+v", entry)
|
||||||
}
|
}
|
||||||
if !containsString(entry.BestScoreReasons, "transport:wss") {
|
if !containsString(entry.BestScoreReasons, "transport:ice_quic") {
|
||||||
t.Fatalf("peer cache did not expose score reasons: %+v", entry.BestScoreReasons)
|
t.Fatalf("peer cache did not expose score reasons: %+v", entry.BestScoreReasons)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -161,15 +161,15 @@ func TestPeerCacheUsesPreferredCorporateEndpointAddress(t *testing.T) {
|
|||||||
cache := NewPeerCache(PeerCacheConfig{
|
cache := NewPeerCache(PeerCacheConfig{
|
||||||
Local: local,
|
Local: local,
|
||||||
PeerEndpoints: map[string]string{
|
PeerEndpoints: map[string]string{
|
||||||
"node-b": "https://node-b.public.example.test:443",
|
"node-b": "quic://node-b.public.example.test:19443",
|
||||||
},
|
},
|
||||||
PeerEndpointCandidates: map[string][]PeerEndpointCandidate{
|
PeerEndpointCandidates: map[string][]PeerEndpointCandidate{
|
||||||
"node-b": {
|
"node-b": {
|
||||||
{
|
{
|
||||||
EndpointID: "node-b-public",
|
EndpointID: "node-b-public",
|
||||||
NodeID: "node-b",
|
NodeID: "node-b",
|
||||||
Transport: "direct_tcp_tls",
|
Transport: "direct_quic",
|
||||||
Address: "https://node-b.public.example.test:443",
|
Address: "quic://node-b.public.example.test:19443",
|
||||||
Reachability: "public",
|
Reachability: "public",
|
||||||
NATType: "none",
|
NATType: "none",
|
||||||
ConnectivityMode: "direct",
|
ConnectivityMode: "direct",
|
||||||
@@ -179,8 +179,8 @@ func TestPeerCacheUsesPreferredCorporateEndpointAddress(t *testing.T) {
|
|||||||
{
|
{
|
||||||
EndpointID: "node-b-corp-lan",
|
EndpointID: "node-b-corp-lan",
|
||||||
NodeID: "node-b",
|
NodeID: "node-b",
|
||||||
Transport: "direct_tcp_tls",
|
Transport: "lan_quic",
|
||||||
Address: "http://10.24.10.20:19001",
|
Address: "quic://10.24.10.20:19443",
|
||||||
Reachability: "private",
|
Reachability: "private",
|
||||||
NATType: "none",
|
NATType: "none",
|
||||||
ConnectivityMode: "direct",
|
ConnectivityMode: "direct",
|
||||||
@@ -199,7 +199,7 @@ func TestPeerCacheUsesPreferredCorporateEndpointAddress(t *testing.T) {
|
|||||||
if !ok {
|
if !ok {
|
||||||
t.Fatal("node-b missing from peer cache")
|
t.Fatal("node-b missing from peer cache")
|
||||||
}
|
}
|
||||||
if entry.BestCandidateID != "node-b-corp-lan" || entry.Endpoint != "http://10.24.10.20:19001" {
|
if entry.BestCandidateID != "node-b-corp-lan" || entry.Endpoint != "quic://10.24.10.20:19443" {
|
||||||
t.Fatalf("peer cache did not choose corp LAN endpoint: %+v", entry)
|
t.Fatalf("peer cache did not choose corp LAN endpoint: %+v", entry)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -29,6 +29,7 @@ type PeerConnectionIntentPlanConfig struct {
|
|||||||
PeerCache PeerCacheSnapshot
|
PeerCache PeerCacheSnapshot
|
||||||
RecoveryPlan PeerRecoveryPlan
|
RecoveryPlan PeerRecoveryPlan
|
||||||
RendezvousLeases []PeerRendezvousLease
|
RendezvousLeases []PeerRendezvousLease
|
||||||
|
PreferredRegion string
|
||||||
Now time.Time
|
Now time.Time
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -62,12 +63,14 @@ type PeerConnectionIntent struct {
|
|||||||
Reachability string `json:"reachability,omitempty"`
|
Reachability string `json:"reachability,omitempty"`
|
||||||
ConnectivityMode string `json:"connectivity_mode,omitempty"`
|
ConnectivityMode string `json:"connectivity_mode,omitempty"`
|
||||||
NATType string `json:"nat_type,omitempty"`
|
NATType string `json:"nat_type,omitempty"`
|
||||||
|
Region string `json:"region,omitempty"`
|
||||||
PolicyTags []string `json:"policy_tags,omitempty"`
|
PolicyTags []string `json:"policy_tags,omitempty"`
|
||||||
RequiresRendezvous bool `json:"requires_rendezvous"`
|
RequiresRendezvous bool `json:"requires_rendezvous"`
|
||||||
RendezvousResolved bool `json:"rendezvous_resolved"`
|
RendezvousResolved bool `json:"rendezvous_resolved"`
|
||||||
DirectCandidate bool `json:"direct_candidate"`
|
DirectCandidate bool `json:"direct_candidate"`
|
||||||
RelayCandidate bool `json:"relay_candidate"`
|
RelayCandidate bool `json:"relay_candidate"`
|
||||||
BestCandidateID string `json:"best_candidate_id,omitempty"`
|
BestCandidateID string `json:"best_candidate_id,omitempty"`
|
||||||
|
BestPeerCertSHA256 string `json:"best_peer_cert_sha256,omitempty"`
|
||||||
RendezvousLeaseID string `json:"rendezvous_lease_id,omitempty"`
|
RendezvousLeaseID string `json:"rendezvous_lease_id,omitempty"`
|
||||||
RelayNodeID string `json:"relay_node_id,omitempty"`
|
RelayNodeID string `json:"relay_node_id,omitempty"`
|
||||||
RelayEndpoint string `json:"relay_endpoint,omitempty"`
|
RelayEndpoint string `json:"relay_endpoint,omitempty"`
|
||||||
@@ -103,8 +106,10 @@ func PlanPeerConnectionIntents(cfg PeerConnectionIntentPlanConfig) PeerConnectio
|
|||||||
Reachability: entry.BestReachability,
|
Reachability: entry.BestReachability,
|
||||||
ConnectivityMode: entry.BestConnectivity,
|
ConnectivityMode: entry.BestConnectivity,
|
||||||
NATType: entry.BestNATType,
|
NATType: entry.BestNATType,
|
||||||
|
Region: entry.BestRegion,
|
||||||
PolicyTags: append([]string{}, entry.BestPolicyTags...),
|
PolicyTags: append([]string{}, entry.BestPolicyTags...),
|
||||||
BestCandidateID: firstNonEmpty(candidate.BestCandidateID, entry.BestCandidateID),
|
BestCandidateID: firstNonEmpty(candidate.BestCandidateID, entry.BestCandidateID),
|
||||||
|
BestPeerCertSHA256: entry.BestPeerCertSHA256,
|
||||||
RendezvousLeaseID: entry.RendezvousLeaseID,
|
RendezvousLeaseID: entry.RendezvousLeaseID,
|
||||||
RelayNodeID: entry.RelayNodeID,
|
RelayNodeID: entry.RelayNodeID,
|
||||||
RelayEndpoint: entry.RelayEndpoint,
|
RelayEndpoint: entry.RelayEndpoint,
|
||||||
@@ -114,13 +119,13 @@ func PlanPeerConnectionIntents(cfg PeerConnectionIntentPlanConfig) PeerConnectio
|
|||||||
Priority: candidate.Priority,
|
Priority: candidate.Priority,
|
||||||
GeneratedAt: now,
|
GeneratedAt: now,
|
||||||
}
|
}
|
||||||
mode, requiresRendezvous, directCandidate := classifyPeerTransport(intent)
|
mode, requiresRendezvous, directCandidate := classifyPeerTransport(intent, cfg.PreferredRegion)
|
||||||
intent.TransportMode = mode
|
intent.TransportMode = mode
|
||||||
intent.RequiresRendezvous = requiresRendezvous
|
intent.RequiresRendezvous = requiresRendezvous
|
||||||
intent.DirectCandidate = directCandidate
|
intent.DirectCandidate = directCandidate
|
||||||
if intent.RequiresRendezvous {
|
if intent.RequiresRendezvous {
|
||||||
if lease, ok := rendezvousLeaseForPeer(cfg.RendezvousLeases, intent.NodeID, now); ok {
|
if lease, ok := rendezvousLeaseForPeer(cfg.RendezvousLeases, intent.NodeID, now); ok {
|
||||||
applyRendezvousLease(&intent, lease)
|
applyRendezvousLease(&intent, lease, cfg.PeerCache.LocalNodeID)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
intents = append(intents, intent)
|
intents = append(intents, intent)
|
||||||
@@ -185,10 +190,12 @@ func connectionIntentAction(candidate PeerRecoveryCandidate) string {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func classifyPeerTransport(intent PeerConnectionIntent) (string, bool, bool) {
|
func classifyPeerTransport(intent PeerConnectionIntent, preferredRegion string) (string, bool, bool) {
|
||||||
transport := strings.ToLower(strings.TrimSpace(intent.Transport))
|
transport := strings.ToLower(strings.TrimSpace(intent.Transport))
|
||||||
connectivity := strings.ToLower(strings.TrimSpace(intent.ConnectivityMode))
|
connectivity := strings.ToLower(strings.TrimSpace(intent.ConnectivityMode))
|
||||||
reachability := strings.ToLower(strings.TrimSpace(intent.Reachability))
|
reachability := strings.ToLower(strings.TrimSpace(intent.Reachability))
|
||||||
|
region := strings.TrimSpace(intent.Region)
|
||||||
|
preferredRegion = strings.TrimSpace(preferredRegion)
|
||||||
tags := lowerStringSet(intent.PolicyTags)
|
tags := lowerStringSet(intent.PolicyTags)
|
||||||
|
|
||||||
if strings.Contains(transport, "relay") || connectivity == "relay_required" || reachability == "relay" {
|
if strings.Contains(transport, "relay") || connectivity == "relay_required" || reachability == "relay" {
|
||||||
@@ -201,6 +208,9 @@ func classifyPeerTransport(intent PeerConnectionIntent) (string, bool, bool) {
|
|||||||
return PeerTransportModeCorporateLAN, false, true
|
return PeerTransportModeCorporateLAN, false, true
|
||||||
}
|
}
|
||||||
if tags["private-lan"] || reachability == "private" || endpointHasPrivateHost(intent.Endpoint) {
|
if tags["private-lan"] || reachability == "private" || endpointHasPrivateHost(intent.Endpoint) {
|
||||||
|
if preferredRegion != "" && region != "" && !strings.EqualFold(region, preferredRegion) {
|
||||||
|
return PeerTransportModeRelayRequired, true, false
|
||||||
|
}
|
||||||
return PeerTransportModePrivateLAN, false, true
|
return PeerTransportModePrivateLAN, false, true
|
||||||
}
|
}
|
||||||
if strings.Contains(transport, "direct") || reachability == "public" || connectivity == "direct" {
|
if strings.Contains(transport, "direct") || reachability == "public" || connectivity == "direct" {
|
||||||
@@ -246,9 +256,16 @@ func rendezvousLeaseForPeer(leases []PeerRendezvousLease, peerNodeID string, now
|
|||||||
return candidates[0], true
|
return candidates[0], true
|
||||||
}
|
}
|
||||||
|
|
||||||
func applyRendezvousLease(intent *PeerConnectionIntent, lease PeerRendezvousLease) {
|
func applyRendezvousLease(intent *PeerConnectionIntent, lease PeerRendezvousLease, localNodeID string) {
|
||||||
|
localRelay := strings.TrimSpace(lease.RelayNodeID) == strings.TrimSpace(localNodeID)
|
||||||
|
if !localRelay {
|
||||||
intent.Endpoint = strings.TrimRight(strings.TrimSpace(lease.RelayEndpoint), "/")
|
intent.Endpoint = strings.TrimRight(strings.TrimSpace(lease.RelayEndpoint), "/")
|
||||||
intent.Transport = firstNonEmpty(lease.Transport, "relay_control")
|
}
|
||||||
|
if localRelay {
|
||||||
|
intent.Transport = "reverse_quic"
|
||||||
|
} else {
|
||||||
|
intent.Transport = firstNonEmpty(lease.Transport, "relay_quic")
|
||||||
|
}
|
||||||
intent.TransportMode = PeerTransportModeRelayControl
|
intent.TransportMode = PeerTransportModeRelayControl
|
||||||
intent.RequiresRendezvous = false
|
intent.RequiresRendezvous = false
|
||||||
intent.RendezvousResolved = true
|
intent.RendezvousResolved = true
|
||||||
@@ -256,17 +273,33 @@ func applyRendezvousLease(intent *PeerConnectionIntent, lease PeerRendezvousLeas
|
|||||||
intent.RelayCandidate = true
|
intent.RelayCandidate = true
|
||||||
intent.RendezvousLeaseID = lease.LeaseID
|
intent.RendezvousLeaseID = lease.LeaseID
|
||||||
intent.RelayNodeID = lease.RelayNodeID
|
intent.RelayNodeID = lease.RelayNodeID
|
||||||
intent.RelayEndpoint = intent.Endpoint
|
intent.RelayEndpoint = strings.TrimRight(strings.TrimSpace(lease.RelayEndpoint), "/")
|
||||||
intent.ControlPlaneOnly = true
|
intent.ControlPlaneOnly = true
|
||||||
|
if certSHA256 := rendezvousLeasePeerCertSHA256(lease); certSHA256 != "" && !localRelay {
|
||||||
|
intent.BestPeerCertSHA256 = certSHA256
|
||||||
|
}
|
||||||
if lease.ConnectivityMode != "" {
|
if lease.ConnectivityMode != "" {
|
||||||
intent.ConnectivityMode = lease.ConnectivityMode
|
intent.ConnectivityMode = lease.ConnectivityMode
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func endpointHasPrivateHost(rawEndpoint string) bool {
|
func endpointHasPrivateHost(rawEndpoint string) bool {
|
||||||
|
addr, ok := endpointHostAddr(rawEndpoint)
|
||||||
|
if !ok {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return addr.IsPrivate() || addr.IsLoopback() || addr.IsLinkLocalUnicast()
|
||||||
|
}
|
||||||
|
|
||||||
|
func endpointHasUnspecifiedHost(rawEndpoint string) bool {
|
||||||
|
addr, ok := endpointHostAddr(rawEndpoint)
|
||||||
|
return ok && addr.IsUnspecified()
|
||||||
|
}
|
||||||
|
|
||||||
|
func endpointHostAddr(rawEndpoint string) (netip.Addr, bool) {
|
||||||
rawEndpoint = strings.TrimSpace(rawEndpoint)
|
rawEndpoint = strings.TrimSpace(rawEndpoint)
|
||||||
if rawEndpoint == "" {
|
if rawEndpoint == "" {
|
||||||
return false
|
return netip.Addr{}, false
|
||||||
}
|
}
|
||||||
host := rawEndpoint
|
host := rawEndpoint
|
||||||
if parsed, err := url.Parse(rawEndpoint); err == nil && parsed.Host != "" {
|
if parsed, err := url.Parse(rawEndpoint); err == nil && parsed.Host != "" {
|
||||||
@@ -277,9 +310,9 @@ func endpointHasPrivateHost(rawEndpoint string) bool {
|
|||||||
}
|
}
|
||||||
addr, err := netip.ParseAddr(strings.Trim(host, "[]"))
|
addr, err := netip.ParseAddr(strings.Trim(host, "[]"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return false
|
return netip.Addr{}, false
|
||||||
}
|
}
|
||||||
return addr.IsPrivate() || addr.IsLoopback() || addr.IsLinkLocalUnicast()
|
return addr, true
|
||||||
}
|
}
|
||||||
|
|
||||||
func lowerStringSet(values []string) map[string]bool {
|
func lowerStringSet(values []string) map[string]bool {
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
package mesh
|
package mesh
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"encoding/json"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
@@ -11,8 +12,8 @@ func TestPeerConnectionIntentsClassifyCorporateDirect(t *testing.T) {
|
|||||||
PeerCache: PeerCacheSnapshot{Entries: []PeerCacheEntry{
|
PeerCache: PeerCacheSnapshot{Entries: []PeerCacheEntry{
|
||||||
{
|
{
|
||||||
NodeID: "node-b",
|
NodeID: "node-b",
|
||||||
Endpoint: "http://10.24.10.20:19001",
|
Endpoint: "quic://10.24.10.20:19443",
|
||||||
BestTransport: "direct_tcp_tls",
|
BestTransport: "lan_quic",
|
||||||
BestReachability: "private",
|
BestReachability: "private",
|
||||||
BestConnectivity: "direct",
|
BestConnectivity: "direct",
|
||||||
BestPolicyTags: []string{"corp-lan", "same-site"},
|
BestPolicyTags: []string{"corp-lan", "same-site"},
|
||||||
@@ -23,7 +24,7 @@ func TestPeerConnectionIntentsClassifyCorporateDirect(t *testing.T) {
|
|||||||
Candidates: []PeerRecoveryCandidate{
|
Candidates: []PeerRecoveryCandidate{
|
||||||
{
|
{
|
||||||
NodeID: "node-b",
|
NodeID: "node-b",
|
||||||
Endpoint: "http://10.24.10.20:19001",
|
Endpoint: "quic://10.24.10.20:19443",
|
||||||
ConnectionState: PeerConnectionReady,
|
ConnectionState: PeerConnectionReady,
|
||||||
Reason: "maintain_ready",
|
Reason: "maintain_ready",
|
||||||
Priority: 100,
|
Priority: 100,
|
||||||
@@ -48,15 +49,15 @@ func TestPeerConnectionIntentsClassifyOutboundAndRelayAsRendezvousRequired(t *te
|
|||||||
PeerCache: PeerCacheSnapshot{Entries: []PeerCacheEntry{
|
PeerCache: PeerCacheSnapshot{Entries: []PeerCacheEntry{
|
||||||
{
|
{
|
||||||
NodeID: "node-b",
|
NodeID: "node-b",
|
||||||
Endpoint: "https://node-b.example.test:443",
|
Endpoint: "quic://node-b.example.test:19443",
|
||||||
BestTransport: "direct_tcp_tls",
|
BestTransport: "reverse_quic",
|
||||||
BestReachability: "outbound_only",
|
BestReachability: "outbound_only",
|
||||||
BestConnectivity: "outbound_only",
|
BestConnectivity: "outbound_only",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
NodeID: "node-c",
|
NodeID: "node-c",
|
||||||
Endpoint: "relay://fabric-relay/node-c",
|
Endpoint: "relay://fabric-relay/node-c",
|
||||||
BestTransport: "relay",
|
BestTransport: "relay_quic",
|
||||||
BestReachability: "relay",
|
BestReachability: "relay",
|
||||||
BestConnectivity: "relay_required",
|
BestConnectivity: "relay_required",
|
||||||
},
|
},
|
||||||
@@ -66,7 +67,7 @@ func TestPeerConnectionIntentsClassifyOutboundAndRelayAsRendezvousRequired(t *te
|
|||||||
Candidates: []PeerRecoveryCandidate{
|
Candidates: []PeerRecoveryCandidate{
|
||||||
{
|
{
|
||||||
NodeID: "node-b",
|
NodeID: "node-b",
|
||||||
Endpoint: "https://node-b.example.test:443",
|
Endpoint: "quic://node-b.example.test:19443",
|
||||||
ConnectionState: PeerConnectionDisconnected,
|
ConnectionState: PeerConnectionDisconnected,
|
||||||
Reason: "recover_warm",
|
Reason: "recover_warm",
|
||||||
Priority: 90,
|
Priority: 90,
|
||||||
@@ -91,6 +92,42 @@ func TestPeerConnectionIntentsClassifyOutboundAndRelayAsRendezvousRequired(t *te
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestPeerConnectionIntentsRequireRendezvousForRemotePrivateRegion(t *testing.T) {
|
||||||
|
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
||||||
|
plan := PlanPeerConnectionIntents(PeerConnectionIntentPlanConfig{
|
||||||
|
PreferredRegion: "ifcm",
|
||||||
|
PeerCache: PeerCacheSnapshot{Entries: []PeerCacheEntry{
|
||||||
|
{
|
||||||
|
NodeID: "node-b",
|
||||||
|
Endpoint: "quic://192.168.200.61:19132",
|
||||||
|
BestTransport: "direct_quic",
|
||||||
|
BestReachability: "private",
|
||||||
|
BestConnectivity: "private_lan",
|
||||||
|
BestRegion: "docker-test",
|
||||||
|
},
|
||||||
|
}},
|
||||||
|
RecoveryPlan: PeerRecoveryPlan{
|
||||||
|
Mode: PeerRecoveryModeRecovery,
|
||||||
|
Candidates: []PeerRecoveryCandidate{{
|
||||||
|
NodeID: "node-b",
|
||||||
|
Endpoint: "quic://192.168.200.61:19132",
|
||||||
|
ConnectionState: PeerConnectionDisconnected,
|
||||||
|
Reason: "recover_warm",
|
||||||
|
Priority: 100,
|
||||||
|
}},
|
||||||
|
},
|
||||||
|
Now: now,
|
||||||
|
})
|
||||||
|
|
||||||
|
if plan.IntentCount != 1 || plan.RelayRequiredCount != 1 || plan.RendezvousRequiredCount != 1 {
|
||||||
|
t.Fatalf("unexpected remote private plan counts: %+v", plan)
|
||||||
|
}
|
||||||
|
intent := plan.Intents[0]
|
||||||
|
if intent.DirectCandidate || !intent.RequiresRendezvous || intent.TransportMode != PeerTransportModeRelayRequired {
|
||||||
|
t.Fatalf("unexpected remote private intent: %+v", intent)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestPeerConnectionIntentsResolveRendezvousWithRelayLease(t *testing.T) {
|
func TestPeerConnectionIntentsResolveRendezvousWithRelayLease(t *testing.T) {
|
||||||
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
||||||
plan := PlanPeerConnectionIntents(PeerConnectionIntentPlanConfig{
|
plan := PlanPeerConnectionIntents(PeerConnectionIntentPlanConfig{
|
||||||
@@ -120,13 +157,14 @@ func TestPeerConnectionIntentsResolveRendezvousWithRelayLease(t *testing.T) {
|
|||||||
LeaseID: "lease-node-b-via-node-r",
|
LeaseID: "lease-node-b-via-node-r",
|
||||||
PeerNodeID: "node-b",
|
PeerNodeID: "node-b",
|
||||||
RelayNodeID: "node-r",
|
RelayNodeID: "node-r",
|
||||||
RelayEndpoint: "http://node-r:19000",
|
RelayEndpoint: "quic://node-r:19443",
|
||||||
Transport: "relay_control",
|
Transport: "relay_quic",
|
||||||
ConnectivityMode: "relay_required",
|
ConnectivityMode: "relay_required",
|
||||||
Priority: 10,
|
Priority: 10,
|
||||||
ControlPlaneOnly: true,
|
ControlPlaneOnly: true,
|
||||||
IssuedAt: now.Add(-time.Minute),
|
IssuedAt: now.Add(-time.Minute),
|
||||||
ExpiresAt: now.Add(time.Minute),
|
ExpiresAt: now.Add(time.Minute),
|
||||||
|
Metadata: peerConnectionIntentLeaseMetadata(t, "abc123"),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
Now: now,
|
Now: now,
|
||||||
@@ -137,9 +175,10 @@ func TestPeerConnectionIntentsResolveRendezvousWithRelayLease(t *testing.T) {
|
|||||||
}
|
}
|
||||||
intent := plan.Intents[0]
|
intent := plan.Intents[0]
|
||||||
if intent.TransportMode != PeerTransportModeRelayControl ||
|
if intent.TransportMode != PeerTransportModeRelayControl ||
|
||||||
intent.Endpoint != "http://node-r:19000" ||
|
intent.Endpoint != "quic://node-r:19443" ||
|
||||||
intent.RelayNodeID != "node-r" ||
|
intent.RelayNodeID != "node-r" ||
|
||||||
intent.RendezvousLeaseID != "lease-node-b-via-node-r" ||
|
intent.RendezvousLeaseID != "lease-node-b-via-node-r" ||
|
||||||
|
intent.BestPeerCertSHA256 != "abc123" ||
|
||||||
!intent.RelayCandidate ||
|
!intent.RelayCandidate ||
|
||||||
!intent.RendezvousResolved ||
|
!intent.RendezvousResolved ||
|
||||||
intent.RequiresRendezvous {
|
intent.RequiresRendezvous {
|
||||||
@@ -176,8 +215,8 @@ func TestPeerConnectionIntentsSkipExpiredRendezvousLeaseAndReselect(t *testing.T
|
|||||||
LeaseID: "lease-expired-preferred",
|
LeaseID: "lease-expired-preferred",
|
||||||
PeerNodeID: "node-b",
|
PeerNodeID: "node-b",
|
||||||
RelayNodeID: "node-r-old",
|
RelayNodeID: "node-r-old",
|
||||||
RelayEndpoint: "http://node-r-old:19000",
|
RelayEndpoint: "quic://node-r-old:19443",
|
||||||
Transport: "relay_control",
|
Transport: "relay_quic",
|
||||||
ConnectivityMode: "relay_required",
|
ConnectivityMode: "relay_required",
|
||||||
Priority: 1,
|
Priority: 1,
|
||||||
ControlPlaneOnly: true,
|
ControlPlaneOnly: true,
|
||||||
@@ -188,8 +227,8 @@ func TestPeerConnectionIntentsSkipExpiredRendezvousLeaseAndReselect(t *testing.T
|
|||||||
LeaseID: "lease-active-reselected",
|
LeaseID: "lease-active-reselected",
|
||||||
PeerNodeID: "node-b",
|
PeerNodeID: "node-b",
|
||||||
RelayNodeID: "node-r-new",
|
RelayNodeID: "node-r-new",
|
||||||
RelayEndpoint: "http://node-r-new:19000",
|
RelayEndpoint: "quic://node-r-new:19443",
|
||||||
Transport: "relay_control",
|
Transport: "relay_quic",
|
||||||
ConnectivityMode: "relay_required",
|
ConnectivityMode: "relay_required",
|
||||||
Priority: 20,
|
Priority: 20,
|
||||||
ControlPlaneOnly: true,
|
ControlPlaneOnly: true,
|
||||||
@@ -206,20 +245,29 @@ func TestPeerConnectionIntentsSkipExpiredRendezvousLeaseAndReselect(t *testing.T
|
|||||||
intent := plan.Intents[0]
|
intent := plan.Intents[0]
|
||||||
if intent.RendezvousLeaseID != "lease-active-reselected" ||
|
if intent.RendezvousLeaseID != "lease-active-reselected" ||
|
||||||
intent.RelayNodeID != "node-r-new" ||
|
intent.RelayNodeID != "node-r-new" ||
|
||||||
intent.Endpoint != "http://node-r-new:19000" {
|
intent.Endpoint != "quic://node-r-new:19443" {
|
||||||
t.Fatalf("expired lease was not skipped: %+v", intent)
|
t.Fatalf("expired lease was not skipped: %+v", intent)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func peerConnectionIntentLeaseMetadata(t *testing.T, certSHA256 string) json.RawMessage {
|
||||||
|
t.Helper()
|
||||||
|
payload, err := json.Marshal(map[string]string{"peer_cert_sha256": certSHA256})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("marshal metadata: %v", err)
|
||||||
|
}
|
||||||
|
return payload
|
||||||
|
}
|
||||||
|
|
||||||
func TestPeerConnectionIntentsClassifyPrivateEndpointWithoutCandidateHints(t *testing.T) {
|
func TestPeerConnectionIntentsClassifyPrivateEndpointWithoutCandidateHints(t *testing.T) {
|
||||||
plan := PlanPeerConnectionIntents(PeerConnectionIntentPlanConfig{
|
plan := PlanPeerConnectionIntents(PeerConnectionIntentPlanConfig{
|
||||||
PeerCache: PeerCacheSnapshot{Entries: []PeerCacheEntry{
|
PeerCache: PeerCacheSnapshot{Entries: []PeerCacheEntry{
|
||||||
{NodeID: "node-b", Endpoint: "http://192.168.10.20:19001"},
|
{NodeID: "node-b", Endpoint: "quic://192.168.10.20:19443"},
|
||||||
}},
|
}},
|
||||||
RecoveryPlan: PeerRecoveryPlan{Candidates: []PeerRecoveryCandidate{
|
RecoveryPlan: PeerRecoveryPlan{Candidates: []PeerRecoveryCandidate{
|
||||||
{
|
{
|
||||||
NodeID: "node-b",
|
NodeID: "node-b",
|
||||||
Endpoint: "http://192.168.10.20:19001",
|
Endpoint: "quic://192.168.10.20:19443",
|
||||||
ConnectionState: PeerConnectionDisconnected,
|
ConnectionState: PeerConnectionDisconnected,
|
||||||
Reason: "recover_peer",
|
Reason: "recover_peer",
|
||||||
Priority: 10,
|
Priority: 10,
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ package mesh
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"fmt"
|
||||||
"net/http"
|
"net/http"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
@@ -25,6 +26,8 @@ type PeerConnectionManagerConfig struct {
|
|||||||
Tracker *PeerConnectionTracker
|
Tracker *PeerConnectionTracker
|
||||||
RendezvousLeases []PeerRendezvousLease
|
RendezvousLeases []PeerRendezvousLease
|
||||||
HTTPClient *http.Client
|
HTTPClient *http.Client
|
||||||
|
QUICTransport *QUICFabricTransport
|
||||||
|
PreferredRegion string
|
||||||
ProbeTimeout time.Duration
|
ProbeTimeout time.Duration
|
||||||
Now func() time.Time
|
Now func() time.Time
|
||||||
}
|
}
|
||||||
@@ -35,6 +38,8 @@ type PeerConnectionManager struct {
|
|||||||
tracker *PeerConnectionTracker
|
tracker *PeerConnectionTracker
|
||||||
rendezvousLeases []PeerRendezvousLease
|
rendezvousLeases []PeerRendezvousLease
|
||||||
httpClient *http.Client
|
httpClient *http.Client
|
||||||
|
quicTransport *QUICFabricTransport
|
||||||
|
preferredRegion string
|
||||||
probeTimeout time.Duration
|
probeTimeout time.Duration
|
||||||
now func() time.Time
|
now func() time.Time
|
||||||
|
|
||||||
@@ -104,6 +109,7 @@ type peerConnectionProbeTarget struct {
|
|||||||
CandidateID string
|
CandidateID string
|
||||||
Endpoint string
|
Endpoint string
|
||||||
Transport string
|
Transport string
|
||||||
|
PeerCertSHA256 string
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewPeerConnectionManager(cfg PeerConnectionManagerConfig) *PeerConnectionManager {
|
func NewPeerConnectionManager(cfg PeerConnectionManagerConfig) *PeerConnectionManager {
|
||||||
@@ -132,6 +138,8 @@ func NewPeerConnectionManager(cfg PeerConnectionManagerConfig) *PeerConnectionMa
|
|||||||
tracker: cfg.Tracker,
|
tracker: cfg.Tracker,
|
||||||
rendezvousLeases: append([]PeerRendezvousLease{}, cfg.RendezvousLeases...),
|
rendezvousLeases: append([]PeerRendezvousLease{}, cfg.RendezvousLeases...),
|
||||||
httpClient: httpClient,
|
httpClient: httpClient,
|
||||||
|
quicTransport: cfg.QUICTransport,
|
||||||
|
preferredRegion: strings.TrimSpace(cfg.PreferredRegion),
|
||||||
probeTimeout: probeTimeout,
|
probeTimeout: probeTimeout,
|
||||||
now: now,
|
now: now,
|
||||||
}
|
}
|
||||||
@@ -155,6 +163,7 @@ func (m *PeerConnectionManager) ProbeOnce(ctx context.Context) PeerConnectionMan
|
|||||||
PeerCache: peerSnapshot,
|
PeerCache: peerSnapshot,
|
||||||
RecoveryPlan: recoveryPlan,
|
RecoveryPlan: recoveryPlan,
|
||||||
RendezvousLeases: rendezvousLeases,
|
RendezvousLeases: rendezvousLeases,
|
||||||
|
PreferredRegion: m.preferredRegion,
|
||||||
Now: startedAt,
|
Now: startedAt,
|
||||||
})
|
})
|
||||||
entriesByNode := map[string]PeerCacheEntry{}
|
entriesByNode := map[string]PeerCacheEntry{}
|
||||||
@@ -215,6 +224,15 @@ func (m *PeerConnectionManager) UpdatePeerConfig(peerCache *PeerCache, rendezvou
|
|||||||
m.rendezvousLeases = append([]PeerRendezvousLease{}, rendezvousLeases...)
|
m.rendezvousLeases = append([]PeerRendezvousLease{}, rendezvousLeases...)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (m *PeerConnectionManager) UpdateQUICTransport(transport *QUICFabricTransport) {
|
||||||
|
if m == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
m.mu.Lock()
|
||||||
|
defer m.mu.Unlock()
|
||||||
|
m.quicTransport = transport
|
||||||
|
}
|
||||||
|
|
||||||
func (m *PeerConnectionManager) peerConfigSnapshot() (*PeerCache, []PeerRendezvousLease) {
|
func (m *PeerConnectionManager) peerConfigSnapshot() (*PeerCache, []PeerRendezvousLease) {
|
||||||
if m == nil {
|
if m == nil {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
@@ -253,6 +271,7 @@ func (m *PeerConnectionManager) probeIntent(ctx context.Context, intent PeerConn
|
|||||||
RelayNodeID: intent.RelayNodeID,
|
RelayNodeID: intent.RelayNodeID,
|
||||||
RelayEndpoint: intent.RelayEndpoint,
|
RelayEndpoint: intent.RelayEndpoint,
|
||||||
RelayControl: intent.RelayCandidate,
|
RelayControl: intent.RelayCandidate,
|
||||||
|
BestPeerCertSHA256: firstNonEmpty(intent.BestPeerCertSHA256, cacheEntry.BestPeerCertSHA256),
|
||||||
}
|
}
|
||||||
if intent.RequiresRendezvous {
|
if intent.RequiresRendezvous {
|
||||||
result.LinkStatus = PeerConnectionProbeDeferred
|
result.LinkStatus = PeerConnectionProbeDeferred
|
||||||
@@ -282,13 +301,12 @@ func (m *PeerConnectionManager) probeIntent(ctx context.Context, intent PeerConn
|
|||||||
ClusterID: m.local.ClusterID,
|
ClusterID: m.local.ClusterID,
|
||||||
NodeID: intent.NodeID,
|
NodeID: intent.NodeID,
|
||||||
}
|
}
|
||||||
if intent.RelayCandidate && intent.RelayNodeID != "" {
|
target.NodeID = peerConnectionProbeTargetNodeID(intent, m.local.NodeID)
|
||||||
target.NodeID = intent.RelayNodeID
|
|
||||||
}
|
|
||||||
targets := []peerConnectionProbeTarget{{
|
targets := []peerConnectionProbeTarget{{
|
||||||
CandidateID: intent.BestCandidateID,
|
CandidateID: intent.BestCandidateID,
|
||||||
Endpoint: intent.Endpoint,
|
Endpoint: intent.Endpoint,
|
||||||
Transport: intent.Transport,
|
Transport: intent.Transport,
|
||||||
|
PeerCertSHA256: intent.BestPeerCertSHA256,
|
||||||
}}
|
}}
|
||||||
if intent.DirectCandidate {
|
if intent.DirectCandidate {
|
||||||
targets = peerConnectionProbeTargets(intent, cacheEntry)
|
targets = peerConnectionProbeTargets(intent, cacheEntry)
|
||||||
@@ -300,13 +318,14 @@ func (m *PeerConnectionManager) probeIntent(ctx context.Context, intent PeerConn
|
|||||||
probePeer.BestCandidateID = strings.TrimSpace(probeTarget.CandidateID)
|
probePeer.BestCandidateID = strings.TrimSpace(probeTarget.CandidateID)
|
||||||
probePeer.BestCandidateAddr = probePeer.Endpoint
|
probePeer.BestCandidateAddr = probePeer.Endpoint
|
||||||
probePeer.BestTransport = strings.TrimSpace(probeTarget.Transport)
|
probePeer.BestTransport = strings.TrimSpace(probeTarget.Transport)
|
||||||
|
probePeer.BestPeerCertSHA256 = firstNonEmpty(probeTarget.PeerCertSHA256, probePeer.BestPeerCertSHA256)
|
||||||
if probePeer.Endpoint == "" {
|
if probePeer.Endpoint == "" {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
candidateStartedAt := normalizedNow(m.now())
|
candidateStartedAt := normalizedNow(m.now())
|
||||||
m.tracker.BeginProbe(probePeer, candidateStartedAt)
|
m.tracker.BeginProbe(probePeer, candidateStartedAt)
|
||||||
probeCtx, cancel := context.WithTimeout(ctx, m.probeTimeout)
|
probeCtx, cancel := context.WithTimeout(ctx, m.probeTimeout)
|
||||||
_, err := NewClient(probePeer.Endpoint).withHTTPClient(m.httpClient).SendHealth(probeCtx, NewHealthMessage(m.local, target))
|
err := m.probePeerTarget(probeCtx, probePeer, target)
|
||||||
cancel()
|
cancel()
|
||||||
completedAt := normalizedNow(m.now())
|
completedAt := normalizedNow(m.now())
|
||||||
candidateResult := PeerConnectionCandidateProbeResult{
|
candidateResult := PeerConnectionCandidateProbeResult{
|
||||||
@@ -354,14 +373,51 @@ func (m *PeerConnectionManager) probeIntent(ctx context.Context, intent PeerConn
|
|||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func peerConnectionProbeTargetNodeID(intent PeerConnectionIntent, localNodeID string) string {
|
||||||
|
if intent.RelayCandidate && strings.TrimSpace(intent.RelayNodeID) != "" && strings.TrimSpace(intent.RelayNodeID) != strings.TrimSpace(localNodeID) {
|
||||||
|
return intent.RelayNodeID
|
||||||
|
}
|
||||||
|
return intent.NodeID
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *PeerConnectionManager) probePeerTarget(ctx context.Context, probePeer PeerCacheEntry, target PeerIdentity) error {
|
||||||
|
endpoint := strings.TrimRight(strings.TrimSpace(probePeer.Endpoint), "/")
|
||||||
|
transport := strings.TrimSpace(probePeer.BestTransport)
|
||||||
|
if hasLegacyEndpointScheme(endpoint) {
|
||||||
|
return fmt.Errorf("non_quic_probe_rejected")
|
||||||
|
}
|
||||||
|
if peerConnectionTargetIsQUIC(transport, endpoint) {
|
||||||
|
carrier, selectedTarget, err := FabricTransportForTarget(FabricTransportTarget{
|
||||||
|
EndpointID: probePeer.BestCandidateID,
|
||||||
|
PeerID: target.NodeID,
|
||||||
|
Endpoint: endpoint,
|
||||||
|
Transport: transport,
|
||||||
|
Timeout: m.probeTimeout,
|
||||||
|
PeerCertSHA256: strings.TrimSpace(probePeer.BestPeerCertSHA256),
|
||||||
|
}, m.quicTransport)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
session, err := carrier.Connect(ctx, selectedTarget)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return session.Close()
|
||||||
|
}
|
||||||
|
return fmt.Errorf("non_quic_probe_rejected")
|
||||||
|
}
|
||||||
|
|
||||||
func peerConnectionProbeTargets(intent PeerConnectionIntent, cacheEntry PeerCacheEntry) []peerConnectionProbeTarget {
|
func peerConnectionProbeTargets(intent PeerConnectionIntent, cacheEntry PeerCacheEntry) []peerConnectionProbeTarget {
|
||||||
seen := map[string]struct{}{}
|
seen := map[string]struct{}{}
|
||||||
out := make([]peerConnectionProbeTarget, 0, len(cacheEntry.EndpointCandidates)+1)
|
out := make([]peerConnectionProbeTarget, 0, len(cacheEntry.EndpointCandidates)+1)
|
||||||
add := func(candidateID, endpoint, transport string) {
|
add := func(candidateID, endpoint, transport, peerCertSHA256 string) {
|
||||||
endpoint = strings.TrimRight(strings.TrimSpace(endpoint), "/")
|
endpoint = strings.TrimRight(strings.TrimSpace(endpoint), "/")
|
||||||
if endpoint == "" {
|
if endpoint == "" {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
if endpointHasUnspecifiedHost(endpoint) {
|
||||||
|
return
|
||||||
|
}
|
||||||
key := candidateID + "|" + endpoint
|
key := candidateID + "|" + endpoint
|
||||||
if _, ok := seen[key]; ok {
|
if _, ok := seen[key]; ok {
|
||||||
return
|
return
|
||||||
@@ -371,30 +427,43 @@ func peerConnectionProbeTargets(intent PeerConnectionIntent, cacheEntry PeerCach
|
|||||||
CandidateID: strings.TrimSpace(candidateID),
|
CandidateID: strings.TrimSpace(candidateID),
|
||||||
Endpoint: endpoint,
|
Endpoint: endpoint,
|
||||||
Transport: strings.TrimSpace(transport),
|
Transport: strings.TrimSpace(transport),
|
||||||
|
PeerCertSHA256: strings.TrimSpace(peerCertSHA256),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
for _, candidate := range cacheEntry.EndpointCandidates {
|
for _, candidate := range cacheEntry.EndpointCandidates {
|
||||||
if !candidateUsableForDirectProbe(candidate) {
|
if !candidateUsableForDirectProbe(candidate) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
add(candidate.EndpointID, candidate.Address, candidate.Transport)
|
add(candidate.EndpointID, candidate.Address, candidate.Transport, candidatePeerCertSHA256(candidate))
|
||||||
}
|
}
|
||||||
add(intent.BestCandidateID, intent.Endpoint, intent.Transport)
|
add(intent.BestCandidateID, intent.Endpoint, intent.Transport, cacheEntry.BestPeerCertSHA256)
|
||||||
return out
|
return out
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func peerConnectionTargetIsQUIC(transport string, endpoint string) bool {
|
||||||
|
return isQUICOnlyCandidateTransport(transport) || strings.HasPrefix(strings.ToLower(strings.TrimSpace(endpoint)), "quic://")
|
||||||
|
}
|
||||||
|
|
||||||
func candidateUsableForDirectProbe(candidate PeerEndpointCandidate) bool {
|
func candidateUsableForDirectProbe(candidate PeerEndpointCandidate) bool {
|
||||||
endpoint := strings.TrimSpace(candidate.Address)
|
endpoint := strings.TrimSpace(candidate.Address)
|
||||||
if endpoint == "" || strings.HasPrefix(endpoint, "relay://") || strings.HasPrefix(endpoint, "outbound://") {
|
if endpoint == "" || strings.HasPrefix(endpoint, "relay://") || strings.HasPrefix(endpoint, "outbound://") {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
if endpointHasUnspecifiedHost(endpoint) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
connectivity := strings.ToLower(strings.TrimSpace(candidate.ConnectivityMode))
|
connectivity := strings.ToLower(strings.TrimSpace(candidate.ConnectivityMode))
|
||||||
reachability := strings.ToLower(strings.TrimSpace(candidate.Reachability))
|
reachability := strings.ToLower(strings.TrimSpace(candidate.Reachability))
|
||||||
transport := strings.ToLower(strings.TrimSpace(candidate.Transport))
|
transport := strings.ToLower(strings.TrimSpace(candidate.Transport))
|
||||||
if connectivity == "outbound_only" || connectivity == "relay_required" || reachability == "outbound_only" || reachability == "relay" {
|
if connectivity == "outbound_only" || connectivity == "relay_required" || reachability == "outbound_only" || reachability == "relay" {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
return transport == "" || strings.Contains(transport, "direct") || transport == "wss" || strings.HasPrefix(endpoint, "http://") || strings.HasPrefix(endpoint, "https://")
|
return transport == "" ||
|
||||||
|
strings.Contains(transport, "direct_quic") ||
|
||||||
|
transport == "quic" ||
|
||||||
|
transport == "lan_quic" ||
|
||||||
|
transport == "ice_quic" ||
|
||||||
|
strings.HasPrefix(endpoint, "quic://")
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *PeerConnectionManager) connectionState(nodeID string) PeerConnectionState {
|
func (m *PeerConnectionManager) connectionState(nodeID string) PeerConnectionState {
|
||||||
|
|||||||
@@ -2,8 +2,8 @@ package mesh
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"encoding/json"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/http/httptest"
|
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
@@ -11,12 +11,18 @@ import (
|
|||||||
func TestPeerConnectionManagerProbesDirectAndDefersRendezvous(t *testing.T) {
|
func TestPeerConnectionManagerProbesDirectAndDefersRendezvous(t *testing.T) {
|
||||||
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
||||||
current := now
|
current := now
|
||||||
server := httptest.NewServer(Server{
|
tlsConfig := testQUICTLSConfig(t)
|
||||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"},
|
server, err := StartQUICFabricServer(context.Background(), QUICFabricServerConfig{
|
||||||
}.Handler())
|
ListenAddr: "127.0.0.1:0",
|
||||||
|
TLSConfig: tlsConfig,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("start quic fabric server: %v", err)
|
||||||
|
}
|
||||||
defer server.Close()
|
defer server.Close()
|
||||||
|
|
||||||
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"}
|
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"}
|
||||||
|
certSHA256 := testQUICCertSHA256(t, tlsConfig)
|
||||||
cache := NewPeerCache(PeerCacheConfig{
|
cache := NewPeerCache(PeerCacheConfig{
|
||||||
Local: local,
|
Local: local,
|
||||||
PeerEndpointCandidates: map[string][]PeerEndpointCandidate{
|
PeerEndpointCandidates: map[string][]PeerEndpointCandidate{
|
||||||
@@ -24,19 +30,20 @@ func TestPeerConnectionManagerProbesDirectAndDefersRendezvous(t *testing.T) {
|
|||||||
{
|
{
|
||||||
EndpointID: "node-b-direct",
|
EndpointID: "node-b-direct",
|
||||||
NodeID: "node-b",
|
NodeID: "node-b",
|
||||||
Transport: "direct_tcp_tls",
|
Transport: "direct_quic",
|
||||||
Address: server.URL,
|
Address: "quic://" + server.Addr().String(),
|
||||||
Reachability: "private",
|
Reachability: "private",
|
||||||
ConnectivityMode: "direct",
|
ConnectivityMode: "direct",
|
||||||
PolicyTags: []string{"corp-lan", "same-site"},
|
PolicyTags: []string{"corp-lan", "same-site"},
|
||||||
Priority: 1,
|
Priority: 1,
|
||||||
|
Metadata: peerConnectionProbeMetadata(t, certSHA256),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
"node-c": {
|
"node-c": {
|
||||||
{
|
{
|
||||||
EndpointID: "node-c-relay",
|
EndpointID: "node-c-relay",
|
||||||
NodeID: "node-c",
|
NodeID: "node-c",
|
||||||
Transport: "relay",
|
Transport: "relay_quic",
|
||||||
Address: "relay://fabric/node-c",
|
Address: "relay://fabric/node-c",
|
||||||
Reachability: "relay",
|
Reachability: "relay",
|
||||||
ConnectivityMode: "relay_required",
|
ConnectivityMode: "relay_required",
|
||||||
@@ -52,6 +59,7 @@ func TestPeerConnectionManagerProbesDirectAndDefersRendezvous(t *testing.T) {
|
|||||||
Local: local,
|
Local: local,
|
||||||
PeerCache: cache,
|
PeerCache: cache,
|
||||||
Tracker: tracker,
|
Tracker: tracker,
|
||||||
|
QUICTransport: NewQUICFabricTransport(nil),
|
||||||
ProbeTimeout: time.Second,
|
ProbeTimeout: time.Second,
|
||||||
Now: func() time.Time {
|
Now: func() time.Time {
|
||||||
current = current.Add(10 * time.Millisecond)
|
current = current.Add(10 * time.Millisecond)
|
||||||
@@ -116,24 +124,31 @@ func TestPeerConnectionManagerRecordsFailureAndSuppressesActiveBackoff(t *testin
|
|||||||
func TestPeerConnectionManagerProbesRelayControlLease(t *testing.T) {
|
func TestPeerConnectionManagerProbesRelayControlLease(t *testing.T) {
|
||||||
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
||||||
current := now
|
current := now
|
||||||
server := httptest.NewServer(Server{
|
tlsConfig := testQUICTLSConfig(t)
|
||||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-r"},
|
server, err := StartQUICFabricServer(context.Background(), QUICFabricServerConfig{
|
||||||
}.Handler())
|
ListenAddr: "127.0.0.1:0",
|
||||||
|
TLSConfig: tlsConfig,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("start quic fabric server: %v", err)
|
||||||
|
}
|
||||||
defer server.Close()
|
defer server.Close()
|
||||||
|
|
||||||
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"}
|
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"}
|
||||||
|
certSHA256 := testQUICCertSHA256(t, tlsConfig)
|
||||||
leases := []PeerRendezvousLease{
|
leases := []PeerRendezvousLease{
|
||||||
{
|
{
|
||||||
LeaseID: "lease-node-b-via-node-r",
|
LeaseID: "lease-node-b-via-node-r",
|
||||||
PeerNodeID: "node-b",
|
PeerNodeID: "node-b",
|
||||||
RelayNodeID: "node-r",
|
RelayNodeID: "node-r",
|
||||||
RelayEndpoint: server.URL,
|
RelayEndpoint: "quic://" + server.Addr().String(),
|
||||||
Transport: "relay_control",
|
Transport: "relay_quic",
|
||||||
ConnectivityMode: "relay_required",
|
ConnectivityMode: "relay_required",
|
||||||
Priority: 10,
|
Priority: 10,
|
||||||
ControlPlaneOnly: true,
|
ControlPlaneOnly: true,
|
||||||
IssuedAt: now.Add(-time.Minute),
|
IssuedAt: now.Add(-time.Minute),
|
||||||
ExpiresAt: now.Add(time.Minute),
|
ExpiresAt: now.Add(time.Minute),
|
||||||
|
Metadata: peerConnectionProbeMetadata(t, certSHA256),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
cache := NewPeerCache(PeerCacheConfig{
|
cache := NewPeerCache(PeerCacheConfig{
|
||||||
@@ -143,7 +158,7 @@ func TestPeerConnectionManagerProbesRelayControlLease(t *testing.T) {
|
|||||||
{
|
{
|
||||||
EndpointID: "node-b-relay",
|
EndpointID: "node-b-relay",
|
||||||
NodeID: "node-b",
|
NodeID: "node-b",
|
||||||
Transport: "relay",
|
Transport: "relay_quic",
|
||||||
Address: "relay://fabric/node-b",
|
Address: "relay://fabric/node-b",
|
||||||
Reachability: "relay",
|
Reachability: "relay",
|
||||||
ConnectivityMode: "relay_required",
|
ConnectivityMode: "relay_required",
|
||||||
@@ -161,6 +176,7 @@ func TestPeerConnectionManagerProbesRelayControlLease(t *testing.T) {
|
|||||||
PeerCache: cache,
|
PeerCache: cache,
|
||||||
Tracker: tracker,
|
Tracker: tracker,
|
||||||
RendezvousLeases: leases,
|
RendezvousLeases: leases,
|
||||||
|
QUICTransport: NewQUICFabricTransport(nil),
|
||||||
ProbeTimeout: time.Second,
|
ProbeTimeout: time.Second,
|
||||||
Now: func() time.Time {
|
Now: func() time.Time {
|
||||||
current = current.Add(10 * time.Millisecond)
|
current = current.Add(10 * time.Millisecond)
|
||||||
@@ -189,15 +205,37 @@ func TestPeerConnectionManagerProbesRelayControlLease(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestPeerConnectionProbeTargetKeepsPeerForLocalRelayReverseQUIC(t *testing.T) {
|
||||||
|
intent := PeerConnectionIntent{
|
||||||
|
NodeID: "node-b",
|
||||||
|
RelayCandidate: true,
|
||||||
|
RelayNodeID: "node-a",
|
||||||
|
Transport: "reverse_quic",
|
||||||
|
}
|
||||||
|
if got := peerConnectionProbeTargetNodeID(intent, "node-a"); got != "node-b" {
|
||||||
|
t.Fatalf("local relay reverse probe target = %q, want peer node-b", got)
|
||||||
|
}
|
||||||
|
intent.RelayNodeID = "node-r"
|
||||||
|
if got := peerConnectionProbeTargetNodeID(intent, "node-a"); got != "node-r" {
|
||||||
|
t.Fatalf("remote relay probe target = %q, want relay node-r", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestPeerConnectionManagerFallsBackAcrossEndpointCandidates(t *testing.T) {
|
func TestPeerConnectionManagerFallsBackAcrossEndpointCandidates(t *testing.T) {
|
||||||
now := time.Date(2026, 4, 30, 12, 0, 0, 0, time.UTC)
|
now := time.Date(2026, 4, 30, 12, 0, 0, 0, time.UTC)
|
||||||
current := now
|
current := now
|
||||||
server := httptest.NewServer(Server{
|
tlsConfig := testQUICTLSConfig(t)
|
||||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"},
|
server, err := StartQUICFabricServer(context.Background(), QUICFabricServerConfig{
|
||||||
}.Handler())
|
ListenAddr: "127.0.0.1:0",
|
||||||
|
TLSConfig: tlsConfig,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("start quic fabric server: %v", err)
|
||||||
|
}
|
||||||
defer server.Close()
|
defer server.Close()
|
||||||
|
|
||||||
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"}
|
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"}
|
||||||
|
certSHA256 := testQUICCertSHA256(t, tlsConfig)
|
||||||
cache := NewPeerCache(PeerCacheConfig{
|
cache := NewPeerCache(PeerCacheConfig{
|
||||||
Local: local,
|
Local: local,
|
||||||
PeerEndpointCandidates: map[string][]PeerEndpointCandidate{
|
PeerEndpointCandidates: map[string][]PeerEndpointCandidate{
|
||||||
@@ -205,8 +243,8 @@ func TestPeerConnectionManagerFallsBackAcrossEndpointCandidates(t *testing.T) {
|
|||||||
{
|
{
|
||||||
EndpointID: "node-b-dead",
|
EndpointID: "node-b-dead",
|
||||||
NodeID: "node-b",
|
NodeID: "node-b",
|
||||||
Transport: "direct_http",
|
Transport: "lan_quic",
|
||||||
Address: "http://127.0.0.1:1",
|
Address: "quic://127.0.0.1:1",
|
||||||
Reachability: "private",
|
Reachability: "private",
|
||||||
ConnectivityMode: "private_lan",
|
ConnectivityMode: "private_lan",
|
||||||
Priority: 1,
|
Priority: 1,
|
||||||
@@ -214,11 +252,12 @@ func TestPeerConnectionManagerFallsBackAcrossEndpointCandidates(t *testing.T) {
|
|||||||
{
|
{
|
||||||
EndpointID: "node-b-live",
|
EndpointID: "node-b-live",
|
||||||
NodeID: "node-b",
|
NodeID: "node-b",
|
||||||
Transport: "direct_http",
|
Transport: "lan_quic",
|
||||||
Address: server.URL,
|
Address: "quic://" + server.Addr().String(),
|
||||||
Reachability: "private",
|
Reachability: "private",
|
||||||
ConnectivityMode: "private_lan",
|
ConnectivityMode: "private_lan",
|
||||||
Priority: 2,
|
Priority: 2,
|
||||||
|
Metadata: peerConnectionProbeMetadata(t, certSHA256),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@@ -230,7 +269,7 @@ func TestPeerConnectionManagerFallsBackAcrossEndpointCandidates(t *testing.T) {
|
|||||||
Local: local,
|
Local: local,
|
||||||
PeerCache: cache,
|
PeerCache: cache,
|
||||||
Tracker: tracker,
|
Tracker: tracker,
|
||||||
HTTPClient: &http.Client{Timeout: 100 * time.Millisecond},
|
QUICTransport: NewQUICFabricTransport(nil),
|
||||||
ProbeTimeout: 100 * time.Millisecond,
|
ProbeTimeout: 100 * time.Millisecond,
|
||||||
Now: func() time.Time {
|
Now: func() time.Time {
|
||||||
current = current.Add(10 * time.Millisecond)
|
current = current.Add(10 * time.Millisecond)
|
||||||
@@ -243,7 +282,7 @@ func TestPeerConnectionManagerFallsBackAcrossEndpointCandidates(t *testing.T) {
|
|||||||
t.Fatalf("unexpected cycle: %+v", cycle)
|
t.Fatalf("unexpected cycle: %+v", cycle)
|
||||||
}
|
}
|
||||||
result := cycle.Results[0]
|
result := cycle.Results[0]
|
||||||
if result.LinkStatus != PeerConnectionProbeReachable || result.SelectedCandidateID != "node-b-live" || result.SelectedEndpoint != server.URL {
|
if result.LinkStatus != PeerConnectionProbeReachable || result.SelectedCandidateID != "node-b-live" || result.SelectedEndpoint != "quic://"+server.Addr().String() {
|
||||||
t.Fatalf("fallback did not select live candidate: %+v", result)
|
t.Fatalf("fallback did not select live candidate: %+v", result)
|
||||||
}
|
}
|
||||||
if len(result.CandidateResults) != 2 ||
|
if len(result.CandidateResults) != 2 ||
|
||||||
@@ -252,7 +291,85 @@ func TestPeerConnectionManagerFallsBackAcrossEndpointCandidates(t *testing.T) {
|
|||||||
t.Fatalf("candidate probe trail mismatch: %+v", result.CandidateResults)
|
t.Fatalf("candidate probe trail mismatch: %+v", result.CandidateResults)
|
||||||
}
|
}
|
||||||
snapshot := tracker.Snapshot()
|
snapshot := tracker.Snapshot()
|
||||||
if snapshot.Ready != 1 || len(snapshot.Entries) != 1 || snapshot.Entries[0].BestCandidateID != "node-b-live" || snapshot.Entries[0].Endpoint != server.URL {
|
if snapshot.Ready != 1 || len(snapshot.Entries) != 1 || snapshot.Entries[0].BestCandidateID != "node-b-live" || snapshot.Entries[0].Endpoint != "quic://"+server.Addr().String() {
|
||||||
t.Fatalf("tracker did not retain selected candidate: %+v", snapshot)
|
t.Fatalf("tracker did not retain selected candidate: %+v", snapshot)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestPeerConnectionManagerSkipsUnspecifiedQUICCandidates(t *testing.T) {
|
||||||
|
now := time.Date(2026, 5, 17, 6, 0, 0, 0, time.UTC)
|
||||||
|
current := now
|
||||||
|
tlsConfig := testQUICTLSConfig(t)
|
||||||
|
server, err := StartQUICFabricServer(context.Background(), QUICFabricServerConfig{
|
||||||
|
ListenAddr: "127.0.0.1:0",
|
||||||
|
TLSConfig: tlsConfig,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("start quic fabric server: %v", err)
|
||||||
|
}
|
||||||
|
defer server.Close()
|
||||||
|
|
||||||
|
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"}
|
||||||
|
certSHA256 := testQUICCertSHA256(t, tlsConfig)
|
||||||
|
cache := NewPeerCache(PeerCacheConfig{
|
||||||
|
Local: local,
|
||||||
|
PeerEndpointCandidates: map[string][]PeerEndpointCandidate{
|
||||||
|
"node-b": {
|
||||||
|
{
|
||||||
|
EndpointID: "node-b-unspecified-v6",
|
||||||
|
NodeID: "node-b",
|
||||||
|
Transport: "direct_quic",
|
||||||
|
Address: "quic://[::]:19131",
|
||||||
|
Reachability: "public",
|
||||||
|
ConnectivityMode: "direct",
|
||||||
|
Priority: 1,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
EndpointID: "node-b-live",
|
||||||
|
NodeID: "node-b",
|
||||||
|
Transport: "direct_quic",
|
||||||
|
Address: "quic://" + server.Addr().String(),
|
||||||
|
Reachability: "public",
|
||||||
|
ConnectivityMode: "direct",
|
||||||
|
Priority: 2,
|
||||||
|
Metadata: peerConnectionProbeMetadata(t, certSHA256),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
WarmPeerLimit: 1,
|
||||||
|
Now: now,
|
||||||
|
})
|
||||||
|
tracker := NewPeerConnectionTracker(cache.Snapshot(), now)
|
||||||
|
manager := NewPeerConnectionManager(PeerConnectionManagerConfig{
|
||||||
|
Local: local,
|
||||||
|
PeerCache: cache,
|
||||||
|
Tracker: tracker,
|
||||||
|
QUICTransport: NewQUICFabricTransport(nil),
|
||||||
|
ProbeTimeout: time.Second,
|
||||||
|
Now: func() time.Time {
|
||||||
|
current = current.Add(10 * time.Millisecond)
|
||||||
|
return current
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
cycle := manager.ProbeOnce(context.Background())
|
||||||
|
if cycle.Attempted != 1 || cycle.Succeeded != 1 || len(cycle.Results) != 1 {
|
||||||
|
t.Fatalf("unexpected cycle: %+v", cycle)
|
||||||
|
}
|
||||||
|
result := cycle.Results[0]
|
||||||
|
if result.SelectedCandidateID != "node-b-live" || result.SelectedEndpoint != "quic://"+server.Addr().String() {
|
||||||
|
t.Fatalf("manager did not skip unspecified endpoint: %+v", result)
|
||||||
|
}
|
||||||
|
if len(result.CandidateResults) != 1 || result.CandidateResults[0].CandidateID != "node-b-live" {
|
||||||
|
t.Fatalf("unspecified endpoint should not be probed: %+v", result.CandidateResults)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// peerConnectionProbeMetadata returns the JSON object
// {"peer_cert_sha256": certSHA256} as raw candidate metadata, failing the test
// immediately if encoding fails.
func peerConnectionProbeMetadata(t *testing.T, certSHA256 string) json.RawMessage {
	t.Helper()

	metadata := map[string]string{"peer_cert_sha256": certSHA256}
	encoded, marshalErr := json.Marshal(metadata)
	if marshalErr != nil {
		t.Fatalf("marshal probe metadata: %v", marshalErr)
	}
	return encoded
}
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ func TestPeerConnectionTrackerTransitionsReadyAndDegraded(t *testing.T) {
|
|||||||
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
||||||
tracker := NewPeerConnectionTracker(PeerCacheSnapshot{
|
tracker := NewPeerConnectionTracker(PeerCacheSnapshot{
|
||||||
Entries: []PeerCacheEntry{
|
Entries: []PeerCacheEntry{
|
||||||
{NodeID: "node-b", Warm: true, WarmReason: "route_adjacent", Endpoint: "http://node-b:19000"},
|
{NodeID: "node-b", Warm: true, WarmReason: "route_adjacent", Endpoint: "quic://node-b:19443"},
|
||||||
},
|
},
|
||||||
}, now)
|
}, now)
|
||||||
|
|
||||||
|
|||||||
@@ -76,12 +76,12 @@ func TestPeerRecoveryPlanMaintainsRelayReadyPeersInSteadyMode(t *testing.T) {
|
|||||||
Entries: []PeerCacheEntry{
|
Entries: []PeerCacheEntry{
|
||||||
{
|
{
|
||||||
NodeID: "node-c",
|
NodeID: "node-c",
|
||||||
Endpoint: "http://relay:19001",
|
Endpoint: "quic://relay:19443",
|
||||||
Warm: true,
|
Warm: true,
|
||||||
WarmReason: "rendezvous_lease",
|
WarmReason: "rendezvous_lease",
|
||||||
RendezvousLeaseID: "lease-1",
|
RendezvousLeaseID: "lease-1",
|
||||||
RelayNodeID: "node-r",
|
RelayNodeID: "node-r",
|
||||||
RelayEndpoint: "http://relay:19001",
|
RelayEndpoint: "quic://relay:19443",
|
||||||
RelayControl: true,
|
RelayControl: true,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@@ -121,7 +121,7 @@ func TestPeerRecoveryPlanCapsTargetByConnectablePeers(t *testing.T) {
|
|||||||
func recoveryPlanPeer(nodeID string, warm bool, recoverySeed bool, warmReason string) PeerCacheEntry {
|
func recoveryPlanPeer(nodeID string, warm bool, recoverySeed bool, warmReason string) PeerCacheEntry {
|
||||||
return PeerCacheEntry{
|
return PeerCacheEntry{
|
||||||
NodeID: nodeID,
|
NodeID: nodeID,
|
||||||
Endpoint: "http://" + nodeID + ":19001",
|
Endpoint: "quic://" + nodeID + ":19443",
|
||||||
Warm: warm,
|
Warm: warm,
|
||||||
WarmReason: warmReason,
|
WarmReason: warmReason,
|
||||||
RecoverySeed: recoverySeed,
|
RecoverySeed: recoverySeed,
|
||||||
|
|||||||
@@ -2,42 +2,369 @@ package mesh
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"net/http"
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
"strings"
|
"strings"
|
||||||
|
"sync/atomic"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
|
||||||
)
|
)
|
||||||
|
|
||||||
type ProductionForwardTransport interface {
|
type ProductionForwardTransport interface {
|
||||||
SendProduction(ctx context.Context, nextNodeID string, envelope ProductionEnvelope) (ProductionForwardResult, error)
|
SendProduction(ctx context.Context, nextNodeID string, envelope ProductionEnvelope) (ProductionForwardResult, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
type HTTPProductionForwardTransport struct {
|
type QUICProductionForwardTransport struct {
|
||||||
PeerURLs map[string]string
|
Targets map[string]FabricTransportTarget
|
||||||
HTTPClient *http.Client
|
RouteSets map[string]FabricRouteSet
|
||||||
|
Transport FabricTransport
|
||||||
|
Router FabricChannelRouter
|
||||||
|
Timeout time.Duration
|
||||||
|
Pressure *FabricRoutePressureTracker
|
||||||
|
Health *FabricRouteHealthTracker
|
||||||
|
sequence atomic.Uint64
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewHTTPProductionForwardTransport(peerURLs map[string]string) *HTTPProductionForwardTransport {
|
type QUICProductionForwardTransportSnapshot struct {
|
||||||
normalized := make(map[string]string, len(peerURLs))
|
RoutePressure FabricRoutePressureSnapshot `json:"route_pressure"`
|
||||||
for nodeID, baseURL := range peerURLs {
|
RouteHealth FabricRouteHealthSnapshot `json:"route_health,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewQUICProductionForwardTransport(targets map[string]FabricTransportTarget, transport *QUICFabricTransport) *QUICProductionForwardTransport {
|
||||||
|
routeSets := make(map[string]FabricRouteSet, len(targets))
|
||||||
|
for nodeID, target := range targets {
|
||||||
nodeID = strings.TrimSpace(nodeID)
|
nodeID = strings.TrimSpace(nodeID)
|
||||||
baseURL = strings.TrimRight(strings.TrimSpace(baseURL), "/")
|
target.Endpoint = strings.TrimRight(strings.TrimSpace(target.Endpoint), "/")
|
||||||
if nodeID != "" && baseURL != "" {
|
target.Transport = strings.TrimSpace(target.Transport)
|
||||||
normalized[nodeID] = baseURL
|
if nodeID != "" && target.Endpoint != "" {
|
||||||
|
target.PeerID = firstNonEmpty(strings.TrimSpace(target.PeerID), nodeID)
|
||||||
|
routeSets[nodeID] = FabricRouteSetForTransportTargets("", "", nodeID, []FabricTransportTarget{target})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return &HTTPProductionForwardTransport{PeerURLs: normalized}
|
if transport == nil {
|
||||||
|
transport = NewQUICFabricTransport(nil)
|
||||||
|
}
|
||||||
|
return NewQUICProductionForwardTransportFromRouteSets(routeSets, transport)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *HTTPProductionForwardTransport) SendProduction(ctx context.Context, nextNodeID string, envelope ProductionEnvelope) (ProductionForwardResult, error) {
|
func NewQUICProductionForwardTransportFromRouteSets(routeSets map[string]FabricRouteSet, transport FabricTransport) *QUICProductionForwardTransport {
|
||||||
|
normalizedRouteSets := make(map[string]FabricRouteSet, len(routeSets))
|
||||||
|
targets := make(map[string]FabricTransportTarget, len(routeSets))
|
||||||
|
for nodeID, routeSet := range routeSets {
|
||||||
|
nodeID = strings.TrimSpace(nodeID)
|
||||||
|
if nodeID == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
normalizedRouteSets[nodeID] = routeSet
|
||||||
|
if target, err := FabricTransportTargetForRoute(routeSet.Primary); err == nil {
|
||||||
|
targets[nodeID] = target
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if transport == nil {
|
||||||
|
transport = NewQUICFabricTransport(nil)
|
||||||
|
}
|
||||||
|
return &QUICProductionForwardTransport{
|
||||||
|
Targets: targets,
|
||||||
|
RouteSets: normalizedRouteSets,
|
||||||
|
Transport: transport,
|
||||||
|
Router: NewFabricChannelRouter(FabricChannelRouterConfig{
|
||||||
|
MaxAckLatencyMs: 2000,
|
||||||
|
MinRerouteInterval: 50 * time.Millisecond,
|
||||||
|
}),
|
||||||
|
Timeout: 30 * time.Second,
|
||||||
|
Pressure: NewFabricRoutePressureTracker(),
|
||||||
|
Health: NewFabricRouteHealthTracker(30 * time.Second),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *QUICProductionForwardTransport) SendProduction(ctx context.Context, nextNodeID string, envelope ProductionEnvelope) (ProductionForwardResult, error) {
|
||||||
|
if t == nil || t.Transport == nil {
|
||||||
|
return ProductionForwardResult{}, ErrForwardPeerUnavailable
|
||||||
|
}
|
||||||
|
nextNodeID = strings.TrimSpace(nextNodeID)
|
||||||
|
routeSet, ok := t.RouteSets[nextNodeID]
|
||||||
|
if !ok {
|
||||||
|
target, targetOK := t.Targets[nextNodeID]
|
||||||
|
if !targetOK || strings.TrimSpace(target.Endpoint) == "" {
|
||||||
|
return ProductionForwardResult{}, ErrForwardPeerUnavailable
|
||||||
|
}
|
||||||
|
routeSet = FabricRouteSetForTransportTargets(envelope.ClusterID, envelope.CurrentHopNodeID, nextNodeID, []FabricTransportTarget{target})
|
||||||
|
}
|
||||||
|
spec := FabricChannelSpec{
|
||||||
|
ChannelID: firstNonEmpty(strings.TrimSpace(envelope.MessageID), fmt.Sprintf("production-%d", t.sequence.Add(1))),
|
||||||
|
ClusterID: envelope.ClusterID,
|
||||||
|
SourceNodeID: firstNonEmpty(productionRouteSetSourceNodeID(routeSet), envelope.CurrentHopNodeID),
|
||||||
|
TargetKind: FabricChannelTargetNode,
|
||||||
|
TargetID: nextNodeID,
|
||||||
|
TrafficClass: FabricServiceChannelReliable,
|
||||||
|
CreatedAt: time.Now().UTC(),
|
||||||
|
}
|
||||||
|
payload, err := json.Marshal(envelope)
|
||||||
|
if err != nil {
|
||||||
|
return ProductionForwardResult{}, err
|
||||||
|
}
|
||||||
|
result, err := t.sendProductionWithRouteSet(ctx, spec, routeSet, payload)
|
||||||
|
if err != nil {
|
||||||
|
return ProductionForwardResult{}, err
|
||||||
|
}
|
||||||
|
return result, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func productionRouteSetSourceNodeID(routeSet FabricRouteSet) string {
|
||||||
|
for _, route := range flattenFabricRouteSet(routeSet) {
|
||||||
|
if sourceNodeID := strings.TrimSpace(route.SourceNodeID); sourceNodeID != "" {
|
||||||
|
return sourceNodeID
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *QUICProductionForwardTransport) sendProductionWithRouteSet(ctx context.Context, spec FabricChannelSpec, routeSet FabricRouteSet, payload []byte) (ProductionForwardResult, error) {
|
||||||
|
router := t.Router
|
||||||
|
if router.Config.MaxRoutePressure == 0 {
|
||||||
|
router = NewFabricChannelRouter(FabricChannelRouterConfig{MaxAckLatencyMs: 2000, MinRerouteInterval: 50 * time.Millisecond})
|
||||||
|
}
|
||||||
|
routeSet = t.routeSetForScheduling(routeSet)
|
||||||
|
channel, _, err := router.OpenChannel(spec, routeSet, time.Now().UTC())
|
||||||
|
if err != nil {
|
||||||
|
return ProductionForwardResult{}, err
|
||||||
|
}
|
||||||
|
timeout := t.Timeout
|
||||||
|
if timeout <= 0 {
|
||||||
|
timeout = 30 * time.Second
|
||||||
|
}
|
||||||
|
for {
|
||||||
|
routeSet = t.routeSetForScheduling(routeSet)
|
||||||
|
route, ok := findFabricRoute(routeSet, channel.RouteID)
|
||||||
|
if !ok {
|
||||||
|
return ProductionForwardResult{}, ErrFabricRouteNotFound
|
||||||
|
}
|
||||||
|
target, err := FabricTransportTargetForRoute(route)
|
||||||
|
if err != nil {
|
||||||
|
return ProductionForwardResult{}, err
|
||||||
|
}
|
||||||
|
target.PeerID = firstNonEmpty(strings.TrimSpace(target.PeerID), spec.TargetID)
|
||||||
|
target.MaxPayload = fabricproto.DefaultMaxPayload
|
||||||
|
releaseRoute := t.acquireProductionRoute(route.RouteID)
|
||||||
|
session, err := t.Transport.Connect(ctx, target)
|
||||||
|
if err != nil {
|
||||||
|
releaseRoute()
|
||||||
|
t.markProductionRouteFailure(route.RouteID, err)
|
||||||
|
updated, event, rerouteErr := router.ObserveChannel(channel, routeSet, FabricChannelObservation{
|
||||||
|
ChannelID: spec.ChannelID,
|
||||||
|
RouteID: route.RouteID,
|
||||||
|
Failed: true,
|
||||||
|
Reason: "connect_failed",
|
||||||
|
ObservedAt: time.Now().UTC(),
|
||||||
|
}, time.Now().UTC())
|
||||||
|
channel = updated
|
||||||
|
if event.Type == FabricChannelRouteEventReroute {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if rerouteErr != nil {
|
||||||
|
return ProductionForwardResult{}, rerouteErr
|
||||||
|
}
|
||||||
|
return ProductionForwardResult{}, err
|
||||||
|
}
|
||||||
|
response, ackMs, err := t.sendProductionOnSession(ctx, session, payload, timeout)
|
||||||
|
_ = session.Close()
|
||||||
|
releaseRoute()
|
||||||
|
if err == nil {
|
||||||
|
t.markProductionRouteSuccess(route.RouteID)
|
||||||
|
_, _, _ = router.ObserveChannel(channel, routeSet, FabricChannelObservation{
|
||||||
|
ChannelID: spec.ChannelID,
|
||||||
|
RouteID: route.RouteID,
|
||||||
|
AckLatencyMs: ackMs,
|
||||||
|
BytesSent: uint64(len(payload)),
|
||||||
|
FramesSent: 1,
|
||||||
|
BytesRecv: uint64(len(response.Payload)),
|
||||||
|
FramesRecv: 1,
|
||||||
|
ObservedAt: time.Now().UTC(),
|
||||||
|
}, time.Now().UTC())
|
||||||
|
return decodeQUICProductionForwardResponse(response.Payload)
|
||||||
|
}
|
||||||
|
t.markProductionRouteFailure(route.RouteID, err)
|
||||||
|
updated, event, rerouteErr := router.ObserveChannel(channel, routeSet, FabricChannelObservation{
|
||||||
|
ChannelID: spec.ChannelID,
|
||||||
|
RouteID: route.RouteID,
|
||||||
|
Failed: true,
|
||||||
|
Reason: "response_failed",
|
||||||
|
ObservedAt: time.Now().UTC(),
|
||||||
|
}, time.Now().UTC())
|
||||||
|
channel = updated
|
||||||
|
if event.Type == FabricChannelRouteEventReroute {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if rerouteErr != nil {
|
||||||
|
return ProductionForwardResult{}, rerouteErr
|
||||||
|
}
|
||||||
|
return ProductionForwardResult{}, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *QUICProductionForwardTransport) routeSetWithActiveChannels(routeSet FabricRouteSet) FabricRouteSet {
|
||||||
|
if t == nil || t.Pressure == nil {
|
||||||
|
return routeSet
|
||||||
|
}
|
||||||
|
return t.Pressure.Apply(routeSet)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *QUICProductionForwardTransport) routeSetForScheduling(routeSet FabricRouteSet) FabricRouteSet {
|
||||||
|
if t != nil && t.Health != nil {
|
||||||
|
routeSet = t.Health.Apply(routeSet, time.Now().UTC())
|
||||||
|
}
|
||||||
|
return t.routeSetWithActiveChannels(routeSet)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *QUICProductionForwardTransport) acquireProductionRoute(routeID string) func() {
|
||||||
|
if t == nil || t.Pressure == nil {
|
||||||
|
return func() {}
|
||||||
|
}
|
||||||
|
return t.Pressure.Acquire(routeID)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *QUICProductionForwardTransport) markProductionRouteFailure(routeID string, err error) {
|
||||||
|
if t == nil || t.Health == nil || err == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
t.Health.MarkFailure(routeID, err.Error(), time.Now().UTC())
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *QUICProductionForwardTransport) markProductionRouteSuccess(routeID string) {
|
||||||
|
if t == nil || t.Health == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
t.Health.MarkSuccess(routeID)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *QUICProductionForwardTransport) Snapshot() QUICProductionForwardTransportSnapshot {
|
||||||
if t == nil {
|
if t == nil {
|
||||||
return ProductionForwardResult{}, ErrForwardPeerUnavailable
|
return QUICProductionForwardTransportSnapshot{}
|
||||||
}
|
}
|
||||||
baseURL := strings.TrimRight(strings.TrimSpace(t.PeerURLs[nextNodeID]), "/")
|
var pressure FabricRoutePressureSnapshot
|
||||||
if baseURL == "" {
|
if t.Pressure != nil {
|
||||||
return ProductionForwardResult{}, ErrForwardPeerUnavailable
|
pressure = t.Pressure.SnapshotPressure()
|
||||||
}
|
}
|
||||||
client := NewClient(baseURL)
|
var health FabricRouteHealthSnapshot
|
||||||
if t.HTTPClient != nil {
|
if t.Health != nil {
|
||||||
client.HTTPClient = t.HTTPClient
|
health = t.Health.Snapshot(time.Now().UTC())
|
||||||
}
|
}
|
||||||
return client.SendProduction(ctx, envelope)
|
return QUICProductionForwardTransportSnapshot{RoutePressure: pressure, RouteHealth: health}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *QUICProductionForwardTransport) sendProductionOnSession(ctx context.Context, session FabricTransportSession, payload []byte, timeout time.Duration) (fabricproto.Frame, int64, error) {
|
||||||
|
sequence := t.sequence.Add(1)
|
||||||
|
if err := session.Send(ctx, fabricproto.Frame{
|
||||||
|
Type: fabricproto.FrameData,
|
||||||
|
TrafficClass: fabricproto.TrafficClassReliable,
|
||||||
|
StreamID: ProductionForwardQUICStreamID,
|
||||||
|
Sequence: sequence,
|
||||||
|
Payload: payload,
|
||||||
|
}); err != nil {
|
||||||
|
return fabricproto.Frame{}, 0, err
|
||||||
|
}
|
||||||
|
waitCtx := ctx
|
||||||
|
if timeout > 0 {
|
||||||
|
var cancel context.CancelFunc
|
||||||
|
waitCtx, cancel = context.WithTimeout(ctx, timeout)
|
||||||
|
defer cancel()
|
||||||
|
}
|
||||||
|
started := time.Now()
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-waitCtx.Done():
|
||||||
|
return fabricproto.Frame{}, 0, waitCtx.Err()
|
||||||
|
case err, ok := <-session.Errors():
|
||||||
|
if !ok {
|
||||||
|
return fabricproto.Frame{}, 0, ErrForwardPeerUnavailable
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
return fabricproto.Frame{}, 0, err
|
||||||
|
}
|
||||||
|
case frame, ok := <-session.Frames():
|
||||||
|
if !ok {
|
||||||
|
return fabricproto.Frame{}, 0, ErrForwardPeerUnavailable
|
||||||
|
}
|
||||||
|
if frame.Type != fabricproto.FrameData || frame.StreamID != ProductionForwardQUICStreamID || frame.Sequence != sequence {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
return frame, time.Since(started).Milliseconds(), nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func decodeQUICProductionForwardResponse(payload []byte) (ProductionForwardResult, error) {
|
||||||
|
var response quicProductionForwardResponse
|
||||||
|
if err := json.Unmarshal(payload, &response); err != nil {
|
||||||
|
return ProductionForwardResult{}, err
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(response.Error) != "" {
|
||||||
|
return ProductionForwardResult{}, fmt.Errorf("%w: %s", ErrForwardPeerUnavailable, response.Error)
|
||||||
|
}
|
||||||
|
return response.Result, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func FabricRouteSetForTransportTargets(clusterID string, sourceNodeID string, targetNodeID string, targets []FabricTransportTarget) FabricRouteSet {
|
||||||
|
routeSet := FabricRouteSet{TargetKind: FabricChannelTargetNode, TargetID: strings.TrimSpace(targetNodeID)}
|
||||||
|
routes := make([]FabricRoute, 0, len(targets))
|
||||||
|
for index, target := range targets {
|
||||||
|
target.Endpoint = strings.TrimRight(strings.TrimSpace(target.Endpoint), "/")
|
||||||
|
if strings.TrimSpace(target.Endpoint) == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
peerID := firstNonEmpty(strings.TrimSpace(target.PeerID), strings.TrimSpace(targetNodeID))
|
||||||
|
routeID := strings.TrimSpace(target.EndpointID)
|
||||||
|
if routeID == "" {
|
||||||
|
routeID = fmt.Sprintf("%s-quic-%d", peerID, index)
|
||||||
|
}
|
||||||
|
routes = append(routes, FabricRoute{
|
||||||
|
RouteID: routeID,
|
||||||
|
ClusterID: strings.TrimSpace(clusterID),
|
||||||
|
SourceNodeID: strings.TrimSpace(sourceNodeID),
|
||||||
|
DestinationNodeID: peerID,
|
||||||
|
Hops: []FabricRouteHop{{
|
||||||
|
NodeID: peerID,
|
||||||
|
Mode: fabricRouteModeForTransportTarget(target),
|
||||||
|
EndpointID: strings.TrimSpace(target.EndpointID),
|
||||||
|
Address: target.Endpoint,
|
||||||
|
PeerCertSHA256: strings.TrimSpace(target.PeerCertSHA256),
|
||||||
|
}},
|
||||||
|
BaseLatencyMs: routeLatencyForIndex(index),
|
||||||
|
Capacity: 100,
|
||||||
|
ActiveChannels: 0,
|
||||||
|
Healthy: true,
|
||||||
|
LastUpdatedAt: time.Now().UTC(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
if len(routes) == 0 {
|
||||||
|
return routeSet
|
||||||
|
}
|
||||||
|
routeSet.Primary = routes[0]
|
||||||
|
if len(routes) > 1 {
|
||||||
|
routeSet.WarmStandby = append(routeSet.WarmStandby, routes[1:]...)
|
||||||
|
}
|
||||||
|
return routeSet
|
||||||
|
}
|
||||||
|
|
||||||
|
func fabricRouteModeForTransportTarget(target FabricTransportTarget) FabricRouteMode {
|
||||||
|
switch strings.ToLower(strings.TrimSpace(target.Transport)) {
|
||||||
|
case string(FabricRouteLAN):
|
||||||
|
return FabricRouteLAN
|
||||||
|
case string(FabricRouteReverse):
|
||||||
|
return FabricRouteReverse
|
||||||
|
case string(FabricRouteRelay):
|
||||||
|
return FabricRouteRelay
|
||||||
|
case string(FabricRouteICE):
|
||||||
|
return FabricRouteICE
|
||||||
|
default:
|
||||||
|
return FabricRouteDirect
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// routeLatencyForIndex returns the base latency, in milliseconds, assigned to
// the route at the given position: 10 for the first (or any non-positive
// index), plus one per later position.
func routeLatencyForIndex(index int) int {
	const baseLatencyMs = 10
	if index > 0 {
		return baseLatencyMs + index
	}
	return baseLatencyMs
}
|
||||||
|
|||||||
@@ -0,0 +1,339 @@
|
|||||||
|
package mesh
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"sync"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestQUICProductionForwardTransportReroutesOnConnectFailure(t *testing.T) {
|
||||||
|
transport := newFakeProductionForwardFabricTransport()
|
||||||
|
transport.failConnect["quic://dead.example.test:19443"] = true
|
||||||
|
transport.results["quic://fast.example.test:19443"] = ProductionForwardResult{
|
||||||
|
Delivered: true,
|
||||||
|
MessageID: "message-1",
|
||||||
|
RouteID: "route-1",
|
||||||
|
}
|
||||||
|
forward := NewQUICProductionForwardTransportFromRouteSets(map[string]FabricRouteSet{
|
||||||
|
"node-b": FabricRouteSetForTransportTargets("cluster-a", "node-a", "node-b", []FabricTransportTarget{
|
||||||
|
{EndpointID: "dead", PeerID: "node-b", Endpoint: "quic://dead.example.test:19443", Transport: "quic"},
|
||||||
|
{EndpointID: "fast", PeerID: "node-b", Endpoint: "quic://fast.example.test:19443", Transport: "quic"},
|
||||||
|
}),
|
||||||
|
}, transport)
|
||||||
|
forward.Timeout = time.Second
|
||||||
|
|
||||||
|
result, err := forward.SendProduction(context.Background(), "node-b", testProductionForwardEnvelope("message-1"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("send production: %v", err)
|
||||||
|
}
|
||||||
|
if !result.Delivered || result.MessageID != "message-1" {
|
||||||
|
t.Fatalf("result = %+v", result)
|
||||||
|
}
|
||||||
|
if got := transport.connectCount("quic://dead.example.test:19443"); got != 1 {
|
||||||
|
t.Fatalf("dead connect count = %d, want 1", got)
|
||||||
|
}
|
||||||
|
if got := transport.connectCount("quic://fast.example.test:19443"); got != 1 {
|
||||||
|
t.Fatalf("fast connect count = %d, want 1", got)
|
||||||
|
}
|
||||||
|
snapshot := forward.Snapshot()
|
||||||
|
if snapshot.RoutePressure.AcquiredTotal != 2 || snapshot.RoutePressure.ReleasedTotal != 2 || snapshot.RoutePressure.MaxActiveTotal == 0 {
|
||||||
|
t.Fatalf("route pressure snapshot = %+v", snapshot)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestQUICProductionForwardTransportQuarantinesFailedRoute(t *testing.T) {
|
||||||
|
transport := newFakeProductionForwardFabricTransport()
|
||||||
|
transport.failConnect["quic://dead.example.test:19443"] = true
|
||||||
|
transport.results["quic://fast.example.test:19443"] = ProductionForwardResult{Delivered: true, MessageID: "message-1"}
|
||||||
|
forward := NewQUICProductionForwardTransportFromRouteSets(map[string]FabricRouteSet{
|
||||||
|
"node-b": FabricRouteSetForTransportTargets("cluster-a", "node-a", "node-b", []FabricTransportTarget{
|
||||||
|
{EndpointID: "dead", PeerID: "node-b", Endpoint: "quic://dead.example.test:19443", Transport: "quic"},
|
||||||
|
{EndpointID: "fast", PeerID: "node-b", Endpoint: "quic://fast.example.test:19443", Transport: "quic"},
|
||||||
|
}),
|
||||||
|
}, transport)
|
||||||
|
forward.Timeout = time.Second
|
||||||
|
|
||||||
|
for i := 0; i < 2; i++ {
|
||||||
|
result, err := forward.SendProduction(context.Background(), "node-b", testProductionForwardEnvelope("message-1"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("send production #%d: %v", i+1, err)
|
||||||
|
}
|
||||||
|
if !result.Delivered {
|
||||||
|
t.Fatalf("result #%d = %+v", i+1, result)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if got := transport.connectCount("quic://dead.example.test:19443"); got != 1 {
|
||||||
|
t.Fatalf("dead connect count = %d, want quarantine after first failure", got)
|
||||||
|
}
|
||||||
|
if got := transport.connectCount("quic://fast.example.test:19443"); got != 2 {
|
||||||
|
t.Fatalf("fast connect count = %d, want both sends on healthy route", got)
|
||||||
|
}
|
||||||
|
snapshot := forward.Snapshot()
|
||||||
|
if snapshot.RouteHealth.Quarantined["dead"].Failures != 1 {
|
||||||
|
t.Fatalf("route health snapshot = %+v, want dead route quarantined", snapshot.RouteHealth)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFabricRouteHealthTrackerExpiresQuarantine(t *testing.T) {
|
||||||
|
routeSet := FabricRouteSetForTransportTargets("cluster-a", "node-a", "node-b", []FabricTransportTarget{
|
||||||
|
{EndpointID: "dead", PeerID: "node-b", Endpoint: "quic://dead.example.test:19443", Transport: "quic"},
|
||||||
|
{EndpointID: "fast", PeerID: "node-b", Endpoint: "quic://fast.example.test:19443", Transport: "quic"},
|
||||||
|
})
|
||||||
|
tracker := NewFabricRouteHealthTracker(time.Second)
|
||||||
|
now := time.Date(2026, 5, 16, 12, 0, 0, 0, time.UTC)
|
||||||
|
|
||||||
|
tracker.MarkFailure("dead", "connect failed", now)
|
||||||
|
applied := tracker.Apply(routeSet, now.Add(500*time.Millisecond))
|
||||||
|
if applied.Primary.Healthy || !applied.Primary.Degraded {
|
||||||
|
t.Fatalf("primary after quarantine = %+v, want unhealthy degraded route", applied.Primary)
|
||||||
|
}
|
||||||
|
if len(tracker.Snapshot(now.Add(500*time.Millisecond)).Quarantined) != 1 {
|
||||||
|
t.Fatalf("route health snapshot = %+v, want one quarantined route", tracker.Snapshot(now.Add(500*time.Millisecond)))
|
||||||
|
}
|
||||||
|
|
||||||
|
applied = tracker.Apply(routeSet, now.Add(2*time.Second))
|
||||||
|
if !applied.Primary.Healthy || applied.Primary.Degraded {
|
||||||
|
t.Fatalf("primary after ttl = %+v, want route restored", applied.Primary)
|
||||||
|
}
|
||||||
|
if snapshot := tracker.Snapshot(now.Add(2 * time.Second)); len(snapshot.Quarantined) != 0 {
|
||||||
|
t.Fatalf("route health snapshot after ttl = %+v, want empty quarantine", snapshot)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestQUICProductionForwardTransportReroutesOnResponseTimeout(t *testing.T) {
|
||||||
|
transport := newFakeProductionForwardFabricTransport()
|
||||||
|
transport.delays["quic://slow.example.test:19443"] = 100 * time.Millisecond
|
||||||
|
transport.results["quic://slow.example.test:19443"] = ProductionForwardResult{Delivered: true, MessageID: "message-1"}
|
||||||
|
transport.results["quic://fast.example.test:19443"] = ProductionForwardResult{Delivered: true, MessageID: "message-1"}
|
||||||
|
forward := NewQUICProductionForwardTransportFromRouteSets(map[string]FabricRouteSet{
|
||||||
|
"node-b": FabricRouteSetForTransportTargets("cluster-a", "node-a", "node-b", []FabricTransportTarget{
|
||||||
|
{EndpointID: "slow", PeerID: "node-b", Endpoint: "quic://slow.example.test:19443", Transport: "quic"},
|
||||||
|
{EndpointID: "fast", PeerID: "node-b", Endpoint: "quic://fast.example.test:19443", Transport: "quic"},
|
||||||
|
}),
|
||||||
|
}, transport)
|
||||||
|
forward.Timeout = 10 * time.Millisecond
|
||||||
|
|
||||||
|
result, err := forward.SendProduction(context.Background(), "node-b", testProductionForwardEnvelope("message-1"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("send production: %v", err)
|
||||||
|
}
|
||||||
|
if !result.Delivered || result.MessageID != "message-1" {
|
||||||
|
t.Fatalf("result = %+v", result)
|
||||||
|
}
|
||||||
|
if got := transport.connectCount("quic://slow.example.test:19443"); got != 1 {
|
||||||
|
t.Fatalf("slow connect count = %d, want 1", got)
|
||||||
|
}
|
||||||
|
if got := transport.connectCount("quic://fast.example.test:19443"); got != 1 {
|
||||||
|
t.Fatalf("fast connect count = %d, want 1", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestQUICProductionForwardTransportSchedulesWithRouteSetSourceForForwardedEnvelope(t *testing.T) {
|
||||||
|
transport := newFakeProductionForwardFabricTransport()
|
||||||
|
transport.results["quic://node-c.example.test:19443"] = ProductionForwardResult{Delivered: true, MessageID: "message-forwarded"}
|
||||||
|
forward := NewQUICProductionForwardTransportFromRouteSets(map[string]FabricRouteSet{
|
||||||
|
"node-c": FabricRouteSetForTransportTargets("cluster-a", "node-b", "node-c", []FabricTransportTarget{
|
||||||
|
{EndpointID: "node-c-direct", PeerID: "node-c", Endpoint: "quic://node-c.example.test:19443", Transport: "quic"},
|
||||||
|
}),
|
||||||
|
}, transport)
|
||||||
|
forward.Timeout = time.Second
|
||||||
|
envelope := testProductionForwardEnvelope("message-forwarded")
|
||||||
|
envelope.ClusterID = "cluster-a"
|
||||||
|
envelope.SourceNodeID = "node-a"
|
||||||
|
envelope.DestinationNodeID = "node-c"
|
||||||
|
envelope.CurrentHopNodeID = "node-c"
|
||||||
|
envelope.NextHopNodeID = "node-c"
|
||||||
|
|
||||||
|
result, err := forward.SendProduction(context.Background(), "node-c", envelope)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("send production: %v", err)
|
||||||
|
}
|
||||||
|
if !result.Delivered || result.MessageID != "message-forwarded" {
|
||||||
|
t.Fatalf("result = %+v", result)
|
||||||
|
}
|
||||||
|
if got := transport.connectCount("quic://node-c.example.test:19443"); got != 1 {
|
||||||
|
t.Fatalf("connect count = %d, want 1", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestQUICProductionForwardTransportSpreadsConcurrentChannelsByActivePressure(t *testing.T) {
|
||||||
|
transport := newFakeProductionForwardFabricTransport()
|
||||||
|
transport.delays["quic://route-a.example.test:19443"] = 80 * time.Millisecond
|
||||||
|
transport.results["quic://route-a.example.test:19443"] = ProductionForwardResult{Delivered: true, MessageID: "message-1"}
|
||||||
|
transport.results["quic://route-b.example.test:19443"] = ProductionForwardResult{Delivered: true, MessageID: "message-2"}
|
||||||
|
routeSet := FabricRouteSetForTransportTargets("cluster-a", "node-a", "node-b", []FabricTransportTarget{
|
||||||
|
{EndpointID: "route-a", PeerID: "node-b", Endpoint: "quic://route-a.example.test:19443", Transport: "quic"},
|
||||||
|
{EndpointID: "route-b", PeerID: "node-b", Endpoint: "quic://route-b.example.test:19443", Transport: "quic"},
|
||||||
|
})
|
||||||
|
routeSet.Primary.Capacity = 100
|
||||||
|
routeSet.WarmStandby[0].Capacity = 100
|
||||||
|
forward := NewQUICProductionForwardTransportFromRouteSets(map[string]FabricRouteSet{"node-b": routeSet}, transport)
|
||||||
|
forward.Timeout = time.Second
|
||||||
|
|
||||||
|
firstDone := make(chan error, 1)
|
||||||
|
go func() {
|
||||||
|
_, err := forward.SendProduction(context.Background(), "node-b", testProductionForwardEnvelope("message-1"))
|
||||||
|
firstDone <- err
|
||||||
|
}()
|
||||||
|
transport.waitForConnect(t, "quic://route-a.example.test:19443", 1)
|
||||||
|
result, err := forward.SendProduction(context.Background(), "node-b", testProductionForwardEnvelope("message-2"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("second send production: %v", err)
|
||||||
|
}
|
||||||
|
if !result.Delivered || result.MessageID != "message-2" {
|
||||||
|
t.Fatalf("second result = %+v", result)
|
||||||
|
}
|
||||||
|
if got := transport.connectCount("quic://route-b.example.test:19443"); got != 1 {
|
||||||
|
t.Fatalf("route-b connect count = %d, want 1", got)
|
||||||
|
}
|
||||||
|
if err := <-firstDone; err != nil {
|
||||||
|
t.Fatalf("first send production: %v", err)
|
||||||
|
}
|
||||||
|
snapshot := forward.Snapshot()
|
||||||
|
if snapshot.RoutePressure.MaxActive["route-a"] != 1 || snapshot.RoutePressure.MaxActive["route-b"] != 1 || snapshot.RoutePressure.AcquiredTotal != 2 {
|
||||||
|
t.Fatalf("route pressure snapshot = %+v", snapshot)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type fakeProductionForwardFabricTransport struct {
|
||||||
|
mu sync.Mutex
|
||||||
|
failConnect map[string]bool
|
||||||
|
delays map[string]time.Duration
|
||||||
|
results map[string]ProductionForwardResult
|
||||||
|
connects map[string]int
|
||||||
|
}
|
||||||
|
|
||||||
|
func newFakeProductionForwardFabricTransport() *fakeProductionForwardFabricTransport {
|
||||||
|
return &fakeProductionForwardFabricTransport{
|
||||||
|
failConnect: map[string]bool{},
|
||||||
|
delays: map[string]time.Duration{},
|
||||||
|
results: map[string]ProductionForwardResult{},
|
||||||
|
connects: map[string]int{},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *fakeProductionForwardFabricTransport) Connect(_ context.Context, target FabricTransportTarget) (FabricTransportSession, error) {
|
||||||
|
endpoint := target.Endpoint
|
||||||
|
t.mu.Lock()
|
||||||
|
t.connects[endpoint]++
|
||||||
|
fail := t.failConnect[endpoint]
|
||||||
|
delay := t.delays[endpoint]
|
||||||
|
result := t.results[endpoint]
|
||||||
|
t.mu.Unlock()
|
||||||
|
if fail {
|
||||||
|
return nil, ErrForwardPeerUnavailable
|
||||||
|
}
|
||||||
|
return &fakeProductionForwardFabricSession{
|
||||||
|
delay: delay,
|
||||||
|
result: result,
|
||||||
|
frames: make(chan fabricproto.Frame, 16),
|
||||||
|
errors: make(chan error, 1),
|
||||||
|
done: make(chan struct{}),
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *fakeProductionForwardFabricTransport) Close() error {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *fakeProductionForwardFabricTransport) connectCount(endpoint string) int {
|
||||||
|
t.mu.Lock()
|
||||||
|
defer t.mu.Unlock()
|
||||||
|
return t.connects[endpoint]
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *fakeProductionForwardFabricTransport) waitForConnect(tb testing.TB, endpoint string, count int) {
|
||||||
|
tb.Helper()
|
||||||
|
deadline := time.Now().Add(time.Second)
|
||||||
|
for {
|
||||||
|
t.mu.Lock()
|
||||||
|
got := t.connects[endpoint]
|
||||||
|
t.mu.Unlock()
|
||||||
|
if got >= count {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if time.Now().After(deadline) {
|
||||||
|
tb.Fatalf("timed out waiting for %s connect count %d, got %d", endpoint, count, got)
|
||||||
|
}
|
||||||
|
time.Sleep(time.Millisecond)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type fakeProductionForwardFabricSession struct {
|
||||||
|
delay time.Duration
|
||||||
|
result ProductionForwardResult
|
||||||
|
frames chan fabricproto.Frame
|
||||||
|
errors chan error
|
||||||
|
done chan struct{}
|
||||||
|
once sync.Once
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *fakeProductionForwardFabricSession) Send(_ context.Context, frame fabricproto.Frame) error {
|
||||||
|
if frame.Type != fabricproto.FrameData {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
responsePayload, _ := json.Marshal(quicProductionForwardResponse{Result: s.result})
|
||||||
|
go func() {
|
||||||
|
if s.delay > 0 {
|
||||||
|
time.Sleep(s.delay)
|
||||||
|
}
|
||||||
|
select {
|
||||||
|
case <-s.done:
|
||||||
|
case s.frames <- fabricproto.Frame{
|
||||||
|
Type: fabricproto.FrameData,
|
||||||
|
TrafficClass: frame.TrafficClass,
|
||||||
|
StreamID: frame.StreamID,
|
||||||
|
Sequence: frame.Sequence,
|
||||||
|
Payload: responsePayload,
|
||||||
|
}:
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *fakeProductionForwardFabricSession) Frames() <-chan fabricproto.Frame {
|
||||||
|
return s.frames
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *fakeProductionForwardFabricSession) Errors() <-chan error {
|
||||||
|
return s.errors
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *fakeProductionForwardFabricSession) Close() error {
|
||||||
|
s.once.Do(func() {
|
||||||
|
close(s.done)
|
||||||
|
})
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *fakeProductionForwardFabricSession) Closed() bool {
|
||||||
|
select {
|
||||||
|
case <-s.done:
|
||||||
|
return true
|
||||||
|
default:
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func testProductionForwardEnvelope(messageID string) ProductionEnvelope {
|
||||||
|
now := time.Now().UTC()
|
||||||
|
return ProductionEnvelope{
|
||||||
|
FabricProtocolVersion: ProtocolVersion,
|
||||||
|
MessageID: messageID,
|
||||||
|
RouteID: "route-1",
|
||||||
|
ClusterID: "cluster-a",
|
||||||
|
SourceNodeID: "node-a",
|
||||||
|
DestinationNodeID: "node-b",
|
||||||
|
CurrentHopNodeID: "node-a",
|
||||||
|
NextHopNodeID: "node-b",
|
||||||
|
ChannelClass: ProductionChannelFabricControl,
|
||||||
|
MessageType: ProductionMessageFabricControl,
|
||||||
|
TTL: 8,
|
||||||
|
CreatedAt: now,
|
||||||
|
ExpiresAt: now.Add(time.Minute),
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -106,6 +106,9 @@ func (cfg ScopedSyntheticConfig) Validate(local PeerIdentity) error {
|
|||||||
if strings.TrimSpace(nodeID) == "" || strings.TrimSpace(endpoint) == "" {
|
if strings.TrimSpace(nodeID) == "" || strings.TrimSpace(endpoint) == "" {
|
||||||
return fmt.Errorf("scoped synthetic mesh config contains empty peer endpoint")
|
return fmt.Errorf("scoped synthetic mesh config contains empty peer endpoint")
|
||||||
}
|
}
|
||||||
|
if hasLegacyEndpointScheme(endpoint) {
|
||||||
|
return fmt.Errorf("scoped synthetic mesh config contains non-QUIC peer endpoint")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
for nodeID, candidates := range cfg.PeerEndpointCandidates {
|
for nodeID, candidates := range cfg.PeerEndpointCandidates {
|
||||||
if strings.TrimSpace(nodeID) == "" {
|
if strings.TrimSpace(nodeID) == "" {
|
||||||
@@ -121,6 +124,9 @@ func (cfg ScopedSyntheticConfig) Validate(local PeerIdentity) error {
|
|||||||
strings.TrimSpace(candidate.ConnectivityMode) == "" {
|
strings.TrimSpace(candidate.ConnectivityMode) == "" {
|
||||||
return fmt.Errorf("scoped synthetic mesh config contains invalid peer endpoint candidate")
|
return fmt.Errorf("scoped synthetic mesh config contains invalid peer endpoint candidate")
|
||||||
}
|
}
|
||||||
|
if !isQUICOnlyCandidateTransport(candidate.Transport) || hasLegacyEndpointScheme(candidate.Address) {
|
||||||
|
return fmt.Errorf("scoped synthetic mesh config contains non-QUIC peer endpoint candidate")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for endpointID, observation := range cfg.PeerEndpointObservations {
|
for endpointID, observation := range cfg.PeerEndpointObservations {
|
||||||
@@ -179,6 +185,14 @@ func validatePeerDirectory(entries []PeerDirectoryEntry, localNodeID string) err
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// hasLegacyEndpointScheme reports whether endpoint, once surrounding
// whitespace is trimmed and the text is lowercased, starts with one of the
// schemes "http://", "https://", "ws://" or "wss://".
func hasLegacyEndpointScheme(endpoint string) bool {
	normalized := strings.ToLower(strings.TrimSpace(endpoint))
	for _, scheme := range [...]string{"http://", "https://", "ws://", "wss://"} {
		if strings.HasPrefix(normalized, scheme) {
			return true
		}
	}
	return false
}
|
||||||
|
|
||||||
func validateRecoverySeeds(seeds []PeerRecoverySeed) error {
|
func validateRecoverySeeds(seeds []PeerRecoverySeed) error {
|
||||||
if len(seeds) > 20 {
|
if len(seeds) > 20 {
|
||||||
return fmt.Errorf("scoped synthetic mesh config contains too many recovery seeds")
|
return fmt.Errorf("scoped synthetic mesh config contains too many recovery seeds")
|
||||||
@@ -191,6 +205,9 @@ func validateRecoverySeeds(seeds []PeerRecoverySeed) error {
|
|||||||
strings.TrimSpace(seed.Transport) == "" {
|
strings.TrimSpace(seed.Transport) == "" {
|
||||||
return fmt.Errorf("scoped synthetic mesh config contains invalid recovery seed")
|
return fmt.Errorf("scoped synthetic mesh config contains invalid recovery seed")
|
||||||
}
|
}
|
||||||
|
if !isQUICOnlyCandidateTransport(seed.Transport) || hasLegacyEndpointScheme(seed.Endpoint) {
|
||||||
|
return fmt.Errorf("scoped synthetic mesh config contains non-QUIC recovery seed")
|
||||||
|
}
|
||||||
if _, duplicate := seen[key]; duplicate {
|
if _, duplicate := seen[key]; duplicate {
|
||||||
return fmt.Errorf("scoped synthetic mesh config contains duplicate recovery seed")
|
return fmt.Errorf("scoped synthetic mesh config contains duplicate recovery seed")
|
||||||
}
|
}
|
||||||
@@ -224,6 +241,9 @@ func validateRendezvousLeases(leases []PeerRendezvousLease, routes []SyntheticRo
|
|||||||
(len(lease.Metadata) > 0 && !json.Valid(lease.Metadata)) {
|
(len(lease.Metadata) > 0 && !json.Valid(lease.Metadata)) {
|
||||||
return fmt.Errorf("scoped synthetic mesh config contains invalid rendezvous lease")
|
return fmt.Errorf("scoped synthetic mesh config contains invalid rendezvous lease")
|
||||||
}
|
}
|
||||||
|
if !isQUICOnlyCandidateTransport(lease.Transport) || hasLegacyEndpointScheme(lease.RelayEndpoint) {
|
||||||
|
return fmt.Errorf("scoped synthetic mesh config contains non-QUIC rendezvous lease")
|
||||||
|
}
|
||||||
if _, duplicate := seen[lease.LeaseID]; duplicate {
|
if _, duplicate := seen[lease.LeaseID]; duplicate {
|
||||||
return fmt.Errorf("scoped synthetic mesh config contains duplicate rendezvous lease")
|
return fmt.Errorf("scoped synthetic mesh config contains duplicate rendezvous lease")
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -18,14 +18,14 @@ func TestLoadScopedSyntheticConfig(t *testing.T) {
|
|||||||
ConfigVersion: "config-v1",
|
ConfigVersion: "config-v1",
|
||||||
PeerDirectoryVersion: "peers-v1",
|
PeerDirectoryVersion: "peers-v1",
|
||||||
PolicyVersion: "policy-v1",
|
PolicyVersion: "policy-v1",
|
||||||
PeerEndpoints: map[string]string{"node-b": "http://127.0.0.1:19002"},
|
PeerEndpoints: map[string]string{"node-b": "quic://127.0.0.1:19443"},
|
||||||
PeerEndpointCandidates: map[string][]PeerEndpointCandidate{
|
PeerEndpointCandidates: map[string][]PeerEndpointCandidate{
|
||||||
"node-b": {
|
"node-b": {
|
||||||
{
|
{
|
||||||
EndpointID: "node-b-public",
|
EndpointID: "node-b-public",
|
||||||
NodeID: "node-b",
|
NodeID: "node-b",
|
||||||
Transport: "direct_tcp_tls",
|
Transport: "direct_quic",
|
||||||
Address: "203.0.113.20:443",
|
Address: "quic://203.0.113.20:19443",
|
||||||
Reachability: "public",
|
Reachability: "public",
|
||||||
NATType: "restricted",
|
NATType: "restricted",
|
||||||
ConnectivityMode: "direct",
|
ConnectivityMode: "direct",
|
||||||
@@ -55,8 +55,8 @@ func TestLoadScopedSyntheticConfig(t *testing.T) {
|
|||||||
RecoverySeeds: []PeerRecoverySeed{
|
RecoverySeeds: []PeerRecoverySeed{
|
||||||
{
|
{
|
||||||
NodeID: "node-b",
|
NodeID: "node-b",
|
||||||
Endpoint: "https://node-b.example.test:443",
|
Endpoint: "quic://node-b.example.test:19443",
|
||||||
Transport: "direct_tcp_tls",
|
Transport: "direct_quic",
|
||||||
ConnectivityMode: "direct",
|
ConnectivityMode: "direct",
|
||||||
Priority: 10,
|
Priority: 10,
|
||||||
},
|
},
|
||||||
@@ -66,8 +66,8 @@ func TestLoadScopedSyntheticConfig(t *testing.T) {
|
|||||||
LeaseID: "lease-node-b-via-node-r",
|
LeaseID: "lease-node-b-via-node-r",
|
||||||
PeerNodeID: "node-b",
|
PeerNodeID: "node-b",
|
||||||
RelayNodeID: "node-r",
|
RelayNodeID: "node-r",
|
||||||
RelayEndpoint: "http://node-r:19000",
|
RelayEndpoint: "quic://node-r:19443",
|
||||||
Transport: "relay_control",
|
Transport: "relay_quic",
|
||||||
ConnectivityMode: "relay_required",
|
ConnectivityMode: "relay_required",
|
||||||
RouteIDs: []string{"route-a-b"},
|
RouteIDs: []string{"route-a-b"},
|
||||||
AllowedChannels: []string{"fabric_control", "route_control"},
|
AllowedChannels: []string{"fabric_control", "route_control"},
|
||||||
@@ -158,8 +158,8 @@ func TestLoadScopedSyntheticConfigRejectsInvalidPeerEndpointCandidate(t *testing
|
|||||||
{
|
{
|
||||||
EndpointID: "node-b-public",
|
EndpointID: "node-b-public",
|
||||||
NodeID: "node-c",
|
NodeID: "node-c",
|
||||||
Transport: "direct_tcp_tls",
|
Transport: "direct_quic",
|
||||||
Address: "203.0.113.20:443",
|
Address: "quic://203.0.113.20:19443",
|
||||||
Reachability: "public",
|
Reachability: "public",
|
||||||
ConnectivityMode: "direct",
|
ConnectivityMode: "direct",
|
||||||
},
|
},
|
||||||
@@ -174,6 +174,73 @@ func TestLoadScopedSyntheticConfigRejectsInvalidPeerEndpointCandidate(t *testing
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestLoadScopedSyntheticConfigRejectsLegacyPeerEndpoint(t *testing.T) {
|
||||||
|
path := writeScopedConfig(t, ScopedSyntheticConfig{
|
||||||
|
SchemaVersion: "c17f.synthetic.v1",
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
LocalNodeID: "node-a",
|
||||||
|
PeerEndpoints: map[string]string{"node-b": "https://node-b.example.test:443"},
|
||||||
|
Routes: []SyntheticRoute{liveSyntheticRoute("route-a-b", []string{"node-a", "node-b"})},
|
||||||
|
})
|
||||||
|
|
||||||
|
_, err := LoadScopedSyntheticConfig(path, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"})
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected non-QUIC peer endpoint error")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLoadScopedSyntheticConfigRejectsLegacyPeerEndpointCandidateTransport(t *testing.T) {
|
||||||
|
path := writeScopedConfig(t, ScopedSyntheticConfig{
|
||||||
|
SchemaVersion: "c17f.synthetic.v1",
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
LocalNodeID: "node-a",
|
||||||
|
PeerEndpointCandidates: map[string][]PeerEndpointCandidate{
|
||||||
|
"node-b": {
|
||||||
|
{
|
||||||
|
EndpointID: "node-b-websocket",
|
||||||
|
NodeID: "node-b",
|
||||||
|
Transport: "websocket",
|
||||||
|
Address: "quic://203.0.113.20:19443",
|
||||||
|
Reachability: "public",
|
||||||
|
ConnectivityMode: "direct",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Routes: []SyntheticRoute{liveSyntheticRoute("route-a-b", []string{"node-a", "node-b"})},
|
||||||
|
})
|
||||||
|
|
||||||
|
_, err := LoadScopedSyntheticConfig(path, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"})
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected non-QUIC peer endpoint candidate error")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLoadScopedSyntheticConfigRejectsLegacyPeerEndpointCandidateScheme(t *testing.T) {
|
||||||
|
path := writeScopedConfig(t, ScopedSyntheticConfig{
|
||||||
|
SchemaVersion: "c17f.synthetic.v1",
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
LocalNodeID: "node-a",
|
||||||
|
PeerEndpointCandidates: map[string][]PeerEndpointCandidate{
|
||||||
|
"node-b": {
|
||||||
|
{
|
||||||
|
EndpointID: "node-b-https",
|
||||||
|
NodeID: "node-b",
|
||||||
|
Transport: "direct_quic",
|
||||||
|
Address: "https://node-b.example.test:443",
|
||||||
|
Reachability: "public",
|
||||||
|
ConnectivityMode: "direct",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Routes: []SyntheticRoute{liveSyntheticRoute("route-a-b", []string{"node-a", "node-b"})},
|
||||||
|
})
|
||||||
|
|
||||||
|
_, err := LoadScopedSyntheticConfig(path, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"})
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected non-QUIC peer endpoint candidate error")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestLoadScopedSyntheticConfigRejectsInvalidPeerEndpointObservation(t *testing.T) {
|
func TestLoadScopedSyntheticConfigRejectsInvalidPeerEndpointObservation(t *testing.T) {
|
||||||
path := writeScopedConfig(t, ScopedSyntheticConfig{
|
path := writeScopedConfig(t, ScopedSyntheticConfig{
|
||||||
SchemaVersion: "c17f.synthetic.v1",
|
SchemaVersion: "c17f.synthetic.v1",
|
||||||
@@ -217,7 +284,7 @@ func TestLoadScopedSyntheticConfigRejectsInvalidRecoverySeed(t *testing.T) {
|
|||||||
ClusterID: "cluster-1",
|
ClusterID: "cluster-1",
|
||||||
LocalNodeID: "node-a",
|
LocalNodeID: "node-a",
|
||||||
RecoverySeeds: []PeerRecoverySeed{
|
RecoverySeeds: []PeerRecoverySeed{
|
||||||
{NodeID: "node-b", Endpoint: "", Transport: "direct_tcp_tls"},
|
{NodeID: "node-b", Endpoint: "", Transport: "direct_quic"},
|
||||||
},
|
},
|
||||||
Routes: []SyntheticRoute{liveSyntheticRoute("route-a-b", []string{"node-a", "node-b"})},
|
Routes: []SyntheticRoute{liveSyntheticRoute("route-a-b", []string{"node-a", "node-b"})},
|
||||||
})
|
})
|
||||||
@@ -228,6 +295,23 @@ func TestLoadScopedSyntheticConfigRejectsInvalidRecoverySeed(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestLoadScopedSyntheticConfigRejectsLegacyRecoverySeed(t *testing.T) {
|
||||||
|
path := writeScopedConfig(t, ScopedSyntheticConfig{
|
||||||
|
SchemaVersion: "c17f.synthetic.v1",
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
LocalNodeID: "node-a",
|
||||||
|
RecoverySeeds: []PeerRecoverySeed{
|
||||||
|
{NodeID: "node-b", Endpoint: "https://node-b.example.test:443", Transport: "direct_quic"},
|
||||||
|
},
|
||||||
|
Routes: []SyntheticRoute{liveSyntheticRoute("route-a-b", []string{"node-a", "node-b"})},
|
||||||
|
})
|
||||||
|
|
||||||
|
_, err := LoadScopedSyntheticConfig(path, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"})
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected non-QUIC recovery seed error")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestLoadScopedSyntheticConfigRejectsInvalidRendezvousLease(t *testing.T) {
|
func TestLoadScopedSyntheticConfigRejectsInvalidRendezvousLease(t *testing.T) {
|
||||||
path := writeScopedConfig(t, ScopedSyntheticConfig{
|
path := writeScopedConfig(t, ScopedSyntheticConfig{
|
||||||
SchemaVersion: "c17z12.synthetic.v1",
|
SchemaVersion: "c17z12.synthetic.v1",
|
||||||
@@ -238,8 +322,8 @@ func TestLoadScopedSyntheticConfigRejectsInvalidRendezvousLease(t *testing.T) {
|
|||||||
LeaseID: "lease-node-b-via-node-r",
|
LeaseID: "lease-node-b-via-node-r",
|
||||||
PeerNodeID: "node-b",
|
PeerNodeID: "node-b",
|
||||||
RelayNodeID: "node-r",
|
RelayNodeID: "node-r",
|
||||||
RelayEndpoint: "http://node-r:19000",
|
RelayEndpoint: "quic://node-r:19443",
|
||||||
Transport: "relay_control",
|
Transport: "relay_quic",
|
||||||
RouteIDs: []string{"route-a-b"},
|
RouteIDs: []string{"route-a-b"},
|
||||||
ExpiresAt: time.Now().UTC().Add(time.Hour),
|
ExpiresAt: time.Now().UTC().Add(time.Hour),
|
||||||
},
|
},
|
||||||
@@ -253,6 +337,36 @@ func TestLoadScopedSyntheticConfigRejectsInvalidRendezvousLease(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestLoadScopedSyntheticConfigRejectsLegacyRendezvousLease(t *testing.T) {
|
||||||
|
path := writeScopedConfig(t, ScopedSyntheticConfig{
|
||||||
|
SchemaVersion: "c17z12.synthetic.v1",
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
LocalNodeID: "node-a",
|
||||||
|
RendezvousLeases: []PeerRendezvousLease{
|
||||||
|
{
|
||||||
|
LeaseID: "lease-node-b-via-node-r",
|
||||||
|
PeerNodeID: "node-b",
|
||||||
|
RelayNodeID: "node-r",
|
||||||
|
RelayEndpoint: "https://node-r.example.test:443",
|
||||||
|
Transport: "relay_quic",
|
||||||
|
ConnectivityMode: "relay_required",
|
||||||
|
RouteIDs: []string{"route-a-b"},
|
||||||
|
AllowedChannels: []string{"fabric_control", "route_control"},
|
||||||
|
Priority: 10,
|
||||||
|
ControlPlaneOnly: true,
|
||||||
|
IssuedAt: time.Now().UTC().Add(-time.Minute),
|
||||||
|
ExpiresAt: time.Now().UTC().Add(time.Hour),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Routes: []SyntheticRoute{liveSyntheticRoute("route-a-b", []string{"node-a", "node-r", "node-b"})},
|
||||||
|
})
|
||||||
|
|
||||||
|
_, err := LoadScopedSyntheticConfig(path, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"})
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected non-QUIC rendezvous lease error")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func writeScopedConfig(t *testing.T, cfg ScopedSyntheticConfig) string {
|
func writeScopedConfig(t *testing.T, cfg ScopedSyntheticConfig) string {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
payload, err := json.Marshal(cfg)
|
payload, err := json.Marshal(cfg)
|
||||||
@@ -265,3 +379,32 @@ func writeScopedConfig(t *testing.T, cfg ScopedSyntheticConfig) string {
|
|||||||
}
|
}
|
||||||
return path
|
return path
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func liveSyntheticRoute(routeID string, hops []string) SyntheticRoute {
|
||||||
|
return SyntheticRoute{
|
||||||
|
RouteID: routeID,
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
SourceNodeID: hops[0],
|
||||||
|
DestinationNodeID: hops[len(hops)-1],
|
||||||
|
Hops: hops,
|
||||||
|
AllowedChannels: []string{SyntheticChannelFabricControl},
|
||||||
|
MaxTTL: 8,
|
||||||
|
MaxHops: 8,
|
||||||
|
ExpiresAt: time.Now().UTC().Add(time.Hour),
|
||||||
|
RouteVersion: "route-v1",
|
||||||
|
PolicyVersion: "policy-v1",
|
||||||
|
PeerDirectoryVersion: "peers-v1",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// sameStrings reports whether left and right hold the same strings in the
// same order. A nil slice and an empty slice compare equal.
func sameStrings(left, right []string) bool {
	if len(left) != len(right) {
		return false
	}
	for idx, item := range left {
		if right[idx] != item {
			return false
		}
	}
	return true
}
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user