Refactor RDP proxy handling and update related tests
This commit is contained in:
@@ -7,7 +7,7 @@ import (
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/state"
|
||||
)
|
||||
|
||||
// Version is the version string declared by this package.
//
// Only one declaration is kept: the file previously carried both the
// superseded value ("0.2.280-fabricsession") and its replacement, which is a
// redeclaration error.
const Version = "0.2.309-latencyaware"
|
||||
|
||||
func EnrollmentPayload(clusterID, joinToken string, identity state.Identity) client.EnrollRequest {
|
||||
return client.EnrollRequest{
|
||||
@@ -38,9 +38,12 @@ func EnrollmentPayload(clusterID, joinToken string, identity state.Identity) cli
|
||||
"vpn_local_gateway_shortcut": false,
|
||||
"vpn_farm_owned_dataplane": true,
|
||||
"fabric_data_session_v1": true,
|
||||
"fabric_session_websocket_smoke": true,
|
||||
"fabric_session_quic_smoke": true,
|
||||
"vpn_backend_relay_fallback": false,
|
||||
"fabric_service_channel_required": true,
|
||||
"web_ingress_workload_contract": "rap.web_ingress.workload_contract.v1",
|
||||
"web_ingress_real_listener_gate": "RAP_WEB_INGRESS_RUNTIME_ENABLED",
|
||||
"web_ingress_runtime_enabled": false,
|
||||
"external_backend_entry_proxy": true,
|
||||
},
|
||||
ReportedFacts: map[string]any{
|
||||
@@ -67,9 +70,12 @@ func HeartbeatPayload() client.HeartbeatRequest {
|
||||
"vpn_local_gateway_shortcut": false,
|
||||
"vpn_farm_owned_dataplane": true,
|
||||
"fabric_data_session_v1": true,
|
||||
"fabric_session_websocket_smoke": true,
|
||||
"fabric_session_quic_smoke": true,
|
||||
"vpn_backend_relay_fallback": false,
|
||||
"fabric_service_channel_required": true,
|
||||
"web_ingress_workload_contract": "rap.web_ingress.workload_contract.v1",
|
||||
"web_ingress_real_listener_gate": "RAP_WEB_INGRESS_RUNTIME_ENABLED",
|
||||
"web_ingress_runtime_enabled": false,
|
||||
"external_backend_entry_proxy": true,
|
||||
},
|
||||
ServiceStates: map[string]any{
|
||||
|
||||
@@ -14,6 +14,8 @@ import (
|
||||
// Schema identifiers and algorithm labels for cluster-authority documents.
const (
	// AuthoritySchemaVersion is the schema identifier for cluster-authority documents.
	AuthoritySchemaVersion = "rap.cluster_authority.v1"

	// SignatureSchemaVersion is the schema_version VerifyRaw requires on a Signature.
	SignatureSchemaVersion = "rap.cluster_authority.signature.v1"

	// QuorumSchemaVersion is the schema_version VerifyQuorumRaw requires on a QuorumDescriptor.
	QuorumSchemaVersion = "rap.cluster_authority.quorum.v1"

	// QuorumEnvelopeVersion is the schema_version VerifyQuorumRaw requires on a QuorumEnvelope.
	QuorumEnvelopeVersion = "rap.cluster_authority.quorum_envelope.v1"

	// AlgorithmEd25519 is the algorithm label carried by Ed25519 signatures.
	AlgorithmEd25519 = "ed25519"
)
|
||||
|
||||
@@ -30,6 +32,34 @@ type Signature struct {
|
||||
Signature string `json:"signature"`
|
||||
}
|
||||
|
||||
// QuorumMember is one entry in a QuorumDescriptor's member list.
type QuorumMember struct {
	// NodeID and Role are optional labels; both are omitted from JSON when empty.
	NodeID string `json:"node_id,omitempty"`
	Role   string `json:"role,omitempty"`

	// PublicKey is the member's encoded public key; VerifyQuorumRaw hands it
	// to VerifyRaw as the key to check this member's signature against.
	PublicKey string `json:"public_key"`

	// PublicKeyFingerprint identifies the member when matching signatures.
	// VerifyQuorumRaw derives it from PublicKey when it is blank.
	PublicKeyFingerprint string `json:"public_key_fingerprint"`

	// Scopes lists the scopes this member may sign for; "*" matches any scope.
	Scopes []string `json:"scopes,omitempty"`
}
|
||||
|
||||
type QuorumDescriptor struct {
|
||||
SchemaVersion string `json:"schema_version"`
|
||||
ClusterID string `json:"cluster_id"`
|
||||
Epoch string `json:"epoch"`
|
||||
Threshold int `json:"threshold"`
|
||||
Members []QuorumMember `json:"members"`
|
||||
}
|
||||
|
||||
type QuorumEnvelope struct {
|
||||
SchemaVersion string `json:"schema_version"`
|
||||
ClusterID string `json:"cluster_id"`
|
||||
Epoch string `json:"epoch"`
|
||||
Threshold int `json:"threshold"`
|
||||
PayloadSHA256 string `json:"payload_sha256"`
|
||||
QuorumSHA256 string `json:"quorum_sha256"`
|
||||
Signatures []Signature `json:"signatures"`
|
||||
AllowedScopes []string `json:"allowed_scopes,omitempty"`
|
||||
DecisionReason string `json:"decision_reason,omitempty"`
|
||||
}
|
||||
|
||||
func VerifyRaw(publicKeyB64 string, payload json.RawMessage, signature Signature) error {
|
||||
if signature.SchemaVersion != SignatureSchemaVersion {
|
||||
return fmt.Errorf("%w: schema_version must be %s", ErrInvalidSignature, SignatureSchemaVersion)
|
||||
@@ -58,6 +88,86 @@ func VerifyRaw(publicKeyB64 string, payload json.RawMessage, signature Signature
|
||||
return nil
|
||||
}
|
||||
|
||||
func VerifyQuorumRaw(descriptor QuorumDescriptor, payload json.RawMessage, envelope QuorumEnvelope, requiredScope string) error {
|
||||
if descriptor.SchemaVersion != QuorumSchemaVersion {
|
||||
return fmt.Errorf("%w: quorum schema_version must be %s", ErrInvalidSignature, QuorumSchemaVersion)
|
||||
}
|
||||
if envelope.SchemaVersion != QuorumEnvelopeVersion {
|
||||
return fmt.Errorf("%w: quorum envelope schema_version must be %s", ErrInvalidSignature, QuorumEnvelopeVersion)
|
||||
}
|
||||
if strings.TrimSpace(descriptor.ClusterID) == "" || descriptor.ClusterID != envelope.ClusterID {
|
||||
return fmt.Errorf("%w: quorum cluster mismatch", ErrInvalidSignature)
|
||||
}
|
||||
if strings.TrimSpace(descriptor.Epoch) == "" || descriptor.Epoch != envelope.Epoch {
|
||||
return fmt.Errorf("%w: quorum epoch mismatch", ErrInvalidSignature)
|
||||
}
|
||||
threshold := descriptor.Threshold
|
||||
if envelope.Threshold > threshold {
|
||||
threshold = envelope.Threshold
|
||||
}
|
||||
if threshold <= 0 || threshold > len(descriptor.Members) {
|
||||
return fmt.Errorf("%w: invalid quorum threshold", ErrInvalidSignature)
|
||||
}
|
||||
payloadHash, err := HashRaw(payload)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if envelope.PayloadSHA256 != payloadHash {
|
||||
return fmt.Errorf("%w: quorum payload hash mismatch", ErrInvalidSignature)
|
||||
}
|
||||
descriptorHash, err := HashRaw(mustMarshalQuorumDescriptor(descriptor))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if envelope.QuorumSHA256 != descriptorHash {
|
||||
return fmt.Errorf("%w: quorum descriptor hash mismatch", ErrInvalidSignature)
|
||||
}
|
||||
members := map[string]QuorumMember{}
|
||||
for _, member := range descriptor.Members {
|
||||
fingerprint := strings.TrimSpace(member.PublicKeyFingerprint)
|
||||
if fingerprint == "" {
|
||||
publicKey, err := decodePublicKey(member.PublicKey)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
fingerprint = Fingerprint(publicKey)
|
||||
}
|
||||
if _, exists := members[fingerprint]; exists {
|
||||
return fmt.Errorf("%w: duplicate quorum member", ErrInvalidSignature)
|
||||
}
|
||||
member.PublicKeyFingerprint = fingerprint
|
||||
members[fingerprint] = member
|
||||
}
|
||||
seen := map[string]bool{}
|
||||
valid := 0
|
||||
for _, signature := range envelope.Signatures {
|
||||
fingerprint := strings.TrimSpace(signature.KeyFingerprint)
|
||||
if seen[fingerprint] {
|
||||
continue
|
||||
}
|
||||
member, ok := members[fingerprint]
|
||||
if !ok {
|
||||
return fmt.Errorf("%w: quorum signer is not a member", ErrInvalidSignature)
|
||||
}
|
||||
if requiredScope != "" && !memberAllowsScope(member, requiredScope) {
|
||||
return fmt.Errorf("%w: quorum signer scope mismatch", ErrInvalidSignature)
|
||||
}
|
||||
if err := VerifyRaw(member.PublicKey, payload, signature); err != nil {
|
||||
return err
|
||||
}
|
||||
seen[fingerprint] = true
|
||||
valid++
|
||||
}
|
||||
if valid < threshold {
|
||||
return fmt.Errorf("%w: quorum threshold not met", ErrInvalidSignature)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func QuorumDescriptorHash(descriptor QuorumDescriptor) (string, error) {
|
||||
return HashRaw(mustMarshalQuorumDescriptor(descriptor))
|
||||
}
|
||||
|
||||
func Fingerprint(publicKey ed25519.PublicKey) string {
|
||||
sum := sha256.Sum256(publicKey)
|
||||
return "rap-ca-ed25519-" + hex.EncodeToString(sum[:16])
|
||||
@@ -72,6 +182,28 @@ func HashRaw(raw json.RawMessage) (string, error) {
|
||||
return hex.EncodeToString(sum[:]), nil
|
||||
}
|
||||
|
||||
func mustMarshalQuorumDescriptor(descriptor QuorumDescriptor) json.RawMessage {
|
||||
raw, err := json.Marshal(descriptor)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
return raw
|
||||
}
|
||||
|
||||
func memberAllowsScope(member QuorumMember, requiredScope string) bool {
|
||||
requiredScope = strings.TrimSpace(requiredScope)
|
||||
if requiredScope == "" {
|
||||
return true
|
||||
}
|
||||
for _, scope := range member.Scopes {
|
||||
scope = strings.TrimSpace(scope)
|
||||
if scope == "*" || scope == requiredScope {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func CanonicalJSON(raw json.RawMessage) ([]byte, error) {
|
||||
if len(raw) == 0 {
|
||||
return nil, fmt.Errorf("%w: empty payload", ErrInvalidPayload)
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"testing"
|
||||
)
|
||||
|
||||
@@ -50,3 +51,114 @@ func TestVerifyRawRejectsTamperedPayload(t *testing.T) {
|
||||
t.Fatalf("err = %v, want ErrInvalidSignature", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestVerifyQuorumRawAcceptsThreshold(t *testing.T) {
|
||||
payload := json.RawMessage(`{"schema_version":"rap.node_update_plan_authority.v1","cluster_id":"cluster-1","action":"update"}`)
|
||||
descriptor, privateKeys := testQuorumDescriptor(t, 3, 2)
|
||||
payloadHash, err := HashRaw(payload)
|
||||
if err != nil {
|
||||
t.Fatalf("payload hash: %v", err)
|
||||
}
|
||||
quorumHash, err := QuorumDescriptorHash(descriptor)
|
||||
if err != nil {
|
||||
t.Fatalf("quorum hash: %v", err)
|
||||
}
|
||||
envelope := QuorumEnvelope{
|
||||
SchemaVersion: QuorumEnvelopeVersion,
|
||||
ClusterID: "cluster-1",
|
||||
Epoch: "epoch-1",
|
||||
Threshold: 2,
|
||||
PayloadSHA256: payloadHash,
|
||||
QuorumSHA256: quorumHash,
|
||||
Signatures: []Signature{
|
||||
signTestPayload(t, payload, privateKeys[0]),
|
||||
signTestPayload(t, payload, privateKeys[1]),
|
||||
},
|
||||
}
|
||||
if err := VerifyQuorumRaw(descriptor, payload, envelope, "update-authority"); err != nil {
|
||||
t.Fatalf("VerifyQuorumRaw: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestVerifyQuorumRawRejectsBelowThreshold(t *testing.T) {
|
||||
payload := json.RawMessage(`{"schema_version":"rap.node_update_plan_authority.v1","cluster_id":"cluster-1","action":"update"}`)
|
||||
descriptor, privateKeys := testQuorumDescriptor(t, 3, 2)
|
||||
payloadHash, _ := HashRaw(payload)
|
||||
quorumHash, _ := QuorumDescriptorHash(descriptor)
|
||||
envelope := QuorumEnvelope{
|
||||
SchemaVersion: QuorumEnvelopeVersion,
|
||||
ClusterID: "cluster-1",
|
||||
Epoch: "epoch-1",
|
||||
Threshold: 2,
|
||||
PayloadSHA256: payloadHash,
|
||||
QuorumSHA256: quorumHash,
|
||||
Signatures: []Signature{signTestPayload(t, payload, privateKeys[0])},
|
||||
}
|
||||
if err := VerifyQuorumRaw(descriptor, payload, envelope, "update-authority"); !errors.Is(err, ErrInvalidSignature) {
|
||||
t.Fatalf("err = %v, want ErrInvalidSignature", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestVerifyQuorumRawRejectsTamperedDescriptor(t *testing.T) {
|
||||
payload := json.RawMessage(`{"schema_version":"rap.node_update_plan_authority.v1","cluster_id":"cluster-1","action":"update"}`)
|
||||
descriptor, privateKeys := testQuorumDescriptor(t, 3, 2)
|
||||
payloadHash, _ := HashRaw(payload)
|
||||
quorumHash, _ := QuorumDescriptorHash(descriptor)
|
||||
descriptor.Threshold = 1
|
||||
envelope := QuorumEnvelope{
|
||||
SchemaVersion: QuorumEnvelopeVersion,
|
||||
ClusterID: "cluster-1",
|
||||
Epoch: "epoch-1",
|
||||
Threshold: 2,
|
||||
PayloadSHA256: payloadHash,
|
||||
QuorumSHA256: quorumHash,
|
||||
Signatures: []Signature{
|
||||
signTestPayload(t, payload, privateKeys[0]),
|
||||
signTestPayload(t, payload, privateKeys[1]),
|
||||
},
|
||||
}
|
||||
if err := VerifyQuorumRaw(descriptor, payload, envelope, "update-authority"); !errors.Is(err, ErrInvalidSignature) {
|
||||
t.Fatalf("err = %v, want ErrInvalidSignature", err)
|
||||
}
|
||||
}
|
||||
|
||||
func testQuorumDescriptor(t *testing.T, members int, threshold int) (QuorumDescriptor, []ed25519.PrivateKey) {
|
||||
t.Helper()
|
||||
descriptor := QuorumDescriptor{
|
||||
SchemaVersion: QuorumSchemaVersion,
|
||||
ClusterID: "cluster-1",
|
||||
Epoch: "epoch-1",
|
||||
Threshold: threshold,
|
||||
}
|
||||
privateKeys := make([]ed25519.PrivateKey, 0, members)
|
||||
for i := 0; i < members; i++ {
|
||||
publicKey, privateKey, err := ed25519.GenerateKey(nil)
|
||||
if err != nil {
|
||||
t.Fatalf("GenerateKey: %v", err)
|
||||
}
|
||||
descriptor.Members = append(descriptor.Members, QuorumMember{
|
||||
NodeID: fmt.Sprintf("authority-%d", i+1),
|
||||
Role: "update-authority",
|
||||
PublicKey: base64.StdEncoding.EncodeToString(publicKey),
|
||||
PublicKeyFingerprint: Fingerprint(publicKey),
|
||||
Scopes: []string{"update-authority"},
|
||||
})
|
||||
privateKeys = append(privateKeys, privateKey)
|
||||
}
|
||||
return descriptor, privateKeys
|
||||
}
|
||||
|
||||
func signTestPayload(t *testing.T, payload json.RawMessage, privateKey ed25519.PrivateKey) Signature {
|
||||
t.Helper()
|
||||
canonical, err := CanonicalJSON(payload)
|
||||
if err != nil {
|
||||
t.Fatalf("CanonicalJSON: %v", err)
|
||||
}
|
||||
publicKey := privateKey.Public().(ed25519.PublicKey)
|
||||
return Signature{
|
||||
SchemaVersion: SignatureSchemaVersion,
|
||||
Algorithm: AlgorithmEd25519,
|
||||
KeyFingerprint: Fingerprint(publicKey),
|
||||
Signature: base64.StdEncoding.EncodeToString(ed25519.Sign(privateKey, canonical)),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
@@ -17,6 +18,17 @@ type Client struct {
|
||||
httpClient *http.Client
|
||||
}
|
||||
|
||||
// RawControlRequest describes an arbitrary request for Client.RawControl to
// forward to the backend.
type RawControlRequest struct {
	// Method is the HTTP method; RawControl upper-cases it and defaults to GET.
	Method string `json:"method"`

	// Path is appended to the client's base URL and must start with "/".
	Path string `json:"path"`

	// Body is sent as-is when it is non-empty and not the JSON literal null.
	Body json.RawMessage `json:"body,omitempty"`
}
|
||||
|
||||
// RawControlResponse is what Client.RawControl returns for a 2xx backend reply.
type RawControlResponse struct {
	// StatusCode is the HTTP status code returned by the backend.
	StatusCode int `json:"status_code"`

	// Body holds the response bytes as received; omitted from JSON when empty.
	Body json.RawMessage `json:"body,omitempty"`
}
|
||||
|
||||
type EnrollRequest struct {
|
||||
ClusterID string `json:"cluster_id"`
|
||||
JoinToken string `json:"join_token"`
|
||||
@@ -46,14 +58,15 @@ type EnrollmentBootstrapResponse struct {
|
||||
}
|
||||
|
||||
type NodeBootstrap struct {
|
||||
NodeID string `json:"node_id"`
|
||||
ClusterID string `json:"cluster_id"`
|
||||
IdentityStatus string `json:"identity_status"`
|
||||
Certificate map[string]any `json:"certificate"`
|
||||
HeartbeatEndpoint string `json:"heartbeat_endpoint"`
|
||||
ClusterAuthority *ClusterAuthorityDescriptor `json:"cluster_authority,omitempty"`
|
||||
AuthorityPayload json.RawMessage `json:"authority_payload,omitempty"`
|
||||
AuthoritySignature *ClusterSignature `json:"authority_signature,omitempty"`
|
||||
NodeID string `json:"node_id"`
|
||||
ClusterID string `json:"cluster_id"`
|
||||
IdentityStatus string `json:"identity_status"`
|
||||
Certificate map[string]any `json:"certificate"`
|
||||
HeartbeatEndpoint string `json:"heartbeat_endpoint"`
|
||||
ClusterAuthority *ClusterAuthorityDescriptor `json:"cluster_authority,omitempty"`
|
||||
ClusterAuthorityQuorum json.RawMessage `json:"cluster_authority_quorum,omitempty"`
|
||||
AuthorityPayload json.RawMessage `json:"authority_payload,omitempty"`
|
||||
AuthoritySignature *ClusterSignature `json:"authority_signature,omitempty"`
|
||||
}
|
||||
|
||||
type HeartbeatRequest struct {
|
||||
@@ -123,6 +136,7 @@ type NodeUpdatePlan struct {
|
||||
Artifact *ReleaseArtifact `json:"artifact,omitempty"`
|
||||
AuthorityPayload json.RawMessage `json:"authority_payload,omitempty"`
|
||||
AuthoritySignature *ClusterSignature `json:"authority_signature,omitempty"`
|
||||
AuthorityQuorum *QuorumEnvelope `json:"authority_quorum,omitempty"`
|
||||
ProductionForwarding bool `json:"production_forwarding"`
|
||||
}
|
||||
|
||||
@@ -293,6 +307,26 @@ type SyntheticMeshConfig struct {
|
||||
ProductionForwarding bool `json:"production_forwarding"`
|
||||
}
|
||||
|
||||
// AdminRuntimeProjectionRequest is the JSON body posted by
// Client.AdminRuntimeProjection.
type AdminRuntimeProjectionRequest struct {
	SchemaVersion string `json:"schema_version"`

	// Method, Path, Query and Host describe a request; Query and Host are
	// omitted from JSON when empty.
	Method string `json:"method"`
	Path   string `json:"path"`
	Query  string `json:"query,omitempty"`
	Host   string `json:"host,omitempty"`

	Scope        string `json:"scope"`
	ServiceClass string `json:"service_class"`

	// ObservedAt is carried as a string. NOTE(review): the timestamp format is
	// not fixed by this type — confirm with the backend contract.
	ObservedAt string `json:"observed_at"`
}
|
||||
|
||||
// AdminRuntimeProjectionResponse is the JSON reply decoded by
// Client.AdminRuntimeProjection.
type AdminRuntimeProjectionResponse struct {
	SchemaVersion string `json:"schema_version"`
	Status        string `json:"status"`

	// Reason is optional and omitted from JSON when empty.
	Reason string `json:"reason,omitempty"`

	// StatusCode, Headers and Body describe a response; Headers and Body are
	// omitted from JSON when empty, and Body is kept as raw JSON.
	StatusCode int               `json:"status_code"`
	Headers    map[string]string `json:"headers,omitempty"`
	Body       json.RawMessage   `json:"body,omitempty"`
}
|
||||
|
||||
func (c *SyntheticMeshConfig) UnmarshalJSON(data []byte) error {
|
||||
type syntheticMeshConfigAlias SyntheticMeshConfig
|
||||
var decoded syntheticMeshConfigAlias
|
||||
@@ -448,6 +482,18 @@ type ClusterSignature struct {
|
||||
SignedAt time.Time `json:"signed_at"`
|
||||
}
|
||||
|
||||
type QuorumEnvelope struct {
|
||||
SchemaVersion string `json:"schema_version"`
|
||||
ClusterID string `json:"cluster_id"`
|
||||
Epoch string `json:"epoch"`
|
||||
Threshold int `json:"threshold"`
|
||||
PayloadSHA256 string `json:"payload_sha256"`
|
||||
QuorumSHA256 string `json:"quorum_sha256"`
|
||||
Signatures []ClusterSignature `json:"signatures"`
|
||||
AllowedScopes []string `json:"allowed_scopes,omitempty"`
|
||||
DecisionReason string `json:"decision_reason,omitempty"`
|
||||
}
|
||||
|
||||
type PeerDirectoryEntry struct {
|
||||
NodeID string `json:"node_id"`
|
||||
RouteIDs []string `json:"route_ids,omitempty"`
|
||||
@@ -744,6 +790,50 @@ func (c *Client) SyntheticMeshConfig(ctx context.Context, clusterID, nodeID stri
|
||||
return response.Config, nil
|
||||
}
|
||||
|
||||
func (c *Client) AdminRuntimeProjection(ctx context.Context, clusterID, nodeID string, request AdminRuntimeProjectionRequest) (AdminRuntimeProjectionResponse, error) {
|
||||
var response AdminRuntimeProjectionResponse
|
||||
path := fmt.Sprintf("/clusters/%s/nodes/%s/admin-runtime/projection", clusterID, nodeID)
|
||||
if err := c.postJSON(ctx, path, request, &response); err != nil {
|
||||
return AdminRuntimeProjectionResponse{}, err
|
||||
}
|
||||
return response, nil
|
||||
}
|
||||
|
||||
func (c *Client) RawControl(ctx context.Context, request RawControlRequest) (RawControlResponse, error) {
|
||||
method := strings.ToUpper(strings.TrimSpace(request.Method))
|
||||
if method == "" {
|
||||
method = http.MethodGet
|
||||
}
|
||||
path := strings.TrimSpace(request.Path)
|
||||
if !strings.HasPrefix(path, "/") {
|
||||
return RawControlResponse{}, fmt.Errorf("control path must be relative")
|
||||
}
|
||||
var body io.Reader
|
||||
if len(request.Body) > 0 && string(request.Body) != "null" {
|
||||
body = bytes.NewReader(request.Body)
|
||||
}
|
||||
httpReq, err := http.NewRequestWithContext(ctx, method, c.baseURL+path, body)
|
||||
if err != nil {
|
||||
return RawControlResponse{}, err
|
||||
}
|
||||
if body != nil {
|
||||
httpReq.Header.Set("Content-Type", "application/json")
|
||||
}
|
||||
httpResp, err := c.httpClient.Do(httpReq)
|
||||
if err != nil {
|
||||
return RawControlResponse{}, err
|
||||
}
|
||||
defer httpResp.Body.Close()
|
||||
payload, err := io.ReadAll(io.LimitReader(httpResp.Body, 2*1024*1024))
|
||||
if err != nil {
|
||||
return RawControlResponse{}, err
|
||||
}
|
||||
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
|
||||
return RawControlResponse{}, fmt.Errorf("backend returned status %d: %s", httpResp.StatusCode, string(payload))
|
||||
}
|
||||
return RawControlResponse{StatusCode: httpResp.StatusCode, Body: json.RawMessage(payload)}, nil
|
||||
}
|
||||
|
||||
func (c *Client) getJSON(ctx context.Context, path string, response any) error {
|
||||
httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, c.baseURL+path, nil)
|
||||
if err != nil {
|
||||
|
||||
@@ -21,6 +21,11 @@ type Config struct {
|
||||
NodeName string
|
||||
StateDir string
|
||||
WorkloadSupervisionEnabled bool
|
||||
WebIngressRuntimeEnabled bool
|
||||
WebIngressSigningPrivateKey string
|
||||
WebIngressSigningKeyID string
|
||||
WebIngressTrustedKeysJSON string
|
||||
WebIngressRuntimeServiceClasses string
|
||||
HeartbeatInterval time.Duration
|
||||
EnrollmentPollInterval time.Duration
|
||||
EnrollmentPollTimeout time.Duration
|
||||
@@ -43,6 +48,12 @@ type Config struct {
|
||||
MeshAdvertiseTransport string
|
||||
MeshConnectivityMode string
|
||||
MeshNATType string
|
||||
MeshLocalSegmentID string
|
||||
MeshNATGroupID string
|
||||
MeshSTUNReflexiveEndpoint string
|
||||
MeshSTUNServer string
|
||||
MeshRelayNodeID string
|
||||
MeshRelayEndpoint string
|
||||
MeshRegion string
|
||||
MeshSyntheticConfigPath string
|
||||
MeshPeerEndpointsJSON string
|
||||
@@ -68,9 +79,14 @@ func Load(args []string, env map[string]string) (Config, error) {
|
||||
fs.StringVar(&cfg.NodeName, "node-name", getEnv(env, "RAP_NODE_NAME", hostnameOrDefault()), "Node display name.")
|
||||
fs.StringVar(&cfg.StateDir, "state-dir", getEnv(env, "RAP_NODE_STATE_DIR", defaultStateDir), "Local node-agent state directory.")
|
||||
fs.BoolVar(&cfg.WorkloadSupervisionEnabled, "workload-supervision-enabled", getEnvBool(env, "RAP_WORKLOAD_SUPERVISION_ENABLED", false), "Enable desired workload polling and status reporting. Disabled by default while service runtime is not implemented.")
|
||||
fs.BoolVar(&cfg.WebIngressRuntimeEnabled, "web-ingress-runtime-enabled", getEnvBool(env, "RAP_WEB_INGRESS_RUNTIME_ENABLED", false), "Enable the future real 80/443 web ingress listener runtime. Disabled by default; contract probe remains safe without it.")
|
||||
fs.StringVar(&cfg.WebIngressSigningPrivateKey, "web-ingress-signing-private-key", getEnv(env, "RAP_WEB_INGRESS_SIGNING_PRIVATE_KEY", ""), "Base64 Ed25519 private key used to sign web ingress fabric envelopes. Empty keeps signing disabled.")
|
||||
fs.StringVar(&cfg.WebIngressSigningKeyID, "web-ingress-signing-key-id", getEnv(env, "RAP_WEB_INGRESS_SIGNING_KEY_ID", ""), "Optional key id for web ingress envelope signatures.")
|
||||
fs.StringVar(&cfg.WebIngressTrustedKeysJSON, "web-ingress-trusted-keys-json", getEnv(env, "RAP_WEB_INGRESS_TRUSTED_KEYS_JSON", ""), "JSON map or array of trusted Ed25519 public keys for web ingress runtime receiver.")
|
||||
fs.StringVar(&cfg.WebIngressRuntimeServiceClasses, "web-ingress-runtime-service-classes", getEnv(env, "RAP_WEB_INGRESS_RUNTIME_SERVICE_CLASSES", ""), "Optional comma-separated allow-list of web ingress runtime service classes accepted by this node.")
|
||||
fs.BoolVar(&cfg.MeshSyntheticRuntimeEnabled, "mesh-synthetic-runtime-enabled", getEnvBool(env, "RAP_MESH_SYNTHETIC_RUNTIME_ENABLED", false), "Enable C17A synthetic fabric probe runtime. Disabled by default.")
|
||||
fs.BoolVar(&cfg.MeshProductionForwardingEnabled, "mesh-production-forwarding-enabled", getEnvBool(env, "RAP_MESH_PRODUCTION_FORWARDING_ENABLED", false), "Enable production fabric-control direct next-hop forwarding gate. Disabled by default.")
|
||||
fs.BoolVar(&cfg.MeshFabricSessionEnabled, "mesh-fabric-session-enabled", getEnvBool(env, "RAP_MESH_FABRIC_SESSION_ENABLED", false), "Enable authenticated fabric session WebSocket endpoint. Disabled by default.")
|
||||
fs.BoolVar(&cfg.MeshFabricSessionEnabled, "mesh-fabric-session-enabled", getEnvBool(env, "RAP_MESH_FABRIC_SESSION_ENABLED", false), "Enable authenticated fabric session endpoint. Disabled by default.")
|
||||
fs.BoolVar(&cfg.VPNFabricSessionTransportEnabled, "vpn-fabric-session-transport-enabled", getEnvBool(env, "RAP_VPN_FABRIC_SESSION_TRANSPORT_ENABLED", false), "Route VPN packet transport over persistent fabric session when explicitly enabled. Disabled by default.")
|
||||
fs.BoolVar(&cfg.MeshQUICFabricEnabled, "mesh-quic-fabric-enabled", getEnvBool(env, "RAP_MESH_QUIC_FABRIC_ENABLED", false), "Enable QUIC/UDP fabric listener. Disabled by default.")
|
||||
fs.StringVar(&cfg.MeshQUICFabricListenAddr, "mesh-quic-fabric-listen-addr", getEnv(env, "RAP_MESH_QUIC_FABRIC_LISTEN_ADDR", ""), "Listen address for QUIC/UDP fabric endpoint, for example :19443.")
|
||||
@@ -84,9 +100,15 @@ func Load(args []string, env map[string]string) (Config, error) {
|
||||
fs.IntVar(&cfg.MeshListenAutoPortEnd, "mesh-listen-auto-port-end", getEnvInt(env, "RAP_MESH_LISTEN_AUTO_PORT_END", 19231), "Last port used when mesh listen port mode is auto.")
|
||||
fs.StringVar(&cfg.MeshAdvertiseEndpoint, "mesh-advertise-endpoint", getEnv(env, "RAP_MESH_ADVERTISE_ENDPOINT", ""), "Advertised mesh endpoint reported to the Control Plane. Empty disables endpoint reporting.")
|
||||
fs.StringVar(&cfg.MeshAdvertiseEndpointsJSON, "mesh-advertise-endpoints-json", getEnv(env, "RAP_MESH_ADVERTISE_ENDPOINTS_JSON", ""), "JSON array of advertised mesh endpoint candidates, including private/corporate endpoints.")
|
||||
fs.StringVar(&cfg.MeshAdvertiseTransport, "mesh-advertise-transport", getEnv(env, "RAP_MESH_ADVERTISE_TRANSPORT", "direct_tcp_tls"), "Transport label for the advertised mesh endpoint.")
|
||||
fs.StringVar(&cfg.MeshAdvertiseTransport, "mesh-advertise-transport", getEnv(env, "RAP_MESH_ADVERTISE_TRANSPORT", "quic"), "Transport label for the advertised mesh endpoint.")
|
||||
fs.StringVar(&cfg.MeshConnectivityMode, "mesh-connectivity-mode", getEnv(env, "RAP_MESH_CONNECTIVITY_MODE", "direct"), "Connectivity mode reported with the advertised mesh endpoint.")
|
||||
fs.StringVar(&cfg.MeshNATType, "mesh-nat-type", getEnv(env, "RAP_MESH_NAT_TYPE", "unknown"), "NAT type hint reported with the advertised mesh endpoint.")
|
||||
fs.StringVar(&cfg.MeshLocalSegmentID, "mesh-local-segment-id", getEnv(env, "RAP_MESH_LOCAL_SEGMENT_ID", ""), "Optional local LAN/site segment ID advertised with QUIC endpoint candidates.")
|
||||
fs.StringVar(&cfg.MeshNATGroupID, "mesh-nat-group-id", getEnv(env, "RAP_MESH_NAT_GROUP_ID", ""), "Optional NAT group ID advertised with QUIC endpoint candidates.")
|
||||
fs.StringVar(&cfg.MeshSTUNReflexiveEndpoint, "mesh-stun-reflexive-endpoint", getEnv(env, "RAP_MESH_STUN_REFLEXIVE_ENDPOINT", ""), "Optional STUN-discovered reflexive QUIC endpoint, for example quic://203.0.113.10:19443.")
|
||||
fs.StringVar(&cfg.MeshSTUNServer, "mesh-stun-server", getEnv(env, "RAP_MESH_STUN_SERVER", ""), "Optional STUN server name used to discover the reflexive endpoint.")
|
||||
fs.StringVar(&cfg.MeshRelayNodeID, "mesh-relay-node-id", getEnv(env, "RAP_MESH_RELAY_NODE_ID", ""), "Optional relay node ID for relay-required QUIC fallback candidates.")
|
||||
fs.StringVar(&cfg.MeshRelayEndpoint, "mesh-relay-endpoint", getEnv(env, "RAP_MESH_RELAY_ENDPOINT", ""), "Optional relay QUIC endpoint for relay-required fallback candidates.")
|
||||
fs.StringVar(&cfg.MeshRegion, "mesh-region", getEnv(env, "RAP_MESH_REGION", ""), "Optional region/site hint for the advertised mesh endpoint.")
|
||||
fs.StringVar(&cfg.MeshSyntheticConfigPath, "mesh-synthetic-config", getEnv(env, "RAP_MESH_SYNTHETIC_CONFIG", ""), "Path to scoped synthetic mesh config snapshot. Preferred over debug JSON env.")
|
||||
fs.StringVar(&cfg.MeshPeerEndpointsJSON, "mesh-peer-endpoints-json", getEnv(env, "RAP_MESH_PEER_ENDPOINTS_JSON", ""), "JSON object mapping peer node_id to synthetic mesh endpoint URL.")
|
||||
@@ -129,12 +151,27 @@ func Load(args []string, env map[string]string) (Config, error) {
|
||||
cfg.MeshAdvertiseEndpoint = strings.TrimRight(strings.TrimSpace(cfg.MeshAdvertiseEndpoint), "/")
|
||||
cfg.MeshAdvertiseEndpointsJSON = strings.TrimSpace(cfg.MeshAdvertiseEndpointsJSON)
|
||||
cfg.MeshAdvertiseTransport = strings.TrimSpace(cfg.MeshAdvertiseTransport)
|
||||
if cfg.MeshAdvertiseTransport == "" {
|
||||
cfg.MeshAdvertiseTransport = "quic"
|
||||
}
|
||||
cfg.MeshAdvertiseTransport = normalizeLegacyAdvertiseTransport(cfg.MeshAdvertiseTransport)
|
||||
cfg.MeshAdvertiseEndpoint = normalizeLegacyEndpointSchemeToQUIC(cfg.MeshAdvertiseEndpoint)
|
||||
cfg.MeshConnectivityMode = strings.TrimSpace(cfg.MeshConnectivityMode)
|
||||
cfg.MeshNATType = strings.TrimSpace(cfg.MeshNATType)
|
||||
cfg.MeshLocalSegmentID = strings.TrimSpace(cfg.MeshLocalSegmentID)
|
||||
cfg.MeshNATGroupID = strings.TrimSpace(cfg.MeshNATGroupID)
|
||||
cfg.MeshSTUNReflexiveEndpoint = normalizeLegacyEndpointSchemeToQUIC(strings.TrimRight(strings.TrimSpace(cfg.MeshSTUNReflexiveEndpoint), "/"))
|
||||
cfg.MeshSTUNServer = strings.TrimSpace(cfg.MeshSTUNServer)
|
||||
cfg.MeshRelayNodeID = strings.TrimSpace(cfg.MeshRelayNodeID)
|
||||
cfg.MeshRelayEndpoint = normalizeLegacyEndpointSchemeToQUIC(strings.TrimRight(strings.TrimSpace(cfg.MeshRelayEndpoint), "/"))
|
||||
cfg.MeshRegion = strings.TrimSpace(cfg.MeshRegion)
|
||||
cfg.MeshSyntheticConfigPath = strings.TrimSpace(cfg.MeshSyntheticConfigPath)
|
||||
cfg.MeshPeerEndpointsJSON = strings.TrimSpace(cfg.MeshPeerEndpointsJSON)
|
||||
cfg.MeshSyntheticRoutesJSON = strings.TrimSpace(cfg.MeshSyntheticRoutesJSON)
|
||||
cfg.WebIngressSigningPrivateKey = strings.TrimSpace(cfg.WebIngressSigningPrivateKey)
|
||||
cfg.WebIngressSigningKeyID = strings.TrimSpace(cfg.WebIngressSigningKeyID)
|
||||
cfg.WebIngressTrustedKeysJSON = strings.TrimSpace(cfg.WebIngressTrustedKeysJSON)
|
||||
cfg.WebIngressRuntimeServiceClasses = strings.TrimSpace(cfg.WebIngressRuntimeServiceClasses)
|
||||
cfg.RemoteWorkspaceRealAdapterCommand = strings.TrimSpace(cfg.RemoteWorkspaceRealAdapterCommand)
|
||||
cfg.RemoteWorkspaceRealAdapterArgsJSON = strings.TrimSpace(cfg.RemoteWorkspaceRealAdapterArgsJSON)
|
||||
cfg.RemoteWorkspaceRealAdapterWorkDir = strings.TrimSpace(cfg.RemoteWorkspaceRealAdapterWorkDir)
|
||||
@@ -176,9 +213,62 @@ func Load(args []string, env map[string]string) (Config, error) {
|
||||
if cfg.MeshListenAutoPortStart > cfg.MeshListenAutoPortEnd {
|
||||
return Config{}, errors.New("mesh listen auto port start must be less than or equal to end")
|
||||
}
|
||||
if !isQUICAdvertiseTransport(cfg.MeshAdvertiseTransport) {
|
||||
return Config{}, errors.New("mesh advertise transport must be a QUIC transport label")
|
||||
}
|
||||
if hasLegacyEndpointScheme(cfg.MeshAdvertiseEndpoint) {
|
||||
return Config{}, errors.New("mesh advertise endpoint must be a QUIC endpoint")
|
||||
}
|
||||
if cfg.MeshSTUNReflexiveEndpoint != "" && hasLegacyEndpointScheme(cfg.MeshSTUNReflexiveEndpoint) {
|
||||
return Config{}, errors.New("mesh STUN reflexive endpoint must be a QUIC endpoint")
|
||||
}
|
||||
if cfg.MeshRelayEndpoint != "" && hasLegacyEndpointScheme(cfg.MeshRelayEndpoint) {
|
||||
return Config{}, errors.New("mesh relay endpoint must be a QUIC endpoint")
|
||||
}
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
// isQUICAdvertiseTransport reports whether label, ignoring case and
// surrounding whitespace, is one of the accepted QUIC transport labels.
func isQUICAdvertiseTransport(label string) bool {
	normalized := strings.ToLower(strings.TrimSpace(label))
	accepted := [...]string{
		"quic",
		"direct_quic",
		"udp_quic",
		"quic_udp",
		"lan_quic",
		"reverse_quic",
		"relay_quic",
		"ice_quic",
	}
	for _, candidate := range accepted {
		if normalized == candidate {
			return true
		}
	}
	return false
}
|
||||
|
||||
// normalizeLegacyAdvertiseTransport maps legacy (non-QUIC) transport labels to
// their QUIC equivalents. Matching ignores case and surrounding whitespace.
// An unrecognised label is returned trimmed, with its case unchanged.
func normalizeLegacyAdvertiseTransport(label string) string {
	trimmed := strings.TrimSpace(label)

	// Legacy label -> QUIC replacement.
	replacements := map[string]string{
		"direct_http":      "direct_quic",
		"direct_https":     "direct_quic",
		"direct_tcp_tls":   "direct_quic",
		"http":             "direct_quic",
		"https":            "direct_quic",
		"ws":               "direct_quic",
		"wss":              "direct_quic",
		"websocket":        "direct_quic",
		"outbound_reverse": "reverse_quic",
		"reverse":          "reverse_quic",
		"reverse_outbound": "reverse_quic",
		"relay":            "relay_quic",
		"relay_control":    "relay_quic",
	}
	if quicLabel, legacy := replacements[strings.ToLower(trimmed)]; legacy {
		return quicLabel
	}
	return trimmed
}
|
||||
|
||||
// normalizeLegacyEndpointSchemeToQUIC trims surrounding whitespace and trailing
// slashes from endpoint and, when it begins with http://, https://, ws:// or
// wss:// in any letter case, replaces that scheme with quic://. Any other
// endpoint is returned trimmed but otherwise unchanged.
func normalizeLegacyEndpointSchemeToQUIC(endpoint string) string {
	trimmed := strings.TrimRight(strings.TrimSpace(endpoint), "/")
	folded := strings.ToLower(trimmed)

	// The prefixes are mutually exclusive, so case order does not matter.
	switch {
	case strings.HasPrefix(folded, "http://"):
		return "quic://" + trimmed[len("http://"):]
	case strings.HasPrefix(folded, "https://"):
		return "quic://" + trimmed[len("https://"):]
	case strings.HasPrefix(folded, "ws://"):
		return "quic://" + trimmed[len("ws://"):]
	case strings.HasPrefix(folded, "wss://"):
		return "quic://" + trimmed[len("wss://"):]
	}
	return trimmed
}
|
||||
|
||||
// hasLegacyEndpointScheme reports whether endpoint, ignoring case and
// surrounding whitespace, begins with http://, https://, ws:// or wss://.
func hasLegacyEndpointScheme(endpoint string) bool {
	folded := strings.ToLower(strings.TrimSpace(endpoint))
	for _, scheme := range [...]string{"http://", "https://", "ws://", "wss://"} {
		if strings.HasPrefix(folded, scheme) {
			return true
		}
	}
	return false
}
|
||||
|
||||
func readEnv() map[string]string {
|
||||
out := map[string]string{}
|
||||
for _, pair := range os.Environ() {
|
||||
|
||||
@@ -15,6 +15,11 @@ func TestLoadConfigFromEnvAndArgs(t *testing.T) {
|
||||
"RAP_NODE_NAME": "node-a",
|
||||
"RAP_NODE_STATE_DIR": "/tmp/rap-node",
|
||||
"RAP_WORKLOAD_SUPERVISION_ENABLED": "true",
|
||||
"RAP_WEB_INGRESS_RUNTIME_ENABLED": "true",
|
||||
"RAP_WEB_INGRESS_SIGNING_PRIVATE_KEY": " private-key-b64 ",
|
||||
"RAP_WEB_INGRESS_SIGNING_KEY_ID": " web-key-1 ",
|
||||
"RAP_WEB_INGRESS_TRUSTED_KEYS_JSON": ` {"web-key-1":"public-key-b64"} `,
|
||||
"RAP_WEB_INGRESS_RUNTIME_SERVICE_CLASSES": " platform_admin, cluster_admin ",
|
||||
"RAP_HEARTBEAT_INTERVAL_SECONDS": "7",
|
||||
"RAP_ENROLLMENT_POLL_INTERVAL_SECONDS": "3",
|
||||
"RAP_ENROLLMENT_POLL_TIMEOUT_SECONDS": "30",
|
||||
@@ -32,11 +37,17 @@ func TestLoadConfigFromEnvAndArgs(t *testing.T) {
|
||||
"RAP_MESH_LISTEN_PORT_MODE": "auto",
|
||||
"RAP_MESH_LISTEN_AUTO_PORT_START": "19010",
|
||||
"RAP_MESH_LISTEN_AUTO_PORT_END": "19020",
|
||||
"RAP_MESH_ADVERTISE_ENDPOINT": "https://node-a.example.test:443/",
|
||||
"RAP_MESH_ADVERTISE_ENDPOINT": "quic://node-a.example.test:19443/",
|
||||
"RAP_MESH_ADVERTISE_ENDPOINTS_JSON": `[{"endpoint_id":"node-a-lan","address":"10.10.0.20:19001"}]`,
|
||||
"RAP_MESH_ADVERTISE_TRANSPORT": "wss",
|
||||
"RAP_MESH_ADVERTISE_TRANSPORT": "direct_quic",
|
||||
"RAP_MESH_CONNECTIVITY_MODE": "outbound_only",
|
||||
"RAP_MESH_NAT_TYPE": "symmetric",
|
||||
"RAP_MESH_LOCAL_SEGMENT_ID": "site-a",
|
||||
"RAP_MESH_NAT_GROUP_ID": "nat-a",
|
||||
"RAP_MESH_STUN_REFLEXIVE_ENDPOINT": "quic://203.0.113.20:19443/",
|
||||
"RAP_MESH_STUN_SERVER": "stun.example.test:3478",
|
||||
"RAP_MESH_RELAY_NODE_ID": "node-r",
|
||||
"RAP_MESH_RELAY_ENDPOINT": "quic://node-r.example.test:19443/",
|
||||
"RAP_MESH_REGION": "eu",
|
||||
"RAP_MESH_SYNTHETIC_CONFIG": "/tmp/rap-node/mesh-synthetic.json",
|
||||
"RAP_MESH_PEER_ENDPOINTS_JSON": `{"node-b":"http://127.0.0.1:19002"}`,
|
||||
@@ -67,6 +78,15 @@ func TestLoadConfigFromEnvAndArgs(t *testing.T) {
|
||||
if !cfg.WorkloadSupervisionEnabled {
|
||||
t.Fatal("WorkloadSupervisionEnabled = false, want true")
|
||||
}
|
||||
if !cfg.WebIngressRuntimeEnabled {
|
||||
t.Fatal("WebIngressRuntimeEnabled = false, want true")
|
||||
}
|
||||
if cfg.WebIngressSigningPrivateKey != "private-key-b64" ||
|
||||
cfg.WebIngressSigningKeyID != "web-key-1" ||
|
||||
cfg.WebIngressTrustedKeysJSON != `{"web-key-1":"public-key-b64"}` ||
|
||||
cfg.WebIngressRuntimeServiceClasses != "platform_admin, cluster_admin" {
|
||||
t.Fatalf("unexpected web ingress key config: %+v", cfg)
|
||||
}
|
||||
if !cfg.MeshSyntheticRuntimeEnabled {
|
||||
t.Fatal("MeshSyntheticRuntimeEnabled = false, want true")
|
||||
}
|
||||
@@ -100,11 +120,17 @@ func TestLoadConfigFromEnvAndArgs(t *testing.T) {
|
||||
if cfg.MeshListenPortMode != "auto" || cfg.MeshListenAutoPortStart != 19010 || cfg.MeshListenAutoPortEnd != 19020 {
|
||||
t.Fatalf("unexpected mesh listen port config: %+v", cfg)
|
||||
}
|
||||
if cfg.MeshAdvertiseEndpoint != "https://node-a.example.test:443" ||
|
||||
if cfg.MeshAdvertiseEndpoint != "quic://node-a.example.test:19443" ||
|
||||
cfg.MeshAdvertiseEndpointsJSON == "" ||
|
||||
cfg.MeshAdvertiseTransport != "wss" ||
|
||||
cfg.MeshAdvertiseTransport != "direct_quic" ||
|
||||
cfg.MeshConnectivityMode != "outbound_only" ||
|
||||
cfg.MeshNATType != "symmetric" ||
|
||||
cfg.MeshLocalSegmentID != "site-a" ||
|
||||
cfg.MeshNATGroupID != "nat-a" ||
|
||||
cfg.MeshSTUNReflexiveEndpoint != "quic://203.0.113.20:19443" ||
|
||||
cfg.MeshSTUNServer != "stun.example.test:3478" ||
|
||||
cfg.MeshRelayNodeID != "node-r" ||
|
||||
cfg.MeshRelayEndpoint != "quic://node-r.example.test:19443" ||
|
||||
cfg.MeshRegion != "eu" {
|
||||
t.Fatalf("unexpected mesh advertise config: %+v", cfg)
|
||||
}
|
||||
@@ -139,6 +165,9 @@ func TestLoadConfigDefaultsEnrollmentPollingToNoTimeout(t *testing.T) {
|
||||
cfg.RemoteWorkspaceRealAdapterWorkDir != "" {
|
||||
t.Fatalf("real adapter config should default disabled and empty: %+v", cfg)
|
||||
}
|
||||
if cfg.WebIngressRuntimeEnabled {
|
||||
t.Fatalf("web ingress runtime should default disabled: %+v", cfg)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadConfigRejectsNegativeProductionObservationSinkCapacity(t *testing.T) {
|
||||
@@ -162,3 +191,33 @@ func TestLoadConfigRejectsTooLargeProductionObservationSinkCapacity(t *testing.T
|
||||
t.Fatal("Load returned nil error for too-large sink capacity")
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadConfigNormalizesLegacyMeshAdvertiseTransport(t *testing.T) {
|
||||
cfg, err := Load(nil, map[string]string{
|
||||
"RAP_BACKEND_URL": "http://backend/api/v1",
|
||||
"RAP_NODE_NAME": "node-a",
|
||||
"RAP_MESH_ADVERTISE_ENDPOINT": "quic://node-a.example.test:19443",
|
||||
"RAP_MESH_ADVERTISE_TRANSPORT": "wss",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("Load returned error for legacy mesh advertise transport migration: %v", err)
|
||||
}
|
||||
if cfg.MeshAdvertiseTransport != "direct_quic" {
|
||||
t.Fatalf("transport = %q, want direct_quic", cfg.MeshAdvertiseTransport)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadConfigNormalizesLegacyMeshAdvertiseEndpointScheme(t *testing.T) {
|
||||
cfg, err := Load(nil, map[string]string{
|
||||
"RAP_BACKEND_URL": "http://backend/api/v1",
|
||||
"RAP_NODE_NAME": "node-a",
|
||||
"RAP_MESH_ADVERTISE_ENDPOINT": "https://node-a.example.test:443",
|
||||
"RAP_MESH_ADVERTISE_TRANSPORT": "direct_quic",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("Load returned error for legacy mesh advertise endpoint migration: %v", err)
|
||||
}
|
||||
if cfg.MeshAdvertiseEndpoint != "quic://node-a.example.test:443" {
|
||||
t.Fatalf("endpoint = %q, want quic scheme", cfg.MeshAdvertiseEndpoint)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
package fabricproto
|
||||
|
||||
import "errors"
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"errors"
|
||||
)
|
||||
|
||||
var (
|
||||
ErrUnsupportedSessionFrame = errors.New("unsupported fabric session frame")
|
||||
@@ -62,6 +65,7 @@ func (s *Session) HandleFrame(frame Frame) (SessionEvent, []Frame, error) {
|
||||
TrafficClass: frame.TrafficClass,
|
||||
StreamID: frame.StreamID,
|
||||
Sequence: frame.Sequence,
|
||||
Payload: DataAckPayload(frame.Payload),
|
||||
}}, nil
|
||||
case FrameAck:
|
||||
if err := s.Ack(frame.StreamID, frame.Sequence); err != nil {
|
||||
@@ -103,6 +107,11 @@ func (s *Session) HandleFrame(frame Frame) (SessionEvent, []Frame, error) {
|
||||
}
|
||||
}
|
||||
|
||||
// DataAckPayload returns the 32-byte SHA-256 digest of payload. HandleFrame
// places this digest in the payload of the ack frame it emits for a data
// frame.
func DataAckPayload(payload []byte) []byte {
	hasher := sha256.New()
	// hash.Hash.Write is documented to never return an error.
	_, _ = hasher.Write(payload)
	return hasher.Sum(nil)
}
|
||||
|
||||
func (s *Session) handleDataFrame(frame Frame) (SessionEvent, error) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package fabricproto
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"testing"
|
||||
)
|
||||
@@ -36,6 +37,9 @@ func TestHandleFrameOpensStreamAndReceivesData(t *testing.T) {
|
||||
if len(responses) != 1 || responses[0].Type != FrameAck || responses[0].StreamID != 7 || responses[0].Sequence != 11 {
|
||||
t.Fatalf("responses = %+v, want ack for stream 7 seq 11", responses)
|
||||
}
|
||||
if !bytes.Equal(responses[0].Payload, DataAckPayload([]byte("rdp-input"))) {
|
||||
t.Fatalf("ack checksum = %x, want sha256 payload checksum", responses[0].Payload)
|
||||
}
|
||||
snapshot := session.Snapshot()
|
||||
if snapshot.FramesReceived != 1 || snapshot.Streams[7].Received != 1 {
|
||||
t.Fatalf("received metrics = %+v stream=%+v", snapshot, snapshot.Streams[7])
|
||||
|
||||
@@ -136,6 +136,12 @@ func (cfg RuntimeConfig) ValidateInstall() error {
|
||||
if cfg.MeshListenAutoPortStart > 0 && cfg.MeshListenAutoPortEnd > 0 && cfg.MeshListenAutoPortStart > cfg.MeshListenAutoPortEnd {
|
||||
return errors.New("mesh listen auto port start must be less than or equal to end")
|
||||
}
|
||||
if cfg.MeshAdvertiseTransport != "" && !isQUICAdvertiseTransport(cfg.MeshAdvertiseTransport) {
|
||||
return errors.New("mesh advertise transport must be a QUIC transport label")
|
||||
}
|
||||
if hasLegacyEndpointScheme(cfg.MeshAdvertiseEndpoint) {
|
||||
return errors.New("mesh advertise endpoint must be a QUIC endpoint")
|
||||
}
|
||||
if cfg.ProductionObservationSinkCap < 0 {
|
||||
return errors.New("production observation sink capacity must not be negative")
|
||||
}
|
||||
@@ -153,3 +159,20 @@ func firstNonEmpty(value, fallback string) string {
|
||||
}
|
||||
return strings.TrimSpace(value)
|
||||
}
|
||||
|
||||
// isQUICAdvertiseTransport reports whether label is one of the QUIC transport
// labels accepted by install validation. Letter case and surrounding
// whitespace are ignored.
func isQUICAdvertiseTransport(label string) bool {
	wanted := strings.ToLower(strings.TrimSpace(label))
	for _, known := range []string{
		"quic", "direct_quic", "udp_quic", "quic_udp",
		"lan_quic", "reverse_quic", "relay_quic", "ice_quic",
	} {
		if known == wanted {
			return true
		}
	}
	return false
}
|
||||
|
||||
// hasLegacyEndpointScheme reports whether endpoint starts with an http, https,
// ws or wss scheme once surrounding whitespace is removed and the text is
// lowercased.
func hasLegacyEndpointScheme(endpoint string) bool {
	lowered := strings.ToLower(strings.TrimSpace(endpoint))
	legacy := [...]string{"http://", "https://", "ws://", "wss://"}
	for i := range legacy {
		if strings.HasPrefix(lowered, legacy[i]) {
			return true
		}
	}
	return false
}
|
||||
|
||||
@@ -73,7 +73,8 @@ func TestDockerRunArgsBuildNodeRuntimePlacement(t *testing.T) {
|
||||
VPNFabricQUICMaxStreamsPerConn: 24,
|
||||
VPNFabricQUICIdleTTLSeconds: 120,
|
||||
MeshListenAddr: ":19131",
|
||||
MeshAdvertiseEndpoint: "http://10.0.0.11:19131/",
|
||||
MeshAdvertiseEndpoint: "quic://10.0.0.11:19443/",
|
||||
MeshAdvertiseTransport: "direct_quic",
|
||||
MeshConnectivityMode: "private_lan",
|
||||
})
|
||||
|
||||
@@ -94,7 +95,8 @@ func TestDockerRunArgsBuildNodeRuntimePlacement(t *testing.T) {
|
||||
"RAP_VPN_FABRIC_QUIC_MAX_STREAMS_PER_CONN=24",
|
||||
"RAP_VPN_FABRIC_QUIC_IDLE_TTL_SECONDS=120",
|
||||
"RAP_MESH_LISTEN_ADDR=:19131",
|
||||
"RAP_MESH_ADVERTISE_ENDPOINT=http://10.0.0.11:19131",
|
||||
"RAP_MESH_ADVERTISE_ENDPOINT=quic://10.0.0.11:19443",
|
||||
"RAP_MESH_ADVERTISE_TRANSPORT=direct_quic",
|
||||
"RAP_MESH_CONNECTIVITY_MODE=private_lan",
|
||||
"rap-node-agent:test",
|
||||
} {
|
||||
@@ -384,3 +386,35 @@ func TestValidateRequiresJoinTokenUnlessReplacingExistingState(t *testing.T) {
|
||||
t.Fatalf("replace update should allow missing join token: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidateRejectsLegacyMeshAdvertiseTransport(t *testing.T) {
|
||||
err := RuntimeConfig{
|
||||
BackendURL: "http://control/api/v1",
|
||||
ClusterID: "cluster-1",
|
||||
JoinToken: "join-secret",
|
||||
NodeName: "node-a",
|
||||
MeshAdvertiseEndpoint: "quic://10.0.0.11:19443",
|
||||
MeshAdvertiseTransport: "wss",
|
||||
MeshQUICFabricEnabled: true,
|
||||
MeshQUICFabricListenAddr: ":19443",
|
||||
}.ValidateInstall()
|
||||
if err == nil || !strings.Contains(err.Error(), "QUIC transport") {
|
||||
t.Fatalf("expected QUIC transport validation error, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidateRejectsLegacyMeshAdvertiseEndpointScheme(t *testing.T) {
|
||||
err := RuntimeConfig{
|
||||
BackendURL: "http://control/api/v1",
|
||||
ClusterID: "cluster-1",
|
||||
JoinToken: "join-secret",
|
||||
NodeName: "node-a",
|
||||
MeshAdvertiseEndpoint: "http://10.0.0.11:19131",
|
||||
MeshAdvertiseTransport: "direct_quic",
|
||||
MeshQUICFabricEnabled: true,
|
||||
MeshQUICFabricListenAddr: ":19443",
|
||||
}.ValidateInstall()
|
||||
if err == nil || !strings.Contains(err.Error(), "QUIC endpoint") {
|
||||
t.Fatalf("expected QUIC endpoint validation error, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,6 +16,7 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
clusterauth "github.com/example/remote-access-platform/agents/rap-node-agent/internal/authority"
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/state"
|
||||
)
|
||||
|
||||
@@ -104,22 +105,37 @@ type NodeUpdatePlanResponse struct {
|
||||
}
|
||||
|
||||
type NodeUpdatePlan struct {
|
||||
SchemaVersion string `json:"schema_version"`
|
||||
ClusterID string `json:"cluster_id"`
|
||||
NodeID string `json:"node_id"`
|
||||
Product string `json:"product"`
|
||||
CurrentVersion string `json:"current_version,omitempty"`
|
||||
Action string `json:"action"`
|
||||
Reason string `json:"reason"`
|
||||
TargetVersion string `json:"target_version,omitempty"`
|
||||
Channel string `json:"channel,omitempty"`
|
||||
Strategy string `json:"strategy,omitempty"`
|
||||
RollbackAllowed bool `json:"rollback_allowed"`
|
||||
HealthWindowSec int `json:"health_window_seconds,omitempty"`
|
||||
Artifact *ReleaseArtifact `json:"artifact,omitempty"`
|
||||
AuthorityPayload json.RawMessage `json:"authority_payload,omitempty"`
|
||||
AuthoritySignature json.RawMessage `json:"authority_signature,omitempty"`
|
||||
ProductionForwarding bool `json:"production_forwarding"`
|
||||
SchemaVersion string `json:"schema_version"`
|
||||
ClusterID string `json:"cluster_id"`
|
||||
NodeID string `json:"node_id"`
|
||||
Product string `json:"product"`
|
||||
CurrentVersion string `json:"current_version,omitempty"`
|
||||
Action string `json:"action"`
|
||||
Reason string `json:"reason"`
|
||||
TargetVersion string `json:"target_version,omitempty"`
|
||||
Channel string `json:"channel,omitempty"`
|
||||
Strategy string `json:"strategy,omitempty"`
|
||||
RollbackAllowed bool `json:"rollback_allowed"`
|
||||
HealthWindowSec int `json:"health_window_seconds,omitempty"`
|
||||
Artifact *ReleaseArtifact `json:"artifact,omitempty"`
|
||||
AuthorityPayload json.RawMessage `json:"authority_payload,omitempty"`
|
||||
AuthoritySignature json.RawMessage `json:"authority_signature,omitempty"`
|
||||
AuthorityQuorum *clusterauth.QuorumEnvelope `json:"authority_quorum,omitempty"`
|
||||
ProductionForwarding bool `json:"production_forwarding"`
|
||||
}
|
||||
|
||||
// nodeUpdatePlanAuthorityPayload is the decoded form of a plan's
// authority_payload document. Its fields are compared against the matching
// fields of the received plan to confirm the authority-covered payload
// describes that same plan.
type nodeUpdatePlanAuthorityPayload struct {
	// Schema identifier of the authority payload document.
	SchemaVersion string `json:"schema_version"`

	// Plan identity.
	ClusterID      string `json:"cluster_id"`
	NodeID         string `json:"node_id"`
	Product        string `json:"product"`
	CurrentVersion string `json:"current_version,omitempty"`

	// Requested action and its target.
	Action        string `json:"action"`
	TargetVersion string `json:"target_version,omitempty"`

	// Artifact binding; compared with the plan's artifact when one is present.
	ArtifactSHA256 string `json:"artifact_sha256,omitempty"`
	ArtifactURL    string `json:"artifact_url,omitempty"`

	// Flags carried by the payload.
	ControlPlaneOnly     bool `json:"control_plane_only"`
	ProductionForwarding bool `json:"production_forwarding"`
}
|
||||
|
||||
type ReleaseArtifact struct {
|
||||
@@ -516,9 +532,87 @@ func FetchNodeUpdatePlan(ctx context.Context, req UpdateRequest) (NodeUpdatePlan
|
||||
if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
|
||||
return NodeUpdatePlan{}, err
|
||||
}
|
||||
if err := verifyNodeUpdatePlanAuthority(req, out.Plan); err != nil {
|
||||
return NodeUpdatePlan{}, err
|
||||
}
|
||||
return out.Plan, nil
|
||||
}
|
||||
|
||||
func verifyNodeUpdatePlanAuthority(req UpdateRequest, plan NodeUpdatePlan) error {
|
||||
identity, ok := pinnedUpdatePlanAuthority(req)
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
if len(identity.ClusterAuthorityQuorum) > 0 {
|
||||
if plan.AuthorityQuorum == nil {
|
||||
return errors.New("update plan quorum authority is required by pinned cluster quorum")
|
||||
}
|
||||
var descriptor clusterauth.QuorumDescriptor
|
||||
if err := json.Unmarshal(identity.ClusterAuthorityQuorum, &descriptor); err != nil {
|
||||
return fmt.Errorf("invalid pinned cluster authority quorum: %w", err)
|
||||
}
|
||||
if len(plan.AuthorityPayload) == 0 {
|
||||
return errors.New("update plan authority payload is required by pinned cluster quorum")
|
||||
}
|
||||
if err := clusterauth.VerifyQuorumRaw(descriptor, plan.AuthorityPayload, *plan.AuthorityQuorum, "update-authority"); err != nil {
|
||||
return fmt.Errorf("update plan quorum authority rejected: %w", err)
|
||||
}
|
||||
return verifyNodeUpdatePlanAuthorityPayload(plan)
|
||||
}
|
||||
if len(plan.AuthorityPayload) == 0 || len(plan.AuthoritySignature) == 0 {
|
||||
return errors.New("update plan authority signature is required by pinned cluster authority")
|
||||
}
|
||||
var signature clusterauth.Signature
|
||||
if err := json.Unmarshal(plan.AuthoritySignature, &signature); err != nil {
|
||||
return fmt.Errorf("invalid update plan authority signature: %w", err)
|
||||
}
|
||||
if identity.ClusterAuthorityFingerprint != "" && signature.KeyFingerprint != identity.ClusterAuthorityFingerprint {
|
||||
return errors.New("update plan authority fingerprint mismatch")
|
||||
}
|
||||
if err := clusterauth.VerifyRaw(identity.ClusterAuthorityPublicKey, plan.AuthorityPayload, signature); err != nil {
|
||||
return fmt.Errorf("update plan authority signature rejected: %w", err)
|
||||
}
|
||||
return verifyNodeUpdatePlanAuthorityPayload(plan)
|
||||
}
|
||||
|
||||
func verifyNodeUpdatePlanAuthorityPayload(plan NodeUpdatePlan) error {
|
||||
var payload nodeUpdatePlanAuthorityPayload
|
||||
if err := json.Unmarshal(plan.AuthorityPayload, &payload); err != nil {
|
||||
return fmt.Errorf("invalid update plan authority payload: %w", err)
|
||||
}
|
||||
if payload.SchemaVersion != "rap.node_update_plan_authority.v1" ||
|
||||
payload.ClusterID != plan.ClusterID ||
|
||||
payload.NodeID != plan.NodeID ||
|
||||
payload.Product != plan.Product ||
|
||||
payload.CurrentVersion != plan.CurrentVersion ||
|
||||
payload.Action != plan.Action ||
|
||||
payload.TargetVersion != plan.TargetVersion ||
|
||||
payload.ProductionForwarding != plan.ProductionForwarding {
|
||||
return errors.New("update plan authority payload mismatch")
|
||||
}
|
||||
if plan.Artifact != nil {
|
||||
if payload.ArtifactSHA256 != plan.Artifact.SHA256 || payload.ArtifactURL != plan.Artifact.URL {
|
||||
return errors.New("update plan artifact authority payload mismatch")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func pinnedUpdatePlanAuthority(req UpdateRequest) (state.Identity, bool) {
|
||||
stateDir := strings.TrimSpace(req.StateDir)
|
||||
if stateDir == "" {
|
||||
return state.Identity{}, false
|
||||
}
|
||||
identity, err := state.Load(filepath.Join(stateDir, state.FileName))
|
||||
if err != nil {
|
||||
return state.Identity{}, false
|
||||
}
|
||||
if strings.TrimSpace(identity.ClusterAuthorityPublicKey) == "" {
|
||||
return state.Identity{}, false
|
||||
}
|
||||
return identity, true
|
||||
}
|
||||
|
||||
func resolveUpdateRequest(req UpdateRequest) (UpdateRequest, error) {
|
||||
req = req.Normalize()
|
||||
if err := req.Validate(); err != nil {
|
||||
|
||||
@@ -2,6 +2,9 @@ package hostagent
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/ed25519"
|
||||
cryptorand "crypto/rand"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
@@ -12,6 +15,7 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
clusterauth "github.com/example/remote-access-platform/agents/rap-node-agent/internal/authority"
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/state"
|
||||
)
|
||||
|
||||
@@ -21,6 +25,101 @@ type updateRunner struct {
|
||||
inspectJSON string
|
||||
}
|
||||
|
||||
func writePinnedAuthorityIdentity(t *testing.T) (string, ed25519.PublicKey, ed25519.PrivateKey) {
|
||||
t.Helper()
|
||||
publicKey, privateKey, err := ed25519.GenerateKey(cryptorand.Reader)
|
||||
if err != nil {
|
||||
t.Fatalf("generate authority key: %v", err)
|
||||
}
|
||||
dir := t.TempDir()
|
||||
identity := state.Identity{
|
||||
NodeID: "node-1",
|
||||
ClusterID: "cluster-1",
|
||||
NodeName: "node-a",
|
||||
IdentityStatus: "active",
|
||||
ClusterAuthorityPublicKey: base64.StdEncoding.EncodeToString(publicKey),
|
||||
ClusterAuthorityFingerprint: clusterauth.Fingerprint(publicKey),
|
||||
}
|
||||
if err := state.Save(filepath.Join(dir, state.FileName), identity); err != nil {
|
||||
t.Fatalf("save identity: %v", err)
|
||||
}
|
||||
return dir, publicKey, privateKey
|
||||
}
|
||||
|
||||
func writePinnedQuorumIdentity(t *testing.T) (string, clusterauth.QuorumDescriptor, []ed25519.PrivateKey) {
|
||||
t.Helper()
|
||||
descriptor := clusterauth.QuorumDescriptor{
|
||||
SchemaVersion: clusterauth.QuorumSchemaVersion,
|
||||
ClusterID: "cluster-1",
|
||||
Epoch: "epoch-1",
|
||||
Threshold: 2,
|
||||
}
|
||||
privateKeys := make([]ed25519.PrivateKey, 0, 3)
|
||||
for i := 0; i < 3; i++ {
|
||||
publicKey, privateKey, err := ed25519.GenerateKey(cryptorand.Reader)
|
||||
if err != nil {
|
||||
t.Fatalf("generate authority key: %v", err)
|
||||
}
|
||||
descriptor.Members = append(descriptor.Members, clusterauth.QuorumMember{
|
||||
NodeID: fmt.Sprintf("authority-%d", i+1),
|
||||
Role: "update-authority",
|
||||
PublicKey: base64.StdEncoding.EncodeToString(publicKey),
|
||||
PublicKeyFingerprint: clusterauth.Fingerprint(publicKey),
|
||||
Scopes: []string{"update-authority"},
|
||||
})
|
||||
privateKeys = append(privateKeys, privateKey)
|
||||
}
|
||||
rawQuorum, err := json.Marshal(descriptor)
|
||||
if err != nil {
|
||||
t.Fatalf("marshal quorum: %v", err)
|
||||
}
|
||||
dir := t.TempDir()
|
||||
identity := state.Identity{
|
||||
NodeID: "node-1",
|
||||
ClusterID: "cluster-1",
|
||||
NodeName: "node-a",
|
||||
IdentityStatus: "active",
|
||||
ClusterAuthorityQuorum: rawQuorum,
|
||||
}
|
||||
if err := state.Save(filepath.Join(dir, state.FileName), identity); err != nil {
|
||||
t.Fatalf("save identity: %v", err)
|
||||
}
|
||||
return dir, descriptor, privateKeys
|
||||
}
|
||||
|
||||
func signedAuthorityPayload(t *testing.T, publicKey ed25519.PublicKey, privateKey ed25519.PrivateKey, payload any) (json.RawMessage, clusterauth.Signature) {
|
||||
t.Helper()
|
||||
raw, err := json.Marshal(payload)
|
||||
if err != nil {
|
||||
t.Fatalf("marshal payload: %v", err)
|
||||
}
|
||||
canonical, err := clusterauth.CanonicalJSON(raw)
|
||||
if err != nil {
|
||||
t.Fatalf("canonical payload: %v", err)
|
||||
}
|
||||
return raw, clusterauth.Signature{
|
||||
SchemaVersion: clusterauth.SignatureSchemaVersion,
|
||||
Algorithm: clusterauth.AlgorithmEd25519,
|
||||
KeyFingerprint: clusterauth.Fingerprint(publicKey),
|
||||
Signature: base64.StdEncoding.EncodeToString(ed25519.Sign(privateKey, canonical)),
|
||||
}
|
||||
}
|
||||
|
||||
func signHostAgentPayload(t *testing.T, payload json.RawMessage, privateKey ed25519.PrivateKey) clusterauth.Signature {
|
||||
t.Helper()
|
||||
canonical, err := clusterauth.CanonicalJSON(payload)
|
||||
if err != nil {
|
||||
t.Fatalf("canonical payload: %v", err)
|
||||
}
|
||||
publicKey := privateKey.Public().(ed25519.PublicKey)
|
||||
return clusterauth.Signature{
|
||||
SchemaVersion: clusterauth.SignatureSchemaVersion,
|
||||
Algorithm: clusterauth.AlgorithmEd25519,
|
||||
KeyFingerprint: clusterauth.Fingerprint(publicKey),
|
||||
Signature: base64.StdEncoding.EncodeToString(ed25519.Sign(privateKey, canonical)),
|
||||
}
|
||||
}
|
||||
|
||||
func TestArtifactURLsForBackendResolvesControlPlaneRelativeDownloads(t *testing.T) {
|
||||
urls := artifactURLsForBackend(ReleaseArtifact{
|
||||
URL: "/downloads/rap-node-agent-0.2.92.tar",
|
||||
@@ -41,6 +140,161 @@ func TestArtifactURLsForBackendResolvesControlPlaneRelativeDownloads(t *testing.
|
||||
}
|
||||
}
|
||||
|
||||
func TestFetchNodeUpdatePlanRejectsUnsignedPlanWithPinnedAuthority(t *testing.T) {
|
||||
stateDir, _, _ := writePinnedAuthorityIdentity(t)
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"node_update_plan": map[string]any{
|
||||
"schema_version": "rap.node_update_plan.v1",
|
||||
"cluster_id": "cluster-1",
|
||||
"node_id": "node-1",
|
||||
"product": "rap-node-agent",
|
||||
"current_version": "0.1.0",
|
||||
"action": "none",
|
||||
"reason": "already_current",
|
||||
"production_forwarding": false,
|
||||
},
|
||||
})
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
_, err := FetchNodeUpdatePlan(context.Background(), UpdateRequest{
|
||||
BackendURL: server.URL,
|
||||
ClusterID: "cluster-1",
|
||||
NodeID: "node-1",
|
||||
StateDir: stateDir,
|
||||
CurrentVersion: "0.1.0",
|
||||
OS: "linux",
|
||||
Arch: "amd64",
|
||||
InstallType: "docker",
|
||||
})
|
||||
if err == nil || !strings.Contains(err.Error(), "authority signature is required") {
|
||||
t.Fatalf("expected pinned authority rejection, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFetchNodeUpdatePlanAcceptsSignedPlanWithPinnedAuthority(t *testing.T) {
|
||||
stateDir, publicKey, privateKey := writePinnedAuthorityIdentity(t)
|
||||
plan := map[string]any{
|
||||
"schema_version": "rap.node_update_plan.v1",
|
||||
"cluster_id": "cluster-1",
|
||||
"node_id": "node-1",
|
||||
"product": "rap-node-agent",
|
||||
"current_version": "0.1.0",
|
||||
"action": "none",
|
||||
"reason": "already_current",
|
||||
"production_forwarding": false,
|
||||
}
|
||||
payload := map[string]any{
|
||||
"schema_version": "rap.node_update_plan_authority.v1",
|
||||
"cluster_id": "cluster-1",
|
||||
"node_id": "node-1",
|
||||
"product": "rap-node-agent",
|
||||
"current_version": "0.1.0",
|
||||
"action": "none",
|
||||
"target_version": "",
|
||||
"artifact_sha256": "",
|
||||
"control_plane_only": true,
|
||||
"production_forwarding": false,
|
||||
}
|
||||
rawPayload, signature := signedAuthorityPayload(t, publicKey, privateKey, payload)
|
||||
plan["authority_payload"] = json.RawMessage(rawPayload)
|
||||
plan["authority_signature"] = signature
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{"node_update_plan": plan})
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
got, err := FetchNodeUpdatePlan(context.Background(), UpdateRequest{
|
||||
BackendURL: server.URL,
|
||||
ClusterID: "cluster-1",
|
||||
NodeID: "node-1",
|
||||
StateDir: stateDir,
|
||||
CurrentVersion: "0.1.0",
|
||||
OS: "linux",
|
||||
Arch: "amd64",
|
||||
InstallType: "docker",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("fetch signed plan: %v", err)
|
||||
}
|
||||
if got.Action != "none" || got.Reason != "already_current" {
|
||||
t.Fatalf("unexpected plan: %+v", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFetchNodeUpdatePlanAcceptsQuorumSignedPlan(t *testing.T) {
|
||||
stateDir, descriptor, privateKeys := writePinnedQuorumIdentity(t)
|
||||
plan := map[string]any{
|
||||
"schema_version": "rap.node_update_plan.v1",
|
||||
"cluster_id": "cluster-1",
|
||||
"node_id": "node-1",
|
||||
"product": "rap-node-agent",
|
||||
"current_version": "0.1.0",
|
||||
"action": "none",
|
||||
"reason": "already_current",
|
||||
"production_forwarding": false,
|
||||
}
|
||||
payload := map[string]any{
|
||||
"schema_version": "rap.node_update_plan_authority.v1",
|
||||
"cluster_id": "cluster-1",
|
||||
"node_id": "node-1",
|
||||
"product": "rap-node-agent",
|
||||
"current_version": "0.1.0",
|
||||
"action": "none",
|
||||
"target_version": "",
|
||||
"artifact_sha256": "",
|
||||
"control_plane_only": true,
|
||||
"production_forwarding": false,
|
||||
}
|
||||
rawPayload, err := json.Marshal(payload)
|
||||
if err != nil {
|
||||
t.Fatalf("marshal payload: %v", err)
|
||||
}
|
||||
payloadHash, err := clusterauth.HashRaw(rawPayload)
|
||||
if err != nil {
|
||||
t.Fatalf("payload hash: %v", err)
|
||||
}
|
||||
quorumHash, err := clusterauth.QuorumDescriptorHash(descriptor)
|
||||
if err != nil {
|
||||
t.Fatalf("quorum hash: %v", err)
|
||||
}
|
||||
plan["authority_payload"] = json.RawMessage(rawPayload)
|
||||
plan["authority_quorum"] = clusterauth.QuorumEnvelope{
|
||||
SchemaVersion: clusterauth.QuorumEnvelopeVersion,
|
||||
ClusterID: "cluster-1",
|
||||
Epoch: "epoch-1",
|
||||
Threshold: 2,
|
||||
PayloadSHA256: payloadHash,
|
||||
QuorumSHA256: quorumHash,
|
||||
Signatures: []clusterauth.Signature{
|
||||
signHostAgentPayload(t, rawPayload, privateKeys[0]),
|
||||
signHostAgentPayload(t, rawPayload, privateKeys[1]),
|
||||
},
|
||||
}
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{"node_update_plan": plan})
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
got, err := FetchNodeUpdatePlan(context.Background(), UpdateRequest{
|
||||
BackendURL: server.URL,
|
||||
ClusterID: "cluster-1",
|
||||
NodeID: "node-1",
|
||||
StateDir: stateDir,
|
||||
CurrentVersion: "0.1.0",
|
||||
OS: "linux",
|
||||
Arch: "amd64",
|
||||
InstallType: "docker",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("fetch quorum plan: %v", err)
|
||||
}
|
||||
if got.Action != "none" {
|
||||
t.Fatalf("unexpected plan: %+v", got)
|
||||
}
|
||||
}
|
||||
|
||||
func (r *updateRunner) Run(_ context.Context, name string, args ...string) (string, error) {
|
||||
r.calls = append(r.calls, append([]string{name}, args...))
|
||||
if len(args) >= 2 && args[0] == "inspect" && args[1] == "--format" {
|
||||
|
||||
@@ -11,8 +11,9 @@ import (
|
||||
|
||||
func TestClientFabricSessionFrameRoundTrip(t *testing.T) {
|
||||
server := httptest.NewServer(Server{
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
FabricSessionWebSocketEnabled: true,
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
@@ -37,8 +38,9 @@ func TestClientFabricSessionFrameRoundTrip(t *testing.T) {
|
||||
|
||||
func TestClientFabricSessionPersistentRoundTrips(t *testing.T) {
|
||||
server := httptest.NewServer(Server{
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
FabricSessionWebSocketEnabled: true,
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
@@ -80,8 +82,9 @@ func TestClientFabricSessionPersistentRoundTrips(t *testing.T) {
|
||||
|
||||
func TestClientFabricSessionPersistentDataAcks(t *testing.T) {
|
||||
server := httptest.NewServer(Server{
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
FabricSessionWebSocketEnabled: true,
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
@@ -135,8 +138,9 @@ func TestClientFabricSessionPersistentDataAcks(t *testing.T) {
|
||||
|
||||
func TestClientFabricSessionPumpMovesIndependentFrames(t *testing.T) {
|
||||
server := httptest.NewServer(Server{
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
FabricSessionWebSocketEnabled: true,
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
@@ -202,8 +206,9 @@ func TestClientFabricSessionPumpMovesIndependentFrames(t *testing.T) {
|
||||
|
||||
func TestClientFabricSessionReportsRejectedStatus(t *testing.T) {
|
||||
server := httptest.NewServer(Server{
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
FabricSessionWebSocketEnabled: true,
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
|
||||
@@ -72,6 +72,10 @@ const (
|
||||
MaxProductionEnvelopePayloadBytes = 4096
|
||||
MaxProductionVPNPacketPayloadBytes = 256 * 1024
|
||||
MaxProductionEnvelopeFutureSkew = time.Minute
|
||||
ProductionForwardQUICStreamID = 1
|
||||
WebIngressForwardQUICStreamID = 2
|
||||
FabricControlForwardQUICStreamID = 3
|
||||
SyntheticForwardQUICStreamID = 1001
|
||||
)
|
||||
|
||||
type PeerIdentity struct {
|
||||
|
||||
@@ -47,6 +47,9 @@ func RankPeerEndpointCandidates(candidates []PeerEndpointCandidate, opts Endpoin
|
||||
}
|
||||
out := make([]ScoredPeerEndpointCandidate, 0, len(candidates))
|
||||
for _, candidate := range candidates {
|
||||
if endpointHasUnspecifiedHost(candidate.Address) {
|
||||
continue
|
||||
}
|
||||
out = append(out, scorePeerEndpointCandidate(candidate, opts))
|
||||
}
|
||||
sort.SliceStable(out, func(i, j int) bool {
|
||||
@@ -68,25 +71,25 @@ func scorePeerEndpointCandidate(candidate PeerEndpointCandidate, opts EndpointCa
|
||||
score := 100
|
||||
reasons := []string{"base"}
|
||||
|
||||
switch candidate.Transport {
|
||||
switch strings.ToLower(strings.TrimSpace(candidate.Transport)) {
|
||||
case "quic", "direct_quic", "udp_quic", "quic_udp":
|
||||
score += 45
|
||||
reasons = append(reasons, "transport:quic")
|
||||
case "direct_tcp_tls", "direct_http", "direct_https":
|
||||
score += 35
|
||||
reasons = append(reasons, "transport:direct")
|
||||
case "wss":
|
||||
score += 25
|
||||
reasons = append(reasons, "transport:wss")
|
||||
case "outbound_reverse":
|
||||
score += 10
|
||||
reasons = append(reasons, "transport:outbound_reverse")
|
||||
case "relay":
|
||||
case "lan_quic":
|
||||
score += 42
|
||||
reasons = append(reasons, "transport:lan_quic")
|
||||
case "ice_quic":
|
||||
score += 38
|
||||
reasons = append(reasons, "transport:ice_quic")
|
||||
case "reverse_quic":
|
||||
score += 15
|
||||
reasons = append(reasons, "transport:reverse_quic")
|
||||
case "relay_quic":
|
||||
score += 5
|
||||
reasons = append(reasons, "transport:relay")
|
||||
reasons = append(reasons, "transport:relay_quic")
|
||||
default:
|
||||
score -= 100
|
||||
reasons = append(reasons, "transport:unknown")
|
||||
reasons = append(reasons, "transport:non_quic_rejected")
|
||||
}
|
||||
|
||||
switch candidate.Reachability {
|
||||
@@ -173,7 +176,8 @@ func scorePeerEndpointCandidate(candidate PeerEndpointCandidate, opts EndpointCa
|
||||
score += 8
|
||||
reasons = append(reasons, "channel:control-direct")
|
||||
}
|
||||
if candidate.Transport == "relay" {
|
||||
transport := strings.ToLower(strings.TrimSpace(candidate.Transport))
|
||||
if transport == "relay" || transport == "relay_quic" {
|
||||
score -= 8
|
||||
reasons = append(reasons, "channel:control-relay-penalty")
|
||||
}
|
||||
@@ -234,14 +238,20 @@ func scoreEndpointCandidateObservation(observation EndpointCandidateHealthObserv
|
||||
}
|
||||
switch {
|
||||
case observation.LastLatencyMs > 0 && observation.LastLatencyMs <= 50:
|
||||
score += 18
|
||||
score += 24
|
||||
reasons = append(reasons, "latency:low")
|
||||
case observation.LastLatencyMs > 0 && observation.LastLatencyMs <= 150:
|
||||
score += 8
|
||||
reasons = append(reasons, "latency:moderate")
|
||||
case observation.LastLatencyMs > 0:
|
||||
score -= 10
|
||||
case observation.LastLatencyMs > 0 && observation.LastLatencyMs <= 300:
|
||||
score -= 12
|
||||
reasons = append(reasons, "latency:high")
|
||||
case observation.LastLatencyMs > 0 && observation.LastLatencyMs <= 750:
|
||||
score -= 32
|
||||
reasons = append(reasons, "latency:very_high")
|
||||
case observation.LastLatencyMs > 0:
|
||||
score -= 60
|
||||
reasons = append(reasons, "latency:extreme")
|
||||
}
|
||||
if observation.ReliabilityScore > 0 {
|
||||
switch {
|
||||
|
||||
@@ -13,7 +13,7 @@ func TestRankPeerEndpointCandidatesPrefersDirectFreshPublicPath(t *testing.T) {
|
||||
{
|
||||
EndpointID: "node-b-relay",
|
||||
NodeID: "node-b",
|
||||
Transport: "relay",
|
||||
Transport: "relay_quic",
|
||||
Address: "relay.example.test/node-b",
|
||||
Reachability: "relay",
|
||||
NATType: "symmetric",
|
||||
@@ -25,8 +25,8 @@ func TestRankPeerEndpointCandidatesPrefersDirectFreshPublicPath(t *testing.T) {
|
||||
{
|
||||
EndpointID: "node-b-public",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_tcp_tls",
|
||||
Address: "203.0.113.20:443",
|
||||
Transport: "direct_quic",
|
||||
Address: "quic://203.0.113.20:19443",
|
||||
Reachability: "public",
|
||||
NATType: "none",
|
||||
ConnectivityMode: "direct",
|
||||
@@ -38,8 +38,8 @@ func TestRankPeerEndpointCandidatesPrefersDirectFreshPublicPath(t *testing.T) {
|
||||
{
|
||||
EndpointID: "node-b-private-stale",
|
||||
NodeID: "node-b",
|
||||
Transport: "wss",
|
||||
Address: "10.0.0.5:443",
|
||||
Transport: "lan_quic",
|
||||
Address: "quic://10.0.0.5:19443",
|
||||
Reachability: "private",
|
||||
NATType: "restricted",
|
||||
ConnectivityMode: "direct",
|
||||
@@ -74,8 +74,8 @@ func TestRankPeerEndpointCandidatesUsesDeterministicTieBreak(t *testing.T) {
|
||||
{
|
||||
EndpointID: "endpoint-b",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_tcp_tls",
|
||||
Address: "203.0.113.21:443",
|
||||
Transport: "direct_quic",
|
||||
Address: "quic://203.0.113.21:19443",
|
||||
Reachability: "public",
|
||||
NATType: "none",
|
||||
ConnectivityMode: "direct",
|
||||
@@ -84,8 +84,8 @@ func TestRankPeerEndpointCandidatesUsesDeterministicTieBreak(t *testing.T) {
|
||||
{
|
||||
EndpointID: "endpoint-a",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_tcp_tls",
|
||||
Address: "203.0.113.20:443",
|
||||
Transport: "direct_quic",
|
||||
Address: "quic://203.0.113.20:19443",
|
||||
Reachability: "public",
|
||||
NATType: "none",
|
||||
ConnectivityMode: "direct",
|
||||
@@ -103,10 +103,10 @@ func TestRankPeerEndpointCandidatesPrefersQUICFastPath(t *testing.T) {
|
||||
now := time.Date(2026, 5, 16, 12, 0, 0, 0, time.UTC)
|
||||
candidates := []PeerEndpointCandidate{
|
||||
{
|
||||
EndpointID: "node-b-wss",
|
||||
EndpointID: "node-b-relay",
|
||||
NodeID: "node-b",
|
||||
Transport: "wss",
|
||||
Address: "wss://node-b.example.test",
|
||||
Transport: "relay_quic",
|
||||
Address: "quic://relay.example.test:19443",
|
||||
Reachability: "public",
|
||||
NATType: "none",
|
||||
ConnectivityMode: "direct",
|
||||
@@ -138,14 +138,44 @@ func TestRankPeerEndpointCandidatesPrefersQUICFastPath(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestRankPeerEndpointCandidatesDropsUnspecifiedQUICEndpoint(t *testing.T) {
|
||||
candidates := []PeerEndpointCandidate{
|
||||
{
|
||||
EndpointID: "node-b-unspecified",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_quic",
|
||||
Address: "quic://[::]:19131",
|
||||
Reachability: "public",
|
||||
NATType: "none",
|
||||
ConnectivityMode: "direct",
|
||||
Priority: 1,
|
||||
},
|
||||
{
|
||||
EndpointID: "node-b-public",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_quic",
|
||||
Address: "quic://203.0.113.20:19131",
|
||||
Reachability: "public",
|
||||
NATType: "none",
|
||||
ConnectivityMode: "direct",
|
||||
Priority: 10,
|
||||
},
|
||||
}
|
||||
|
||||
ranked := RankPeerEndpointCandidates(candidates, EndpointCandidateScoreOptions{})
|
||||
if len(ranked) != 1 || ranked[0].Candidate.EndpointID != "node-b-public" {
|
||||
t.Fatalf("unspecified endpoint was not dropped: %+v", ranked)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRankPeerEndpointCandidatesPrefersCorporatePrivateEndpoint(t *testing.T) {
|
||||
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
||||
candidates := []PeerEndpointCandidate{
|
||||
{
|
||||
EndpointID: "node-b-public",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_tcp_tls",
|
||||
Address: "203.0.113.20:443",
|
||||
Transport: "direct_quic",
|
||||
Address: "quic://203.0.113.20:19443",
|
||||
Reachability: "public",
|
||||
NATType: "none",
|
||||
ConnectivityMode: "direct",
|
||||
@@ -155,8 +185,8 @@ func TestRankPeerEndpointCandidatesPrefersCorporatePrivateEndpoint(t *testing.T)
|
||||
{
|
||||
EndpointID: "node-b-corp-lan",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_tcp_tls",
|
||||
Address: "10.24.10.20:19001",
|
||||
Transport: "lan_quic",
|
||||
Address: "quic://10.24.10.20:19443",
|
||||
Reachability: "private",
|
||||
NATType: "none",
|
||||
ConnectivityMode: "direct",
|
||||
@@ -184,7 +214,7 @@ func TestRankPeerEndpointCandidatesDoesNotDropRelayRequiredFallback(t *testing.T
|
||||
{
|
||||
EndpointID: "node-b-outbound",
|
||||
NodeID: "node-b",
|
||||
Transport: "outbound_reverse",
|
||||
Transport: "reverse_quic",
|
||||
Address: "node-b.reverse.local",
|
||||
Reachability: "outbound_only",
|
||||
NATType: "symmetric",
|
||||
@@ -194,7 +224,7 @@ func TestRankPeerEndpointCandidatesDoesNotDropRelayRequiredFallback(t *testing.T
|
||||
{
|
||||
EndpointID: "node-b-relay",
|
||||
NodeID: "node-b",
|
||||
Transport: "relay",
|
||||
Transport: "relay_quic",
|
||||
Address: "relay.example.test/node-b",
|
||||
Reachability: "relay",
|
||||
NATType: "blocked",
|
||||
@@ -222,18 +252,18 @@ func TestRankPeerEndpointCandidatesUsesHealthObservationOverlay(t *testing.T) {
|
||||
{
|
||||
EndpointID: "node-b-direct",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_tcp_tls",
|
||||
Address: "203.0.113.20:443",
|
||||
Transport: "direct_quic",
|
||||
Address: "quic://203.0.113.20:19443",
|
||||
Reachability: "public",
|
||||
NATType: "none",
|
||||
ConnectivityMode: "direct",
|
||||
Priority: 10,
|
||||
},
|
||||
{
|
||||
EndpointID: "node-b-wss",
|
||||
EndpointID: "node-b-ice",
|
||||
NodeID: "node-b",
|
||||
Transport: "wss",
|
||||
Address: "node-b.example.test",
|
||||
Transport: "ice_quic",
|
||||
Address: "quic://node-b.example.test:19443",
|
||||
Reachability: "public",
|
||||
NATType: "restricted",
|
||||
ConnectivityMode: "direct",
|
||||
@@ -253,8 +283,8 @@ func TestRankPeerEndpointCandidatesUsesHealthObservationOverlay(t *testing.T) {
|
||||
ReliabilityScore: 50,
|
||||
ObservedAt: now.Add(-time.Minute),
|
||||
},
|
||||
"node-b-wss": {
|
||||
EndpointID: "node-b-wss",
|
||||
"node-b-ice": {
|
||||
EndpointID: "node-b-ice",
|
||||
LastLatencyMs: 35,
|
||||
SuccessCount: 8,
|
||||
ReliabilityScore: 95,
|
||||
@@ -262,8 +292,8 @@ func TestRankPeerEndpointCandidatesUsesHealthObservationOverlay(t *testing.T) {
|
||||
},
|
||||
},
|
||||
})
|
||||
if ranked[0].Candidate.EndpointID != "node-b-wss" {
|
||||
t.Fatalf("top endpoint = %q, want node-b-wss: %+v", ranked[0].Candidate.EndpointID, ranked)
|
||||
if ranked[0].Candidate.EndpointID != "node-b-ice" {
|
||||
t.Fatalf("top endpoint = %q, want node-b-ice: %+v", ranked[0].Candidate.EndpointID, ranked)
|
||||
}
|
||||
if !containsReason(ranked[0].Reasons, "latency:low") || !containsReason(ranked[0].Reasons, "reliability:high") {
|
||||
t.Fatalf("top reasons missing health hints: %+v", ranked[0].Reasons)
|
||||
@@ -279,8 +309,8 @@ func TestRankPeerEndpointCandidatesTreatsStaleObservationAsPenalty(t *testing.T)
|
||||
{
|
||||
EndpointID: "node-b-direct",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_tcp_tls",
|
||||
Address: "203.0.113.20:443",
|
||||
Transport: "direct_quic",
|
||||
Address: "quic://203.0.113.20:19443",
|
||||
Reachability: "public",
|
||||
NATType: "none",
|
||||
ConnectivityMode: "direct",
|
||||
@@ -321,10 +351,10 @@ func TestRankPeerEndpointCandidatesDoesNotRewardZeroLatencyFailure(t *testing.T)
|
||||
LastVerifiedAt: &now,
|
||||
},
|
||||
{
|
||||
EndpointID: "node-b-wss",
|
||||
EndpointID: "node-b-ice",
|
||||
NodeID: "node-b",
|
||||
Transport: "wss",
|
||||
Address: "https://node-b.example.test:443",
|
||||
Transport: "ice_quic",
|
||||
Address: "quic://node-b.example.test:19444",
|
||||
Reachability: "public",
|
||||
ConnectivityMode: "direct",
|
||||
Priority: 10,
|
||||
@@ -345,14 +375,81 @@ func TestRankPeerEndpointCandidatesDoesNotRewardZeroLatencyFailure(t *testing.T)
|
||||
},
|
||||
MaxObservationAge: time.Minute,
|
||||
})
|
||||
if ranked[0].Candidate.EndpointID != "node-b-wss" {
|
||||
t.Fatalf("top endpoint = %q, want wss after repeated quic failures: %+v", ranked[0].Candidate.EndpointID, ranked)
|
||||
if ranked[0].Candidate.EndpointID != "node-b-ice" {
|
||||
t.Fatalf("top endpoint = %q, want ice_quic after repeated direct QUIC failures: %+v", ranked[0].Candidate.EndpointID, ranked)
|
||||
}
|
||||
if containsReason(ranked[1].Reasons, "latency:moderate") {
|
||||
t.Fatalf("zero latency failure was rewarded as moderate latency: %+v", ranked[1].Reasons)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRankPeerEndpointCandidatesPenalizesSevereLatencyGradient(t *testing.T) {
|
||||
now := time.Date(2026, 5, 17, 6, 0, 0, 0, time.UTC)
|
||||
candidates := []PeerEndpointCandidate{
|
||||
{
|
||||
EndpointID: "node-b-lan",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_quic",
|
||||
Address: "quic://10.0.0.2:19443",
|
||||
Reachability: "private",
|
||||
ConnectivityMode: "direct",
|
||||
LastVerifiedAt: &now,
|
||||
},
|
||||
{
|
||||
EndpointID: "node-b-wan",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_quic",
|
||||
Address: "quic://203.0.113.20:19443",
|
||||
Reachability: "public",
|
||||
ConnectivityMode: "direct",
|
||||
LastVerifiedAt: &now,
|
||||
},
|
||||
{
|
||||
EndpointID: "node-b-bad-relay",
|
||||
NodeID: "node-b",
|
||||
Transport: "relay_quic",
|
||||
Address: "quic://relay.example.test:19443",
|
||||
Reachability: "relay",
|
||||
ConnectivityMode: "relay_required",
|
||||
LastVerifiedAt: &now,
|
||||
},
|
||||
}
|
||||
ranked := RankPeerEndpointCandidates(candidates, EndpointCandidateScoreOptions{
|
||||
Now: now,
|
||||
MaxVerificationAge: time.Minute,
|
||||
MaxObservationAge: time.Minute,
|
||||
Observations: map[string]EndpointCandidateHealthObservation{
|
||||
"node-b-lan": {
|
||||
EndpointID: "node-b-lan",
|
||||
LastLatencyMs: 4,
|
||||
ReliabilityScore: 95,
|
||||
ObservedAt: now,
|
||||
},
|
||||
"node-b-wan": {
|
||||
EndpointID: "node-b-wan",
|
||||
LastLatencyMs: 420,
|
||||
ReliabilityScore: 95,
|
||||
ObservedAt: now,
|
||||
},
|
||||
"node-b-bad-relay": {
|
||||
EndpointID: "node-b-bad-relay",
|
||||
LastLatencyMs: 900,
|
||||
ReliabilityScore: 95,
|
||||
ObservedAt: now,
|
||||
},
|
||||
},
|
||||
})
|
||||
if ranked[0].Candidate.EndpointID != "node-b-lan" || ranked[1].Candidate.EndpointID != "node-b-wan" || ranked[2].Candidate.EndpointID != "node-b-bad-relay" {
|
||||
t.Fatalf("ranked endpoints = %+v, want lan, wan, bad relay", ranked)
|
||||
}
|
||||
if !containsReason(ranked[1].Reasons, "latency:very_high") {
|
||||
t.Fatalf("wan reasons = %+v, want latency:very_high", ranked[1].Reasons)
|
||||
}
|
||||
if !containsReason(ranked[2].Reasons, "latency:extreme") {
|
||||
t.Fatalf("relay reasons = %+v, want latency:extreme", ranked[2].Reasons)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRankPeerEndpointCandidatesTreatsCapacityAsSoftPressure(t *testing.T) {
|
||||
now := time.Date(2026, 5, 16, 12, 0, 0, 0, time.UTC)
|
||||
ranked := RankPeerEndpointCandidates([]PeerEndpointCandidate{
|
||||
|
||||
@@ -0,0 +1,217 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// FabricChannelRouteEventType labels what a FabricChannelRouter call did to a
// channel's route.
type FabricChannelRouteEventType string

// Route event types reported in FabricChannelRouteEvent.Type.
const (
	// FabricChannelRouteEventNone is the zero value; ObserveChannel reports it
	// when the channel stays on its current route.
	FabricChannelRouteEventNone FabricChannelRouteEventType = ""
	// FabricChannelRouteEventOpened is reported by OpenChannel.
	FabricChannelRouteEventOpened FabricChannelRouteEventType = "opened"
	// FabricChannelRouteEventReroute is reported by ObserveChannel when the
	// channel is moved to a different route.
	FabricChannelRouteEventReroute FabricChannelRouteEventType = "reroute"
)
|
||||
|
||||
// ErrFabricRouteRerouteSuppressed is a sentinel error for a reroute that was
// suppressed. NOTE(review): nothing in this file returns it; confirm where it
// is used before relying on it.
var ErrFabricRouteRerouteSuppressed = errors.New("fabric route reroute suppressed")
|
||||
|
||||
type FabricChannelRouterConfig struct {
|
||||
SchedulerConfig FabricRouteSchedulerConfig
|
||||
MaxAckLatencyMs int64
|
||||
MaxRoutePressure int
|
||||
MinRerouteInterval time.Duration
|
||||
ProjectedChannelCost int
|
||||
}
|
||||
|
||||
type FabricChannelRouter struct {
|
||||
Config FabricChannelRouterConfig
|
||||
Scheduler FabricRouteScheduler
|
||||
}
|
||||
|
||||
// FabricChannelObservation is one measurement reported for a channel. It feeds
// both the channel's traffic counters and the reroute decision.
type FabricChannelObservation struct {
	// ChannelID and RouteID identify the observed channel and route.
	ChannelID string
	RouteID   string
	// AckLatencyMs is compared against FabricChannelRouterConfig.MaxAckLatencyMs.
	AckLatencyMs int64
	// Failed requests a reroute, subject to the reroute interval.
	Failed bool
	// Traffic deltas that ObserveChannel adds to the channel's counters.
	BytesSent, BytesRecv, FramesSent, FramesRecv uint64
	// Reason, when not blank, is used verbatim as the reroute event's reason.
	Reason string
	// ObservedAt defaults to the observation call's "now" when zero.
	ObservedAt time.Time
}
|
||||
|
||||
type FabricChannelRouteEvent struct {
|
||||
Type FabricChannelRouteEventType
|
||||
Reason string
|
||||
PreviousRoute FabricRoute
|
||||
NextRoute FabricRoute
|
||||
Choice FabricRouteChoice
|
||||
Observation FabricChannelObservation
|
||||
Channel FabricChannel
|
||||
OccurredAt time.Time
|
||||
}
|
||||
|
||||
func NewFabricChannelRouter(cfg FabricChannelRouterConfig) FabricChannelRouter {
|
||||
cfg = normalizeFabricChannelRouterConfig(cfg)
|
||||
return FabricChannelRouter{
|
||||
Config: cfg,
|
||||
Scheduler: NewFabricRouteScheduler(cfg.SchedulerConfig),
|
||||
}
|
||||
}
|
||||
|
||||
func (r FabricChannelRouter) OpenChannel(spec FabricChannelSpec, routeSet FabricRouteSet, now time.Time) (FabricChannel, FabricChannelRouteEvent, error) {
|
||||
if now.IsZero() {
|
||||
now = time.Now().UTC()
|
||||
}
|
||||
choice, err := r.Scheduler.ChooseRoute(spec, routeSet, now)
|
||||
if err != nil {
|
||||
return FabricChannel{}, FabricChannelRouteEvent{}, err
|
||||
}
|
||||
channel := FabricChannel{
|
||||
Spec: spec,
|
||||
State: FabricChannelOpen,
|
||||
RouteID: choice.Route.RouteID,
|
||||
TargetNode: choice.Route.DestinationNodeID,
|
||||
OpenedAt: now,
|
||||
}
|
||||
event := FabricChannelRouteEvent{
|
||||
Type: FabricChannelRouteEventOpened,
|
||||
Reason: choice.Reason,
|
||||
NextRoute: choice.Route,
|
||||
Choice: choice,
|
||||
Channel: channel,
|
||||
OccurredAt: now,
|
||||
}
|
||||
return channel, event, nil
|
||||
}
|
||||
|
||||
func (r FabricChannelRouter) ObserveChannel(channel FabricChannel, routeSet FabricRouteSet, observation FabricChannelObservation, now time.Time) (FabricChannel, FabricChannelRouteEvent, error) {
|
||||
if now.IsZero() {
|
||||
now = time.Now().UTC()
|
||||
}
|
||||
if observation.ObservedAt.IsZero() {
|
||||
observation.ObservedAt = now
|
||||
}
|
||||
channel.BytesSent += observation.BytesSent
|
||||
channel.BytesRecv += observation.BytesRecv
|
||||
channel.FramesSent += observation.FramesSent
|
||||
channel.FramesRecv += observation.FramesRecv
|
||||
if channel.State == "" {
|
||||
channel.State = FabricChannelOpen
|
||||
}
|
||||
if !r.shouldReroute(channel, observation, routeSet, now) {
|
||||
return channel, FabricChannelRouteEvent{Type: FabricChannelRouteEventNone, Observation: observation, Channel: channel, OccurredAt: now}, nil
|
||||
}
|
||||
previous, _ := findFabricRoute(routeSet, channel.RouteID)
|
||||
choice, err := r.chooseAlternativeRoute(channel.Spec, routeSet, channel.RouteID, now)
|
||||
if err != nil {
|
||||
return channel, FabricChannelRouteEvent{}, err
|
||||
}
|
||||
channel.RouteID = choice.Route.RouteID
|
||||
channel.TargetNode = choice.Route.DestinationNodeID
|
||||
channel.LastReroute = now
|
||||
channel.RerouteCount++
|
||||
reason := observation.Reason
|
||||
if strings.TrimSpace(reason) == "" {
|
||||
reason = rerouteReason(r.Config, observation, previous)
|
||||
}
|
||||
event := FabricChannelRouteEvent{
|
||||
Type: FabricChannelRouteEventReroute,
|
||||
Reason: reason,
|
||||
PreviousRoute: previous,
|
||||
NextRoute: choice.Route,
|
||||
Choice: choice,
|
||||
Observation: observation,
|
||||
Channel: channel,
|
||||
OccurredAt: now,
|
||||
}
|
||||
return channel, event, nil
|
||||
}
|
||||
|
||||
func (r FabricChannelRouter) shouldReroute(channel FabricChannel, observation FabricChannelObservation, routeSet FabricRouteSet, now time.Time) bool {
|
||||
cfg := normalizeFabricChannelRouterConfig(r.Config)
|
||||
if cfg.MinRerouteInterval > 0 && !channel.LastReroute.IsZero() && now.Sub(channel.LastReroute) < cfg.MinRerouteInterval {
|
||||
return false
|
||||
}
|
||||
if observation.Failed {
|
||||
return true
|
||||
}
|
||||
if cfg.MaxAckLatencyMs > 0 && observation.AckLatencyMs > cfg.MaxAckLatencyMs {
|
||||
return true
|
||||
}
|
||||
if cfg.MaxRoutePressure > 0 {
|
||||
if route, ok := findFabricRoute(routeSet, channel.RouteID); ok && fabricRoutePressurePercent(route, cfg.ProjectedChannelCost) > cfg.MaxRoutePressure {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (r FabricChannelRouter) chooseAlternativeRoute(spec FabricChannelSpec, routeSet FabricRouteSet, currentRouteID string, now time.Time) (FabricRouteChoice, error) {
|
||||
routes := flattenFabricRouteSet(routeSet)
|
||||
alternatives := make([]FabricRoute, 0, len(routes))
|
||||
for _, route := range routes {
|
||||
if route.RouteID == currentRouteID {
|
||||
continue
|
||||
}
|
||||
alternatives = append(alternatives, route)
|
||||
}
|
||||
if len(alternatives) == 0 {
|
||||
return FabricRouteChoice{}, ErrFabricRouteNotFound
|
||||
}
|
||||
return r.Scheduler.ChooseRoute(spec, routeSetFromRoutes(routeSet, alternatives), now)
|
||||
}
|
||||
|
||||
func normalizeFabricChannelRouterConfig(cfg FabricChannelRouterConfig) FabricChannelRouterConfig {
|
||||
if cfg.ProjectedChannelCost <= 0 {
|
||||
cfg.ProjectedChannelCost = 1
|
||||
}
|
||||
if cfg.SchedulerConfig.ProjectedChannelCost <= 0 {
|
||||
cfg.SchedulerConfig.ProjectedChannelCost = cfg.ProjectedChannelCost
|
||||
}
|
||||
if cfg.MaxRoutePressure <= 0 {
|
||||
cfg.MaxRoutePressure = 90
|
||||
}
|
||||
return cfg
|
||||
}
|
||||
|
||||
func rerouteReason(cfg FabricChannelRouterConfig, observation FabricChannelObservation, route FabricRoute) string {
|
||||
cfg = normalizeFabricChannelRouterConfig(cfg)
|
||||
switch {
|
||||
case observation.Failed:
|
||||
return "route_failure"
|
||||
case cfg.MaxAckLatencyMs > 0 && observation.AckLatencyMs > cfg.MaxAckLatencyMs:
|
||||
return "ack_latency_threshold"
|
||||
case cfg.MaxRoutePressure > 0 && fabricRoutePressurePercent(route, cfg.ProjectedChannelCost) > cfg.MaxRoutePressure:
|
||||
return "route_capacity_pressure"
|
||||
default:
|
||||
return "route_degraded"
|
||||
}
|
||||
}
|
||||
|
||||
func findFabricRoute(routeSet FabricRouteSet, routeID string) (FabricRoute, bool) {
|
||||
routeID = strings.TrimSpace(routeID)
|
||||
if routeID == "" {
|
||||
return FabricRoute{}, false
|
||||
}
|
||||
for _, route := range flattenFabricRouteSet(routeSet) {
|
||||
if route.RouteID == routeID {
|
||||
return route, true
|
||||
}
|
||||
}
|
||||
return FabricRoute{}, false
|
||||
}
|
||||
|
||||
func routeSetFromRoutes(template FabricRouteSet, routes []FabricRoute) FabricRouteSet {
|
||||
out := FabricRouteSet{TargetKind: template.TargetKind, TargetID: template.TargetID}
|
||||
if len(routes) == 0 {
|
||||
return out
|
||||
}
|
||||
out.Primary = routes[0]
|
||||
if len(routes) > 1 {
|
||||
out.WarmStandby = append(out.WarmStandby, routes[1:]...)
|
||||
}
|
||||
return out
|
||||
}
|
||||
@@ -0,0 +1,151 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestFabricChannelRouterOpensOnBestRoute(t *testing.T) {
|
||||
router := NewFabricChannelRouter(FabricChannelRouterConfig{})
|
||||
now := time.Now()
|
||||
channel, event, err := router.OpenChannel(testFabricChannelSpec(FabricChannelTargetNode, "node-b"), FabricRouteSet{
|
||||
TargetKind: FabricChannelTargetNode,
|
||||
TargetID: "node-b",
|
||||
Primary: testFabricRoute("route-slow", "node-b", 80, 100, 0, true),
|
||||
WarmStandby: []FabricRoute{
|
||||
testFabricRoute("route-fast", "node-b", 15, 100, 0, true),
|
||||
},
|
||||
}, now)
|
||||
if err != nil {
|
||||
t.Fatalf("open channel: %v", err)
|
||||
}
|
||||
if channel.RouteID != "route-fast" || channel.State != FabricChannelOpen {
|
||||
t.Fatalf("channel = %+v, want route-fast open", channel)
|
||||
}
|
||||
if event.Type != FabricChannelRouteEventOpened || event.NextRoute.RouteID != "route-fast" {
|
||||
t.Fatalf("event = %+v", event)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricChannelRouterReroutesOnSlowAck(t *testing.T) {
|
||||
router := NewFabricChannelRouter(FabricChannelRouterConfig{MaxAckLatencyMs: 30})
|
||||
now := time.Now()
|
||||
routeSet := FabricRouteSet{
|
||||
TargetKind: FabricChannelTargetNode,
|
||||
TargetID: "node-b",
|
||||
Primary: testFabricRoute("route-primary", "node-b", 10, 100, 0, true),
|
||||
WarmStandby: []FabricRoute{
|
||||
testFabricRoute("route-standby", "node-b", 20, 100, 0, true),
|
||||
},
|
||||
}
|
||||
channel := FabricChannel{
|
||||
Spec: testFabricChannelSpec(FabricChannelTargetNode, "node-b"),
|
||||
State: FabricChannelOpen,
|
||||
RouteID: "route-primary",
|
||||
OpenedAt: now.Add(-time.Minute),
|
||||
}
|
||||
updated, event, err := router.ObserveChannel(channel, routeSet, FabricChannelObservation{
|
||||
ChannelID: channel.Spec.ChannelID,
|
||||
RouteID: channel.RouteID,
|
||||
AckLatencyMs: 120,
|
||||
BytesSent: 4096,
|
||||
FramesSent: 4,
|
||||
}, now)
|
||||
if err != nil {
|
||||
t.Fatalf("observe channel: %v", err)
|
||||
}
|
||||
if event.Type != FabricChannelRouteEventReroute || event.Reason != "ack_latency_threshold" {
|
||||
t.Fatalf("event = %+v", event)
|
||||
}
|
||||
if updated.RouteID != "route-standby" || updated.RerouteCount != 1 || updated.BytesSent != 4096 || updated.FramesSent != 4 {
|
||||
t.Fatalf("updated = %+v", updated)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricChannelRouterReroutesPoolTargetOnFailure(t *testing.T) {
|
||||
router := NewFabricChannelRouter(FabricChannelRouterConfig{})
|
||||
now := time.Now()
|
||||
routeSet := FabricRouteSet{
|
||||
TargetKind: FabricChannelTargetPool,
|
||||
TargetID: "pool-egress",
|
||||
Primary: testFabricPoolRoute("route-node-b", "node-b", 10, true),
|
||||
WarmStandby: []FabricRoute{
|
||||
testFabricPoolRoute("route-node-c", "node-c", 20, true),
|
||||
},
|
||||
}
|
||||
channel := FabricChannel{
|
||||
Spec: testFabricChannelSpec(FabricChannelTargetPool, "pool-egress"),
|
||||
State: FabricChannelOpen,
|
||||
RouteID: "route-node-b",
|
||||
TargetNode: "node-b",
|
||||
OpenedAt: now.Add(-time.Minute),
|
||||
}
|
||||
updated, event, err := router.ObserveChannel(channel, routeSet, FabricChannelObservation{
|
||||
ChannelID: channel.Spec.ChannelID,
|
||||
RouteID: channel.RouteID,
|
||||
Failed: true,
|
||||
Reason: "target_failed",
|
||||
}, now)
|
||||
if err != nil {
|
||||
t.Fatalf("observe channel: %v", err)
|
||||
}
|
||||
if event.Type != FabricChannelRouteEventReroute || event.PreviousRoute.RouteID != "route-node-b" || event.NextRoute.RouteID != "route-node-c" {
|
||||
t.Fatalf("event = %+v", event)
|
||||
}
|
||||
if updated.TargetNode != "node-c" || updated.RouteID != "route-node-c" {
|
||||
t.Fatalf("updated = %+v", updated)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricChannelRouterSuppressesRerouteInsideHysteresis(t *testing.T) {
|
||||
router := NewFabricChannelRouter(FabricChannelRouterConfig{MaxAckLatencyMs: 30, MinRerouteInterval: time.Minute})
|
||||
now := time.Now()
|
||||
channel := FabricChannel{
|
||||
Spec: testFabricChannelSpec(FabricChannelTargetNode, "node-b"),
|
||||
State: FabricChannelOpen,
|
||||
RouteID: "route-primary",
|
||||
LastReroute: now.Add(-10 * time.Second),
|
||||
}
|
||||
updated, event, err := router.ObserveChannel(channel, FabricRouteSet{
|
||||
TargetKind: FabricChannelTargetNode,
|
||||
TargetID: "node-b",
|
||||
Primary: testFabricRoute("route-primary", "node-b", 10, 100, 0, true),
|
||||
WarmStandby: []FabricRoute{testFabricRoute("route-standby", "node-b", 20, 100, 0, true)},
|
||||
}, FabricChannelObservation{AckLatencyMs: 120}, now)
|
||||
if err != nil {
|
||||
t.Fatalf("observe channel: %v", err)
|
||||
}
|
||||
if event.Type != FabricChannelRouteEventNone || updated.RouteID != "route-primary" {
|
||||
t.Fatalf("event=%+v updated=%+v", event, updated)
|
||||
}
|
||||
}
|
||||
|
||||
func testFabricChannelSpec(kind FabricChannelTargetKind, targetID string) FabricChannelSpec {
|
||||
return FabricChannelSpec{
|
||||
ChannelID: "channel-1",
|
||||
ClusterID: "cluster-1",
|
||||
SourceNodeID: "node-a",
|
||||
TargetKind: kind,
|
||||
TargetID: targetID,
|
||||
}
|
||||
}
|
||||
|
||||
func testFabricRoute(routeID string, destination string, latency int, capacity int, active int, healthy bool) FabricRoute {
|
||||
return FabricRoute{
|
||||
RouteID: routeID,
|
||||
ClusterID: "cluster-1",
|
||||
SourceNodeID: "node-a",
|
||||
DestinationNodeID: destination,
|
||||
Hops: []FabricRouteHop{{NodeID: "node-a"}, {NodeID: destination}},
|
||||
BaseLatencyMs: latency,
|
||||
Capacity: capacity,
|
||||
ActiveChannels: active,
|
||||
Healthy: healthy,
|
||||
}
|
||||
}
|
||||
|
||||
func testFabricPoolRoute(routeID string, destination string, latency int, healthy bool) FabricRoute {
|
||||
route := testFabricRoute(routeID, destination, latency, 100, 0, healthy)
|
||||
route.PoolID = "pool-egress"
|
||||
return route
|
||||
}
|
||||
@@ -0,0 +1,487 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
|
||||
)
|
||||
|
||||
type FabricChannelRuntimeConfig struct {
|
||||
RouterConfig FabricChannelRouterConfig
|
||||
StreamID uint64
|
||||
TrafficClass fabricproto.TrafficClass
|
||||
Timeout time.Duration
|
||||
MaxPayload int
|
||||
RouteHealthTTL time.Duration
|
||||
}
|
||||
|
||||
type FabricChannelRuntime struct {
|
||||
Transport FabricTransport
|
||||
Router FabricChannelRouter
|
||||
Pressure *FabricRoutePressureTracker
|
||||
Health *FabricRouteHealthTracker
|
||||
Config FabricChannelRuntimeConfig
|
||||
}
|
||||
|
||||
type FabricChannelRuntimeResult struct {
|
||||
Channel FabricChannel
|
||||
BytesSent uint64
|
||||
BytesRecv uint64
|
||||
FramesSent uint64
|
||||
FramesRecv uint64
|
||||
AcksReceived uint64
|
||||
RouteEvents []FabricChannelRouteEvent
|
||||
RouteAttempts []string
|
||||
MigrationEvents int
|
||||
RoutePressure FabricRoutePressureSnapshot
|
||||
RouteHealth FabricRouteHealthSnapshot
|
||||
}
|
||||
|
||||
type FabricChannelRequestResponseResult struct {
|
||||
FabricChannelRuntimeResult
|
||||
ResponsePayload []byte
|
||||
}
|
||||
|
||||
func NewFabricChannelRuntime(transport FabricTransport, cfg FabricChannelRuntimeConfig) *FabricChannelRuntime {
|
||||
if cfg.StreamID == 0 {
|
||||
cfg.StreamID = 2
|
||||
}
|
||||
if cfg.TrafficClass == 0 {
|
||||
cfg.TrafficClass = fabricproto.TrafficClassBulk
|
||||
}
|
||||
if cfg.Timeout <= 0 {
|
||||
cfg.Timeout = 30 * time.Second
|
||||
}
|
||||
if cfg.MaxPayload <= 0 {
|
||||
cfg.MaxPayload = fabricproto.DefaultMaxPayload
|
||||
}
|
||||
return &FabricChannelRuntime{
|
||||
Transport: transport,
|
||||
Router: NewFabricChannelRouter(cfg.RouterConfig),
|
||||
Pressure: NewFabricRoutePressureTracker(),
|
||||
Health: NewFabricRouteHealthTracker(cfg.RouteHealthTTL),
|
||||
Config: cfg,
|
||||
}
|
||||
}
|
||||
|
||||
func (r *FabricChannelRuntime) SendReliable(ctx context.Context, spec FabricChannelSpec, routeSet FabricRouteSet, payloads [][]byte) (FabricChannelRuntimeResult, error) {
|
||||
if r == nil || r.Transport == nil {
|
||||
return FabricChannelRuntimeResult{}, ErrForwardRuntimeUnavailable
|
||||
}
|
||||
now := time.Now().UTC()
|
||||
routeSet = r.routeSetForScheduling(routeSet)
|
||||
channel, event, err := r.Router.OpenChannel(spec, routeSet, now)
|
||||
if err != nil {
|
||||
return FabricChannelRuntimeResult{}, err
|
||||
}
|
||||
result := FabricChannelRuntimeResult{Channel: channel, RouteEvents: []FabricChannelRouteEvent{event}}
|
||||
sequence := uint64(0)
|
||||
index := 0
|
||||
for index < len(payloads) {
|
||||
routeSet = r.routeSetForScheduling(routeSet)
|
||||
route, ok := findFabricRoute(routeSet, channel.RouteID)
|
||||
if !ok {
|
||||
return result, ErrFabricRouteNotFound
|
||||
}
|
||||
result.RouteAttempts = append(result.RouteAttempts, route.RouteID)
|
||||
target, err := FabricTransportTargetForRoute(route)
|
||||
if err != nil {
|
||||
return result, err
|
||||
}
|
||||
releaseRoute := r.acquireRoute(route.RouteID)
|
||||
session, err := r.Transport.Connect(ctx, target)
|
||||
if err != nil {
|
||||
releaseRoute()
|
||||
r.markRouteFailure(route.RouteID, err)
|
||||
updated, event, rerouteErr := r.Router.ObserveChannel(channel, routeSet, FabricChannelObservation{
|
||||
ChannelID: spec.ChannelID,
|
||||
RouteID: route.RouteID,
|
||||
Failed: true,
|
||||
Reason: "connect_failed",
|
||||
ObservedAt: time.Now().UTC(),
|
||||
}, time.Now().UTC())
|
||||
channel = updated
|
||||
result.Channel = channel
|
||||
if event.Type == FabricChannelRouteEventReroute {
|
||||
result.RouteEvents = append(result.RouteEvents, event)
|
||||
result.MigrationEvents++
|
||||
continue
|
||||
}
|
||||
if rerouteErr != nil {
|
||||
return result, rerouteErr
|
||||
}
|
||||
return result, err
|
||||
}
|
||||
migrated, sendErr := r.sendOnSession(ctx, session, &channel, routeSet, route, payloads, &index, &sequence, &result)
|
||||
_ = session.Close()
|
||||
releaseRoute()
|
||||
result.Channel = channel
|
||||
if sendErr != nil {
|
||||
return result, sendErr
|
||||
}
|
||||
if !migrated {
|
||||
break
|
||||
}
|
||||
}
|
||||
result.Channel = channel
|
||||
result.RoutePressure = r.snapshotRoutePressure()
|
||||
result.RouteHealth = r.snapshotRouteHealth()
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (r *FabricChannelRuntime) SendRequestResponse(ctx context.Context, spec FabricChannelSpec, routeSet FabricRouteSet, payload []byte) (FabricChannelRequestResponseResult, error) {
|
||||
if r == nil || r.Transport == nil {
|
||||
return FabricChannelRequestResponseResult{}, ErrForwardRuntimeUnavailable
|
||||
}
|
||||
if len(payload) > r.Config.MaxPayload {
|
||||
return FabricChannelRequestResponseResult{}, fmt.Errorf("%w: %d > %d", fabricproto.ErrInvalidPayloadLen, len(payload), r.Config.MaxPayload)
|
||||
}
|
||||
now := time.Now().UTC()
|
||||
routeSet = r.routeSetForScheduling(routeSet)
|
||||
channel, event, err := r.Router.OpenChannel(spec, routeSet, now)
|
||||
if err != nil {
|
||||
return FabricChannelRequestResponseResult{}, err
|
||||
}
|
||||
result := FabricChannelRequestResponseResult{
|
||||
FabricChannelRuntimeResult: FabricChannelRuntimeResult{Channel: channel, RouteEvents: []FabricChannelRouteEvent{event}},
|
||||
}
|
||||
sequence := uint64(1)
|
||||
for {
|
||||
routeSet = r.routeSetForScheduling(routeSet)
|
||||
route, ok := findFabricRoute(routeSet, channel.RouteID)
|
||||
if !ok {
|
||||
return result, ErrFabricRouteNotFound
|
||||
}
|
||||
result.RouteAttempts = append(result.RouteAttempts, route.RouteID)
|
||||
target, err := FabricTransportTargetForRoute(route)
|
||||
if err != nil {
|
||||
return result, err
|
||||
}
|
||||
releaseRoute := r.acquireRoute(route.RouteID)
|
||||
session, err := r.Transport.Connect(ctx, target)
|
||||
if err != nil {
|
||||
releaseRoute()
|
||||
r.markRouteFailure(route.RouteID, err)
|
||||
updated, routeEvent, rerouteErr := r.Router.ObserveChannel(channel, routeSet, FabricChannelObservation{
|
||||
ChannelID: spec.ChannelID,
|
||||
RouteID: route.RouteID,
|
||||
Failed: true,
|
||||
Reason: "connect_failed",
|
||||
ObservedAt: time.Now().UTC(),
|
||||
}, time.Now().UTC())
|
||||
channel = updated
|
||||
result.Channel = channel
|
||||
if routeEvent.Type == FabricChannelRouteEventReroute {
|
||||
result.RouteEvents = append(result.RouteEvents, routeEvent)
|
||||
result.MigrationEvents++
|
||||
continue
|
||||
}
|
||||
if rerouteErr != nil {
|
||||
return result, rerouteErr
|
||||
}
|
||||
return result, err
|
||||
}
|
||||
response, ackMs, sendErr := r.sendRequestResponseOnSession(ctx, session, route.RouteID, spec.ChannelID, payload, sequence)
|
||||
_ = session.Close()
|
||||
releaseRoute()
|
||||
result.Channel = channel
|
||||
if sendErr == nil {
|
||||
r.markRouteSuccess(route.RouteID)
|
||||
result.BytesSent += uint64(len(payload))
|
||||
result.FramesSent++
|
||||
result.BytesRecv += uint64(len(response))
|
||||
result.FramesRecv++
|
||||
result.AcksReceived++
|
||||
updated, routeEvent, observeErr := r.Router.ObserveChannel(channel, routeSet, FabricChannelObservation{
|
||||
ChannelID: spec.ChannelID,
|
||||
RouteID: route.RouteID,
|
||||
AckLatencyMs: ackMs,
|
||||
BytesSent: uint64(len(payload)),
|
||||
FramesSent: 1,
|
||||
BytesRecv: uint64(len(response)),
|
||||
FramesRecv: 1,
|
||||
ObservedAt: time.Now().UTC(),
|
||||
}, time.Now().UTC())
|
||||
channel = updated
|
||||
result.Channel = channel
|
||||
if observeErr != nil {
|
||||
return result, observeErr
|
||||
}
|
||||
if routeEvent.Type == FabricChannelRouteEventReroute {
|
||||
result.RouteEvents = append(result.RouteEvents, routeEvent)
|
||||
result.MigrationEvents++
|
||||
}
|
||||
result.ResponsePayload = response
|
||||
result.RoutePressure = r.snapshotRoutePressure()
|
||||
result.RouteHealth = r.snapshotRouteHealth()
|
||||
return result, nil
|
||||
}
|
||||
r.markRouteFailure(route.RouteID, sendErr)
|
||||
updated, routeEvent, rerouteErr := r.Router.ObserveChannel(channel, routeSet, FabricChannelObservation{
|
||||
ChannelID: spec.ChannelID,
|
||||
RouteID: route.RouteID,
|
||||
Failed: true,
|
||||
Reason: "response_failed",
|
||||
ObservedAt: time.Now().UTC(),
|
||||
}, time.Now().UTC())
|
||||
channel = updated
|
||||
result.Channel = channel
|
||||
if routeEvent.Type == FabricChannelRouteEventReroute {
|
||||
result.RouteEvents = append(result.RouteEvents, routeEvent)
|
||||
result.MigrationEvents++
|
||||
continue
|
||||
}
|
||||
if rerouteErr != nil {
|
||||
return result, rerouteErr
|
||||
}
|
||||
return result, sendErr
|
||||
}
|
||||
}
|
||||
|
||||
func (r *FabricChannelRuntime) routeSetForScheduling(routeSet FabricRouteSet) FabricRouteSet {
|
||||
if r != nil && r.Health != nil {
|
||||
routeSet = r.Health.Apply(routeSet, time.Now().UTC())
|
||||
}
|
||||
return r.routeSetWithActiveChannels(routeSet)
|
||||
}
|
||||
|
||||
func (r *FabricChannelRuntime) routeSetWithActiveChannels(routeSet FabricRouteSet) FabricRouteSet {
|
||||
if r == nil || r.Pressure == nil {
|
||||
return routeSet
|
||||
}
|
||||
return r.Pressure.Apply(routeSet)
|
||||
}
|
||||
|
||||
func (r *FabricChannelRuntime) acquireRoute(routeID string) func() {
|
||||
if r == nil || r.Pressure == nil {
|
||||
return func() {}
|
||||
}
|
||||
return r.Pressure.Acquire(routeID)
|
||||
}
|
||||
|
||||
func (r *FabricChannelRuntime) snapshotRoutePressure() FabricRoutePressureSnapshot {
|
||||
if r == nil || r.Pressure == nil {
|
||||
return FabricRoutePressureSnapshot{}
|
||||
}
|
||||
return r.Pressure.SnapshotPressure()
|
||||
}
|
||||
|
||||
func (r *FabricChannelRuntime) snapshotRouteHealth() FabricRouteHealthSnapshot {
|
||||
if r == nil || r.Health == nil {
|
||||
return FabricRouteHealthSnapshot{}
|
||||
}
|
||||
return r.Health.Snapshot(time.Now().UTC())
|
||||
}
|
||||
|
||||
func (r *FabricChannelRuntime) markRouteFailure(routeID string, err error) {
|
||||
if r == nil || r.Health == nil || err == nil {
|
||||
return
|
||||
}
|
||||
r.Health.MarkFailure(routeID, err.Error(), time.Now().UTC())
|
||||
}
|
||||
|
||||
func (r *FabricChannelRuntime) markRouteSuccess(routeID string) {
|
||||
if r == nil || r.Health == nil {
|
||||
return
|
||||
}
|
||||
r.Health.MarkSuccess(routeID)
|
||||
}
|
||||
|
||||
func (r *FabricChannelRuntime) sendOnSession(ctx context.Context, session FabricTransportSession, channel *FabricChannel, routeSet FabricRouteSet, route FabricRoute, payloads [][]byte, index *int, sequence *uint64, result *FabricChannelRuntimeResult) (bool, error) {
|
||||
cfg := r.Config
|
||||
if err := session.Send(ctx, fabricproto.Frame{
|
||||
Type: fabricproto.FrameOpenStream,
|
||||
TrafficClass: cfg.TrafficClass,
|
||||
StreamID: cfg.StreamID,
|
||||
}); err != nil {
|
||||
r.markRouteFailure(route.RouteID, err)
|
||||
return false, err
|
||||
}
|
||||
for *index < len(payloads) {
|
||||
payload := payloads[*index]
|
||||
if len(payload) > cfg.MaxPayload {
|
||||
return false, fmt.Errorf("%w: %d > %d", fabricproto.ErrInvalidPayloadLen, len(payload), cfg.MaxPayload)
|
||||
}
|
||||
(*sequence)++
|
||||
if err := session.Send(ctx, fabricproto.Frame{
|
||||
Type: fabricproto.FrameData,
|
||||
TrafficClass: cfg.TrafficClass,
|
||||
StreamID: cfg.StreamID,
|
||||
Sequence: *sequence,
|
||||
Payload: payload,
|
||||
}); err != nil {
|
||||
r.markRouteFailure(route.RouteID, err)
|
||||
return false, err
|
||||
}
|
||||
ackOK, ackMs := waitForFabricRuntimeAck(ctx, session, cfg.StreamID, *sequence, cfg.Timeout)
|
||||
if !ackOK {
|
||||
r.markRouteFailure(route.RouteID, fmt.Errorf("ack_failed"))
|
||||
updated, event, err := r.Router.ObserveChannel(*channel, routeSet, FabricChannelObservation{
|
||||
ChannelID: channel.Spec.ChannelID,
|
||||
RouteID: route.RouteID,
|
||||
Failed: true,
|
||||
Reason: "ack_failed",
|
||||
ObservedAt: time.Now().UTC(),
|
||||
}, time.Now().UTC())
|
||||
*channel = updated
|
||||
if event.Type == FabricChannelRouteEventReroute {
|
||||
result.RouteEvents = append(result.RouteEvents, event)
|
||||
result.MigrationEvents++
|
||||
return true, nil
|
||||
}
|
||||
return false, err
|
||||
}
|
||||
r.markRouteSuccess(route.RouteID)
|
||||
*index++
|
||||
result.BytesSent += uint64(len(payload))
|
||||
result.FramesSent++
|
||||
result.AcksReceived++
|
||||
updated, event, err := r.Router.ObserveChannel(*channel, routeSet, FabricChannelObservation{
|
||||
ChannelID: channel.Spec.ChannelID,
|
||||
RouteID: route.RouteID,
|
||||
AckLatencyMs: ackMs,
|
||||
BytesSent: uint64(len(payload)),
|
||||
FramesSent: 1,
|
||||
ObservedAt: time.Now().UTC(),
|
||||
}, time.Now().UTC())
|
||||
*channel = updated
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
if event.Type == FabricChannelRouteEventReroute {
|
||||
result.RouteEvents = append(result.RouteEvents, event)
|
||||
result.MigrationEvents++
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
_ = session.Send(context.Background(), fabricproto.Frame{
|
||||
Type: fabricproto.FrameCloseStream,
|
||||
TrafficClass: cfg.TrafficClass,
|
||||
StreamID: cfg.StreamID,
|
||||
})
|
||||
return false, nil
|
||||
}
|
||||
|
||||
func (r *FabricChannelRuntime) sendRequestResponseOnSession(ctx context.Context, session FabricTransportSession, routeID string, channelID string, payload []byte, sequence uint64) ([]byte, int64, error) {
|
||||
cfg := r.Config
|
||||
if err := session.Send(ctx, fabricproto.Frame{
|
||||
Type: fabricproto.FrameOpenStream,
|
||||
TrafficClass: cfg.TrafficClass,
|
||||
StreamID: cfg.StreamID,
|
||||
}); err != nil {
|
||||
r.markRouteFailure(routeID, err)
|
||||
return nil, 0, err
|
||||
}
|
||||
started := time.Now()
|
||||
if err := session.Send(ctx, fabricproto.Frame{
|
||||
Type: fabricproto.FrameData,
|
||||
TrafficClass: cfg.TrafficClass,
|
||||
StreamID: cfg.StreamID,
|
||||
Sequence: sequence,
|
||||
Payload: payload,
|
||||
}); err != nil {
|
||||
r.markRouteFailure(routeID, err)
|
||||
return nil, 0, err
|
||||
}
|
||||
waitCtx := ctx
|
||||
if cfg.Timeout > 0 {
|
||||
var cancel context.CancelFunc
|
||||
waitCtx, cancel = context.WithTimeout(ctx, cfg.Timeout)
|
||||
defer cancel()
|
||||
}
|
||||
for {
|
||||
select {
|
||||
case <-waitCtx.Done():
|
||||
return nil, 0, waitCtx.Err()
|
||||
case err, ok := <-session.Errors():
|
||||
if !ok {
|
||||
return nil, 0, ErrForwardPeerUnavailable
|
||||
}
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
case frame, ok := <-session.Frames():
|
||||
if !ok {
|
||||
return nil, 0, ErrForwardPeerUnavailable
|
||||
}
|
||||
if frame.Type != fabricproto.FrameData || frame.StreamID != cfg.StreamID || frame.Sequence != sequence {
|
||||
continue
|
||||
}
|
||||
_ = session.Send(context.Background(), fabricproto.Frame{
|
||||
Type: fabricproto.FrameCloseStream,
|
||||
TrafficClass: cfg.TrafficClass,
|
||||
StreamID: cfg.StreamID,
|
||||
})
|
||||
return append([]byte(nil), frame.Payload...), time.Since(started).Milliseconds(), nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func FabricTransportTargetForRoute(route FabricRoute) (FabricTransportTarget, error) {
|
||||
if strings.TrimSpace(route.RouteID) == "" {
|
||||
return FabricTransportTarget{}, ErrFabricRouteNotFound
|
||||
}
|
||||
if route.RelayCount > 0 {
|
||||
for _, hop := range route.Hops {
|
||||
if hop.Mode != FabricRouteRelay {
|
||||
continue
|
||||
}
|
||||
if target, ok := fabricTransportTargetForHop(hop); ok {
|
||||
return target, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
for i := len(route.Hops) - 1; i >= 0; i-- {
|
||||
if target, ok := fabricTransportTargetForHop(route.Hops[i]); ok {
|
||||
return target, nil
|
||||
}
|
||||
}
|
||||
return FabricTransportTarget{}, fmt.Errorf("%w: route %s has no transport endpoint", ErrFabricRouteNotFound, route.RouteID)
|
||||
}
|
||||
|
||||
func fabricTransportTargetForHop(hop FabricRouteHop) (FabricTransportTarget, bool) {
|
||||
endpoint := strings.TrimSpace(hop.Address)
|
||||
if endpoint == "" {
|
||||
return FabricTransportTarget{}, false
|
||||
}
|
||||
transport := string(hop.Mode)
|
||||
if transport == "" {
|
||||
transport = "quic"
|
||||
}
|
||||
return FabricTransportTarget{
|
||||
EndpointID: hop.EndpointID,
|
||||
PeerID: strings.TrimSpace(hop.NodeID),
|
||||
Endpoint: endpoint,
|
||||
Transport: transport,
|
||||
PeerCertSHA256: strings.TrimSpace(hop.PeerCertSHA256),
|
||||
}, true
|
||||
}
|
||||
|
||||
func waitForFabricRuntimeAck(ctx context.Context, session FabricTransportSession, streamID uint64, sequence uint64, timeout time.Duration) (bool, int64) {
|
||||
started := time.Now()
|
||||
if timeout > 0 {
|
||||
var cancel context.CancelFunc
|
||||
ctx, cancel = context.WithTimeout(ctx, timeout)
|
||||
defer cancel()
|
||||
}
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return false, 0
|
||||
case err, ok := <-session.Errors():
|
||||
if !ok || err != nil {
|
||||
return false, 0
|
||||
}
|
||||
case frame, ok := <-session.Frames():
|
||||
if !ok {
|
||||
return false, 0
|
||||
}
|
||||
if frame.Type == fabricproto.FrameAck && frame.StreamID == streamID && frame.Sequence == sequence {
|
||||
return true, time.Since(started).Milliseconds()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,495 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
|
||||
)
|
||||
|
||||
func TestFabricChannelRuntimeMigratesSlowAckToStandbyRoute(t *testing.T) {
|
||||
transport := newFakeFabricRuntimeTransport(map[string]time.Duration{
|
||||
"quic://slow.example.test:19443": 60 * time.Millisecond,
|
||||
"quic://fast.example.test:19443": 0,
|
||||
})
|
||||
runtime := NewFabricChannelRuntime(transport, FabricChannelRuntimeConfig{
|
||||
RouterConfig: FabricChannelRouterConfig{MaxAckLatencyMs: 30},
|
||||
StreamID: 9,
|
||||
})
|
||||
routeSet := FabricRouteSet{
|
||||
TargetKind: FabricChannelTargetNode,
|
||||
TargetID: "node-b",
|
||||
Primary: testRuntimeRoute("route-slow", "node-b", "quic://slow.example.test:19443", 10),
|
||||
WarmStandby: []FabricRoute{
|
||||
testRuntimeRoute("route-fast", "node-b", "quic://fast.example.test:19443", 20),
|
||||
},
|
||||
}
|
||||
result, err := runtime.SendReliable(context.Background(), testFabricChannelSpec(FabricChannelTargetNode, "node-b"), routeSet, [][]byte{
|
||||
[]byte("one"),
|
||||
[]byte("two"),
|
||||
[]byte("three"),
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("send reliable: %v", err)
|
||||
}
|
||||
if result.MigrationEvents != 1 {
|
||||
t.Fatalf("migration events = %d, want 1: %+v", result.MigrationEvents, result.RouteEvents)
|
||||
}
|
||||
if result.Channel.RouteID != "route-fast" || result.Channel.RerouteCount != 1 {
|
||||
t.Fatalf("channel = %+v", result.Channel)
|
||||
}
|
||||
if result.BytesSent != uint64(len("one")+len("two")+len("three")) || result.AcksReceived != 3 {
|
||||
t.Fatalf("result = %+v", result)
|
||||
}
|
||||
if got := transport.connectCount("quic://slow.example.test:19443"); got != 1 {
|
||||
t.Fatalf("slow connect count = %d, want 1", got)
|
||||
}
|
||||
if got := transport.connectCount("quic://fast.example.test:19443"); got != 1 {
|
||||
t.Fatalf("fast connect count = %d, want 1", got)
|
||||
}
|
||||
if result.RoutePressure.AcquiredTotal != 2 || result.RoutePressure.ReleasedTotal != 2 || result.RoutePressure.MaxActiveTotal == 0 {
|
||||
t.Fatalf("route pressure = %+v", result.RoutePressure)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricChannelRuntimeReroutesOnConnectFailure(t *testing.T) {
|
||||
transport := newFakeFabricRuntimeTransport(map[string]time.Duration{
|
||||
"quic://fast.example.test:19443": 0,
|
||||
})
|
||||
transport.failConnect["quic://dead.example.test:19443"] = true
|
||||
runtime := NewFabricChannelRuntime(transport, FabricChannelRuntimeConfig{
|
||||
RouterConfig: FabricChannelRouterConfig{MaxAckLatencyMs: 30},
|
||||
StreamID: 9,
|
||||
})
|
||||
routeSet := FabricRouteSet{
|
||||
TargetKind: FabricChannelTargetNode,
|
||||
TargetID: "node-b",
|
||||
Primary: testRuntimeRoute("route-dead", "node-b", "quic://dead.example.test:19443", 10),
|
||||
WarmStandby: []FabricRoute{
|
||||
testRuntimeRoute("route-fast", "node-b", "quic://fast.example.test:19443", 20),
|
||||
},
|
||||
}
|
||||
result, err := runtime.SendReliable(context.Background(), testFabricChannelSpec(FabricChannelTargetNode, "node-b"), routeSet, [][]byte{[]byte("payload")})
|
||||
if err != nil {
|
||||
t.Fatalf("send reliable: %v", err)
|
||||
}
|
||||
if result.MigrationEvents != 1 || result.Channel.RouteID != "route-fast" || result.BytesSent != uint64(len("payload")) {
|
||||
t.Fatalf("result = %+v", result)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricChannelRuntimeQuarantinesFailedRouteAcrossChannels(t *testing.T) {
|
||||
transport := newFakeFabricRuntimeTransport(map[string]time.Duration{
|
||||
"quic://fast.example.test:19443": 0,
|
||||
})
|
||||
transport.failConnect["quic://dead.example.test:19443"] = true
|
||||
runtime := NewFabricChannelRuntime(transport, FabricChannelRuntimeConfig{
|
||||
RouterConfig: FabricChannelRouterConfig{MaxAckLatencyMs: 30},
|
||||
StreamID: 9,
|
||||
RouteHealthTTL: time.Minute,
|
||||
})
|
||||
routeSet := FabricRouteSet{
|
||||
TargetKind: FabricChannelTargetNode,
|
||||
TargetID: "node-b",
|
||||
Primary: testRuntimeRoute("route-dead", "node-b", "quic://dead.example.test:19443", 10),
|
||||
WarmStandby: []FabricRoute{
|
||||
testRuntimeRoute("route-fast", "node-b", "quic://fast.example.test:19443", 20),
|
||||
},
|
||||
}
|
||||
|
||||
first, err := runtime.SendReliable(context.Background(), testFabricChannelSpec(FabricChannelTargetNode, "node-b"), routeSet, [][]byte{[]byte("first")})
|
||||
if err != nil {
|
||||
t.Fatalf("first send reliable: %v", err)
|
||||
}
|
||||
if first.Channel.RouteID != "route-fast" || first.RouteHealth.Quarantined["route-dead"].Failures != 1 {
|
||||
t.Fatalf("first result = %+v", first)
|
||||
}
|
||||
second, err := runtime.SendReliable(context.Background(), testFabricChannelSpec(FabricChannelTargetNode, "node-b"), routeSet, [][]byte{[]byte("second")})
|
||||
if err != nil {
|
||||
t.Fatalf("second send reliable: %v", err)
|
||||
}
|
||||
if second.Channel.RouteID != "route-fast" {
|
||||
t.Fatalf("second route = %s, want route-fast", second.Channel.RouteID)
|
||||
}
|
||||
if got := transport.connectCount("quic://dead.example.test:19443"); got != 1 {
|
||||
t.Fatalf("dead connect count = %d, want one attempt before quarantine", got)
|
||||
}
|
||||
if got := transport.connectCount("quic://fast.example.test:19443"); got != 2 {
|
||||
t.Fatalf("fast connect count = %d, want both channels on healthy route", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricChannelRuntimeReroutesOnAckTimeout(t *testing.T) {
|
||||
transport := newFakeFabricRuntimeTransport(map[string]time.Duration{
|
||||
"quic://slow.example.test:19443": 100 * time.Millisecond,
|
||||
"quic://fast.example.test:19443": 0,
|
||||
})
|
||||
runtime := NewFabricChannelRuntime(transport, FabricChannelRuntimeConfig{
|
||||
RouterConfig: FabricChannelRouterConfig{MaxAckLatencyMs: 30},
|
||||
StreamID: 9,
|
||||
Timeout: 10 * time.Millisecond,
|
||||
})
|
||||
routeSet := FabricRouteSet{
|
||||
TargetKind: FabricChannelTargetNode,
|
||||
TargetID: "node-b",
|
||||
Primary: testRuntimeRoute("route-slow", "node-b", "quic://slow.example.test:19443", 10),
|
||||
WarmStandby: []FabricRoute{
|
||||
testRuntimeRoute("route-fast", "node-b", "quic://fast.example.test:19443", 20),
|
||||
},
|
||||
}
|
||||
result, err := runtime.SendReliable(context.Background(), testFabricChannelSpec(FabricChannelTargetNode, "node-b"), routeSet, [][]byte{[]byte("payload")})
|
||||
if err != nil {
|
||||
t.Fatalf("send reliable: %v", err)
|
||||
}
|
||||
if result.MigrationEvents != 1 || result.Channel.RouteID != "route-fast" || result.BytesSent != uint64(len("payload")) {
|
||||
t.Fatalf("result = %+v", result)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricChannelRuntimeSpreadsConcurrentChannelsBySharedPressure(t *testing.T) {
|
||||
transport := newFakeFabricRuntimeTransport(map[string]time.Duration{
|
||||
"quic://route-a.example.test:19443": 80 * time.Millisecond,
|
||||
"quic://route-b.example.test:19443": 0,
|
||||
})
|
||||
runtime := NewFabricChannelRuntime(transport, FabricChannelRuntimeConfig{StreamID: 9})
|
||||
routeSet := FabricRouteSet{
|
||||
TargetKind: FabricChannelTargetNode,
|
||||
TargetID: "node-b",
|
||||
Primary: testRuntimeRoute("route-a", "node-b", "quic://route-a.example.test:19443", 10),
|
||||
WarmStandby: []FabricRoute{
|
||||
testRuntimeRoute("route-b", "node-b", "quic://route-b.example.test:19443", 11),
|
||||
},
|
||||
}
|
||||
|
||||
firstDone := make(chan error, 1)
|
||||
go func() {
|
||||
_, err := runtime.SendReliable(context.Background(), testFabricChannelSpec(FabricChannelTargetNode, "node-b"), routeSet, [][]byte{[]byte("one")})
|
||||
firstDone <- err
|
||||
}()
|
||||
transport.waitForConnect(t, "quic://route-a.example.test:19443", 1)
|
||||
result, err := runtime.SendReliable(context.Background(), testFabricChannelSpec(FabricChannelTargetNode, "node-b"), routeSet, [][]byte{[]byte("two")})
|
||||
if err != nil {
|
||||
t.Fatalf("second send reliable: %v", err)
|
||||
}
|
||||
if result.Channel.RouteID != "route-b" {
|
||||
t.Fatalf("second route = %s, want route-b", result.Channel.RouteID)
|
||||
}
|
||||
if got := transport.connectCount("quic://route-b.example.test:19443"); got != 1 {
|
||||
t.Fatalf("route-b connect count = %d, want 1", got)
|
||||
}
|
||||
if err := <-firstDone; err != nil {
|
||||
t.Fatalf("first send reliable: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricChannelRuntimeRequestResponseReturnsPayload(t *testing.T) {
|
||||
transport := newFakeFabricRequestResponseTransport(map[string][]byte{
|
||||
"quic://runtime.example.test:19443": []byte(`{"status":"ok"}`),
|
||||
})
|
||||
runtime := NewFabricChannelRuntime(transport, FabricChannelRuntimeConfig{
|
||||
RouterConfig: FabricChannelRouterConfig{MaxAckLatencyMs: 30},
|
||||
StreamID: 9,
|
||||
})
|
||||
routeSet := FabricRouteSet{
|
||||
TargetKind: FabricChannelTargetPool,
|
||||
TargetID: "pool-admin-runtime",
|
||||
Primary: testRuntimePoolRoute("route-runtime", "pool-admin-runtime", "node-runtime", "quic://runtime.example.test:19443", 10),
|
||||
}
|
||||
|
||||
result, err := runtime.SendRequestResponse(context.Background(), FabricChannelSpec{
|
||||
ChannelID: "channel-web-1",
|
||||
ClusterID: "cluster-1",
|
||||
SourceNodeID: "node-a",
|
||||
TargetKind: FabricChannelTargetPool,
|
||||
TargetID: "pool-admin-runtime",
|
||||
TrafficClass: "control",
|
||||
CreatedAt: time.Now().UTC(),
|
||||
}, routeSet, []byte(`{"request":true}`))
|
||||
if err != nil {
|
||||
t.Fatalf("request response: %v", err)
|
||||
}
|
||||
if string(result.ResponsePayload) != `{"status":"ok"}` {
|
||||
t.Fatalf("response payload = %s", string(result.ResponsePayload))
|
||||
}
|
||||
if result.Channel.RouteID != "route-runtime" ||
|
||||
result.BytesSent != uint64(len(`{"request":true}`)) ||
|
||||
result.BytesRecv != uint64(len(`{"status":"ok"}`)) ||
|
||||
result.FramesSent != 1 ||
|
||||
result.FramesRecv != 1 ||
|
||||
result.AcksReceived != 1 {
|
||||
t.Fatalf("result = %+v", result)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricChannelRuntimeRequestResponseReroutesOnResponseFailure(t *testing.T) {
|
||||
transport := newFakeFabricRequestResponseTransport(map[string][]byte{
|
||||
"quic://fast.example.test:19443": []byte(`{"status":"ok"}`),
|
||||
})
|
||||
transport.failResponse["quic://slow.example.test:19443"] = true
|
||||
runtime := NewFabricChannelRuntime(transport, FabricChannelRuntimeConfig{
|
||||
RouterConfig: FabricChannelRouterConfig{MaxAckLatencyMs: 30},
|
||||
StreamID: 9,
|
||||
Timeout: 10 * time.Millisecond,
|
||||
})
|
||||
routeSet := FabricRouteSet{
|
||||
TargetKind: FabricChannelTargetNode,
|
||||
TargetID: "node-runtime",
|
||||
Primary: testRuntimeRoute("route-slow", "node-runtime", "quic://slow.example.test:19443", 10),
|
||||
WarmStandby: []FabricRoute{
|
||||
testRuntimeRoute("route-fast", "node-runtime", "quic://fast.example.test:19443", 20),
|
||||
},
|
||||
}
|
||||
|
||||
result, err := runtime.SendRequestResponse(context.Background(), testFabricChannelSpec(FabricChannelTargetNode, "node-runtime"), routeSet, []byte(`{"request":true}`))
|
||||
if err != nil {
|
||||
t.Fatalf("request response: %v", err)
|
||||
}
|
||||
if result.MigrationEvents != 1 || result.Channel.RouteID != "route-fast" || string(result.ResponsePayload) != `{"status":"ok"}` {
|
||||
t.Fatalf("result = %+v", result)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricTransportTargetForRouteUsesLastAddressedHop(t *testing.T) {
|
||||
target, err := FabricTransportTargetForRoute(FabricRoute{
|
||||
RouteID: "route-1",
|
||||
Hops: []FabricRouteHop{
|
||||
{NodeID: "node-a"},
|
||||
{NodeID: "node-r", Mode: FabricRouteRelay, EndpointID: "relay-1", Address: "quic://relay.example.test:19443"},
|
||||
{NodeID: "node-b", Mode: FabricRouteDirect, EndpointID: "node-b-quic", Address: "quic://node-b.example.test:19443"},
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("target for route: %v", err)
|
||||
}
|
||||
if target.PeerID != "node-b" || target.EndpointID != "node-b-quic" || target.Endpoint != "quic://node-b.example.test:19443" || target.Transport != string(FabricRouteDirect) {
|
||||
t.Fatalf("target = %+v", target)
|
||||
}
|
||||
}
|
||||
|
||||
type fakeFabricRequestResponseTransport struct {
|
||||
mu sync.Mutex
|
||||
responses map[string][]byte
|
||||
failResponse map[string]bool
|
||||
connects map[string]int
|
||||
}
|
||||
|
||||
func newFakeFabricRequestResponseTransport(responses map[string][]byte) *fakeFabricRequestResponseTransport {
|
||||
return &fakeFabricRequestResponseTransport{
|
||||
responses: responses,
|
||||
failResponse: map[string]bool{},
|
||||
connects: map[string]int{},
|
||||
}
|
||||
}
|
||||
|
||||
func (t *fakeFabricRequestResponseTransport) Connect(_ context.Context, target FabricTransportTarget) (FabricTransportSession, error) {
|
||||
endpoint := target.Endpoint
|
||||
t.mu.Lock()
|
||||
t.connects[endpoint]++
|
||||
response := append([]byte(nil), t.responses[endpoint]...)
|
||||
failResponse := t.failResponse[endpoint]
|
||||
t.mu.Unlock()
|
||||
return &fakeFabricRequestResponseSession{
|
||||
response: response,
|
||||
failResponse: failResponse,
|
||||
frames: make(chan fabricproto.Frame, 16),
|
||||
errors: make(chan error, 1),
|
||||
done: make(chan struct{}),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (t *fakeFabricRequestResponseTransport) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
type fakeFabricRequestResponseSession struct {
|
||||
response []byte
|
||||
failResponse bool
|
||||
frames chan fabricproto.Frame
|
||||
errors chan error
|
||||
done chan struct{}
|
||||
once sync.Once
|
||||
}
|
||||
|
||||
func (s *fakeFabricRequestResponseSession) Send(_ context.Context, frame fabricproto.Frame) error {
|
||||
if frame.Type != fabricproto.FrameData || s.failResponse {
|
||||
return nil
|
||||
}
|
||||
response := append([]byte(nil), s.response...)
|
||||
go func() {
|
||||
select {
|
||||
case <-s.done:
|
||||
case s.frames <- fabricproto.Frame{Type: fabricproto.FrameData, TrafficClass: frame.TrafficClass, StreamID: frame.StreamID, Sequence: frame.Sequence, Payload: response}:
|
||||
}
|
||||
}()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *fakeFabricRequestResponseSession) Frames() <-chan fabricproto.Frame {
|
||||
return s.frames
|
||||
}
|
||||
|
||||
func (s *fakeFabricRequestResponseSession) Errors() <-chan error {
|
||||
return s.errors
|
||||
}
|
||||
|
||||
func (s *fakeFabricRequestResponseSession) Close() error {
|
||||
s.once.Do(func() {
|
||||
close(s.done)
|
||||
})
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *fakeFabricRequestResponseSession) Closed() bool {
|
||||
select {
|
||||
case <-s.done:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricTransportTargetForRouteUsesRelayHopForRelayRoute(t *testing.T) {
|
||||
target, err := FabricTransportTargetForRoute(FabricRoute{
|
||||
RouteID: "route-relay",
|
||||
RelayCount: 1,
|
||||
Hops: []FabricRouteHop{
|
||||
{NodeID: "node-a"},
|
||||
{NodeID: "node-r", Mode: FabricRouteRelay, EndpointID: "relay-1", Address: "quic://relay.example.test:19443", PeerCertSHA256: "relay-cert"},
|
||||
{NodeID: "node-b", Mode: FabricRouteRelay, EndpointID: "node-b-private", Address: "quic://10.0.0.2:19443", PeerCertSHA256: "node-b-cert"},
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("target for relay route: %v", err)
|
||||
}
|
||||
if target.PeerID != "node-r" || target.EndpointID != "relay-1" || target.Endpoint != "quic://relay.example.test:19443" || target.PeerCertSHA256 != "relay-cert" {
|
||||
t.Fatalf("target = %+v", target)
|
||||
}
|
||||
}
|
||||
|
||||
type fakeFabricRuntimeTransport struct {
|
||||
mu sync.Mutex
|
||||
delays map[string]time.Duration
|
||||
failConnect map[string]bool
|
||||
connects map[string]int
|
||||
}
|
||||
|
||||
func newFakeFabricRuntimeTransport(delays map[string]time.Duration) *fakeFabricRuntimeTransport {
|
||||
return &fakeFabricRuntimeTransport{
|
||||
delays: delays,
|
||||
failConnect: map[string]bool{},
|
||||
connects: map[string]int{},
|
||||
}
|
||||
}
|
||||
|
||||
func (t *fakeFabricRuntimeTransport) Connect(_ context.Context, target FabricTransportTarget) (FabricTransportSession, error) {
|
||||
endpoint := target.Endpoint
|
||||
t.mu.Lock()
|
||||
t.connects[endpoint]++
|
||||
fail := t.failConnect[endpoint]
|
||||
delay := t.delays[endpoint]
|
||||
t.mu.Unlock()
|
||||
if fail {
|
||||
return nil, ErrForwardPeerUnavailable
|
||||
}
|
||||
return &fakeFabricRuntimeSession{
|
||||
endpoint: endpoint,
|
||||
delay: delay,
|
||||
frames: make(chan fabricproto.Frame, 64),
|
||||
errors: make(chan error, 1),
|
||||
done: make(chan struct{}),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (t *fakeFabricRuntimeTransport) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (t *fakeFabricRuntimeTransport) connectCount(endpoint string) int {
|
||||
t.mu.Lock()
|
||||
defer t.mu.Unlock()
|
||||
return t.connects[endpoint]
|
||||
}
|
||||
|
||||
func (t *fakeFabricRuntimeTransport) waitForConnect(tb testing.TB, endpoint string, count int) {
|
||||
tb.Helper()
|
||||
deadline := time.Now().Add(time.Second)
|
||||
for {
|
||||
t.mu.Lock()
|
||||
got := t.connects[endpoint]
|
||||
t.mu.Unlock()
|
||||
if got >= count {
|
||||
return
|
||||
}
|
||||
if time.Now().After(deadline) {
|
||||
tb.Fatalf("timed out waiting for %s connect count %d, got %d", endpoint, count, got)
|
||||
}
|
||||
time.Sleep(time.Millisecond)
|
||||
}
|
||||
}
|
||||
|
||||
type fakeFabricRuntimeSession struct {
|
||||
endpoint string
|
||||
delay time.Duration
|
||||
frames chan fabricproto.Frame
|
||||
errors chan error
|
||||
done chan struct{}
|
||||
once sync.Once
|
||||
}
|
||||
|
||||
func (s *fakeFabricRuntimeSession) Send(_ context.Context, frame fabricproto.Frame) error {
|
||||
if frame.Type != fabricproto.FrameData {
|
||||
return nil
|
||||
}
|
||||
delay := s.delay
|
||||
go func() {
|
||||
if delay > 0 {
|
||||
time.Sleep(delay)
|
||||
}
|
||||
select {
|
||||
case <-s.done:
|
||||
case s.frames <- fabricproto.Frame{Type: fabricproto.FrameAck, TrafficClass: frame.TrafficClass, StreamID: frame.StreamID, Sequence: frame.Sequence}:
|
||||
}
|
||||
}()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *fakeFabricRuntimeSession) Frames() <-chan fabricproto.Frame {
|
||||
return s.frames
|
||||
}
|
||||
|
||||
func (s *fakeFabricRuntimeSession) Errors() <-chan error {
|
||||
return s.errors
|
||||
}
|
||||
|
||||
func (s *fakeFabricRuntimeSession) Close() error {
|
||||
s.once.Do(func() {
|
||||
close(s.done)
|
||||
})
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *fakeFabricRuntimeSession) Closed() bool {
|
||||
select {
|
||||
case <-s.done:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func testRuntimeRoute(routeID string, destination string, endpoint string, latency int) FabricRoute {
|
||||
route := testFabricRoute(routeID, destination, latency, 100, 0, true)
|
||||
route.Hops[len(route.Hops)-1].Address = endpoint
|
||||
route.Hops[len(route.Hops)-1].EndpointID = strings.TrimPrefix(routeID, "route-")
|
||||
route.Hops[len(route.Hops)-1].Mode = FabricRouteDirect
|
||||
return route
|
||||
}
|
||||
|
||||
func testRuntimePoolRoute(routeID string, poolID string, destination string, endpoint string, latency int) FabricRoute {
|
||||
route := testRuntimeRoute(routeID, destination, endpoint, latency)
|
||||
route.PoolID = poolID
|
||||
return route
|
||||
}
|
||||
@@ -0,0 +1,390 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// FabricChannelTargetKind names what a fabric channel is addressed to.
type FabricChannelTargetKind string

// Target kinds accepted by ValidateFabricChannelSpec.
const (
	FabricChannelTargetNode = FabricChannelTargetKind("node")
	FabricChannelTargetPool = FabricChannelTargetKind("pool")
)
|
||||
|
||||
// FabricChannelLifecycleState is the lifecycle phase recorded on a
// FabricChannel.
type FabricChannelLifecycleState string

// Lifecycle states of a fabric channel.
const (
	FabricChannelOpening  = FabricChannelLifecycleState("opening")
	FabricChannelOpen     = FabricChannelLifecycleState("open")
	FabricChannelDraining = FabricChannelLifecycleState("draining")
	FabricChannelClosed   = FabricChannelLifecycleState("closed")
)
|
||||
|
||||
// FabricRouteMode identifies how a hop or adjacency is reached.
type FabricRouteMode string

// Route modes known to the fabric.
const (
	FabricRouteDirect  = FabricRouteMode("direct_quic")
	FabricRouteLAN     = FabricRouteMode("lan_quic")
	FabricRouteReverse = FabricRouteMode("reverse_quic")
	FabricRouteRelay   = FabricRouteMode("relay_quic")
	FabricRouteICE     = FabricRouteMode("ice_quic")
)
|
||||
|
||||
// ErrFabricChannelInvalid is returned when a channel spec or request is
// missing a required identifier or names an unknown target kind.
var ErrFabricChannelInvalid = errors.New("fabric channel request is invalid")

// ErrFabricRouteNotFound is returned when no route is available or none
// survives filtering.
var ErrFabricRouteNotFound = errors.New("fabric route not found")
|
||||
|
||||
type FabricChannelSpec struct {
|
||||
ChannelID string
|
||||
ClusterID string
|
||||
SourceNodeID string
|
||||
TargetKind FabricChannelTargetKind
|
||||
TargetID string
|
||||
TrafficClass string
|
||||
MinBandwidth int64
|
||||
StickyKey string
|
||||
CreatedAt time.Time
|
||||
ForbiddenHops []string
|
||||
}
|
||||
|
||||
type FabricServiceChannelTarget struct {
|
||||
Kind FabricChannelTargetKind
|
||||
PoolIDs []string
|
||||
NodeIDs []string
|
||||
SelectedNodeID string
|
||||
ServiceRole string
|
||||
SelectionPolicy string
|
||||
SingleMemberPool bool
|
||||
}
|
||||
|
||||
type FabricServiceChannelRequest struct {
|
||||
SchemaVersion string
|
||||
ChannelID string
|
||||
ClusterID string
|
||||
OrganizationID string
|
||||
UserID string
|
||||
ResourceID string
|
||||
SourceNodeID string
|
||||
SourceRole string
|
||||
ServiceClass string
|
||||
Target FabricServiceChannelTarget
|
||||
TrafficClass string
|
||||
CreatedAt time.Time
|
||||
}
|
||||
|
||||
type FabricChannel struct {
|
||||
Spec FabricChannelSpec
|
||||
State FabricChannelLifecycleState
|
||||
RouteID string
|
||||
TargetNode string
|
||||
OpenedAt time.Time
|
||||
LastReroute time.Time
|
||||
BytesSent uint64
|
||||
BytesRecv uint64
|
||||
FramesSent uint64
|
||||
FramesRecv uint64
|
||||
RerouteCount uint64
|
||||
}
|
||||
|
||||
type FabricRouteHop struct {
|
||||
NodeID string
|
||||
Mode FabricRouteMode
|
||||
EndpointID string
|
||||
Address string
|
||||
PeerCertSHA256 string
|
||||
}
|
||||
|
||||
type FabricRoute struct {
|
||||
RouteID string
|
||||
ClusterID string
|
||||
SourceNodeID string
|
||||
DestinationNodeID string
|
||||
PoolID string
|
||||
Hops []FabricRouteHop
|
||||
BaseLatencyMs int
|
||||
JitterMs int
|
||||
LossPermille int
|
||||
Capacity int
|
||||
ActiveChannels int
|
||||
RelayCount int
|
||||
LastUpdatedAt time.Time
|
||||
Healthy bool
|
||||
Degraded bool
|
||||
}
|
||||
|
||||
type FabricRouteSet struct {
|
||||
TargetKind FabricChannelTargetKind
|
||||
TargetID string
|
||||
Primary FabricRoute
|
||||
WarmStandby []FabricRoute
|
||||
ColdFallbacks []FabricRoute
|
||||
}
|
||||
|
||||
type FabricAdjacency struct {
|
||||
FromNodeID string
|
||||
ToNodeID string
|
||||
Mode FabricRouteMode
|
||||
RTTMs int
|
||||
JitterMs int
|
||||
LossPermille int
|
||||
Capacity int
|
||||
ActiveChannels int
|
||||
ThroughputBps int64
|
||||
PressurePercent int
|
||||
Healthy bool
|
||||
PassiveOutbound bool
|
||||
LocalSegmentID string
|
||||
NATGroupID string
|
||||
LastObservedAt time.Time
|
||||
LastFailureReason string
|
||||
}
|
||||
|
||||
type FabricRouteChoice struct {
|
||||
Route FabricRoute
|
||||
Score int
|
||||
Reason string
|
||||
PressureBefore int
|
||||
PressureAfter int
|
||||
}
|
||||
|
||||
// FabricRouteSchedulerConfig holds the scoring weights and limits of a
// FabricRouteScheduler. normalizeFabricRouteSchedulerConfig replaces
// non-positive weights, penalties and ProjectedChannelCost with defaults.
type FabricRouteSchedulerConfig struct {
	LatencyWeight, JitterWeight, LossWeight, PressureWeight int

	HopPenalty, RelayPenalty, DegradedPenalty int

	// ProjectedChannelCost is added to a route's active channels when
	// computing the pressure after admission.
	ProjectedChannelCost int

	// HardMaxRoutePressure, when positive, excludes routes whose projected
	// pressure exceeds it.
	HardMaxRoutePressure int
}
|
||||
|
||||
type FabricRouteScheduler struct {
|
||||
Config FabricRouteSchedulerConfig
|
||||
}
|
||||
|
||||
func NewFabricRouteScheduler(cfg FabricRouteSchedulerConfig) FabricRouteScheduler {
|
||||
return FabricRouteScheduler{Config: normalizeFabricRouteSchedulerConfig(cfg)}
|
||||
}
|
||||
|
||||
func (s FabricRouteScheduler) ChooseRoute(spec FabricChannelSpec, routeSet FabricRouteSet, now time.Time) (FabricRouteChoice, error) {
|
||||
if err := ValidateFabricChannelSpec(spec); err != nil {
|
||||
return FabricRouteChoice{}, err
|
||||
}
|
||||
routes := flattenFabricRouteSet(routeSet)
|
||||
if len(routes) == 0 {
|
||||
return FabricRouteChoice{}, ErrFabricRouteNotFound
|
||||
}
|
||||
forbidden := stringSet(spec.ForbiddenHops)
|
||||
choices := make([]FabricRouteChoice, 0, len(routes))
|
||||
for _, route := range routes {
|
||||
if !fabricRouteUsable(spec, route, forbidden, now) {
|
||||
continue
|
||||
}
|
||||
choice := s.scoreRoute(route)
|
||||
if s.Config.HardMaxRoutePressure > 0 && choice.PressureAfter > s.Config.HardMaxRoutePressure {
|
||||
continue
|
||||
}
|
||||
choice.Route = route
|
||||
choices = append(choices, choice)
|
||||
}
|
||||
if len(choices) == 0 {
|
||||
return FabricRouteChoice{}, ErrFabricRouteNotFound
|
||||
}
|
||||
sort.SliceStable(choices, func(i, j int) bool {
|
||||
if choices[i].Score != choices[j].Score {
|
||||
return choices[i].Score < choices[j].Score
|
||||
}
|
||||
if choices[i].PressureAfter != choices[j].PressureAfter {
|
||||
return choices[i].PressureAfter < choices[j].PressureAfter
|
||||
}
|
||||
if choices[i].Route.BaseLatencyMs != choices[j].Route.BaseLatencyMs {
|
||||
return choices[i].Route.BaseLatencyMs < choices[j].Route.BaseLatencyMs
|
||||
}
|
||||
return choices[i].Route.RouteID < choices[j].Route.RouteID
|
||||
})
|
||||
return choices[0], nil
|
||||
}
|
||||
|
||||
func ValidateFabricChannelSpec(spec FabricChannelSpec) error {
|
||||
if strings.TrimSpace(spec.ChannelID) == "" || strings.TrimSpace(spec.ClusterID) == "" || strings.TrimSpace(spec.SourceNodeID) == "" || strings.TrimSpace(spec.TargetID) == "" {
|
||||
return ErrFabricChannelInvalid
|
||||
}
|
||||
switch spec.TargetKind {
|
||||
case FabricChannelTargetNode, FabricChannelTargetPool:
|
||||
return nil
|
||||
default:
|
||||
return ErrFabricChannelInvalid
|
||||
}
|
||||
}
|
||||
|
||||
func FabricChannelSpecFromServiceRequest(req FabricServiceChannelRequest, localNodeID string, now time.Time) (FabricChannelSpec, error) {
|
||||
if now.IsZero() {
|
||||
now = time.Now().UTC()
|
||||
}
|
||||
sourceNodeID := firstNonEmpty(strings.TrimSpace(req.SourceNodeID), strings.TrimSpace(localNodeID))
|
||||
targetKind := req.Target.Kind
|
||||
if targetKind == "" {
|
||||
targetKind = FabricChannelTargetPool
|
||||
}
|
||||
targetID := firstNonEmpty(firstString(req.Target.PoolIDs), strings.TrimSpace(req.Target.SelectedNodeID), firstString(req.Target.NodeIDs))
|
||||
if targetKind == FabricChannelTargetNode {
|
||||
targetID = firstNonEmpty(strings.TrimSpace(req.Target.SelectedNodeID), firstString(req.Target.NodeIDs), targetID)
|
||||
}
|
||||
spec := FabricChannelSpec{
|
||||
ChannelID: firstNonEmpty(strings.TrimSpace(req.ChannelID), strings.TrimSpace(req.ResourceID)),
|
||||
ClusterID: strings.TrimSpace(req.ClusterID),
|
||||
SourceNodeID: sourceNodeID,
|
||||
TargetKind: targetKind,
|
||||
TargetID: targetID,
|
||||
TrafficClass: firstNonEmpty(strings.TrimSpace(req.TrafficClass), serviceClassDefaultTrafficClass(req.ServiceClass)),
|
||||
StickyKey: strings.TrimSpace(req.ResourceID),
|
||||
CreatedAt: now,
|
||||
}
|
||||
if err := ValidateFabricChannelSpec(spec); err != nil {
|
||||
return FabricChannelSpec{}, err
|
||||
}
|
||||
return spec, nil
|
||||
}
|
||||
|
||||
func serviceClassDefaultTrafficClass(serviceClass string) string {
|
||||
switch strings.TrimSpace(strings.ToLower(serviceClass)) {
|
||||
case FabricServiceClassVPNPackets:
|
||||
return FabricServiceChannelBulk
|
||||
case FabricServiceClassRemoteWorkspace:
|
||||
return FabricServiceChannelInteractive
|
||||
default:
|
||||
return FabricServiceChannelReliable
|
||||
}
|
||||
}
|
||||
|
||||
// firstString returns the first element of values that is non-empty after
// trimming whitespace, in trimmed form. It returns "" when there is none.
func firstString(values []string) string {
	for i := range values {
		if trimmed := strings.TrimSpace(values[i]); trimmed != "" {
			return trimmed
		}
	}
	return ""
}
|
||||
|
||||
func (s FabricRouteScheduler) scoreRoute(route FabricRoute) FabricRouteChoice {
|
||||
cfg := normalizeFabricRouteSchedulerConfig(s.Config)
|
||||
pressureBefore := fabricRoutePressurePercent(route, 0)
|
||||
pressureAfter := fabricRoutePressurePercent(route, cfg.ProjectedChannelCost)
|
||||
score := route.BaseLatencyMs*cfg.LatencyWeight +
|
||||
route.JitterMs*cfg.JitterWeight +
|
||||
route.LossPermille*cfg.LossWeight +
|
||||
pressureAfter*cfg.PressureWeight +
|
||||
len(route.Hops)*cfg.HopPenalty +
|
||||
route.RelayCount*cfg.RelayPenalty
|
||||
if route.Degraded {
|
||||
score += cfg.DegradedPenalty
|
||||
}
|
||||
reason := "latency_load_score"
|
||||
if pressureAfter >= 90 {
|
||||
reason = "capacity_pressure_avoidance"
|
||||
}
|
||||
if route.RelayCount > 0 {
|
||||
reason = "relay_fallback_available"
|
||||
}
|
||||
return FabricRouteChoice{Score: score, Reason: reason, PressureBefore: pressureBefore, PressureAfter: pressureAfter}
|
||||
}
|
||||
|
||||
func normalizeFabricRouteSchedulerConfig(cfg FabricRouteSchedulerConfig) FabricRouteSchedulerConfig {
|
||||
if cfg.LatencyWeight <= 0 {
|
||||
cfg.LatencyWeight = 10
|
||||
}
|
||||
if cfg.JitterWeight <= 0 {
|
||||
cfg.JitterWeight = 4
|
||||
}
|
||||
if cfg.LossWeight <= 0 {
|
||||
cfg.LossWeight = 8
|
||||
}
|
||||
if cfg.PressureWeight <= 0 {
|
||||
cfg.PressureWeight = 12
|
||||
}
|
||||
if cfg.HopPenalty <= 0 {
|
||||
cfg.HopPenalty = 5
|
||||
}
|
||||
if cfg.RelayPenalty <= 0 {
|
||||
cfg.RelayPenalty = 25
|
||||
}
|
||||
if cfg.DegradedPenalty <= 0 {
|
||||
cfg.DegradedPenalty = 500
|
||||
}
|
||||
if cfg.ProjectedChannelCost <= 0 {
|
||||
cfg.ProjectedChannelCost = 1
|
||||
}
|
||||
if cfg.HardMaxRoutePressure < 0 {
|
||||
cfg.HardMaxRoutePressure = 0
|
||||
}
|
||||
return cfg
|
||||
}
|
||||
|
||||
func flattenFabricRouteSet(routeSet FabricRouteSet) []FabricRoute {
|
||||
routes := make([]FabricRoute, 0, 1+len(routeSet.WarmStandby)+len(routeSet.ColdFallbacks))
|
||||
if strings.TrimSpace(routeSet.Primary.RouteID) != "" {
|
||||
routes = append(routes, routeSet.Primary)
|
||||
}
|
||||
routes = append(routes, routeSet.WarmStandby...)
|
||||
routes = append(routes, routeSet.ColdFallbacks...)
|
||||
return routes
|
||||
}
|
||||
|
||||
func fabricRouteUsable(spec FabricChannelSpec, route FabricRoute, forbidden map[string]struct{}, now time.Time) bool {
|
||||
if strings.TrimSpace(route.RouteID) == "" || !route.Healthy {
|
||||
return false
|
||||
}
|
||||
if route.ClusterID != "" && spec.ClusterID != "" && route.ClusterID != spec.ClusterID {
|
||||
return false
|
||||
}
|
||||
if route.SourceNodeID != "" && route.SourceNodeID != spec.SourceNodeID {
|
||||
return false
|
||||
}
|
||||
switch spec.TargetKind {
|
||||
case FabricChannelTargetNode:
|
||||
if route.DestinationNodeID != "" && route.DestinationNodeID != spec.TargetID {
|
||||
return false
|
||||
}
|
||||
case FabricChannelTargetPool:
|
||||
if route.PoolID != "" && route.PoolID != spec.TargetID {
|
||||
return false
|
||||
}
|
||||
}
|
||||
for _, hop := range route.Hops {
|
||||
if _, blocked := forbidden[hop.NodeID]; blocked {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func fabricRoutePressurePercent(route FabricRoute, projected int) int {
|
||||
if route.Capacity <= 0 {
|
||||
return 100
|
||||
}
|
||||
active := route.ActiveChannels + projected
|
||||
if active <= 0 {
|
||||
return 0
|
||||
}
|
||||
pressure := (active * 100) / route.Capacity
|
||||
if pressure > 100 {
|
||||
return 100
|
||||
}
|
||||
return pressure
|
||||
}
|
||||
|
||||
// stringSet builds a set from values, trimming each entry and skipping those
// that are empty after trimming. The result is never nil.
func stringSet(values []string) map[string]struct{} {
	set := make(map[string]struct{}, len(values))
	for _, raw := range values {
		if key := strings.TrimSpace(raw); key != "" {
			set[key] = struct{}{}
		}
	}
	return set
}
|
||||
@@ -0,0 +1,244 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestFabricRouteSchedulerAvoidsSaturatedShortestRoute(t *testing.T) {
|
||||
scheduler := NewFabricRouteScheduler(FabricRouteSchedulerConfig{})
|
||||
spec := FabricChannelSpec{
|
||||
ChannelID: "channel-1",
|
||||
ClusterID: "cluster-1",
|
||||
SourceNodeID: "node-a",
|
||||
TargetKind: FabricChannelTargetNode,
|
||||
TargetID: "node-b",
|
||||
}
|
||||
choice, err := scheduler.ChooseRoute(spec, FabricRouteSet{
|
||||
TargetKind: FabricChannelTargetNode,
|
||||
TargetID: "node-b",
|
||||
Primary: FabricRoute{
|
||||
RouteID: "short-saturated",
|
||||
ClusterID: "cluster-1",
|
||||
SourceNodeID: "node-a",
|
||||
DestinationNodeID: "node-b",
|
||||
Hops: []FabricRouteHop{{NodeID: "node-a"}, {NodeID: "node-b"}},
|
||||
BaseLatencyMs: 10,
|
||||
Capacity: 10,
|
||||
ActiveChannels: 10,
|
||||
Healthy: true,
|
||||
},
|
||||
WarmStandby: []FabricRoute{{
|
||||
RouteID: "slightly-longer-free",
|
||||
ClusterID: "cluster-1",
|
||||
SourceNodeID: "node-a",
|
||||
DestinationNodeID: "node-b",
|
||||
Hops: []FabricRouteHop{{NodeID: "node-a"}, {NodeID: "node-r"}, {NodeID: "node-b"}},
|
||||
BaseLatencyMs: 18,
|
||||
Capacity: 100,
|
||||
ActiveChannels: 5,
|
||||
RelayCount: 1,
|
||||
Healthy: true,
|
||||
}},
|
||||
}, time.Now())
|
||||
if err != nil {
|
||||
t.Fatalf("choose route: %v", err)
|
||||
}
|
||||
if choice.Route.RouteID != "slightly-longer-free" {
|
||||
t.Fatalf("route = %q, want slightly-longer-free score=%d pressure=%d", choice.Route.RouteID, choice.Score, choice.PressureAfter)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricChannelSpecFromServiceRequestTargetsPool(t *testing.T) {
|
||||
spec, err := FabricChannelSpecFromServiceRequest(FabricServiceChannelRequest{
|
||||
ChannelID: "vpn-1",
|
||||
ClusterID: "cluster-1",
|
||||
ResourceID: "vpn-1",
|
||||
ServiceClass: FabricServiceClassVPNPackets,
|
||||
Target: FabricServiceChannelTarget{
|
||||
Kind: FabricChannelTargetPool,
|
||||
PoolIDs: []string{"home-ipv4"},
|
||||
ServiceRole: "ipv4-egress",
|
||||
},
|
||||
}, "android-node", time.Now())
|
||||
if err != nil {
|
||||
t.Fatalf("service request spec: %v", err)
|
||||
}
|
||||
if spec.SourceNodeID != "android-node" || spec.TargetKind != FabricChannelTargetPool || spec.TargetID != "home-ipv4" || spec.TrafficClass != FabricServiceChannelBulk {
|
||||
t.Fatalf("unexpected spec: %+v", spec)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricChannelSpecFromServiceRequestKeepsServiceOutOfEndpointSelection(t *testing.T) {
|
||||
_, err := FabricChannelSpecFromServiceRequest(FabricServiceChannelRequest{
|
||||
ChannelID: "rdp-1",
|
||||
ClusterID: "cluster-1",
|
||||
ServiceClass: FabricServiceClassRemoteWorkspace,
|
||||
Target: FabricServiceChannelTarget{
|
||||
Kind: FabricChannelTargetPool,
|
||||
ServiceRole: "rdp-gateway",
|
||||
},
|
||||
}, "client-node", time.Now())
|
||||
if !errors.Is(err, ErrFabricChannelInvalid) {
|
||||
t.Fatalf("err = %v, want invalid without pool/node target id", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricRouteSchedulerPoolSkipsFailedEndpoint(t *testing.T) {
|
||||
scheduler := NewFabricRouteScheduler(FabricRouteSchedulerConfig{})
|
||||
spec := FabricChannelSpec{
|
||||
ChannelID: "channel-pool",
|
||||
ClusterID: "cluster-1",
|
||||
SourceNodeID: "node-a",
|
||||
TargetKind: FabricChannelTargetPool,
|
||||
TargetID: "pool-egress",
|
||||
}
|
||||
choice, err := scheduler.ChooseRoute(spec, FabricRouteSet{
|
||||
TargetKind: FabricChannelTargetPool,
|
||||
TargetID: "pool-egress",
|
||||
Primary: FabricRoute{
|
||||
RouteID: "pool-node-dead",
|
||||
ClusterID: "cluster-1",
|
||||
SourceNodeID: "node-a",
|
||||
DestinationNodeID: "node-b",
|
||||
PoolID: "pool-egress",
|
||||
Capacity: 100,
|
||||
Healthy: false,
|
||||
},
|
||||
WarmStandby: []FabricRoute{{
|
||||
RouteID: "pool-node-live",
|
||||
ClusterID: "cluster-1",
|
||||
SourceNodeID: "node-a",
|
||||
DestinationNodeID: "node-c",
|
||||
PoolID: "pool-egress",
|
||||
Hops: []FabricRouteHop{{NodeID: "node-a"}, {NodeID: "node-c"}},
|
||||
BaseLatencyMs: 25,
|
||||
Capacity: 100,
|
||||
Healthy: true,
|
||||
}},
|
||||
}, time.Now())
|
||||
if err != nil {
|
||||
t.Fatalf("choose route: %v", err)
|
||||
}
|
||||
if choice.Route.DestinationNodeID != "node-c" {
|
||||
t.Fatalf("destination = %q, want node-c", choice.Route.DestinationNodeID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricRouteSchedulerHonorsForbiddenHops(t *testing.T) {
|
||||
scheduler := NewFabricRouteScheduler(FabricRouteSchedulerConfig{})
|
||||
spec := FabricChannelSpec{
|
||||
ChannelID: "channel-1",
|
||||
ClusterID: "cluster-1",
|
||||
SourceNodeID: "node-a",
|
||||
TargetKind: FabricChannelTargetNode,
|
||||
TargetID: "node-b",
|
||||
ForbiddenHops: []string{"node-r"},
|
||||
}
|
||||
_, err := scheduler.ChooseRoute(spec, FabricRouteSet{
|
||||
Primary: FabricRoute{
|
||||
RouteID: "blocked",
|
||||
ClusterID: "cluster-1",
|
||||
SourceNodeID: "node-a",
|
||||
DestinationNodeID: "node-b",
|
||||
Hops: []FabricRouteHop{{NodeID: "node-a"}, {NodeID: "node-r"}, {NodeID: "node-b"}},
|
||||
Capacity: 100,
|
||||
Healthy: true,
|
||||
},
|
||||
}, time.Now())
|
||||
if !errors.Is(err, ErrFabricRouteNotFound) {
|
||||
t.Fatalf("err = %v, want ErrFabricRouteNotFound", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricRouteSchedulerRejectsRoutesAboveHardPressureLimit(t *testing.T) {
|
||||
scheduler := NewFabricRouteScheduler(FabricRouteSchedulerConfig{HardMaxRoutePressure: 80})
|
||||
spec := FabricChannelSpec{
|
||||
ChannelID: "channel-pressure",
|
||||
ClusterID: "cluster-1",
|
||||
SourceNodeID: "node-a",
|
||||
TargetKind: FabricChannelTargetNode,
|
||||
TargetID: "node-b",
|
||||
}
|
||||
choice, err := scheduler.ChooseRoute(spec, FabricRouteSet{
|
||||
Primary: FabricRoute{
|
||||
RouteID: "too-busy",
|
||||
ClusterID: "cluster-1",
|
||||
SourceNodeID: "node-a",
|
||||
DestinationNodeID: "node-b",
|
||||
Capacity: 10,
|
||||
ActiveChannels: 9,
|
||||
Healthy: true,
|
||||
},
|
||||
WarmStandby: []FabricRoute{{
|
||||
RouteID: "admissible",
|
||||
ClusterID: "cluster-1",
|
||||
SourceNodeID: "node-a",
|
||||
DestinationNodeID: "node-b",
|
||||
Capacity: 10,
|
||||
ActiveChannels: 5,
|
||||
Healthy: true,
|
||||
}},
|
||||
}, time.Now())
|
||||
if err != nil {
|
||||
t.Fatalf("choose route: %v", err)
|
||||
}
|
||||
if choice.Route.RouteID != "admissible" {
|
||||
t.Fatalf("route = %q, want admissible", choice.Route.RouteID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricRouteSchedulerKeepsHighLatencyRouteAsFallbackUntilFastRouteSaturates(t *testing.T) {
|
||||
spec := FabricChannelSpec{
|
||||
ChannelID: "channel-latency-aware",
|
||||
ClusterID: "cluster-1",
|
||||
SourceNodeID: "node-a",
|
||||
TargetKind: FabricChannelTargetPool,
|
||||
TargetID: "pool-egress",
|
||||
}
|
||||
routeSet := FabricRouteSet{
|
||||
TargetKind: FabricChannelTargetPool,
|
||||
TargetID: "pool-egress",
|
||||
Primary: FabricRoute{
|
||||
RouteID: "lan-fast",
|
||||
ClusterID: "cluster-1",
|
||||
SourceNodeID: "node-a",
|
||||
DestinationNodeID: "node-lan",
|
||||
PoolID: "pool-egress",
|
||||
BaseLatencyMs: 4,
|
||||
Capacity: 100,
|
||||
ActiveChannels: 85,
|
||||
Healthy: true,
|
||||
},
|
||||
WarmStandby: []FabricRoute{{
|
||||
RouteID: "wan-slow",
|
||||
ClusterID: "cluster-1",
|
||||
SourceNodeID: "node-a",
|
||||
DestinationNodeID: "node-wan",
|
||||
PoolID: "pool-egress",
|
||||
BaseLatencyMs: 420,
|
||||
Capacity: 100,
|
||||
ActiveChannels: 0,
|
||||
Healthy: true,
|
||||
}},
|
||||
}
|
||||
|
||||
scheduler := NewFabricRouteScheduler(FabricRouteSchedulerConfig{HardMaxRoutePressure: 90})
|
||||
choice, err := scheduler.ChooseRoute(spec, routeSet, time.Now())
|
||||
if err != nil {
|
||||
t.Fatalf("choose route: %v", err)
|
||||
}
|
||||
if choice.Route.RouteID != "lan-fast" {
|
||||
t.Fatalf("route = %q, want fast LAN before hard pressure limit", choice.Route.RouteID)
|
||||
}
|
||||
|
||||
routeSet.Primary.ActiveChannels = 90
|
||||
choice, err = scheduler.ChooseRoute(spec, routeSet, time.Now())
|
||||
if err != nil {
|
||||
t.Fatalf("choose fallback route: %v", err)
|
||||
}
|
||||
if choice.Route.RouteID != "wan-slow" {
|
||||
t.Fatalf("route = %q, want WAN only after LAN reaches hard pressure limit", choice.Route.RouteID)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,130 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
|
||||
)
|
||||
|
||||
type FabricOverlayTransportConfig struct {
|
||||
ClusterID string
|
||||
LocalNodeID string
|
||||
RouterConfig FabricChannelRouterConfig
|
||||
Timeout time.Duration
|
||||
}
|
||||
|
||||
type FabricOverlayTransport struct {
|
||||
Runtime *FabricChannelRuntime
|
||||
RouteSets map[string]FabricRouteSet
|
||||
Config FabricOverlayTransportConfig
|
||||
sequence atomic.Uint64
|
||||
}
|
||||
|
||||
type FabricOverlayTransportSnapshot struct {
|
||||
RoutePressure FabricRoutePressureSnapshot `json:"route_pressure"`
|
||||
RouteHealth FabricRouteHealthSnapshot `json:"route_health,omitempty"`
|
||||
}
|
||||
|
||||
type FabricOverlaySendRequest struct {
|
||||
ChannelID string
|
||||
TargetKind FabricChannelTargetKind
|
||||
TargetID string
|
||||
TrafficClass fabricproto.TrafficClass
|
||||
Payloads [][]byte
|
||||
StickyKey string
|
||||
}
|
||||
|
||||
func NewFabricOverlayTransport(transport FabricTransport, routeSets map[string]FabricRouteSet, cfg FabricOverlayTransportConfig) *FabricOverlayTransport {
|
||||
if cfg.Timeout <= 0 {
|
||||
cfg.Timeout = 30 * time.Second
|
||||
}
|
||||
runtime := NewFabricChannelRuntime(transport, FabricChannelRuntimeConfig{
|
||||
RouterConfig: cfg.RouterConfig,
|
||||
Timeout: cfg.Timeout,
|
||||
})
|
||||
normalized := make(map[string]FabricRouteSet, len(routeSets))
|
||||
for targetID, routeSet := range routeSets {
|
||||
targetID = strings.TrimSpace(targetID)
|
||||
if targetID != "" {
|
||||
normalized[targetID] = routeSet
|
||||
}
|
||||
}
|
||||
return &FabricOverlayTransport{
|
||||
Runtime: runtime,
|
||||
RouteSets: normalized,
|
||||
Config: cfg,
|
||||
}
|
||||
}
|
||||
|
||||
func (t *FabricOverlayTransport) Send(ctx context.Context, req FabricOverlaySendRequest) (FabricChannelRuntimeResult, error) {
|
||||
if t == nil || t.Runtime == nil {
|
||||
return FabricChannelRuntimeResult{}, ErrForwardRuntimeUnavailable
|
||||
}
|
||||
targetID := strings.TrimSpace(req.TargetID)
|
||||
if targetID == "" {
|
||||
return FabricChannelRuntimeResult{}, ErrFabricChannelInvalid
|
||||
}
|
||||
routeSet, ok := t.RouteSets[targetID]
|
||||
if !ok {
|
||||
return FabricChannelRuntimeResult{}, ErrFabricRouteNotFound
|
||||
}
|
||||
targetKind := req.TargetKind
|
||||
if targetKind == "" {
|
||||
targetKind = routeSet.TargetKind
|
||||
}
|
||||
if targetKind == "" {
|
||||
targetKind = FabricChannelTargetNode
|
||||
}
|
||||
trafficClass := req.TrafficClass
|
||||
if trafficClass == 0 {
|
||||
trafficClass = fabricproto.TrafficClassReliable
|
||||
}
|
||||
t.Runtime.Config.TrafficClass = trafficClass
|
||||
spec := FabricChannelSpec{
|
||||
ChannelID: firstNonEmpty(strings.TrimSpace(req.ChannelID), fmt.Sprintf("fabric-overlay-%d", t.sequence.Add(1))),
|
||||
ClusterID: strings.TrimSpace(t.Config.ClusterID),
|
||||
SourceNodeID: strings.TrimSpace(t.Config.LocalNodeID),
|
||||
TargetKind: targetKind,
|
||||
TargetID: targetID,
|
||||
TrafficClass: loadFabricTrafficClassName(trafficClass),
|
||||
StickyKey: strings.TrimSpace(req.StickyKey),
|
||||
CreatedAt: time.Now().UTC(),
|
||||
}
|
||||
return t.Runtime.SendReliable(ctx, spec, routeSet, req.Payloads)
|
||||
}
|
||||
|
||||
func (t *FabricOverlayTransport) SnapshotPressure() FabricRoutePressureSnapshot {
|
||||
if t == nil || t.Runtime == nil || t.Runtime.Pressure == nil {
|
||||
return FabricRoutePressureSnapshot{}
|
||||
}
|
||||
return t.Runtime.Pressure.SnapshotPressure()
|
||||
}
|
||||
|
||||
func (t *FabricOverlayTransport) Snapshot() FabricOverlayTransportSnapshot {
|
||||
if t == nil || t.Runtime == nil {
|
||||
return FabricOverlayTransportSnapshot{}
|
||||
}
|
||||
return FabricOverlayTransportSnapshot{
|
||||
RoutePressure: t.Runtime.snapshotRoutePressure(),
|
||||
RouteHealth: t.Runtime.snapshotRouteHealth(),
|
||||
}
|
||||
}
|
||||
|
||||
func loadFabricTrafficClassName(trafficClass fabricproto.TrafficClass) string {
|
||||
switch trafficClass {
|
||||
case fabricproto.TrafficClassControl:
|
||||
return "control"
|
||||
case fabricproto.TrafficClassInteractive:
|
||||
return "interactive"
|
||||
case fabricproto.TrafficClassBulk:
|
||||
return "bulk"
|
||||
case fabricproto.TrafficClassReliable:
|
||||
return "reliable"
|
||||
default:
|
||||
return fmt.Sprintf("traffic_class_%d", trafficClass)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,49 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
|
||||
)
|
||||
|
||||
func TestFabricOverlayTransportSendsThroughRouteSet(t *testing.T) {
|
||||
transport := newFakeFabricRuntimeTransport(map[string]time.Duration{
|
||||
"quic://node-b:19443": 0,
|
||||
})
|
||||
overlay := NewFabricOverlayTransport(transport, map[string]FabricRouteSet{
|
||||
"node-b": {
|
||||
TargetKind: FabricChannelTargetNode,
|
||||
TargetID: "node-b",
|
||||
Primary: FabricRoute{
|
||||
RouteID: "node-b-direct",
|
||||
ClusterID: "cluster-1",
|
||||
SourceNodeID: "node-a",
|
||||
DestinationNodeID: "node-b",
|
||||
Hops: []FabricRouteHop{{NodeID: "node-b", Mode: FabricRouteDirect, EndpointID: "node-b-direct", Address: "quic://node-b:19443"}},
|
||||
Capacity: 100,
|
||||
Healthy: true,
|
||||
},
|
||||
},
|
||||
}, FabricOverlayTransportConfig{ClusterID: "cluster-1", LocalNodeID: "node-a"})
|
||||
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
|
||||
defer cancel()
|
||||
result, err := overlay.Send(ctx, FabricOverlaySendRequest{
|
||||
TargetID: "node-b",
|
||||
TrafficClass: fabricproto.TrafficClassReliable,
|
||||
Payloads: [][]byte{[]byte("payload")},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("send: %v", err)
|
||||
}
|
||||
if result.BytesSent != uint64(len("payload")) || result.AcksReceived != 1 {
|
||||
t.Fatalf("result = %+v", result)
|
||||
}
|
||||
if pressure := overlay.SnapshotPressure(); pressure.ActiveTotal != 0 || pressure.AcquiredTotal != pressure.ReleasedTotal {
|
||||
t.Fatalf("pressure leak: %+v", pressure)
|
||||
}
|
||||
if snapshot := overlay.Snapshot(); snapshot.RoutePressure.AcquiredTotal != 1 || len(snapshot.RouteHealth.Quarantined) != 0 {
|
||||
t.Fatalf("snapshot = %+v", snapshot)
|
||||
}
|
||||
}
|
||||
@@ -3,28 +3,50 @@ package mesh
|
||||
import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
|
||||
"github.com/quic-go/quic-go"
|
||||
)
|
||||
|
||||
type QUICFabricServer struct {
|
||||
listener *quic.Listener
|
||||
logger FabricSessionEventLogger
|
||||
done chan struct{}
|
||||
closeOnce sync.Once
|
||||
listener *quic.Listener
|
||||
logger FabricSessionEventLogger
|
||||
reverseMu sync.RWMutex
|
||||
reverseTransport *QUICFabricTransport
|
||||
fabricFrameHandler FabricFrameHandler
|
||||
productionForwardHandler func(context.Context, ProductionEnvelope) (ProductionForwardResult, error)
|
||||
webIngressForwardHandler func(context.Context, []byte) ([]byte, error)
|
||||
fabricControlHandler func(context.Context, []byte) ([]byte, error)
|
||||
syntheticForwardHandler func(context.Context, SyntheticEnvelope) (SyntheticEnvelope, error)
|
||||
done chan struct{}
|
||||
closeOnce sync.Once
|
||||
}
|
||||
|
||||
type QUICFabricServerConfig struct {
|
||||
ListenAddr string
|
||||
TLSConfig *tls.Config
|
||||
QUICConfig *quic.Config
|
||||
Logger FabricSessionEventLogger
|
||||
ListenAddr string
|
||||
TLSConfig *tls.Config
|
||||
QUICConfig *quic.Config
|
||||
Logger FabricSessionEventLogger
|
||||
ReverseTransport *QUICFabricTransport
|
||||
FabricFrameHandler FabricFrameHandler
|
||||
ProductionForwardHandler func(context.Context, ProductionEnvelope) (ProductionForwardResult, error)
|
||||
WebIngressForwardHandler func(context.Context, []byte) ([]byte, error)
|
||||
FabricControlHandler func(context.Context, []byte) ([]byte, error)
|
||||
SyntheticForwardHandler func(context.Context, SyntheticEnvelope) (SyntheticEnvelope, error)
|
||||
}
|
||||
|
||||
type FabricFrameSender interface {
|
||||
SendFrame(context.Context, fabricproto.Frame) error
|
||||
}
|
||||
|
||||
// FabricFrameHandler is an optional per-frame hook invoked by the QUIC server
// for frames not consumed by the forward handlers. Returning true marks the
// frame as handled, so the server moves on to the next frame; returning false
// lets the session process it. A non-nil error makes the server close the
// connection.
type FabricFrameHandler func(ctx context.Context, sender FabricFrameSender, frame fabricproto.Frame) (bool, error)
|
||||
|
||||
func StartQUICFabricServer(ctx context.Context, cfg QUICFabricServerConfig) (*QUICFabricServer, error) {
|
||||
if cfg.ListenAddr == "" {
|
||||
return nil, fmt.Errorf("quic fabric listen addr is required")
|
||||
@@ -42,9 +64,15 @@ func StartQUICFabricServer(ctx context.Context, cfg QUICFabricServerConfig) (*QU
|
||||
return nil, err
|
||||
}
|
||||
server := &QUICFabricServer{
|
||||
listener: listener,
|
||||
logger: cfg.Logger,
|
||||
done: make(chan struct{}),
|
||||
listener: listener,
|
||||
logger: cfg.Logger,
|
||||
reverseTransport: cfg.ReverseTransport,
|
||||
fabricFrameHandler: cfg.FabricFrameHandler,
|
||||
productionForwardHandler: cfg.ProductionForwardHandler,
|
||||
webIngressForwardHandler: cfg.WebIngressForwardHandler,
|
||||
fabricControlHandler: cfg.FabricControlHandler,
|
||||
syntheticForwardHandler: cfg.SyntheticForwardHandler,
|
||||
done: make(chan struct{}),
|
||||
}
|
||||
go server.acceptLoop(ctx)
|
||||
return server, nil
|
||||
@@ -57,6 +85,15 @@ func (s *QUICFabricServer) Addr() net.Addr {
|
||||
return s.listener.Addr()
|
||||
}
|
||||
|
||||
func (s *QUICFabricServer) SetReverseTransport(transport *QUICFabricTransport) {
|
||||
if s == nil {
|
||||
return
|
||||
}
|
||||
s.reverseMu.Lock()
|
||||
s.reverseTransport = transport
|
||||
s.reverseMu.Unlock()
|
||||
}
|
||||
|
||||
func (s *QUICFabricServer) Close() error {
|
||||
if s == nil {
|
||||
return nil
|
||||
@@ -95,6 +132,8 @@ func (s *QUICFabricServer) handleConn(ctx context.Context, conn *quic.Conn) {
|
||||
|
||||
func (s *QUICFabricServer) handleStream(ctx context.Context, conn *quic.Conn, stream *quic.Stream) {
|
||||
session := fabricproto.NewSession(fabricproto.SessionConfig{})
|
||||
sender := quicStreamFrameSender{stream: stream}
|
||||
defer func() { _ = stream.Close() }()
|
||||
s.logFabricSession(FabricSessionEventLogEntry{
|
||||
Event: "fabric_session_quic_stream_opened",
|
||||
AcceptedBy: "quic",
|
||||
@@ -116,6 +155,29 @@ func (s *QUICFabricServer) handleStream(ctx context.Context, conn *quic.Conn, st
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
s.registerReverseHelloFrame(conn, frame)
|
||||
if s.handleProductionForwardFrame(ctx, stream, frame) {
|
||||
continue
|
||||
}
|
||||
if s.handleWebIngressForwardFrame(ctx, stream, frame) {
|
||||
continue
|
||||
}
|
||||
if s.handleFabricControlForwardFrame(ctx, stream, frame) {
|
||||
continue
|
||||
}
|
||||
if s.handleSyntheticForwardFrame(ctx, conn, stream, frame) {
|
||||
continue
|
||||
}
|
||||
if s.fabricFrameHandler != nil {
|
||||
handled, err := s.fabricFrameHandler(ctx, sender, frame)
|
||||
if err != nil {
|
||||
_ = conn.CloseWithError(2, err.Error())
|
||||
return
|
||||
}
|
||||
if handled {
|
||||
continue
|
||||
}
|
||||
}
|
||||
event, responses, err := session.HandleFrame(frame)
|
||||
if err != nil {
|
||||
_ = conn.CloseWithError(2, err.Error())
|
||||
@@ -140,6 +202,196 @@ func (s *QUICFabricServer) handleStream(ctx context.Context, conn *quic.Conn, st
|
||||
}
|
||||
}
|
||||
|
||||
type quicStreamFrameSender struct {
|
||||
stream *quic.Stream
|
||||
mu sync.Mutex
|
||||
}
|
||||
|
||||
func (s quicStreamFrameSender) SendFrame(ctx context.Context, frame fabricproto.Frame) error {
|
||||
if s.stream == nil {
|
||||
return fmt.Errorf("quic fabric stream is closed")
|
||||
}
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
if deadline, ok := ctx.Deadline(); ok {
|
||||
_ = s.stream.SetWriteDeadline(deadline)
|
||||
} else {
|
||||
_ = s.stream.SetWriteDeadline(time.Now().Add(30 * time.Second))
|
||||
}
|
||||
return fabricproto.WriteFrame(s.stream, frame)
|
||||
}
|
||||
|
||||
func (s *QUICFabricServer) registerReverseHelloFrame(conn *quic.Conn, frame fabricproto.Frame) {
|
||||
reverseTransport := s.getReverseTransport()
|
||||
if s == nil || reverseTransport == nil || conn == nil || frame.Type != fabricproto.FramePing {
|
||||
return
|
||||
}
|
||||
payload := string(frame.Payload)
|
||||
if !strings.HasPrefix(payload, fabricQUICReverseHelloPrefix) {
|
||||
return
|
||||
}
|
||||
peerID := strings.TrimPrefix(payload, fabricQUICReverseHelloPrefix)
|
||||
reverseTransport.RegisterReverseConn(peerID, conn)
|
||||
s.logFabricSession(FabricSessionEventLogEntry{
|
||||
Event: "fabric_session_quic_reverse_registered",
|
||||
AcceptedBy: "quic_reverse_hello",
|
||||
RemoteAddr: conn.RemoteAddr().String(),
|
||||
PeerID: peerID,
|
||||
})
|
||||
}
|
||||
|
||||
type quicProductionForwardResponse struct {
|
||||
Result ProductionForwardResult `json:"result,omitempty"`
|
||||
Error string `json:"error,omitempty"`
|
||||
}
|
||||
|
||||
type quicSyntheticForwardResponse struct {
|
||||
Envelope SyntheticEnvelope `json:"envelope,omitempty"`
|
||||
Error string `json:"error,omitempty"`
|
||||
}
|
||||
|
||||
type quicWebIngressForwardResponse struct {
|
||||
Payload json.RawMessage `json:"payload,omitempty"`
|
||||
Error string `json:"error,omitempty"`
|
||||
}
|
||||
|
||||
type quicFabricControlForwardResponse struct {
|
||||
Payload json.RawMessage `json:"payload,omitempty"`
|
||||
Error string `json:"error,omitempty"`
|
||||
}
|
||||
|
||||
func (s *QUICFabricServer) handleProductionForwardFrame(ctx context.Context, stream *quic.Stream, frame fabricproto.Frame) bool {
|
||||
if frame.Type != fabricproto.FrameData || frame.StreamID != ProductionForwardQUICStreamID {
|
||||
return false
|
||||
}
|
||||
response := quicProductionForwardResponse{}
|
||||
if s == nil || s.productionForwardHandler == nil {
|
||||
response.Error = ErrForwardRuntimeUnavailable.Error()
|
||||
} else {
|
||||
var envelope ProductionEnvelope
|
||||
if err := json.Unmarshal(frame.Payload, &envelope); err != nil {
|
||||
response.Error = "invalid production mesh envelope"
|
||||
} else if result, err := s.productionForwardHandler(ctx, envelope); err != nil {
|
||||
response.Error = err.Error()
|
||||
} else {
|
||||
response.Result = result
|
||||
}
|
||||
}
|
||||
payload, err := json.Marshal(response)
|
||||
if err != nil {
|
||||
return true
|
||||
}
|
||||
_ = fabricproto.WriteFrame(stream, fabricproto.Frame{
|
||||
Type: fabricproto.FrameData,
|
||||
TrafficClass: fabricproto.TrafficClassReliable,
|
||||
StreamID: ProductionForwardQUICStreamID,
|
||||
Sequence: frame.Sequence,
|
||||
Payload: payload,
|
||||
})
|
||||
return true
|
||||
}
|
||||
|
||||
func (s *QUICFabricServer) handleWebIngressForwardFrame(ctx context.Context, stream *quic.Stream, frame fabricproto.Frame) bool {
|
||||
if frame.Type != fabricproto.FrameData || frame.StreamID != WebIngressForwardQUICStreamID {
|
||||
return false
|
||||
}
|
||||
response := quicWebIngressForwardResponse{}
|
||||
if s == nil || s.webIngressForwardHandler == nil {
|
||||
response.Error = ErrForwardRuntimeUnavailable.Error()
|
||||
} else if payload, err := s.webIngressForwardHandler(ctx, append([]byte(nil), frame.Payload...)); err != nil {
|
||||
response.Error = err.Error()
|
||||
} else {
|
||||
response.Payload = append(json.RawMessage(nil), payload...)
|
||||
}
|
||||
payload, err := json.Marshal(response)
|
||||
if err != nil {
|
||||
return true
|
||||
}
|
||||
_ = fabricproto.WriteFrame(stream, fabricproto.Frame{
|
||||
Type: fabricproto.FrameData,
|
||||
TrafficClass: fabricproto.TrafficClassReliable,
|
||||
StreamID: WebIngressForwardQUICStreamID,
|
||||
Sequence: frame.Sequence,
|
||||
Payload: payload,
|
||||
})
|
||||
return true
|
||||
}
|
||||
|
||||
func (s *QUICFabricServer) handleFabricControlForwardFrame(ctx context.Context, stream *quic.Stream, frame fabricproto.Frame) bool {
|
||||
if frame.Type != fabricproto.FrameData || frame.StreamID != FabricControlForwardQUICStreamID {
|
||||
return false
|
||||
}
|
||||
response := quicFabricControlForwardResponse{}
|
||||
if s == nil || s.fabricControlHandler == nil {
|
||||
response.Error = ErrForwardRuntimeUnavailable.Error()
|
||||
} else if payload, err := s.fabricControlHandler(ctx, append([]byte(nil), frame.Payload...)); err != nil {
|
||||
response.Error = err.Error()
|
||||
} else {
|
||||
response.Payload = append(json.RawMessage(nil), payload...)
|
||||
}
|
||||
payload, err := json.Marshal(response)
|
||||
if err != nil {
|
||||
return true
|
||||
}
|
||||
_ = fabricproto.WriteFrame(stream, fabricproto.Frame{
|
||||
Type: fabricproto.FrameData,
|
||||
TrafficClass: fabricproto.TrafficClassReliable,
|
||||
StreamID: FabricControlForwardQUICStreamID,
|
||||
Sequence: frame.Sequence,
|
||||
Payload: payload,
|
||||
})
|
||||
return true
|
||||
}
|
||||
|
||||
func (s *QUICFabricServer) handleSyntheticForwardFrame(ctx context.Context, conn *quic.Conn, stream *quic.Stream, frame fabricproto.Frame) bool {
|
||||
if frame.Type != fabricproto.FrameData || frame.StreamID != SyntheticForwardQUICStreamID {
|
||||
return false
|
||||
}
|
||||
response := quicSyntheticForwardResponse{}
|
||||
if s == nil || s.syntheticForwardHandler == nil {
|
||||
response.Error = ErrMeshRuntimeDisabled.Error()
|
||||
} else {
|
||||
var envelope SyntheticEnvelope
|
||||
if err := json.Unmarshal(frame.Payload, &envelope); err != nil {
|
||||
response.Error = "invalid synthetic mesh envelope"
|
||||
} else if ack, err := s.syntheticForwardHandler(ctx, envelope); err != nil {
|
||||
response.Error = err.Error()
|
||||
} else {
|
||||
s.registerReversePeerConn(envelope.From.NodeID, conn)
|
||||
response.Envelope = ack
|
||||
}
|
||||
}
|
||||
payload, err := json.Marshal(response)
|
||||
if err != nil {
|
||||
return true
|
||||
}
|
||||
_ = fabricproto.WriteFrame(stream, fabricproto.Frame{
|
||||
Type: fabricproto.FrameData,
|
||||
TrafficClass: fabricproto.TrafficClassReliable,
|
||||
StreamID: SyntheticForwardQUICStreamID,
|
||||
Sequence: frame.Sequence,
|
||||
Payload: payload,
|
||||
})
|
||||
return true
|
||||
}
|
||||
|
||||
func (s *QUICFabricServer) registerReversePeerConn(peerID string, conn *quic.Conn) {
|
||||
reverseTransport := s.getReverseTransport()
|
||||
if s == nil || reverseTransport == nil || conn == nil {
|
||||
return
|
||||
}
|
||||
reverseTransport.RegisterReverseConn(peerID, conn)
|
||||
}
|
||||
|
||||
func (s *QUICFabricServer) getReverseTransport() *QUICFabricTransport {
|
||||
if s == nil {
|
||||
return nil
|
||||
}
|
||||
s.reverseMu.RLock()
|
||||
defer s.reverseMu.RUnlock()
|
||||
return s.reverseTransport
|
||||
}
|
||||
|
||||
func (s *QUICFabricServer) logFabricSession(entry FabricSessionEventLogEntry) {
|
||||
if s != nil && s.logger != nil {
|
||||
s.logger(entry)
|
||||
|
||||
@@ -6,7 +6,9 @@ import (
|
||||
"crypto/tls"
|
||||
"crypto/x509"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
@@ -17,6 +19,7 @@ import (
|
||||
)
|
||||
|
||||
const (
	// fabricQUICNextProto is the protocol identifier for fabric QUIC sessions
	// (presumably the TLS ALPN value — confirm against the TLS config builder).
	fabricQUICNextProto = "rap-fabric-data-session-v1"

	// fabricQUICReverseHelloPrefix prefixes the payload of the ping frame that
	// announces the sender's peer ID; the peer ID follows the prefix.
	fabricQUICReverseHelloPrefix = "rap-fabric-reverse-hello-v1:"

	// defaultQUICFabricConnIdleTTL is the IdleTTL given to new transports.
	defaultQUICFabricConnIdleTTL = 5 * time.Minute

	// defaultQUICFabricMaxStreamsPerConn is the MaxStreamsPerConn given to new
	// transports.
	defaultQUICFabricMaxStreamsPerConn = 64
)
|
||||
// ErrQUICFabricStreamLimitReached is a constant sentinel error of type
// quicFabricError with the message "quic fabric stream limit reached".
const ErrQUICFabricStreamLimitReached = quicFabricError("quic fabric stream limit reached")
|
||||
@@ -28,17 +31,29 @@ func (e quicFabricError) Error() string {
|
||||
}
|
||||
|
||||
type QUICFabricTransport struct {
|
||||
Config *quic.Config
|
||||
IdleTTL time.Duration
|
||||
MaxStreamsPerConn int
|
||||
mu sync.Mutex
|
||||
conns map[string]*quicFabricConnEntry
|
||||
stats QUICFabricTransportStats
|
||||
Config *quic.Config
|
||||
LocalPeerID string
|
||||
IdleTTL time.Duration
|
||||
MaxStreamsPerConn int
|
||||
DialAddr func(context.Context, string, *tls.Config, *quic.Config) (*quic.Conn, error)
|
||||
mu sync.Mutex
|
||||
conns map[string]*quicFabricConnEntry
|
||||
reverseConns map[string]*quicFabricConnEntry
|
||||
inboundProductionHandler func(context.Context, ProductionEnvelope) (ProductionForwardResult, error)
|
||||
inboundWebIngressHandler func(context.Context, []byte) ([]byte, error)
|
||||
inboundFabricControlHandler func(context.Context, []byte) ([]byte, error)
|
||||
inboundSyntheticHandler func(context.Context, SyntheticEnvelope) (SyntheticEnvelope, error)
|
||||
logger FabricSessionEventLogger
|
||||
stats QUICFabricTransportStats
|
||||
}
|
||||
|
||||
type QUICFabricTransportStats struct {
|
||||
Opens uint64 `json:"opens"`
|
||||
Reuses uint64 `json:"reuses"`
|
||||
ReverseHelloSent uint64 `json:"reverse_hello_sent"`
|
||||
ReverseHelloFailed uint64 `json:"reverse_hello_failed"`
|
||||
ReverseRegisters uint64 `json:"reverse_registers"`
|
||||
ReverseReuses uint64 `json:"reverse_reuses"`
|
||||
OpenFailures uint64 `json:"open_failures"`
|
||||
ClosedEvicted uint64 `json:"closed_evicted"`
|
||||
CloseAllCalls uint64 `json:"close_all_calls"`
|
||||
@@ -50,6 +65,7 @@ type QUICFabricTransportStats struct {
|
||||
|
||||
type QUICFabricTransportSnapshot struct {
|
||||
SchemaVersion string `json:"schema_version"`
|
||||
LocalPeerID string `json:"local_peer_id,omitempty"`
|
||||
ActiveCount int `json:"active_count"`
|
||||
ActiveStreams int `json:"active_streams"`
|
||||
MaxStreamsPerConn int `json:"max_streams_per_conn"`
|
||||
@@ -63,6 +79,7 @@ type QUICFabricConnSnapshot struct {
|
||||
PeerID string `json:"peer_id,omitempty"`
|
||||
Endpoint string `json:"endpoint,omitempty"`
|
||||
CertSHA256 string `json:"cert_sha256,omitempty"`
|
||||
Direction string `json:"direction,omitempty"`
|
||||
ActiveStreams int `json:"active_streams"`
|
||||
MaxStreams int `json:"max_streams"`
|
||||
CapacityPressurePercent int `json:"capacity_pressure_percent"`
|
||||
@@ -92,7 +109,41 @@ type quicFabricConnEntry struct {
|
||||
}
|
||||
|
||||
func NewQUICFabricTransport(config *quic.Config) *QUICFabricTransport {
|
||||
return &QUICFabricTransport{Config: config, IdleTTL: defaultQUICFabricConnIdleTTL, MaxStreamsPerConn: defaultQUICFabricMaxStreamsPerConn, conns: map[string]*quicFabricConnEntry{}}
|
||||
return &QUICFabricTransport{Config: config, IdleTTL: defaultQUICFabricConnIdleTTL, MaxStreamsPerConn: defaultQUICFabricMaxStreamsPerConn, conns: map[string]*quicFabricConnEntry{}, reverseConns: map[string]*quicFabricConnEntry{}}
|
||||
}
|
||||
|
||||
func (t *QUICFabricTransport) SetInboundHandlers(production func(context.Context, ProductionEnvelope) (ProductionForwardResult, error), synthetic func(context.Context, SyntheticEnvelope) (SyntheticEnvelope, error), logger FabricSessionEventLogger) {
|
||||
t.SetInboundHandlersWithWebIngress(production, nil, synthetic, logger)
|
||||
}
|
||||
|
||||
func (t *QUICFabricTransport) SetInboundHandlersWithWebIngress(production func(context.Context, ProductionEnvelope) (ProductionForwardResult, error), webIngress func(context.Context, []byte) ([]byte, error), synthetic func(context.Context, SyntheticEnvelope) (SyntheticEnvelope, error), logger FabricSessionEventLogger) {
|
||||
if t == nil {
|
||||
return
|
||||
}
|
||||
t.mu.Lock()
|
||||
t.inboundProductionHandler = production
|
||||
t.inboundWebIngressHandler = webIngress
|
||||
t.inboundSyntheticHandler = synthetic
|
||||
t.logger = logger
|
||||
t.mu.Unlock()
|
||||
}
|
||||
|
||||
func (t *QUICFabricTransport) SetInboundFabricControlHandler(handler func(context.Context, []byte) ([]byte, error)) {
|
||||
if t == nil {
|
||||
return
|
||||
}
|
||||
t.mu.Lock()
|
||||
t.inboundFabricControlHandler = handler
|
||||
t.mu.Unlock()
|
||||
}
|
||||
|
||||
func (t *QUICFabricTransport) SetLocalPeerID(peerID string) {
|
||||
if t == nil {
|
||||
return
|
||||
}
|
||||
t.mu.Lock()
|
||||
t.LocalPeerID = strings.TrimSpace(peerID)
|
||||
t.mu.Unlock()
|
||||
}
|
||||
|
||||
func quicTLSConfigForTarget(target FabricTransportTarget) *tls.Config {
|
||||
@@ -186,9 +237,12 @@ func (t *QUICFabricTransport) connectConn(ctx context.Context, target FabricTran
|
||||
conn, err := quic.DialAddr(ctx, target.Endpoint, tlsConfig, nil)
|
||||
return conn, "", true, err
|
||||
}
|
||||
if conn, key, ok := t.reverseConnForTarget(target); ok {
|
||||
return conn, key, false, nil
|
||||
}
|
||||
key := quicFabricConnKey(target)
|
||||
if key == "" {
|
||||
conn, err := quic.DialAddr(ctx, target.Endpoint, tlsConfig, t.Config)
|
||||
conn, err := t.dialAddr(ctx, target.Endpoint, tlsConfig)
|
||||
return conn, "", true, err
|
||||
}
|
||||
t.mu.Lock()
|
||||
@@ -207,7 +261,7 @@ func (t *QUICFabricTransport) connectConn(ctx context.Context, target FabricTran
|
||||
}
|
||||
t.mu.Unlock()
|
||||
|
||||
conn, err := quic.DialAddr(ctx, target.Endpoint, tlsConfig, t.Config)
|
||||
conn, err := t.dialAddr(ctx, target.Endpoint, tlsConfig)
|
||||
if err != nil {
|
||||
t.mu.Lock()
|
||||
t.stats.OpenFailures++
|
||||
@@ -235,16 +289,339 @@ func (t *QUICFabricTransport) connectConn(ctx context.Context, target FabricTran
|
||||
t.conns[key] = &quicFabricConnEntry{conn: conn, lastUsed: time.Now()}
|
||||
t.stats.Opens++
|
||||
t.mu.Unlock()
|
||||
go t.acceptInboundStreams(context.Background(), conn)
|
||||
go t.sendReverseHello(context.Background(), conn)
|
||||
return conn, key, false, nil
|
||||
}
|
||||
|
||||
func (t *QUICFabricTransport) dialAddr(ctx context.Context, endpoint string, tlsConfig *tls.Config) (*quic.Conn, error) {
|
||||
if t != nil && t.DialAddr != nil {
|
||||
return t.DialAddr(ctx, endpoint, tlsConfig, t.Config)
|
||||
}
|
||||
return quic.DialAddr(ctx, endpoint, tlsConfig, t.Config)
|
||||
}
|
||||
|
||||
func DialQUICAddrWithPacketConn(ctx context.Context, endpoint string, packetConn net.PacketConn, tlsConfig *tls.Config, config *quic.Config) (*quic.Conn, error) {
|
||||
if packetConn == nil {
|
||||
return nil, fmt.Errorf("quic packet connection is required")
|
||||
}
|
||||
addr, err := net.ResolveUDPAddr("udp", strings.TrimPrefix(strings.TrimSpace(endpoint), "quic://"))
|
||||
if err != nil {
|
||||
_ = packetConn.Close()
|
||||
return nil, err
|
||||
}
|
||||
transport := &quic.Transport{Conn: packetConn}
|
||||
conn, err := transport.Dial(ctx, addr, tlsConfig, config)
|
||||
if err != nil {
|
||||
_ = transport.Close()
|
||||
return nil, err
|
||||
}
|
||||
go func() {
|
||||
<-conn.Context().Done()
|
||||
_ = transport.Close()
|
||||
}()
|
||||
return conn, nil
|
||||
}
|
||||
|
||||
func (t *QUICFabricTransport) sendReverseHello(ctx context.Context, conn *quic.Conn) {
|
||||
if t == nil || conn == nil {
|
||||
return
|
||||
}
|
||||
localPeerID := t.localPeerID()
|
||||
if localPeerID == "" {
|
||||
t.mu.Lock()
|
||||
t.stats.ReverseHelloFailed++
|
||||
t.mu.Unlock()
|
||||
return
|
||||
}
|
||||
helloCtx, cancel := context.WithTimeout(ctx, 3*time.Second)
|
||||
defer cancel()
|
||||
stream, err := conn.OpenStreamSync(helloCtx)
|
||||
if err != nil {
|
||||
t.mu.Lock()
|
||||
t.stats.ReverseHelloFailed++
|
||||
t.mu.Unlock()
|
||||
return
|
||||
}
|
||||
defer func() { _ = stream.Close() }()
|
||||
if err := fabricproto.WriteFrame(stream, fabricproto.Frame{
|
||||
Type: fabricproto.FramePing,
|
||||
Sequence: 1,
|
||||
Payload: []byte(fabricQUICReverseHelloPrefix + localPeerID),
|
||||
}); err != nil {
|
||||
t.mu.Lock()
|
||||
t.stats.ReverseHelloFailed++
|
||||
t.mu.Unlock()
|
||||
return
|
||||
}
|
||||
t.mu.Lock()
|
||||
t.stats.ReverseHelloSent++
|
||||
t.mu.Unlock()
|
||||
_, _ = fabricproto.ReadFrame(stream, fabricproto.DefaultMaxPayload)
|
||||
}
|
||||
|
||||
func (t *QUICFabricTransport) acceptInboundStreams(ctx context.Context, conn *quic.Conn) {
|
||||
if t == nil || conn == nil {
|
||||
return
|
||||
}
|
||||
for {
|
||||
stream, err := conn.AcceptStream(ctx)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
go t.handleInboundStream(ctx, conn, stream)
|
||||
}
|
||||
}
|
||||
|
||||
func (t *QUICFabricTransport) handleInboundStream(ctx context.Context, conn *quic.Conn, stream *quic.Stream) {
|
||||
session := fabricproto.NewSession(fabricproto.SessionConfig{})
|
||||
defer func() { _ = stream.Close() }()
|
||||
t.logFabricSession(FabricSessionEventLogEntry{
|
||||
Event: "fabric_session_quic_reverse_stream_opened",
|
||||
AcceptedBy: "quic_reverse",
|
||||
RemoteAddr: conn.RemoteAddr().String(),
|
||||
})
|
||||
defer t.logFabricSession(FabricSessionEventLogEntry{
|
||||
Event: "fabric_session_quic_reverse_stream_closed",
|
||||
AcceptedBy: "quic_reverse",
|
||||
RemoteAddr: conn.RemoteAddr().String(),
|
||||
})
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
_ = stream.Close()
|
||||
return
|
||||
default:
|
||||
}
|
||||
frame, err := fabricproto.ReadFrame(stream, fabricproto.DefaultMaxPayload)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
t.registerReverseHelloFrame(conn, frame)
|
||||
if t.handleInboundProductionForwardFrame(ctx, stream, frame) {
|
||||
continue
|
||||
}
|
||||
if t.handleInboundWebIngressForwardFrame(ctx, stream, frame) {
|
||||
continue
|
||||
}
|
||||
if t.handleInboundFabricControlForwardFrame(ctx, stream, frame) {
|
||||
continue
|
||||
}
|
||||
if t.handleInboundSyntheticForwardFrame(ctx, stream, frame) {
|
||||
continue
|
||||
}
|
||||
event, responses, err := session.HandleFrame(frame)
|
||||
if err != nil {
|
||||
_ = stream.Close()
|
||||
return
|
||||
}
|
||||
if event.Type != fabricproto.SessionEventNone {
|
||||
t.logFabricSession(FabricSessionEventLogEntry{
|
||||
Event: "fabric_session_reverse_event",
|
||||
SessionEvent: event.Type,
|
||||
StreamID: event.StreamID,
|
||||
Sequence: event.Sequence,
|
||||
TrafficClass: event.TrafficClass,
|
||||
AcceptedBy: "quic_reverse",
|
||||
RemoteAddr: conn.RemoteAddr().String(),
|
||||
})
|
||||
}
|
||||
for _, response := range responses {
|
||||
if err := fabricproto.WriteFrame(stream, response); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (t *QUICFabricTransport) registerReverseHelloFrame(conn *quic.Conn, frame fabricproto.Frame) {
|
||||
if t == nil || conn == nil || frame.Type != fabricproto.FramePing {
|
||||
return
|
||||
}
|
||||
payload := string(frame.Payload)
|
||||
if !strings.HasPrefix(payload, fabricQUICReverseHelloPrefix) {
|
||||
return
|
||||
}
|
||||
peerID := strings.TrimPrefix(payload, fabricQUICReverseHelloPrefix)
|
||||
t.RegisterReverseConn(peerID, conn)
|
||||
t.logFabricSession(FabricSessionEventLogEntry{
|
||||
Event: "fabric_session_quic_reverse_registered",
|
||||
AcceptedBy: "quic_reverse_hello",
|
||||
RemoteAddr: conn.RemoteAddr().String(),
|
||||
PeerID: peerID,
|
||||
})
|
||||
}
|
||||
|
||||
func (t *QUICFabricTransport) handleInboundProductionForwardFrame(ctx context.Context, stream *quic.Stream, frame fabricproto.Frame) bool {
|
||||
if frame.Type != fabricproto.FrameData || frame.StreamID != ProductionForwardQUICStreamID {
|
||||
return false
|
||||
}
|
||||
response := quicProductionForwardResponse{}
|
||||
productionHandler, _, _, _, _ := t.inboundHandlers()
|
||||
if productionHandler == nil {
|
||||
response.Error = ErrForwardRuntimeUnavailable.Error()
|
||||
} else {
|
||||
var envelope ProductionEnvelope
|
||||
if err := json.Unmarshal(frame.Payload, &envelope); err != nil {
|
||||
response.Error = "invalid production mesh envelope"
|
||||
} else if result, err := productionHandler(ctx, envelope); err != nil {
|
||||
response.Error = err.Error()
|
||||
} else {
|
||||
response.Result = result
|
||||
}
|
||||
}
|
||||
payload, err := json.Marshal(response)
|
||||
if err == nil {
|
||||
_ = fabricproto.WriteFrame(stream, fabricproto.Frame{Type: fabricproto.FrameData, TrafficClass: fabricproto.TrafficClassReliable, StreamID: ProductionForwardQUICStreamID, Sequence: frame.Sequence, Payload: payload})
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (t *QUICFabricTransport) handleInboundWebIngressForwardFrame(ctx context.Context, stream *quic.Stream, frame fabricproto.Frame) bool {
|
||||
if frame.Type != fabricproto.FrameData || frame.StreamID != WebIngressForwardQUICStreamID {
|
||||
return false
|
||||
}
|
||||
response := quicWebIngressForwardResponse{}
|
||||
_, webIngressHandler, _, _, _ := t.inboundHandlers()
|
||||
if webIngressHandler == nil {
|
||||
response.Error = ErrForwardRuntimeUnavailable.Error()
|
||||
} else if payload, err := webIngressHandler(ctx, append([]byte(nil), frame.Payload...)); err != nil {
|
||||
response.Error = err.Error()
|
||||
} else {
|
||||
response.Payload = append(json.RawMessage(nil), payload...)
|
||||
}
|
||||
payload, err := json.Marshal(response)
|
||||
if err == nil {
|
||||
_ = fabricproto.WriteFrame(stream, fabricproto.Frame{Type: fabricproto.FrameData, TrafficClass: fabricproto.TrafficClassReliable, StreamID: WebIngressForwardQUICStreamID, Sequence: frame.Sequence, Payload: payload})
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (t *QUICFabricTransport) handleInboundFabricControlForwardFrame(ctx context.Context, stream *quic.Stream, frame fabricproto.Frame) bool {
|
||||
if frame.Type != fabricproto.FrameData || frame.StreamID != FabricControlForwardQUICStreamID {
|
||||
return false
|
||||
}
|
||||
response := quicFabricControlForwardResponse{}
|
||||
_, _, fabricControlHandler, _, _ := t.inboundHandlers()
|
||||
if fabricControlHandler == nil {
|
||||
response.Error = ErrForwardRuntimeUnavailable.Error()
|
||||
} else if payload, err := fabricControlHandler(ctx, append([]byte(nil), frame.Payload...)); err != nil {
|
||||
response.Error = err.Error()
|
||||
} else {
|
||||
response.Payload = append(json.RawMessage(nil), payload...)
|
||||
}
|
||||
payload, err := json.Marshal(response)
|
||||
if err == nil {
|
||||
_ = fabricproto.WriteFrame(stream, fabricproto.Frame{Type: fabricproto.FrameData, TrafficClass: fabricproto.TrafficClassReliable, StreamID: FabricControlForwardQUICStreamID, Sequence: frame.Sequence, Payload: payload})
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (t *QUICFabricTransport) handleInboundSyntheticForwardFrame(ctx context.Context, stream *quic.Stream, frame fabricproto.Frame) bool {
|
||||
if frame.Type != fabricproto.FrameData || frame.StreamID != SyntheticForwardQUICStreamID {
|
||||
return false
|
||||
}
|
||||
response := quicSyntheticForwardResponse{}
|
||||
_, _, _, syntheticHandler, _ := t.inboundHandlers()
|
||||
if syntheticHandler == nil {
|
||||
response.Error = ErrMeshRuntimeDisabled.Error()
|
||||
} else {
|
||||
var envelope SyntheticEnvelope
|
||||
if err := json.Unmarshal(frame.Payload, &envelope); err != nil {
|
||||
response.Error = "invalid synthetic mesh envelope"
|
||||
} else if ack, err := syntheticHandler(ctx, envelope); err != nil {
|
||||
response.Error = err.Error()
|
||||
} else {
|
||||
response.Envelope = ack
|
||||
}
|
||||
}
|
||||
payload, err := json.Marshal(response)
|
||||
if err == nil {
|
||||
_ = fabricproto.WriteFrame(stream, fabricproto.Frame{Type: fabricproto.FrameData, TrafficClass: fabricproto.TrafficClassReliable, StreamID: SyntheticForwardQUICStreamID, Sequence: frame.Sequence, Payload: payload})
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (t *QUICFabricTransport) inboundHandlers() (func(context.Context, ProductionEnvelope) (ProductionForwardResult, error), func(context.Context, []byte) ([]byte, error), func(context.Context, []byte) ([]byte, error), func(context.Context, SyntheticEnvelope) (SyntheticEnvelope, error), FabricSessionEventLogger) {
|
||||
if t == nil {
|
||||
return nil, nil, nil, nil, nil
|
||||
}
|
||||
t.mu.Lock()
|
||||
defer t.mu.Unlock()
|
||||
return t.inboundProductionHandler, t.inboundWebIngressHandler, t.inboundFabricControlHandler, t.inboundSyntheticHandler, t.logger
|
||||
}
|
||||
|
||||
func (t *QUICFabricTransport) localPeerID() string {
|
||||
if t == nil {
|
||||
return ""
|
||||
}
|
||||
t.mu.Lock()
|
||||
defer t.mu.Unlock()
|
||||
return strings.TrimSpace(t.LocalPeerID)
|
||||
}
|
||||
|
||||
func (t *QUICFabricTransport) logFabricSession(entry FabricSessionEventLogEntry) {
|
||||
_, _, _, _, logger := t.inboundHandlers()
|
||||
if logger != nil {
|
||||
logger(entry)
|
||||
}
|
||||
}
|
||||
|
||||
func (t *QUICFabricTransport) RegisterReverseConn(peerID string, conn *quic.Conn) {
|
||||
if t == nil || conn == nil {
|
||||
return
|
||||
}
|
||||
peerID = strings.TrimSpace(peerID)
|
||||
if peerID == "" {
|
||||
return
|
||||
}
|
||||
t.mu.Lock()
|
||||
defer t.mu.Unlock()
|
||||
if t.reverseConns == nil {
|
||||
t.reverseConns = map[string]*quicFabricConnEntry{}
|
||||
}
|
||||
if existing := t.reverseConns[peerID]; existing != nil && existing.conn != nil && existing.conn != conn {
|
||||
select {
|
||||
case <-existing.conn.Context().Done():
|
||||
default:
|
||||
_ = existing.conn.CloseWithError(0, "reverse connection replaced")
|
||||
}
|
||||
}
|
||||
t.reverseConns[peerID] = &quicFabricConnEntry{conn: conn, lastUsed: time.Now()}
|
||||
t.stats.ReverseRegisters++
|
||||
}
|
||||
|
||||
func (t *QUICFabricTransport) reverseConnForTarget(target FabricTransportTarget) (*quic.Conn, string, bool) {
|
||||
peerID := strings.TrimSpace(target.PeerID)
|
||||
if t == nil || peerID == "" || !fabricTransportPrefersReverseConn(target.Transport) {
|
||||
return nil, "", false
|
||||
}
|
||||
t.mu.Lock()
|
||||
defer t.mu.Unlock()
|
||||
t.pruneIdleLocked(time.Now())
|
||||
entry := t.reverseConns[peerID]
|
||||
if entry == nil || entry.conn == nil {
|
||||
return nil, "", false
|
||||
}
|
||||
select {
|
||||
case <-entry.conn.Context().Done():
|
||||
delete(t.reverseConns, peerID)
|
||||
t.stats.ClosedEvicted++
|
||||
return nil, "", false
|
||||
default:
|
||||
entry.lastUsed = time.Now()
|
||||
t.stats.ReverseReuses++
|
||||
return entry.conn, quicFabricReverseConnKey(peerID), true
|
||||
}
|
||||
}
|
||||
|
||||
func (t *QUICFabricTransport) reserveStream(key string, conn *quic.Conn) error {
|
||||
if t == nil || key == "" {
|
||||
return nil
|
||||
}
|
||||
t.mu.Lock()
|
||||
defer t.mu.Unlock()
|
||||
entry := t.conns[key]
|
||||
entry := t.connEntryLocked(key)
|
||||
if entry == nil || entry.conn != conn {
|
||||
return fmt.Errorf("quic fabric connection is not cached")
|
||||
}
|
||||
@@ -267,16 +644,26 @@ func (t *QUICFabricTransport) releaseStream(key string) {
|
||||
return
|
||||
}
|
||||
t.mu.Lock()
|
||||
if entry := t.conns[key]; entry != nil {
|
||||
if entry := t.connEntryLocked(key); entry != nil {
|
||||
if entry.activeStreams > 0 {
|
||||
entry.activeStreams--
|
||||
}
|
||||
entry.lastUsed = time.Now()
|
||||
t.stats.StreamCloses++
|
||||
}
|
||||
t.stats.StreamCloses++
|
||||
t.mu.Unlock()
|
||||
}
|
||||
|
||||
func (t *QUICFabricTransport) connEntryLocked(key string) *quicFabricConnEntry {
|
||||
if t == nil || key == "" {
|
||||
return nil
|
||||
}
|
||||
if strings.HasPrefix(key, "reverse\x00") {
|
||||
return t.reverseConns[strings.TrimPrefix(key, "reverse\x00")]
|
||||
}
|
||||
return t.conns[key]
|
||||
}
|
||||
|
||||
func (t *QUICFabricTransport) evictConn(target FabricTransportTarget, conn *quic.Conn) {
|
||||
if t == nil || conn == nil {
|
||||
return
|
||||
@@ -315,6 +702,20 @@ func (t *QUICFabricTransport) pruneIdleLocked(now time.Time) {
|
||||
t.stats.IdleEvicted++
|
||||
}
|
||||
}
|
||||
for peerID, entry := range t.reverseConns {
|
||||
if entry == nil || entry.conn == nil {
|
||||
delete(t.reverseConns, peerID)
|
||||
continue
|
||||
}
|
||||
if !entry.lastUsed.IsZero() && now.Sub(entry.lastUsed) > ttl {
|
||||
if entry.activeStreams > 0 {
|
||||
continue
|
||||
}
|
||||
_ = entry.conn.CloseWithError(0, "idle reverse")
|
||||
delete(t.reverseConns, peerID)
|
||||
t.stats.IdleEvicted++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func quicFabricConnKey(target FabricTransportTarget) string {
|
||||
@@ -340,6 +741,23 @@ func parseQUICFabricConnKey(key string) (peerID string, endpoint string, certSHA
|
||||
return peerID, endpoint, certSHA256
|
||||
}
|
||||
|
||||
// quicFabricReverseConnKey builds the cache key for a peer's reverse
// connection: "reverse\x00" followed by the trimmed peer ID. A blank peer ID
// yields the empty key.
func quicFabricReverseConnKey(peerID string) string {
	if trimmed := strings.TrimSpace(peerID); trimmed != "" {
		return "reverse\x00" + trimmed
	}
	return ""
}
|
||||
|
||||
// fabricTransportPrefersReverseConn reports whether the transport kind is
// "reverse_quic" or "relay_quic". The comparison ignores case and surrounding
// whitespace.
func fabricTransportPrefersReverseConn(transport string) bool {
	kind := strings.ToLower(strings.TrimSpace(transport))
	return kind == "reverse_quic" || kind == "relay_quic"
}
|
||||
|
||||
func (t *QUICFabricTransport) Close() error {
|
||||
if t == nil {
|
||||
return nil
|
||||
@@ -348,12 +766,19 @@ func (t *QUICFabricTransport) Close() error {
|
||||
t.stats.CloseAllCalls++
|
||||
conns := t.conns
|
||||
t.conns = map[string]*quicFabricConnEntry{}
|
||||
reverseConns := t.reverseConns
|
||||
t.reverseConns = map[string]*quicFabricConnEntry{}
|
||||
t.mu.Unlock()
|
||||
for _, entry := range conns {
|
||||
if entry != nil && entry.conn != nil {
|
||||
_ = entry.conn.CloseWithError(0, "closed")
|
||||
}
|
||||
}
|
||||
for _, entry := range reverseConns {
|
||||
if entry != nil && entry.conn != nil {
|
||||
_ = entry.conn.CloseWithError(0, "closed")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -370,6 +795,7 @@ func (t *QUICFabricTransport) Snapshot() QUICFabricTransportSnapshot {
|
||||
}
|
||||
snapshot := QUICFabricTransportSnapshot{
|
||||
SchemaVersion: "rap.quic_fabric_transport.v1",
|
||||
LocalPeerID: strings.TrimSpace(t.LocalPeerID),
|
||||
MaxStreamsPerConn: limit,
|
||||
Stats: t.stats,
|
||||
}
|
||||
@@ -391,6 +817,40 @@ func (t *QUICFabricTransport) Snapshot() QUICFabricTransportSnapshot {
|
||||
PeerID: peerID,
|
||||
Endpoint: endpoint,
|
||||
CertSHA256: certSHA256,
|
||||
Direction: "outbound",
|
||||
ActiveStreams: entry.activeStreams,
|
||||
MaxStreams: limit,
|
||||
Saturated: entry.activeStreams >= limit,
|
||||
}
|
||||
if !entry.lastUsed.IsZero() {
|
||||
connSnapshot.LastUsedUnixSec = entry.lastUsed.UTC().Unix()
|
||||
}
|
||||
if limit > 0 {
|
||||
connSnapshot.CapacityPressurePercent = (entry.activeStreams * 100) / limit
|
||||
}
|
||||
snapshot.Connections = append(snapshot.Connections, connSnapshot)
|
||||
if entry.activeStreams >= limit {
|
||||
snapshot.SaturatedConnections++
|
||||
}
|
||||
}
|
||||
}
|
||||
for peerID, entry := range t.reverseConns {
|
||||
if entry == nil || entry.conn == nil {
|
||||
delete(t.reverseConns, peerID)
|
||||
continue
|
||||
}
|
||||
select {
|
||||
case <-entry.conn.Context().Done():
|
||||
delete(t.reverseConns, peerID)
|
||||
t.stats.ClosedEvicted++
|
||||
snapshot.Stats.ClosedEvicted++
|
||||
default:
|
||||
snapshot.ActiveCount++
|
||||
snapshot.ActiveStreams += entry.activeStreams
|
||||
connSnapshot := QUICFabricConnSnapshot{
|
||||
PeerID: peerID,
|
||||
Endpoint: entry.conn.RemoteAddr().String(),
|
||||
Direction: "reverse",
|
||||
ActiveStreams: entry.activeStreams,
|
||||
MaxStreams: limit,
|
||||
Saturated: entry.activeStreams >= limit,
|
||||
@@ -462,6 +922,7 @@ func (s *quicFabricSession) Close() error {
|
||||
s.closeOnce.Do(func() {
|
||||
close(s.done)
|
||||
if s.stream != nil {
|
||||
s.stream.CancelRead(0)
|
||||
err = s.stream.Close()
|
||||
}
|
||||
if s.transport != nil {
|
||||
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
"crypto/x509"
|
||||
"crypto/x509/pkix"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"encoding/pem"
|
||||
"math/big"
|
||||
"strings"
|
||||
@@ -341,6 +342,119 @@ func TestQUICFabricTransportLimitsStreamsPerConnection(t *testing.T) {
|
||||
defer second.Close()
|
||||
}
|
||||
|
||||
func TestQUICFabricTransportReusesInboundConnectionForReverseStream(t *testing.T) {
|
||||
reverseTransport := NewQUICFabricTransport(nil)
|
||||
defer reverseTransport.Close()
|
||||
server, err := StartQUICFabricServer(context.Background(), QUICFabricServerConfig{
|
||||
ListenAddr: "127.0.0.1:0",
|
||||
TLSConfig: testQUICTLSConfig(t),
|
||||
ReverseTransport: reverseTransport,
|
||||
SyntheticForwardHandler: func(_ context.Context, envelope SyntheticEnvelope) (SyntheticEnvelope, error) {
|
||||
envelope.To, envelope.From = envelope.From, PeerIdentity{ClusterID: envelope.ClusterID, NodeID: "node-r"}
|
||||
return envelope, nil
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("start quic fabric server: %v", err)
|
||||
}
|
||||
defer server.Close()
|
||||
|
||||
clientTransport := NewQUICFabricTransport(nil)
|
||||
defer clientTransport.Close()
|
||||
clientTransport.SetLocalPeerID("node-a")
|
||||
clientTransport.SetInboundHandlers(func(_ context.Context, envelope ProductionEnvelope) (ProductionForwardResult, error) {
|
||||
return ProductionForwardResult{
|
||||
Accepted: true,
|
||||
Delivered: true,
|
||||
Forwarded: true,
|
||||
By: PeerIdentity{ClusterID: envelope.ClusterID, NodeID: "node-a"},
|
||||
MessageID: envelope.MessageID,
|
||||
RouteID: envelope.RouteID,
|
||||
}, nil
|
||||
}, nil, nil)
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
|
||||
defer cancel()
|
||||
session, err := clientTransport.Connect(ctx, FabricTransportTarget{
|
||||
PeerID: "node-r",
|
||||
Endpoint: server.Addr().String(),
|
||||
TLSConfig: &tls.Config{
|
||||
InsecureSkipVerify: true,
|
||||
NextProtos: []string{fabricQUICNextProto},
|
||||
},
|
||||
Timeout: time.Second,
|
||||
InboundBuffer: 4,
|
||||
ErrorBuffer: 4,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("client connect: %v", err)
|
||||
}
|
||||
defer session.Close()
|
||||
deadline := time.Now().Add(time.Second)
|
||||
for {
|
||||
if reverseTransport.Snapshot().Stats.ReverseRegisters > 0 {
|
||||
break
|
||||
}
|
||||
if time.Now().After(deadline) {
|
||||
t.Fatalf("reverse hello did not register connection: %+v", reverseTransport.Snapshot())
|
||||
}
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
}
|
||||
|
||||
reverseSession, err := reverseTransport.Connect(ctx, FabricTransportTarget{
|
||||
PeerID: "node-a",
|
||||
Endpoint: "10.0.0.2:19443",
|
||||
Transport: "relay_quic",
|
||||
Timeout: time.Second,
|
||||
InboundBuffer: 4,
|
||||
ErrorBuffer: 4,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("reverse connect: %v", err)
|
||||
}
|
||||
defer reverseSession.Close()
|
||||
productionPayload, err := json.Marshal(ProductionEnvelope{
|
||||
FabricProtocolVersion: ProtocolVersion,
|
||||
MessageID: "msg-1",
|
||||
RouteID: "route-r-a",
|
||||
ClusterID: "cluster-1",
|
||||
SourceNodeID: "node-r",
|
||||
DestinationNodeID: "node-a",
|
||||
CurrentHopNodeID: "node-a",
|
||||
NextHopNodeID: "node-a",
|
||||
ChannelClass: ProductionChannelFabricControl,
|
||||
MessageType: ProductionMessageFabricControl,
|
||||
TTL: 4,
|
||||
CreatedAt: time.Now().UTC(),
|
||||
ExpiresAt: time.Now().UTC().Add(time.Minute),
|
||||
PayloadHash: "unused-by-test-handler",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("marshal production: %v", err)
|
||||
}
|
||||
if err := reverseSession.Send(ctx, fabricproto.Frame{Type: fabricproto.FrameData, TrafficClass: fabricproto.TrafficClassReliable, StreamID: ProductionForwardQUICStreamID, Sequence: 2, Payload: productionPayload}); err != nil {
|
||||
t.Fatalf("send reverse production: %v", err)
|
||||
}
|
||||
select {
|
||||
case frame := <-reverseSession.Frames():
|
||||
var response quicProductionForwardResponse
|
||||
if err := json.Unmarshal(frame.Payload, &response); err != nil {
|
||||
t.Fatalf("decode response: %v", err)
|
||||
}
|
||||
if !response.Result.Accepted || !response.Result.Delivered || response.Result.By.NodeID != "node-a" {
|
||||
t.Fatalf("response = %+v", response)
|
||||
}
|
||||
case err := <-reverseSession.Errors():
|
||||
t.Fatalf("reverse session error: %v", err)
|
||||
case <-ctx.Done():
|
||||
t.Fatal(ctx.Err())
|
||||
}
|
||||
snapshot := reverseTransport.Snapshot()
|
||||
if snapshot.Stats.ReverseRegisters == 0 || snapshot.Stats.ReverseReuses == 0 {
|
||||
t.Fatalf("reverse connection was not registered/reused: %+v", snapshot)
|
||||
}
|
||||
}
|
||||
|
||||
func TestQUICFabricServerHandlesFabricFrames(t *testing.T) {
|
||||
var events []FabricSessionEventLogEntry
|
||||
server, err := StartQUICFabricServer(context.Background(), QUICFabricServerConfig{
|
||||
@@ -389,6 +503,68 @@ func TestQUICFabricServerHandlesFabricFrames(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestQUICFabricServerHandlesWebIngressForwardFrames(t *testing.T) {
|
||||
var received []byte
|
||||
server, err := StartQUICFabricServer(context.Background(), QUICFabricServerConfig{
|
||||
ListenAddr: "127.0.0.1:0",
|
||||
TLSConfig: testQUICTLSConfig(t),
|
||||
WebIngressForwardHandler: func(_ context.Context, payload []byte) ([]byte, error) {
|
||||
received = append([]byte(nil), payload...)
|
||||
return []byte(`{"schema_version":"rap.web_ingress.fabric_runtime_response.v1","status_code":200,"body_b64":"b2s="}`), nil
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("start quic fabric server: %v", err)
|
||||
}
|
||||
defer server.Close()
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
|
||||
defer cancel()
|
||||
session, err := NewQUICFabricTransport(nil).Connect(ctx, FabricTransportTarget{
|
||||
Endpoint: server.Addr().String(),
|
||||
TLSConfig: &tls.Config{
|
||||
InsecureSkipVerify: true,
|
||||
NextProtos: []string{fabricQUICNextProto},
|
||||
},
|
||||
Timeout: time.Second,
|
||||
InboundBuffer: 4,
|
||||
ErrorBuffer: 4,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("connect quic fabric: %v", err)
|
||||
}
|
||||
defer session.Close()
|
||||
if err := session.Send(ctx, fabricproto.Frame{
|
||||
Type: fabricproto.FrameData,
|
||||
TrafficClass: fabricproto.TrafficClassReliable,
|
||||
StreamID: WebIngressForwardQUICStreamID,
|
||||
Sequence: 44,
|
||||
Payload: []byte(`{"envelope":true}`),
|
||||
}); err != nil {
|
||||
t.Fatalf("send web ingress frame: %v", err)
|
||||
}
|
||||
select {
|
||||
case frame := <-session.Frames():
|
||||
if frame.Type != fabricproto.FrameData || frame.StreamID != WebIngressForwardQUICStreamID || frame.Sequence != 44 {
|
||||
t.Fatalf("frame = %+v", frame)
|
||||
}
|
||||
var response quicWebIngressForwardResponse
|
||||
if err := json.Unmarshal(frame.Payload, &response); err != nil {
|
||||
t.Fatalf("decode response: %v", err)
|
||||
}
|
||||
if string(response.Payload) != `{"schema_version":"rap.web_ingress.fabric_runtime_response.v1","status_code":200,"body_b64":"b2s="}` || response.Error != "" {
|
||||
t.Fatalf("response = %+v", response)
|
||||
}
|
||||
case err := <-session.Errors():
|
||||
t.Fatalf("session error: %v", err)
|
||||
case <-ctx.Done():
|
||||
t.Fatal(ctx.Err())
|
||||
}
|
||||
if string(received) != `{"envelope":true}` {
|
||||
t.Fatalf("received = %s", string(received))
|
||||
}
|
||||
}
|
||||
|
||||
func startQUICFabricEchoServer(t *testing.T) *quic.Listener {
|
||||
t.Helper()
|
||||
return startQUICFabricEchoServerWithTLS(t, testQUICTLSConfig(t))
|
||||
|
||||
@@ -0,0 +1,128 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
type FabricRouteHealthTracker struct {
|
||||
mu sync.Mutex
|
||||
QuarantineTTL time.Duration
|
||||
routes map[string]FabricRouteHealthEntry
|
||||
}
|
||||
|
||||
// FabricRouteHealthEntry is the failure record kept for a single route.
//
// Note: encoding/json's omitempty option never omits struct values, so the two
// time.Time fields are always present in the encoded JSON.
type FabricRouteHealthEntry struct {
	// Reason is the trimmed reason supplied with the most recent failure.
	Reason string `json:"reason,omitempty"`
	// Failures counts failures recorded since the entry was created.
	Failures uint64 `json:"failures"`
	// LastFailure is the time of the most recent failure.
	LastFailure time.Time `json:"last_failure,omitempty"`
	// RetryAfter is the time from which the route is no longer quarantined.
	RetryAfter time.Time `json:"retry_after,omitempty"`
}
|
||||
|
||||
type FabricRouteHealthSnapshot struct {
|
||||
Quarantined map[string]FabricRouteHealthEntry `json:"quarantined,omitempty"`
|
||||
}
|
||||
|
||||
func NewFabricRouteHealthTracker(ttl time.Duration) *FabricRouteHealthTracker {
|
||||
if ttl <= 0 {
|
||||
ttl = 30 * time.Second
|
||||
}
|
||||
return &FabricRouteHealthTracker{QuarantineTTL: ttl, routes: map[string]FabricRouteHealthEntry{}}
|
||||
}
|
||||
|
||||
func (t *FabricRouteHealthTracker) MarkFailure(routeID string, reason string, now time.Time) {
|
||||
routeID = strings.TrimSpace(routeID)
|
||||
if t == nil || routeID == "" {
|
||||
return
|
||||
}
|
||||
if now.IsZero() {
|
||||
now = time.Now().UTC()
|
||||
}
|
||||
ttl := t.QuarantineTTL
|
||||
if ttl <= 0 {
|
||||
ttl = 30 * time.Second
|
||||
}
|
||||
t.mu.Lock()
|
||||
entry := t.routes[routeID]
|
||||
entry.Failures++
|
||||
entry.Reason = strings.TrimSpace(reason)
|
||||
entry.LastFailure = now
|
||||
entry.RetryAfter = now.Add(ttl)
|
||||
if t.routes == nil {
|
||||
t.routes = map[string]FabricRouteHealthEntry{}
|
||||
}
|
||||
t.routes[routeID] = entry
|
||||
t.mu.Unlock()
|
||||
}
|
||||
|
||||
func (t *FabricRouteHealthTracker) MarkSuccess(routeID string) {
|
||||
routeID = strings.TrimSpace(routeID)
|
||||
if t == nil || routeID == "" {
|
||||
return
|
||||
}
|
||||
t.mu.Lock()
|
||||
delete(t.routes, routeID)
|
||||
t.mu.Unlock()
|
||||
}
|
||||
|
||||
func (t *FabricRouteHealthTracker) Apply(routeSet FabricRouteSet, now time.Time) FabricRouteSet {
|
||||
if t == nil {
|
||||
return routeSet
|
||||
}
|
||||
if now.IsZero() {
|
||||
now = time.Now().UTC()
|
||||
}
|
||||
t.mu.Lock()
|
||||
defer t.mu.Unlock()
|
||||
if len(t.routes) == 0 {
|
||||
return routeSet
|
||||
}
|
||||
return mapFabricRouteSet(routeSet, func(route FabricRoute) FabricRoute {
|
||||
entry, ok := t.routes[route.RouteID]
|
||||
if !ok {
|
||||
return route
|
||||
}
|
||||
if !entry.RetryAfter.IsZero() && !now.Before(entry.RetryAfter) {
|
||||
delete(t.routes, route.RouteID)
|
||||
return route
|
||||
}
|
||||
route.Healthy = false
|
||||
route.Degraded = true
|
||||
return route
|
||||
})
|
||||
}
|
||||
|
||||
func (t *FabricRouteHealthTracker) Snapshot(now time.Time) FabricRouteHealthSnapshot {
|
||||
if t == nil {
|
||||
return FabricRouteHealthSnapshot{}
|
||||
}
|
||||
if now.IsZero() {
|
||||
now = time.Now().UTC()
|
||||
}
|
||||
t.mu.Lock()
|
||||
defer t.mu.Unlock()
|
||||
out := map[string]FabricRouteHealthEntry{}
|
||||
for routeID, entry := range t.routes {
|
||||
if !entry.RetryAfter.IsZero() && !now.Before(entry.RetryAfter) {
|
||||
continue
|
||||
}
|
||||
out[routeID] = entry
|
||||
}
|
||||
if len(out) == 0 {
|
||||
return FabricRouteHealthSnapshot{}
|
||||
}
|
||||
return FabricRouteHealthSnapshot{Quarantined: out}
|
||||
}
|
||||
|
||||
func mapFabricRouteSet(routeSet FabricRouteSet, fn func(FabricRoute) FabricRoute) FabricRouteSet {
|
||||
if strings.TrimSpace(routeSet.Primary.RouteID) != "" {
|
||||
routeSet.Primary = fn(routeSet.Primary)
|
||||
}
|
||||
for i := range routeSet.WarmStandby {
|
||||
routeSet.WarmStandby[i] = fn(routeSet.WarmStandby[i])
|
||||
}
|
||||
for i := range routeSet.ColdFallbacks {
|
||||
routeSet.ColdFallbacks[i] = fn(routeSet.ColdFallbacks[i])
|
||||
}
|
||||
return routeSet
|
||||
}
|
||||
@@ -0,0 +1,322 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Values compared (case-insensitively, after trimming) against a peer endpoint
// candidate's Reachability and ConnectivityMode fields by the route planner.
const (
	// Reachability values.
	FabricCandidateReachabilityPublic       = "public"
	FabricCandidateReachabilityPrivate      = "private"
	FabricCandidateReachabilityRelay        = "relay"
	FabricCandidateReachabilityOutboundOnly = "outbound_only"

	// Connectivity mode values.
	FabricConnectivityDirect        = "direct"
	FabricConnectivityOutboundOnly  = "outbound_only"
	FabricConnectivityRelayRequired = "relay_required"
)
|
||||
|
||||
type FabricRoutePlannerConfig struct {
|
||||
ClusterID string
|
||||
LocalNodeID string
|
||||
LocalSegmentID string
|
||||
LocalNATGroupID string
|
||||
DefaultCapacity int
|
||||
RelayCapacity int
|
||||
ReverseCapacity int
|
||||
Observations map[string]EndpointCandidateHealthObservation
|
||||
CapacityPressure map[string]EndpointCandidateCapacityPressure
|
||||
Now time.Time
|
||||
MaxObservationAge time.Duration
|
||||
MaxCapacityPressureAge time.Duration
|
||||
}
|
||||
|
||||
// FabricCandidateMetadata is the subset of a peer endpoint candidate's JSON
// metadata that the route planner reads. All fields are optional.
type FabricCandidateMetadata struct {
	// LocalSegmentID and NATGroupID are compared with the planner's local
	// segment and NAT group to detect LAN-reachable candidates.
	LocalSegmentID string `json:"local_segment_id,omitempty"`
	NATGroupID     string `json:"nat_group_id,omitempty"`
	// RelayNodeID (or ViaNodeID when it is blank) names the relay hop;
	// RelayEndpoint is that hop's address.
	RelayNodeID   string `json:"relay_node_id,omitempty"`
	RelayEndpoint string `json:"relay_endpoint,omitempty"`
	ViaNodeID     string `json:"via_node_id,omitempty"`
	// A non-blank STUNServer or ICEFoundation classifies the candidate as
	// an ICE route when no earlier rule applies.
	STUNServer    string `json:"stun_server,omitempty"`
	ICEFoundation string `json:"ice_foundation,omitempty"`
}
|
||||
|
||||
func FabricRouteSetForPeerEndpointCandidates(targetNodeID string, candidates []PeerEndpointCandidate, cfg FabricRoutePlannerConfig) FabricRouteSet {
|
||||
targetNodeID = strings.TrimSpace(targetNodeID)
|
||||
if targetNodeID == "" && len(candidates) > 0 {
|
||||
targetNodeID = strings.TrimSpace(candidates[0].NodeID)
|
||||
}
|
||||
routeSet := FabricRouteSet{TargetKind: FabricChannelTargetNode, TargetID: targetNodeID}
|
||||
if len(candidates) == 0 {
|
||||
return routeSet
|
||||
}
|
||||
now := cfg.Now
|
||||
if now.IsZero() {
|
||||
now = time.Now().UTC()
|
||||
}
|
||||
ranked := RankPeerEndpointCandidates(candidates, EndpointCandidateScoreOptions{
|
||||
Now: now,
|
||||
Observations: cfg.Observations,
|
||||
MaxObservationAge: firstNonZeroDuration(cfg.MaxObservationAge, 30*time.Second),
|
||||
CapacityPressure: cfg.CapacityPressure,
|
||||
MaxCapacityPressureAge: firstNonZeroDuration(cfg.MaxCapacityPressureAge, 10*time.Second),
|
||||
})
|
||||
routes := make([]FabricRoute, 0, len(ranked))
|
||||
for index, scored := range ranked {
|
||||
route, ok := fabricRouteForPeerEndpointCandidate(scored.Candidate, cfg, scored.Score, index, now)
|
||||
if ok {
|
||||
routes = append(routes, route)
|
||||
}
|
||||
}
|
||||
return routeSetFromRoutes(routeSet, routes)
|
||||
}
|
||||
|
||||
func FabricRouteSetsForPeerEndpointCandidates(candidatesByNode map[string][]PeerEndpointCandidate, cfg FabricRoutePlannerConfig) map[string]FabricRouteSet {
|
||||
out := make(map[string]FabricRouteSet, len(candidatesByNode))
|
||||
for nodeID, candidates := range candidatesByNode {
|
||||
nodeID = strings.TrimSpace(nodeID)
|
||||
if nodeID == "" {
|
||||
continue
|
||||
}
|
||||
routeSet := FabricRouteSetForPeerEndpointCandidates(nodeID, candidates, cfg)
|
||||
if strings.TrimSpace(routeSet.Primary.RouteID) != "" || len(routeSet.WarmStandby) > 0 || len(routeSet.ColdFallbacks) > 0 {
|
||||
out[nodeID] = routeSet
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func fabricRouteForPeerEndpointCandidate(candidate PeerEndpointCandidate, cfg FabricRoutePlannerConfig, score int, index int, now time.Time) (FabricRoute, bool) {
|
||||
candidate.EndpointID = strings.TrimSpace(candidate.EndpointID)
|
||||
candidate.NodeID = strings.TrimSpace(candidate.NodeID)
|
||||
candidate.Address = strings.TrimRight(strings.TrimSpace(candidate.Address), "/")
|
||||
if candidate.EndpointID == "" || candidate.NodeID == "" || candidate.Address == "" || !isQUICOnlyCandidateTransport(candidate.Transport) {
|
||||
return FabricRoute{}, false
|
||||
}
|
||||
metadata := decodeFabricCandidateMetadata(candidate.Metadata)
|
||||
mode := fabricRouteModeForPeerEndpointCandidate(candidate, metadata, cfg)
|
||||
hops := fabricRouteHopsForCandidate(candidate, metadata, mode, cfg)
|
||||
if len(hops) == 0 {
|
||||
return FabricRoute{}, false
|
||||
}
|
||||
relayCount := 0
|
||||
for _, hop := range hops {
|
||||
if hop.Mode == FabricRouteRelay {
|
||||
relayCount++
|
||||
}
|
||||
}
|
||||
latency := fabricRouteLatencyFromCandidate(candidate, cfg, score, index)
|
||||
capacity := fabricRouteCapacityForMode(mode, cfg)
|
||||
if capacity <= 0 {
|
||||
capacity = 100
|
||||
}
|
||||
healthy := true
|
||||
degraded := false
|
||||
if observation, ok := cfg.Observations[candidate.EndpointID]; ok {
|
||||
healthy = observation.ReliabilityScore == 0 || observation.ReliabilityScore >= 50
|
||||
degraded = observation.LastLatencyMs > 0 && observation.LastLatencyMs >= 250
|
||||
}
|
||||
return FabricRoute{
|
||||
RouteID: candidate.EndpointID,
|
||||
ClusterID: strings.TrimSpace(cfg.ClusterID),
|
||||
SourceNodeID: strings.TrimSpace(cfg.LocalNodeID),
|
||||
DestinationNodeID: candidate.NodeID,
|
||||
Hops: hops,
|
||||
BaseLatencyMs: latency,
|
||||
Capacity: capacity,
|
||||
ActiveChannels: int(candidatePressureCount(candidate.EndpointID, cfg)),
|
||||
RelayCount: relayCount,
|
||||
Healthy: healthy,
|
||||
Degraded: degraded,
|
||||
LastUpdatedAt: now,
|
||||
}, true
|
||||
}
|
||||
|
||||
func fabricRouteModeForPeerEndpointCandidate(candidate PeerEndpointCandidate, metadata FabricCandidateMetadata, cfg FabricRoutePlannerConfig) FabricRouteMode {
|
||||
transportMode := fabricRouteModeForTransportTarget(FabricTransportTarget{Transport: candidate.Transport})
|
||||
if transportMode == FabricRouteRelay || transportMode == FabricRouteReverse || transportMode == FabricRouteICE || transportMode == FabricRouteLAN {
|
||||
return transportMode
|
||||
}
|
||||
reachability := strings.ToLower(strings.TrimSpace(candidate.Reachability))
|
||||
connectivity := strings.ToLower(strings.TrimSpace(candidate.ConnectivityMode))
|
||||
if sameLocalSegment(metadata, cfg) || sameNATGroup(metadata, cfg) {
|
||||
return FabricRouteLAN
|
||||
}
|
||||
if reachability == FabricCandidateReachabilityRelay || connectivity == FabricConnectivityRelayRequired || strings.TrimSpace(metadata.RelayEndpoint) != "" {
|
||||
return FabricRouteRelay
|
||||
}
|
||||
if connectivity == FabricConnectivityOutboundOnly || reachability == FabricCandidateReachabilityOutboundOnly {
|
||||
return FabricRouteReverse
|
||||
}
|
||||
if strings.TrimSpace(metadata.STUNServer) != "" || strings.TrimSpace(metadata.ICEFoundation) != "" || candidate.NATType != "" {
|
||||
return FabricRouteICE
|
||||
}
|
||||
return FabricRouteDirect
|
||||
}
|
||||
|
||||
func fabricRouteHopsForCandidate(candidate PeerEndpointCandidate, metadata FabricCandidateMetadata, mode FabricRouteMode, cfg FabricRoutePlannerConfig) []FabricRouteHop {
|
||||
localNodeID := strings.TrimSpace(cfg.LocalNodeID)
|
||||
targetNodeID := strings.TrimSpace(candidate.NodeID)
|
||||
endpoint := strings.TrimRight(strings.TrimSpace(candidate.Address), "/")
|
||||
switch mode {
|
||||
case FabricRouteRelay:
|
||||
relayNodeID := firstNonEmpty(strings.TrimSpace(metadata.RelayNodeID), strings.TrimSpace(metadata.ViaNodeID))
|
||||
relayEndpoint := firstNonEmpty(strings.TrimRight(strings.TrimSpace(metadata.RelayEndpoint), "/"), endpoint)
|
||||
hops := []FabricRouteHop{}
|
||||
if localNodeID != "" {
|
||||
hops = append(hops, FabricRouteHop{NodeID: localNodeID, Mode: FabricRouteDirect})
|
||||
}
|
||||
if relayNodeID == "" {
|
||||
hops = append(hops, FabricRouteHop{NodeID: targetNodeID, Mode: FabricRouteRelay, EndpointID: candidate.EndpointID, Address: endpoint, PeerCertSHA256: candidatePeerCertSHA256(candidate)})
|
||||
return hops
|
||||
}
|
||||
hops = append(hops,
|
||||
FabricRouteHop{NodeID: relayNodeID, Mode: FabricRouteRelay, EndpointID: candidate.EndpointID + ":relay", Address: relayEndpoint},
|
||||
FabricRouteHop{NodeID: targetNodeID, Mode: FabricRouteRelay, EndpointID: candidate.EndpointID, Address: endpoint, PeerCertSHA256: candidatePeerCertSHA256(candidate)},
|
||||
)
|
||||
return hops
|
||||
case FabricRouteLAN, FabricRouteICE, FabricRouteReverse, FabricRouteDirect:
|
||||
hops := []FabricRouteHop{}
|
||||
if localNodeID != "" {
|
||||
hops = append(hops, FabricRouteHop{NodeID: localNodeID, Mode: mode})
|
||||
}
|
||||
hops = append(hops, FabricRouteHop{NodeID: targetNodeID, Mode: mode, EndpointID: candidate.EndpointID, Address: endpoint, PeerCertSHA256: candidatePeerCertSHA256(candidate)})
|
||||
return hops
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
func isQUICOnlyCandidateTransport(transport string) bool {
|
||||
switch strings.ToLower(strings.TrimSpace(transport)) {
|
||||
case "quic", "direct_quic", "udp_quic", "quic_udp",
|
||||
string(FabricRouteLAN), string(FabricRouteReverse), string(FabricRouteRelay), string(FabricRouteICE):
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func fabricRouteLatencyFromCandidate(candidate PeerEndpointCandidate, cfg FabricRoutePlannerConfig, score int, index int) int {
|
||||
if observation, ok := cfg.Observations[candidate.EndpointID]; ok && observation.LastLatencyMs > 0 {
|
||||
if observation.LastLatencyMs > int64(^uint(0)>>1) {
|
||||
return int(^uint(0) >> 1)
|
||||
}
|
||||
return int(observation.LastLatencyMs)
|
||||
}
|
||||
base := 10 + index
|
||||
switch strings.ToLower(strings.TrimSpace(candidate.Reachability)) {
|
||||
case FabricCandidateReachabilityPrivate:
|
||||
base = 3 + index
|
||||
case FabricCandidateReachabilityOutboundOnly:
|
||||
base = 25 + index
|
||||
case FabricCandidateReachabilityRelay:
|
||||
base = 40 + index
|
||||
}
|
||||
if score < 100 {
|
||||
base += (100 - score) / 10
|
||||
}
|
||||
return base
|
||||
}
|
||||
|
||||
func fabricRouteCapacityForMode(mode FabricRouteMode, cfg FabricRoutePlannerConfig) int {
|
||||
switch mode {
|
||||
case FabricRouteRelay:
|
||||
return firstPositiveInt(cfg.RelayCapacity, cfg.DefaultCapacity, 100)
|
||||
case FabricRouteReverse:
|
||||
return firstPositiveInt(cfg.ReverseCapacity, cfg.DefaultCapacity, 100)
|
||||
default:
|
||||
return firstPositiveInt(cfg.DefaultCapacity, 100)
|
||||
}
|
||||
}
|
||||
|
||||
func candidatePressureCount(endpointID string, cfg FabricRoutePlannerConfig) int64 {
|
||||
if pressure, ok := cfg.CapacityPressure[endpointID]; ok {
|
||||
return pressure.Count
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func sameLocalSegment(metadata FabricCandidateMetadata, cfg FabricRoutePlannerConfig) bool {
|
||||
localSegment := strings.TrimSpace(cfg.LocalSegmentID)
|
||||
if localSegment == "" {
|
||||
return false
|
||||
}
|
||||
return strings.EqualFold(strings.TrimSpace(metadata.LocalSegmentID), localSegment)
|
||||
}
|
||||
|
||||
func sameNATGroup(metadata FabricCandidateMetadata, cfg FabricRoutePlannerConfig) bool {
|
||||
localNATGroup := strings.TrimSpace(cfg.LocalNATGroupID)
|
||||
if localNATGroup == "" {
|
||||
return false
|
||||
}
|
||||
return strings.EqualFold(strings.TrimSpace(metadata.NATGroupID), localNATGroup)
|
||||
}
|
||||
|
||||
func decodeFabricCandidateMetadata(raw json.RawMessage) FabricCandidateMetadata {
|
||||
if len(raw) == 0 {
|
||||
return FabricCandidateMetadata{}
|
||||
}
|
||||
var metadata FabricCandidateMetadata
|
||||
if err := json.Unmarshal(raw, &metadata); err != nil {
|
||||
return FabricCandidateMetadata{}
|
||||
}
|
||||
return metadata
|
||||
}
|
||||
|
||||
func candidatePeerCertSHA256(candidate PeerEndpointCandidate) string {
|
||||
var metadata struct {
|
||||
PeerCertSHA256 string `json:"peer_cert_sha256,omitempty"`
|
||||
TLSCertSHA256 string `json:"tls_cert_sha256,omitempty"`
|
||||
}
|
||||
if len(candidate.Metadata) == 0 {
|
||||
return ""
|
||||
}
|
||||
if err := json.Unmarshal(candidate.Metadata, &metadata); err != nil {
|
||||
return ""
|
||||
}
|
||||
return firstNonEmpty(strings.TrimSpace(metadata.PeerCertSHA256), strings.TrimSpace(metadata.TLSCertSHA256))
|
||||
}
|
||||
|
||||
// firstPositiveInt returns the first argument greater than zero, or 0 when
// there is none.
func firstPositiveInt(values ...int) int {
	for i := 0; i < len(values); i++ {
		if candidate := values[i]; candidate > 0 {
			return candidate
		}
	}
	return 0
}
|
||||
|
||||
// firstNonZeroDuration returns the first argument greater than zero, or 0 when
// there is none. Despite the name, negative durations are skipped as well.
func firstNonZeroDuration(values ...time.Duration) time.Duration {
	for i := 0; i < len(values); i++ {
		if candidate := values[i]; candidate > 0 {
			return candidate
		}
	}
	return 0
}
|
||||
|
||||
func FabricRouteSetForRelayFallback(clusterID string, sourceNodeID string, targetNodeID string, relayNodeID string, relayEndpoint string, targetEndpoint string) FabricRouteSet {
|
||||
relayEndpoint = strings.TrimRight(strings.TrimSpace(relayEndpoint), "/")
|
||||
targetEndpoint = strings.TrimRight(strings.TrimSpace(targetEndpoint), "/")
|
||||
candidate := PeerEndpointCandidate{
|
||||
EndpointID: fmt.Sprintf("%s-via-%s-relay", strings.TrimSpace(targetNodeID), strings.TrimSpace(relayNodeID)),
|
||||
NodeID: strings.TrimSpace(targetNodeID),
|
||||
Transport: string(FabricRouteRelay),
|
||||
Address: targetEndpoint,
|
||||
Reachability: FabricCandidateReachabilityRelay,
|
||||
ConnectivityMode: FabricConnectivityRelayRequired,
|
||||
Metadata: mustMarshalFabricCandidateMetadata(FabricCandidateMetadata{RelayNodeID: relayNodeID, RelayEndpoint: relayEndpoint}),
|
||||
}
|
||||
return FabricRouteSetForPeerEndpointCandidates(targetNodeID, []PeerEndpointCandidate{candidate}, FabricRoutePlannerConfig{
|
||||
ClusterID: clusterID,
|
||||
LocalNodeID: sourceNodeID,
|
||||
})
|
||||
}
|
||||
|
||||
func mustMarshalFabricCandidateMetadata(metadata FabricCandidateMetadata) json.RawMessage {
|
||||
raw, _ := json.Marshal(metadata)
|
||||
return raw
|
||||
}
|
||||
@@ -0,0 +1,187 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestFabricRouteSetForPeerEndpointCandidatesPrefersLocalLAN(t *testing.T) {
|
||||
metadata, _ := json.Marshal(FabricCandidateMetadata{LocalSegmentID: "site-a", NATGroupID: "nat-a"})
|
||||
routeSet := FabricRouteSetForPeerEndpointCandidates("node-b", []PeerEndpointCandidate{
|
||||
{
|
||||
EndpointID: "node-b-public",
|
||||
NodeID: "node-b",
|
||||
Transport: "quic",
|
||||
Address: "quic://203.0.113.10:19443",
|
||||
Reachability: "public",
|
||||
ConnectivityMode: "direct",
|
||||
Priority: 10,
|
||||
},
|
||||
{
|
||||
EndpointID: "node-b-lan",
|
||||
NodeID: "node-b",
|
||||
Transport: "quic",
|
||||
Address: "quic://10.10.0.12:19443",
|
||||
Reachability: "private",
|
||||
ConnectivityMode: "direct",
|
||||
PolicyTags: []string{"private-lan"},
|
||||
Metadata: metadata,
|
||||
},
|
||||
}, FabricRoutePlannerConfig{
|
||||
ClusterID: "cluster-1",
|
||||
LocalNodeID: "node-a",
|
||||
LocalSegmentID: "site-a",
|
||||
DefaultCapacity: 200,
|
||||
Now: time.Unix(100, 0).UTC(),
|
||||
})
|
||||
if routeSet.Primary.RouteID != "node-b-lan" {
|
||||
t.Fatalf("primary route = %q, want node-b-lan", routeSet.Primary.RouteID)
|
||||
}
|
||||
if routeSet.Primary.Hops[len(routeSet.Primary.Hops)-1].Mode != FabricRouteLAN {
|
||||
t.Fatalf("primary mode = %q, want lan", routeSet.Primary.Hops[len(routeSet.Primary.Hops)-1].Mode)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricRouteSetForPeerEndpointCandidatesBuildsRelayFallback(t *testing.T) {
|
||||
metadata, _ := json.Marshal(FabricCandidateMetadata{RelayNodeID: "node-r", RelayEndpoint: "quic://node-r:19443"})
|
||||
routeSet := FabricRouteSetForPeerEndpointCandidates("node-b", []PeerEndpointCandidate{{
|
||||
EndpointID: "node-b-relay",
|
||||
NodeID: "node-b",
|
||||
Transport: "quic",
|
||||
Address: "quic://node-b-passive:19443",
|
||||
Reachability: "outbound_only",
|
||||
ConnectivityMode: "relay_required",
|
||||
NATType: "symmetric",
|
||||
Metadata: metadata,
|
||||
}}, FabricRoutePlannerConfig{
|
||||
ClusterID: "cluster-1",
|
||||
LocalNodeID: "node-a",
|
||||
RelayCapacity: 50,
|
||||
Now: time.Unix(100, 0).UTC(),
|
||||
})
|
||||
if routeSet.Primary.RouteID != "node-b-relay" {
|
||||
t.Fatalf("primary route = %q", routeSet.Primary.RouteID)
|
||||
}
|
||||
if routeSet.Primary.RelayCount != 2 {
|
||||
t.Fatalf("relay count = %d, want 2", routeSet.Primary.RelayCount)
|
||||
}
|
||||
if got := routeSet.Primary.Hops[1].NodeID; got != "node-r" {
|
||||
t.Fatalf("relay hop = %q, want node-r", got)
|
||||
}
|
||||
if routeSet.Primary.Capacity != 50 {
|
||||
t.Fatalf("capacity = %d, want 50", routeSet.Primary.Capacity)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricRouteSetForPeerEndpointCandidatesUsesTargetWhenRelayMetadataIsAbsent(t *testing.T) {
|
||||
routeSet := FabricRouteSetForPeerEndpointCandidates("node-b", []PeerEndpointCandidate{{
|
||||
EndpointID: "node-b-relay",
|
||||
NodeID: "node-b",
|
||||
Transport: "relay_quic",
|
||||
Address: "quic://node-b:19443",
|
||||
Reachability: "relay",
|
||||
ConnectivityMode: "relay_required",
|
||||
Metadata: json.RawMessage(`{"tls_cert_sha256":"abc123"}`),
|
||||
}}, FabricRoutePlannerConfig{ClusterID: "cluster-1", LocalNodeID: "node-a"})
|
||||
if routeSet.Primary.RouteID != "node-b-relay" {
|
||||
t.Fatalf("primary route = %q", routeSet.Primary.RouteID)
|
||||
}
|
||||
if len(routeSet.Primary.Hops) != 2 {
|
||||
t.Fatalf("hops = %+v, want local + target only", routeSet.Primary.Hops)
|
||||
}
|
||||
targetHop := routeSet.Primary.Hops[1]
|
||||
if targetHop.NodeID != "node-b" || targetHop.Mode != FabricRouteRelay || targetHop.PeerCertSHA256 != "abc123" {
|
||||
t.Fatalf("target hop = %+v, want relay-mode target with cert", targetHop)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricRouteSetForPeerEndpointCandidatesAcceptsExplicitQUICModes(t *testing.T) {
|
||||
for _, tc := range []struct {
|
||||
name string
|
||||
transport string
|
||||
wantMode FabricRouteMode
|
||||
}{
|
||||
{name: "lan", transport: "lan_quic", wantMode: FabricRouteLAN},
|
||||
{name: "reverse", transport: "reverse_quic", wantMode: FabricRouteReverse},
|
||||
{name: "relay", transport: "relay_quic", wantMode: FabricRouteRelay},
|
||||
{name: "ice", transport: "ice_quic", wantMode: FabricRouteICE},
|
||||
} {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
routeSet := FabricRouteSetForPeerEndpointCandidates("node-b", []PeerEndpointCandidate{{
|
||||
EndpointID: "node-b-" + tc.name,
|
||||
NodeID: "node-b",
|
||||
Transport: tc.transport,
|
||||
Address: "quic://node-b:19443",
|
||||
Reachability: "private",
|
||||
ConnectivityMode: "direct",
|
||||
Metadata: json.RawMessage(`{"tls_cert_sha256":"abc123"}`),
|
||||
}}, FabricRoutePlannerConfig{ClusterID: "cluster-1", LocalNodeID: "node-a"})
|
||||
if routeSet.Primary.RouteID == "" {
|
||||
t.Fatalf("%s candidate produced empty route set", tc.transport)
|
||||
}
|
||||
hop := routeSet.Primary.Hops[len(routeSet.Primary.Hops)-1]
|
||||
if hop.Mode != tc.wantMode {
|
||||
t.Fatalf("mode = %q, want %q", hop.Mode, tc.wantMode)
|
||||
}
|
||||
if hop.PeerCertSHA256 != "abc123" {
|
||||
t.Fatalf("peer cert = %q, want abc123", hop.PeerCertSHA256)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricRouteSetForPeerEndpointCandidatesTreatsSameNATGroupAsLAN(t *testing.T) {
|
||||
metadata, _ := json.Marshal(FabricCandidateMetadata{NATGroupID: "nat-a"})
|
||||
routeSet := FabricRouteSetForPeerEndpointCandidates("node-b", []PeerEndpointCandidate{{
|
||||
EndpointID: "node-b-nat-lan",
|
||||
NodeID: "node-b",
|
||||
Transport: "quic",
|
||||
Address: "quic://10.44.0.12:19443",
|
||||
Reachability: "private",
|
||||
ConnectivityMode: "direct",
|
||||
NATType: "symmetric",
|
||||
Metadata: metadata,
|
||||
}}, FabricRoutePlannerConfig{
|
||||
ClusterID: "cluster-1",
|
||||
LocalNodeID: "node-a",
|
||||
LocalNATGroupID: "nat-a",
|
||||
})
|
||||
if routeSet.Primary.Hops[len(routeSet.Primary.Hops)-1].Mode != FabricRouteLAN {
|
||||
t.Fatalf("route = %+v, want LAN mode for same NAT group", routeSet.Primary)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricRouteSetForPeerEndpointCandidatesRejectsNonQUIC(t *testing.T) {
|
||||
for _, candidate := range []PeerEndpointCandidate{
|
||||
{
|
||||
EndpointID: "node-b-http",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_http",
|
||||
Address: "http://node-b:8080",
|
||||
Reachability: "public",
|
||||
ConnectivityMode: "direct",
|
||||
},
|
||||
{
|
||||
EndpointID: "node-b-legacy-relay",
|
||||
NodeID: "node-b",
|
||||
Transport: "relay",
|
||||
Address: "quic://node-r:19443",
|
||||
Reachability: "relay",
|
||||
ConnectivityMode: "relay_required",
|
||||
},
|
||||
{
|
||||
EndpointID: "node-b-legacy-reverse",
|
||||
NodeID: "node-b",
|
||||
Transport: "outbound_reverse",
|
||||
Address: "quic://node-b:19443",
|
||||
Reachability: "outbound_only",
|
||||
ConnectivityMode: "outbound_only",
|
||||
},
|
||||
} {
|
||||
routeSet := FabricRouteSetForPeerEndpointCandidates("node-b", []PeerEndpointCandidate{candidate}, FabricRoutePlannerConfig{ClusterID: "cluster-1", LocalNodeID: "node-a"})
|
||||
if routeSet.Primary.RouteID != "" || len(routeSet.WarmStandby) != 0 {
|
||||
t.Fatalf("non-quic candidate produced route set: %+v", routeSet)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,137 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
)
|
||||
|
||||
// FabricRoutePressureTracker counts in-flight acquisitions per route ID and
// keeps running totals and high-water marks. Its methods access the fields
// below while holding mu.
type FabricRoutePressureTracker struct {
	mu sync.Mutex

	// active is the current acquisition count per route; maxActive is the
	// highest count each route has reached.
	active, maxActive map[string]int

	// acquiredTotal and releasedTotal count acquisitions and releases.
	acquiredTotal, releasedTotal uint64

	// maxActiveTotal is the highest sum of all active counts observed.
	maxActiveTotal int

	// lastAcquiredRoute and lastReleasedRoute name the most recently
	// acquired and released route IDs.
	lastAcquiredRoute, lastReleasedRoute string
}
|
||||
|
||||
// FabricRoutePressureSnapshot is a JSON-serializable, point-in-time view of
// the counters held by a FabricRoutePressureTracker.
type FabricRoutePressureSnapshot struct {
	// Active and MaxActive map a route ID to its current and highest
	// observed acquisition count.
	Active    map[string]int `json:"active"`
	MaxActive map[string]int `json:"max_active"`

	// ActiveTotal is the sum of Active; MaxActiveTotal is the highest sum
	// the tracker has recorded.
	ActiveTotal    int `json:"active_total"`
	MaxActiveTotal int `json:"max_active_total"`

	// AcquiredTotal and ReleasedTotal count acquisitions and releases.
	AcquiredTotal uint64 `json:"acquired_total"`
	ReleasedTotal uint64 `json:"released_total"`

	// LastAcquiredRoute and LastReleasedRoute are omitted from JSON when empty.
	LastAcquiredRoute string `json:"last_acquired_route,omitempty"`
	LastReleasedRoute string `json:"last_released_route,omitempty"`
}
|
||||
|
||||
func NewFabricRoutePressureTracker() *FabricRoutePressureTracker {
|
||||
return &FabricRoutePressureTracker{
|
||||
active: map[string]int{},
|
||||
maxActive: map[string]int{},
|
||||
}
|
||||
}
|
||||
|
||||
func (t *FabricRoutePressureTracker) Apply(routeSet FabricRouteSet) FabricRouteSet {
|
||||
if t == nil {
|
||||
return routeSet
|
||||
}
|
||||
active := t.Snapshot()
|
||||
if len(active) == 0 {
|
||||
return routeSet
|
||||
}
|
||||
apply := func(route FabricRoute) FabricRoute {
|
||||
if count := active[route.RouteID]; count > 0 {
|
||||
route.ActiveChannels += count
|
||||
}
|
||||
return route
|
||||
}
|
||||
routeSet.Primary = apply(routeSet.Primary)
|
||||
for i := range routeSet.WarmStandby {
|
||||
routeSet.WarmStandby[i] = apply(routeSet.WarmStandby[i])
|
||||
}
|
||||
for i := range routeSet.ColdFallbacks {
|
||||
routeSet.ColdFallbacks[i] = apply(routeSet.ColdFallbacks[i])
|
||||
}
|
||||
return routeSet
|
||||
}
|
||||
|
||||
func (t *FabricRoutePressureTracker) Acquire(routeID string) func() {
|
||||
routeID = strings.TrimSpace(routeID)
|
||||
if t == nil || routeID == "" {
|
||||
return func() {}
|
||||
}
|
||||
t.mu.Lock()
|
||||
if t.active == nil {
|
||||
t.active = map[string]int{}
|
||||
}
|
||||
if t.maxActive == nil {
|
||||
t.maxActive = map[string]int{}
|
||||
}
|
||||
t.active[routeID]++
|
||||
if t.active[routeID] > t.maxActive[routeID] {
|
||||
t.maxActive[routeID] = t.active[routeID]
|
||||
}
|
||||
t.acquiredTotal++
|
||||
t.lastAcquiredRoute = routeID
|
||||
if activeTotal := activeTotalLocked(t.active); activeTotal > t.maxActiveTotal {
|
||||
t.maxActiveTotal = activeTotal
|
||||
}
|
||||
t.mu.Unlock()
|
||||
var released atomic.Bool
|
||||
return func() {
|
||||
if released.Swap(true) {
|
||||
return
|
||||
}
|
||||
t.mu.Lock()
|
||||
if t.active[routeID] <= 1 {
|
||||
delete(t.active, routeID)
|
||||
} else {
|
||||
t.active[routeID]--
|
||||
}
|
||||
t.releasedTotal++
|
||||
t.lastReleasedRoute = routeID
|
||||
t.mu.Unlock()
|
||||
}
|
||||
}
|
||||
|
||||
func (t *FabricRoutePressureTracker) Snapshot() map[string]int {
|
||||
return t.SnapshotPressure().Active
|
||||
}
|
||||
|
||||
func (t *FabricRoutePressureTracker) SnapshotPressure() FabricRoutePressureSnapshot {
|
||||
if t == nil {
|
||||
return FabricRoutePressureSnapshot{}
|
||||
}
|
||||
t.mu.Lock()
|
||||
defer t.mu.Unlock()
|
||||
active := make(map[string]int, len(t.active))
|
||||
for routeID, count := range t.active {
|
||||
active[routeID] = count
|
||||
}
|
||||
maxActive := make(map[string]int, len(t.maxActive))
|
||||
for routeID, count := range t.maxActive {
|
||||
maxActive[routeID] = count
|
||||
}
|
||||
return FabricRoutePressureSnapshot{
|
||||
Active: active,
|
||||
MaxActive: maxActive,
|
||||
ActiveTotal: activeTotalLocked(active),
|
||||
MaxActiveTotal: t.maxActiveTotal,
|
||||
AcquiredTotal: t.acquiredTotal,
|
||||
ReleasedTotal: t.releasedTotal,
|
||||
LastAcquiredRoute: t.lastAcquiredRoute,
|
||||
LastReleasedRoute: t.lastReleasedRoute,
|
||||
}
|
||||
}
|
||||
|
||||
// activeTotalLocked returns the sum of all counts in active. A nil or empty
// map sums to zero.
func activeTotalLocked(active map[string]int) int {
	var sum int
	for routeID := range active {
		sum += active[routeID]
	}
	return sum
}
|
||||
@@ -0,0 +1,44 @@
|
||||
package mesh
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestFabricRoutePressureTrackerAppliesAndReleasesActiveChannels(t *testing.T) {
|
||||
tracker := NewFabricRoutePressureTracker()
|
||||
releaseA := tracker.Acquire("route-a")
|
||||
releaseAAgain := tracker.Acquire("route-a")
|
||||
releaseB := tracker.Acquire("route-b")
|
||||
routeSet := FabricRouteSet{
|
||||
TargetKind: FabricChannelTargetNode,
|
||||
TargetID: "node-b",
|
||||
Primary: testFabricRoute("route-a", "node-b", 10, 100, 3, true),
|
||||
WarmStandby: []FabricRoute{
|
||||
testFabricRoute("route-b", "node-b", 10, 100, 0, true),
|
||||
},
|
||||
}
|
||||
|
||||
withPressure := tracker.Apply(routeSet)
|
||||
if withPressure.Primary.ActiveChannels != 5 {
|
||||
t.Fatalf("primary active = %d, want 5", withPressure.Primary.ActiveChannels)
|
||||
}
|
||||
if withPressure.WarmStandby[0].ActiveChannels != 1 {
|
||||
t.Fatalf("standby active = %d, want 1", withPressure.WarmStandby[0].ActiveChannels)
|
||||
}
|
||||
|
||||
releaseA()
|
||||
releaseA()
|
||||
releaseAAgain()
|
||||
releaseB()
|
||||
snapshot := tracker.SnapshotPressure()
|
||||
if len(snapshot.Active) != 0 || snapshot.ActiveTotal != 0 {
|
||||
t.Fatalf("snapshot after release = %+v, want inactive", snapshot)
|
||||
}
|
||||
if snapshot.AcquiredTotal != 3 || snapshot.ReleasedTotal != 3 {
|
||||
t.Fatalf("snapshot totals = %+v, want acquired/released 3", snapshot)
|
||||
}
|
||||
if snapshot.MaxActive["route-a"] != 2 || snapshot.MaxActive["route-b"] != 1 || snapshot.MaxActiveTotal != 3 {
|
||||
t.Fatalf("snapshot max = %+v", snapshot)
|
||||
}
|
||||
if snapshot.LastAcquiredRoute != "route-b" || snapshot.LastReleasedRoute != "route-b" {
|
||||
t.Fatalf("snapshot last routes = %+v", snapshot)
|
||||
}
|
||||
}
|
||||
@@ -12,8 +12,9 @@ import (
|
||||
func TestFabricSessionPeerManagerReusesPeerPump(t *testing.T) {
|
||||
var opened int
|
||||
server := httptest.NewServer(Server{
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
FabricSessionWebSocketEnabled: true,
|
||||
FabricSessionLogger: func(entry FabricSessionEventLogEntry) {
|
||||
if entry.Event == "fabric_session_websocket_opened" {
|
||||
opened++
|
||||
@@ -83,8 +84,9 @@ func TestFabricSessionPeerManagerReusesPeerPump(t *testing.T) {
|
||||
func TestFabricSessionPeerManagerClosePeerReopens(t *testing.T) {
|
||||
var opened int
|
||||
server := httptest.NewServer(Server{
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
FabricSessionWebSocketEnabled: true,
|
||||
FabricSessionLogger: func(entry FabricSessionEventLogEntry) {
|
||||
if entry.Event == "fabric_session_websocket_opened" {
|
||||
opened++
|
||||
@@ -131,8 +133,9 @@ func TestFabricSessionPeerManagerClosePeerReopens(t *testing.T) {
|
||||
func TestFabricSessionPeerManagerReopensClosedPump(t *testing.T) {
|
||||
var opened int
|
||||
server := httptest.NewServer(Server{
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
FabricSessionWebSocketEnabled: true,
|
||||
FabricSessionLogger: func(entry FabricSessionEventLogEntry) {
|
||||
if entry.Event == "fabric_session_websocket_opened" {
|
||||
opened++
|
||||
|
||||
@@ -40,73 +40,22 @@ type FabricTransportTarget struct {
|
||||
ErrorBuffer int
|
||||
}
|
||||
|
||||
func FabricTransportForTarget(target FabricTransportTarget, websocket *WebSocketFabricTransport, quicTransport *QUICFabricTransport) (FabricTransport, FabricTransportTarget, error) {
|
||||
func FabricTransportForTarget(target FabricTransportTarget, quicTransport *QUICFabricTransport) (FabricTransport, FabricTransportTarget, error) {
|
||||
transportLabel := strings.ToLower(strings.TrimSpace(target.Transport))
|
||||
endpoint := strings.TrimSpace(target.Endpoint)
|
||||
if strings.HasPrefix(strings.ToLower(endpoint), "quic://") {
|
||||
transportLabel = "quic"
|
||||
if transportLabel == "" {
|
||||
transportLabel = "quic"
|
||||
}
|
||||
target.Endpoint = strings.TrimPrefix(endpoint, "quic://")
|
||||
}
|
||||
switch transportLabel {
|
||||
case "quic", "direct_quic", "udp_quic", "quic_udp":
|
||||
case "quic", "direct_quic", "udp_quic", "quic_udp", "lan_quic", "reverse_quic", "relay_quic", "ice_quic":
|
||||
if quicTransport == nil {
|
||||
quicTransport = NewQUICFabricTransport(nil)
|
||||
}
|
||||
return quicTransport, target, nil
|
||||
case "", "websocket", "ws", "wss", "direct_http", "direct_https", "direct_tcp_tls":
|
||||
if websocket == nil {
|
||||
websocket = NewWebSocketFabricTransport(nil)
|
||||
}
|
||||
return websocket, target, nil
|
||||
default:
|
||||
return nil, target, fmt.Errorf("unsupported fabric transport %q", target.Transport)
|
||||
return nil, target, fmt.Errorf("unsupported fabric transport %q: quic is required", target.Transport)
|
||||
}
|
||||
}
|
||||
|
||||
type WebSocketFabricTransport struct {
|
||||
Manager *FabricSessionPeerManager
|
||||
}
|
||||
|
||||
func NewWebSocketFabricTransport(manager *FabricSessionPeerManager) *WebSocketFabricTransport {
|
||||
if manager == nil {
|
||||
manager = NewFabricSessionPeerManager()
|
||||
}
|
||||
return &WebSocketFabricTransport{Manager: manager}
|
||||
}
|
||||
|
||||
func (t *WebSocketFabricTransport) Connect(ctx context.Context, target FabricTransportTarget) (FabricTransportSession, error) {
|
||||
manager := t.Manager
|
||||
if manager == nil {
|
||||
manager = NewFabricSessionPeerManager()
|
||||
t.Manager = manager
|
||||
}
|
||||
return manager.Get(ctx, FabricSessionPeerTarget{
|
||||
PeerID: target.PeerID,
|
||||
BaseURL: target.Endpoint,
|
||||
Options: FabricSessionDialOptions{
|
||||
Token: target.Token,
|
||||
Header: target.Header,
|
||||
Timeout: target.Timeout,
|
||||
MaxPayload: target.MaxPayload,
|
||||
},
|
||||
Pump: FabricSessionPumpOptions{
|
||||
OutboundBuffer: target.OutboundBuffer,
|
||||
InboundBuffer: target.InboundBuffer,
|
||||
ErrorBuffer: target.ErrorBuffer,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
func (t *WebSocketFabricTransport) Close() error {
|
||||
if t == nil || t.Manager == nil {
|
||||
return nil
|
||||
}
|
||||
return t.Manager.Close()
|
||||
}
|
||||
|
||||
func (t *WebSocketFabricTransport) Snapshot() FabricSessionPeerManagerSnapshot {
|
||||
if t == nil || t.Manager == nil {
|
||||
return FabricSessionPeerManagerSnapshot{SchemaVersion: "rap.fabric_session_peer_manager.v1"}
|
||||
}
|
||||
return t.Manager.Snapshot()
|
||||
}
|
||||
|
||||
@@ -1,117 +1,27 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
|
||||
)
|
||||
|
||||
func TestWebSocketFabricTransportConnectsAndReusesSession(t *testing.T) {
|
||||
var opened int
|
||||
server := httptest.NewServer(Server{
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
FabricSessionLogger: func(entry FabricSessionEventLogEntry) {
|
||||
if entry.Event == "fabric_session_websocket_opened" {
|
||||
opened++
|
||||
}
|
||||
},
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
transport := NewWebSocketFabricTransport(nil)
|
||||
defer transport.Close()
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||
defer cancel()
|
||||
target := FabricTransportTarget{
|
||||
PeerID: "node-a",
|
||||
Endpoint: server.URL,
|
||||
Token: "rap_fsn_transport",
|
||||
Timeout: time.Second,
|
||||
OutboundBuffer: 4,
|
||||
InboundBuffer: 4,
|
||||
ErrorBuffer: 4,
|
||||
}
|
||||
|
||||
first, err := transport.Connect(ctx, target)
|
||||
if err != nil {
|
||||
t.Fatalf("first connect: %v", err)
|
||||
}
|
||||
second, err := transport.Connect(ctx, target)
|
||||
if err != nil {
|
||||
t.Fatalf("second connect: %v", err)
|
||||
}
|
||||
if first != second {
|
||||
t.Fatal("transport did not reuse session")
|
||||
}
|
||||
if opened != 1 {
|
||||
t.Fatalf("opened = %d, want 1", opened)
|
||||
}
|
||||
if err := first.Send(ctx, fabricproto.Frame{Type: fabricproto.FramePing, Sequence: 1, Payload: []byte("transport")}); err != nil {
|
||||
t.Fatalf("send ping: %v", err)
|
||||
}
|
||||
select {
|
||||
case frame := <-first.Frames():
|
||||
if frame.Type != fabricproto.FramePong || frame.Sequence != 1 || string(frame.Payload) != "transport" {
|
||||
t.Fatalf("frame = %+v", frame)
|
||||
func TestFabricTransportRejectsWebSocketTransport(t *testing.T) {
|
||||
for _, target := range []FabricTransportTarget{
|
||||
{Transport: "wss", Endpoint: "wss://node-a.example/fabric/session"},
|
||||
{Transport: "relay", Endpoint: "quic://node-r.example:19443"},
|
||||
{Transport: "outbound_reverse", Endpoint: "quic://node-b.example:19443"},
|
||||
} {
|
||||
_, _, err := FabricTransportForTarget(target, nil)
|
||||
if err == nil || !strings.Contains(err.Error(), "quic is required") {
|
||||
t.Fatalf("target = %+v err = %v, want quic-only rejection", target, err)
|
||||
}
|
||||
case err := <-first.Errors():
|
||||
t.Fatalf("session error: %v", err)
|
||||
case <-ctx.Done():
|
||||
t.Fatal(ctx.Err())
|
||||
}
|
||||
}
|
||||
|
||||
func TestWebSocketFabricTransportReopensClosedSession(t *testing.T) {
|
||||
var opened int
|
||||
server := httptest.NewServer(Server{
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
FabricSessionLogger: func(entry FabricSessionEventLogEntry) {
|
||||
if entry.Event == "fabric_session_websocket_opened" {
|
||||
opened++
|
||||
}
|
||||
},
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
transport := NewWebSocketFabricTransport(nil)
|
||||
defer transport.Close()
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||
defer cancel()
|
||||
target := FabricTransportTarget{
|
||||
PeerID: "node-a",
|
||||
Endpoint: server.URL,
|
||||
Token: "rap_fsn_transport_reopen",
|
||||
Timeout: time.Second,
|
||||
}
|
||||
|
||||
first, err := transport.Connect(ctx, target)
|
||||
if err != nil {
|
||||
t.Fatalf("first connect: %v", err)
|
||||
}
|
||||
if err := first.Close(); err != nil {
|
||||
t.Fatalf("close first session: %v", err)
|
||||
}
|
||||
second, err := transport.Connect(ctx, target)
|
||||
if err != nil {
|
||||
t.Fatalf("second connect: %v", err)
|
||||
}
|
||||
if first == second {
|
||||
t.Fatal("transport reused closed session")
|
||||
}
|
||||
if opened != 2 {
|
||||
t.Fatalf("opened = %d, want 2", opened)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricTransportForTargetSelectsQUICByScheme(t *testing.T) {
|
||||
transport, target, err := FabricTransportForTarget(FabricTransportTarget{
|
||||
Endpoint: "quic://127.0.0.1:4433",
|
||||
}, nil, nil)
|
||||
}, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("select transport: %v", err)
|
||||
}
|
||||
@@ -123,15 +33,12 @@ func TestFabricTransportForTargetSelectsQUICByScheme(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricTransportForTargetSelectsWebSocketByDefault(t *testing.T) {
|
||||
transport, target, err := FabricTransportForTarget(FabricTransportTarget{
|
||||
func TestFabricTransportForTargetRejectsNonQUICByDefault(t *testing.T) {
|
||||
_, target, err := FabricTransportForTarget(FabricTransportTarget{
|
||||
Endpoint: "https://node.example",
|
||||
}, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("select transport: %v", err)
|
||||
}
|
||||
if _, ok := transport.(*WebSocketFabricTransport); !ok {
|
||||
t.Fatalf("transport = %T, want websocket", transport)
|
||||
}, nil)
|
||||
if err == nil {
|
||||
t.Fatal("non-QUIC target unexpectedly selected a transport")
|
||||
}
|
||||
if target.Endpoint != "https://node.example" {
|
||||
t.Fatalf("endpoint = %q", target.Endpoint)
|
||||
|
||||
@@ -1,42 +0,0 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// HTTPPeerTransport sends synthetic mesh envelopes to explicitly configured
|
||||
// peer endpoints. It is intentionally narrow: production forwarding remains
|
||||
// disabled and only SyntheticRuntime messages use this transport.
|
||||
type HTTPPeerTransport struct {
|
||||
PeerURLs map[string]string
|
||||
HTTPClient *http.Client
|
||||
}
|
||||
|
||||
func NewHTTPPeerTransport(peerURLs map[string]string) *HTTPPeerTransport {
|
||||
normalized := make(map[string]string, len(peerURLs))
|
||||
for nodeID, baseURL := range peerURLs {
|
||||
nodeID = strings.TrimSpace(nodeID)
|
||||
baseURL = strings.TrimRight(strings.TrimSpace(baseURL), "/")
|
||||
if nodeID != "" && baseURL != "" {
|
||||
normalized[nodeID] = baseURL
|
||||
}
|
||||
}
|
||||
return &HTTPPeerTransport{PeerURLs: normalized}
|
||||
}
|
||||
|
||||
func (t *HTTPPeerTransport) SendSynthetic(ctx context.Context, nextNodeID string, envelope SyntheticEnvelope) (SyntheticEnvelope, error) {
|
||||
if t == nil {
|
||||
return SyntheticEnvelope{}, ErrSyntheticPeerUnavailable
|
||||
}
|
||||
baseURL := strings.TrimRight(strings.TrimSpace(t.PeerURLs[nextNodeID]), "/")
|
||||
if baseURL == "" {
|
||||
return SyntheticEnvelope{}, ErrSyntheticPeerUnavailable
|
||||
}
|
||||
client := NewClient(baseURL)
|
||||
if t.HTTPClient != nil {
|
||||
client.HTTPClient = t.HTTPClient
|
||||
}
|
||||
return client.SendSynthetic(ctx, envelope)
|
||||
}
|
||||
@@ -1,130 +0,0 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestHTTPPeerTransportDirectSyntheticProbe(t *testing.T) {
|
||||
nodeA := newLiveSyntheticNode(t, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"})
|
||||
defer nodeA.Close()
|
||||
nodeB := newLiveSyntheticNode(t, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"})
|
||||
defer nodeB.Close()
|
||||
|
||||
route := liveSyntheticRoute("route-direct", []string{"node-a", "node-b"})
|
||||
routes := []SyntheticRoute{route}
|
||||
nodeA.Runtime = newLiveRuntime(nodeA.Local, routes, map[string]string{"node-b": nodeB.URL})
|
||||
nodeB.Runtime = newLiveRuntime(nodeB.Local, routes, map[string]string{})
|
||||
|
||||
ack, err := nodeA.Runtime.SendProbe(context.Background(), route.RouteID, SyntheticChannelFabricControl, "probe-live-direct")
|
||||
if err != nil {
|
||||
t.Fatalf("send live direct probe: %v", err)
|
||||
}
|
||||
if ack.MessageType != SyntheticMessageProbeAck {
|
||||
t.Fatalf("MessageType = %q, want %q", ack.MessageType, SyntheticMessageProbeAck)
|
||||
}
|
||||
payload := decodeAckPayload(t, ack)
|
||||
if got, want := payload.Path, []string{"node-a", "node-b"}; !sameStrings(got, want) {
|
||||
t.Fatalf("path = %v, want %v", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHTTPPeerTransportSingleRelaySyntheticProbe(t *testing.T) {
|
||||
nodeA := newLiveSyntheticNode(t, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"})
|
||||
defer nodeA.Close()
|
||||
nodeR := newLiveSyntheticNode(t, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-r"})
|
||||
defer nodeR.Close()
|
||||
nodeB := newLiveSyntheticNode(t, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"})
|
||||
defer nodeB.Close()
|
||||
|
||||
route := liveSyntheticRoute("route-relay", []string{"node-a", "node-r", "node-b"})
|
||||
routes := []SyntheticRoute{route}
|
||||
nodeA.Runtime = newLiveRuntime(nodeA.Local, routes, map[string]string{"node-r": nodeR.URL})
|
||||
nodeR.Runtime = newLiveRuntime(nodeR.Local, routes, map[string]string{"node-b": nodeB.URL})
|
||||
nodeB.Runtime = newLiveRuntime(nodeB.Local, routes, map[string]string{})
|
||||
|
||||
ack, err := nodeA.Runtime.SendProbe(context.Background(), route.RouteID, SyntheticChannelFabricControl, "probe-live-relay")
|
||||
if err != nil {
|
||||
t.Fatalf("send live relay probe: %v", err)
|
||||
}
|
||||
if ack.MessageType != SyntheticMessageProbeAck {
|
||||
t.Fatalf("MessageType = %q, want %q", ack.MessageType, SyntheticMessageProbeAck)
|
||||
}
|
||||
payload := decodeAckPayload(t, ack)
|
||||
if got, want := payload.Path, []string{"node-a", "node-r", "node-b"}; !sameStrings(got, want) {
|
||||
t.Fatalf("path = %v, want %v", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHTTPPeerTransportMissingPeer(t *testing.T) {
|
||||
transport := NewHTTPPeerTransport(map[string]string{})
|
||||
_, err := transport.SendSynthetic(context.Background(), "node-missing", SyntheticEnvelope{})
|
||||
if !errors.Is(err, ErrSyntheticPeerUnavailable) {
|
||||
t.Fatalf("err = %v, want ErrSyntheticPeerUnavailable", err)
|
||||
}
|
||||
}
|
||||
|
||||
type liveSyntheticNode struct {
|
||||
Local PeerIdentity
|
||||
Runtime *SyntheticRuntime
|
||||
URL string
|
||||
server *httptest.Server
|
||||
}
|
||||
|
||||
func newLiveSyntheticNode(t *testing.T, local PeerIdentity) *liveSyntheticNode {
|
||||
t.Helper()
|
||||
node := &liveSyntheticNode{Local: local}
|
||||
node.server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
Server{Local: node.Local, SyntheticRuntime: node.Runtime}.Handler().ServeHTTP(w, r)
|
||||
}))
|
||||
node.URL = node.server.URL
|
||||
return node
|
||||
}
|
||||
|
||||
func (n *liveSyntheticNode) Close() {
|
||||
if n.server != nil {
|
||||
n.server.Close()
|
||||
}
|
||||
}
|
||||
|
||||
func newLiveRuntime(local PeerIdentity, routes []SyntheticRoute, peers map[string]string) *SyntheticRuntime {
|
||||
return NewSyntheticRuntime(SyntheticRuntimeConfig{
|
||||
Enabled: true,
|
||||
Local: local,
|
||||
Routes: routes,
|
||||
Transport: NewHTTPPeerTransport(peers),
|
||||
})
|
||||
}
|
||||
|
||||
func liveSyntheticRoute(routeID string, hops []string) SyntheticRoute {
|
||||
return SyntheticRoute{
|
||||
RouteID: routeID,
|
||||
ClusterID: "cluster-1",
|
||||
SourceNodeID: hops[0],
|
||||
DestinationNodeID: hops[len(hops)-1],
|
||||
Hops: hops,
|
||||
AllowedChannels: []string{SyntheticChannelFabricControl},
|
||||
MaxTTL: 8,
|
||||
MaxHops: 8,
|
||||
ExpiresAt: time.Now().UTC().Add(time.Hour),
|
||||
RouteVersion: "route-v1",
|
||||
PolicyVersion: "policy-v1",
|
||||
PeerDirectoryVersion: "peers-v1",
|
||||
}
|
||||
}
|
||||
|
||||
// sameStrings reports whether left and right hold the same strings in the
// same order. A nil slice and an empty slice compare as equal.
func sameStrings(left, right []string) bool {
	mismatch := len(left) != len(right)
	for i := 0; !mismatch && i < len(left); i++ {
		mismatch = left[i] != right[i]
	}
	return !mismatch
}
|
||||
@@ -1,6 +1,7 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
@@ -53,9 +54,11 @@ type PeerCacheEntry struct {
|
||||
BestReachability string `json:"best_reachability,omitempty"`
|
||||
BestConnectivity string `json:"best_connectivity,omitempty"`
|
||||
BestNATType string `json:"best_nat_type,omitempty"`
|
||||
BestRegion string `json:"best_region,omitempty"`
|
||||
BestPolicyTags []string `json:"best_policy_tags,omitempty"`
|
||||
BestCandidateScore int `json:"best_candidate_score,omitempty"`
|
||||
BestScoreReasons []string `json:"best_score_reasons,omitempty"`
|
||||
BestPeerCertSHA256 string `json:"best_peer_cert_sha256,omitempty"`
|
||||
EndpointCandidates []PeerEndpointCandidate `json:"endpoint_candidates,omitempty"`
|
||||
RendezvousLeaseID string `json:"rendezvous_lease_id,omitempty"`
|
||||
RelayNodeID string `json:"relay_node_id,omitempty"`
|
||||
@@ -132,9 +135,11 @@ func NewPeerCache(cfg PeerCacheConfig) *PeerCache {
|
||||
entry.BestReachability = scored[0].Candidate.Reachability
|
||||
entry.BestConnectivity = scored[0].Candidate.ConnectivityMode
|
||||
entry.BestNATType = scored[0].Candidate.NATType
|
||||
entry.BestRegion = scored[0].Candidate.Region
|
||||
entry.BestPolicyTags = append([]string{}, scored[0].Candidate.PolicyTags...)
|
||||
entry.BestCandidateScore = scored[0].Score
|
||||
entry.BestScoreReasons = append([]string{}, scored[0].Reasons...)
|
||||
entry.BestPeerCertSHA256 = candidatePeerCertSHA256(scored[0].Candidate)
|
||||
entry.bestScore = scored[0].Score
|
||||
if strings.TrimSpace(scored[0].Candidate.Address) != "" {
|
||||
entry.Endpoint = strings.TrimSpace(scored[0].Candidate.Address)
|
||||
@@ -188,6 +193,7 @@ func NewPeerCache(cfg PeerCacheConfig) *PeerCache {
|
||||
if lease.PeerNodeID != cfg.Local.NodeID {
|
||||
entry := peerCacheEntry(entries, lease.PeerNodeID)
|
||||
useLeaseEndpoint := shouldUseRendezvousEndpoint(*entry)
|
||||
localRelay := lease.RelayNodeID == cfg.Local.NodeID
|
||||
entry.RendezvousLeaseID = lease.LeaseID
|
||||
entry.RelayNodeID = lease.RelayNodeID
|
||||
entry.RelayEndpoint = strings.TrimRight(strings.TrimSpace(lease.RelayEndpoint), "/")
|
||||
@@ -195,12 +201,21 @@ func NewPeerCache(cfg PeerCacheConfig) *PeerCache {
|
||||
entry.CandidateCount = maxInt(entry.CandidateCount, 1)
|
||||
entry.ConnectivityModes = mergeStrings(entry.ConnectivityModes, []string{firstNonEmpty(lease.ConnectivityMode, "relay_required"), "relay_control"})
|
||||
if useLeaseEndpoint {
|
||||
entry.BestTransport = firstNonEmpty(lease.Transport, "relay_control")
|
||||
if localRelay {
|
||||
entry.BestTransport = "reverse_quic"
|
||||
} else {
|
||||
entry.BestTransport = firstNonEmpty(lease.Transport, "relay_quic")
|
||||
}
|
||||
entry.BestReachability = "relay"
|
||||
entry.BestConnectivity = firstNonEmpty(lease.ConnectivityMode, "relay_required")
|
||||
entry.Endpoint = entry.RelayEndpoint
|
||||
entry.BestCandidateID = lease.LeaseID
|
||||
entry.BestCandidateAddr = entry.RelayEndpoint
|
||||
if !localRelay {
|
||||
entry.Endpoint = entry.RelayEndpoint
|
||||
entry.BestCandidateID = lease.LeaseID
|
||||
entry.BestCandidateAddr = entry.RelayEndpoint
|
||||
entry.BestPeerCertSHA256 = rendezvousLeasePeerCertSHA256(lease)
|
||||
} else if strings.TrimSpace(entry.Endpoint) == "" {
|
||||
entry.Endpoint = firstNonEmpty(entry.BestCandidateAddr, entry.RelayEndpoint)
|
||||
}
|
||||
entry.bestScore = maxInt(entry.bestScore, 500)
|
||||
}
|
||||
}
|
||||
@@ -262,6 +277,20 @@ func NewPeerCache(cfg PeerCacheConfig) *PeerCache {
|
||||
}}
|
||||
}
|
||||
|
||||
func rendezvousLeasePeerCertSHA256(lease PeerRendezvousLease) string {
|
||||
var metadata struct {
|
||||
PeerCertSHA256 string `json:"peer_cert_sha256,omitempty"`
|
||||
TLSCertSHA256 string `json:"tls_cert_sha256,omitempty"`
|
||||
}
|
||||
if len(lease.Metadata) == 0 {
|
||||
return ""
|
||||
}
|
||||
if err := json.Unmarshal(lease.Metadata, &metadata); err != nil {
|
||||
return ""
|
||||
}
|
||||
return firstNonEmpty(strings.TrimSpace(metadata.PeerCertSHA256), strings.TrimSpace(metadata.TLSCertSHA256))
|
||||
}
|
||||
|
||||
func (c *PeerCache) Snapshot() PeerCacheSnapshot {
|
||||
if c == nil {
|
||||
return PeerCacheSnapshot{}
|
||||
|
||||
@@ -10,15 +10,15 @@ func TestPeerCacheSelectsAdjacentWarmPeersWithinLimit(t *testing.T) {
|
||||
cache := NewPeerCache(PeerCacheConfig{
|
||||
Local: local,
|
||||
PeerEndpoints: map[string]string{
|
||||
"node-a": "http://node-a:19000",
|
||||
"node-r": "http://node-r:19000",
|
||||
"node-c": "http://node-c:19000",
|
||||
"node-a": "quic://node-a:19443",
|
||||
"node-r": "quic://node-r:19443",
|
||||
"node-c": "quic://node-c:19443",
|
||||
},
|
||||
Routes: []SyntheticRoute{
|
||||
peerCacheRoute("route-1", []string{"node-a", local.NodeID, "node-r", "node-c"}),
|
||||
},
|
||||
RecoverySeeds: []PeerRecoverySeed{
|
||||
{NodeID: "node-seed", Endpoint: "https://seed.example.test", Transport: "direct_tcp_tls", Priority: 10},
|
||||
{NodeID: "node-seed", Endpoint: "quic://seed.example.test:19443", Transport: "direct_quic", Priority: 10},
|
||||
},
|
||||
WarmPeerLimit: 2,
|
||||
Now: time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC),
|
||||
@@ -42,7 +42,7 @@ func TestPeerCachePromotesRecoverySeedAfterRoutePeers(t *testing.T) {
|
||||
peerCacheRoute("route-1", []string{"node-a", local.NodeID, "node-r"}),
|
||||
},
|
||||
RecoverySeeds: []PeerRecoverySeed{
|
||||
{NodeID: "node-seed", Endpoint: "wss://seed.example.test/mesh", Transport: "wss", ConnectivityMode: "direct", Priority: 1},
|
||||
{NodeID: "node-seed", Endpoint: "quic://seed.example.test:19443", Transport: "direct_quic", ConnectivityMode: "direct", Priority: 1},
|
||||
},
|
||||
WarmPeerLimit: 3,
|
||||
Now: time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC),
|
||||
@@ -68,7 +68,7 @@ func TestPeerCacheUsesBestEndpointCandidate(t *testing.T) {
|
||||
{
|
||||
EndpointID: "node-b-relay",
|
||||
NodeID: "node-b",
|
||||
Transport: "relay",
|
||||
Transport: "relay_quic",
|
||||
Address: "relay.example.test",
|
||||
Reachability: "relay",
|
||||
ConnectivityMode: "relay_required",
|
||||
@@ -77,8 +77,8 @@ func TestPeerCacheUsesBestEndpointCandidate(t *testing.T) {
|
||||
{
|
||||
EndpointID: "node-b-public",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_tcp_tls",
|
||||
Address: "203.0.113.20:443",
|
||||
Transport: "direct_quic",
|
||||
Address: "quic://203.0.113.20:19443",
|
||||
Reachability: "public",
|
||||
NATType: "none",
|
||||
ConnectivityMode: "direct",
|
||||
@@ -119,10 +119,10 @@ func TestPeerCacheAppliesEndpointHealthObservations(t *testing.T) {
|
||||
LastVerifiedAt: &now,
|
||||
},
|
||||
{
|
||||
EndpointID: "node-b-wss",
|
||||
EndpointID: "node-b-ice",
|
||||
NodeID: "node-b",
|
||||
Transport: "wss",
|
||||
Address: "https://node-b.example.test:443",
|
||||
Transport: "ice_quic",
|
||||
Address: "quic://node-b.example.test:19444",
|
||||
Reachability: "public",
|
||||
NATType: "none",
|
||||
ConnectivityMode: "direct",
|
||||
@@ -148,10 +148,10 @@ func TestPeerCacheAppliesEndpointHealthObservations(t *testing.T) {
|
||||
if !ok {
|
||||
t.Fatal("node-b missing from cache")
|
||||
}
|
||||
if entry.BestCandidateID != "node-b-wss" || entry.Endpoint != "https://node-b.example.test:443" {
|
||||
if entry.BestCandidateID != "node-b-ice" || entry.Endpoint != "quic://node-b.example.test:19444" {
|
||||
t.Fatalf("peer cache did not apply endpoint observations: %+v", entry)
|
||||
}
|
||||
if !containsString(entry.BestScoreReasons, "transport:wss") {
|
||||
if !containsString(entry.BestScoreReasons, "transport:ice_quic") {
|
||||
t.Fatalf("peer cache did not expose score reasons: %+v", entry.BestScoreReasons)
|
||||
}
|
||||
}
|
||||
@@ -161,15 +161,15 @@ func TestPeerCacheUsesPreferredCorporateEndpointAddress(t *testing.T) {
|
||||
cache := NewPeerCache(PeerCacheConfig{
|
||||
Local: local,
|
||||
PeerEndpoints: map[string]string{
|
||||
"node-b": "https://node-b.public.example.test:443",
|
||||
"node-b": "quic://node-b.public.example.test:19443",
|
||||
},
|
||||
PeerEndpointCandidates: map[string][]PeerEndpointCandidate{
|
||||
"node-b": {
|
||||
{
|
||||
EndpointID: "node-b-public",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_tcp_tls",
|
||||
Address: "https://node-b.public.example.test:443",
|
||||
Transport: "direct_quic",
|
||||
Address: "quic://node-b.public.example.test:19443",
|
||||
Reachability: "public",
|
||||
NATType: "none",
|
||||
ConnectivityMode: "direct",
|
||||
@@ -179,8 +179,8 @@ func TestPeerCacheUsesPreferredCorporateEndpointAddress(t *testing.T) {
|
||||
{
|
||||
EndpointID: "node-b-corp-lan",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_tcp_tls",
|
||||
Address: "http://10.24.10.20:19001",
|
||||
Transport: "lan_quic",
|
||||
Address: "quic://10.24.10.20:19443",
|
||||
Reachability: "private",
|
||||
NATType: "none",
|
||||
ConnectivityMode: "direct",
|
||||
@@ -199,7 +199,7 @@ func TestPeerCacheUsesPreferredCorporateEndpointAddress(t *testing.T) {
|
||||
if !ok {
|
||||
t.Fatal("node-b missing from peer cache")
|
||||
}
|
||||
if entry.BestCandidateID != "node-b-corp-lan" || entry.Endpoint != "http://10.24.10.20:19001" {
|
||||
if entry.BestCandidateID != "node-b-corp-lan" || entry.Endpoint != "quic://10.24.10.20:19443" {
|
||||
t.Fatalf("peer cache did not choose corp LAN endpoint: %+v", entry)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -29,6 +29,7 @@ type PeerConnectionIntentPlanConfig struct {
|
||||
PeerCache PeerCacheSnapshot
|
||||
RecoveryPlan PeerRecoveryPlan
|
||||
RendezvousLeases []PeerRendezvousLease
|
||||
PreferredRegion string
|
||||
Now time.Time
|
||||
}
|
||||
|
||||
@@ -62,12 +63,14 @@ type PeerConnectionIntent struct {
|
||||
Reachability string `json:"reachability,omitempty"`
|
||||
ConnectivityMode string `json:"connectivity_mode,omitempty"`
|
||||
NATType string `json:"nat_type,omitempty"`
|
||||
Region string `json:"region,omitempty"`
|
||||
PolicyTags []string `json:"policy_tags,omitempty"`
|
||||
RequiresRendezvous bool `json:"requires_rendezvous"`
|
||||
RendezvousResolved bool `json:"rendezvous_resolved"`
|
||||
DirectCandidate bool `json:"direct_candidate"`
|
||||
RelayCandidate bool `json:"relay_candidate"`
|
||||
BestCandidateID string `json:"best_candidate_id,omitempty"`
|
||||
BestPeerCertSHA256 string `json:"best_peer_cert_sha256,omitempty"`
|
||||
RendezvousLeaseID string `json:"rendezvous_lease_id,omitempty"`
|
||||
RelayNodeID string `json:"relay_node_id,omitempty"`
|
||||
RelayEndpoint string `json:"relay_endpoint,omitempty"`
|
||||
@@ -94,33 +97,35 @@ func PlanPeerConnectionIntents(cfg PeerConnectionIntentPlanConfig) PeerConnectio
|
||||
}
|
||||
entry := entryByNode[candidate.NodeID]
|
||||
intent := PeerConnectionIntent{
|
||||
NodeID: candidate.NodeID,
|
||||
Action: connectionIntentAction(candidate),
|
||||
Reason: candidate.Reason,
|
||||
Endpoint: candidate.Endpoint,
|
||||
ConnectionState: candidate.ConnectionState,
|
||||
Transport: firstNonEmpty(candidate.BestTransport, entry.BestTransport),
|
||||
Reachability: entry.BestReachability,
|
||||
ConnectivityMode: entry.BestConnectivity,
|
||||
NATType: entry.BestNATType,
|
||||
PolicyTags: append([]string{}, entry.BestPolicyTags...),
|
||||
BestCandidateID: firstNonEmpty(candidate.BestCandidateID, entry.BestCandidateID),
|
||||
RendezvousLeaseID: entry.RendezvousLeaseID,
|
||||
RelayNodeID: entry.RelayNodeID,
|
||||
RelayEndpoint: entry.RelayEndpoint,
|
||||
RelayCandidate: entry.RelayControl,
|
||||
ControlPlaneOnly: entry.RelayControl,
|
||||
RecoverySeed: candidate.RecoverySeed || entry.RecoverySeed,
|
||||
Priority: candidate.Priority,
|
||||
GeneratedAt: now,
|
||||
NodeID: candidate.NodeID,
|
||||
Action: connectionIntentAction(candidate),
|
||||
Reason: candidate.Reason,
|
||||
Endpoint: candidate.Endpoint,
|
||||
ConnectionState: candidate.ConnectionState,
|
||||
Transport: firstNonEmpty(candidate.BestTransport, entry.BestTransport),
|
||||
Reachability: entry.BestReachability,
|
||||
ConnectivityMode: entry.BestConnectivity,
|
||||
NATType: entry.BestNATType,
|
||||
Region: entry.BestRegion,
|
||||
PolicyTags: append([]string{}, entry.BestPolicyTags...),
|
||||
BestCandidateID: firstNonEmpty(candidate.BestCandidateID, entry.BestCandidateID),
|
||||
BestPeerCertSHA256: entry.BestPeerCertSHA256,
|
||||
RendezvousLeaseID: entry.RendezvousLeaseID,
|
||||
RelayNodeID: entry.RelayNodeID,
|
||||
RelayEndpoint: entry.RelayEndpoint,
|
||||
RelayCandidate: entry.RelayControl,
|
||||
ControlPlaneOnly: entry.RelayControl,
|
||||
RecoverySeed: candidate.RecoverySeed || entry.RecoverySeed,
|
||||
Priority: candidate.Priority,
|
||||
GeneratedAt: now,
|
||||
}
|
||||
mode, requiresRendezvous, directCandidate := classifyPeerTransport(intent)
|
||||
mode, requiresRendezvous, directCandidate := classifyPeerTransport(intent, cfg.PreferredRegion)
|
||||
intent.TransportMode = mode
|
||||
intent.RequiresRendezvous = requiresRendezvous
|
||||
intent.DirectCandidate = directCandidate
|
||||
if intent.RequiresRendezvous {
|
||||
if lease, ok := rendezvousLeaseForPeer(cfg.RendezvousLeases, intent.NodeID, now); ok {
|
||||
applyRendezvousLease(&intent, lease)
|
||||
applyRendezvousLease(&intent, lease, cfg.PeerCache.LocalNodeID)
|
||||
}
|
||||
}
|
||||
intents = append(intents, intent)
|
||||
@@ -185,10 +190,12 @@ func connectionIntentAction(candidate PeerRecoveryCandidate) string {
|
||||
}
|
||||
}
|
||||
|
||||
func classifyPeerTransport(intent PeerConnectionIntent) (string, bool, bool) {
|
||||
func classifyPeerTransport(intent PeerConnectionIntent, preferredRegion string) (string, bool, bool) {
|
||||
transport := strings.ToLower(strings.TrimSpace(intent.Transport))
|
||||
connectivity := strings.ToLower(strings.TrimSpace(intent.ConnectivityMode))
|
||||
reachability := strings.ToLower(strings.TrimSpace(intent.Reachability))
|
||||
region := strings.TrimSpace(intent.Region)
|
||||
preferredRegion = strings.TrimSpace(preferredRegion)
|
||||
tags := lowerStringSet(intent.PolicyTags)
|
||||
|
||||
if strings.Contains(transport, "relay") || connectivity == "relay_required" || reachability == "relay" {
|
||||
@@ -201,6 +208,9 @@ func classifyPeerTransport(intent PeerConnectionIntent) (string, bool, bool) {
|
||||
return PeerTransportModeCorporateLAN, false, true
|
||||
}
|
||||
if tags["private-lan"] || reachability == "private" || endpointHasPrivateHost(intent.Endpoint) {
|
||||
if preferredRegion != "" && region != "" && !strings.EqualFold(region, preferredRegion) {
|
||||
return PeerTransportModeRelayRequired, true, false
|
||||
}
|
||||
return PeerTransportModePrivateLAN, false, true
|
||||
}
|
||||
if strings.Contains(transport, "direct") || reachability == "public" || connectivity == "direct" {
|
||||
@@ -246,9 +256,16 @@ func rendezvousLeaseForPeer(leases []PeerRendezvousLease, peerNodeID string, now
|
||||
return candidates[0], true
|
||||
}
|
||||
|
||||
func applyRendezvousLease(intent *PeerConnectionIntent, lease PeerRendezvousLease) {
|
||||
intent.Endpoint = strings.TrimRight(strings.TrimSpace(lease.RelayEndpoint), "/")
|
||||
intent.Transport = firstNonEmpty(lease.Transport, "relay_control")
|
||||
func applyRendezvousLease(intent *PeerConnectionIntent, lease PeerRendezvousLease, localNodeID string) {
|
||||
localRelay := strings.TrimSpace(lease.RelayNodeID) == strings.TrimSpace(localNodeID)
|
||||
if !localRelay {
|
||||
intent.Endpoint = strings.TrimRight(strings.TrimSpace(lease.RelayEndpoint), "/")
|
||||
}
|
||||
if localRelay {
|
||||
intent.Transport = "reverse_quic"
|
||||
} else {
|
||||
intent.Transport = firstNonEmpty(lease.Transport, "relay_quic")
|
||||
}
|
||||
intent.TransportMode = PeerTransportModeRelayControl
|
||||
intent.RequiresRendezvous = false
|
||||
intent.RendezvousResolved = true
|
||||
@@ -256,17 +273,33 @@ func applyRendezvousLease(intent *PeerConnectionIntent, lease PeerRendezvousLeas
|
||||
intent.RelayCandidate = true
|
||||
intent.RendezvousLeaseID = lease.LeaseID
|
||||
intent.RelayNodeID = lease.RelayNodeID
|
||||
intent.RelayEndpoint = intent.Endpoint
|
||||
intent.RelayEndpoint = strings.TrimRight(strings.TrimSpace(lease.RelayEndpoint), "/")
|
||||
intent.ControlPlaneOnly = true
|
||||
if certSHA256 := rendezvousLeasePeerCertSHA256(lease); certSHA256 != "" && !localRelay {
|
||||
intent.BestPeerCertSHA256 = certSHA256
|
||||
}
|
||||
if lease.ConnectivityMode != "" {
|
||||
intent.ConnectivityMode = lease.ConnectivityMode
|
||||
}
|
||||
}
|
||||
|
||||
func endpointHasPrivateHost(rawEndpoint string) bool {
|
||||
addr, ok := endpointHostAddr(rawEndpoint)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
return addr.IsPrivate() || addr.IsLoopback() || addr.IsLinkLocalUnicast()
|
||||
}
|
||||
|
||||
func endpointHasUnspecifiedHost(rawEndpoint string) bool {
|
||||
addr, ok := endpointHostAddr(rawEndpoint)
|
||||
return ok && addr.IsUnspecified()
|
||||
}
|
||||
|
||||
func endpointHostAddr(rawEndpoint string) (netip.Addr, bool) {
|
||||
rawEndpoint = strings.TrimSpace(rawEndpoint)
|
||||
if rawEndpoint == "" {
|
||||
return false
|
||||
return netip.Addr{}, false
|
||||
}
|
||||
host := rawEndpoint
|
||||
if parsed, err := url.Parse(rawEndpoint); err == nil && parsed.Host != "" {
|
||||
@@ -277,9 +310,9 @@ func endpointHasPrivateHost(rawEndpoint string) bool {
|
||||
}
|
||||
addr, err := netip.ParseAddr(strings.Trim(host, "[]"))
|
||||
if err != nil {
|
||||
return false
|
||||
return netip.Addr{}, false
|
||||
}
|
||||
return addr.IsPrivate() || addr.IsLoopback() || addr.IsLinkLocalUnicast()
|
||||
return addr, true
|
||||
}
|
||||
|
||||
func lowerStringSet(values []string) map[string]bool {
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
@@ -11,8 +12,8 @@ func TestPeerConnectionIntentsClassifyCorporateDirect(t *testing.T) {
|
||||
PeerCache: PeerCacheSnapshot{Entries: []PeerCacheEntry{
|
||||
{
|
||||
NodeID: "node-b",
|
||||
Endpoint: "http://10.24.10.20:19001",
|
||||
BestTransport: "direct_tcp_tls",
|
||||
Endpoint: "quic://10.24.10.20:19443",
|
||||
BestTransport: "lan_quic",
|
||||
BestReachability: "private",
|
||||
BestConnectivity: "direct",
|
||||
BestPolicyTags: []string{"corp-lan", "same-site"},
|
||||
@@ -23,7 +24,7 @@ func TestPeerConnectionIntentsClassifyCorporateDirect(t *testing.T) {
|
||||
Candidates: []PeerRecoveryCandidate{
|
||||
{
|
||||
NodeID: "node-b",
|
||||
Endpoint: "http://10.24.10.20:19001",
|
||||
Endpoint: "quic://10.24.10.20:19443",
|
||||
ConnectionState: PeerConnectionReady,
|
||||
Reason: "maintain_ready",
|
||||
Priority: 100,
|
||||
@@ -48,15 +49,15 @@ func TestPeerConnectionIntentsClassifyOutboundAndRelayAsRendezvousRequired(t *te
|
||||
PeerCache: PeerCacheSnapshot{Entries: []PeerCacheEntry{
|
||||
{
|
||||
NodeID: "node-b",
|
||||
Endpoint: "https://node-b.example.test:443",
|
||||
BestTransport: "direct_tcp_tls",
|
||||
Endpoint: "quic://node-b.example.test:19443",
|
||||
BestTransport: "reverse_quic",
|
||||
BestReachability: "outbound_only",
|
||||
BestConnectivity: "outbound_only",
|
||||
},
|
||||
{
|
||||
NodeID: "node-c",
|
||||
Endpoint: "relay://fabric-relay/node-c",
|
||||
BestTransport: "relay",
|
||||
BestTransport: "relay_quic",
|
||||
BestReachability: "relay",
|
||||
BestConnectivity: "relay_required",
|
||||
},
|
||||
@@ -66,7 +67,7 @@ func TestPeerConnectionIntentsClassifyOutboundAndRelayAsRendezvousRequired(t *te
|
||||
Candidates: []PeerRecoveryCandidate{
|
||||
{
|
||||
NodeID: "node-b",
|
||||
Endpoint: "https://node-b.example.test:443",
|
||||
Endpoint: "quic://node-b.example.test:19443",
|
||||
ConnectionState: PeerConnectionDisconnected,
|
||||
Reason: "recover_warm",
|
||||
Priority: 90,
|
||||
@@ -91,6 +92,42 @@ func TestPeerConnectionIntentsClassifyOutboundAndRelayAsRendezvousRequired(t *te
|
||||
}
|
||||
}
|
||||
|
||||
func TestPeerConnectionIntentsRequireRendezvousForRemotePrivateRegion(t *testing.T) {
|
||||
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
||||
plan := PlanPeerConnectionIntents(PeerConnectionIntentPlanConfig{
|
||||
PreferredRegion: "ifcm",
|
||||
PeerCache: PeerCacheSnapshot{Entries: []PeerCacheEntry{
|
||||
{
|
||||
NodeID: "node-b",
|
||||
Endpoint: "quic://192.168.200.61:19132",
|
||||
BestTransport: "direct_quic",
|
||||
BestReachability: "private",
|
||||
BestConnectivity: "private_lan",
|
||||
BestRegion: "docker-test",
|
||||
},
|
||||
}},
|
||||
RecoveryPlan: PeerRecoveryPlan{
|
||||
Mode: PeerRecoveryModeRecovery,
|
||||
Candidates: []PeerRecoveryCandidate{{
|
||||
NodeID: "node-b",
|
||||
Endpoint: "quic://192.168.200.61:19132",
|
||||
ConnectionState: PeerConnectionDisconnected,
|
||||
Reason: "recover_warm",
|
||||
Priority: 100,
|
||||
}},
|
||||
},
|
||||
Now: now,
|
||||
})
|
||||
|
||||
if plan.IntentCount != 1 || plan.RelayRequiredCount != 1 || plan.RendezvousRequiredCount != 1 {
|
||||
t.Fatalf("unexpected remote private plan counts: %+v", plan)
|
||||
}
|
||||
intent := plan.Intents[0]
|
||||
if intent.DirectCandidate || !intent.RequiresRendezvous || intent.TransportMode != PeerTransportModeRelayRequired {
|
||||
t.Fatalf("unexpected remote private intent: %+v", intent)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPeerConnectionIntentsResolveRendezvousWithRelayLease(t *testing.T) {
|
||||
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
||||
plan := PlanPeerConnectionIntents(PeerConnectionIntentPlanConfig{
|
||||
@@ -120,13 +157,14 @@ func TestPeerConnectionIntentsResolveRendezvousWithRelayLease(t *testing.T) {
|
||||
LeaseID: "lease-node-b-via-node-r",
|
||||
PeerNodeID: "node-b",
|
||||
RelayNodeID: "node-r",
|
||||
RelayEndpoint: "http://node-r:19000",
|
||||
Transport: "relay_control",
|
||||
RelayEndpoint: "quic://node-r:19443",
|
||||
Transport: "relay_quic",
|
||||
ConnectivityMode: "relay_required",
|
||||
Priority: 10,
|
||||
ControlPlaneOnly: true,
|
||||
IssuedAt: now.Add(-time.Minute),
|
||||
ExpiresAt: now.Add(time.Minute),
|
||||
Metadata: peerConnectionIntentLeaseMetadata(t, "abc123"),
|
||||
},
|
||||
},
|
||||
Now: now,
|
||||
@@ -137,9 +175,10 @@ func TestPeerConnectionIntentsResolveRendezvousWithRelayLease(t *testing.T) {
|
||||
}
|
||||
intent := plan.Intents[0]
|
||||
if intent.TransportMode != PeerTransportModeRelayControl ||
|
||||
intent.Endpoint != "http://node-r:19000" ||
|
||||
intent.Endpoint != "quic://node-r:19443" ||
|
||||
intent.RelayNodeID != "node-r" ||
|
||||
intent.RendezvousLeaseID != "lease-node-b-via-node-r" ||
|
||||
intent.BestPeerCertSHA256 != "abc123" ||
|
||||
!intent.RelayCandidate ||
|
||||
!intent.RendezvousResolved ||
|
||||
intent.RequiresRendezvous {
|
||||
@@ -176,8 +215,8 @@ func TestPeerConnectionIntentsSkipExpiredRendezvousLeaseAndReselect(t *testing.T
|
||||
LeaseID: "lease-expired-preferred",
|
||||
PeerNodeID: "node-b",
|
||||
RelayNodeID: "node-r-old",
|
||||
RelayEndpoint: "http://node-r-old:19000",
|
||||
Transport: "relay_control",
|
||||
RelayEndpoint: "quic://node-r-old:19443",
|
||||
Transport: "relay_quic",
|
||||
ConnectivityMode: "relay_required",
|
||||
Priority: 1,
|
||||
ControlPlaneOnly: true,
|
||||
@@ -188,8 +227,8 @@ func TestPeerConnectionIntentsSkipExpiredRendezvousLeaseAndReselect(t *testing.T
|
||||
LeaseID: "lease-active-reselected",
|
||||
PeerNodeID: "node-b",
|
||||
RelayNodeID: "node-r-new",
|
||||
RelayEndpoint: "http://node-r-new:19000",
|
||||
Transport: "relay_control",
|
||||
RelayEndpoint: "quic://node-r-new:19443",
|
||||
Transport: "relay_quic",
|
||||
ConnectivityMode: "relay_required",
|
||||
Priority: 20,
|
||||
ControlPlaneOnly: true,
|
||||
@@ -206,20 +245,29 @@ func TestPeerConnectionIntentsSkipExpiredRendezvousLeaseAndReselect(t *testing.T
|
||||
intent := plan.Intents[0]
|
||||
if intent.RendezvousLeaseID != "lease-active-reselected" ||
|
||||
intent.RelayNodeID != "node-r-new" ||
|
||||
intent.Endpoint != "http://node-r-new:19000" {
|
||||
intent.Endpoint != "quic://node-r-new:19443" {
|
||||
t.Fatalf("expired lease was not skipped: %+v", intent)
|
||||
}
|
||||
}
|
||||
|
||||
// peerConnectionIntentLeaseMetadata is a test helper that builds lease
// metadata: a JSON object with the single key "peer_cert_sha256" set to
// certSHA256. It fails the test immediately if marshaling fails.
func peerConnectionIntentLeaseMetadata(t *testing.T, certSHA256 string) json.RawMessage {
	t.Helper()
	fields := map[string]string{"peer_cert_sha256": certSHA256}
	encoded, marshalErr := json.Marshal(fields)
	if marshalErr != nil {
		t.Fatalf("marshal metadata: %v", marshalErr)
	}
	return encoded
}
|
||||
|
||||
func TestPeerConnectionIntentsClassifyPrivateEndpointWithoutCandidateHints(t *testing.T) {
|
||||
plan := PlanPeerConnectionIntents(PeerConnectionIntentPlanConfig{
|
||||
PeerCache: PeerCacheSnapshot{Entries: []PeerCacheEntry{
|
||||
{NodeID: "node-b", Endpoint: "http://192.168.10.20:19001"},
|
||||
{NodeID: "node-b", Endpoint: "quic://192.168.10.20:19443"},
|
||||
}},
|
||||
RecoveryPlan: PeerRecoveryPlan{Candidates: []PeerRecoveryCandidate{
|
||||
{
|
||||
NodeID: "node-b",
|
||||
Endpoint: "http://192.168.10.20:19001",
|
||||
Endpoint: "quic://192.168.10.20:19443",
|
||||
ConnectionState: PeerConnectionDisconnected,
|
||||
Reason: "recover_peer",
|
||||
Priority: 10,
|
||||
|
||||
@@ -2,6 +2,7 @@ package mesh
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strings"
|
||||
"sync"
|
||||
@@ -25,6 +26,8 @@ type PeerConnectionManagerConfig struct {
|
||||
Tracker *PeerConnectionTracker
|
||||
RendezvousLeases []PeerRendezvousLease
|
||||
HTTPClient *http.Client
|
||||
QUICTransport *QUICFabricTransport
|
||||
PreferredRegion string
|
||||
ProbeTimeout time.Duration
|
||||
Now func() time.Time
|
||||
}
|
||||
@@ -35,6 +38,8 @@ type PeerConnectionManager struct {
|
||||
tracker *PeerConnectionTracker
|
||||
rendezvousLeases []PeerRendezvousLease
|
||||
httpClient *http.Client
|
||||
quicTransport *QUICFabricTransport
|
||||
preferredRegion string
|
||||
probeTimeout time.Duration
|
||||
now func() time.Time
|
||||
|
||||
@@ -101,9 +106,10 @@ type PeerConnectionCandidateProbeResult struct {
|
||||
}
|
||||
|
||||
type peerConnectionProbeTarget struct {
|
||||
CandidateID string
|
||||
Endpoint string
|
||||
Transport string
|
||||
CandidateID string
|
||||
Endpoint string
|
||||
Transport string
|
||||
PeerCertSHA256 string
|
||||
}
|
||||
|
||||
func NewPeerConnectionManager(cfg PeerConnectionManagerConfig) *PeerConnectionManager {
|
||||
@@ -132,6 +138,8 @@ func NewPeerConnectionManager(cfg PeerConnectionManagerConfig) *PeerConnectionMa
|
||||
tracker: cfg.Tracker,
|
||||
rendezvousLeases: append([]PeerRendezvousLease{}, cfg.RendezvousLeases...),
|
||||
httpClient: httpClient,
|
||||
quicTransport: cfg.QUICTransport,
|
||||
preferredRegion: strings.TrimSpace(cfg.PreferredRegion),
|
||||
probeTimeout: probeTimeout,
|
||||
now: now,
|
||||
}
|
||||
@@ -155,6 +163,7 @@ func (m *PeerConnectionManager) ProbeOnce(ctx context.Context) PeerConnectionMan
|
||||
PeerCache: peerSnapshot,
|
||||
RecoveryPlan: recoveryPlan,
|
||||
RendezvousLeases: rendezvousLeases,
|
||||
PreferredRegion: m.preferredRegion,
|
||||
Now: startedAt,
|
||||
})
|
||||
entriesByNode := map[string]PeerCacheEntry{}
|
||||
@@ -215,6 +224,15 @@ func (m *PeerConnectionManager) UpdatePeerConfig(peerCache *PeerCache, rendezvou
|
||||
m.rendezvousLeases = append([]PeerRendezvousLease{}, rendezvousLeases...)
|
||||
}
|
||||
|
||||
func (m *PeerConnectionManager) UpdateQUICTransport(transport *QUICFabricTransport) {
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
m.quicTransport = transport
|
||||
}
|
||||
|
||||
func (m *PeerConnectionManager) peerConfigSnapshot() (*PeerCache, []PeerRendezvousLease) {
|
||||
if m == nil {
|
||||
return nil, nil
|
||||
@@ -242,17 +260,18 @@ func (m *PeerConnectionManager) probeIntent(ctx context.Context, intent PeerConn
|
||||
StartedAt: startedAt,
|
||||
}
|
||||
peer := PeerCacheEntry{
|
||||
NodeID: intent.NodeID,
|
||||
Endpoint: intent.Endpoint,
|
||||
Warm: true,
|
||||
WarmReason: intent.Reason,
|
||||
RecoverySeed: intent.RecoverySeed,
|
||||
BestCandidateID: intent.BestCandidateID,
|
||||
BestTransport: intent.Transport,
|
||||
RendezvousLeaseID: intent.RendezvousLeaseID,
|
||||
RelayNodeID: intent.RelayNodeID,
|
||||
RelayEndpoint: intent.RelayEndpoint,
|
||||
RelayControl: intent.RelayCandidate,
|
||||
NodeID: intent.NodeID,
|
||||
Endpoint: intent.Endpoint,
|
||||
Warm: true,
|
||||
WarmReason: intent.Reason,
|
||||
RecoverySeed: intent.RecoverySeed,
|
||||
BestCandidateID: intent.BestCandidateID,
|
||||
BestTransport: intent.Transport,
|
||||
RendezvousLeaseID: intent.RendezvousLeaseID,
|
||||
RelayNodeID: intent.RelayNodeID,
|
||||
RelayEndpoint: intent.RelayEndpoint,
|
||||
RelayControl: intent.RelayCandidate,
|
||||
BestPeerCertSHA256: firstNonEmpty(intent.BestPeerCertSHA256, cacheEntry.BestPeerCertSHA256),
|
||||
}
|
||||
if intent.RequiresRendezvous {
|
||||
result.LinkStatus = PeerConnectionProbeDeferred
|
||||
@@ -282,13 +301,12 @@ func (m *PeerConnectionManager) probeIntent(ctx context.Context, intent PeerConn
|
||||
ClusterID: m.local.ClusterID,
|
||||
NodeID: intent.NodeID,
|
||||
}
|
||||
if intent.RelayCandidate && intent.RelayNodeID != "" {
|
||||
target.NodeID = intent.RelayNodeID
|
||||
}
|
||||
target.NodeID = peerConnectionProbeTargetNodeID(intent, m.local.NodeID)
|
||||
targets := []peerConnectionProbeTarget{{
|
||||
CandidateID: intent.BestCandidateID,
|
||||
Endpoint: intent.Endpoint,
|
||||
Transport: intent.Transport,
|
||||
CandidateID: intent.BestCandidateID,
|
||||
Endpoint: intent.Endpoint,
|
||||
Transport: intent.Transport,
|
||||
PeerCertSHA256: intent.BestPeerCertSHA256,
|
||||
}}
|
||||
if intent.DirectCandidate {
|
||||
targets = peerConnectionProbeTargets(intent, cacheEntry)
|
||||
@@ -300,13 +318,14 @@ func (m *PeerConnectionManager) probeIntent(ctx context.Context, intent PeerConn
|
||||
probePeer.BestCandidateID = strings.TrimSpace(probeTarget.CandidateID)
|
||||
probePeer.BestCandidateAddr = probePeer.Endpoint
|
||||
probePeer.BestTransport = strings.TrimSpace(probeTarget.Transport)
|
||||
probePeer.BestPeerCertSHA256 = firstNonEmpty(probeTarget.PeerCertSHA256, probePeer.BestPeerCertSHA256)
|
||||
if probePeer.Endpoint == "" {
|
||||
continue
|
||||
}
|
||||
candidateStartedAt := normalizedNow(m.now())
|
||||
m.tracker.BeginProbe(probePeer, candidateStartedAt)
|
||||
probeCtx, cancel := context.WithTimeout(ctx, m.probeTimeout)
|
||||
_, err := NewClient(probePeer.Endpoint).withHTTPClient(m.httpClient).SendHealth(probeCtx, NewHealthMessage(m.local, target))
|
||||
err := m.probePeerTarget(probeCtx, probePeer, target)
|
||||
cancel()
|
||||
completedAt := normalizedNow(m.now())
|
||||
candidateResult := PeerConnectionCandidateProbeResult{
|
||||
@@ -354,47 +373,97 @@ func (m *PeerConnectionManager) probeIntent(ctx context.Context, intent PeerConn
|
||||
return result
|
||||
}
|
||||
|
||||
func peerConnectionProbeTargetNodeID(intent PeerConnectionIntent, localNodeID string) string {
|
||||
if intent.RelayCandidate && strings.TrimSpace(intent.RelayNodeID) != "" && strings.TrimSpace(intent.RelayNodeID) != strings.TrimSpace(localNodeID) {
|
||||
return intent.RelayNodeID
|
||||
}
|
||||
return intent.NodeID
|
||||
}
|
||||
|
||||
func (m *PeerConnectionManager) probePeerTarget(ctx context.Context, probePeer PeerCacheEntry, target PeerIdentity) error {
|
||||
endpoint := strings.TrimRight(strings.TrimSpace(probePeer.Endpoint), "/")
|
||||
transport := strings.TrimSpace(probePeer.BestTransport)
|
||||
if hasLegacyEndpointScheme(endpoint) {
|
||||
return fmt.Errorf("non_quic_probe_rejected")
|
||||
}
|
||||
if peerConnectionTargetIsQUIC(transport, endpoint) {
|
||||
carrier, selectedTarget, err := FabricTransportForTarget(FabricTransportTarget{
|
||||
EndpointID: probePeer.BestCandidateID,
|
||||
PeerID: target.NodeID,
|
||||
Endpoint: endpoint,
|
||||
Transport: transport,
|
||||
Timeout: m.probeTimeout,
|
||||
PeerCertSHA256: strings.TrimSpace(probePeer.BestPeerCertSHA256),
|
||||
}, m.quicTransport)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
session, err := carrier.Connect(ctx, selectedTarget)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return session.Close()
|
||||
}
|
||||
return fmt.Errorf("non_quic_probe_rejected")
|
||||
}
|
||||
|
||||
func peerConnectionProbeTargets(intent PeerConnectionIntent, cacheEntry PeerCacheEntry) []peerConnectionProbeTarget {
|
||||
seen := map[string]struct{}{}
|
||||
out := make([]peerConnectionProbeTarget, 0, len(cacheEntry.EndpointCandidates)+1)
|
||||
add := func(candidateID, endpoint, transport string) {
|
||||
add := func(candidateID, endpoint, transport, peerCertSHA256 string) {
|
||||
endpoint = strings.TrimRight(strings.TrimSpace(endpoint), "/")
|
||||
if endpoint == "" {
|
||||
return
|
||||
}
|
||||
if endpointHasUnspecifiedHost(endpoint) {
|
||||
return
|
||||
}
|
||||
key := candidateID + "|" + endpoint
|
||||
if _, ok := seen[key]; ok {
|
||||
return
|
||||
}
|
||||
seen[key] = struct{}{}
|
||||
out = append(out, peerConnectionProbeTarget{
|
||||
CandidateID: strings.TrimSpace(candidateID),
|
||||
Endpoint: endpoint,
|
||||
Transport: strings.TrimSpace(transport),
|
||||
CandidateID: strings.TrimSpace(candidateID),
|
||||
Endpoint: endpoint,
|
||||
Transport: strings.TrimSpace(transport),
|
||||
PeerCertSHA256: strings.TrimSpace(peerCertSHA256),
|
||||
})
|
||||
}
|
||||
for _, candidate := range cacheEntry.EndpointCandidates {
|
||||
if !candidateUsableForDirectProbe(candidate) {
|
||||
continue
|
||||
}
|
||||
add(candidate.EndpointID, candidate.Address, candidate.Transport)
|
||||
add(candidate.EndpointID, candidate.Address, candidate.Transport, candidatePeerCertSHA256(candidate))
|
||||
}
|
||||
add(intent.BestCandidateID, intent.Endpoint, intent.Transport)
|
||||
add(intent.BestCandidateID, intent.Endpoint, intent.Transport, cacheEntry.BestPeerCertSHA256)
|
||||
return out
|
||||
}
|
||||
|
||||
func peerConnectionTargetIsQUIC(transport string, endpoint string) bool {
|
||||
return isQUICOnlyCandidateTransport(transport) || strings.HasPrefix(strings.ToLower(strings.TrimSpace(endpoint)), "quic://")
|
||||
}
|
||||
|
||||
func candidateUsableForDirectProbe(candidate PeerEndpointCandidate) bool {
|
||||
endpoint := strings.TrimSpace(candidate.Address)
|
||||
if endpoint == "" || strings.HasPrefix(endpoint, "relay://") || strings.HasPrefix(endpoint, "outbound://") {
|
||||
return false
|
||||
}
|
||||
if endpointHasUnspecifiedHost(endpoint) {
|
||||
return false
|
||||
}
|
||||
connectivity := strings.ToLower(strings.TrimSpace(candidate.ConnectivityMode))
|
||||
reachability := strings.ToLower(strings.TrimSpace(candidate.Reachability))
|
||||
transport := strings.ToLower(strings.TrimSpace(candidate.Transport))
|
||||
if connectivity == "outbound_only" || connectivity == "relay_required" || reachability == "outbound_only" || reachability == "relay" {
|
||||
return false
|
||||
}
|
||||
return transport == "" || strings.Contains(transport, "direct") || transport == "wss" || strings.HasPrefix(endpoint, "http://") || strings.HasPrefix(endpoint, "https://")
|
||||
return transport == "" ||
|
||||
strings.Contains(transport, "direct_quic") ||
|
||||
transport == "quic" ||
|
||||
transport == "lan_quic" ||
|
||||
transport == "ice_quic" ||
|
||||
strings.HasPrefix(endpoint, "quic://")
|
||||
}
|
||||
|
||||
func (m *PeerConnectionManager) connectionState(nodeID string) PeerConnectionState {
|
||||
|
||||
@@ -2,8 +2,8 @@ package mesh
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
@@ -11,12 +11,18 @@ import (
|
||||
func TestPeerConnectionManagerProbesDirectAndDefersRendezvous(t *testing.T) {
|
||||
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
||||
current := now
|
||||
server := httptest.NewServer(Server{
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"},
|
||||
}.Handler())
|
||||
tlsConfig := testQUICTLSConfig(t)
|
||||
server, err := StartQUICFabricServer(context.Background(), QUICFabricServerConfig{
|
||||
ListenAddr: "127.0.0.1:0",
|
||||
TLSConfig: tlsConfig,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("start quic fabric server: %v", err)
|
||||
}
|
||||
defer server.Close()
|
||||
|
||||
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"}
|
||||
certSHA256 := testQUICCertSHA256(t, tlsConfig)
|
||||
cache := NewPeerCache(PeerCacheConfig{
|
||||
Local: local,
|
||||
PeerEndpointCandidates: map[string][]PeerEndpointCandidate{
|
||||
@@ -24,19 +30,20 @@ func TestPeerConnectionManagerProbesDirectAndDefersRendezvous(t *testing.T) {
|
||||
{
|
||||
EndpointID: "node-b-direct",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_tcp_tls",
|
||||
Address: server.URL,
|
||||
Transport: "direct_quic",
|
||||
Address: "quic://" + server.Addr().String(),
|
||||
Reachability: "private",
|
||||
ConnectivityMode: "direct",
|
||||
PolicyTags: []string{"corp-lan", "same-site"},
|
||||
Priority: 1,
|
||||
Metadata: peerConnectionProbeMetadata(t, certSHA256),
|
||||
},
|
||||
},
|
||||
"node-c": {
|
||||
{
|
||||
EndpointID: "node-c-relay",
|
||||
NodeID: "node-c",
|
||||
Transport: "relay",
|
||||
Transport: "relay_quic",
|
||||
Address: "relay://fabric/node-c",
|
||||
Reachability: "relay",
|
||||
ConnectivityMode: "relay_required",
|
||||
@@ -49,10 +56,11 @@ func TestPeerConnectionManagerProbesDirectAndDefersRendezvous(t *testing.T) {
|
||||
})
|
||||
tracker := NewPeerConnectionTracker(cache.Snapshot(), now)
|
||||
manager := NewPeerConnectionManager(PeerConnectionManagerConfig{
|
||||
Local: local,
|
||||
PeerCache: cache,
|
||||
Tracker: tracker,
|
||||
ProbeTimeout: time.Second,
|
||||
Local: local,
|
||||
PeerCache: cache,
|
||||
Tracker: tracker,
|
||||
QUICTransport: NewQUICFabricTransport(nil),
|
||||
ProbeTimeout: time.Second,
|
||||
Now: func() time.Time {
|
||||
current = current.Add(10 * time.Millisecond)
|
||||
return current
|
||||
@@ -116,24 +124,31 @@ func TestPeerConnectionManagerRecordsFailureAndSuppressesActiveBackoff(t *testin
|
||||
func TestPeerConnectionManagerProbesRelayControlLease(t *testing.T) {
|
||||
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
||||
current := now
|
||||
server := httptest.NewServer(Server{
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-r"},
|
||||
}.Handler())
|
||||
tlsConfig := testQUICTLSConfig(t)
|
||||
server, err := StartQUICFabricServer(context.Background(), QUICFabricServerConfig{
|
||||
ListenAddr: "127.0.0.1:0",
|
||||
TLSConfig: tlsConfig,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("start quic fabric server: %v", err)
|
||||
}
|
||||
defer server.Close()
|
||||
|
||||
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"}
|
||||
certSHA256 := testQUICCertSHA256(t, tlsConfig)
|
||||
leases := []PeerRendezvousLease{
|
||||
{
|
||||
LeaseID: "lease-node-b-via-node-r",
|
||||
PeerNodeID: "node-b",
|
||||
RelayNodeID: "node-r",
|
||||
RelayEndpoint: server.URL,
|
||||
Transport: "relay_control",
|
||||
RelayEndpoint: "quic://" + server.Addr().String(),
|
||||
Transport: "relay_quic",
|
||||
ConnectivityMode: "relay_required",
|
||||
Priority: 10,
|
||||
ControlPlaneOnly: true,
|
||||
IssuedAt: now.Add(-time.Minute),
|
||||
ExpiresAt: now.Add(time.Minute),
|
||||
Metadata: peerConnectionProbeMetadata(t, certSHA256),
|
||||
},
|
||||
}
|
||||
cache := NewPeerCache(PeerCacheConfig{
|
||||
@@ -143,7 +158,7 @@ func TestPeerConnectionManagerProbesRelayControlLease(t *testing.T) {
|
||||
{
|
||||
EndpointID: "node-b-relay",
|
||||
NodeID: "node-b",
|
||||
Transport: "relay",
|
||||
Transport: "relay_quic",
|
||||
Address: "relay://fabric/node-b",
|
||||
Reachability: "relay",
|
||||
ConnectivityMode: "relay_required",
|
||||
@@ -161,6 +176,7 @@ func TestPeerConnectionManagerProbesRelayControlLease(t *testing.T) {
|
||||
PeerCache: cache,
|
||||
Tracker: tracker,
|
||||
RendezvousLeases: leases,
|
||||
QUICTransport: NewQUICFabricTransport(nil),
|
||||
ProbeTimeout: time.Second,
|
||||
Now: func() time.Time {
|
||||
current = current.Add(10 * time.Millisecond)
|
||||
@@ -189,15 +205,37 @@ func TestPeerConnectionManagerProbesRelayControlLease(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestPeerConnectionProbeTargetKeepsPeerForLocalRelayReverseQUIC(t *testing.T) {
|
||||
intent := PeerConnectionIntent{
|
||||
NodeID: "node-b",
|
||||
RelayCandidate: true,
|
||||
RelayNodeID: "node-a",
|
||||
Transport: "reverse_quic",
|
||||
}
|
||||
if got := peerConnectionProbeTargetNodeID(intent, "node-a"); got != "node-b" {
|
||||
t.Fatalf("local relay reverse probe target = %q, want peer node-b", got)
|
||||
}
|
||||
intent.RelayNodeID = "node-r"
|
||||
if got := peerConnectionProbeTargetNodeID(intent, "node-a"); got != "node-r" {
|
||||
t.Fatalf("remote relay probe target = %q, want relay node-r", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPeerConnectionManagerFallsBackAcrossEndpointCandidates(t *testing.T) {
|
||||
now := time.Date(2026, 4, 30, 12, 0, 0, 0, time.UTC)
|
||||
current := now
|
||||
server := httptest.NewServer(Server{
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"},
|
||||
}.Handler())
|
||||
tlsConfig := testQUICTLSConfig(t)
|
||||
server, err := StartQUICFabricServer(context.Background(), QUICFabricServerConfig{
|
||||
ListenAddr: "127.0.0.1:0",
|
||||
TLSConfig: tlsConfig,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("start quic fabric server: %v", err)
|
||||
}
|
||||
defer server.Close()
|
||||
|
||||
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"}
|
||||
certSHA256 := testQUICCertSHA256(t, tlsConfig)
|
||||
cache := NewPeerCache(PeerCacheConfig{
|
||||
Local: local,
|
||||
PeerEndpointCandidates: map[string][]PeerEndpointCandidate{
|
||||
@@ -205,8 +243,8 @@ func TestPeerConnectionManagerFallsBackAcrossEndpointCandidates(t *testing.T) {
|
||||
{
|
||||
EndpointID: "node-b-dead",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_http",
|
||||
Address: "http://127.0.0.1:1",
|
||||
Transport: "lan_quic",
|
||||
Address: "quic://127.0.0.1:1",
|
||||
Reachability: "private",
|
||||
ConnectivityMode: "private_lan",
|
||||
Priority: 1,
|
||||
@@ -214,11 +252,12 @@ func TestPeerConnectionManagerFallsBackAcrossEndpointCandidates(t *testing.T) {
|
||||
{
|
||||
EndpointID: "node-b-live",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_http",
|
||||
Address: server.URL,
|
||||
Transport: "lan_quic",
|
||||
Address: "quic://" + server.Addr().String(),
|
||||
Reachability: "private",
|
||||
ConnectivityMode: "private_lan",
|
||||
Priority: 2,
|
||||
Metadata: peerConnectionProbeMetadata(t, certSHA256),
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -227,11 +266,11 @@ func TestPeerConnectionManagerFallsBackAcrossEndpointCandidates(t *testing.T) {
|
||||
})
|
||||
tracker := NewPeerConnectionTracker(cache.Snapshot(), now)
|
||||
manager := NewPeerConnectionManager(PeerConnectionManagerConfig{
|
||||
Local: local,
|
||||
PeerCache: cache,
|
||||
Tracker: tracker,
|
||||
HTTPClient: &http.Client{Timeout: 100 * time.Millisecond},
|
||||
ProbeTimeout: 100 * time.Millisecond,
|
||||
Local: local,
|
||||
PeerCache: cache,
|
||||
Tracker: tracker,
|
||||
QUICTransport: NewQUICFabricTransport(nil),
|
||||
ProbeTimeout: 100 * time.Millisecond,
|
||||
Now: func() time.Time {
|
||||
current = current.Add(10 * time.Millisecond)
|
||||
return current
|
||||
@@ -243,7 +282,7 @@ func TestPeerConnectionManagerFallsBackAcrossEndpointCandidates(t *testing.T) {
|
||||
t.Fatalf("unexpected cycle: %+v", cycle)
|
||||
}
|
||||
result := cycle.Results[0]
|
||||
if result.LinkStatus != PeerConnectionProbeReachable || result.SelectedCandidateID != "node-b-live" || result.SelectedEndpoint != server.URL {
|
||||
if result.LinkStatus != PeerConnectionProbeReachable || result.SelectedCandidateID != "node-b-live" || result.SelectedEndpoint != "quic://"+server.Addr().String() {
|
||||
t.Fatalf("fallback did not select live candidate: %+v", result)
|
||||
}
|
||||
if len(result.CandidateResults) != 2 ||
|
||||
@@ -252,7 +291,85 @@ func TestPeerConnectionManagerFallsBackAcrossEndpointCandidates(t *testing.T) {
|
||||
t.Fatalf("candidate probe trail mismatch: %+v", result.CandidateResults)
|
||||
}
|
||||
snapshot := tracker.Snapshot()
|
||||
if snapshot.Ready != 1 || len(snapshot.Entries) != 1 || snapshot.Entries[0].BestCandidateID != "node-b-live" || snapshot.Entries[0].Endpoint != server.URL {
|
||||
if snapshot.Ready != 1 || len(snapshot.Entries) != 1 || snapshot.Entries[0].BestCandidateID != "node-b-live" || snapshot.Entries[0].Endpoint != "quic://"+server.Addr().String() {
|
||||
t.Fatalf("tracker did not retain selected candidate: %+v", snapshot)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPeerConnectionManagerSkipsUnspecifiedQUICCandidates(t *testing.T) {
|
||||
now := time.Date(2026, 5, 17, 6, 0, 0, 0, time.UTC)
|
||||
current := now
|
||||
tlsConfig := testQUICTLSConfig(t)
|
||||
server, err := StartQUICFabricServer(context.Background(), QUICFabricServerConfig{
|
||||
ListenAddr: "127.0.0.1:0",
|
||||
TLSConfig: tlsConfig,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("start quic fabric server: %v", err)
|
||||
}
|
||||
defer server.Close()
|
||||
|
||||
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"}
|
||||
certSHA256 := testQUICCertSHA256(t, tlsConfig)
|
||||
cache := NewPeerCache(PeerCacheConfig{
|
||||
Local: local,
|
||||
PeerEndpointCandidates: map[string][]PeerEndpointCandidate{
|
||||
"node-b": {
|
||||
{
|
||||
EndpointID: "node-b-unspecified-v6",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_quic",
|
||||
Address: "quic://[::]:19131",
|
||||
Reachability: "public",
|
||||
ConnectivityMode: "direct",
|
||||
Priority: 1,
|
||||
},
|
||||
{
|
||||
EndpointID: "node-b-live",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_quic",
|
||||
Address: "quic://" + server.Addr().String(),
|
||||
Reachability: "public",
|
||||
ConnectivityMode: "direct",
|
||||
Priority: 2,
|
||||
Metadata: peerConnectionProbeMetadata(t, certSHA256),
|
||||
},
|
||||
},
|
||||
},
|
||||
WarmPeerLimit: 1,
|
||||
Now: now,
|
||||
})
|
||||
tracker := NewPeerConnectionTracker(cache.Snapshot(), now)
|
||||
manager := NewPeerConnectionManager(PeerConnectionManagerConfig{
|
||||
Local: local,
|
||||
PeerCache: cache,
|
||||
Tracker: tracker,
|
||||
QUICTransport: NewQUICFabricTransport(nil),
|
||||
ProbeTimeout: time.Second,
|
||||
Now: func() time.Time {
|
||||
current = current.Add(10 * time.Millisecond)
|
||||
return current
|
||||
},
|
||||
})
|
||||
|
||||
cycle := manager.ProbeOnce(context.Background())
|
||||
if cycle.Attempted != 1 || cycle.Succeeded != 1 || len(cycle.Results) != 1 {
|
||||
t.Fatalf("unexpected cycle: %+v", cycle)
|
||||
}
|
||||
result := cycle.Results[0]
|
||||
if result.SelectedCandidateID != "node-b-live" || result.SelectedEndpoint != "quic://"+server.Addr().String() {
|
||||
t.Fatalf("manager did not skip unspecified endpoint: %+v", result)
|
||||
}
|
||||
if len(result.CandidateResults) != 1 || result.CandidateResults[0].CandidateID != "node-b-live" {
|
||||
t.Fatalf("unspecified endpoint should not be probed: %+v", result.CandidateResults)
|
||||
}
|
||||
}
|
||||
|
||||
// peerConnectionProbeMetadata returns the JSON metadata document
// {"peer_cert_sha256": certSHA256} used by the probe tests, failing the test
// immediately if it cannot be encoded.
func peerConnectionProbeMetadata(t *testing.T, certSHA256 string) json.RawMessage {
	t.Helper()

	metadata := struct {
		PeerCertSHA256 string `json:"peer_cert_sha256"`
	}{PeerCertSHA256: certSHA256}

	encoded, err := json.Marshal(metadata)
	if err != nil {
		t.Fatalf("marshal probe metadata: %v", err)
	}
	return encoded
}
|
||||
|
||||
@@ -9,7 +9,7 @@ func TestPeerConnectionTrackerTransitionsReadyAndDegraded(t *testing.T) {
|
||||
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
||||
tracker := NewPeerConnectionTracker(PeerCacheSnapshot{
|
||||
Entries: []PeerCacheEntry{
|
||||
{NodeID: "node-b", Warm: true, WarmReason: "route_adjacent", Endpoint: "http://node-b:19000"},
|
||||
{NodeID: "node-b", Warm: true, WarmReason: "route_adjacent", Endpoint: "quic://node-b:19443"},
|
||||
},
|
||||
}, now)
|
||||
|
||||
|
||||
@@ -76,12 +76,12 @@ func TestPeerRecoveryPlanMaintainsRelayReadyPeersInSteadyMode(t *testing.T) {
|
||||
Entries: []PeerCacheEntry{
|
||||
{
|
||||
NodeID: "node-c",
|
||||
Endpoint: "http://relay:19001",
|
||||
Endpoint: "quic://relay:19443",
|
||||
Warm: true,
|
||||
WarmReason: "rendezvous_lease",
|
||||
RendezvousLeaseID: "lease-1",
|
||||
RelayNodeID: "node-r",
|
||||
RelayEndpoint: "http://relay:19001",
|
||||
RelayEndpoint: "quic://relay:19443",
|
||||
RelayControl: true,
|
||||
},
|
||||
},
|
||||
@@ -121,7 +121,7 @@ func TestPeerRecoveryPlanCapsTargetByConnectablePeers(t *testing.T) {
|
||||
func recoveryPlanPeer(nodeID string, warm bool, recoverySeed bool, warmReason string) PeerCacheEntry {
|
||||
return PeerCacheEntry{
|
||||
NodeID: nodeID,
|
||||
Endpoint: "http://" + nodeID + ":19001",
|
||||
Endpoint: "quic://" + nodeID + ":19443",
|
||||
Warm: warm,
|
||||
WarmReason: warmReason,
|
||||
RecoverySeed: recoverySeed,
|
||||
|
||||
@@ -2,42 +2,369 @@ package mesh
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
|
||||
)
|
||||
|
||||
type ProductionForwardTransport interface {
|
||||
SendProduction(ctx context.Context, nextNodeID string, envelope ProductionEnvelope) (ProductionForwardResult, error)
|
||||
}
|
||||
|
||||
type HTTPProductionForwardTransport struct {
|
||||
PeerURLs map[string]string
|
||||
HTTPClient *http.Client
|
||||
type QUICProductionForwardTransport struct {
|
||||
Targets map[string]FabricTransportTarget
|
||||
RouteSets map[string]FabricRouteSet
|
||||
Transport FabricTransport
|
||||
Router FabricChannelRouter
|
||||
Timeout time.Duration
|
||||
Pressure *FabricRoutePressureTracker
|
||||
Health *FabricRouteHealthTracker
|
||||
sequence atomic.Uint64
|
||||
}
|
||||
|
||||
func NewHTTPProductionForwardTransport(peerURLs map[string]string) *HTTPProductionForwardTransport {
|
||||
normalized := make(map[string]string, len(peerURLs))
|
||||
for nodeID, baseURL := range peerURLs {
|
||||
type QUICProductionForwardTransportSnapshot struct {
|
||||
RoutePressure FabricRoutePressureSnapshot `json:"route_pressure"`
|
||||
RouteHealth FabricRouteHealthSnapshot `json:"route_health,omitempty"`
|
||||
}
|
||||
|
||||
func NewQUICProductionForwardTransport(targets map[string]FabricTransportTarget, transport *QUICFabricTransport) *QUICProductionForwardTransport {
|
||||
routeSets := make(map[string]FabricRouteSet, len(targets))
|
||||
for nodeID, target := range targets {
|
||||
nodeID = strings.TrimSpace(nodeID)
|
||||
baseURL = strings.TrimRight(strings.TrimSpace(baseURL), "/")
|
||||
if nodeID != "" && baseURL != "" {
|
||||
normalized[nodeID] = baseURL
|
||||
target.Endpoint = strings.TrimRight(strings.TrimSpace(target.Endpoint), "/")
|
||||
target.Transport = strings.TrimSpace(target.Transport)
|
||||
if nodeID != "" && target.Endpoint != "" {
|
||||
target.PeerID = firstNonEmpty(strings.TrimSpace(target.PeerID), nodeID)
|
||||
routeSets[nodeID] = FabricRouteSetForTransportTargets("", "", nodeID, []FabricTransportTarget{target})
|
||||
}
|
||||
}
|
||||
return &HTTPProductionForwardTransport{PeerURLs: normalized}
|
||||
if transport == nil {
|
||||
transport = NewQUICFabricTransport(nil)
|
||||
}
|
||||
return NewQUICProductionForwardTransportFromRouteSets(routeSets, transport)
|
||||
}
|
||||
|
||||
func (t *HTTPProductionForwardTransport) SendProduction(ctx context.Context, nextNodeID string, envelope ProductionEnvelope) (ProductionForwardResult, error) {
|
||||
if t == nil {
|
||||
return ProductionForwardResult{}, ErrForwardPeerUnavailable
|
||||
func NewQUICProductionForwardTransportFromRouteSets(routeSets map[string]FabricRouteSet, transport FabricTransport) *QUICProductionForwardTransport {
|
||||
normalizedRouteSets := make(map[string]FabricRouteSet, len(routeSets))
|
||||
targets := make(map[string]FabricTransportTarget, len(routeSets))
|
||||
for nodeID, routeSet := range routeSets {
|
||||
nodeID = strings.TrimSpace(nodeID)
|
||||
if nodeID == "" {
|
||||
continue
|
||||
}
|
||||
normalizedRouteSets[nodeID] = routeSet
|
||||
if target, err := FabricTransportTargetForRoute(routeSet.Primary); err == nil {
|
||||
targets[nodeID] = target
|
||||
}
|
||||
}
|
||||
baseURL := strings.TrimRight(strings.TrimSpace(t.PeerURLs[nextNodeID]), "/")
|
||||
if baseURL == "" {
|
||||
return ProductionForwardResult{}, ErrForwardPeerUnavailable
|
||||
if transport == nil {
|
||||
transport = NewQUICFabricTransport(nil)
|
||||
}
|
||||
client := NewClient(baseURL)
|
||||
if t.HTTPClient != nil {
|
||||
client.HTTPClient = t.HTTPClient
|
||||
return &QUICProductionForwardTransport{
|
||||
Targets: targets,
|
||||
RouteSets: normalizedRouteSets,
|
||||
Transport: transport,
|
||||
Router: NewFabricChannelRouter(FabricChannelRouterConfig{
|
||||
MaxAckLatencyMs: 2000,
|
||||
MinRerouteInterval: 50 * time.Millisecond,
|
||||
}),
|
||||
Timeout: 30 * time.Second,
|
||||
Pressure: NewFabricRoutePressureTracker(),
|
||||
Health: NewFabricRouteHealthTracker(30 * time.Second),
|
||||
}
|
||||
return client.SendProduction(ctx, envelope)
|
||||
}
|
||||
|
||||
func (t *QUICProductionForwardTransport) SendProduction(ctx context.Context, nextNodeID string, envelope ProductionEnvelope) (ProductionForwardResult, error) {
|
||||
if t == nil || t.Transport == nil {
|
||||
return ProductionForwardResult{}, ErrForwardPeerUnavailable
|
||||
}
|
||||
nextNodeID = strings.TrimSpace(nextNodeID)
|
||||
routeSet, ok := t.RouteSets[nextNodeID]
|
||||
if !ok {
|
||||
target, targetOK := t.Targets[nextNodeID]
|
||||
if !targetOK || strings.TrimSpace(target.Endpoint) == "" {
|
||||
return ProductionForwardResult{}, ErrForwardPeerUnavailable
|
||||
}
|
||||
routeSet = FabricRouteSetForTransportTargets(envelope.ClusterID, envelope.CurrentHopNodeID, nextNodeID, []FabricTransportTarget{target})
|
||||
}
|
||||
spec := FabricChannelSpec{
|
||||
ChannelID: firstNonEmpty(strings.TrimSpace(envelope.MessageID), fmt.Sprintf("production-%d", t.sequence.Add(1))),
|
||||
ClusterID: envelope.ClusterID,
|
||||
SourceNodeID: firstNonEmpty(productionRouteSetSourceNodeID(routeSet), envelope.CurrentHopNodeID),
|
||||
TargetKind: FabricChannelTargetNode,
|
||||
TargetID: nextNodeID,
|
||||
TrafficClass: FabricServiceChannelReliable,
|
||||
CreatedAt: time.Now().UTC(),
|
||||
}
|
||||
payload, err := json.Marshal(envelope)
|
||||
if err != nil {
|
||||
return ProductionForwardResult{}, err
|
||||
}
|
||||
result, err := t.sendProductionWithRouteSet(ctx, spec, routeSet, payload)
|
||||
if err != nil {
|
||||
return ProductionForwardResult{}, err
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func productionRouteSetSourceNodeID(routeSet FabricRouteSet) string {
|
||||
for _, route := range flattenFabricRouteSet(routeSet) {
|
||||
if sourceNodeID := strings.TrimSpace(route.SourceNodeID); sourceNodeID != "" {
|
||||
return sourceNodeID
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (t *QUICProductionForwardTransport) sendProductionWithRouteSet(ctx context.Context, spec FabricChannelSpec, routeSet FabricRouteSet, payload []byte) (ProductionForwardResult, error) {
|
||||
router := t.Router
|
||||
if router.Config.MaxRoutePressure == 0 {
|
||||
router = NewFabricChannelRouter(FabricChannelRouterConfig{MaxAckLatencyMs: 2000, MinRerouteInterval: 50 * time.Millisecond})
|
||||
}
|
||||
routeSet = t.routeSetForScheduling(routeSet)
|
||||
channel, _, err := router.OpenChannel(spec, routeSet, time.Now().UTC())
|
||||
if err != nil {
|
||||
return ProductionForwardResult{}, err
|
||||
}
|
||||
timeout := t.Timeout
|
||||
if timeout <= 0 {
|
||||
timeout = 30 * time.Second
|
||||
}
|
||||
for {
|
||||
routeSet = t.routeSetForScheduling(routeSet)
|
||||
route, ok := findFabricRoute(routeSet, channel.RouteID)
|
||||
if !ok {
|
||||
return ProductionForwardResult{}, ErrFabricRouteNotFound
|
||||
}
|
||||
target, err := FabricTransportTargetForRoute(route)
|
||||
if err != nil {
|
||||
return ProductionForwardResult{}, err
|
||||
}
|
||||
target.PeerID = firstNonEmpty(strings.TrimSpace(target.PeerID), spec.TargetID)
|
||||
target.MaxPayload = fabricproto.DefaultMaxPayload
|
||||
releaseRoute := t.acquireProductionRoute(route.RouteID)
|
||||
session, err := t.Transport.Connect(ctx, target)
|
||||
if err != nil {
|
||||
releaseRoute()
|
||||
t.markProductionRouteFailure(route.RouteID, err)
|
||||
updated, event, rerouteErr := router.ObserveChannel(channel, routeSet, FabricChannelObservation{
|
||||
ChannelID: spec.ChannelID,
|
||||
RouteID: route.RouteID,
|
||||
Failed: true,
|
||||
Reason: "connect_failed",
|
||||
ObservedAt: time.Now().UTC(),
|
||||
}, time.Now().UTC())
|
||||
channel = updated
|
||||
if event.Type == FabricChannelRouteEventReroute {
|
||||
continue
|
||||
}
|
||||
if rerouteErr != nil {
|
||||
return ProductionForwardResult{}, rerouteErr
|
||||
}
|
||||
return ProductionForwardResult{}, err
|
||||
}
|
||||
response, ackMs, err := t.sendProductionOnSession(ctx, session, payload, timeout)
|
||||
_ = session.Close()
|
||||
releaseRoute()
|
||||
if err == nil {
|
||||
t.markProductionRouteSuccess(route.RouteID)
|
||||
_, _, _ = router.ObserveChannel(channel, routeSet, FabricChannelObservation{
|
||||
ChannelID: spec.ChannelID,
|
||||
RouteID: route.RouteID,
|
||||
AckLatencyMs: ackMs,
|
||||
BytesSent: uint64(len(payload)),
|
||||
FramesSent: 1,
|
||||
BytesRecv: uint64(len(response.Payload)),
|
||||
FramesRecv: 1,
|
||||
ObservedAt: time.Now().UTC(),
|
||||
}, time.Now().UTC())
|
||||
return decodeQUICProductionForwardResponse(response.Payload)
|
||||
}
|
||||
t.markProductionRouteFailure(route.RouteID, err)
|
||||
updated, event, rerouteErr := router.ObserveChannel(channel, routeSet, FabricChannelObservation{
|
||||
ChannelID: spec.ChannelID,
|
||||
RouteID: route.RouteID,
|
||||
Failed: true,
|
||||
Reason: "response_failed",
|
||||
ObservedAt: time.Now().UTC(),
|
||||
}, time.Now().UTC())
|
||||
channel = updated
|
||||
if event.Type == FabricChannelRouteEventReroute {
|
||||
continue
|
||||
}
|
||||
if rerouteErr != nil {
|
||||
return ProductionForwardResult{}, rerouteErr
|
||||
}
|
||||
return ProductionForwardResult{}, err
|
||||
}
|
||||
}
|
||||
|
||||
func (t *QUICProductionForwardTransport) routeSetWithActiveChannels(routeSet FabricRouteSet) FabricRouteSet {
|
||||
if t == nil || t.Pressure == nil {
|
||||
return routeSet
|
||||
}
|
||||
return t.Pressure.Apply(routeSet)
|
||||
}
|
||||
|
||||
func (t *QUICProductionForwardTransport) routeSetForScheduling(routeSet FabricRouteSet) FabricRouteSet {
|
||||
if t != nil && t.Health != nil {
|
||||
routeSet = t.Health.Apply(routeSet, time.Now().UTC())
|
||||
}
|
||||
return t.routeSetWithActiveChannels(routeSet)
|
||||
}
|
||||
|
||||
func (t *QUICProductionForwardTransport) acquireProductionRoute(routeID string) func() {
|
||||
if t == nil || t.Pressure == nil {
|
||||
return func() {}
|
||||
}
|
||||
return t.Pressure.Acquire(routeID)
|
||||
}
|
||||
|
||||
func (t *QUICProductionForwardTransport) markProductionRouteFailure(routeID string, err error) {
|
||||
if t == nil || t.Health == nil || err == nil {
|
||||
return
|
||||
}
|
||||
t.Health.MarkFailure(routeID, err.Error(), time.Now().UTC())
|
||||
}
|
||||
|
||||
func (t *QUICProductionForwardTransport) markProductionRouteSuccess(routeID string) {
|
||||
if t == nil || t.Health == nil {
|
||||
return
|
||||
}
|
||||
t.Health.MarkSuccess(routeID)
|
||||
}
|
||||
|
||||
func (t *QUICProductionForwardTransport) Snapshot() QUICProductionForwardTransportSnapshot {
|
||||
if t == nil {
|
||||
return QUICProductionForwardTransportSnapshot{}
|
||||
}
|
||||
var pressure FabricRoutePressureSnapshot
|
||||
if t.Pressure != nil {
|
||||
pressure = t.Pressure.SnapshotPressure()
|
||||
}
|
||||
var health FabricRouteHealthSnapshot
|
||||
if t.Health != nil {
|
||||
health = t.Health.Snapshot(time.Now().UTC())
|
||||
}
|
||||
return QUICProductionForwardTransportSnapshot{RoutePressure: pressure, RouteHealth: health}
|
||||
}
|
||||
|
||||
func (t *QUICProductionForwardTransport) sendProductionOnSession(ctx context.Context, session FabricTransportSession, payload []byte, timeout time.Duration) (fabricproto.Frame, int64, error) {
|
||||
sequence := t.sequence.Add(1)
|
||||
if err := session.Send(ctx, fabricproto.Frame{
|
||||
Type: fabricproto.FrameData,
|
||||
TrafficClass: fabricproto.TrafficClassReliable,
|
||||
StreamID: ProductionForwardQUICStreamID,
|
||||
Sequence: sequence,
|
||||
Payload: payload,
|
||||
}); err != nil {
|
||||
return fabricproto.Frame{}, 0, err
|
||||
}
|
||||
waitCtx := ctx
|
||||
if timeout > 0 {
|
||||
var cancel context.CancelFunc
|
||||
waitCtx, cancel = context.WithTimeout(ctx, timeout)
|
||||
defer cancel()
|
||||
}
|
||||
started := time.Now()
|
||||
for {
|
||||
select {
|
||||
case <-waitCtx.Done():
|
||||
return fabricproto.Frame{}, 0, waitCtx.Err()
|
||||
case err, ok := <-session.Errors():
|
||||
if !ok {
|
||||
return fabricproto.Frame{}, 0, ErrForwardPeerUnavailable
|
||||
}
|
||||
if err != nil {
|
||||
return fabricproto.Frame{}, 0, err
|
||||
}
|
||||
case frame, ok := <-session.Frames():
|
||||
if !ok {
|
||||
return fabricproto.Frame{}, 0, ErrForwardPeerUnavailable
|
||||
}
|
||||
if frame.Type != fabricproto.FrameData || frame.StreamID != ProductionForwardQUICStreamID || frame.Sequence != sequence {
|
||||
continue
|
||||
}
|
||||
return frame, time.Since(started).Milliseconds(), nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func decodeQUICProductionForwardResponse(payload []byte) (ProductionForwardResult, error) {
|
||||
var response quicProductionForwardResponse
|
||||
if err := json.Unmarshal(payload, &response); err != nil {
|
||||
return ProductionForwardResult{}, err
|
||||
}
|
||||
if strings.TrimSpace(response.Error) != "" {
|
||||
return ProductionForwardResult{}, fmt.Errorf("%w: %s", ErrForwardPeerUnavailable, response.Error)
|
||||
}
|
||||
return response.Result, nil
|
||||
}
|
||||
|
||||
func FabricRouteSetForTransportTargets(clusterID string, sourceNodeID string, targetNodeID string, targets []FabricTransportTarget) FabricRouteSet {
|
||||
routeSet := FabricRouteSet{TargetKind: FabricChannelTargetNode, TargetID: strings.TrimSpace(targetNodeID)}
|
||||
routes := make([]FabricRoute, 0, len(targets))
|
||||
for index, target := range targets {
|
||||
target.Endpoint = strings.TrimRight(strings.TrimSpace(target.Endpoint), "/")
|
||||
if strings.TrimSpace(target.Endpoint) == "" {
|
||||
continue
|
||||
}
|
||||
peerID := firstNonEmpty(strings.TrimSpace(target.PeerID), strings.TrimSpace(targetNodeID))
|
||||
routeID := strings.TrimSpace(target.EndpointID)
|
||||
if routeID == "" {
|
||||
routeID = fmt.Sprintf("%s-quic-%d", peerID, index)
|
||||
}
|
||||
routes = append(routes, FabricRoute{
|
||||
RouteID: routeID,
|
||||
ClusterID: strings.TrimSpace(clusterID),
|
||||
SourceNodeID: strings.TrimSpace(sourceNodeID),
|
||||
DestinationNodeID: peerID,
|
||||
Hops: []FabricRouteHop{{
|
||||
NodeID: peerID,
|
||||
Mode: fabricRouteModeForTransportTarget(target),
|
||||
EndpointID: strings.TrimSpace(target.EndpointID),
|
||||
Address: target.Endpoint,
|
||||
PeerCertSHA256: strings.TrimSpace(target.PeerCertSHA256),
|
||||
}},
|
||||
BaseLatencyMs: routeLatencyForIndex(index),
|
||||
Capacity: 100,
|
||||
ActiveChannels: 0,
|
||||
Healthy: true,
|
||||
LastUpdatedAt: time.Now().UTC(),
|
||||
})
|
||||
}
|
||||
if len(routes) == 0 {
|
||||
return routeSet
|
||||
}
|
||||
routeSet.Primary = routes[0]
|
||||
if len(routes) > 1 {
|
||||
routeSet.WarmStandby = append(routeSet.WarmStandby, routes[1:]...)
|
||||
}
|
||||
return routeSet
|
||||
}
|
||||
|
||||
func fabricRouteModeForTransportTarget(target FabricTransportTarget) FabricRouteMode {
|
||||
switch strings.ToLower(strings.TrimSpace(target.Transport)) {
|
||||
case string(FabricRouteLAN):
|
||||
return FabricRouteLAN
|
||||
case string(FabricRouteReverse):
|
||||
return FabricRouteReverse
|
||||
case string(FabricRouteRelay):
|
||||
return FabricRouteRelay
|
||||
case string(FabricRouteICE):
|
||||
return FabricRouteICE
|
||||
default:
|
||||
return FabricRouteDirect
|
||||
}
|
||||
}
|
||||
|
||||
// routeLatencyForIndex returns the base latency in milliseconds assigned to
// the route at position index: 10 for the first route (and for any
// non-positive index), plus one for every later position.
func routeLatencyForIndex(index int) int {
	latency := 10
	if index > 0 {
		latency += index
	}
	return latency
}
|
||||
|
||||
@@ -0,0 +1,339 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
|
||||
)
|
||||
|
||||
func TestQUICProductionForwardTransportReroutesOnConnectFailure(t *testing.T) {
|
||||
transport := newFakeProductionForwardFabricTransport()
|
||||
transport.failConnect["quic://dead.example.test:19443"] = true
|
||||
transport.results["quic://fast.example.test:19443"] = ProductionForwardResult{
|
||||
Delivered: true,
|
||||
MessageID: "message-1",
|
||||
RouteID: "route-1",
|
||||
}
|
||||
forward := NewQUICProductionForwardTransportFromRouteSets(map[string]FabricRouteSet{
|
||||
"node-b": FabricRouteSetForTransportTargets("cluster-a", "node-a", "node-b", []FabricTransportTarget{
|
||||
{EndpointID: "dead", PeerID: "node-b", Endpoint: "quic://dead.example.test:19443", Transport: "quic"},
|
||||
{EndpointID: "fast", PeerID: "node-b", Endpoint: "quic://fast.example.test:19443", Transport: "quic"},
|
||||
}),
|
||||
}, transport)
|
||||
forward.Timeout = time.Second
|
||||
|
||||
result, err := forward.SendProduction(context.Background(), "node-b", testProductionForwardEnvelope("message-1"))
|
||||
if err != nil {
|
||||
t.Fatalf("send production: %v", err)
|
||||
}
|
||||
if !result.Delivered || result.MessageID != "message-1" {
|
||||
t.Fatalf("result = %+v", result)
|
||||
}
|
||||
if got := transport.connectCount("quic://dead.example.test:19443"); got != 1 {
|
||||
t.Fatalf("dead connect count = %d, want 1", got)
|
||||
}
|
||||
if got := transport.connectCount("quic://fast.example.test:19443"); got != 1 {
|
||||
t.Fatalf("fast connect count = %d, want 1", got)
|
||||
}
|
||||
snapshot := forward.Snapshot()
|
||||
if snapshot.RoutePressure.AcquiredTotal != 2 || snapshot.RoutePressure.ReleasedTotal != 2 || snapshot.RoutePressure.MaxActiveTotal == 0 {
|
||||
t.Fatalf("route pressure snapshot = %+v", snapshot)
|
||||
}
|
||||
}
|
||||
|
||||
func TestQUICProductionForwardTransportQuarantinesFailedRoute(t *testing.T) {
|
||||
transport := newFakeProductionForwardFabricTransport()
|
||||
transport.failConnect["quic://dead.example.test:19443"] = true
|
||||
transport.results["quic://fast.example.test:19443"] = ProductionForwardResult{Delivered: true, MessageID: "message-1"}
|
||||
forward := NewQUICProductionForwardTransportFromRouteSets(map[string]FabricRouteSet{
|
||||
"node-b": FabricRouteSetForTransportTargets("cluster-a", "node-a", "node-b", []FabricTransportTarget{
|
||||
{EndpointID: "dead", PeerID: "node-b", Endpoint: "quic://dead.example.test:19443", Transport: "quic"},
|
||||
{EndpointID: "fast", PeerID: "node-b", Endpoint: "quic://fast.example.test:19443", Transport: "quic"},
|
||||
}),
|
||||
}, transport)
|
||||
forward.Timeout = time.Second
|
||||
|
||||
for i := 0; i < 2; i++ {
|
||||
result, err := forward.SendProduction(context.Background(), "node-b", testProductionForwardEnvelope("message-1"))
|
||||
if err != nil {
|
||||
t.Fatalf("send production #%d: %v", i+1, err)
|
||||
}
|
||||
if !result.Delivered {
|
||||
t.Fatalf("result #%d = %+v", i+1, result)
|
||||
}
|
||||
}
|
||||
if got := transport.connectCount("quic://dead.example.test:19443"); got != 1 {
|
||||
t.Fatalf("dead connect count = %d, want quarantine after first failure", got)
|
||||
}
|
||||
if got := transport.connectCount("quic://fast.example.test:19443"); got != 2 {
|
||||
t.Fatalf("fast connect count = %d, want both sends on healthy route", got)
|
||||
}
|
||||
snapshot := forward.Snapshot()
|
||||
if snapshot.RouteHealth.Quarantined["dead"].Failures != 1 {
|
||||
t.Fatalf("route health snapshot = %+v, want dead route quarantined", snapshot.RouteHealth)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricRouteHealthTrackerExpiresQuarantine(t *testing.T) {
|
||||
routeSet := FabricRouteSetForTransportTargets("cluster-a", "node-a", "node-b", []FabricTransportTarget{
|
||||
{EndpointID: "dead", PeerID: "node-b", Endpoint: "quic://dead.example.test:19443", Transport: "quic"},
|
||||
{EndpointID: "fast", PeerID: "node-b", Endpoint: "quic://fast.example.test:19443", Transport: "quic"},
|
||||
})
|
||||
tracker := NewFabricRouteHealthTracker(time.Second)
|
||||
now := time.Date(2026, 5, 16, 12, 0, 0, 0, time.UTC)
|
||||
|
||||
tracker.MarkFailure("dead", "connect failed", now)
|
||||
applied := tracker.Apply(routeSet, now.Add(500*time.Millisecond))
|
||||
if applied.Primary.Healthy || !applied.Primary.Degraded {
|
||||
t.Fatalf("primary after quarantine = %+v, want unhealthy degraded route", applied.Primary)
|
||||
}
|
||||
if len(tracker.Snapshot(now.Add(500*time.Millisecond)).Quarantined) != 1 {
|
||||
t.Fatalf("route health snapshot = %+v, want one quarantined route", tracker.Snapshot(now.Add(500*time.Millisecond)))
|
||||
}
|
||||
|
||||
applied = tracker.Apply(routeSet, now.Add(2*time.Second))
|
||||
if !applied.Primary.Healthy || applied.Primary.Degraded {
|
||||
t.Fatalf("primary after ttl = %+v, want route restored", applied.Primary)
|
||||
}
|
||||
if snapshot := tracker.Snapshot(now.Add(2 * time.Second)); len(snapshot.Quarantined) != 0 {
|
||||
t.Fatalf("route health snapshot after ttl = %+v, want empty quarantine", snapshot)
|
||||
}
|
||||
}
|
||||
|
||||
func TestQUICProductionForwardTransportReroutesOnResponseTimeout(t *testing.T) {
|
||||
transport := newFakeProductionForwardFabricTransport()
|
||||
transport.delays["quic://slow.example.test:19443"] = 100 * time.Millisecond
|
||||
transport.results["quic://slow.example.test:19443"] = ProductionForwardResult{Delivered: true, MessageID: "message-1"}
|
||||
transport.results["quic://fast.example.test:19443"] = ProductionForwardResult{Delivered: true, MessageID: "message-1"}
|
||||
forward := NewQUICProductionForwardTransportFromRouteSets(map[string]FabricRouteSet{
|
||||
"node-b": FabricRouteSetForTransportTargets("cluster-a", "node-a", "node-b", []FabricTransportTarget{
|
||||
{EndpointID: "slow", PeerID: "node-b", Endpoint: "quic://slow.example.test:19443", Transport: "quic"},
|
||||
{EndpointID: "fast", PeerID: "node-b", Endpoint: "quic://fast.example.test:19443", Transport: "quic"},
|
||||
}),
|
||||
}, transport)
|
||||
forward.Timeout = 10 * time.Millisecond
|
||||
|
||||
result, err := forward.SendProduction(context.Background(), "node-b", testProductionForwardEnvelope("message-1"))
|
||||
if err != nil {
|
||||
t.Fatalf("send production: %v", err)
|
||||
}
|
||||
if !result.Delivered || result.MessageID != "message-1" {
|
||||
t.Fatalf("result = %+v", result)
|
||||
}
|
||||
if got := transport.connectCount("quic://slow.example.test:19443"); got != 1 {
|
||||
t.Fatalf("slow connect count = %d, want 1", got)
|
||||
}
|
||||
if got := transport.connectCount("quic://fast.example.test:19443"); got != 1 {
|
||||
t.Fatalf("fast connect count = %d, want 1", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestQUICProductionForwardTransportSchedulesWithRouteSetSourceForForwardedEnvelope(t *testing.T) {
|
||||
transport := newFakeProductionForwardFabricTransport()
|
||||
transport.results["quic://node-c.example.test:19443"] = ProductionForwardResult{Delivered: true, MessageID: "message-forwarded"}
|
||||
forward := NewQUICProductionForwardTransportFromRouteSets(map[string]FabricRouteSet{
|
||||
"node-c": FabricRouteSetForTransportTargets("cluster-a", "node-b", "node-c", []FabricTransportTarget{
|
||||
{EndpointID: "node-c-direct", PeerID: "node-c", Endpoint: "quic://node-c.example.test:19443", Transport: "quic"},
|
||||
}),
|
||||
}, transport)
|
||||
forward.Timeout = time.Second
|
||||
envelope := testProductionForwardEnvelope("message-forwarded")
|
||||
envelope.ClusterID = "cluster-a"
|
||||
envelope.SourceNodeID = "node-a"
|
||||
envelope.DestinationNodeID = "node-c"
|
||||
envelope.CurrentHopNodeID = "node-c"
|
||||
envelope.NextHopNodeID = "node-c"
|
||||
|
||||
result, err := forward.SendProduction(context.Background(), "node-c", envelope)
|
||||
if err != nil {
|
||||
t.Fatalf("send production: %v", err)
|
||||
}
|
||||
if !result.Delivered || result.MessageID != "message-forwarded" {
|
||||
t.Fatalf("result = %+v", result)
|
||||
}
|
||||
if got := transport.connectCount("quic://node-c.example.test:19443"); got != 1 {
|
||||
t.Fatalf("connect count = %d, want 1", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestQUICProductionForwardTransportSpreadsConcurrentChannelsByActivePressure(t *testing.T) {
|
||||
transport := newFakeProductionForwardFabricTransport()
|
||||
transport.delays["quic://route-a.example.test:19443"] = 80 * time.Millisecond
|
||||
transport.results["quic://route-a.example.test:19443"] = ProductionForwardResult{Delivered: true, MessageID: "message-1"}
|
||||
transport.results["quic://route-b.example.test:19443"] = ProductionForwardResult{Delivered: true, MessageID: "message-2"}
|
||||
routeSet := FabricRouteSetForTransportTargets("cluster-a", "node-a", "node-b", []FabricTransportTarget{
|
||||
{EndpointID: "route-a", PeerID: "node-b", Endpoint: "quic://route-a.example.test:19443", Transport: "quic"},
|
||||
{EndpointID: "route-b", PeerID: "node-b", Endpoint: "quic://route-b.example.test:19443", Transport: "quic"},
|
||||
})
|
||||
routeSet.Primary.Capacity = 100
|
||||
routeSet.WarmStandby[0].Capacity = 100
|
||||
forward := NewQUICProductionForwardTransportFromRouteSets(map[string]FabricRouteSet{"node-b": routeSet}, transport)
|
||||
forward.Timeout = time.Second
|
||||
|
||||
firstDone := make(chan error, 1)
|
||||
go func() {
|
||||
_, err := forward.SendProduction(context.Background(), "node-b", testProductionForwardEnvelope("message-1"))
|
||||
firstDone <- err
|
||||
}()
|
||||
transport.waitForConnect(t, "quic://route-a.example.test:19443", 1)
|
||||
result, err := forward.SendProduction(context.Background(), "node-b", testProductionForwardEnvelope("message-2"))
|
||||
if err != nil {
|
||||
t.Fatalf("second send production: %v", err)
|
||||
}
|
||||
if !result.Delivered || result.MessageID != "message-2" {
|
||||
t.Fatalf("second result = %+v", result)
|
||||
}
|
||||
if got := transport.connectCount("quic://route-b.example.test:19443"); got != 1 {
|
||||
t.Fatalf("route-b connect count = %d, want 1", got)
|
||||
}
|
||||
if err := <-firstDone; err != nil {
|
||||
t.Fatalf("first send production: %v", err)
|
||||
}
|
||||
snapshot := forward.Snapshot()
|
||||
if snapshot.RoutePressure.MaxActive["route-a"] != 1 || snapshot.RoutePressure.MaxActive["route-b"] != 1 || snapshot.RoutePressure.AcquiredTotal != 2 {
|
||||
t.Fatalf("route pressure snapshot = %+v", snapshot)
|
||||
}
|
||||
}
|
||||
|
||||
type fakeProductionForwardFabricTransport struct {
|
||||
mu sync.Mutex
|
||||
failConnect map[string]bool
|
||||
delays map[string]time.Duration
|
||||
results map[string]ProductionForwardResult
|
||||
connects map[string]int
|
||||
}
|
||||
|
||||
func newFakeProductionForwardFabricTransport() *fakeProductionForwardFabricTransport {
|
||||
return &fakeProductionForwardFabricTransport{
|
||||
failConnect: map[string]bool{},
|
||||
delays: map[string]time.Duration{},
|
||||
results: map[string]ProductionForwardResult{},
|
||||
connects: map[string]int{},
|
||||
}
|
||||
}
|
||||
|
||||
func (t *fakeProductionForwardFabricTransport) Connect(_ context.Context, target FabricTransportTarget) (FabricTransportSession, error) {
|
||||
endpoint := target.Endpoint
|
||||
t.mu.Lock()
|
||||
t.connects[endpoint]++
|
||||
fail := t.failConnect[endpoint]
|
||||
delay := t.delays[endpoint]
|
||||
result := t.results[endpoint]
|
||||
t.mu.Unlock()
|
||||
if fail {
|
||||
return nil, ErrForwardPeerUnavailable
|
||||
}
|
||||
return &fakeProductionForwardFabricSession{
|
||||
delay: delay,
|
||||
result: result,
|
||||
frames: make(chan fabricproto.Frame, 16),
|
||||
errors: make(chan error, 1),
|
||||
done: make(chan struct{}),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (t *fakeProductionForwardFabricTransport) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (t *fakeProductionForwardFabricTransport) connectCount(endpoint string) int {
|
||||
t.mu.Lock()
|
||||
defer t.mu.Unlock()
|
||||
return t.connects[endpoint]
|
||||
}
|
||||
|
||||
func (t *fakeProductionForwardFabricTransport) waitForConnect(tb testing.TB, endpoint string, count int) {
|
||||
tb.Helper()
|
||||
deadline := time.Now().Add(time.Second)
|
||||
for {
|
||||
t.mu.Lock()
|
||||
got := t.connects[endpoint]
|
||||
t.mu.Unlock()
|
||||
if got >= count {
|
||||
return
|
||||
}
|
||||
if time.Now().After(deadline) {
|
||||
tb.Fatalf("timed out waiting for %s connect count %d, got %d", endpoint, count, got)
|
||||
}
|
||||
time.Sleep(time.Millisecond)
|
||||
}
|
||||
}
|
||||
|
||||
type fakeProductionForwardFabricSession struct {
|
||||
delay time.Duration
|
||||
result ProductionForwardResult
|
||||
frames chan fabricproto.Frame
|
||||
errors chan error
|
||||
done chan struct{}
|
||||
once sync.Once
|
||||
}
|
||||
|
||||
func (s *fakeProductionForwardFabricSession) Send(_ context.Context, frame fabricproto.Frame) error {
|
||||
if frame.Type != fabricproto.FrameData {
|
||||
return nil
|
||||
}
|
||||
responsePayload, _ := json.Marshal(quicProductionForwardResponse{Result: s.result})
|
||||
go func() {
|
||||
if s.delay > 0 {
|
||||
time.Sleep(s.delay)
|
||||
}
|
||||
select {
|
||||
case <-s.done:
|
||||
case s.frames <- fabricproto.Frame{
|
||||
Type: fabricproto.FrameData,
|
||||
TrafficClass: frame.TrafficClass,
|
||||
StreamID: frame.StreamID,
|
||||
Sequence: frame.Sequence,
|
||||
Payload: responsePayload,
|
||||
}:
|
||||
}
|
||||
}()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *fakeProductionForwardFabricSession) Frames() <-chan fabricproto.Frame {
|
||||
return s.frames
|
||||
}
|
||||
|
||||
func (s *fakeProductionForwardFabricSession) Errors() <-chan error {
|
||||
return s.errors
|
||||
}
|
||||
|
||||
func (s *fakeProductionForwardFabricSession) Close() error {
|
||||
s.once.Do(func() {
|
||||
close(s.done)
|
||||
})
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *fakeProductionForwardFabricSession) Closed() bool {
|
||||
select {
|
||||
case <-s.done:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func testProductionForwardEnvelope(messageID string) ProductionEnvelope {
|
||||
now := time.Now().UTC()
|
||||
return ProductionEnvelope{
|
||||
FabricProtocolVersion: ProtocolVersion,
|
||||
MessageID: messageID,
|
||||
RouteID: "route-1",
|
||||
ClusterID: "cluster-a",
|
||||
SourceNodeID: "node-a",
|
||||
DestinationNodeID: "node-b",
|
||||
CurrentHopNodeID: "node-a",
|
||||
NextHopNodeID: "node-b",
|
||||
ChannelClass: ProductionChannelFabricControl,
|
||||
MessageType: ProductionMessageFabricControl,
|
||||
TTL: 8,
|
||||
CreatedAt: now,
|
||||
ExpiresAt: now.Add(time.Minute),
|
||||
}
|
||||
}
|
||||
@@ -106,6 +106,9 @@ func (cfg ScopedSyntheticConfig) Validate(local PeerIdentity) error {
|
||||
if strings.TrimSpace(nodeID) == "" || strings.TrimSpace(endpoint) == "" {
|
||||
return fmt.Errorf("scoped synthetic mesh config contains empty peer endpoint")
|
||||
}
|
||||
if hasLegacyEndpointScheme(endpoint) {
|
||||
return fmt.Errorf("scoped synthetic mesh config contains non-QUIC peer endpoint")
|
||||
}
|
||||
}
|
||||
for nodeID, candidates := range cfg.PeerEndpointCandidates {
|
||||
if strings.TrimSpace(nodeID) == "" {
|
||||
@@ -121,6 +124,9 @@ func (cfg ScopedSyntheticConfig) Validate(local PeerIdentity) error {
|
||||
strings.TrimSpace(candidate.ConnectivityMode) == "" {
|
||||
return fmt.Errorf("scoped synthetic mesh config contains invalid peer endpoint candidate")
|
||||
}
|
||||
if !isQUICOnlyCandidateTransport(candidate.Transport) || hasLegacyEndpointScheme(candidate.Address) {
|
||||
return fmt.Errorf("scoped synthetic mesh config contains non-QUIC peer endpoint candidate")
|
||||
}
|
||||
}
|
||||
}
|
||||
for endpointID, observation := range cfg.PeerEndpointObservations {
|
||||
@@ -179,6 +185,14 @@ func validatePeerDirectory(entries []PeerDirectoryEntry, localNodeID string) err
|
||||
return nil
|
||||
}
|
||||
|
||||
// hasLegacyEndpointScheme reports whether endpoint, after trimming surrounding
// whitespace and lower-casing, begins with one of the http://, https://,
// ws:// or wss:// URL schemes.
func hasLegacyEndpointScheme(endpoint string) bool {
	normalized := strings.ToLower(strings.TrimSpace(endpoint))
	for _, scheme := range [...]string{"http://", "https://", "ws://", "wss://"} {
		if strings.HasPrefix(normalized, scheme) {
			return true
		}
	}
	return false
}
|
||||
|
||||
func validateRecoverySeeds(seeds []PeerRecoverySeed) error {
|
||||
if len(seeds) > 20 {
|
||||
return fmt.Errorf("scoped synthetic mesh config contains too many recovery seeds")
|
||||
@@ -191,6 +205,9 @@ func validateRecoverySeeds(seeds []PeerRecoverySeed) error {
|
||||
strings.TrimSpace(seed.Transport) == "" {
|
||||
return fmt.Errorf("scoped synthetic mesh config contains invalid recovery seed")
|
||||
}
|
||||
if !isQUICOnlyCandidateTransport(seed.Transport) || hasLegacyEndpointScheme(seed.Endpoint) {
|
||||
return fmt.Errorf("scoped synthetic mesh config contains non-QUIC recovery seed")
|
||||
}
|
||||
if _, duplicate := seen[key]; duplicate {
|
||||
return fmt.Errorf("scoped synthetic mesh config contains duplicate recovery seed")
|
||||
}
|
||||
@@ -224,6 +241,9 @@ func validateRendezvousLeases(leases []PeerRendezvousLease, routes []SyntheticRo
|
||||
(len(lease.Metadata) > 0 && !json.Valid(lease.Metadata)) {
|
||||
return fmt.Errorf("scoped synthetic mesh config contains invalid rendezvous lease")
|
||||
}
|
||||
if !isQUICOnlyCandidateTransport(lease.Transport) || hasLegacyEndpointScheme(lease.RelayEndpoint) {
|
||||
return fmt.Errorf("scoped synthetic mesh config contains non-QUIC rendezvous lease")
|
||||
}
|
||||
if _, duplicate := seen[lease.LeaseID]; duplicate {
|
||||
return fmt.Errorf("scoped synthetic mesh config contains duplicate rendezvous lease")
|
||||
}
|
||||
|
||||
@@ -18,14 +18,14 @@ func TestLoadScopedSyntheticConfig(t *testing.T) {
|
||||
ConfigVersion: "config-v1",
|
||||
PeerDirectoryVersion: "peers-v1",
|
||||
PolicyVersion: "policy-v1",
|
||||
PeerEndpoints: map[string]string{"node-b": "http://127.0.0.1:19002"},
|
||||
PeerEndpoints: map[string]string{"node-b": "quic://127.0.0.1:19443"},
|
||||
PeerEndpointCandidates: map[string][]PeerEndpointCandidate{
|
||||
"node-b": {
|
||||
{
|
||||
EndpointID: "node-b-public",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_tcp_tls",
|
||||
Address: "203.0.113.20:443",
|
||||
Transport: "direct_quic",
|
||||
Address: "quic://203.0.113.20:19443",
|
||||
Reachability: "public",
|
||||
NATType: "restricted",
|
||||
ConnectivityMode: "direct",
|
||||
@@ -55,8 +55,8 @@ func TestLoadScopedSyntheticConfig(t *testing.T) {
|
||||
RecoverySeeds: []PeerRecoverySeed{
|
||||
{
|
||||
NodeID: "node-b",
|
||||
Endpoint: "https://node-b.example.test:443",
|
||||
Transport: "direct_tcp_tls",
|
||||
Endpoint: "quic://node-b.example.test:19443",
|
||||
Transport: "direct_quic",
|
||||
ConnectivityMode: "direct",
|
||||
Priority: 10,
|
||||
},
|
||||
@@ -66,8 +66,8 @@ func TestLoadScopedSyntheticConfig(t *testing.T) {
|
||||
LeaseID: "lease-node-b-via-node-r",
|
||||
PeerNodeID: "node-b",
|
||||
RelayNodeID: "node-r",
|
||||
RelayEndpoint: "http://node-r:19000",
|
||||
Transport: "relay_control",
|
||||
RelayEndpoint: "quic://node-r:19443",
|
||||
Transport: "relay_quic",
|
||||
ConnectivityMode: "relay_required",
|
||||
RouteIDs: []string{"route-a-b"},
|
||||
AllowedChannels: []string{"fabric_control", "route_control"},
|
||||
@@ -158,8 +158,8 @@ func TestLoadScopedSyntheticConfigRejectsInvalidPeerEndpointCandidate(t *testing
|
||||
{
|
||||
EndpointID: "node-b-public",
|
||||
NodeID: "node-c",
|
||||
Transport: "direct_tcp_tls",
|
||||
Address: "203.0.113.20:443",
|
||||
Transport: "direct_quic",
|
||||
Address: "quic://203.0.113.20:19443",
|
||||
Reachability: "public",
|
||||
ConnectivityMode: "direct",
|
||||
},
|
||||
@@ -174,6 +174,73 @@ func TestLoadScopedSyntheticConfigRejectsInvalidPeerEndpointCandidate(t *testing
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadScopedSyntheticConfigRejectsLegacyPeerEndpoint(t *testing.T) {
|
||||
path := writeScopedConfig(t, ScopedSyntheticConfig{
|
||||
SchemaVersion: "c17f.synthetic.v1",
|
||||
ClusterID: "cluster-1",
|
||||
LocalNodeID: "node-a",
|
||||
PeerEndpoints: map[string]string{"node-b": "https://node-b.example.test:443"},
|
||||
Routes: []SyntheticRoute{liveSyntheticRoute("route-a-b", []string{"node-a", "node-b"})},
|
||||
})
|
||||
|
||||
_, err := LoadScopedSyntheticConfig(path, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"})
|
||||
if err == nil {
|
||||
t.Fatal("expected non-QUIC peer endpoint error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadScopedSyntheticConfigRejectsLegacyPeerEndpointCandidateTransport(t *testing.T) {
|
||||
path := writeScopedConfig(t, ScopedSyntheticConfig{
|
||||
SchemaVersion: "c17f.synthetic.v1",
|
||||
ClusterID: "cluster-1",
|
||||
LocalNodeID: "node-a",
|
||||
PeerEndpointCandidates: map[string][]PeerEndpointCandidate{
|
||||
"node-b": {
|
||||
{
|
||||
EndpointID: "node-b-websocket",
|
||||
NodeID: "node-b",
|
||||
Transport: "websocket",
|
||||
Address: "quic://203.0.113.20:19443",
|
||||
Reachability: "public",
|
||||
ConnectivityMode: "direct",
|
||||
},
|
||||
},
|
||||
},
|
||||
Routes: []SyntheticRoute{liveSyntheticRoute("route-a-b", []string{"node-a", "node-b"})},
|
||||
})
|
||||
|
||||
_, err := LoadScopedSyntheticConfig(path, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"})
|
||||
if err == nil {
|
||||
t.Fatal("expected non-QUIC peer endpoint candidate error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadScopedSyntheticConfigRejectsLegacyPeerEndpointCandidateScheme(t *testing.T) {
|
||||
path := writeScopedConfig(t, ScopedSyntheticConfig{
|
||||
SchemaVersion: "c17f.synthetic.v1",
|
||||
ClusterID: "cluster-1",
|
||||
LocalNodeID: "node-a",
|
||||
PeerEndpointCandidates: map[string][]PeerEndpointCandidate{
|
||||
"node-b": {
|
||||
{
|
||||
EndpointID: "node-b-https",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_quic",
|
||||
Address: "https://node-b.example.test:443",
|
||||
Reachability: "public",
|
||||
ConnectivityMode: "direct",
|
||||
},
|
||||
},
|
||||
},
|
||||
Routes: []SyntheticRoute{liveSyntheticRoute("route-a-b", []string{"node-a", "node-b"})},
|
||||
})
|
||||
|
||||
_, err := LoadScopedSyntheticConfig(path, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"})
|
||||
if err == nil {
|
||||
t.Fatal("expected non-QUIC peer endpoint candidate error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadScopedSyntheticConfigRejectsInvalidPeerEndpointObservation(t *testing.T) {
|
||||
path := writeScopedConfig(t, ScopedSyntheticConfig{
|
||||
SchemaVersion: "c17f.synthetic.v1",
|
||||
@@ -217,7 +284,7 @@ func TestLoadScopedSyntheticConfigRejectsInvalidRecoverySeed(t *testing.T) {
|
||||
ClusterID: "cluster-1",
|
||||
LocalNodeID: "node-a",
|
||||
RecoverySeeds: []PeerRecoverySeed{
|
||||
{NodeID: "node-b", Endpoint: "", Transport: "direct_tcp_tls"},
|
||||
{NodeID: "node-b", Endpoint: "", Transport: "direct_quic"},
|
||||
},
|
||||
Routes: []SyntheticRoute{liveSyntheticRoute("route-a-b", []string{"node-a", "node-b"})},
|
||||
})
|
||||
@@ -228,6 +295,23 @@ func TestLoadScopedSyntheticConfigRejectsInvalidRecoverySeed(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadScopedSyntheticConfigRejectsLegacyRecoverySeed(t *testing.T) {
|
||||
path := writeScopedConfig(t, ScopedSyntheticConfig{
|
||||
SchemaVersion: "c17f.synthetic.v1",
|
||||
ClusterID: "cluster-1",
|
||||
LocalNodeID: "node-a",
|
||||
RecoverySeeds: []PeerRecoverySeed{
|
||||
{NodeID: "node-b", Endpoint: "https://node-b.example.test:443", Transport: "direct_quic"},
|
||||
},
|
||||
Routes: []SyntheticRoute{liveSyntheticRoute("route-a-b", []string{"node-a", "node-b"})},
|
||||
})
|
||||
|
||||
_, err := LoadScopedSyntheticConfig(path, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"})
|
||||
if err == nil {
|
||||
t.Fatal("expected non-QUIC recovery seed error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadScopedSyntheticConfigRejectsInvalidRendezvousLease(t *testing.T) {
|
||||
path := writeScopedConfig(t, ScopedSyntheticConfig{
|
||||
SchemaVersion: "c17z12.synthetic.v1",
|
||||
@@ -238,8 +322,8 @@ func TestLoadScopedSyntheticConfigRejectsInvalidRendezvousLease(t *testing.T) {
|
||||
LeaseID: "lease-node-b-via-node-r",
|
||||
PeerNodeID: "node-b",
|
||||
RelayNodeID: "node-r",
|
||||
RelayEndpoint: "http://node-r:19000",
|
||||
Transport: "relay_control",
|
||||
RelayEndpoint: "quic://node-r:19443",
|
||||
Transport: "relay_quic",
|
||||
RouteIDs: []string{"route-a-b"},
|
||||
ExpiresAt: time.Now().UTC().Add(time.Hour),
|
||||
},
|
||||
@@ -253,6 +337,36 @@ func TestLoadScopedSyntheticConfigRejectsInvalidRendezvousLease(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadScopedSyntheticConfigRejectsLegacyRendezvousLease(t *testing.T) {
|
||||
path := writeScopedConfig(t, ScopedSyntheticConfig{
|
||||
SchemaVersion: "c17z12.synthetic.v1",
|
||||
ClusterID: "cluster-1",
|
||||
LocalNodeID: "node-a",
|
||||
RendezvousLeases: []PeerRendezvousLease{
|
||||
{
|
||||
LeaseID: "lease-node-b-via-node-r",
|
||||
PeerNodeID: "node-b",
|
||||
RelayNodeID: "node-r",
|
||||
RelayEndpoint: "https://node-r.example.test:443",
|
||||
Transport: "relay_quic",
|
||||
ConnectivityMode: "relay_required",
|
||||
RouteIDs: []string{"route-a-b"},
|
||||
AllowedChannels: []string{"fabric_control", "route_control"},
|
||||
Priority: 10,
|
||||
ControlPlaneOnly: true,
|
||||
IssuedAt: time.Now().UTC().Add(-time.Minute),
|
||||
ExpiresAt: time.Now().UTC().Add(time.Hour),
|
||||
},
|
||||
},
|
||||
Routes: []SyntheticRoute{liveSyntheticRoute("route-a-b", []string{"node-a", "node-r", "node-b"})},
|
||||
})
|
||||
|
||||
_, err := LoadScopedSyntheticConfig(path, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"})
|
||||
if err == nil {
|
||||
t.Fatal("expected non-QUIC rendezvous lease error")
|
||||
}
|
||||
}
|
||||
|
||||
func writeScopedConfig(t *testing.T, cfg ScopedSyntheticConfig) string {
|
||||
t.Helper()
|
||||
payload, err := json.Marshal(cfg)
|
||||
@@ -265,3 +379,32 @@ func writeScopedConfig(t *testing.T, cfg ScopedSyntheticConfig) string {
|
||||
}
|
||||
return path
|
||||
}
|
||||
|
||||
func liveSyntheticRoute(routeID string, hops []string) SyntheticRoute {
|
||||
return SyntheticRoute{
|
||||
RouteID: routeID,
|
||||
ClusterID: "cluster-1",
|
||||
SourceNodeID: hops[0],
|
||||
DestinationNodeID: hops[len(hops)-1],
|
||||
Hops: hops,
|
||||
AllowedChannels: []string{SyntheticChannelFabricControl},
|
||||
MaxTTL: 8,
|
||||
MaxHops: 8,
|
||||
ExpiresAt: time.Now().UTC().Add(time.Hour),
|
||||
RouteVersion: "route-v1",
|
||||
PolicyVersion: "policy-v1",
|
||||
PeerDirectoryVersion: "peers-v1",
|
||||
}
|
||||
}
|
||||
|
||||
// sameStrings reports whether left and right have the same length and hold
// equal strings at every index. A nil slice and an empty slice compare equal.
func sameStrings(left, right []string) bool {
	if len(left) != len(right) {
		return false
	}
	for idx, want := range left {
		if right[idx] != want {
			return false
		}
	}
	return true
}
|
||||
|
||||
@@ -69,22 +69,24 @@ type VPNPacketIngressRoutePreference interface {
|
||||
}
|
||||
|
||||
type Server struct {
|
||||
Local PeerIdentity
|
||||
SyntheticRuntime *SyntheticRuntime
|
||||
ProductionForwardingEnabled bool
|
||||
ProductionEnvelopeObserver ProductionEnvelopeObserver
|
||||
ProductionEnvelopeDelivery ProductionEnvelopeDelivery
|
||||
ProductionForwardTransport ProductionForwardTransport
|
||||
ProductionForwardLogger ProductionForwardLogger
|
||||
FabricServiceChannelLogger FabricServiceChannelAccessLogger
|
||||
RemoteWorkspaceFrameSink RemoteWorkspaceFrameSink
|
||||
ProductionRoutes []SyntheticRoute
|
||||
VPNPacketIngress VPNPacketIngress
|
||||
BackendProxyBaseURL string
|
||||
ClusterAuthorityPublicKey string
|
||||
ServiceChannelIntrospection bool
|
||||
FabricSessionEnabled bool
|
||||
FabricSessionLogger FabricSessionEventLogger
|
||||
Local PeerIdentity
|
||||
SyntheticRuntime *SyntheticRuntime
|
||||
ProductionForwardingEnabled bool
|
||||
ProductionEnvelopeObserver ProductionEnvelopeObserver
|
||||
ProductionEnvelopeDelivery ProductionEnvelopeDelivery
|
||||
ProductionForwardTransport ProductionForwardTransport
|
||||
ProductionForwardLogger ProductionForwardLogger
|
||||
DisableHTTPDataPlane bool
|
||||
FabricServiceChannelLogger FabricServiceChannelAccessLogger
|
||||
RemoteWorkspaceFrameSink RemoteWorkspaceFrameSink
|
||||
ProductionRoutes []SyntheticRoute
|
||||
VPNPacketIngress VPNPacketIngress
|
||||
BackendProxyBaseURL string
|
||||
ClusterAuthorityPublicKey string
|
||||
ServiceChannelIntrospection bool
|
||||
FabricSessionEnabled bool
|
||||
FabricSessionWebSocketEnabled bool
|
||||
FabricSessionLogger FabricSessionEventLogger
|
||||
}
|
||||
|
||||
func (s Server) Handler() http.Handler {
|
||||
@@ -92,7 +94,7 @@ func (s Server) Handler() http.Handler {
|
||||
mux.HandleFunc("/mesh/v1/health", s.handleHealth)
|
||||
mux.HandleFunc("/mesh/v1/forward", s.handleForward)
|
||||
mux.HandleFunc("/mesh/v1/synthetic/probe", s.handleSyntheticProbe)
|
||||
if s.FabricSessionEnabled {
|
||||
if s.FabricSessionEnabled && s.FabricSessionWebSocketEnabled {
|
||||
mux.HandleFunc("/mesh/v1/fabric/session/ws", s.handleFabricSessionWebSocket)
|
||||
}
|
||||
if s.RemoteWorkspaceFrameSink != nil {
|
||||
@@ -198,6 +200,7 @@ type FabricSessionEventLogEntry struct {
|
||||
Event string `json:"event"`
|
||||
ClusterID string `json:"cluster_id,omitempty"`
|
||||
NodeID string `json:"node_id,omitempty"`
|
||||
PeerID string `json:"peer_id,omitempty"`
|
||||
AcceptedBy string `json:"accepted_by,omitempty"`
|
||||
SessionID string `json:"session_id,omitempty"`
|
||||
SessionEvent fabricproto.SessionEventType `json:"session_event,omitempty"`
|
||||
@@ -2079,16 +2082,12 @@ func (s Server) handleForward(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusMethodNotAllowed)
|
||||
return
|
||||
}
|
||||
if s.DisableHTTPDataPlane {
|
||||
http.Error(w, "mesh data-plane forwarding requires QUIC fabric transport", http.StatusGone)
|
||||
return
|
||||
}
|
||||
if !s.ProductionForwardingEnabled {
|
||||
s.logProductionForward(ProductionForwardLogEntry{
|
||||
Event: "production_forward_rejected",
|
||||
ClusterID: s.Local.ClusterID,
|
||||
LocalNodeID: s.Local.NodeID,
|
||||
Reason: ErrForwardDisabled.Error(),
|
||||
StatusCode: http.StatusNotImplemented,
|
||||
OccurredAt: time.Now().UTC(),
|
||||
})
|
||||
http.Error(w, ErrForwardDisabled.Error(), http.StatusNotImplemented)
|
||||
s.rejectProductionForward(w, ProductionEnvelope{}, ErrForwardDisabled, forwardStatusCode(ErrForwardDisabled))
|
||||
return
|
||||
}
|
||||
var envelope ProductionEnvelope
|
||||
@@ -2104,54 +2103,57 @@ func (s Server) handleForward(w http.ResponseWriter, r *http.Request) {
|
||||
http.Error(w, "invalid production mesh envelope", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
if err := ValidateProductionEnvelope(s.Local, envelope, time.Now().UTC()); err != nil {
|
||||
result, err := s.ForwardProduction(r.Context(), envelope)
|
||||
if err != nil {
|
||||
s.rejectProductionForward(w, envelope, err, forwardStatusCode(err))
|
||||
return
|
||||
}
|
||||
writeProductionForwardResult(w, result)
|
||||
}
|
||||
|
||||
func (s Server) ForwardProduction(ctx context.Context, envelope ProductionEnvelope) (ProductionForwardResult, error) {
|
||||
if !s.ProductionForwardingEnabled {
|
||||
return ProductionForwardResult{}, ErrForwardDisabled
|
||||
}
|
||||
if err := ValidateProductionEnvelope(s.Local, envelope, time.Now().UTC()); err != nil {
|
||||
return ProductionForwardResult{}, err
|
||||
}
|
||||
if err := ValidateProductionEnvelopeRouteConfig(s.Local, envelope, s.ProductionRoutes, time.Now().UTC()); err != nil {
|
||||
s.rejectProductionForward(w, envelope, err, forwardStatusCode(err))
|
||||
return
|
||||
return ProductionForwardResult{}, err
|
||||
}
|
||||
s.logProductionForward(productionForwardLogEntry("production_forward_accepted", s.Local, envelope, "", 0))
|
||||
if s.ProductionEnvelopeObserver != nil {
|
||||
observation := NewProductionEnvelopeObservation(envelope, time.Now().UTC())
|
||||
if err := observeProductionEnvelope(r.Context(), s.ProductionEnvelopeObserver, observation); err != nil {
|
||||
if err := observeProductionEnvelope(ctx, s.ProductionEnvelopeObserver, observation); err != nil {
|
||||
s.logProductionForward(productionForwardLogEntry("production_forward_rejected", s.Local, envelope, ErrForwardObservationFailed.Error(), http.StatusInternalServerError))
|
||||
http.Error(w, ErrForwardObservationFailed.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
return ProductionForwardResult{}, ErrForwardObservationFailed
|
||||
}
|
||||
}
|
||||
if envelope.DestinationNodeID == s.Local.NodeID {
|
||||
if err := deliverProductionEnvelope(r.Context(), s.ProductionEnvelopeDelivery, envelope); err != nil {
|
||||
if err := deliverProductionEnvelope(ctx, s.ProductionEnvelopeDelivery, envelope); err != nil {
|
||||
s.logProductionForward(productionForwardLogEntry("production_forward_rejected", s.Local, envelope, ErrForwardDeliveryFailed.Error(), http.StatusInternalServerError))
|
||||
http.Error(w, ErrForwardDeliveryFailed.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
return ProductionForwardResult{}, ErrForwardDeliveryFailed
|
||||
}
|
||||
s.logProductionForward(productionForwardLogEntry("production_forward_delivered", s.Local, envelope, "", http.StatusOK))
|
||||
writeProductionForwardResult(w, ProductionForwardResult{
|
||||
return ProductionForwardResult{
|
||||
Accepted: true,
|
||||
Delivered: true,
|
||||
By: s.Local,
|
||||
MessageID: envelope.MessageID,
|
||||
RouteID: envelope.RouteID,
|
||||
})
|
||||
return
|
||||
}, nil
|
||||
}
|
||||
if envelope.NextHopNodeID == s.Local.NodeID {
|
||||
s.rejectProductionForward(w, envelope, ErrLoopDetected, forwardStatusCode(ErrLoopDetected))
|
||||
return
|
||||
return ProductionForwardResult{}, ErrLoopDetected
|
||||
}
|
||||
if len(envelope.RoutePath) == 0 && envelope.NextHopNodeID != envelope.DestinationNodeID {
|
||||
s.rejectProductionForward(w, envelope, ErrForwardRuntimeUnavailable, http.StatusNotImplemented)
|
||||
return
|
||||
return ProductionForwardResult{}, ErrForwardRuntimeUnavailable
|
||||
}
|
||||
if s.ProductionForwardTransport == nil {
|
||||
s.rejectProductionForward(w, envelope, ErrForwardRuntimeUnavailable, http.StatusNotImplemented)
|
||||
return
|
||||
return ProductionForwardResult{}, ErrForwardRuntimeUnavailable
|
||||
}
|
||||
if envelope.TTL <= 1 {
|
||||
s.rejectProductionForward(w, envelope, ErrTTLExhausted, forwardStatusCode(ErrTTLExhausted))
|
||||
return
|
||||
return ProductionForwardResult{}, ErrTTLExhausted
|
||||
}
|
||||
forwarded := envelope
|
||||
forwarded.CurrentHopNodeID = envelope.NextHopNodeID
|
||||
@@ -2159,10 +2161,9 @@ func (s Server) handleForward(w http.ResponseWriter, r *http.Request) {
|
||||
forwarded.TTL = envelope.TTL - 1
|
||||
forwarded.HopCount = envelope.HopCount + 1
|
||||
forwarded.VisitedNodeIDs = append(append([]string{}, envelope.VisitedNodeIDs...), s.Local.NodeID)
|
||||
result, err := s.ProductionForwardTransport.SendProduction(r.Context(), envelope.NextHopNodeID, forwarded)
|
||||
result, err := s.ProductionForwardTransport.SendProduction(ctx, envelope.NextHopNodeID, forwarded)
|
||||
if err != nil {
|
||||
s.rejectProductionForward(w, envelope, err, forwardStatusCode(err))
|
||||
return
|
||||
return ProductionForwardResult{}, err
|
||||
}
|
||||
s.logProductionForward(productionForwardLogEntry("production_forward_forwarded", s.Local, envelope, "", http.StatusOK))
|
||||
result.Accepted = true
|
||||
@@ -2171,7 +2172,7 @@ func (s Server) handleForward(w http.ResponseWriter, r *http.Request) {
|
||||
result.MessageID = envelope.MessageID
|
||||
result.RouteID = envelope.RouteID
|
||||
result.NextNodeID = envelope.NextHopNodeID
|
||||
writeProductionForwardResult(w, result)
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (s Server) rejectProductionForward(w http.ResponseWriter, envelope ProductionEnvelope, err error, statusCode int) {
|
||||
@@ -2262,6 +2263,10 @@ func (s Server) handleSyntheticProbe(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusMethodNotAllowed)
|
||||
return
|
||||
}
|
||||
if s.DisableHTTPDataPlane {
|
||||
http.Error(w, "mesh synthetic probes require QUIC fabric transport", http.StatusGone)
|
||||
return
|
||||
}
|
||||
if s.SyntheticRuntime == nil {
|
||||
http.Error(w, ErrMeshRuntimeDisabled.Error(), http.StatusServiceUnavailable)
|
||||
return
|
||||
@@ -2307,17 +2312,19 @@ func syntheticStatusCode(err error) int {
|
||||
}
|
||||
|
||||
func forwardStatusCode(err error) int {
|
||||
switch err {
|
||||
case ErrClusterMismatch, ErrNodeMismatch, ErrUnauthorizedChannel, ErrLoopDetected:
|
||||
switch {
|
||||
case errors.Is(err, ErrClusterMismatch), errors.Is(err, ErrNodeMismatch), errors.Is(err, ErrUnauthorizedChannel), errors.Is(err, ErrLoopDetected):
|
||||
return http.StatusForbidden
|
||||
case ErrRouteExpired, ErrTTLExhausted, ErrInvalidRoutePath, ErrRouteIDRequired:
|
||||
case errors.Is(err, ErrRouteExpired), errors.Is(err, ErrTTLExhausted), errors.Is(err, ErrInvalidRoutePath), errors.Is(err, ErrRouteIDRequired), errors.Is(err, ErrForwardEnvelopeInvalid):
|
||||
return http.StatusBadRequest
|
||||
case ErrForwardRuntimeUnavailable:
|
||||
case errors.Is(err, ErrForwardRuntimeUnavailable), errors.Is(err, ErrForwardDisabled):
|
||||
return http.StatusNotImplemented
|
||||
case ErrRouteNotFound:
|
||||
case errors.Is(err, ErrRouteNotFound):
|
||||
return http.StatusNotFound
|
||||
case ErrForwardPeerUnavailable:
|
||||
case errors.Is(err, ErrForwardPeerUnavailable):
|
||||
return http.StatusBadGateway
|
||||
case errors.Is(err, ErrForwardObservationFailed), errors.Is(err, ErrForwardDeliveryFailed):
|
||||
return http.StatusInternalServerError
|
||||
default:
|
||||
return http.StatusBadRequest
|
||||
}
|
||||
|
||||
@@ -23,6 +23,18 @@ import (
|
||||
"github.com/gorilla/websocket"
|
||||
)
|
||||
|
||||
type testProductionForwardTransport struct {
|
||||
targets map[string]Server
|
||||
}
|
||||
|
||||
func (t testProductionForwardTransport) SendProduction(ctx context.Context, nextNodeID string, envelope ProductionEnvelope) (ProductionForwardResult, error) {
|
||||
target, ok := t.targets[strings.TrimSpace(nextNodeID)]
|
||||
if !ok {
|
||||
return ProductionForwardResult{}, ErrForwardPeerUnavailable
|
||||
}
|
||||
return target.ForwardProduction(ctx, envelope)
|
||||
}
|
||||
|
||||
func TestMeshHealthAcceptsSameCluster(t *testing.T) {
|
||||
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"}
|
||||
server := httptest.NewServer(Server{Local: local}.Handler())
|
||||
@@ -92,8 +104,9 @@ func TestFabricSessionWebSocketDisabledByDefault(t *testing.T) {
|
||||
func TestFabricSessionWebSocketPingPongAndEvents(t *testing.T) {
|
||||
var events []FabricSessionEventLogEntry
|
||||
server := httptest.NewServer(Server{
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
FabricSessionWebSocketEnabled: true,
|
||||
FabricSessionLogger: func(entry FabricSessionEventLogEntry) {
|
||||
events = append(events, entry)
|
||||
},
|
||||
@@ -119,8 +132,9 @@ func TestFabricSessionWebSocketPingPongAndEvents(t *testing.T) {
|
||||
|
||||
func TestFabricSessionWebSocketOpenStreamDataAck(t *testing.T) {
|
||||
server := httptest.NewServer(Server{
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
FabricSessionWebSocketEnabled: true,
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
@@ -151,8 +165,9 @@ func TestFabricSessionWebSocketOpenStreamDataAck(t *testing.T) {
|
||||
|
||||
func TestFabricSessionWebSocketRequiresToken(t *testing.T) {
|
||||
server := httptest.NewServer(Server{
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
FabricSessionWebSocketEnabled: true,
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
@@ -172,9 +187,10 @@ func TestFabricSessionWebSocketRequiresSignedAuthorityWhenConfigured(t *testing.
|
||||
t.Fatalf("generate key: %v", err)
|
||||
}
|
||||
server := httptest.NewServer(Server{
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
ClusterAuthorityPublicKey: base64.StdEncoding.EncodeToString(publicKey),
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
FabricSessionWebSocketEnabled: true,
|
||||
ClusterAuthorityPublicKey: base64.StdEncoding.EncodeToString(publicKey),
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
@@ -196,9 +212,10 @@ func TestFabricSessionWebSocketAcceptsSignedAuthority(t *testing.T) {
|
||||
token := "rap_fsn_signedtest"
|
||||
var events []FabricSessionEventLogEntry
|
||||
server := httptest.NewServer(Server{
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
ClusterAuthorityPublicKey: base64.StdEncoding.EncodeToString(publicKey),
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
FabricSessionEnabled: true,
|
||||
FabricSessionWebSocketEnabled: true,
|
||||
ClusterAuthorityPublicKey: base64.StdEncoding.EncodeToString(publicKey),
|
||||
FabricSessionLogger: func(entry FabricSessionEventLogEntry) {
|
||||
events = append(events, entry)
|
||||
},
|
||||
@@ -360,23 +377,20 @@ func TestMeshForwardingGateDeliversFabricControlAtDestination(t *testing.T) {
|
||||
func TestMeshForwardingGateForwardsDirectFabricControlToNextHop(t *testing.T) {
|
||||
nodeC := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-c"}
|
||||
var deliveredObservation ProductionEnvelopeObservation
|
||||
serverC := httptest.NewServer(Server{
|
||||
serverC := Server{
|
||||
Local: nodeC,
|
||||
ProductionForwardingEnabled: true,
|
||||
ProductionEnvelopeObserver: func(_ context.Context, observation ProductionEnvelopeObservation) error {
|
||||
deliveredObservation = observation
|
||||
return nil
|
||||
},
|
||||
}.Handler())
|
||||
defer serverC.Close()
|
||||
}
|
||||
|
||||
nodeB := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"}
|
||||
serverB := httptest.NewServer(Server{
|
||||
Local: nodeB,
|
||||
ProductionForwardingEnabled: true,
|
||||
ProductionForwardTransport: NewHTTPProductionForwardTransport(map[string]string{
|
||||
nodeC.NodeID: serverC.URL,
|
||||
}),
|
||||
ProductionForwardTransport: testProductionForwardTransport{targets: map[string]Server{nodeC.NodeID: serverC}},
|
||||
}.Handler())
|
||||
defer serverB.Close()
|
||||
|
||||
@@ -414,36 +428,30 @@ func TestMeshForwardingGateForwardsMultiHopFabricControlByRoutePath(t *testing.T
|
||||
var deliveredObservation ProductionEnvelopeObservation
|
||||
var nodeREvents []ProductionForwardLogEntry
|
||||
var nodeBEvents []ProductionForwardLogEntry
|
||||
serverC := httptest.NewServer(Server{
|
||||
serverC := Server{
|
||||
Local: nodeC,
|
||||
ProductionForwardingEnabled: true,
|
||||
ProductionEnvelopeObserver: func(_ context.Context, observation ProductionEnvelopeObservation) error {
|
||||
deliveredObservation = observation
|
||||
return nil
|
||||
},
|
||||
}.Handler())
|
||||
defer serverC.Close()
|
||||
}
|
||||
|
||||
nodeR := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-r"}
|
||||
serverR := httptest.NewServer(Server{
|
||||
serverR := Server{
|
||||
Local: nodeR,
|
||||
ProductionForwardingEnabled: true,
|
||||
ProductionForwardTransport: NewHTTPProductionForwardTransport(map[string]string{
|
||||
nodeC.NodeID: serverC.URL,
|
||||
}),
|
||||
ProductionForwardTransport: testProductionForwardTransport{targets: map[string]Server{nodeC.NodeID: serverC}},
|
||||
ProductionForwardLogger: func(entry ProductionForwardLogEntry) {
|
||||
nodeREvents = append(nodeREvents, entry)
|
||||
},
|
||||
}.Handler())
|
||||
defer serverR.Close()
|
||||
}
|
||||
|
||||
nodeB := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"}
|
||||
serverB := httptest.NewServer(Server{
|
||||
Local: nodeB,
|
||||
ProductionForwardingEnabled: true,
|
||||
ProductionForwardTransport: NewHTTPProductionForwardTransport(map[string]string{
|
||||
nodeR.NodeID: serverR.URL,
|
||||
}),
|
||||
ProductionForwardTransport: testProductionForwardTransport{targets: map[string]Server{nodeR.NodeID: serverR}},
|
||||
ProductionForwardLogger: func(entry ProductionForwardLogEntry) {
|
||||
nodeBEvents = append(nodeBEvents, entry)
|
||||
},
|
||||
@@ -490,7 +498,7 @@ func TestMeshForwardingGateForwardsConfiguredProductionRoute(t *testing.T) {
|
||||
nodeC := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-c"}
|
||||
route := configuredProductionRoute("route-1", []string{"node-a", "node-b", "node-r", nodeC.NodeID})
|
||||
var deliveredObservation ProductionEnvelopeObservation
|
||||
serverC := httptest.NewServer(Server{
|
||||
serverC := Server{
|
||||
Local: nodeC,
|
||||
ProductionForwardingEnabled: true,
|
||||
ProductionRoutes: []SyntheticRoute{route},
|
||||
@@ -498,28 +506,22 @@ func TestMeshForwardingGateForwardsConfiguredProductionRoute(t *testing.T) {
|
||||
deliveredObservation = observation
|
||||
return nil
|
||||
},
|
||||
}.Handler())
|
||||
defer serverC.Close()
|
||||
}
|
||||
|
||||
nodeR := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-r"}
|
||||
serverR := httptest.NewServer(Server{
|
||||
serverR := Server{
|
||||
Local: nodeR,
|
||||
ProductionForwardingEnabled: true,
|
||||
ProductionRoutes: []SyntheticRoute{route},
|
||||
ProductionForwardTransport: NewHTTPProductionForwardTransport(map[string]string{
|
||||
nodeC.NodeID: serverC.URL,
|
||||
}),
|
||||
}.Handler())
|
||||
defer serverR.Close()
|
||||
ProductionForwardTransport: testProductionForwardTransport{targets: map[string]Server{nodeC.NodeID: serverC}},
|
||||
}
|
||||
|
||||
nodeB := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"}
|
||||
serverB := httptest.NewServer(Server{
|
||||
Local: nodeB,
|
||||
ProductionForwardingEnabled: true,
|
||||
ProductionRoutes: []SyntheticRoute{route},
|
||||
ProductionForwardTransport: NewHTTPProductionForwardTransport(map[string]string{
|
||||
nodeR.NodeID: serverR.URL,
|
||||
}),
|
||||
ProductionForwardTransport: testProductionForwardTransport{targets: map[string]Server{nodeR.NodeID: serverR}},
|
||||
}.Handler())
|
||||
defer serverB.Close()
|
||||
|
||||
@@ -5016,3 +5018,30 @@ func TestSyntheticEndpointDisabledByDefault(t *testing.T) {
|
||||
t.Fatalf("status = %d, want %d", resp.StatusCode, http.StatusServiceUnavailable)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHTTPDataPlaneDisabledRequiresQUIC(t *testing.T) {
|
||||
server := httptest.NewServer(Server{
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"},
|
||||
SyntheticRuntime: NewSyntheticRuntime(SyntheticRuntimeConfig{Enabled: true, Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"}}),
|
||||
DisableHTTPDataPlane: true,
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
resp, err := http.Post(server.URL+"/mesh/v1/synthetic/probe", "application/json", bytes.NewReader([]byte(`{}`)))
|
||||
if err != nil {
|
||||
t.Fatalf("post synthetic probe: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusGone {
|
||||
t.Fatalf("synthetic status = %d, want %d", resp.StatusCode, http.StatusGone)
|
||||
}
|
||||
|
||||
resp, err = http.Post(server.URL+"/mesh/v1/forward", "application/json", bytes.NewReader([]byte(`{}`)))
|
||||
if err != nil {
|
||||
t.Fatalf("post production forward: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusGone {
|
||||
t.Fatalf("forward status = %d, want %d", resp.StatusCode, http.StatusGone)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,268 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
|
||||
)
|
||||
|
||||
type QUICSyntheticTransport struct {
|
||||
Targets map[string]FabricTransportTarget
|
||||
RouteSets map[string]FabricRouteSet
|
||||
Transport FabricTransport
|
||||
Router FabricChannelRouter
|
||||
Timeout time.Duration
|
||||
Pressure *FabricRoutePressureTracker
|
||||
Health *FabricRouteHealthTracker
|
||||
sequence atomic.Uint64
|
||||
}
|
||||
|
||||
type QUICSyntheticTransportSnapshot struct {
|
||||
RoutePressure FabricRoutePressureSnapshot `json:"route_pressure"`
|
||||
RouteHealth FabricRouteHealthSnapshot `json:"route_health,omitempty"`
|
||||
}
|
||||
|
||||
func NewQUICSyntheticTransportFromRouteSets(routeSets map[string]FabricRouteSet, transport FabricTransport) *QUICSyntheticTransport {
|
||||
normalizedRouteSets := make(map[string]FabricRouteSet, len(routeSets))
|
||||
targets := make(map[string]FabricTransportTarget, len(routeSets))
|
||||
for nodeID, routeSet := range routeSets {
|
||||
nodeID = strings.TrimSpace(nodeID)
|
||||
if nodeID == "" {
|
||||
continue
|
||||
}
|
||||
normalizedRouteSets[nodeID] = routeSet
|
||||
if target, err := FabricTransportTargetForRoute(routeSet.Primary); err == nil {
|
||||
targets[nodeID] = target
|
||||
}
|
||||
}
|
||||
if transport == nil {
|
||||
transport = NewQUICFabricTransport(nil)
|
||||
}
|
||||
return &QUICSyntheticTransport{
|
||||
Targets: targets,
|
||||
RouteSets: normalizedRouteSets,
|
||||
Transport: transport,
|
||||
Router: NewFabricChannelRouter(FabricChannelRouterConfig{
|
||||
MaxAckLatencyMs: 2000,
|
||||
MinRerouteInterval: 50 * time.Millisecond,
|
||||
}),
|
||||
Timeout: 10 * time.Second,
|
||||
Pressure: NewFabricRoutePressureTracker(),
|
||||
Health: NewFabricRouteHealthTracker(30 * time.Second),
|
||||
}
|
||||
}
|
||||
|
||||
func (t *QUICSyntheticTransport) SendSynthetic(ctx context.Context, nextNodeID string, envelope SyntheticEnvelope) (SyntheticEnvelope, error) {
|
||||
if t == nil || t.Transport == nil {
|
||||
return SyntheticEnvelope{}, ErrSyntheticPeerUnavailable
|
||||
}
|
||||
nextNodeID = strings.TrimSpace(nextNodeID)
|
||||
routeSet, ok := t.RouteSets[nextNodeID]
|
||||
if !ok {
|
||||
target, targetOK := t.Targets[nextNodeID]
|
||||
if !targetOK || strings.TrimSpace(target.Endpoint) == "" {
|
||||
return SyntheticEnvelope{}, ErrSyntheticPeerUnavailable
|
||||
}
|
||||
routeSet = FabricRouteSetForTransportTargets(envelope.ClusterID, envelope.From.NodeID, nextNodeID, []FabricTransportTarget{target})
|
||||
}
|
||||
spec := FabricChannelSpec{
|
||||
ChannelID: fmt.Sprintf("synthetic-%d", t.sequence.Add(1)),
|
||||
ClusterID: envelope.ClusterID,
|
||||
SourceNodeID: envelope.From.NodeID,
|
||||
TargetKind: FabricChannelTargetNode,
|
||||
TargetID: nextNodeID,
|
||||
TrafficClass: FabricServiceChannelReliable,
|
||||
CreatedAt: time.Now().UTC(),
|
||||
}
|
||||
payload, err := json.Marshal(envelope)
|
||||
if err != nil {
|
||||
return SyntheticEnvelope{}, err
|
||||
}
|
||||
return t.sendSyntheticWithRouteSet(ctx, spec, routeSet, payload)
|
||||
}
|
||||
|
||||
func (t *QUICSyntheticTransport) sendSyntheticWithRouteSet(ctx context.Context, spec FabricChannelSpec, routeSet FabricRouteSet, payload []byte) (SyntheticEnvelope, error) {
|
||||
router := t.Router
|
||||
if router.Config.MaxRoutePressure == 0 {
|
||||
router = NewFabricChannelRouter(FabricChannelRouterConfig{MaxAckLatencyMs: 2000, MinRerouteInterval: 50 * time.Millisecond})
|
||||
}
|
||||
routeSet = t.routeSetForScheduling(routeSet)
|
||||
channel, _, err := router.OpenChannel(spec, routeSet, time.Now().UTC())
|
||||
if err != nil {
|
||||
return SyntheticEnvelope{}, err
|
||||
}
|
||||
timeout := t.Timeout
|
||||
if timeout <= 0 {
|
||||
timeout = 10 * time.Second
|
||||
}
|
||||
for {
|
||||
routeSet = t.routeSetForScheduling(routeSet)
|
||||
route, ok := findFabricRoute(routeSet, channel.RouteID)
|
||||
if !ok {
|
||||
return SyntheticEnvelope{}, ErrFabricRouteNotFound
|
||||
}
|
||||
target, err := FabricTransportTargetForRoute(route)
|
||||
if err != nil {
|
||||
return SyntheticEnvelope{}, err
|
||||
}
|
||||
target.PeerID = firstNonEmpty(strings.TrimSpace(target.PeerID), spec.TargetID)
|
||||
target.MaxPayload = fabricproto.DefaultMaxPayload
|
||||
releaseRoute := t.acquireSyntheticRoute(route.RouteID)
|
||||
session, err := t.Transport.Connect(ctx, target)
|
||||
if err != nil {
|
||||
releaseRoute()
|
||||
t.markSyntheticRouteFailure(route.RouteID, err)
|
||||
updated, event, rerouteErr := router.ObserveChannel(channel, routeSet, FabricChannelObservation{
|
||||
ChannelID: spec.ChannelID,
|
||||
RouteID: route.RouteID,
|
||||
Failed: true,
|
||||
Reason: "connect_failed",
|
||||
ObservedAt: time.Now().UTC(),
|
||||
}, time.Now().UTC())
|
||||
channel = updated
|
||||
if event.Type == FabricChannelRouteEventReroute {
|
||||
continue
|
||||
}
|
||||
if rerouteErr != nil {
|
||||
return SyntheticEnvelope{}, rerouteErr
|
||||
}
|
||||
return SyntheticEnvelope{}, fmt.Errorf("%w: %v", ErrSyntheticPeerUnavailable, err)
|
||||
}
|
||||
response, ackMs, err := t.sendSyntheticOnSession(ctx, session, payload, timeout)
|
||||
_ = session.Close()
|
||||
releaseRoute()
|
||||
if err == nil {
|
||||
t.markSyntheticRouteSuccess(route.RouteID)
|
||||
_, _, _ = router.ObserveChannel(channel, routeSet, FabricChannelObservation{
|
||||
ChannelID: spec.ChannelID,
|
||||
RouteID: route.RouteID,
|
||||
AckLatencyMs: ackMs,
|
||||
BytesSent: uint64(len(payload)),
|
||||
FramesSent: 1,
|
||||
BytesRecv: uint64(len(response.Payload)),
|
||||
FramesRecv: 1,
|
||||
ObservedAt: time.Now().UTC(),
|
||||
}, time.Now().UTC())
|
||||
return decodeQUICSyntheticForwardResponse(response.Payload)
|
||||
}
|
||||
t.markSyntheticRouteFailure(route.RouteID, err)
|
||||
updated, event, rerouteErr := router.ObserveChannel(channel, routeSet, FabricChannelObservation{
|
||||
ChannelID: spec.ChannelID,
|
||||
RouteID: route.RouteID,
|
||||
Failed: true,
|
||||
Reason: "response_failed",
|
||||
ObservedAt: time.Now().UTC(),
|
||||
}, time.Now().UTC())
|
||||
channel = updated
|
||||
if event.Type == FabricChannelRouteEventReroute {
|
||||
continue
|
||||
}
|
||||
if rerouteErr != nil {
|
||||
return SyntheticEnvelope{}, rerouteErr
|
||||
}
|
||||
return SyntheticEnvelope{}, fmt.Errorf("%w: %v", ErrSyntheticPeerUnavailable, err)
|
||||
}
|
||||
}
|
||||
|
||||
func (t *QUICSyntheticTransport) routeSetForScheduling(routeSet FabricRouteSet) FabricRouteSet {
|
||||
if t != nil && t.Health != nil {
|
||||
routeSet = t.Health.Apply(routeSet, time.Now().UTC())
|
||||
}
|
||||
if t != nil && t.Pressure != nil {
|
||||
routeSet = t.Pressure.Apply(routeSet)
|
||||
}
|
||||
return routeSet
|
||||
}
|
||||
|
||||
func (t *QUICSyntheticTransport) acquireSyntheticRoute(routeID string) func() {
|
||||
if t == nil || t.Pressure == nil {
|
||||
return func() {}
|
||||
}
|
||||
return t.Pressure.Acquire(routeID)
|
||||
}
|
||||
|
||||
func (t *QUICSyntheticTransport) markSyntheticRouteFailure(routeID string, err error) {
|
||||
if t == nil || t.Health == nil || err == nil {
|
||||
return
|
||||
}
|
||||
t.Health.MarkFailure(routeID, err.Error(), time.Now().UTC())
|
||||
}
|
||||
|
||||
func (t *QUICSyntheticTransport) markSyntheticRouteSuccess(routeID string) {
|
||||
if t == nil || t.Health == nil {
|
||||
return
|
||||
}
|
||||
t.Health.MarkSuccess(routeID)
|
||||
}
|
||||
|
||||
func (t *QUICSyntheticTransport) Snapshot() QUICSyntheticTransportSnapshot {
|
||||
if t == nil {
|
||||
return QUICSyntheticTransportSnapshot{}
|
||||
}
|
||||
var pressure FabricRoutePressureSnapshot
|
||||
if t.Pressure != nil {
|
||||
pressure = t.Pressure.SnapshotPressure()
|
||||
}
|
||||
var health FabricRouteHealthSnapshot
|
||||
if t.Health != nil {
|
||||
health = t.Health.Snapshot(time.Now().UTC())
|
||||
}
|
||||
return QUICSyntheticTransportSnapshot{RoutePressure: pressure, RouteHealth: health}
|
||||
}
|
||||
|
||||
func (t *QUICSyntheticTransport) sendSyntheticOnSession(ctx context.Context, session FabricTransportSession, payload []byte, timeout time.Duration) (fabricproto.Frame, int64, error) {
|
||||
sequence := t.sequence.Add(1)
|
||||
if err := session.Send(ctx, fabricproto.Frame{
|
||||
Type: fabricproto.FrameData,
|
||||
TrafficClass: fabricproto.TrafficClassReliable,
|
||||
StreamID: SyntheticForwardQUICStreamID,
|
||||
Sequence: sequence,
|
||||
Payload: payload,
|
||||
}); err != nil {
|
||||
return fabricproto.Frame{}, 0, err
|
||||
}
|
||||
waitCtx := ctx
|
||||
if timeout > 0 {
|
||||
var cancel context.CancelFunc
|
||||
waitCtx, cancel = context.WithTimeout(ctx, timeout)
|
||||
defer cancel()
|
||||
}
|
||||
started := time.Now()
|
||||
for {
|
||||
select {
|
||||
case <-waitCtx.Done():
|
||||
return fabricproto.Frame{}, 0, waitCtx.Err()
|
||||
case err, ok := <-session.Errors():
|
||||
if !ok {
|
||||
return fabricproto.Frame{}, 0, ErrSyntheticPeerUnavailable
|
||||
}
|
||||
if err != nil {
|
||||
return fabricproto.Frame{}, 0, err
|
||||
}
|
||||
case frame, ok := <-session.Frames():
|
||||
if !ok {
|
||||
return fabricproto.Frame{}, 0, ErrSyntheticPeerUnavailable
|
||||
}
|
||||
if frame.Type != fabricproto.FrameData || frame.StreamID != SyntheticForwardQUICStreamID || frame.Sequence != sequence {
|
||||
continue
|
||||
}
|
||||
return frame, time.Since(started).Milliseconds(), nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func decodeQUICSyntheticForwardResponse(payload []byte) (SyntheticEnvelope, error) {
|
||||
var response quicSyntheticForwardResponse
|
||||
if err := json.Unmarshal(payload, &response); err != nil {
|
||||
return SyntheticEnvelope{}, err
|
||||
}
|
||||
if strings.TrimSpace(response.Error) != "" {
|
||||
return SyntheticEnvelope{}, fmt.Errorf("%w: %s", ErrSyntheticPeerUnavailable, response.Error)
|
||||
}
|
||||
return response.Envelope, nil
|
||||
}
|
||||
@@ -0,0 +1,223 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"encoding/json"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
|
||||
)
|
||||
|
||||
func TestQUICSyntheticTransportReroutesOnConnectFailure(t *testing.T) {
|
||||
transport := newFakeSyntheticFabricTransport()
|
||||
transport.failConnect["quic://dead.example.test:19443"] = true
|
||||
transport.responses["quic://fast.example.test:19443"] = testSyntheticAckEnvelope("route-1", 1)
|
||||
forward := NewQUICSyntheticTransportFromRouteSets(map[string]FabricRouteSet{
|
||||
"node-b": FabricRouteSetForTransportTargets("cluster-a", "node-a", "node-b", []FabricTransportTarget{
|
||||
{EndpointID: "dead", PeerID: "node-b", Endpoint: "quic://dead.example.test:19443", Transport: "quic"},
|
||||
{EndpointID: "fast", PeerID: "node-b", Endpoint: "quic://fast.example.test:19443", Transport: "quic"},
|
||||
}),
|
||||
}, transport)
|
||||
forward.Timeout = time.Second
|
||||
|
||||
ack, err := forward.SendSynthetic(context.Background(), "node-b", testSyntheticEnvelope("route-1", 1))
|
||||
if err != nil {
|
||||
t.Fatalf("send synthetic: %v", err)
|
||||
}
|
||||
if ack.RouteID != "route-1" || ack.MessageType != SyntheticMessageRouteHealthAck {
|
||||
t.Fatalf("ack = %+v", ack)
|
||||
}
|
||||
if got := transport.connectCount("quic://dead.example.test:19443"); got != 1 {
|
||||
t.Fatalf("dead connect count = %d, want 1", got)
|
||||
}
|
||||
if got := transport.connectCount("quic://fast.example.test:19443"); got != 1 {
|
||||
t.Fatalf("fast connect count = %d, want 1", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestQUICFabricServerHandlesSyntheticFrames(t *testing.T) {
|
||||
server, err := StartQUICFabricServer(context.Background(), QUICFabricServerConfig{
|
||||
ListenAddr: "127.0.0.1:0",
|
||||
TLSConfig: testQUICTLSConfig(t),
|
||||
SyntheticForwardHandler: func(_ context.Context, envelope SyntheticEnvelope) (SyntheticEnvelope, error) {
|
||||
return testSyntheticAckEnvelope(envelope.RouteID, envelope.Sequence), nil
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("start quic fabric server: %v", err)
|
||||
}
|
||||
defer server.Close()
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
|
||||
defer cancel()
|
||||
session, err := NewQUICFabricTransport(nil).Connect(ctx, FabricTransportTarget{
|
||||
Endpoint: server.Addr().String(),
|
||||
TLSConfig: &tls.Config{
|
||||
InsecureSkipVerify: true,
|
||||
NextProtos: []string{fabricQUICNextProto},
|
||||
},
|
||||
Timeout: time.Second,
|
||||
InboundBuffer: 4,
|
||||
ErrorBuffer: 4,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("connect: %v", err)
|
||||
}
|
||||
defer session.Close()
|
||||
|
||||
payload, err := json.Marshal(testSyntheticEnvelope("route-1", 7))
|
||||
if err != nil {
|
||||
t.Fatalf("marshal envelope: %v", err)
|
||||
}
|
||||
if err := session.Send(ctx, fabricproto.Frame{
|
||||
Type: fabricproto.FrameData,
|
||||
TrafficClass: fabricproto.TrafficClassReliable,
|
||||
StreamID: SyntheticForwardQUICStreamID,
|
||||
Sequence: 42,
|
||||
Payload: payload,
|
||||
}); err != nil {
|
||||
t.Fatalf("send synthetic frame: %v", err)
|
||||
}
|
||||
select {
|
||||
case frame := <-session.Frames():
|
||||
if frame.StreamID != SyntheticForwardQUICStreamID || frame.Sequence != 42 {
|
||||
t.Fatalf("frame = %+v", frame)
|
||||
}
|
||||
ack, err := decodeQUICSyntheticForwardResponse(frame.Payload)
|
||||
if err != nil {
|
||||
t.Fatalf("decode response: %v", err)
|
||||
}
|
||||
if ack.RouteID != "route-1" || ack.MessageType != SyntheticMessageRouteHealthAck || ack.Sequence != 7 {
|
||||
t.Fatalf("ack = %+v", ack)
|
||||
}
|
||||
case err := <-session.Errors():
|
||||
t.Fatalf("session error: %v", err)
|
||||
case <-ctx.Done():
|
||||
t.Fatal(ctx.Err())
|
||||
}
|
||||
}
|
||||
|
||||
type fakeSyntheticFabricTransport struct {
|
||||
mu sync.Mutex
|
||||
failConnect map[string]bool
|
||||
responses map[string]SyntheticEnvelope
|
||||
connects map[string]int
|
||||
}
|
||||
|
||||
func newFakeSyntheticFabricTransport() *fakeSyntheticFabricTransport {
|
||||
return &fakeSyntheticFabricTransport{
|
||||
failConnect: map[string]bool{},
|
||||
responses: map[string]SyntheticEnvelope{},
|
||||
connects: map[string]int{},
|
||||
}
|
||||
}
|
||||
|
||||
func (t *fakeSyntheticFabricTransport) Connect(_ context.Context, target FabricTransportTarget) (FabricTransportSession, error) {
|
||||
endpoint := target.Endpoint
|
||||
t.mu.Lock()
|
||||
t.connects[endpoint]++
|
||||
fail := t.failConnect[endpoint]
|
||||
response := t.responses[endpoint]
|
||||
t.mu.Unlock()
|
||||
if fail {
|
||||
return nil, ErrSyntheticPeerUnavailable
|
||||
}
|
||||
return &fakeSyntheticFabricSession{
|
||||
response: response,
|
||||
frames: make(chan fabricproto.Frame, 16),
|
||||
errors: make(chan error, 1),
|
||||
done: make(chan struct{}),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (t *fakeSyntheticFabricTransport) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (t *fakeSyntheticFabricTransport) connectCount(endpoint string) int {
|
||||
t.mu.Lock()
|
||||
defer t.mu.Unlock()
|
||||
return t.connects[endpoint]
|
||||
}
|
||||
|
||||
type fakeSyntheticFabricSession struct {
|
||||
response SyntheticEnvelope
|
||||
frames chan fabricproto.Frame
|
||||
errors chan error
|
||||
done chan struct{}
|
||||
once sync.Once
|
||||
}
|
||||
|
||||
func (s *fakeSyntheticFabricSession) Send(_ context.Context, frame fabricproto.Frame) error {
|
||||
if frame.Type != fabricproto.FrameData {
|
||||
return nil
|
||||
}
|
||||
responsePayload, _ := json.Marshal(quicSyntheticForwardResponse{Envelope: s.response})
|
||||
go func() {
|
||||
select {
|
||||
case <-s.done:
|
||||
case s.frames <- fabricproto.Frame{
|
||||
Type: fabricproto.FrameData,
|
||||
TrafficClass: frame.TrafficClass,
|
||||
StreamID: frame.StreamID,
|
||||
Sequence: frame.Sequence,
|
||||
Payload: responsePayload,
|
||||
}:
|
||||
}
|
||||
}()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *fakeSyntheticFabricSession) Frames() <-chan fabricproto.Frame {
|
||||
return s.frames
|
||||
}
|
||||
|
||||
func (s *fakeSyntheticFabricSession) Errors() <-chan error {
|
||||
return s.errors
|
||||
}
|
||||
|
||||
func (s *fakeSyntheticFabricSession) Close() error {
|
||||
s.once.Do(func() {
|
||||
close(s.done)
|
||||
})
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *fakeSyntheticFabricSession) Closed() bool {
|
||||
select {
|
||||
case <-s.done:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func testSyntheticEnvelope(routeID string, sequence uint64) SyntheticEnvelope {
|
||||
now := time.Now().UTC()
|
||||
return SyntheticEnvelope{
|
||||
ProtocolVersion: ProtocolVersion,
|
||||
RouteID: routeID,
|
||||
ClusterID: "cluster-a",
|
||||
From: PeerIdentity{ClusterID: "cluster-a", NodeID: "node-a"},
|
||||
To: PeerIdentity{ClusterID: "cluster-a", NodeID: "node-b"},
|
||||
Channel: SyntheticChannelFabricControl,
|
||||
MessageType: SyntheticMessageRouteHealth,
|
||||
TTL: 8,
|
||||
HopCount: 1,
|
||||
Visited: []string{"node-a"},
|
||||
Sequence: sequence,
|
||||
SentAt: now,
|
||||
}
|
||||
}
|
||||
|
||||
func testSyntheticAckEnvelope(routeID string, sequence uint64) SyntheticEnvelope {
|
||||
ack := testSyntheticEnvelope(routeID, sequence)
|
||||
ack.From = PeerIdentity{ClusterID: "cluster-a", NodeID: "node-b"}
|
||||
ack.To = PeerIdentity{ClusterID: "cluster-a", NodeID: "node-a"}
|
||||
ack.MessageType = SyntheticMessageRouteHealthAck
|
||||
ack.Visited = []string{"node-a", "node-b"}
|
||||
return ack
|
||||
}
|
||||
@@ -13,17 +13,18 @@ import (
|
||||
const FileName = "identity.json"
|
||||
|
||||
// Identity is the node identity record stored as JSON in the identity file
// (FileName).
type Identity struct {
	NodeID          string `json:"node_id"`
	ClusterID       string `json:"cluster_id"`
	NodeName        string `json:"node_name"`
	NodeFingerprint string `json:"node_fingerprint"`
	PublicKey       string `json:"public_key"`
	IdentityStatus  string `json:"identity_status"`
	// PendingJoinRequestID is cleared when the node is marked approved.
	PendingJoinRequestID string `json:"pending_join_request_id,omitempty"`
	// ClusterAuthorityPublicKey and ClusterAuthorityFingerprint are recorded
	// when the node is marked approved with an authority.
	ClusterAuthorityPublicKey   string `json:"cluster_authority_public_key,omitempty"`
	ClusterAuthorityFingerprint string `json:"cluster_authority_fingerprint,omitempty"`
	// ClusterAuthorityQuorum is stored as raw JSON and is not decoded here.
	ClusterAuthorityQuorum json.RawMessage `json:"cluster_authority_quorum,omitempty"`
	CreatedAt              time.Time       `json:"created_at"`
	UpdatedAt              time.Time       `json:"updated_at"`
}
|
||||
|
||||
func LoadOrCreate(dir, clusterID, nodeName string) (Identity, error) {
|
||||
@@ -103,6 +104,10 @@ func MarkApproved(dir string, nodeID, clusterID, status string) (Identity, error
|
||||
}
|
||||
|
||||
func MarkApprovedWithAuthority(dir string, nodeID, clusterID, status, authorityPublicKey, authorityFingerprint string) (Identity, error) {
|
||||
return MarkApprovedWithAuthorityAndQuorum(dir, nodeID, clusterID, status, authorityPublicKey, authorityFingerprint, nil)
|
||||
}
|
||||
|
||||
func MarkApprovedWithAuthorityAndQuorum(dir string, nodeID, clusterID, status, authorityPublicKey, authorityFingerprint string, authorityQuorum json.RawMessage) (Identity, error) {
|
||||
path := filepath.Join(dir, FileName)
|
||||
identity, err := Load(path)
|
||||
if err != nil {
|
||||
@@ -114,6 +119,7 @@ func MarkApprovedWithAuthority(dir string, nodeID, clusterID, status, authorityP
|
||||
identity.PendingJoinRequestID = ""
|
||||
identity.ClusterAuthorityPublicKey = authorityPublicKey
|
||||
identity.ClusterAuthorityFingerprint = authorityFingerprint
|
||||
identity.ClusterAuthorityQuorum = authorityQuorum
|
||||
if err := Save(path, identity); err != nil {
|
||||
return Identity{}, err
|
||||
}
|
||||
|
||||
@@ -2,10 +2,12 @@ package supervisor
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/client"
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/webingress"
|
||||
)
|
||||
|
||||
type Supervisor interface {
|
||||
@@ -14,6 +16,8 @@ type Supervisor interface {
|
||||
|
||||
type StubSupervisor struct {
|
||||
Version string
|
||||
WebIngressRuntimeEnabled bool
|
||||
WebIngressManager *webingress.Manager
|
||||
RemoteWorkspaceRealAdapter RemoteWorkspaceRealAdapterConfig
|
||||
}
|
||||
|
||||
@@ -56,6 +60,9 @@ func (s StubSupervisor) applyOne(workload client.DesiredWorkload) client.Workloa
|
||||
}
|
||||
if desiredState != "enabled" {
|
||||
payload["reason"] = "desired_state_not_enabled"
|
||||
if (serviceType == "public-ingress" || serviceType == "admin-ingress") && s.WebIngressManager != nil {
|
||||
payload["listener_status"] = s.WebIngressManager.Stop(context.Background())
|
||||
}
|
||||
return client.WorkloadStatusRequest{
|
||||
ReportedState: "stopped",
|
||||
RuntimeMode: runtimeMode,
|
||||
@@ -74,6 +81,57 @@ func (s StubSupervisor) applyOne(workload client.DesiredWorkload) client.Workloa
|
||||
StatusPayload: payload,
|
||||
}
|
||||
}
|
||||
if serviceType == "public-ingress" || serviceType == "admin-ingress" {
|
||||
contract := s.webIngressContract(serviceType, workload.Config)
|
||||
for key, value := range contract {
|
||||
payload[key] = value
|
||||
}
|
||||
if contract["contract_valid"] == true {
|
||||
payload["reason"] = "web_ingress_contract_ready"
|
||||
payload["execution_mode"] = "contract_probe"
|
||||
payload["traffic"] = "https_edge_to_fabric_service_channel"
|
||||
if contract["real_listener_requested"] == true && contract["real_listener_runtime_enabled"] != true {
|
||||
payload["reason"] = "web_ingress_real_listener_gate_disabled"
|
||||
payload["traffic"] = "blocked"
|
||||
return client.WorkloadStatusRequest{
|
||||
ReportedState: "degraded",
|
||||
RuntimeMode: runtimeMode,
|
||||
Version: version,
|
||||
StatusPayload: payload,
|
||||
}
|
||||
}
|
||||
if contract["real_listener_start_allowed"] == true && s.WebIngressManager != nil {
|
||||
listenerStatus := s.WebIngressManager.Apply(context.Background(), webIngressListenerConfig(serviceType, workload.Config))
|
||||
payload["listener_status"] = listenerStatus
|
||||
payload["ports_opened_by_runtime"] = listenerStatus.Running
|
||||
payload["ports_opened_by_stub"] = false
|
||||
if !listenerStatus.HTTPSRunning {
|
||||
payload["reason"] = "web_ingress_listener_partial"
|
||||
payload["traffic"] = "blocked"
|
||||
return client.WorkloadStatusRequest{
|
||||
ReportedState: "degraded",
|
||||
RuntimeMode: runtimeMode,
|
||||
Version: version,
|
||||
StatusPayload: payload,
|
||||
}
|
||||
}
|
||||
}
|
||||
return client.WorkloadStatusRequest{
|
||||
ReportedState: "running",
|
||||
RuntimeMode: runtimeMode,
|
||||
Version: version,
|
||||
StatusPayload: payload,
|
||||
}
|
||||
}
|
||||
payload["reason"] = "web_ingress_contract_invalid"
|
||||
payload["traffic"] = "blocked"
|
||||
return client.WorkloadStatusRequest{
|
||||
ReportedState: "degraded",
|
||||
RuntimeMode: runtimeMode,
|
||||
Version: version,
|
||||
StatusPayload: payload,
|
||||
}
|
||||
}
|
||||
if serviceType == "synthetic.echo" && runtimeMode == "native" {
|
||||
payload["reason"] = "internal_synthetic_echo_ready"
|
||||
payload["execution_mode"] = "builtin"
|
||||
@@ -85,6 +143,23 @@ func (s StubSupervisor) applyOne(workload client.DesiredWorkload) client.Workloa
|
||||
StatusPayload: payload,
|
||||
}
|
||||
}
|
||||
if (serviceType == "vpn-exit" || serviceType == "ipv4-egress" || serviceType == "vpn-client") && runtimeMode == "native" {
|
||||
for key, value := range vpnFabricOnlyContract(serviceType, workload.Config) {
|
||||
payload[key] = value
|
||||
}
|
||||
payload["execution_mode"] = "contract_probe"
|
||||
payload["fabric_transport"] = "quic_only"
|
||||
payload["fabric_service_channel_required"] = true
|
||||
payload["backend_relay_fallback"] = false
|
||||
payload["legacy_protocol_compatibility"] = false
|
||||
payload["traffic"] = "fabric_service_channel_only"
|
||||
return client.WorkloadStatusRequest{
|
||||
ReportedState: "running",
|
||||
RuntimeMode: runtimeMode,
|
||||
Version: version,
|
||||
StatusPayload: payload,
|
||||
}
|
||||
}
|
||||
if serviceType == "rdp-worker" && runtimeMode == "native" && boolConfig(workload.Config, "adapter_contract_probe") {
|
||||
payload["reason"] = "remote_workspace_adapter_contract_probe_ready"
|
||||
payload["execution_mode"] = "contract_probe"
|
||||
@@ -126,6 +201,173 @@ func (s StubSupervisor) applyOne(workload client.DesiredWorkload) client.Workloa
|
||||
}
|
||||
}
|
||||
|
||||
func vpnFabricOnlyContract(serviceType string, config map[string]any) map[string]any {
|
||||
role := "vpn-client"
|
||||
reason := "vpn_client_node_contract_ready"
|
||||
serviceClass := "vpn_packets"
|
||||
internetEgress := false
|
||||
if serviceType == "vpn-exit" || serviceType == "ipv4-egress" {
|
||||
role = "ipv4-egress"
|
||||
reason = "ipv4_egress_contract_ready"
|
||||
internetEgress = true
|
||||
}
|
||||
contract := map[string]any{
|
||||
"schema_version": "rap.vpn.fabric_node_contract.v1",
|
||||
"reason": reason,
|
||||
"role": role,
|
||||
"service_class": serviceClass,
|
||||
"internet_egress": internetEgress,
|
||||
"exit_pool_id": stringConfig(config, "pool_id", ""),
|
||||
"exit_region": stringConfig(config, "region", ""),
|
||||
"allowed_cidrs": stringSliceConfig(config, "allowed_cidrs"),
|
||||
"dns_servers": stringSliceConfig(config, "dns_servers"),
|
||||
"client_policy_source": stringConfig(config, "client_policy_source", "fabric_access_policy"),
|
||||
"android_node_supported": serviceType == "vpn-client",
|
||||
"ipv4_exit_supported": internetEgress,
|
||||
"fabric_service_channel_required": true,
|
||||
"packet_runtime_status": "fabric_channel_binding_pending_runtime",
|
||||
"service_binding": vpnServiceBindingContract(serviceType, config),
|
||||
}
|
||||
return contract
|
||||
}
|
||||
|
||||
func vpnServiceBindingContract(serviceType string, config map[string]any) map[string]any {
|
||||
if serviceType == "vpn-exit" || serviceType == "ipv4-egress" {
|
||||
return map[string]any{
|
||||
"type": "ipv4_egress",
|
||||
"accepts_service_class": "vpn_packets",
|
||||
"accepts_from_fabric_only": true,
|
||||
"legacy_protocol_listener": false,
|
||||
"exit_pool_id": stringConfig(config, "pool_id", ""),
|
||||
"region": stringConfig(config, "region", ""),
|
||||
"allowed_cidrs": stringSliceConfig(config, "allowed_cidrs"),
|
||||
"dns_servers": stringSliceConfig(config, "dns_servers"),
|
||||
"internet_egress": true,
|
||||
"requires_host_packet_runtime": true,
|
||||
}
|
||||
}
|
||||
return map[string]any{
|
||||
"type": "local_ipv4_ingress",
|
||||
"accepts_from": []string{"android_vpnservice_tun", "linux_tun", "host_service_port"},
|
||||
"service_class": "vpn_packets",
|
||||
"exit_selection": "pool",
|
||||
"preferred_exit_pool_id": stringConfig(config, "exit_pool_id", ""),
|
||||
"listen_tcp_ports": intSliceConfig(config, "listen_tcp_ports"),
|
||||
"listen_udp_ports": intSliceConfig(config, "listen_udp_ports"),
|
||||
"tun_required": true,
|
||||
"route_authority": "fabric_farm",
|
||||
"legacy_protocol_listener": false,
|
||||
"requires_fabric_node_runtime": true,
|
||||
}
|
||||
}
|
||||
|
||||
func webIngressListenerConfig(serviceType string, config map[string]any) webingress.ListenerConfig {
|
||||
return webingress.ListenerConfig{
|
||||
RuntimeConfig: webingress.RuntimeConfig{
|
||||
ServiceType: serviceType,
|
||||
Scope: stringConfig(config, "scope", ""),
|
||||
ServiceClasses: stringSliceConfig(config, "service_classes"),
|
||||
TLSMode: stringConfig(config, "tls_mode", "terminate"),
|
||||
HTTPPort: intConfig(config, "listen_http_port", 80),
|
||||
HTTPSPort: intConfig(config, "listen_https_port", 443),
|
||||
},
|
||||
HTTPAddr: stringConfig(config, "listen_http_addr", ":80"),
|
||||
HTTPSAddr: stringConfig(config, "listen_https_addr", ":443"),
|
||||
TLSCertFile: stringConfig(config, "tls_cert_file", ""),
|
||||
TLSKeyFile: stringConfig(config, "tls_key_file", ""),
|
||||
}
|
||||
}
|
||||
|
||||
func (s StubSupervisor) webIngressContract(serviceType string, config map[string]any) map[string]any {
|
||||
httpPort := intConfig(config, "listen_http_port", 80)
|
||||
httpsPort := intConfig(config, "listen_https_port", 443)
|
||||
tlsMode := strings.TrimSpace(stringConfig(config, "tls_mode", "terminate"))
|
||||
serviceClasses := stringSliceConfig(config, "service_classes")
|
||||
scope := strings.TrimSpace(stringConfig(config, "scope", ""))
|
||||
realListenerRequested := boolConfig(config, "real_listener_enabled")
|
||||
allowedClasses := webIngressAllowedServiceClasses(serviceType)
|
||||
missing := []string{}
|
||||
if httpPort != 80 {
|
||||
missing = append(missing, "listen_http_port_must_be_80")
|
||||
}
|
||||
if httpsPort != 443 {
|
||||
missing = append(missing, "listen_https_port_must_be_443")
|
||||
}
|
||||
if tlsMode != "terminate" && tlsMode != "passthrough-approved-terminator" {
|
||||
missing = append(missing, "tls_mode_invalid")
|
||||
}
|
||||
if scope == "" {
|
||||
missing = append(missing, "scope_required")
|
||||
}
|
||||
if len(serviceClasses) == 0 {
|
||||
missing = append(missing, "service_classes_required")
|
||||
}
|
||||
for _, serviceClass := range serviceClasses {
|
||||
if !containsString(allowedClasses, serviceClass) {
|
||||
missing = append(missing, "service_class_not_allowed:"+serviceClass)
|
||||
}
|
||||
}
|
||||
return map[string]any{
|
||||
"schema_version": "rap.web_ingress.workload_contract.v1",
|
||||
"contract_valid": len(missing) == 0,
|
||||
"missing_checks": missing,
|
||||
"service_edge_only": true,
|
||||
"authority_service": false,
|
||||
"fabric_transport": "quic_only",
|
||||
"http_between_fabric_nodes": false,
|
||||
"listen_http_port": httpPort,
|
||||
"listen_https_port": httpsPort,
|
||||
"tls_mode": tlsMode,
|
||||
"scope": scope,
|
||||
"service_classes": serviceClasses,
|
||||
"allowed_service_classes": allowedClasses,
|
||||
"fabric_service_channel_required": true,
|
||||
"runtime_roles_required": webIngressRuntimeRoles(serviceClasses),
|
||||
"payload_forwarding": "contract_only",
|
||||
"real_listener_requested": realListenerRequested,
|
||||
"real_listener_runtime_enabled": s.WebIngressRuntimeEnabled,
|
||||
"real_listener_start_allowed": len(missing) == 0 && realListenerRequested && s.WebIngressRuntimeEnabled,
|
||||
"runtime_handler_ready": len(missing) == 0,
|
||||
"runtime_handler_contract": "rap.web_ingress.runtime_response.v1",
|
||||
"runtime_handler_payload_status": "fabric_service_channel_binding_not_implemented",
|
||||
"fabric_envelope_schema": webingress.FabricServiceChannelEnvelopeSchema,
|
||||
"fabric_runtime_response_schema": "rap.web_ingress.fabric_runtime_response.v1",
|
||||
"fabric_envelope_signer": "ed25519_available",
|
||||
"fabric_envelope_sender": "mesh_request_response_runtime_adapter_available",
|
||||
"fabric_quic_stream": "web_ingress_forward",
|
||||
"fabric_quic_stream_id": 2,
|
||||
"fabric_runtime_receiver": "signed_envelope_receiver_available",
|
||||
"admin_runtime_dispatcher": "read_only_manifest_and_health_available",
|
||||
"control_api_binding": "read_only_projection_skeleton_available",
|
||||
"runtime_receiver_policy": "trusted_keys_and_service_class_allow_list",
|
||||
"ports_opened_by_stub": false,
|
||||
}
|
||||
}
|
||||
|
||||
// webIngressAllowedServiceClasses lists the service classes a web ingress
// workload may expose: the admin classes for "admin-ingress", the portal
// classes for any other service type.
func webIngressAllowedServiceClasses(serviceType string) []string {
	switch serviceType {
	case "admin-ingress":
		return []string{"platform_admin", "cluster_admin"}
	default:
		return []string{"organization_portal", "user_portal"}
	}
}
|
||||
|
||||
func webIngressRuntimeRoles(serviceClasses []string) []string {
|
||||
roles := []string{}
|
||||
for _, serviceClass := range serviceClasses {
|
||||
switch serviceClass {
|
||||
case "platform_admin":
|
||||
roles = append(roles, "global-admin-runtime", "identity-runtime", "policy-authority", "audit-sink")
|
||||
case "cluster_admin":
|
||||
roles = append(roles, "cluster-admin-runtime", "identity-runtime", "policy-authority", "audit-sink")
|
||||
case "organization_portal":
|
||||
roles = append(roles, "organization-portal-runtime", "identity-runtime", "policy-authority", "audit-sink")
|
||||
case "user_portal":
|
||||
roles = append(roles, "user-portal-runtime", "identity-runtime", "policy-authority", "audit-sink")
|
||||
}
|
||||
}
|
||||
return dedupeStrings(roles)
|
||||
}
|
||||
|
||||
func boolConfig(values map[string]any, key string) bool {
|
||||
if values == nil {
|
||||
return false
|
||||
@@ -144,6 +386,157 @@ func boolConfig(values map[string]any, key string) bool {
|
||||
}
|
||||
}
|
||||
|
||||
// intConfig reads values[key] as an int and returns fallback when the key is
// absent or the value cannot be interpreted as a whole number.
//
// Accepted forms are int, int64, float64 (as produced by JSON decoding) and
// decimal strings with optional surrounding whitespace. A float64 is accepted
// only when it holds an exact integer: fractional values such as 80.5 used to
// be truncated silently (and would then satisfy a "port must be 80" check),
// and NaN or out-of-range values have an implementation-defined conversion,
// so all of those now yield fallback.
func intConfig(values map[string]any, key string, fallback int) int {
	if values == nil {
		return fallback
	}
	switch value := values[key].(type) {
	case int:
		return value
	case int64:
		return int(value)
	case float64:
		// Floats beyond ±2^53 cannot represent every integer exactly, and
		// NaN fails both comparisons of the round-trip check below.
		const maxExactFloat = 1 << 53
		if value < -maxExactFloat || value > maxExactFloat {
			return fallback
		}
		whole := int(value)
		if float64(whole) != value {
			return fallback
		}
		return whole
	case string:
		parsed, err := strconv.Atoi(strings.TrimSpace(value))
		if err != nil {
			return fallback
		}
		return parsed
	default:
		return fallback
	}
}
|
||||
|
||||
// stringConfig returns values[key] when it holds a string (including the
// empty string). A nil map, a missing key, or a value of any other type
// yields fallback.
func stringConfig(values map[string]any, key string, fallback string) string {
	// Indexing a nil map is safe and produces a nil interface, which fails
	// the assertion the same way a missing key or non-string value does.
	if text, isString := values[key].(string); isString {
		return text
	}
	return fallback
}
|
||||
|
||||
func stringSliceConfig(values map[string]any, key string) []string {
|
||||
if values == nil {
|
||||
return nil
|
||||
}
|
||||
value, ok := values[key]
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
switch typed := value.(type) {
|
||||
case []string:
|
||||
return dedupeStrings(typed)
|
||||
case []any:
|
||||
out := []string{}
|
||||
for _, item := range typed {
|
||||
if text, ok := item.(string); ok {
|
||||
out = append(out, strings.TrimSpace(text))
|
||||
}
|
||||
}
|
||||
return dedupeStrings(out)
|
||||
case string:
|
||||
parts := strings.Split(typed, ",")
|
||||
for index := range parts {
|
||||
parts[index] = strings.TrimSpace(parts[index])
|
||||
}
|
||||
return dedupeStrings(parts)
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// intSliceConfig reads values[key] as a list of port numbers.
//
// It accepts a []int, a []any whose items are int, int64, float64 or decimal
// strings, a comma-separated string, or a single scalar of one of those
// types. Only values in 1..65535 are kept, duplicates are dropped, and
// first-seen order is preserved. A nil map or missing key yields nil; a
// present key with nothing usable yields an empty, non-nil slice.
//
// Floats must hold an exact integer: a fractional value such as 443.7 was
// previously truncated into a valid-looking port and is now ignored. String
// items are parsed directly with strconv instead of through a temporary map.
func intSliceConfig(values map[string]any, key string) []int {
	value, ok := values[key] // indexing a nil map is safe and reports !ok
	if !ok {
		return nil
	}

	// toPort converts one item to a port, reporting false for unsupported
	// types, unparseable strings, and anything outside 1..65535.
	toPort := func(item any) (int, bool) {
		switch typed := item.(type) {
		case int:
			return typed, typed >= 1 && typed <= 65535
		case int64:
			if typed < 1 || typed > 65535 {
				return 0, false
			}
			return int(typed), true
		case float64:
			// The range check runs first so the int conversion is always
			// well defined; NaN fails the round-trip comparison.
			if typed < 1 || typed > 65535 || typed != float64(int(typed)) {
				return 0, false
			}
			return int(typed), true
		case string:
			parsed, err := strconv.Atoi(strings.TrimSpace(typed))
			if err != nil {
				return 0, false
			}
			return parsed, parsed >= 1 && parsed <= 65535
		default:
			return 0, false
		}
	}

	var candidates []any
	switch typed := value.(type) {
	case []int:
		candidates = make([]any, 0, len(typed))
		for _, item := range typed {
			candidates = append(candidates, item)
		}
	case []any:
		candidates = typed
	case string:
		for _, part := range strings.Split(typed, ",") {
			candidates = append(candidates, part)
		}
	default:
		candidates = []any{typed}
	}

	seen := make(map[int]struct{}, len(candidates))
	ports := make([]int, 0, len(candidates))
	for _, item := range candidates {
		port, valid := toPort(item)
		if !valid {
			continue
		}
		if _, duplicate := seen[port]; duplicate {
			continue
		}
		seen[port] = struct{}{}
		ports = append(ports, port)
	}
	return ports
}
|
||||
|
||||
// dedupeStrings trims every entry, drops entries that are empty after
// trimming, and removes duplicates while keeping first-seen order. The result
// is always non-nil, even for nil or empty input.
func dedupeStrings(values []string) []string {
	unique := make([]string, 0, len(values))
	known := make(map[string]struct{}, len(values))
	for i := range values {
		entry := strings.TrimSpace(values[i])
		if _, duplicate := known[entry]; duplicate || entry == "" {
			continue
		}
		known[entry] = struct{}{}
		unique = append(unique, entry)
	}
	return unique
}
|
||||
|
||||
// containsString reports whether needle is exactly equal to some element of
// values. An empty or nil slice never contains anything.
func containsString(values []string, needle string) bool {
	for i := 0; i < len(values); i++ {
		if values[i] == needle {
			return true
		}
	}
	return false
}
|
||||
|
||||
func remoteWorkspaceAdapterChannels() []map[string]any {
|
||||
return []map[string]any{
|
||||
{"name": "input", "direction": "client_to_adapter", "reliability": "reliable_ordered", "priority": "critical", "droppable": true, "may_block_input": false},
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"testing"
|
||||
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/client"
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/webingress"
|
||||
)
|
||||
|
||||
func TestStubSupervisorReportsDegradedForEnabledWorkload(t *testing.T) {
|
||||
@@ -73,6 +74,245 @@ func TestStubSupervisorReportsBuiltinFabricServicesRunning(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestStubSupervisorReportsVPNFabricOnlyContractsRunning(t *testing.T) {
|
||||
statuses, err := (StubSupervisor{Version: "test"}).Apply(context.Background(), []client.DesiredWorkload{
|
||||
{
|
||||
ServiceType: "ipv4-egress",
|
||||
DesiredState: "enabled",
|
||||
RuntimeMode: "native",
|
||||
Config: map[string]any{
|
||||
"pool_id": "us-los-angeles-ipv4",
|
||||
"region": "us-los-angeles",
|
||||
"allowed_cidrs": []any{"0.0.0.0/0"},
|
||||
"dns_servers": []any{"192.168.200.210"},
|
||||
},
|
||||
},
|
||||
{
|
||||
ServiceType: "vpn-client",
|
||||
DesiredState: "enabled",
|
||||
RuntimeMode: "native",
|
||||
Config: map[string]any{
|
||||
"exit_pool_id": "us-los-angeles-ipv4",
|
||||
"listen_tcp_ports": []any{443, "8443"},
|
||||
"listen_udp_ports": "443,51820",
|
||||
},
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("apply desired workload: %v", err)
|
||||
}
|
||||
if len(statuses) != 2 {
|
||||
t.Fatalf("statuses length = %d", len(statuses))
|
||||
}
|
||||
for _, status := range statuses {
|
||||
if status.ReportedState != "running" {
|
||||
t.Fatalf("ReportedState = %q", status.ReportedState)
|
||||
}
|
||||
if status.StatusPayload["execution_mode"] != "contract_probe" {
|
||||
t.Fatalf("execution_mode = %v", status.StatusPayload["execution_mode"])
|
||||
}
|
||||
if status.StatusPayload["fabric_transport"] != "quic_only" {
|
||||
t.Fatalf("fabric_transport = %v", status.StatusPayload["fabric_transport"])
|
||||
}
|
||||
if status.StatusPayload["backend_relay_fallback"] != false {
|
||||
t.Fatalf("backend_relay_fallback = %v", status.StatusPayload["backend_relay_fallback"])
|
||||
}
|
||||
if status.StatusPayload["legacy_protocol_compatibility"] != false {
|
||||
t.Fatalf("legacy_protocol_compatibility = %v", status.StatusPayload["legacy_protocol_compatibility"])
|
||||
}
|
||||
}
|
||||
if statuses[0].StatusPayload["role"] != "ipv4-egress" || statuses[0].StatusPayload["internet_egress"] != true {
|
||||
t.Fatalf("ipv4 egress payload = %#v", statuses[0].StatusPayload)
|
||||
}
|
||||
if statuses[1].StatusPayload["role"] != "vpn-client" || statuses[1].StatusPayload["android_node_supported"] != true {
|
||||
t.Fatalf("vpn client payload = %#v", statuses[1].StatusPayload)
|
||||
}
|
||||
exitBinding := statuses[0].StatusPayload["service_binding"].(map[string]any)
|
||||
if exitBinding["type"] != "ipv4_egress" || exitBinding["accepts_from_fabric_only"] != true || exitBinding["exit_pool_id"] != "us-los-angeles-ipv4" {
|
||||
t.Fatalf("ipv4 egress binding = %#v", exitBinding)
|
||||
}
|
||||
clientBinding := statuses[1].StatusPayload["service_binding"].(map[string]any)
|
||||
if clientBinding["type"] != "local_ipv4_ingress" || clientBinding["preferred_exit_pool_id"] != "us-los-angeles-ipv4" || clientBinding["legacy_protocol_listener"] != false {
|
||||
t.Fatalf("vpn client binding = %#v", clientBinding)
|
||||
}
|
||||
if got := clientBinding["listen_tcp_ports"].([]int); len(got) != 2 || got[0] != 443 || got[1] != 8443 {
|
||||
t.Fatalf("listen_tcp_ports = %#v", got)
|
||||
}
|
||||
if got := clientBinding["listen_udp_ports"].([]int); len(got) != 2 || got[0] != 443 || got[1] != 51820 {
|
||||
t.Fatalf("listen_udp_ports = %#v", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStubSupervisorReportsWebIngressContractReady(t *testing.T) {
|
||||
statuses, err := (StubSupervisor{Version: "test"}).Apply(context.Background(), []client.DesiredWorkload{
|
||||
{
|
||||
ServiceType: "admin-ingress",
|
||||
DesiredState: "enabled",
|
||||
RuntimeMode: "native",
|
||||
Config: map[string]any{
|
||||
"listen_http_port": 80,
|
||||
"listen_https_port": 443,
|
||||
"tls_mode": "terminate",
|
||||
"scope": "platform",
|
||||
"service_classes": []any{"platform_admin", "cluster_admin"},
|
||||
},
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("apply desired workload: %v", err)
|
||||
}
|
||||
if statuses[0].ReportedState != "running" {
|
||||
t.Fatalf("ReportedState = %q", statuses[0].ReportedState)
|
||||
}
|
||||
payload := statuses[0].StatusPayload
|
||||
if payload["reason"] != "web_ingress_contract_ready" ||
|
||||
payload["fabric_transport"] != "quic_only" ||
|
||||
payload["http_between_fabric_nodes"] != false ||
|
||||
payload["authority_service"] != false ||
|
||||
payload["real_listener_start_allowed"] != false ||
|
||||
payload["runtime_handler_ready"] != true ||
|
||||
payload["runtime_handler_payload_status"] != "fabric_service_channel_binding_not_implemented" ||
|
||||
payload["ports_opened_by_stub"] != false {
|
||||
t.Fatalf("unexpected payload: %#v", payload)
|
||||
}
|
||||
roles, ok := payload["runtime_roles_required"].([]string)
|
||||
if !ok || !containsString(roles, "global-admin-runtime") || !containsString(roles, "policy-authority") {
|
||||
t.Fatalf("runtime roles = %#v", payload["runtime_roles_required"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestStubSupervisorBlocksWebIngressRealListenerWithoutRuntimeGate(t *testing.T) {
|
||||
statuses, err := (StubSupervisor{Version: "test"}).Apply(context.Background(), []client.DesiredWorkload{
|
||||
{
|
||||
ServiceType: "admin-ingress",
|
||||
DesiredState: "enabled",
|
||||
RuntimeMode: "native",
|
||||
Config: map[string]any{
|
||||
"listen_http_port": 80,
|
||||
"listen_https_port": 443,
|
||||
"tls_mode": "terminate",
|
||||
"scope": "platform",
|
||||
"service_classes": []any{"platform_admin"},
|
||||
"real_listener_enabled": true,
|
||||
},
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("apply desired workload: %v", err)
|
||||
}
|
||||
if statuses[0].ReportedState != "degraded" {
|
||||
t.Fatalf("ReportedState = %q", statuses[0].ReportedState)
|
||||
}
|
||||
payload := statuses[0].StatusPayload
|
||||
if payload["reason"] != "web_ingress_real_listener_gate_disabled" ||
|
||||
payload["real_listener_requested"] != true ||
|
||||
payload["real_listener_runtime_enabled"] != false ||
|
||||
payload["real_listener_start_allowed"] != false ||
|
||||
payload["ports_opened_by_stub"] != false {
|
||||
t.Fatalf("unexpected payload: %#v", payload)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStubSupervisorAllowsWebIngressRealListenerGateButDoesNotOpenPorts(t *testing.T) {
|
||||
statuses, err := (StubSupervisor{Version: "test", WebIngressRuntimeEnabled: true}).Apply(context.Background(), []client.DesiredWorkload{
|
||||
{
|
||||
ServiceType: "admin-ingress",
|
||||
DesiredState: "enabled",
|
||||
RuntimeMode: "native",
|
||||
Config: map[string]any{
|
||||
"listen_http_port": 80,
|
||||
"listen_https_port": 443,
|
||||
"tls_mode": "terminate",
|
||||
"scope": "platform",
|
||||
"service_classes": []any{"platform_admin"},
|
||||
"real_listener_enabled": true,
|
||||
},
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("apply desired workload: %v", err)
|
||||
}
|
||||
if statuses[0].ReportedState != "running" {
|
||||
t.Fatalf("ReportedState = %q", statuses[0].ReportedState)
|
||||
}
|
||||
payload := statuses[0].StatusPayload
|
||||
if payload["real_listener_requested"] != true ||
|
||||
payload["real_listener_runtime_enabled"] != true ||
|
||||
payload["real_listener_start_allowed"] != true ||
|
||||
payload["ports_opened_by_stub"] != false {
|
||||
t.Fatalf("unexpected payload: %#v", payload)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStubSupervisorStartsWebIngressManagerWhenRealListenerAllowed(t *testing.T) {
|
||||
manager := webingress.NewManager()
|
||||
statuses, err := (StubSupervisor{Version: "test", WebIngressRuntimeEnabled: true, WebIngressManager: manager}).Apply(context.Background(), []client.DesiredWorkload{
|
||||
{
|
||||
ServiceType: "admin-ingress",
|
||||
DesiredState: "enabled",
|
||||
RuntimeMode: "native",
|
||||
Config: map[string]any{
|
||||
"listen_http_port": 80,
|
||||
"listen_https_port": 443,
|
||||
"listen_http_addr": "127.0.0.1:0",
|
||||
"listen_https_addr": "127.0.0.1:0",
|
||||
"tls_mode": "terminate",
|
||||
"scope": "platform",
|
||||
"service_classes": []any{"platform_admin"},
|
||||
"real_listener_enabled": true,
|
||||
},
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("apply desired workload: %v", err)
|
||||
}
|
||||
if statuses[0].ReportedState != "degraded" {
|
||||
t.Fatalf("ReportedState = %q", statuses[0].ReportedState)
|
||||
}
|
||||
payload := statuses[0].StatusPayload
|
||||
listenerStatus, ok := payload["listener_status"].(webingress.ListenerStatus)
|
||||
if !ok {
|
||||
t.Fatalf("listener_status = %#v", payload["listener_status"])
|
||||
}
|
||||
if !listenerStatus.HTTPRunning || listenerStatus.HTTPSRunning || listenerStatus.HTTPAddr == "" {
|
||||
t.Fatalf("listener status = %+v", listenerStatus)
|
||||
}
|
||||
if payload["reason"] != "web_ingress_listener_partial" || payload["ports_opened_by_runtime"] != true || payload["ports_opened_by_stub"] != false {
|
||||
t.Fatalf("payload = %#v", payload)
|
||||
}
|
||||
_ = manager.Stop(context.Background())
|
||||
}
|
||||
|
||||
func TestStubSupervisorBlocksInvalidWebIngressContract(t *testing.T) {
|
||||
statuses, err := (StubSupervisor{Version: "test"}).Apply(context.Background(), []client.DesiredWorkload{
|
||||
{
|
||||
ServiceType: "public-ingress",
|
||||
DesiredState: "enabled",
|
||||
RuntimeMode: "native",
|
||||
Config: map[string]any{
|
||||
"listen_http_port": 8080,
|
||||
"listen_https_port": 443,
|
||||
"scope": "organization",
|
||||
"service_classes": []any{"platform_admin"},
|
||||
},
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("apply desired workload: %v", err)
|
||||
}
|
||||
if statuses[0].ReportedState != "degraded" {
|
||||
t.Fatalf("ReportedState = %q", statuses[0].ReportedState)
|
||||
}
|
||||
payload := statuses[0].StatusPayload
|
||||
if payload["reason"] != "web_ingress_contract_invalid" || payload["traffic"] != "blocked" {
|
||||
t.Fatalf("unexpected payload: %#v", payload)
|
||||
}
|
||||
missing, ok := payload["missing_checks"].([]string)
|
||||
if !ok || !containsString(missing, "listen_http_port_must_be_80") || !containsString(missing, "service_class_not_allowed:platform_admin") {
|
||||
t.Fatalf("missing checks = %#v", payload["missing_checks"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestStubSupervisorKeepsUnsupportedEnabledWorkloadDegraded(t *testing.T) {
|
||||
statuses, err := (StubSupervisor{Version: "test"}).Apply(context.Background(), []client.DesiredWorkload{
|
||||
{ServiceType: "rdp-worker", DesiredState: "enabled", RuntimeMode: "container"},
|
||||
|
||||
@@ -0,0 +1,189 @@
|
||||
package vpnruntime
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/mesh"
|
||||
)
|
||||
|
||||
// FabricSessionFrameWriter is the sending side of a fabric session: anything
// that can write a single fabricproto.Frame. The peer registry stores one per
// VPN connection and uses it to send frames back to that peer.
type FabricSessionFrameWriter interface {
|
||||
// SendFrame writes one frame, returning an error if it could not be sent.
SendFrame(context.Context, fabricproto.Frame) error
|
||||
}
|
||||
|
||||
// FabricSessionPacketPeerRegistry tracks, per VPN connection ID, the fabric
// session peer most recently seen sending VPN packet data frames.
type FabricSessionPacketPeerRegistry struct {
|
||||
// mu guards peers.
mu sync.RWMutex
|
||||
// peers is keyed by VPN connection ID.
peers map[string]FabricSessionPacketPeer
|
||||
}
|
||||
|
||||
// FabricSessionPacketPeer is the registry entry for one VPN connection.
type FabricSessionPacketPeer struct {
|
||||
// VPNConnectionID is the connection ID decoded from the peer's data frames.
VPNConnectionID string
|
||||
// Sender is the frame writer of the session that delivered the latest frame.
Sender FabricSessionFrameWriter
|
||||
// StreamID is the stream ID of the latest registered frame.
StreamID uint64
|
||||
// StreamIDsByTrafficClass lists, per traffic class name, every stream ID
// seen so far for this connection.
StreamIDsByTrafficClass map[string][]uint64
|
||||
// RegisteredAt is the UTC time of the first registered frame.
RegisteredAt time.Time
|
||||
// LastPacketAt is the UTC time of the most recent registered frame.
LastPacketAt time.Time
|
||||
}
|
||||
|
||||
// FabricSessionPacketPeerTransport is a packet transport for one VPN
// connection that looks up the current peer in Registry on every send and
// receives from Inbox.
type FabricSessionPacketPeerTransport struct {
|
||||
// Registry supplies the peer currently registered for VPNConnectionID.
Registry *FabricSessionPacketPeerRegistry
|
||||
// Inbox is where packets received for the connection are read from.
Inbox *FabricPacketInbox
|
||||
// VPNConnectionID selects the peer and the inbox queue.
VPNConnectionID string
|
||||
}
|
||||
|
||||
func NewFabricSessionPacketPeerRegistry() *FabricSessionPacketPeerRegistry {
|
||||
return &FabricSessionPacketPeerRegistry{peers: map[string]FabricSessionPacketPeer{}}
|
||||
}
|
||||
|
||||
func (r *FabricSessionPacketPeerRegistry) RegisterFrame(ctx context.Context, sender FabricSessionFrameWriter, frame fabricproto.Frame) (bool, error) {
|
||||
if r == nil || sender == nil || frame.Type != fabricproto.FrameData || frame.StreamID == 0 {
|
||||
return false, nil
|
||||
}
|
||||
payload, err := DecodeFabricVPNPacketDataFrame(frame)
|
||||
if err != nil {
|
||||
return false, nil
|
||||
}
|
||||
if payload.VPNConnectionID == "" {
|
||||
return false, nil
|
||||
}
|
||||
now := time.Now().UTC()
|
||||
r.mu.Lock()
|
||||
if r.peers == nil {
|
||||
r.peers = map[string]FabricSessionPacketPeer{}
|
||||
}
|
||||
peer := r.peers[payload.VPNConnectionID]
|
||||
if peer.RegisteredAt.IsZero() {
|
||||
peer.RegisteredAt = now
|
||||
}
|
||||
peer.VPNConnectionID = payload.VPNConnectionID
|
||||
peer.Sender = sender
|
||||
peer.StreamID = frame.StreamID
|
||||
peer.LastPacketAt = now
|
||||
if peer.StreamIDsByTrafficClass == nil {
|
||||
peer.StreamIDsByTrafficClass = map[string][]uint64{}
|
||||
}
|
||||
trafficClass := fabricSessionTrafficClassName(frame.TrafficClass)
|
||||
if trafficClass != "" && !containsUint64(peer.StreamIDsByTrafficClass[trafficClass], frame.StreamID) {
|
||||
peer.StreamIDsByTrafficClass[trafficClass] = append(peer.StreamIDsByTrafficClass[trafficClass], frame.StreamID)
|
||||
}
|
||||
r.peers[payload.VPNConnectionID] = peer
|
||||
r.mu.Unlock()
|
||||
return true, nil
|
||||
}
|
||||
|
||||
func (r *FabricSessionPacketPeerRegistry) TransportFor(vpnConnectionID string, inbox *FabricPacketInbox) PacketTransport {
|
||||
if r == nil || inbox == nil || vpnConnectionID == "" {
|
||||
return nil
|
||||
}
|
||||
r.mu.RLock()
|
||||
peer, ok := r.peers[vpnConnectionID]
|
||||
r.mu.RUnlock()
|
||||
if !ok || peer.Sender == nil || peer.StreamID == 0 {
|
||||
return nil
|
||||
}
|
||||
return &FabricSessionPacketTransport{
|
||||
Sender: fabricSessionFrameWriterAdapter{writer: peer.Sender},
|
||||
Inbox: inbox,
|
||||
StreamID: peer.StreamID,
|
||||
StreamIDsByTrafficClass: copyStreamIDsByClass(peer.StreamIDsByTrafficClass),
|
||||
VPNConnectionID: vpnConnectionID,
|
||||
SendDirection: FabricDirectionGatewayToClient,
|
||||
ReceiveDirection: FabricDirectionClientToGateway,
|
||||
}
|
||||
}
|
||||
|
||||
func (t *FabricSessionPacketPeerTransport) SendGatewayPacketBatch(ctx context.Context, packets [][]byte) error {
|
||||
if t == nil || t.Registry == nil || t.Inbox == nil || t.VPNConnectionID == "" {
|
||||
return mesh.ErrForwardRuntimeUnavailable
|
||||
}
|
||||
transport := t.Registry.TransportFor(t.VPNConnectionID, t.Inbox)
|
||||
if transport == nil {
|
||||
return mesh.ErrForwardRuntimeUnavailable
|
||||
}
|
||||
return transport.SendGatewayPacketBatch(ctx, packets)
|
||||
}
|
||||
|
||||
func (t *FabricSessionPacketPeerTransport) ReceiveGatewayPacketBatch(ctx context.Context, timeout time.Duration) ([][]byte, error) {
|
||||
if t == nil || t.Inbox == nil || t.VPNConnectionID == "" {
|
||||
return nil, mesh.ErrForwardRuntimeUnavailable
|
||||
}
|
||||
return t.Inbox.Receive(ctx, t.VPNConnectionID, FabricDirectionClientToGateway, timeout)
|
||||
}
|
||||
|
||||
func (t *FabricSessionPacketPeerTransport) Snapshot() map[string]any {
|
||||
if t == nil {
|
||||
return map[string]any{
|
||||
"transport": "fabric_session_peer_dynamic",
|
||||
"peer_ready": false,
|
||||
}
|
||||
}
|
||||
ready := 0
|
||||
if t.Registry != nil {
|
||||
if transport := t.Registry.TransportFor(t.VPNConnectionID, t.Inbox); transport != nil {
|
||||
ready = 1
|
||||
}
|
||||
}
|
||||
return map[string]any{
|
||||
"transport": "fabric_session_peer_dynamic",
|
||||
"vpn_connection_id": t.VPNConnectionID,
|
||||
"peer_ready": ready == 1,
|
||||
}
|
||||
}
|
||||
|
||||
func (r *FabricSessionPacketPeerRegistry) Snapshot() map[string]any {
|
||||
if r == nil {
|
||||
return map[string]any{"ready": 0}
|
||||
}
|
||||
r.mu.RLock()
|
||||
defer r.mu.RUnlock()
|
||||
out := map[string]any{"ready": len(r.peers)}
|
||||
items := make([]map[string]any, 0, len(r.peers))
|
||||
for _, peer := range r.peers {
|
||||
item := map[string]any{
|
||||
"vpn_connection_id": peer.VPNConnectionID,
|
||||
"stream_id": peer.StreamID,
|
||||
}
|
||||
if !peer.RegisteredAt.IsZero() {
|
||||
item["registered_at"] = peer.RegisteredAt.Format(time.RFC3339Nano)
|
||||
}
|
||||
if !peer.LastPacketAt.IsZero() {
|
||||
item["last_packet_at"] = peer.LastPacketAt.Format(time.RFC3339Nano)
|
||||
}
|
||||
items = append(items, item)
|
||||
}
|
||||
out["peers"] = items
|
||||
return out
|
||||
}
|
||||
|
||||
type fabricSessionFrameWriterAdapter struct {
|
||||
writer FabricSessionFrameWriter
|
||||
}
|
||||
|
||||
func (a fabricSessionFrameWriterAdapter) Send(ctx context.Context, frame fabricproto.Frame) error {
|
||||
if a.writer == nil {
|
||||
return mesh.ErrForwardRuntimeUnavailable
|
||||
}
|
||||
return a.writer.SendFrame(ctx, frame)
|
||||
}
|
||||
|
||||
// containsUint64 reports whether value occurs anywhere in values.
// A nil or empty slice never contains the value.
func containsUint64(values []uint64, value uint64) bool {
	for i := range values {
		if values[i] == value {
			return true
		}
	}
	return false
}
|
||||
|
||||
// copyStreamIDsByClass returns a deep copy of values: a new map whose slices
// do not share backing arrays with the input. A nil or empty input yields nil,
// and a key mapped to an empty slice is copied as a nil slice.
func copyStreamIDsByClass(values map[string][]uint64) map[string][]uint64 {
	if len(values) == 0 {
		return nil
	}
	cloned := make(map[string][]uint64, len(values))
	for class := range values {
		var ids []uint64
		ids = append(ids, values[class]...)
		cloned[class] = ids
	}
	return cloned
}
|
||||
@@ -130,11 +130,14 @@ func (t *FabricSessionPacketTransport) ReceiveGatewayPacketBatch(ctx context.Con
|
||||
continue
|
||||
}
|
||||
if err != nil {
|
||||
if packets, receiveErr := t.Inbox.Receive(ctx, t.VPNConnectionID, direction, 100*time.Millisecond); receiveErr != nil || len(packets) > 0 {
|
||||
return packets, receiveErr
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
case frame, ok := <-frames:
|
||||
if !ok {
|
||||
return t.Inbox.Receive(ctx, t.VPNConnectionID, direction, 5*time.Millisecond)
|
||||
return t.Inbox.Receive(ctx, t.VPNConnectionID, direction, 100*time.Millisecond)
|
||||
}
|
||||
if frame.Type != fabricproto.FrameData || !t.acceptsStream(frame.StreamID) {
|
||||
continue
|
||||
|
||||
@@ -426,6 +426,59 @@ func TestFabricSessionPacketTransportRunFrameIngressDeliversInbox(t *testing.T)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricSessionPacketPeerTransportSendsReplyToLatestRegisteredPeer(t *testing.T) {
|
||||
inbox := NewFabricPacketInbox(4)
|
||||
registry := NewFabricSessionPacketPeerRegistry()
|
||||
sender := &recordingFrameSender{}
|
||||
frame, err := NewFabricVPNPacketDataFrame(FabricVPNPacketFrameInput{
|
||||
StreamID: 7,
|
||||
VPNConnectionID: "vpn-1",
|
||||
Direction: FabricDirectionClientToGateway,
|
||||
Packets: [][]byte{[]byte("request")},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("frame: %v", err)
|
||||
}
|
||||
handled, err := registry.RegisterFrame(context.Background(), sender, frame)
|
||||
if err != nil || !handled {
|
||||
t.Fatalf("register frame handled=%v err=%v", handled, err)
|
||||
}
|
||||
if err := inbox.DeliverFabricSessionFrame(context.Background(), frame); err != nil {
|
||||
t.Fatalf("deliver frame: %v", err)
|
||||
}
|
||||
transport := &FabricSessionPacketPeerTransport{
|
||||
Registry: registry,
|
||||
Inbox: inbox,
|
||||
VPNConnectionID: "vpn-1",
|
||||
}
|
||||
requests, err := transport.ReceiveGatewayPacketBatch(context.Background(), time.Second)
|
||||
if err != nil || len(requests) != 1 || string(requests[0]) != "request" {
|
||||
t.Fatalf("requests=%q err=%v", requests, err)
|
||||
}
|
||||
if err := transport.SendGatewayPacketBatch(context.Background(), [][]byte{[]byte("reply")}); err != nil {
|
||||
t.Fatalf("send reply: %v", err)
|
||||
}
|
||||
if len(sender.frames) != 1 {
|
||||
t.Fatalf("sent frames = %d, want 1", len(sender.frames))
|
||||
}
|
||||
payload, err := DecodeFabricVPNPacketDataFrame(sender.frames[0])
|
||||
if err != nil {
|
||||
t.Fatalf("decode reply: %v", err)
|
||||
}
|
||||
if payload.Direction != FabricDirectionGatewayToClient || string(payload.Packets[0]) != "reply" {
|
||||
t.Fatalf("reply payload = %+v", payload)
|
||||
}
|
||||
}
|
||||
|
||||
type recordingFrameSender struct {
|
||||
frames []fabricproto.Frame
|
||||
}
|
||||
|
||||
func (s *recordingFrameSender) SendFrame(_ context.Context, frame fabricproto.Frame) error {
|
||||
s.frames = append(s.frames, frame)
|
||||
return nil
|
||||
}
|
||||
|
||||
func TestFabricSessionPacketTransportReceiveReadsPumpFrames(t *testing.T) {
|
||||
inbox := NewFabricPacketInbox(4)
|
||||
receiver := memoryFabricSessionReceiver{
|
||||
|
||||
@@ -169,6 +169,9 @@ func (g *Gateway) Snapshot() map[string]any {
|
||||
|
||||
out := map[string]any{
|
||||
"running": running,
|
||||
"service_role": "ipv4-egress",
|
||||
"service_class": "vpn_packets",
|
||||
"adapter_contract": "fabric_channel_to_ipv4_nat",
|
||||
"transport": g.transportName(),
|
||||
"poll_timeout_ms": g.PollTimeout.Milliseconds(),
|
||||
"client_to_gateway_batches": g.clientToGatewayBatches.Load(),
|
||||
@@ -234,14 +237,7 @@ func (g *Gateway) setStopped(err error) {
|
||||
|
||||
func (g *Gateway) normalize() error {
|
||||
if g.Transport == nil {
|
||||
if g.API == nil {
|
||||
return fmt.Errorf("api client or packet transport is required")
|
||||
}
|
||||
g.Transport = BackendPacketTransport{
|
||||
API: g.API,
|
||||
ClusterID: g.ClusterID,
|
||||
VPNConnectionID: g.VPNConnectionID,
|
||||
}
|
||||
return fmt.Errorf("fabric packet transport is required; backend packet relay fallback is disabled")
|
||||
}
|
||||
if g.ClusterID == "" || g.VPNConnectionID == "" {
|
||||
return fmt.Errorf("cluster id and vpn connection id are required")
|
||||
|
||||
@@ -95,6 +95,30 @@ func TestGatewayRunClosesPacketTransportOnRuntimeError(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestGatewayNormalizeRejectsBackendPacketRelayFallback(t *testing.T) {
|
||||
gateway := &Gateway{
|
||||
API: nil,
|
||||
ClusterID: "cluster-1",
|
||||
VPNConnectionID: "vpn-1",
|
||||
}
|
||||
|
||||
err := gateway.normalize()
|
||||
if err == nil {
|
||||
t.Fatal("normalize succeeded without a fabric packet transport")
|
||||
}
|
||||
if got, want := err.Error(), "fabric packet transport is required; backend packet relay fallback is disabled"; got != want {
|
||||
t.Fatalf("normalize error = %q, want %q", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGatewaySnapshotReportsIPv4EgressServiceAdapter(t *testing.T) {
|
||||
gateway := &Gateway{Transport: &recordingGatewayTransport{}, VPNConnectionID: "vpn-1"}
|
||||
snapshot := gateway.Snapshot()
|
||||
if snapshot["service_role"] != "ipv4-egress" || snapshot["service_class"] != "vpn_packets" || snapshot["adapter_contract"] != "fabric_channel_to_ipv4_nat" {
|
||||
t.Fatalf("unexpected gateway service snapshot: %#v", snapshot)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGatewayUploadPrioritizesTCPControlPackets(t *testing.T) {
|
||||
transport := &recordingGatewayTransport{}
|
||||
gateway := &Gateway{Transport: transport, VPNConnectionID: "vpn-1"}
|
||||
|
||||
@@ -0,0 +1,190 @@
|
||||
package webingress
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Schema identifiers used by the admin runtime dispatcher.
const (
	// AdminRuntimeResponseSchema tags JSON bodies the dispatcher builds itself.
	AdminRuntimeResponseSchema = "rap.web_ingress.admin_runtime_response.v1"
	// ControlAPIProjectionRequestSchema tags requests sent to the projection client.
	ControlAPIProjectionRequestSchema = "rap.web_ingress.control_api_projection_request.v1"
	// ControlAPIProjectionResponseSchema is the only schema version accepted
	// on responses from the projection client.
	ControlAPIProjectionResponseSchema = "rap.web_ingress.control_api_projection_response.v1"
)
|
||||
|
||||
type AdminRuntimeDispatcher struct {
|
||||
ProjectionClient ControlAPIProjectionClient
|
||||
Now func() time.Time
|
||||
}
|
||||
|
||||
type ControlAPIProjectionClient interface {
|
||||
Project(ctx context.Context, request ControlAPIProjectionRequest) (ControlAPIProjectionResponse, error)
|
||||
}
|
||||
|
||||
// ControlAPIProjectionRequest is the JSON request handed to a
// ControlAPIProjectionClient.
type ControlAPIProjectionRequest struct {
	SchemaVersion string `json:"schema_version"`
	Method        string `json:"method"`
	Path          string `json:"path"`
	Query         string `json:"query,omitempty"`
	Host          string `json:"host,omitempty"`
	Scope         string `json:"scope"`
	ServiceClass  string `json:"service_class"`
	// ObservedAt is the dispatcher's timestamp, formatted as RFC 3339 with nanoseconds.
	ObservedAt string `json:"observed_at"`
}
|
||||
|
||||
// ControlAPIProjectionResponse is the JSON response returned by a
// ControlAPIProjectionClient.
type ControlAPIProjectionResponse struct {
	// SchemaVersion must equal ControlAPIProjectionResponseSchema or the
	// dispatcher rejects the response.
	SchemaVersion string `json:"schema_version"`
	Status        string `json:"status"`
	Reason        string `json:"reason,omitempty"`
	// StatusCode is used as the HTTP status; values outside 100-599 are
	// replaced with 200 by the dispatcher.
	StatusCode int `json:"status_code"`
	// Headers are copied to the response only when they pass the
	// dispatcher's response-header filter and are non-blank.
	Headers map[string]string `json:"headers,omitempty"`
	Body    json.RawMessage   `json:"body,omitempty"`
}
|
||||
|
||||
// AdminRuntimeJSONResponse is the JSON body the dispatcher emits for responses
// it builds itself (health, ui-manifest, and blocked/failed requests).
type AdminRuntimeJSONResponse struct {
	SchemaVersion string `json:"schema_version"`
	Status        string `json:"status"`
	Reason        string `json:"reason,omitempty"`
	Scope         string `json:"scope,omitempty"`
	ServiceClass  string `json:"service_class,omitempty"`
	Path          string `json:"path,omitempty"`
	// Manifest is only populated for ui-manifest responses.
	Manifest   map[string]any `json:"manifest,omitempty"`
	ObservedAt string         `json:"observed_at"`
}
|
||||
|
||||
func (d AdminRuntimeDispatcher) HandleFabricRequest(ctx context.Context, request FabricRequest) (FabricResponse, error) {
|
||||
method := strings.ToUpper(strings.TrimSpace(request.Method))
|
||||
path := normalizeRuntimePath(request.Path)
|
||||
if method == "" {
|
||||
method = http.MethodGet
|
||||
}
|
||||
if !allowedAdminRuntimeScope(strings.TrimSpace(request.Scope), strings.TrimSpace(request.ServiceClass)) {
|
||||
return d.json(http.StatusForbidden, request, "blocked", "admin_runtime_scope_rejected", nil), nil
|
||||
}
|
||||
switch {
|
||||
case method == http.MethodGet && (path == "/healthz" || path == "/readyz"):
|
||||
return d.json(http.StatusOK, request, "ready", "admin_runtime_ready", nil), nil
|
||||
case d.ProjectionClient != nil && (method == http.MethodGet || method == http.MethodHead):
|
||||
return d.project(ctx, request)
|
||||
case method == http.MethodGet && (path == "/ui-manifest" || strings.HasSuffix(path, "/ui-manifest")):
|
||||
return d.json(http.StatusOK, request, "ready", "ui_manifest_ready", d.manifest(request)), nil
|
||||
case method != http.MethodGet && method != http.MethodHead:
|
||||
return d.json(http.StatusForbidden, request, "blocked", "control_api_mutation_binding_not_implemented", nil), nil
|
||||
default:
|
||||
return d.json(http.StatusNotImplemented, request, "blocked", "control_api_projection_binding_not_implemented", nil), nil
|
||||
}
|
||||
}
|
||||
|
||||
// allowedAdminRuntimeScope reports whether serviceClass may be served under
// scope. Allowed pairs: platform/platform_admin, cluster/cluster_admin,
// organization/organization_portal, organization/user_portal and
// user/user_portal. Anything else, including unknown values, is rejected.
// Both arguments are compared exactly; callers trim them beforehand.
func allowedAdminRuntimeScope(scope string, serviceClass string) bool {
	switch scope {
	case "platform":
		return serviceClass == "platform_admin"
	case "cluster":
		return serviceClass == "cluster_admin"
	case "organization":
		return serviceClass == "organization_portal" || serviceClass == "user_portal"
	case "user":
		return serviceClass == "user_portal"
	}
	return false
}
|
||||
|
||||
func (d AdminRuntimeDispatcher) project(ctx context.Context, request FabricRequest) (FabricResponse, error) {
|
||||
response, err := d.ProjectionClient.Project(ctx, ControlAPIProjectionRequest{
|
||||
SchemaVersion: ControlAPIProjectionRequestSchema,
|
||||
Method: strings.ToUpper(strings.TrimSpace(request.Method)),
|
||||
Path: normalizeRuntimePath(request.Path),
|
||||
Query: request.Query,
|
||||
Host: request.Host,
|
||||
Scope: request.Scope,
|
||||
ServiceClass: request.ServiceClass,
|
||||
ObservedAt: d.observedAt(),
|
||||
})
|
||||
if err != nil {
|
||||
return d.json(http.StatusBadGateway, request, "blocked", "control_api_projection_failed", nil), nil
|
||||
}
|
||||
if response.SchemaVersion != ControlAPIProjectionResponseSchema {
|
||||
return d.json(http.StatusBadGateway, request, "blocked", "control_api_projection_invalid_response", nil), nil
|
||||
}
|
||||
headers := http.Header{"Content-Type": []string{"application/json"}}
|
||||
for key, value := range response.Headers {
|
||||
if safeResponseHeader(key) && strings.TrimSpace(value) != "" {
|
||||
headers.Set(key, value)
|
||||
}
|
||||
}
|
||||
statusCode := response.StatusCode
|
||||
if statusCode < 100 || statusCode > 599 {
|
||||
statusCode = http.StatusOK
|
||||
}
|
||||
return FabricResponse{StatusCode: statusCode, Headers: headers, Body: append([]byte(nil), response.Body...)}, nil
|
||||
}
|
||||
|
||||
func (d AdminRuntimeDispatcher) json(statusCode int, request FabricRequest, status string, reason string, manifest map[string]any) FabricResponse {
|
||||
payload, _ := json.Marshal(AdminRuntimeJSONResponse{
|
||||
SchemaVersion: AdminRuntimeResponseSchema,
|
||||
Status: status,
|
||||
Reason: reason,
|
||||
Scope: request.Scope,
|
||||
ServiceClass: request.ServiceClass,
|
||||
Path: request.Path,
|
||||
Manifest: manifest,
|
||||
ObservedAt: d.observedAt(),
|
||||
})
|
||||
return FabricResponse{
|
||||
StatusCode: statusCode,
|
||||
Headers: http.Header{"Content-Type": []string{"application/json"}},
|
||||
Body: payload,
|
||||
}
|
||||
}
|
||||
|
||||
func (d AdminRuntimeDispatcher) manifest(request FabricRequest) map[string]any {
|
||||
serviceClass := strings.TrimSpace(request.ServiceClass)
|
||||
sections := []string{}
|
||||
actions := []string{}
|
||||
switch serviceClass {
|
||||
case "platform_admin":
|
||||
sections = []string{"clusters", "nodes", "roles", "fabric", "workloads", "audit"}
|
||||
actions = []string{"read_platform_summary", "read_cluster_summaries", "read_node_status"}
|
||||
case "cluster_admin":
|
||||
sections = []string{"cluster", "nodes", "fabric", "workloads", "audit"}
|
||||
actions = []string{"read_cluster_summary", "read_node_status"}
|
||||
case "organization_portal":
|
||||
sections = []string{"organization", "sessions", "resources", "audit"}
|
||||
actions = []string{"read_organization_summary", "read_sessions"}
|
||||
case "user_portal":
|
||||
sections = []string{"profile", "sessions", "resources"}
|
||||
actions = []string{"read_profile", "read_sessions"}
|
||||
default:
|
||||
sections = []string{"status"}
|
||||
actions = []string{"read_status"}
|
||||
}
|
||||
return map[string]any{
|
||||
"schema_version": "rap.web_ingress.ui_manifest.v1",
|
||||
"scope": request.Scope,
|
||||
"service_class": serviceClass,
|
||||
"sections": sections,
|
||||
"allowed_actions": actions,
|
||||
"mutation_enabled": false,
|
||||
"projection_binding": "control_api_not_bound",
|
||||
}
|
||||
}
|
||||
|
||||
func (d AdminRuntimeDispatcher) observedAt() string {
|
||||
now := time.Now().UTC()
|
||||
if d.Now != nil {
|
||||
now = d.Now().UTC()
|
||||
}
|
||||
return now.Format(time.RFC3339Nano)
|
||||
}
|
||||
|
||||
// normalizeRuntimePath trims surrounding whitespace and guarantees a leading
// slash. A blank path becomes "/".
func normalizeRuntimePath(path string) string {
	switch trimmed := strings.TrimSpace(path); {
	case trimmed == "":
		return "/"
	case trimmed[0] == '/':
		return trimmed
	default:
		return "/" + trimmed
	}
}
|
||||
@@ -0,0 +1,212 @@
|
||||
package webingress
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestAdminRuntimeDispatcherReturnsHealthAndManifest(t *testing.T) {
|
||||
dispatcher := AdminRuntimeDispatcher{Now: fixedEnvelopeNow}
|
||||
|
||||
health, err := dispatcher.HandleFabricRequest(context.Background(), FabricRequest{
|
||||
Method: http.MethodGet,
|
||||
Path: "/readyz",
|
||||
Scope: "platform",
|
||||
ServiceClass: "platform_admin",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("health: %v", err)
|
||||
}
|
||||
if health.StatusCode != http.StatusOK {
|
||||
t.Fatalf("health = %+v", health)
|
||||
}
|
||||
|
||||
manifest, err := dispatcher.HandleFabricRequest(context.Background(), FabricRequest{
|
||||
Method: http.MethodGet,
|
||||
Path: "/platform-admin/ui-manifest",
|
||||
Scope: "platform",
|
||||
ServiceClass: "platform_admin",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("manifest: %v", err)
|
||||
}
|
||||
var payload AdminRuntimeJSONResponse
|
||||
if err := json.Unmarshal(manifest.Body, &payload); err != nil {
|
||||
t.Fatalf("decode manifest: %v", err)
|
||||
}
|
||||
if manifest.StatusCode != http.StatusOK ||
|
||||
payload.SchemaVersion != AdminRuntimeResponseSchema ||
|
||||
payload.Status != "ready" ||
|
||||
payload.Reason != "ui_manifest_ready" ||
|
||||
payload.Manifest["schema_version"] != "rap.web_ingress.ui_manifest.v1" ||
|
||||
payload.Manifest["mutation_enabled"] != false {
|
||||
t.Fatalf("payload = %+v status=%d", payload, manifest.StatusCode)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAdminRuntimeDispatcherBlocksMutationsAndUnknownProjection(t *testing.T) {
|
||||
dispatcher := AdminRuntimeDispatcher{Now: fixedEnvelopeNow}
|
||||
|
||||
mutation, err := dispatcher.HandleFabricRequest(context.Background(), FabricRequest{
|
||||
Method: http.MethodPost,
|
||||
Path: "/platform-admin/nodes",
|
||||
Scope: "platform",
|
||||
ServiceClass: "platform_admin",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("mutation: %v", err)
|
||||
}
|
||||
var mutationPayload AdminRuntimeJSONResponse
|
||||
if err := json.Unmarshal(mutation.Body, &mutationPayload); err != nil {
|
||||
t.Fatalf("decode mutation: %v", err)
|
||||
}
|
||||
if mutation.StatusCode != http.StatusForbidden || mutationPayload.Reason != "control_api_mutation_binding_not_implemented" {
|
||||
t.Fatalf("mutation payload = %+v status=%d", mutationPayload, mutation.StatusCode)
|
||||
}
|
||||
|
||||
projection, err := dispatcher.HandleFabricRequest(context.Background(), FabricRequest{
|
||||
Method: http.MethodGet,
|
||||
Path: "/platform-admin/nodes",
|
||||
Scope: "platform",
|
||||
ServiceClass: "platform_admin",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("projection: %v", err)
|
||||
}
|
||||
var projectionPayload AdminRuntimeJSONResponse
|
||||
if err := json.Unmarshal(projection.Body, &projectionPayload); err != nil {
|
||||
t.Fatalf("decode projection: %v", err)
|
||||
}
|
||||
if projection.StatusCode != http.StatusNotImplemented || projectionPayload.Reason != "control_api_projection_binding_not_implemented" {
|
||||
t.Fatalf("projection payload = %+v status=%d", projectionPayload, projection.StatusCode)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAdminRuntimeDispatcherRejectsInvalidScopeClassPair(t *testing.T) {
|
||||
dispatcher := AdminRuntimeDispatcher{ProjectionClient: &recordingProjectionClient{}, Now: fixedEnvelopeNow}
|
||||
response, err := dispatcher.HandleFabricRequest(context.Background(), FabricRequest{
|
||||
Method: http.MethodGet,
|
||||
Path: "/platform-admin/ui-manifest",
|
||||
Scope: "organization",
|
||||
ServiceClass: "platform_admin",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("projection: %v", err)
|
||||
}
|
||||
var payload AdminRuntimeJSONResponse
|
||||
if err := json.Unmarshal(response.Body, &payload); err != nil {
|
||||
t.Fatalf("decode response: %v", err)
|
||||
}
|
||||
if response.StatusCode != http.StatusForbidden || payload.Reason != "admin_runtime_scope_rejected" {
|
||||
t.Fatalf("payload = %+v status=%d", payload, response.StatusCode)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAdminRuntimeDispatcherUsesControlAPIProjectionClientForReadRequests(t *testing.T) {
|
||||
client := &recordingProjectionClient{
|
||||
response: ControlAPIProjectionResponse{
|
||||
SchemaVersion: ControlAPIProjectionResponseSchema,
|
||||
Status: "ready",
|
||||
StatusCode: http.StatusOK,
|
||||
Headers: map[string]string{"X-RAP-Projection": "control-api", "Set-Cookie": "blocked"},
|
||||
Body: json.RawMessage(`{"schema_version":"control.projection.v1","ok":true}`),
|
||||
},
|
||||
}
|
||||
dispatcher := AdminRuntimeDispatcher{ProjectionClient: client, Now: fixedEnvelopeNow}
|
||||
|
||||
response, err := dispatcher.HandleFabricRequest(context.Background(), FabricRequest{
|
||||
Method: http.MethodGet,
|
||||
Path: "/platform-admin/nodes",
|
||||
Query: "limit=10",
|
||||
Host: "admin.example.test",
|
||||
Scope: "platform",
|
||||
ServiceClass: "platform_admin",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("projection: %v", err)
|
||||
}
|
||||
if response.StatusCode != http.StatusOK ||
|
||||
response.Headers.Get("X-RAP-Projection") != "control-api" ||
|
||||
response.Headers.Get("Set-Cookie") != "" ||
|
||||
string(response.Body) != `{"schema_version":"control.projection.v1","ok":true}` {
|
||||
t.Fatalf("response = %+v body=%s", response, string(response.Body))
|
||||
}
|
||||
if client.request.Path != "/platform-admin/nodes" ||
|
||||
client.request.Query != "limit=10" ||
|
||||
client.request.Scope != "platform" ||
|
||||
client.request.ServiceClass != "platform_admin" {
|
||||
t.Fatalf("request = %+v", client.request)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAdminRuntimeDispatcherReportsProjectionClientFailure(t *testing.T) {
|
||||
dispatcher := AdminRuntimeDispatcher{ProjectionClient: failingProjectionClient{}, Now: fixedEnvelopeNow}
|
||||
response, err := dispatcher.HandleFabricRequest(context.Background(), FabricRequest{
|
||||
Method: http.MethodGet,
|
||||
Path: "/platform-admin/nodes",
|
||||
Scope: "platform",
|
||||
ServiceClass: "platform_admin",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("projection: %v", err)
|
||||
}
|
||||
var payload AdminRuntimeJSONResponse
|
||||
if err := json.Unmarshal(response.Body, &payload); err != nil {
|
||||
t.Fatalf("decode response: %v", err)
|
||||
}
|
||||
if response.StatusCode != http.StatusBadGateway || payload.Reason != "control_api_projection_failed" {
|
||||
t.Fatalf("payload = %+v status=%d", payload, response.StatusCode)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAdminRuntimeDispatcherRejectsInvalidProjectionResponseSchema(t *testing.T) {
|
||||
dispatcher := AdminRuntimeDispatcher{
|
||||
ProjectionClient: &recordingProjectionClient{
|
||||
response: ControlAPIProjectionResponse{
|
||||
SchemaVersion: "wrong.schema",
|
||||
Status: "ready",
|
||||
StatusCode: http.StatusOK,
|
||||
Body: json.RawMessage(`{"ok":true}`),
|
||||
},
|
||||
},
|
||||
Now: fixedEnvelopeNow,
|
||||
}
|
||||
response, err := dispatcher.HandleFabricRequest(context.Background(), FabricRequest{
|
||||
Method: http.MethodGet,
|
||||
Path: "/platform-admin/nodes",
|
||||
Scope: "platform",
|
||||
ServiceClass: "platform_admin",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("projection: %v", err)
|
||||
}
|
||||
var payload AdminRuntimeJSONResponse
|
||||
if err := json.Unmarshal(response.Body, &payload); err != nil {
|
||||
t.Fatalf("decode response: %v", err)
|
||||
}
|
||||
if response.StatusCode != http.StatusBadGateway || payload.Reason != "control_api_projection_invalid_response" {
|
||||
t.Fatalf("payload = %+v status=%d", payload, response.StatusCode)
|
||||
}
|
||||
}
|
||||
|
||||
type recordingProjectionClient struct {
|
||||
request ControlAPIProjectionRequest
|
||||
response ControlAPIProjectionResponse
|
||||
}
|
||||
|
||||
func (c *recordingProjectionClient) Project(_ context.Context, request ControlAPIProjectionRequest) (ControlAPIProjectionResponse, error) {
|
||||
c.request = request
|
||||
return c.response, nil
|
||||
}
|
||||
|
||||
type failingProjectionClient struct{}
|
||||
|
||||
func (failingProjectionClient) Project(context.Context, ControlAPIProjectionRequest) (ControlAPIProjectionResponse, error) {
|
||||
return ControlAPIProjectionResponse{}, errTestProjectionFailure{}
|
||||
}
|
||||
|
||||
// errTestProjectionFailure is the error returned by failingProjectionClient.
type errTestProjectionFailure struct{}

// Error implements the error interface with a fixed message.
func (errTestProjectionFailure) Error() string {
	return "projection failed"
}
|
||||
@@ -0,0 +1,151 @@
|
||||
package webingress
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"net/http"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// FabricServiceChannelEnvelopeSchema is the schema version written into every
// unsigned FabricServiceChannelEnvelope.
const FabricServiceChannelEnvelopeSchema = "rap.web_ingress.fabric_service_channel_envelope.v1"
|
||||
|
||||
// Sentinel errors returned by DefaultFabricBinder.Forward before any envelope
// is built. Compare with errors.Is.
var (
	// ErrFabricEnvelopeSignerRequired means the binder has no Signer.
	ErrFabricEnvelopeSignerRequired = errors.New("web ingress fabric envelope signer required")
	// ErrFabricEnvelopeSenderRequired means the binder has no Sender.
	ErrFabricEnvelopeSenderRequired = errors.New("web ingress fabric envelope sender required")
	// ErrFabricEnvelopeScopeRequired means the request scope is blank.
	ErrFabricEnvelopeScopeRequired = errors.New("web ingress fabric envelope scope required")
	// ErrFabricEnvelopeClassRequired means the request service class is blank.
	ErrFabricEnvelopeClassRequired = errors.New("web ingress fabric envelope service class required")
)
|
||||
|
||||
type EnvelopeSigner interface {
|
||||
Sign(ctx context.Context, canonical []byte) (FabricEnvelopeSignature, error)
|
||||
}
|
||||
|
||||
type EnvelopeSender interface {
|
||||
Send(ctx context.Context, envelope SignedFabricServiceChannelEnvelope) (FabricResponse, error)
|
||||
}
|
||||
|
||||
type DefaultFabricBinder struct {
|
||||
Signer EnvelopeSigner
|
||||
Sender EnvelopeSender
|
||||
Now func() time.Time
|
||||
}
|
||||
|
||||
// FabricServiceChannelEnvelope is the unsigned envelope whose JSON encoding is
// the canonical form passed to the signer.
type FabricServiceChannelEnvelope struct {
	SchemaVersion string `json:"schema_version"`
	// RequestSchema is the schema version of the wrapped request.
	RequestSchema string `json:"request_schema"`
	Method        string `json:"method"`
	Path          string `json:"path"`
	Query         string `json:"query,omitempty"`
	Host          string `json:"host"`
	ServiceType   string `json:"service_type"`
	Scope         string `json:"scope"`
	ServiceClass  string `json:"service_class"`
	// Headers holds the filtered, canonicalized request headers.
	Headers map[string][]string `json:"headers,omitempty"`
	// BodyBase64 is the request body in standard base64.
	BodyBase64 string `json:"body_b64,omitempty"`
	// ObservedAt and EnvelopedAt are RFC 3339 timestamps with nanoseconds.
	ObservedAt  string `json:"observed_at"`
	EnvelopedAt string `json:"enveloped_at"`
}
|
||||
|
||||
// FabricEnvelopeSignature is the signature an EnvelopeSigner produces for an
// envelope's canonical bytes.
type FabricEnvelopeSignature struct {
	KeyID     string `json:"key_id"`
	Alg       string `json:"alg"`
	Signature string `json:"signature"`
	SignedAt  string `json:"signed_at,omitempty"`
}
|
||||
|
||||
type SignedFabricServiceChannelEnvelope struct {
|
||||
SchemaVersion string `json:"schema_version"`
|
||||
Envelope FabricServiceChannelEnvelope `json:"envelope"`
|
||||
Signature FabricEnvelopeSignature `json:"signature"`
|
||||
Canonical []byte `json:"-"`
|
||||
}
|
||||
|
||||
func (b DefaultFabricBinder) Forward(ctx context.Context, request FabricRequest) (FabricResponse, error) {
|
||||
if b.Signer == nil {
|
||||
return FabricResponse{}, ErrFabricEnvelopeSignerRequired
|
||||
}
|
||||
if b.Sender == nil {
|
||||
return FabricResponse{}, ErrFabricEnvelopeSenderRequired
|
||||
}
|
||||
if strings.TrimSpace(request.Scope) == "" {
|
||||
return FabricResponse{}, ErrFabricEnvelopeScopeRequired
|
||||
}
|
||||
if strings.TrimSpace(request.ServiceClass) == "" {
|
||||
return FabricResponse{}, ErrFabricEnvelopeClassRequired
|
||||
}
|
||||
|
||||
envelope := b.envelope(request)
|
||||
canonical, err := json.Marshal(envelope)
|
||||
if err != nil {
|
||||
return FabricResponse{}, err
|
||||
}
|
||||
signature, err := b.Signer.Sign(ctx, canonical)
|
||||
if err != nil {
|
||||
return FabricResponse{}, err
|
||||
}
|
||||
return b.Sender.Send(ctx, SignedFabricServiceChannelEnvelope{
|
||||
SchemaVersion: SignedFabricServiceChannelEnvelopeSchema,
|
||||
Envelope: envelope,
|
||||
Signature: signature,
|
||||
Canonical: canonical,
|
||||
})
|
||||
}
|
||||
|
||||
func (b DefaultFabricBinder) envelope(request FabricRequest) FabricServiceChannelEnvelope {
|
||||
now := time.Now().UTC()
|
||||
if b.Now != nil {
|
||||
now = b.Now().UTC()
|
||||
}
|
||||
observedAt := request.ObservedAt.UTC()
|
||||
if observedAt.IsZero() {
|
||||
observedAt = now
|
||||
}
|
||||
return FabricServiceChannelEnvelope{
|
||||
SchemaVersion: FabricServiceChannelEnvelopeSchema,
|
||||
RequestSchema: strings.TrimSpace(request.SchemaVersion),
|
||||
Method: strings.ToUpper(strings.TrimSpace(request.Method)),
|
||||
Path: request.Path,
|
||||
Query: request.Query,
|
||||
Host: strings.TrimSpace(request.Host),
|
||||
ServiceType: strings.TrimSpace(request.ServiceType),
|
||||
Scope: strings.TrimSpace(request.Scope),
|
||||
ServiceClass: strings.TrimSpace(request.ServiceClass),
|
||||
Headers: canonicalHeaders(request.Headers),
|
||||
BodyBase64: base64.StdEncoding.EncodeToString(request.Body),
|
||||
ObservedAt: observedAt.Format(time.RFC3339Nano),
|
||||
EnvelopedAt: now.Format(time.RFC3339Nano),
|
||||
}
|
||||
}
|
||||
|
||||
func canonicalHeaders(headers http.Header) map[string][]string {
|
||||
if len(headers) == 0 {
|
||||
return nil
|
||||
}
|
||||
out := map[string][]string{}
|
||||
for key, values := range headers {
|
||||
canonicalKey := http.CanonicalHeaderKey(strings.TrimSpace(key))
|
||||
if canonicalKey == "" || !safeRequestHeader(canonicalKey) {
|
||||
continue
|
||||
}
|
||||
copied := make([]string, 0, len(values))
|
||||
for _, value := range values {
|
||||
value = strings.TrimSpace(value)
|
||||
if value != "" {
|
||||
copied = append(copied, value)
|
||||
}
|
||||
}
|
||||
if len(copied) == 0 {
|
||||
continue
|
||||
}
|
||||
sort.Strings(copied)
|
||||
out[canonicalKey] = copied
|
||||
}
|
||||
if len(out) == 0 {
|
||||
return nil
|
||||
}
|
||||
return out
|
||||
}
|
||||
@@ -0,0 +1,163 @@
|
||||
package webingress
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"net/http"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestDefaultFabricBinderBuildsSignedEnvelopeAndSendsIt(t *testing.T) {
|
||||
signer := &recordingEnvelopeSigner{
|
||||
signature: FabricEnvelopeSignature{KeyID: "node-key-1", Alg: "ed25519", Signature: "sig-1", SignedAt: "2026-05-17T00:00:02Z"},
|
||||
}
|
||||
sender := &recordingEnvelopeSender{
|
||||
response: FabricResponse{StatusCode: http.StatusAccepted, Body: []byte(`{"accepted":true}`)},
|
||||
}
|
||||
binder := DefaultFabricBinder{Signer: signer, Sender: sender, Now: fixedEnvelopeNow}
|
||||
|
||||
response, err := binder.Forward(context.Background(), FabricRequest{
|
||||
SchemaVersion: "rap.web_ingress.fabric_request.v1",
|
||||
Method: "post",
|
||||
Path: "/platform-admin/root",
|
||||
Query: "tab=nodes",
|
||||
Host: "admin.example.test",
|
||||
ServiceType: "admin-ingress",
|
||||
Scope: "platform",
|
||||
ServiceClass: "platform_admin",
|
||||
Headers: http.Header{
|
||||
"X-Trace-Id": []string{"trace-b", "trace-a"},
|
||||
"Authorization": []string{"Bearer should-not-forward"},
|
||||
"X-Empty-Header": []string{" "},
|
||||
},
|
||||
Body: []byte(`{"hello":"world"}`),
|
||||
ObservedAt: fixedNow(),
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("Forward failed: %v", err)
|
||||
}
|
||||
if response.StatusCode != http.StatusAccepted {
|
||||
t.Fatalf("response = %+v", response)
|
||||
}
|
||||
if len(signer.canonical) == 0 {
|
||||
t.Fatal("signer did not receive canonical envelope")
|
||||
}
|
||||
if !bytes.Equal(sender.envelope.Canonical, signer.canonical) {
|
||||
t.Fatalf("sender canonical does not match signer canonical")
|
||||
}
|
||||
if sender.envelope.SchemaVersion != "rap.web_ingress.signed_fabric_service_channel_envelope.v1" {
|
||||
t.Fatalf("signed schema = %q", sender.envelope.SchemaVersion)
|
||||
}
|
||||
if sender.envelope.Signature.KeyID != "node-key-1" || sender.envelope.Signature.Signature != "sig-1" {
|
||||
t.Fatalf("signature = %+v", sender.envelope.Signature)
|
||||
}
|
||||
|
||||
var canonical FabricServiceChannelEnvelope
|
||||
if err := json.Unmarshal(signer.canonical, &canonical); err != nil {
|
||||
t.Fatalf("decode canonical: %v", err)
|
||||
}
|
||||
if canonical.SchemaVersion != FabricServiceChannelEnvelopeSchema ||
|
||||
canonical.RequestSchema != "rap.web_ingress.fabric_request.v1" ||
|
||||
canonical.Method != http.MethodPost ||
|
||||
canonical.Scope != "platform" ||
|
||||
canonical.ServiceClass != "platform_admin" ||
|
||||
canonical.BodyBase64 != "eyJoZWxsbyI6IndvcmxkIn0=" ||
|
||||
canonical.ObservedAt != "2026-05-17T00:00:00Z" ||
|
||||
canonical.EnvelopedAt != "2026-05-17T00:00:01Z" {
|
||||
t.Fatalf("canonical envelope = %+v", canonical)
|
||||
}
|
||||
if got := canonical.Headers["X-Trace-Id"]; len(got) != 2 || got[0] != "trace-a" || got[1] != "trace-b" {
|
||||
t.Fatalf("canonical headers = %#v", canonical.Headers)
|
||||
}
|
||||
if canonical.Headers["Authorization"] != nil || canonical.Headers["X-Empty-Header"] != nil {
|
||||
t.Fatalf("unsafe/empty headers leaked: %#v", canonical.Headers)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDefaultFabricBinderRequiresSignerAndSender(t *testing.T) {
|
||||
request := FabricRequest{Scope: "platform", ServiceClass: "platform_admin"}
|
||||
|
||||
_, err := (DefaultFabricBinder{Sender: &recordingEnvelopeSender{}}).Forward(context.Background(), request)
|
||||
if !errors.Is(err, ErrFabricEnvelopeSignerRequired) {
|
||||
t.Fatalf("signer error = %v", err)
|
||||
}
|
||||
|
||||
_, err = (DefaultFabricBinder{Signer: &recordingEnvelopeSigner{}}).Forward(context.Background(), request)
|
||||
if !errors.Is(err, ErrFabricEnvelopeSenderRequired) {
|
||||
t.Fatalf("sender error = %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDefaultFabricBinderRequiresScopeAndServiceClass(t *testing.T) {
|
||||
binder := DefaultFabricBinder{Signer: &recordingEnvelopeSigner{}, Sender: &recordingEnvelopeSender{}}
|
||||
|
||||
_, err := binder.Forward(context.Background(), FabricRequest{ServiceClass: "platform_admin"})
|
||||
if !errors.Is(err, ErrFabricEnvelopeScopeRequired) {
|
||||
t.Fatalf("scope error = %v", err)
|
||||
}
|
||||
|
||||
_, err = binder.Forward(context.Background(), FabricRequest{Scope: "platform"})
|
||||
if !errors.Is(err, ErrFabricEnvelopeClassRequired) {
|
||||
t.Fatalf("class error = %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDefaultFabricBinderPropagatesSignerAndSenderFailures(t *testing.T) {
|
||||
signerErr := errors.New("sign failed")
|
||||
senderErr := errors.New("send failed")
|
||||
request := FabricRequest{Scope: "platform", ServiceClass: "platform_admin"}
|
||||
|
||||
_, err := (DefaultFabricBinder{
|
||||
Signer: &recordingEnvelopeSigner{err: signerErr},
|
||||
Sender: &recordingEnvelopeSender{},
|
||||
}).Forward(context.Background(), request)
|
||||
if !errors.Is(err, signerErr) {
|
||||
t.Fatalf("signer error = %v", err)
|
||||
}
|
||||
|
||||
_, err = (DefaultFabricBinder{
|
||||
Signer: &recordingEnvelopeSigner{},
|
||||
Sender: &recordingEnvelopeSender{err: senderErr},
|
||||
}).Forward(context.Background(), request)
|
||||
if !errors.Is(err, senderErr) {
|
||||
t.Fatalf("sender error = %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// fixedEnvelopeNow is the deterministic clock used by the envelope tests:
// 2026-05-17T00:00:01Z.
func fixedEnvelopeNow() time.Time {
	const (
		year   = 2026
		day    = 17
		second = 1
	)
	return time.Date(year, time.May, day, 0, 0, second, 0, time.UTC)
}
|
||||
|
||||
type recordingEnvelopeSigner struct {
|
||||
canonical []byte
|
||||
signature FabricEnvelopeSignature
|
||||
err error
|
||||
}
|
||||
|
||||
func (s *recordingEnvelopeSigner) Sign(_ context.Context, canonical []byte) (FabricEnvelopeSignature, error) {
|
||||
s.canonical = append([]byte{}, canonical...)
|
||||
if s.err != nil {
|
||||
return FabricEnvelopeSignature{}, s.err
|
||||
}
|
||||
if s.signature.KeyID == "" {
|
||||
s.signature = FabricEnvelopeSignature{KeyID: "test-key", Alg: "ed25519", Signature: "test-signature"}
|
||||
}
|
||||
return s.signature, nil
|
||||
}
|
||||
|
||||
type recordingEnvelopeSender struct {
|
||||
envelope SignedFabricServiceChannelEnvelope
|
||||
response FabricResponse
|
||||
err error
|
||||
}
|
||||
|
||||
func (s *recordingEnvelopeSender) Send(_ context.Context, envelope SignedFabricServiceChannelEnvelope) (FabricResponse, error) {
|
||||
s.envelope = envelope
|
||||
if s.err != nil {
|
||||
return FabricResponse{}, s.err
|
||||
}
|
||||
return s.response, nil
|
||||
}
|
||||
@@ -0,0 +1,64 @@
|
||||
package webingress
|
||||
|
||||
import (
|
||||
"crypto/ed25519"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// TrustedKeyConfig is one element of the array form accepted by
// ParseTrustedKeysJSON.
type TrustedKeyConfig struct {
	// KeyID identifies the key; JSON field "key_id".
	KeyID string `json:"key_id"`
	// PublicKey is the base64-encoded public key; JSON field "public_key".
	PublicKey string `json:"public_key"`
}
|
||||
|
||||
func ParseTrustedKeysJSON(value string) (StaticEnvelopeKeyResolver, error) {
|
||||
value = strings.TrimSpace(value)
|
||||
if value == "" {
|
||||
return nil, nil
|
||||
}
|
||||
resolver := StaticEnvelopeKeyResolver{}
|
||||
var byID map[string]string
|
||||
if err := json.Unmarshal([]byte(value), &byID); err == nil && len(byID) > 0 {
|
||||
for keyID, publicKeyB64 := range byID {
|
||||
if err := resolver.addBase64(keyID, publicKeyB64); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return resolver, nil
|
||||
}
|
||||
var list []TrustedKeyConfig
|
||||
if err := json.Unmarshal([]byte(value), &list); err != nil {
|
||||
return nil, fmt.Errorf("%w: trusted keys json must be object or array", ErrFabricEnvelopeSignatureInvalid)
|
||||
}
|
||||
for _, item := range list {
|
||||
if err := resolver.addBase64(item.KeyID, item.PublicKey); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return resolver, nil
|
||||
}
|
||||
|
||||
func (r StaticEnvelopeKeyResolver) addBase64(keyID string, publicKeyB64 string) error {
|
||||
keyID = strings.TrimSpace(keyID)
|
||||
if keyID == "" {
|
||||
return fmt.Errorf("%w: trusted key id required", ErrFabricEnvelopeSignatureInvalid)
|
||||
}
|
||||
decoded, err := decodeEnvelopeBase64(strings.TrimSpace(publicKeyB64))
|
||||
if err != nil {
|
||||
return fmt.Errorf("%w: trusted public key must be base64 encoded", ErrFabricEnvelopeSignatureInvalid)
|
||||
}
|
||||
if len(decoded) != ed25519.PublicKeySize {
|
||||
return fmt.Errorf("%w: trusted public key must decode to %d bytes", ErrFabricEnvelopeSignatureInvalid, ed25519.PublicKeySize)
|
||||
}
|
||||
r[keyID] = append(ed25519.PublicKey(nil), decoded...)
|
||||
return nil
|
||||
}
|
||||
|
||||
// TrustedKeysJSONForPublicKey renders a single trusted key in the object form
// understood by ParseTrustedKeysJSON: the trimmed key ID mapped to the
// standard-base64 encoding of publicKey.
func TrustedKeysJSONForPublicKey(keyID string, publicKey ed25519.PublicKey) string {
	entry := make(map[string]string, 1)
	entry[strings.TrimSpace(keyID)] = base64.StdEncoding.EncodeToString(publicKey)

	// The marshal error is deliberately ignored, as in the original helper.
	encoded, _ := json.Marshal(entry)
	return string(encoded)
}
|
||||
@@ -0,0 +1,64 @@
|
||||
package webingress
|
||||
|
||||
import (
|
||||
"crypto/ed25519"
|
||||
"crypto/rand"
|
||||
"encoding/base64"
|
||||
"errors"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestParseTrustedKeysJSONAcceptsMapAndArray(t *testing.T) {
|
||||
publicKey, _, err := ed25519.GenerateKey(rand.Reader)
|
||||
if err != nil {
|
||||
t.Fatalf("generate key: %v", err)
|
||||
}
|
||||
keyB64 := base64.StdEncoding.EncodeToString(publicKey)
|
||||
|
||||
resolver, err := ParseTrustedKeysJSON(`{"key-1":"` + keyB64 + `"}`)
|
||||
if err != nil {
|
||||
t.Fatalf("parse map: %v", err)
|
||||
}
|
||||
if got, ok, err := resolver.PublicKey(nil, "key-1"); err != nil || !ok || string(got) != string(publicKey) {
|
||||
t.Fatalf("map resolver got=%x ok=%t err=%v", got, ok, err)
|
||||
}
|
||||
|
||||
resolver, err = ParseTrustedKeysJSON(`[{"key_id":"key-2","public_key":"` + keyB64 + `"}]`)
|
||||
if err != nil {
|
||||
t.Fatalf("parse array: %v", err)
|
||||
}
|
||||
if _, ok, err := resolver.PublicKey(nil, "key-2"); err != nil || !ok {
|
||||
t.Fatalf("array resolver ok=%t err=%v", ok, err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseTrustedKeysJSONRejectsInvalidKeys(t *testing.T) {
|
||||
_, err := ParseTrustedKeysJSON(`{"":"abc"}`)
|
||||
if !errors.Is(err, ErrFabricEnvelopeSignatureInvalid) {
|
||||
t.Fatalf("empty key err = %v", err)
|
||||
}
|
||||
|
||||
_, err = ParseTrustedKeysJSON(`{"key-1":"abc"}`)
|
||||
if !errors.Is(err, ErrFabricEnvelopeSignatureInvalid) {
|
||||
t.Fatalf("bad public key err = %v", err)
|
||||
}
|
||||
|
||||
_, err = ParseTrustedKeysJSON(`not-json`)
|
||||
if !errors.Is(err, ErrFabricEnvelopeSignatureInvalid) {
|
||||
t.Fatalf("bad json err = %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTrustedKeysJSONForPublicKey(t *testing.T) {
|
||||
publicKey, _, err := ed25519.GenerateKey(rand.Reader)
|
||||
if err != nil {
|
||||
t.Fatalf("generate key: %v", err)
|
||||
}
|
||||
resolver, err := ParseTrustedKeysJSON(TrustedKeysJSONForPublicKey("key-1", publicKey))
|
||||
if err != nil {
|
||||
t.Fatalf("parse generated json: %v", err)
|
||||
}
|
||||
if _, ok, err := resolver.PublicKey(nil, "key-1"); err != nil || !ok {
|
||||
t.Fatalf("generated resolver ok=%t err=%v", ok, err)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,182 @@
|
||||
package webingress
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"errors"
|
||||
"net"
|
||||
"net/http"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
type ListenerConfig struct {
|
||||
RuntimeConfig
|
||||
HTTPAddr string
|
||||
HTTPSAddr string
|
||||
TLSCertFile string
|
||||
TLSKeyFile string
|
||||
Binder FabricBinder
|
||||
}
|
||||
|
||||
// ListenerStatus is the JSON-serialisable snapshot returned by the Manager
// methods Apply, Stop and Status.
type ListenerStatus struct {
	// SchemaVersion is set to "rap.web_ingress.listener_status.v1" by Manager.
	SchemaVersion string `json:"schema_version"`
	// Running is true when at least one of the two listeners is up.
	Running      bool `json:"running"`
	HTTPRunning  bool `json:"http_running"`
	HTTPSRunning bool `json:"https_running"`
	// HTTPAddr and HTTPSAddr are the bound addresses of the running listeners.
	HTTPAddr  string `json:"http_addr,omitempty"`
	HTTPSAddr string `json:"https_addr,omitempty"`
	// Reason is one of "started", "partial", "blocked", "stopped" or
	// "not_started" as set by Manager.
	Reason string `json:"reason,omitempty"`
	// Errors lists per-listener start failures, prefixed "http:" or "https:".
	Errors []string `json:"errors,omitempty"`
	// ObservedAt is an RFC 3339 (nanosecond) UTC timestamp.
	ObservedAt string `json:"observed_at"`
}
|
||||
|
||||
type Manager struct {
|
||||
mu sync.Mutex
|
||||
http *http.Server
|
||||
https *http.Server
|
||||
status ListenerStatus
|
||||
now func() time.Time
|
||||
}
|
||||
|
||||
func NewManager() *Manager {
|
||||
return &Manager{now: time.Now}
|
||||
}
|
||||
|
||||
func (m *Manager) Apply(ctx context.Context, cfg ListenerConfig) ListenerStatus {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
_ = m.stopLocked(ctx)
|
||||
|
||||
runtime := Runtime{Config: cfg.RuntimeConfig, Binder: cfg.Binder, Now: m.now}
|
||||
status := ListenerStatus{
|
||||
SchemaVersion: "rap.web_ingress.listener_status.v1",
|
||||
Reason: "started",
|
||||
ObservedAt: m.observedAt(),
|
||||
}
|
||||
errorsOut := []string{}
|
||||
if strings.TrimSpace(cfg.HTTPAddr) == "" {
|
||||
cfg.HTTPAddr = ":80"
|
||||
}
|
||||
if strings.TrimSpace(cfg.HTTPSAddr) == "" {
|
||||
cfg.HTTPSAddr = ":443"
|
||||
}
|
||||
if server, addr, err := startHTTPServer(ctx, cfg.HTTPAddr, runtime.HTTPHandler()); err == nil {
|
||||
m.http = server
|
||||
status.HTTPRunning = true
|
||||
status.HTTPAddr = addr
|
||||
} else {
|
||||
errorsOut = append(errorsOut, "http:"+err.Error())
|
||||
}
|
||||
if cfg.TLSCertFile == "" || cfg.TLSKeyFile == "" {
|
||||
errorsOut = append(errorsOut, "https:tls_cert_file_and_key_file_required")
|
||||
} else if server, addr, err := startHTTPSServer(ctx, cfg.HTTPSAddr, cfg.TLSCertFile, cfg.TLSKeyFile, runtime.HTTPSHandler()); err == nil {
|
||||
m.https = server
|
||||
status.HTTPSRunning = true
|
||||
status.HTTPSAddr = addr
|
||||
} else {
|
||||
errorsOut = append(errorsOut, "https:"+err.Error())
|
||||
}
|
||||
status.Running = status.HTTPRunning || status.HTTPSRunning
|
||||
if len(errorsOut) > 0 {
|
||||
status.Errors = errorsOut
|
||||
if status.Running {
|
||||
status.Reason = "partial"
|
||||
} else {
|
||||
status.Reason = "blocked"
|
||||
}
|
||||
}
|
||||
m.status = status
|
||||
return status
|
||||
}
|
||||
|
||||
func (m *Manager) Stop(ctx context.Context) ListenerStatus {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
_ = m.stopLocked(ctx)
|
||||
m.status = ListenerStatus{
|
||||
SchemaVersion: "rap.web_ingress.listener_status.v1",
|
||||
Reason: "stopped",
|
||||
ObservedAt: m.observedAt(),
|
||||
}
|
||||
return m.status
|
||||
}
|
||||
|
||||
func (m *Manager) Status() ListenerStatus {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
if m.status.SchemaVersion == "" {
|
||||
return ListenerStatus{
|
||||
SchemaVersion: "rap.web_ingress.listener_status.v1",
|
||||
Reason: "not_started",
|
||||
ObservedAt: m.observedAt(),
|
||||
}
|
||||
}
|
||||
return m.status
|
||||
}
|
||||
|
||||
func (m *Manager) stopLocked(ctx context.Context) error {
|
||||
var out error
|
||||
if m.http != nil {
|
||||
out = errors.Join(out, m.http.Shutdown(ctx))
|
||||
m.http = nil
|
||||
}
|
||||
if m.https != nil {
|
||||
out = errors.Join(out, m.https.Shutdown(ctx))
|
||||
m.https = nil
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func (m *Manager) observedAt() string {
|
||||
now := time.Now().UTC()
|
||||
if m.now != nil {
|
||||
now = m.now().UTC()
|
||||
}
|
||||
return now.Format(time.RFC3339Nano)
|
||||
}
|
||||
|
||||
// startHTTPServer binds addr, serves handler on it in the background and
// returns the server together with the address actually bound (useful when
// addr requests port 0).
//
// The server is shut down gracefully when ctx is cancelled. The goroutine
// watching ctx also exits as soon as Serve returns for any other reason
// (Shutdown or Close called by the owner, or a serve failure). Previously it
// stayed blocked on ctx.Done() until ctx itself ended, leaking one goroutine
// and pinning the old server for every restart under a long-lived context.
//
// A listen failure is returned as-is; nothing is left running in that case.
func startHTTPServer(ctx context.Context, addr string, handler http.Handler) (*http.Server, string, error) {
	listener, err := net.Listen("tcp", addr)
	if err != nil {
		return nil, "", err
	}
	server := &http.Server{Handler: handler, ReadHeaderTimeout: 5 * time.Second}

	served := make(chan struct{})
	go func() {
		defer close(served)
		if err := server.Serve(listener); err != nil && !errors.Is(err, http.ErrServerClosed) {
			// Unexpected serve failure: release the listener and connections.
			_ = server.Close()
		}
	}()
	go func() {
		select {
		case <-ctx.Done():
			// ctx is already done, so shut down under a fresh context.
			_ = server.Shutdown(context.Background())
		case <-served:
			// Server stopped by other means; nothing left to watch.
		}
	}()
	return server, listener.Addr().String(), nil
}
|
||||
|
||||
// startHTTPSServer loads the key pair from certFile/keyFile, binds addr and
// serves handler over TLS (minimum TLS 1.2) in the background. It returns the
// server and the address actually bound.
//
// The key pair is loaded before the socket is opened, so a bad certificate
// never leaves a listener behind. The server is shut down gracefully when ctx
// is cancelled. The goroutine watching ctx also exits once ServeTLS returns
// for any other reason, so stopping the server under a long-lived context no
// longer leaks that goroutine.
func startHTTPSServer(ctx context.Context, addr, certFile, keyFile string, handler http.Handler) (*http.Server, string, error) {
	cert, err := tls.LoadX509KeyPair(certFile, keyFile)
	if err != nil {
		return nil, "", err
	}
	listener, err := net.Listen("tcp", addr)
	if err != nil {
		return nil, "", err
	}
	server := &http.Server{
		Handler:           handler,
		ReadHeaderTimeout: 5 * time.Second,
		TLSConfig:         &tls.Config{MinVersion: tls.VersionTLS12, Certificates: []tls.Certificate{cert}},
	}

	served := make(chan struct{})
	go func() {
		defer close(served)
		// Empty file names: the certificate comes from TLSConfig.
		if err := server.ServeTLS(listener, "", ""); err != nil && !errors.Is(err, http.ErrServerClosed) {
			_ = server.Close()
		}
	}()
	go func() {
		select {
		case <-ctx.Done():
			// ctx is already done, so shut down under a fresh context.
			_ = server.Shutdown(context.Background())
		case <-served:
			// Server stopped by other means; nothing left to watch.
		}
	}()
	return server, listener.Addr().String(), nil
}
|
||||
@@ -0,0 +1,105 @@
|
||||
package webingress
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/rand"
|
||||
"crypto/rsa"
|
||||
"crypto/x509"
|
||||
"crypto/x509/pkix"
|
||||
"encoding/pem"
|
||||
"math/big"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestManagerStartsHTTPRedirectAndStops(t *testing.T) {
|
||||
manager := NewManager()
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
status := manager.Apply(ctx, ListenerConfig{
|
||||
RuntimeConfig: RuntimeConfig{ServiceType: "admin-ingress", Scope: "platform", ServiceClasses: []string{"platform_admin"}},
|
||||
HTTPAddr: "127.0.0.1:0",
|
||||
HTTPSAddr: "127.0.0.1:0",
|
||||
})
|
||||
if !status.HTTPRunning || status.HTTPSRunning || !status.Running || status.HTTPAddr == "" {
|
||||
t.Fatalf("status = %+v", status)
|
||||
}
|
||||
if status.Reason != "partial" || !containsError(status.Errors, "https:tls_cert_file_and_key_file_required") {
|
||||
t.Fatalf("status = %+v", status)
|
||||
}
|
||||
client := &http.Client{CheckRedirect: func(*http.Request, []*http.Request) error { return http.ErrUseLastResponse }}
|
||||
resp, err := client.Get("http://" + status.HTTPAddr + "/cluster-admin")
|
||||
if err != nil {
|
||||
t.Fatalf("http get: %v", err)
|
||||
}
|
||||
_ = resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusPermanentRedirect {
|
||||
t.Fatalf("status = %d", resp.StatusCode)
|
||||
}
|
||||
stopped := manager.Stop(context.Background())
|
||||
if stopped.Running || stopped.Reason != "stopped" {
|
||||
t.Fatalf("stopped = %+v", stopped)
|
||||
}
|
||||
}
|
||||
|
||||
func TestManagerStartsHTTPSWhenCertificateProvided(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
certFile, keyFile := writeSelfSignedCert(t, dir)
|
||||
manager := NewManager()
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
status := manager.Apply(ctx, ListenerConfig{
|
||||
RuntimeConfig: RuntimeConfig{ServiceType: "admin-ingress", Scope: "platform", ServiceClasses: []string{"platform_admin"}},
|
||||
HTTPAddr: "127.0.0.1:0",
|
||||
HTTPSAddr: "127.0.0.1:0",
|
||||
TLSCertFile: certFile,
|
||||
TLSKeyFile: keyFile,
|
||||
})
|
||||
if !status.HTTPRunning || !status.HTTPSRunning || status.HTTPAddr == "" || status.HTTPSAddr == "" || len(status.Errors) != 0 {
|
||||
t.Fatalf("status = %+v", status)
|
||||
}
|
||||
}
|
||||
|
||||
// writeSelfSignedCert generates a 2048-bit RSA key and a self-signed
// certificate for "localhost" valid from one hour ago until one hour from now,
// writes them PEM-encoded to dir as cert.pem and key.pem (mode 0600) and
// returns the two paths. Any failure aborts the test.
func writeSelfSignedCert(t *testing.T, dir string) (string, string) {
	t.Helper()

	privateKey, err := rsa.GenerateKey(rand.Reader, 2048)
	if err != nil {
		t.Fatalf("generate key: %v", err)
	}

	template := x509.Certificate{
		SerialNumber: big.NewInt(1),
		Subject:      pkix.Name{CommonName: "localhost"},
		NotBefore:    time.Now().Add(-time.Hour),
		NotAfter:     time.Now().Add(time.Hour),
		KeyUsage:     x509.KeyUsageKeyEncipherment | x509.KeyUsageDigitalSignature,
		DNSNames:     []string{"localhost"},
	}
	// Template doubles as parent: the certificate is self-signed.
	certDER, err := x509.CreateCertificate(rand.Reader, &template, &template, &privateKey.PublicKey, privateKey)
	if err != nil {
		t.Fatalf("create cert: %v", err)
	}

	certFile := filepath.Join(dir, "cert.pem")
	keyFile := filepath.Join(dir, "key.pem")

	certPEM := pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: certDER})
	if err := os.WriteFile(certFile, certPEM, 0o600); err != nil {
		t.Fatalf("write cert: %v", err)
	}
	keyPEM := pem.EncodeToMemory(&pem.Block{Type: "RSA PRIVATE KEY", Bytes: x509.MarshalPKCS1PrivateKey(privateKey)})
	if err := os.WriteFile(keyFile, keyPEM, 0o600); err != nil {
		t.Fatalf("write key: %v", err)
	}
	return certFile, keyFile
}
|
||||
|
||||
// containsError reports whether any element of values contains needle as a
// substring (an exact match is the trivial case of that).
func containsError(values []string, needle string) bool {
	for i := range values {
		if strings.Contains(values[i], needle) {
			return true
		}
	}
	return false
}
|
||||
@@ -0,0 +1,217 @@
|
||||
package webingress
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/mesh"
|
||||
)
|
||||
|
||||
// Sentinel errors reported by MeshEnvelopeSender; match them with errors.Is.
var (
	// ErrMeshEnvelopeRuntimeRequired is returned when the sender has no fabric
	// runtime configured. It is also the wrapped error for a failure reported
	// inside a runtime response.
	ErrMeshEnvelopeRuntimeRequired = errors.New("web ingress mesh envelope runtime required")

	// ErrMeshEnvelopeRouteRequired is returned when the sender's route set has
	// no primary, warm-standby or cold-fallback route.
	ErrMeshEnvelopeRouteRequired = errors.New("web ingress mesh envelope route set required")

	// ErrMeshEnvelopeIdentityInvalid is returned when the cluster ID, source
	// node ID or target ID is blank.
	ErrMeshEnvelopeIdentityInvalid = errors.New("web ingress mesh envelope identity invalid")
)
|
||||
|
||||
type FabricChannelReliableRuntime interface {
|
||||
SendReliable(ctx context.Context, spec mesh.FabricChannelSpec, routeSet mesh.FabricRouteSet, payloads [][]byte) (mesh.FabricChannelRuntimeResult, error)
|
||||
}
|
||||
|
||||
type FabricChannelRequestResponseRuntime interface {
|
||||
SendRequestResponse(ctx context.Context, spec mesh.FabricChannelSpec, routeSet mesh.FabricRouteSet, payload []byte) (mesh.FabricChannelRequestResponseResult, error)
|
||||
}
|
||||
|
||||
type MeshEnvelopeSender struct {
|
||||
Runtime FabricChannelReliableRuntime
|
||||
ResponseRuntime FabricChannelRequestResponseRuntime
|
||||
RouteSet mesh.FabricRouteSet
|
||||
ClusterID string
|
||||
SourceNodeID string
|
||||
TargetKind mesh.FabricChannelTargetKind
|
||||
TargetID string
|
||||
ChannelID string
|
||||
Now func() time.Time
|
||||
}
|
||||
|
||||
// MeshEnvelopeDeliveryResponse is the JSON body of the delivery
// acknowledgement built by acceptedDeliveryResponse.
type MeshEnvelopeDeliveryResponse struct {
	SchemaVersion string `json:"schema_version"`
	Status        string `json:"status"`
	ChannelID     string `json:"channel_id"`

	// RouteID and TargetNode are copied from the runtime result's channel.
	RouteID    string `json:"route_id,omitempty"`
	TargetNode string `json:"target_node,omitempty"`

	// Counters copied from the runtime result.
	BytesSent       uint64 `json:"bytes_sent"`
	FramesSent      uint64 `json:"frames_sent"`
	AcksReceived    uint64 `json:"acks_received"`
	MigrationEvents int    `json:"migration_events"`
}
|
||||
|
||||
func (s MeshEnvelopeSender) Send(ctx context.Context, envelope SignedFabricServiceChannelEnvelope) (FabricResponse, error) {
|
||||
if s.Runtime == nil && s.ResponseRuntime == nil {
|
||||
return FabricResponse{}, ErrMeshEnvelopeRuntimeRequired
|
||||
}
|
||||
if strings.TrimSpace(s.RouteSet.Primary.RouteID) == "" && len(s.RouteSet.WarmStandby) == 0 && len(s.RouteSet.ColdFallbacks) == 0 {
|
||||
return FabricResponse{}, ErrMeshEnvelopeRouteRequired
|
||||
}
|
||||
spec, err := s.channelSpec(envelope)
|
||||
if err != nil {
|
||||
return FabricResponse{}, err
|
||||
}
|
||||
payload, err := json.Marshal(envelope)
|
||||
if err != nil {
|
||||
return FabricResponse{}, err
|
||||
}
|
||||
if s.ResponseRuntime != nil {
|
||||
result, err := s.ResponseRuntime.SendRequestResponse(ctx, spec, s.routeSet(spec), payload)
|
||||
if err != nil {
|
||||
return FabricResponse{}, err
|
||||
}
|
||||
responsePayload, err := unwrapWebIngressForwardResponse(result.ResponsePayload)
|
||||
if err != nil {
|
||||
return FabricResponse{}, err
|
||||
}
|
||||
if response, ok := decodeRuntimeHTTPResponse(responsePayload); ok {
|
||||
return response, nil
|
||||
}
|
||||
return acceptedDeliveryResponse(spec.ChannelID, result.FabricChannelRuntimeResult)
|
||||
}
|
||||
result, err := s.Runtime.SendReliable(ctx, spec, s.routeSet(spec), [][]byte{payload})
|
||||
if err != nil {
|
||||
return FabricResponse{}, err
|
||||
}
|
||||
return acceptedDeliveryResponse(spec.ChannelID, result)
|
||||
}
|
||||
|
||||
func unwrapWebIngressForwardResponse(payload []byte) ([]byte, error) {
|
||||
var response struct {
|
||||
Payload json.RawMessage `json:"payload,omitempty"`
|
||||
Error string `json:"error,omitempty"`
|
||||
}
|
||||
if len(payload) == 0 || json.Unmarshal(payload, &response) != nil {
|
||||
return payload, nil
|
||||
}
|
||||
if strings.TrimSpace(response.Error) != "" {
|
||||
return nil, fmt.Errorf("%w: %s", ErrMeshEnvelopeRuntimeRequired, response.Error)
|
||||
}
|
||||
if len(response.Payload) == 0 {
|
||||
return payload, nil
|
||||
}
|
||||
return append([]byte(nil), response.Payload...), nil
|
||||
}
|
||||
|
||||
func acceptedDeliveryResponse(channelID string, result mesh.FabricChannelRuntimeResult) (FabricResponse, error) {
|
||||
response, err := json.Marshal(MeshEnvelopeDeliveryResponse{
|
||||
SchemaVersion: "rap.web_ingress.mesh_envelope_delivery_response.v1",
|
||||
Status: "accepted",
|
||||
ChannelID: channelID,
|
||||
RouteID: result.Channel.RouteID,
|
||||
TargetNode: result.Channel.TargetNode,
|
||||
BytesSent: result.BytesSent,
|
||||
FramesSent: result.FramesSent,
|
||||
AcksReceived: result.AcksReceived,
|
||||
MigrationEvents: result.MigrationEvents,
|
||||
})
|
||||
if err != nil {
|
||||
return FabricResponse{}, err
|
||||
}
|
||||
return FabricResponse{
|
||||
StatusCode: http.StatusAccepted,
|
||||
Headers: http.Header{"Content-Type": []string{"application/json"}},
|
||||
Body: response,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func decodeRuntimeHTTPResponse(payload []byte) (FabricResponse, bool) {
|
||||
var response struct {
|
||||
SchemaVersion string `json:"schema_version"`
|
||||
StatusCode int `json:"status_code"`
|
||||
Headers map[string][]string `json:"headers,omitempty"`
|
||||
BodyBase64 string `json:"body_b64,omitempty"`
|
||||
Body string `json:"body,omitempty"`
|
||||
}
|
||||
if len(payload) == 0 || json.Unmarshal(payload, &response) != nil {
|
||||
return FabricResponse{}, false
|
||||
}
|
||||
if response.SchemaVersion != FabricRuntimeResponseSchema {
|
||||
return FabricResponse{}, false
|
||||
}
|
||||
body := []byte(response.Body)
|
||||
if response.BodyBase64 != "" {
|
||||
decoded, err := decodeEnvelopeBase64(response.BodyBase64)
|
||||
if err != nil {
|
||||
return FabricResponse{}, false
|
||||
}
|
||||
body = decoded
|
||||
}
|
||||
headers := http.Header{}
|
||||
for key, values := range response.Headers {
|
||||
if !safeResponseHeader(key) {
|
||||
continue
|
||||
}
|
||||
for _, value := range values {
|
||||
headers.Add(key, value)
|
||||
}
|
||||
}
|
||||
return FabricResponse{StatusCode: response.StatusCode, Headers: headers, Body: body}, true
|
||||
}
|
||||
|
||||
func (s MeshEnvelopeSender) channelSpec(envelope SignedFabricServiceChannelEnvelope) (mesh.FabricChannelSpec, error) {
|
||||
clusterID := strings.TrimSpace(s.ClusterID)
|
||||
sourceNodeID := strings.TrimSpace(s.SourceNodeID)
|
||||
targetID := strings.TrimSpace(s.TargetID)
|
||||
if clusterID == "" || sourceNodeID == "" || targetID == "" {
|
||||
return mesh.FabricChannelSpec{}, ErrMeshEnvelopeIdentityInvalid
|
||||
}
|
||||
targetKind := s.TargetKind
|
||||
if targetKind == "" {
|
||||
targetKind = mesh.FabricChannelTargetPool
|
||||
}
|
||||
channelID := strings.TrimSpace(s.ChannelID)
|
||||
if channelID == "" {
|
||||
channelID = defaultMeshEnvelopeChannelID(envelope, s.now())
|
||||
}
|
||||
spec := mesh.FabricChannelSpec{
|
||||
ChannelID: channelID,
|
||||
ClusterID: clusterID,
|
||||
SourceNodeID: sourceNodeID,
|
||||
TargetKind: targetKind,
|
||||
TargetID: targetID,
|
||||
TrafficClass: "control",
|
||||
StickyKey: envelope.Envelope.Scope + ":" + envelope.Envelope.ServiceClass,
|
||||
CreatedAt: s.now(),
|
||||
}
|
||||
if err := mesh.ValidateFabricChannelSpec(spec); err != nil {
|
||||
return mesh.FabricChannelSpec{}, err
|
||||
}
|
||||
return spec, nil
|
||||
}
|
||||
|
||||
func (s MeshEnvelopeSender) routeSet(spec mesh.FabricChannelSpec) mesh.FabricRouteSet {
|
||||
routeSet := s.RouteSet
|
||||
if routeSet.TargetKind == "" {
|
||||
routeSet.TargetKind = spec.TargetKind
|
||||
}
|
||||
if strings.TrimSpace(routeSet.TargetID) == "" {
|
||||
routeSet.TargetID = spec.TargetID
|
||||
}
|
||||
return routeSet
|
||||
}
|
||||
|
||||
func (s MeshEnvelopeSender) now() time.Time {
|
||||
if s.Now != nil {
|
||||
return s.Now().UTC()
|
||||
}
|
||||
return time.Now().UTC()
|
||||
}
|
||||
|
||||
func defaultMeshEnvelopeChannelID(envelope SignedFabricServiceChannelEnvelope, now time.Time) string {
|
||||
serviceClass := strings.ReplaceAll(strings.TrimSpace(envelope.Envelope.ServiceClass), "_", "-")
|
||||
if serviceClass == "" {
|
||||
serviceClass = "web-ingress"
|
||||
}
|
||||
return fmt.Sprintf("web-ingress-%s-%d", serviceClass, now.UnixNano())
|
||||
}
|
||||
@@ -0,0 +1,267 @@
|
||||
package webingress
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"net/http"
|
||||
"testing"
|
||||
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/mesh"
|
||||
)
|
||||
|
||||
func TestMeshEnvelopeSenderSendsSignedEnvelopeOverReliableFabricRuntime(t *testing.T) {
|
||||
runtime := &recordingReliableRuntime{
|
||||
result: mesh.FabricChannelRuntimeResult{
|
||||
Channel: mesh.FabricChannel{RouteID: "route-fast", TargetNode: "node-runtime"},
|
||||
BytesSent: 123,
|
||||
FramesSent: 1,
|
||||
AcksReceived: 1,
|
||||
},
|
||||
}
|
||||
sender := MeshEnvelopeSender{
|
||||
Runtime: runtime,
|
||||
RouteSet: testWebIngressRouteSet(),
|
||||
ClusterID: "cluster-1",
|
||||
SourceNodeID: "node-ingress",
|
||||
TargetKind: mesh.FabricChannelTargetPool,
|
||||
TargetID: "pool-admin-runtime",
|
||||
ChannelID: "channel-web-1",
|
||||
Now: fixedEnvelopeNow,
|
||||
}
|
||||
envelope := SignedFabricServiceChannelEnvelope{
|
||||
SchemaVersion: "rap.web_ingress.signed_fabric_service_channel_envelope.v1",
|
||||
Envelope: FabricServiceChannelEnvelope{
|
||||
SchemaVersion: FabricServiceChannelEnvelopeSchema,
|
||||
Scope: "platform",
|
||||
ServiceClass: "platform_admin",
|
||||
},
|
||||
Signature: FabricEnvelopeSignature{KeyID: "node-key", Alg: "ed25519", Signature: "sig"},
|
||||
}
|
||||
|
||||
response, err := sender.Send(context.Background(), envelope)
|
||||
if err != nil {
|
||||
t.Fatalf("send: %v", err)
|
||||
}
|
||||
if response.StatusCode != http.StatusAccepted || response.Headers.Get("Content-Type") != "application/json" {
|
||||
t.Fatalf("response = %+v", response)
|
||||
}
|
||||
if runtime.spec.ChannelID != "channel-web-1" ||
|
||||
runtime.spec.ClusterID != "cluster-1" ||
|
||||
runtime.spec.SourceNodeID != "node-ingress" ||
|
||||
runtime.spec.TargetID != "pool-admin-runtime" ||
|
||||
runtime.spec.TargetKind != mesh.FabricChannelTargetPool ||
|
||||
runtime.spec.TrafficClass != "control" ||
|
||||
runtime.spec.StickyKey != "platform:platform_admin" {
|
||||
t.Fatalf("spec = %+v", runtime.spec)
|
||||
}
|
||||
if runtime.routeSet.TargetID != "pool-admin-runtime" || len(runtime.payloads) != 1 {
|
||||
t.Fatalf("route/payload = %+v payloads=%d", runtime.routeSet, len(runtime.payloads))
|
||||
}
|
||||
var delivered SignedFabricServiceChannelEnvelope
|
||||
if err := json.Unmarshal(runtime.payloads[0], &delivered); err != nil {
|
||||
t.Fatalf("decode delivered envelope: %v", err)
|
||||
}
|
||||
if delivered.Signature.Signature != "sig" || delivered.Envelope.ServiceClass != "platform_admin" {
|
||||
t.Fatalf("delivered = %+v", delivered)
|
||||
}
|
||||
var body MeshEnvelopeDeliveryResponse
|
||||
if err := json.Unmarshal(response.Body, &body); err != nil {
|
||||
t.Fatalf("decode response: %v", err)
|
||||
}
|
||||
if body.SchemaVersion != "rap.web_ingress.mesh_envelope_delivery_response.v1" ||
|
||||
body.Status != "accepted" ||
|
||||
body.RouteID != "route-fast" ||
|
||||
body.AcksReceived != 1 {
|
||||
t.Fatalf("body = %+v", body)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMeshEnvelopeSenderReturnsRuntimeHTTPResponse(t *testing.T) {
|
||||
runtime := &recordingRequestResponseRuntime{
|
||||
result: mesh.FabricChannelRequestResponseResult{
|
||||
FabricChannelRuntimeResult: mesh.FabricChannelRuntimeResult{
|
||||
Channel: mesh.FabricChannel{RouteID: "route-runtime", TargetNode: "node-runtime"},
|
||||
BytesSent: 123,
|
||||
BytesRecv: 16,
|
||||
FramesSent: 1,
|
||||
FramesRecv: 1,
|
||||
AcksReceived: 1,
|
||||
},
|
||||
ResponsePayload: []byte(`{"payload":{"schema_version":"rap.web_ingress.fabric_runtime_response.v1","status_code":201,"headers":{"X-RAP-Runtime":["ok"],"Set-Cookie":["blocked"]},"body_b64":"eyJvayI6dHJ1ZX0="}}`),
|
||||
},
|
||||
}
|
||||
sender := MeshEnvelopeSender{
|
||||
ResponseRuntime: runtime,
|
||||
RouteSet: testWebIngressRouteSet(),
|
||||
ClusterID: "cluster-1",
|
||||
SourceNodeID: "node-ingress",
|
||||
TargetKind: mesh.FabricChannelTargetPool,
|
||||
TargetID: "pool-admin-runtime",
|
||||
ChannelID: "channel-web-1",
|
||||
Now: fixedEnvelopeNow,
|
||||
}
|
||||
|
||||
response, err := sender.Send(context.Background(), SignedFabricServiceChannelEnvelope{
|
||||
SchemaVersion: "rap.web_ingress.signed_fabric_service_channel_envelope.v1",
|
||||
Envelope: FabricServiceChannelEnvelope{SchemaVersion: FabricServiceChannelEnvelopeSchema, Scope: "platform", ServiceClass: "platform_admin"},
|
||||
Signature: FabricEnvelopeSignature{KeyID: "node-key", Alg: "ed25519", Signature: "sig"},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("send: %v", err)
|
||||
}
|
||||
if response.StatusCode != http.StatusCreated || response.Headers.Get("X-RAP-Runtime") != "ok" || response.Headers.Get("Set-Cookie") != "" || string(response.Body) != `{"ok":true}` {
|
||||
t.Fatalf("response = %+v body=%s", response, string(response.Body))
|
||||
}
|
||||
if runtime.spec.ChannelID != "channel-web-1" || len(runtime.payload) == 0 {
|
||||
t.Fatalf("runtime spec=%+v payload=%s", runtime.spec, string(runtime.payload))
|
||||
}
|
||||
}
|
||||
|
||||
func TestMeshEnvelopeSenderReportsWrappedRuntimeError(t *testing.T) {
|
||||
sender := MeshEnvelopeSender{
|
||||
ResponseRuntime: &recordingRequestResponseRuntime{
|
||||
result: mesh.FabricChannelRequestResponseResult{ResponsePayload: []byte(`{"error":"runtime unavailable"}`)},
|
||||
},
|
||||
RouteSet: testWebIngressRouteSet(),
|
||||
ClusterID: "cluster-1",
|
||||
SourceNodeID: "node-ingress",
|
||||
TargetID: "pool-admin-runtime",
|
||||
ChannelID: "channel-web-1",
|
||||
}
|
||||
|
||||
_, err := sender.Send(context.Background(), SignedFabricServiceChannelEnvelope{
|
||||
Envelope: FabricServiceChannelEnvelope{Scope: "platform", ServiceClass: "platform_admin"},
|
||||
})
|
||||
if !errors.Is(err, ErrMeshEnvelopeRuntimeRequired) {
|
||||
t.Fatalf("err = %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMeshEnvelopeSenderFallsBackToDeliveryAckForNonHTTPRuntimePayload(t *testing.T) {
|
||||
runtime := &recordingRequestResponseRuntime{
|
||||
result: mesh.FabricChannelRequestResponseResult{
|
||||
FabricChannelRuntimeResult: mesh.FabricChannelRuntimeResult{
|
||||
Channel: mesh.FabricChannel{RouteID: "route-runtime", TargetNode: "node-runtime"},
|
||||
BytesSent: 123,
|
||||
FramesSent: 1,
|
||||
AcksReceived: 1,
|
||||
},
|
||||
ResponsePayload: []byte(`{"not":"http"}`),
|
||||
},
|
||||
}
|
||||
sender := MeshEnvelopeSender{
|
||||
ResponseRuntime: runtime,
|
||||
RouteSet: testWebIngressRouteSet(),
|
||||
ClusterID: "cluster-1",
|
||||
SourceNodeID: "node-ingress",
|
||||
TargetID: "pool-admin-runtime",
|
||||
ChannelID: "channel-web-1",
|
||||
}
|
||||
|
||||
response, err := sender.Send(context.Background(), SignedFabricServiceChannelEnvelope{
|
||||
Envelope: FabricServiceChannelEnvelope{Scope: "platform", ServiceClass: "platform_admin"},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("send: %v", err)
|
||||
}
|
||||
if response.StatusCode != http.StatusAccepted {
|
||||
t.Fatalf("response = %+v", response)
|
||||
}
|
||||
var body MeshEnvelopeDeliveryResponse
|
||||
if err := json.Unmarshal(response.Body, &body); err != nil {
|
||||
t.Fatalf("decode response: %v", err)
|
||||
}
|
||||
if body.Status != "accepted" || body.RouteID != "route-runtime" {
|
||||
t.Fatalf("body = %+v", body)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMeshEnvelopeSenderReportsRuntimeRouteAndIdentityErrors(t *testing.T) {
|
||||
_, err := (MeshEnvelopeSender{}).Send(context.Background(), SignedFabricServiceChannelEnvelope{})
|
||||
if !errors.Is(err, ErrMeshEnvelopeRuntimeRequired) {
|
||||
t.Fatalf("runtime error = %v", err)
|
||||
}
|
||||
|
||||
_, err = (MeshEnvelopeSender{
|
||||
Runtime: &recordingReliableRuntime{},
|
||||
ClusterID: "cluster-1",
|
||||
SourceNodeID: "node-ingress",
|
||||
TargetID: "pool-admin-runtime",
|
||||
}).Send(context.Background(), SignedFabricServiceChannelEnvelope{})
|
||||
if !errors.Is(err, ErrMeshEnvelopeRouteRequired) {
|
||||
t.Fatalf("route error = %v", err)
|
||||
}
|
||||
|
||||
_, err = (MeshEnvelopeSender{
|
||||
Runtime: &recordingReliableRuntime{},
|
||||
RouteSet: testWebIngressRouteSet(),
|
||||
}).Send(context.Background(), SignedFabricServiceChannelEnvelope{})
|
||||
if !errors.Is(err, ErrMeshEnvelopeIdentityInvalid) {
|
||||
t.Fatalf("identity error = %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMeshEnvelopeSenderPropagatesReliableRuntimeFailure(t *testing.T) {
|
||||
sendErr := errors.New("send failed")
|
||||
_, err := (MeshEnvelopeSender{
|
||||
Runtime: &recordingReliableRuntime{err: sendErr},
|
||||
RouteSet: testWebIngressRouteSet(),
|
||||
ClusterID: "cluster-1",
|
||||
SourceNodeID: "node-ingress",
|
||||
TargetID: "pool-admin-runtime",
|
||||
}).Send(context.Background(), SignedFabricServiceChannelEnvelope{})
|
||||
if !errors.Is(err, sendErr) {
|
||||
t.Fatalf("send error = %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
type recordingReliableRuntime struct {
|
||||
spec mesh.FabricChannelSpec
|
||||
routeSet mesh.FabricRouteSet
|
||||
payloads [][]byte
|
||||
result mesh.FabricChannelRuntimeResult
|
||||
err error
|
||||
}
|
||||
|
||||
type recordingRequestResponseRuntime struct {
|
||||
spec mesh.FabricChannelSpec
|
||||
routeSet mesh.FabricRouteSet
|
||||
payload []byte
|
||||
result mesh.FabricChannelRequestResponseResult
|
||||
err error
|
||||
}
|
||||
|
||||
func (r *recordingRequestResponseRuntime) SendRequestResponse(_ context.Context, spec mesh.FabricChannelSpec, routeSet mesh.FabricRouteSet, payload []byte) (mesh.FabricChannelRequestResponseResult, error) {
|
||||
r.spec = spec
|
||||
r.routeSet = routeSet
|
||||
r.payload = payload
|
||||
if r.err != nil {
|
||||
return mesh.FabricChannelRequestResponseResult{}, r.err
|
||||
}
|
||||
return r.result, nil
|
||||
}
|
||||
|
||||
func (r *recordingReliableRuntime) SendReliable(_ context.Context, spec mesh.FabricChannelSpec, routeSet mesh.FabricRouteSet, payloads [][]byte) (mesh.FabricChannelRuntimeResult, error) {
|
||||
r.spec = spec
|
||||
r.routeSet = routeSet
|
||||
r.payloads = payloads
|
||||
if r.err != nil {
|
||||
return mesh.FabricChannelRuntimeResult{}, r.err
|
||||
}
|
||||
return r.result, nil
|
||||
}
|
||||
|
||||
func testWebIngressRouteSet() mesh.FabricRouteSet {
|
||||
return mesh.FabricRouteSet{
|
||||
Primary: mesh.FabricRoute{
|
||||
RouteID: "route-fast",
|
||||
ClusterID: "cluster-1",
|
||||
SourceNodeID: "node-ingress",
|
||||
DestinationNodeID: "node-runtime",
|
||||
PoolID: "pool-admin-runtime",
|
||||
Healthy: true,
|
||||
Capacity: 100,
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,219 @@
|
||||
package webingress
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/ed25519"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Schema identifiers used by the fabric runtime receiver.
const (
	// SignedFabricServiceChannelEnvelopeSchema is the schema_version a signed
	// envelope wrapper must carry to pass verification.
	SignedFabricServiceChannelEnvelopeSchema = "rap.web_ingress.signed_fabric_service_channel_envelope.v1"

	// FabricRuntimeResponseSchema is the schema_version stamped on encoded
	// runtime responses.
	FabricRuntimeResponseSchema = "rap.web_ingress.fabric_runtime_response.v1"
)
|
||||
|
||||
// Sentinel errors returned by the fabric runtime receiver. Callers match them
// with errors.Is, because most return sites wrap them with extra detail.
var (
	// ErrFabricEnvelopeSignatureInvalid marks a malformed envelope or one
	// whose signature does not verify.
	ErrFabricEnvelopeSignatureInvalid = errors.New("web ingress fabric envelope signature invalid")

	// ErrFabricEnvelopeUnauthorized marks an envelope rejected by receiver
	// policy: scope, service class, timestamp skew or an untrusted key.
	ErrFabricEnvelopeUnauthorized = errors.New("web ingress fabric envelope unauthorized")

	// ErrFabricEnvelopeRuntimeRequired is returned when the receiver has no
	// handler configured.
	ErrFabricEnvelopeRuntimeRequired = errors.New("web ingress fabric runtime handler required")
)
|
||||
|
||||
// EnvelopeKeyResolver looks up the Ed25519 public key for a signing key ID.
type EnvelopeKeyResolver interface {
	// PublicKey returns the key registered under keyID. The boolean reports
	// whether the key is known; a non-nil error signals a lookup failure.
	PublicKey(ctx context.Context, keyID string) (ed25519.PublicKey, bool, error)
}
|
||||
|
||||
type EnvelopeRuntimeHandler interface {
|
||||
HandleFabricRequest(ctx context.Context, request FabricRequest) (FabricResponse, error)
|
||||
}
|
||||
|
||||
type RuntimeHandlerFunc func(ctx context.Context, request FabricRequest) (FabricResponse, error)
|
||||
|
||||
func (f RuntimeHandlerFunc) HandleFabricRequest(ctx context.Context, request FabricRequest) (FabricResponse, error) {
|
||||
return f(ctx, request)
|
||||
}
|
||||
|
||||
// ReceiverConfig holds the policy a FabricRuntimeReceiver applies to incoming
// envelopes.
type ReceiverConfig struct {
	// ServiceType names the receiving service.
	// NOTE(review): not consulted by the verification code in this file — confirm intended use.
	ServiceType string

	// Scope, when non-blank, must equal the envelope scope.
	Scope string

	// ServiceClasses, when non-empty, lists the service classes accepted.
	ServiceClasses []string

	// MaxClockSkew bounds how far envelope timestamps may be from the
	// receiver clock; values <= 0 fall back to five minutes.
	MaxClockSkew time.Duration
}
|
||||
|
||||
type FabricRuntimeReceiver struct {
|
||||
Config ReceiverConfig
|
||||
Keys EnvelopeKeyResolver
|
||||
Handler EnvelopeRuntimeHandler
|
||||
Now func() time.Time
|
||||
}
|
||||
|
||||
// StaticEnvelopeKeyResolver is a fixed in-memory key set indexed by key ID.
type StaticEnvelopeKeyResolver map[string]ed25519.PublicKey

// PublicKey looks up keyID after trimming surrounding whitespace. On a hit it
// returns a copy of the stored key, so callers cannot mutate the map's entry.
// A miss is reported as (nil, false, nil). The error is always nil.
func (r StaticEnvelopeKeyResolver) PublicKey(_ context.Context, keyID string) (ed25519.PublicKey, bool, error) {
	if stored, found := r[strings.TrimSpace(keyID)]; found {
		clone := append(ed25519.PublicKey(nil), stored...)
		return clone, true, nil
	}
	return nil, false, nil
}
|
||||
|
||||
func (r FabricRuntimeReceiver) Receive(ctx context.Context, payload []byte) ([]byte, error) {
|
||||
response, err := r.ReceiveResponse(ctx, payload)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return encodeFabricRuntimeResponse(response)
|
||||
}
|
||||
|
||||
func (r FabricRuntimeReceiver) ReceiveResponse(ctx context.Context, payload []byte) (FabricResponse, error) {
|
||||
if r.Handler == nil {
|
||||
return FabricResponse{}, ErrFabricEnvelopeRuntimeRequired
|
||||
}
|
||||
var signed SignedFabricServiceChannelEnvelope
|
||||
if err := json.Unmarshal(payload, &signed); err != nil {
|
||||
return FabricResponse{}, fmt.Errorf("%w: invalid signed envelope json", ErrFabricEnvelopeSignatureInvalid)
|
||||
}
|
||||
if err := r.verify(ctx, signed); err != nil {
|
||||
return FabricResponse{}, err
|
||||
}
|
||||
request, err := requestFromEnvelope(signed.Envelope)
|
||||
if err != nil {
|
||||
return FabricResponse{}, err
|
||||
}
|
||||
return r.Handler.HandleFabricRequest(ctx, request)
|
||||
}
|
||||
|
||||
func (r FabricRuntimeReceiver) verify(ctx context.Context, signed SignedFabricServiceChannelEnvelope) error {
|
||||
if signed.SchemaVersion != SignedFabricServiceChannelEnvelopeSchema {
|
||||
return fmt.Errorf("%w: signed schema mismatch", ErrFabricEnvelopeSignatureInvalid)
|
||||
}
|
||||
if signed.Envelope.SchemaVersion != FabricServiceChannelEnvelopeSchema ||
|
||||
strings.TrimSpace(signed.Envelope.Scope) == "" ||
|
||||
strings.TrimSpace(signed.Envelope.ServiceClass) == "" {
|
||||
return fmt.Errorf("%w: envelope contract invalid", ErrFabricEnvelopeSignatureInvalid)
|
||||
}
|
||||
if scope := strings.TrimSpace(r.Config.Scope); scope != "" && signed.Envelope.Scope != scope {
|
||||
return fmt.Errorf("%w: scope mismatch", ErrFabricEnvelopeUnauthorized)
|
||||
}
|
||||
if len(r.Config.ServiceClasses) > 0 && !contains(r.Config.ServiceClasses, signed.Envelope.ServiceClass) {
|
||||
return fmt.Errorf("%w: service class not allowed", ErrFabricEnvelopeUnauthorized)
|
||||
}
|
||||
if err := r.verifyClock(signed.Envelope); err != nil {
|
||||
return err
|
||||
}
|
||||
if r.Keys == nil {
|
||||
return fmt.Errorf("%w: key resolver required", ErrFabricEnvelopeSignatureInvalid)
|
||||
}
|
||||
keyID := strings.TrimSpace(signed.Signature.KeyID)
|
||||
publicKey, ok, err := r.Keys.PublicKey(ctx, keyID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !ok || len(publicKey) != ed25519.PublicKeySize {
|
||||
return fmt.Errorf("%w: signing key not trusted", ErrFabricEnvelopeUnauthorized)
|
||||
}
|
||||
if signed.Signature.Alg != "ed25519" {
|
||||
return fmt.Errorf("%w: algorithm mismatch", ErrFabricEnvelopeSignatureInvalid)
|
||||
}
|
||||
signature, err := decodeEnvelopeBase64(signed.Signature.Signature)
|
||||
if err != nil || len(signature) != ed25519.SignatureSize {
|
||||
return fmt.Errorf("%w: signature must be base64 ed25519", ErrFabricEnvelopeSignatureInvalid)
|
||||
}
|
||||
canonical, err := json.Marshal(signed.Envelope)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !ed25519.Verify(publicKey, canonical, signature) {
|
||||
return ErrFabricEnvelopeSignatureInvalid
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r FabricRuntimeReceiver) verifyClock(envelope FabricServiceChannelEnvelope) error {
|
||||
maxSkew := r.Config.MaxClockSkew
|
||||
if maxSkew <= 0 {
|
||||
maxSkew = 5 * time.Minute
|
||||
}
|
||||
now := time.Now().UTC()
|
||||
if r.Now != nil {
|
||||
now = r.Now().UTC()
|
||||
}
|
||||
for _, value := range []string{envelope.ObservedAt, envelope.EnvelopedAt} {
|
||||
if strings.TrimSpace(value) == "" {
|
||||
continue
|
||||
}
|
||||
parsed, err := time.Parse(time.RFC3339Nano, value)
|
||||
if err != nil {
|
||||
return fmt.Errorf("%w: invalid envelope timestamp", ErrFabricEnvelopeSignatureInvalid)
|
||||
}
|
||||
if parsed.After(now.Add(maxSkew)) || parsed.Before(now.Add(-maxSkew)) {
|
||||
return fmt.Errorf("%w: envelope timestamp outside skew", ErrFabricEnvelopeUnauthorized)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func requestFromEnvelope(envelope FabricServiceChannelEnvelope) (FabricRequest, error) {
|
||||
body, err := base64.StdEncoding.DecodeString(envelope.BodyBase64)
|
||||
if err != nil && envelope.BodyBase64 != "" {
|
||||
return FabricRequest{}, fmt.Errorf("%w: invalid body_b64", ErrFabricEnvelopeSignatureInvalid)
|
||||
}
|
||||
observedAt, _ := time.Parse(time.RFC3339Nano, envelope.ObservedAt)
|
||||
headers := http.Header{}
|
||||
for key, values := range envelope.Headers {
|
||||
if !safeRequestHeader(key) {
|
||||
continue
|
||||
}
|
||||
for _, value := range values {
|
||||
headers.Add(key, value)
|
||||
}
|
||||
}
|
||||
return FabricRequest{
|
||||
SchemaVersion: envelope.RequestSchema,
|
||||
Method: envelope.Method,
|
||||
Path: envelope.Path,
|
||||
Query: envelope.Query,
|
||||
Host: envelope.Host,
|
||||
ServiceType: envelope.ServiceType,
|
||||
Scope: envelope.Scope,
|
||||
ServiceClass: envelope.ServiceClass,
|
||||
Headers: headers,
|
||||
Body: body,
|
||||
ObservedAt: observedAt,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func encodeFabricRuntimeResponse(response FabricResponse) ([]byte, error) {
|
||||
headers := map[string][]string{}
|
||||
for key, values := range response.Headers {
|
||||
if !safeResponseHeader(key) {
|
||||
continue
|
||||
}
|
||||
copied := append([]string(nil), values...)
|
||||
if len(copied) > 0 {
|
||||
headers[http.CanonicalHeaderKey(key)] = copied
|
||||
}
|
||||
}
|
||||
payload := struct {
|
||||
SchemaVersion string `json:"schema_version"`
|
||||
StatusCode int `json:"status_code"`
|
||||
Headers map[string][]string `json:"headers,omitempty"`
|
||||
BodyBase64 string `json:"body_b64,omitempty"`
|
||||
}{
|
||||
SchemaVersion: FabricRuntimeResponseSchema,
|
||||
StatusCode: response.StatusCode,
|
||||
Headers: headers,
|
||||
BodyBase64: base64.StdEncoding.EncodeToString(response.Body),
|
||||
}
|
||||
if payload.StatusCode < 100 || payload.StatusCode > 599 {
|
||||
payload.StatusCode = http.StatusOK
|
||||
}
|
||||
if len(payload.Headers) == 0 {
|
||||
payload.Headers = nil
|
||||
}
|
||||
return json.Marshal(payload)
|
||||
}
|
||||
@@ -0,0 +1,194 @@
|
||||
package webingress
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/ed25519"
|
||||
"crypto/rand"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"net/http"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestFabricRuntimeReceiverVerifiesEnvelopeAndReturnsRuntimeResponse(t *testing.T) {
|
||||
publicKey, privateKey, err := ed25519.GenerateKey(rand.Reader)
|
||||
if err != nil {
|
||||
t.Fatalf("generate key: %v", err)
|
||||
}
|
||||
keyID := ed25519EnvelopeKeyID(publicKey)
|
||||
receiver := FabricRuntimeReceiver{
|
||||
Config: ReceiverConfig{ServiceType: "global-admin-runtime", Scope: "platform", ServiceClasses: []string{"platform_admin"}},
|
||||
Keys: StaticEnvelopeKeyResolver{keyID: publicKey},
|
||||
Handler: recordingRuntimeHandler{response: FabricResponse{
|
||||
StatusCode: http.StatusCreated,
|
||||
Headers: http.Header{"X-RAP-Runtime": []string{"ok"}, "Set-Cookie": []string{"blocked"}},
|
||||
Body: []byte(`{"ok":true}`),
|
||||
}},
|
||||
Now: fixedEnvelopeNow,
|
||||
}
|
||||
payload := signedReceiverEnvelope(t, privateKey, keyID, FabricServiceChannelEnvelope{
|
||||
SchemaVersion: FabricServiceChannelEnvelopeSchema,
|
||||
RequestSchema: "rap.web_ingress.fabric_request.v1",
|
||||
Method: http.MethodPost,
|
||||
Path: "/platform-admin/root",
|
||||
Query: "tab=nodes",
|
||||
Host: "admin.example.test",
|
||||
ServiceType: "admin-ingress",
|
||||
Scope: "platform",
|
||||
ServiceClass: "platform_admin",
|
||||
Headers: map[string][]string{"X-Trace-Id": {"trace-1"}},
|
||||
BodyBase64: base64.StdEncoding.EncodeToString([]byte(`{"hello":"world"}`)),
|
||||
ObservedAt: "2026-05-17T00:00:00Z",
|
||||
EnvelopedAt: "2026-05-17T00:00:01Z",
|
||||
})
|
||||
|
||||
responsePayload, err := receiver.Receive(context.Background(), payload)
|
||||
if err != nil {
|
||||
t.Fatalf("receive: %v", err)
|
||||
}
|
||||
var response struct {
|
||||
SchemaVersion string `json:"schema_version"`
|
||||
StatusCode int `json:"status_code"`
|
||||
Headers map[string][]string `json:"headers"`
|
||||
BodyBase64 string `json:"body_b64"`
|
||||
}
|
||||
if err := json.Unmarshal(responsePayload, &response); err != nil {
|
||||
t.Fatalf("decode response: %v", err)
|
||||
}
|
||||
if response.SchemaVersion != FabricRuntimeResponseSchema ||
|
||||
response.StatusCode != http.StatusCreated ||
|
||||
response.Headers["X-Rap-Runtime"][0] != "ok" ||
|
||||
response.Headers["Set-Cookie"] != nil ||
|
||||
response.BodyBase64 != "eyJvayI6dHJ1ZX0=" {
|
||||
t.Fatalf("response = %+v", response)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricRuntimeReceiverRejectsBadSignatureScopeClassAndStaleEnvelope(t *testing.T) {
|
||||
publicKey, privateKey, err := ed25519.GenerateKey(rand.Reader)
|
||||
if err != nil {
|
||||
t.Fatalf("generate key: %v", err)
|
||||
}
|
||||
keyID := ed25519EnvelopeKeyID(publicKey)
|
||||
receiver := FabricRuntimeReceiver{
|
||||
Config: ReceiverConfig{Scope: "platform", ServiceClasses: []string{"platform_admin"}},
|
||||
Keys: StaticEnvelopeKeyResolver{keyID: publicKey},
|
||||
Handler: recordingRuntimeHandler{},
|
||||
Now: fixedEnvelopeNow,
|
||||
}
|
||||
|
||||
base := FabricServiceChannelEnvelope{
|
||||
SchemaVersion: FabricServiceChannelEnvelopeSchema,
|
||||
RequestSchema: "rap.web_ingress.fabric_request.v1",
|
||||
Method: http.MethodGet,
|
||||
Path: "/platform-admin/root",
|
||||
Scope: "platform",
|
||||
ServiceClass: "platform_admin",
|
||||
ObservedAt: "2026-05-17T00:00:00Z",
|
||||
EnvelopedAt: "2026-05-17T00:00:01Z",
|
||||
}
|
||||
badSignature := signedReceiverEnvelope(t, privateKey, keyID, base)
|
||||
badSignature[len(badSignature)-2] = 'x'
|
||||
if _, err := receiver.Receive(context.Background(), badSignature); !errors.Is(err, ErrFabricEnvelopeSignatureInvalid) {
|
||||
t.Fatalf("bad signature err = %v", err)
|
||||
}
|
||||
|
||||
wrongScope := base
|
||||
wrongScope.Scope = "organization"
|
||||
if _, err := receiver.Receive(context.Background(), signedReceiverEnvelope(t, privateKey, keyID, wrongScope)); !errors.Is(err, ErrFabricEnvelopeUnauthorized) {
|
||||
t.Fatalf("wrong scope err = %v", err)
|
||||
}
|
||||
|
||||
wrongClass := base
|
||||
wrongClass.ServiceClass = "cluster_admin"
|
||||
if _, err := receiver.Receive(context.Background(), signedReceiverEnvelope(t, privateKey, keyID, wrongClass)); !errors.Is(err, ErrFabricEnvelopeUnauthorized) {
|
||||
t.Fatalf("wrong class err = %v", err)
|
||||
}
|
||||
|
||||
stale := base
|
||||
stale.EnvelopedAt = "2026-05-16T00:00:00Z"
|
||||
if _, err := receiver.Receive(context.Background(), signedReceiverEnvelope(t, privateKey, keyID, stale)); !errors.Is(err, ErrFabricEnvelopeUnauthorized) {
|
||||
t.Fatalf("stale err = %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricRuntimeReceiverRequiresTrustedKeyAndHandler(t *testing.T) {
|
||||
publicKey, privateKey, err := ed25519.GenerateKey(rand.Reader)
|
||||
if err != nil {
|
||||
t.Fatalf("generate key: %v", err)
|
||||
}
|
||||
keyID := ed25519EnvelopeKeyID(publicKey)
|
||||
payload := signedReceiverEnvelope(t, privateKey, keyID, FabricServiceChannelEnvelope{
|
||||
SchemaVersion: FabricServiceChannelEnvelopeSchema,
|
||||
Scope: "platform",
|
||||
ServiceClass: "platform_admin",
|
||||
ObservedAt: "2026-05-17T00:00:00Z",
|
||||
EnvelopedAt: "2026-05-17T00:00:01Z",
|
||||
})
|
||||
|
||||
_, err = (FabricRuntimeReceiver{Keys: StaticEnvelopeKeyResolver{keyID: publicKey}, Now: fixedEnvelopeNow}).Receive(context.Background(), payload)
|
||||
if !errors.Is(err, ErrFabricEnvelopeRuntimeRequired) {
|
||||
t.Fatalf("handler err = %v", err)
|
||||
}
|
||||
|
||||
_, err = (FabricRuntimeReceiver{Handler: recordingRuntimeHandler{}, Now: fixedEnvelopeNow}).Receive(context.Background(), payload)
|
||||
if !errors.Is(err, ErrFabricEnvelopeSignatureInvalid) {
|
||||
t.Fatalf("key resolver err = %v", err)
|
||||
}
|
||||
|
||||
_, otherPrivateKey, err := ed25519.GenerateKey(rand.Reader)
|
||||
if err != nil {
|
||||
t.Fatalf("generate other key: %v", err)
|
||||
}
|
||||
untrusted := signedReceiverEnvelope(t, otherPrivateKey, "other-key", FabricServiceChannelEnvelope{
|
||||
SchemaVersion: FabricServiceChannelEnvelopeSchema,
|
||||
Scope: "platform",
|
||||
ServiceClass: "platform_admin",
|
||||
ObservedAt: "2026-05-17T00:00:00Z",
|
||||
EnvelopedAt: "2026-05-17T00:00:01Z",
|
||||
})
|
||||
_, err = (FabricRuntimeReceiver{Keys: StaticEnvelopeKeyResolver{keyID: publicKey}, Handler: recordingRuntimeHandler{}, Now: fixedEnvelopeNow}).Receive(context.Background(), untrusted)
|
||||
if !errors.Is(err, ErrFabricEnvelopeUnauthorized) {
|
||||
t.Fatalf("untrusted key err = %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func signedReceiverEnvelope(t *testing.T, privateKey ed25519.PrivateKey, keyID string, envelope FabricServiceChannelEnvelope) []byte {
|
||||
t.Helper()
|
||||
canonical, err := json.Marshal(envelope)
|
||||
if err != nil {
|
||||
t.Fatalf("marshal envelope: %v", err)
|
||||
}
|
||||
payload, err := json.Marshal(SignedFabricServiceChannelEnvelope{
|
||||
SchemaVersion: SignedFabricServiceChannelEnvelopeSchema,
|
||||
Envelope: envelope,
|
||||
Signature: FabricEnvelopeSignature{
|
||||
KeyID: keyID,
|
||||
Alg: "ed25519",
|
||||
Signature: base64.StdEncoding.EncodeToString(ed25519.Sign(privateKey, canonical)),
|
||||
SignedAt: "2026-05-17T00:00:01Z",
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("marshal signed envelope: %v", err)
|
||||
}
|
||||
return payload
|
||||
}
|
||||
|
||||
type recordingRuntimeHandler struct {
|
||||
request FabricRequest
|
||||
response FabricResponse
|
||||
err error
|
||||
}
|
||||
|
||||
func (h recordingRuntimeHandler) HandleFabricRequest(_ context.Context, request FabricRequest) (FabricResponse, error) {
|
||||
h.request = request
|
||||
if h.err != nil {
|
||||
return FabricResponse{}, h.err
|
||||
}
|
||||
if h.response.StatusCode == 0 {
|
||||
h.response = FabricResponse{StatusCode: http.StatusOK, Body: []byte(`{"ready":true}`)}
|
||||
}
|
||||
return h.response, nil
|
||||
}
|
||||
@@ -0,0 +1,243 @@
|
||||
package webingress
|
||||
|
||||
import (
	"context"
	"encoding/json"
	"errors"
	"io"
	"net/http"
	"strings"
	"time"
)
|
||||
|
||||
// RuntimeConfig describes one web ingress runtime instance.
type RuntimeConfig struct {
	// ServiceType is echoed in runtime responses and forwarded requests.
	ServiceType string

	// Scope is the default scope, used when a service class has no fixed
	// scope of its own.
	Scope string

	// ServiceClasses lists the service classes this ingress may forward.
	ServiceClasses []string

	// TLSMode, HTTPPort and HTTPSPort are listener settings.
	// NOTE(review): not read by the handlers in this file — presumably
	// consumed where the listeners are created; confirm.
	TLSMode   string
	HTTPPort  int
	HTTPSPort int
}
|
||||
|
||||
type Runtime struct {
|
||||
Config RuntimeConfig
|
||||
Binder FabricBinder
|
||||
Now func() time.Time
|
||||
}
|
||||
|
||||
type FabricBinder interface {
|
||||
Forward(ctx context.Context, request FabricRequest) (FabricResponse, error)
|
||||
}
|
||||
|
||||
// FabricRequest is an HTTP request reduced to the fields that travel over the
// fabric service channel.
type FabricRequest struct {
	// SchemaVersion identifies the request schema.
	SchemaVersion string

	// HTTP request line and host.
	Method string
	Path   string
	Query  string
	Host   string

	// Routing classification.
	ServiceType  string
	Scope        string
	ServiceClass string

	// Headers holds the request headers kept after safety filtering.
	Headers http.Header

	// Body is the raw request body.
	Body []byte

	// ObservedAt is when the ingress saw the request.
	ObservedAt time.Time
}
|
||||
|
||||
// FabricResponse is the backend's answer to a FabricRequest.
type FabricResponse struct {
	// StatusCode is the HTTP status; values outside 100..599 are replaced by
	// 200 when the response is written or encoded.
	StatusCode int

	// Headers holds the response headers; unsafe ones are filtered on output.
	Headers http.Header

	// Body is the raw response body.
	Body []byte
}
|
||||
|
||||
// Response is the JSON body the runtime writes for health checks and for
// requests it answers itself instead of forwarding.
type Response struct {
	// SchemaVersion identifies the response schema.
	SchemaVersion string `json:"schema_version"`

	// Status is the outcome label and Reason the machine-readable cause.
	Status string `json:"status"`
	Reason string `json:"reason,omitempty"`

	// Ingress identity and the service class the request resolved to.
	ServiceType  string `json:"service_type,omitempty"`
	Scope        string `json:"scope,omitempty"`
	ServiceClass string `json:"service_class,omitempty"`

	// Allowed lists the service classes the ingress accepts.
	Allowed []string `json:"allowed_service_classes,omitempty"`

	// ObservedAt is the response timestamp as a formatted string.
	ObservedAt string `json:"observed_at"`
}
|
||||
|
||||
func (r Runtime) HTTPHandler() http.Handler {
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
|
||||
if strings.HasPrefix(req.URL.Path, "/.well-known/acme-challenge/") {
|
||||
writeJSON(w, http.StatusNotFound, r.response("not_found", "acme_challenge_backend_not_configured", ""))
|
||||
return
|
||||
}
|
||||
if req.URL.Path == "/healthz" || req.URL.Path == "/readyz" {
|
||||
writeJSON(w, http.StatusOK, r.response("ready", "http_redirect_runtime_ready", ""))
|
||||
return
|
||||
}
|
||||
target := "https://" + req.Host + req.URL.RequestURI()
|
||||
w.Header().Set("Location", target)
|
||||
w.Header().Set("Cache-Control", "no-store")
|
||||
w.WriteHeader(http.StatusPermanentRedirect)
|
||||
})
|
||||
}
|
||||
|
||||
func (r Runtime) HTTPSHandler() http.Handler {
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
|
||||
if req.URL.Path == "/healthz" || req.URL.Path == "/readyz" {
|
||||
writeJSON(w, http.StatusOK, r.response("ready", "https_runtime_ready", ""))
|
||||
return
|
||||
}
|
||||
serviceClass := strings.TrimSpace(req.Header.Get("X-RAP-Service-Class"))
|
||||
if serviceClass == "" {
|
||||
serviceClass = serviceClassFromPath(req.URL.Path)
|
||||
}
|
||||
if serviceClass == "" {
|
||||
writeJSON(w, http.StatusBadRequest, r.response("blocked", "service_class_required", ""))
|
||||
return
|
||||
}
|
||||
if !contains(r.Config.ServiceClasses, serviceClass) {
|
||||
writeJSON(w, http.StatusForbidden, r.response("blocked", "service_class_not_allowed", serviceClass))
|
||||
return
|
||||
}
|
||||
if r.Binder == nil {
|
||||
writeJSON(w, http.StatusNotImplemented, r.response("blocked", "fabric_service_channel_binding_not_implemented", serviceClass))
|
||||
return
|
||||
}
|
||||
scope := scopeForServiceClass(serviceClass, r.Config.Scope)
|
||||
body, err := io.ReadAll(http.MaxBytesReader(w, req.Body, 1<<20))
|
||||
if err != nil {
|
||||
writeJSON(w, http.StatusRequestEntityTooLarge, r.response("blocked", "request_body_too_large", serviceClass))
|
||||
return
|
||||
}
|
||||
now := time.Now().UTC()
|
||||
if r.Now != nil {
|
||||
now = r.Now().UTC()
|
||||
}
|
||||
fabricResponse, err := r.Binder.Forward(req.Context(), FabricRequest{
|
||||
SchemaVersion: "rap.web_ingress.fabric_request.v1",
|
||||
Method: req.Method,
|
||||
Path: req.URL.Path,
|
||||
Query: req.URL.RawQuery,
|
||||
Host: req.Host,
|
||||
ServiceType: strings.TrimSpace(r.Config.ServiceType),
|
||||
Scope: scope,
|
||||
ServiceClass: serviceClass,
|
||||
Headers: cloneSafeHeaders(req.Header),
|
||||
Body: body,
|
||||
ObservedAt: now,
|
||||
})
|
||||
if err != nil {
|
||||
writeJSON(w, http.StatusBadGateway, r.response("blocked", "fabric_service_channel_forward_failed", serviceClass))
|
||||
return
|
||||
}
|
||||
writeFabricResponse(w, fabricResponse)
|
||||
})
|
||||
}
|
||||
|
||||
func (r Runtime) response(status, reason, serviceClass string) Response {
|
||||
now := time.Now().UTC()
|
||||
if r.Now != nil {
|
||||
now = r.Now().UTC()
|
||||
}
|
||||
return Response{
|
||||
SchemaVersion: "rap.web_ingress.runtime_response.v1",
|
||||
Status: status,
|
||||
Reason: reason,
|
||||
ServiceType: strings.TrimSpace(r.Config.ServiceType),
|
||||
Scope: strings.TrimSpace(r.Config.Scope),
|
||||
ServiceClass: serviceClass,
|
||||
Allowed: append([]string{}, r.Config.ServiceClasses...),
|
||||
ObservedAt: now.Format(time.RFC3339Nano),
|
||||
}
|
||||
}
|
||||
|
||||
// scopeForServiceClass maps a service class to its fixed scope. The class is
// trimmed before matching; an unrecognised class yields the trimmed fallback.
func scopeForServiceClass(serviceClass string, fallback string) string {
	var scope string
	switch strings.TrimSpace(serviceClass) {
	case "platform_admin":
		scope = "platform"
	case "cluster_admin":
		scope = "cluster"
	case "organization_portal":
		scope = "organization"
	case "user_portal":
		scope = "user"
	default:
		scope = strings.TrimSpace(fallback)
	}
	return scope
}
|
||||
|
||||
// serviceClassFromPath derives a service class from a URL path. The path is
// lower-cased and stripped of leading and trailing slashes, then matched by
// prefix; no match yields "". Trailing slashes are removed before matching,
// so a bare "/organizations/" or "/users/" does not match.
func serviceClassFromPath(path string) string {
	normalized := strings.Trim(strings.ToLower(path), "/")

	routes := [...]struct{ prefix, class string }{
		{"platform-admin", "platform_admin"},
		{"cluster-admin", "cluster_admin"},
		{"organizations/", "organization_portal"},
		{"users/", "user_portal"},
	}
	for _, route := range routes {
		if strings.HasPrefix(normalized, route.prefix) {
			return route.class
		}
	}
	return ""
}
|
||||
|
||||
func writeJSON(w http.ResponseWriter, status int, payload Response) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.Header().Set("Cache-Control", "no-store")
|
||||
w.WriteHeader(status)
|
||||
_ = json.NewEncoder(w).Encode(payload)
|
||||
}
|
||||
|
||||
func writeFabricResponse(w http.ResponseWriter, payload FabricResponse) {
|
||||
for key, values := range payload.Headers {
|
||||
if !safeResponseHeader(key) {
|
||||
continue
|
||||
}
|
||||
for _, value := range values {
|
||||
w.Header().Add(key, value)
|
||||
}
|
||||
}
|
||||
w.Header().Set("Cache-Control", "no-store")
|
||||
status := payload.StatusCode
|
||||
if status < 100 || status > 599 {
|
||||
status = http.StatusOK
|
||||
}
|
||||
w.WriteHeader(status)
|
||||
_, _ = w.Write(payload.Body)
|
||||
}
|
||||
|
||||
func cloneSafeHeaders(headers http.Header) http.Header {
|
||||
out := http.Header{}
|
||||
for key, values := range headers {
|
||||
if !safeRequestHeader(key) {
|
||||
continue
|
||||
}
|
||||
for _, value := range values {
|
||||
out.Add(key, value)
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// safeRequestHeader reports whether a request header may be forwarded over the
// fabric. The name is matched case-insensitively after trimming. Credential
// headers (authorization, cookie, set-cookie, x-rap-service-channel-token) are
// rejected and everything else is accepted.
func safeRequestHeader(key string) bool {
	switch strings.ToLower(strings.TrimSpace(key)) {
	case "authorization", "cookie", "set-cookie", "x-rap-service-channel-token":
		return false
	}
	return true
}
|
||||
|
||||
// safeResponseHeader reports whether a backend response header may be relayed
// to the client. The name is matched case-insensitively after trimming.
// set-cookie, transfer-encoding and connection are rejected and everything
// else is accepted.
func safeResponseHeader(key string) bool {
	switch strings.ToLower(strings.TrimSpace(key)) {
	case "set-cookie", "transfer-encoding", "connection":
		return false
	}
	return true
}
|
||||
|
||||
// contains reports whether needle is an element of values. The comparison is
// exact: case-sensitive, with no trimming.
func contains(values []string, needle string) bool {
	for i := range values {
		if values[i] == needle {
			return true
		}
	}
	return false
}
|
||||
@@ -0,0 +1,206 @@
|
||||
package webingress
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestHTTPHandlerRedirectsToHTTPS(t *testing.T) {
|
||||
runtime := Runtime{Config: RuntimeConfig{ServiceType: "admin-ingress", Scope: "platform"}}
|
||||
req := httptest.NewRequest(http.MethodGet, "http://admin.example.test/cluster-admin/dashboard?x=1", nil)
|
||||
rec := httptest.NewRecorder()
|
||||
|
||||
runtime.HTTPHandler().ServeHTTP(rec, req)
|
||||
|
||||
if rec.Code != http.StatusPermanentRedirect {
|
||||
t.Fatalf("status = %d", rec.Code)
|
||||
}
|
||||
if rec.Header().Get("Location") != "https://admin.example.test/cluster-admin/dashboard?x=1" {
|
||||
t.Fatalf("Location = %q", rec.Header().Get("Location"))
|
||||
}
|
||||
}
|
||||
|
||||
func TestHTTPSHandlerBlocksUnknownServiceClass(t *testing.T) {
|
||||
runtime := Runtime{
|
||||
Config: RuntimeConfig{
|
||||
ServiceType: "public-ingress",
|
||||
Scope: "organization",
|
||||
ServiceClasses: []string{"organization_portal", "user_portal"},
|
||||
},
|
||||
Now: fixedNow,
|
||||
}
|
||||
req := httptest.NewRequest(http.MethodGet, "https://org.example.test/platform-admin/root", nil)
|
||||
rec := httptest.NewRecorder()
|
||||
|
||||
runtime.HTTPSHandler().ServeHTTP(rec, req)
|
||||
|
||||
if rec.Code != http.StatusForbidden {
|
||||
t.Fatalf("status = %d body=%s", rec.Code, rec.Body.String())
|
||||
}
|
||||
var payload Response
|
||||
if err := json.Unmarshal(rec.Body.Bytes(), &payload); err != nil {
|
||||
t.Fatalf("decode response: %v", err)
|
||||
}
|
||||
if payload.Reason != "service_class_not_allowed" || payload.ServiceClass != "platform_admin" || payload.Scope != "organization" {
|
||||
t.Fatalf("payload = %+v", payload)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHTTPSHandlerRequiresFabricServiceChannelBinding(t *testing.T) {
|
||||
runtime := Runtime{
|
||||
Config: RuntimeConfig{
|
||||
ServiceType: "admin-ingress",
|
||||
Scope: "platform",
|
||||
ServiceClasses: []string{"platform_admin", "cluster_admin"},
|
||||
},
|
||||
Now: fixedNow,
|
||||
}
|
||||
req := httptest.NewRequest(http.MethodPost, "https://admin.example.test/platform-admin/root", nil)
|
||||
rec := httptest.NewRecorder()
|
||||
|
||||
runtime.HTTPSHandler().ServeHTTP(rec, req)
|
||||
|
||||
if rec.Code != http.StatusNotImplemented {
|
||||
t.Fatalf("status = %d body=%s", rec.Code, rec.Body.String())
|
||||
}
|
||||
var payload Response
|
||||
if err := json.Unmarshal(rec.Body.Bytes(), &payload); err != nil {
|
||||
t.Fatalf("decode response: %v", err)
|
||||
}
|
||||
if payload.Reason != "fabric_service_channel_binding_not_implemented" ||
|
||||
payload.ServiceClass != "platform_admin" ||
|
||||
payload.ObservedAt != "2026-05-17T00:00:00Z" {
|
||||
t.Fatalf("payload = %+v", payload)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHTTPSHandlerForwardsAllowedRequestToBinder(t *testing.T) {
|
||||
binder := &recordingBinder{
|
||||
response: FabricResponse{
|
||||
StatusCode: http.StatusAccepted,
|
||||
Headers: http.Header{"X-RAP-Result": []string{"accepted"}},
|
||||
Body: []byte(`{"ok":true}`),
|
||||
},
|
||||
}
|
||||
runtime := Runtime{
|
||||
Config: RuntimeConfig{
|
||||
ServiceType: "admin-ingress",
|
||||
Scope: "platform",
|
||||
ServiceClasses: []string{"platform_admin", "cluster_admin"},
|
||||
},
|
||||
Binder: binder,
|
||||
Now: fixedNow,
|
||||
}
|
||||
req := httptest.NewRequest(http.MethodPost, "https://admin.example.test/platform-admin/root?tab=nodes", strings.NewReader(`{"hello":"world"}`))
|
||||
req.Header.Set("X-RAP-Service-Class", "platform_admin")
|
||||
req.Header.Set("Authorization", "Bearer secret")
|
||||
req.Header.Set("X-Trace-ID", "trace-1")
|
||||
rec := httptest.NewRecorder()
|
||||
|
||||
runtime.HTTPSHandler().ServeHTTP(rec, req)
|
||||
|
||||
if rec.Code != http.StatusAccepted {
|
||||
t.Fatalf("status = %d body=%s", rec.Code, rec.Body.String())
|
||||
}
|
||||
if rec.Header().Get("X-RAP-Result") != "accepted" || rec.Body.String() != `{"ok":true}` {
|
||||
t.Fatalf("unexpected response headers=%v body=%s", rec.Header(), rec.Body.String())
|
||||
}
|
||||
if binder.request.ServiceClass != "platform_admin" ||
|
||||
binder.request.Scope != "platform" ||
|
||||
binder.request.Path != "/platform-admin/root" ||
|
||||
binder.request.Query != "tab=nodes" ||
|
||||
string(binder.request.Body) != `{"hello":"world"}` {
|
||||
t.Fatalf("request = %+v", binder.request)
|
||||
}
|
||||
if binder.request.Headers.Get("Authorization") != "" || binder.request.Headers.Get("X-Trace-ID") != "trace-1" {
|
||||
t.Fatalf("headers = %#v", binder.request.Headers)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHTTPSHandlerDerivesFabricScopeFromServiceClass(t *testing.T) {
|
||||
binder := &recordingBinder{response: FabricResponse{StatusCode: http.StatusOK}}
|
||||
runtime := Runtime{
|
||||
Config: RuntimeConfig{
|
||||
ServiceType: "admin-ingress",
|
||||
Scope: "platform",
|
||||
ServiceClasses: []string{"platform_admin", "cluster_admin"},
|
||||
},
|
||||
Binder: binder,
|
||||
Now: fixedNow,
|
||||
}
|
||||
req := httptest.NewRequest(http.MethodGet, "https://admin.example.test/cluster-admin/ui-manifest", nil)
|
||||
rec := httptest.NewRecorder()
|
||||
|
||||
runtime.HTTPSHandler().ServeHTTP(rec, req)
|
||||
|
||||
if rec.Code != http.StatusOK {
|
||||
t.Fatalf("status = %d body=%s", rec.Code, rec.Body.String())
|
||||
}
|
||||
if binder.request.ServiceClass != "cluster_admin" || binder.request.Scope != "cluster" {
|
||||
t.Fatalf("request = %+v", binder.request)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHTTPSHandlerReportsBinderFailure(t *testing.T) {
|
||||
runtime := Runtime{
|
||||
Config: RuntimeConfig{ServiceType: "admin-ingress", Scope: "platform", ServiceClasses: []string{"platform_admin"}},
|
||||
Binder: failingBinder{},
|
||||
Now: fixedNow,
|
||||
}
|
||||
req := httptest.NewRequest(http.MethodPost, "https://admin.example.test/platform-admin/root", nil)
|
||||
rec := httptest.NewRecorder()
|
||||
|
||||
runtime.HTTPSHandler().ServeHTTP(rec, req)
|
||||
|
||||
if rec.Code != http.StatusBadGateway {
|
||||
t.Fatalf("status = %d body=%s", rec.Code, rec.Body.String())
|
||||
}
|
||||
var payload Response
|
||||
if err := json.Unmarshal(rec.Body.Bytes(), &payload); err != nil {
|
||||
t.Fatalf("decode response: %v", err)
|
||||
}
|
||||
if payload.Reason != "fabric_service_channel_forward_failed" {
|
||||
t.Fatalf("payload = %+v", payload)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHTTPSHandlerHealth(t *testing.T) {
|
||||
runtime := Runtime{Config: RuntimeConfig{ServiceType: "admin-ingress", Scope: "platform"}, Now: fixedNow}
|
||||
req := httptest.NewRequest(http.MethodGet, "https://admin.example.test/healthz", nil)
|
||||
rec := httptest.NewRecorder()
|
||||
|
||||
runtime.HTTPSHandler().ServeHTTP(rec, req)
|
||||
|
||||
if rec.Code != http.StatusOK {
|
||||
t.Fatalf("status = %d body=%s", rec.Code, rec.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
// fixedNow is the deterministic clock used by the handler tests. It always
// returns midnight UTC on 17 May 2026.
func fixedNow() time.Time {
	const (
		year = 2026
		day  = 17
	)
	return time.Date(year, time.May, day, 0, 0, 0, 0, time.UTC)
}
|
||||
|
||||
type recordingBinder struct {
|
||||
request FabricRequest
|
||||
response FabricResponse
|
||||
}
|
||||
|
||||
func (b *recordingBinder) Forward(_ context.Context, request FabricRequest) (FabricResponse, error) {
|
||||
b.request = request
|
||||
return b.response, nil
|
||||
}
|
||||
|
||||
type failingBinder struct{}
|
||||
|
||||
func (failingBinder) Forward(context.Context, FabricRequest) (FabricResponse, error) {
|
||||
return FabricResponse{}, errTestBinderFailure{}
|
||||
}
|
||||
|
||||
// errTestBinderFailure is the error value returned by failingBinder.
type errTestBinderFailure struct{}

// Error implements the error interface with a fixed message.
func (errTestBinderFailure) Error() string {
	const message = "binder failed"
	return message
}
|
||||
@@ -0,0 +1,95 @@
|
||||
package webingress
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/ed25519"
|
||||
"crypto/sha256"
|
||||
"encoding/base64"
|
||||
"encoding/hex"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// ErrFabricEnvelopeSigningKeyInvalid is the sentinel error reported, bare or
// wrapped, when the envelope signing key cannot be decoded or has the wrong
// size. Sign also wraps it when the canonical envelope is empty. Match it
// with errors.Is.
var ErrFabricEnvelopeSigningKeyInvalid = errors.New("web ingress fabric envelope signing key invalid")
|
||||
|
||||
// Ed25519EnvelopeSigner signs canonical fabric envelopes with an Ed25519
// private key.
type Ed25519EnvelopeSigner struct {
	// PrivateKey is the signing key. Sign rejects any key whose length is
	// not ed25519.PrivateKeySize.
	PrivateKey ed25519.PrivateKey

	// KeyID is reported in produced signatures. When it is blank, Sign
	// derives an identifier from the public key.
	KeyID string

	// Now supplies the signing timestamp. When nil, Sign uses time.Now.
	Now func() time.Time
}
|
||||
|
||||
func NewEd25519EnvelopeSigner(privateKeyB64, keyID string) (Ed25519EnvelopeSigner, error) {
|
||||
privateKey, err := decodeEd25519PrivateKey(privateKeyB64)
|
||||
if err != nil {
|
||||
return Ed25519EnvelopeSigner{}, err
|
||||
}
|
||||
keyID = strings.TrimSpace(keyID)
|
||||
if keyID == "" {
|
||||
publicKey, ok := privateKey.Public().(ed25519.PublicKey)
|
||||
if !ok {
|
||||
return Ed25519EnvelopeSigner{}, ErrFabricEnvelopeSigningKeyInvalid
|
||||
}
|
||||
keyID = ed25519EnvelopeKeyID(publicKey)
|
||||
}
|
||||
return Ed25519EnvelopeSigner{PrivateKey: privateKey, KeyID: keyID}, nil
|
||||
}
|
||||
|
||||
func (s Ed25519EnvelopeSigner) Sign(_ context.Context, canonical []byte) (FabricEnvelopeSignature, error) {
|
||||
if len(s.PrivateKey) != ed25519.PrivateKeySize {
|
||||
return FabricEnvelopeSignature{}, ErrFabricEnvelopeSigningKeyInvalid
|
||||
}
|
||||
if len(canonical) == 0 {
|
||||
return FabricEnvelopeSignature{}, fmt.Errorf("%w: canonical envelope empty", ErrFabricEnvelopeSigningKeyInvalid)
|
||||
}
|
||||
keyID := strings.TrimSpace(s.KeyID)
|
||||
if keyID == "" {
|
||||
publicKey, ok := s.PrivateKey.Public().(ed25519.PublicKey)
|
||||
if !ok {
|
||||
return FabricEnvelopeSignature{}, ErrFabricEnvelopeSigningKeyInvalid
|
||||
}
|
||||
keyID = ed25519EnvelopeKeyID(publicKey)
|
||||
}
|
||||
now := time.Now().UTC()
|
||||
if s.Now != nil {
|
||||
now = s.Now().UTC()
|
||||
}
|
||||
return FabricEnvelopeSignature{
|
||||
KeyID: keyID,
|
||||
Alg: "ed25519",
|
||||
Signature: base64.StdEncoding.EncodeToString(ed25519.Sign(s.PrivateKey, canonical)),
|
||||
SignedAt: now.Format(time.RFC3339Nano),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func decodeEd25519PrivateKey(value string) (ed25519.PrivateKey, error) {
|
||||
decoded, err := decodeEnvelopeBase64(strings.TrimSpace(value))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%w: private key must be base64 encoded", ErrFabricEnvelopeSigningKeyInvalid)
|
||||
}
|
||||
if len(decoded) != ed25519.PrivateKeySize {
|
||||
return nil, fmt.Errorf("%w: private key must decode to %d bytes", ErrFabricEnvelopeSigningKeyInvalid, ed25519.PrivateKeySize)
|
||||
}
|
||||
return ed25519.PrivateKey(decoded), nil
|
||||
}
|
||||
|
||||
// decodeEnvelopeBase64 decodes value using the first base64 variant that
// accepts it.
//
// The variants are tried in this order: padded standard, unpadded standard,
// padded URL-safe, unpadded URL-safe. Padded URL-safe input (for example
// "-_8=") was previously rejected even though the other three forms were
// accepted; it is now handled as well. Every input that decoded before still
// decodes to the same bytes.
//
// An empty value is an error. When no variant matches, the returned slice is
// nil and the error is the one reported by the last variant tried.
func decodeEnvelopeBase64(value string) ([]byte, error) {
	if value == "" {
		return nil, errors.New("empty base64 value")
	}

	variants := [...]*base64.Encoding{
		base64.StdEncoding,
		base64.RawStdEncoding,
		base64.URLEncoding,
		base64.RawURLEncoding,
	}

	var lastErr error
	for _, variant := range variants {
		decoded, err := variant.DecodeString(value)
		if err == nil {
			return decoded, nil
		}
		lastErr = err
	}
	// Do not hand back partially decoded bytes alongside the error.
	return nil, lastErr
}
|
||||
|
||||
// ed25519EnvelopeKeyID derives a key identifier from a public key: the
// prefix "rap-node-ed25519-" followed by the lowercase hex encoding of the
// first 16 bytes of the key's SHA-256 digest.
func ed25519EnvelopeKeyID(publicKey ed25519.PublicKey) string {
	const prefix = "rap-node-ed25519-"
	digest := sha256.Sum256(publicKey)
	fingerprint := digest[:16]
	return prefix + hex.EncodeToString(fingerprint)
}
|
||||
@@ -0,0 +1,80 @@
|
||||
package webingress
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/ed25519"
|
||||
"crypto/rand"
|
||||
"encoding/base64"
|
||||
"errors"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestEd25519EnvelopeSignerSignsCanonicalEnvelope(t *testing.T) {
|
||||
publicKey, privateKey, err := ed25519.GenerateKey(rand.Reader)
|
||||
if err != nil {
|
||||
t.Fatalf("generate key: %v", err)
|
||||
}
|
||||
signer, err := NewEd25519EnvelopeSigner(base64.StdEncoding.EncodeToString(privateKey), "")
|
||||
if err != nil {
|
||||
t.Fatalf("new signer: %v", err)
|
||||
}
|
||||
signer.Now = fixedEnvelopeNow
|
||||
|
||||
signature, err := signer.Sign(context.Background(), []byte(`{"schema_version":"test"}`))
|
||||
if err != nil {
|
||||
t.Fatalf("sign: %v", err)
|
||||
}
|
||||
decoded, err := base64.StdEncoding.DecodeString(signature.Signature)
|
||||
if err != nil {
|
||||
t.Fatalf("decode signature: %v", err)
|
||||
}
|
||||
if !ed25519.Verify(publicKey, []byte(`{"schema_version":"test"}`), decoded) {
|
||||
t.Fatal("signature did not verify")
|
||||
}
|
||||
if signature.KeyID != ed25519EnvelopeKeyID(publicKey) ||
|
||||
signature.Alg != "ed25519" ||
|
||||
signature.SignedAt != "2026-05-17T00:00:01Z" {
|
||||
t.Fatalf("signature metadata = %+v", signature)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEd25519EnvelopeSignerUsesExplicitKeyID(t *testing.T) {
|
||||
_, privateKey, err := ed25519.GenerateKey(rand.Reader)
|
||||
if err != nil {
|
||||
t.Fatalf("generate key: %v", err)
|
||||
}
|
||||
signer, err := NewEd25519EnvelopeSigner(base64.RawStdEncoding.EncodeToString(privateKey), "node-explicit")
|
||||
if err != nil {
|
||||
t.Fatalf("new signer: %v", err)
|
||||
}
|
||||
signature, err := signer.Sign(context.Background(), []byte(`{}`))
|
||||
if err != nil {
|
||||
t.Fatalf("sign: %v", err)
|
||||
}
|
||||
if signature.KeyID != "node-explicit" {
|
||||
t.Fatalf("key id = %q", signature.KeyID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEd25519EnvelopeSignerRejectsInvalidKeyAndPayload(t *testing.T) {
|
||||
_, err := NewEd25519EnvelopeSigner("not-base64", "")
|
||||
if !errors.Is(err, ErrFabricEnvelopeSigningKeyInvalid) {
|
||||
t.Fatalf("invalid key error = %v", err)
|
||||
}
|
||||
|
||||
signer := Ed25519EnvelopeSigner{}
|
||||
_, err = signer.Sign(context.Background(), []byte(`{}`))
|
||||
if !errors.Is(err, ErrFabricEnvelopeSigningKeyInvalid) {
|
||||
t.Fatalf("missing key error = %v", err)
|
||||
}
|
||||
|
||||
_, privateKey, err := ed25519.GenerateKey(rand.Reader)
|
||||
if err != nil {
|
||||
t.Fatalf("generate key: %v", err)
|
||||
}
|
||||
signer = Ed25519EnvelopeSigner{PrivateKey: privateKey}
|
||||
_, err = signer.Sign(context.Background(), nil)
|
||||
if !errors.Is(err, ErrFabricEnvelopeSigningKeyInvalid) {
|
||||
t.Fatalf("empty canonical error = %v", err)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user