рабочий вариант, но скорость 10 МБит
This commit is contained in:
@@ -1,111 +0,0 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Client talks to a mesh peer over HTTP.
type Client struct {
	// BaseURL is concatenated directly with each endpoint path
	// (for example "/mesh/v1/health") to form the request URL.
	BaseURL string
	// HTTPClient performs the requests. The Send methods fall back to
	// http.DefaultClient when it is nil.
	HTTPClient *http.Client
}
|
||||
|
||||
func NewClient(baseURL string) Client {
|
||||
return Client{
|
||||
BaseURL: baseURL,
|
||||
HTTPClient: &http.Client{
|
||||
Timeout: 5 * time.Second,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func (c Client) SendHealth(ctx context.Context, message HealthMessage) (HealthAck, error) {
|
||||
payload, err := json.Marshal(message)
|
||||
if err != nil {
|
||||
return HealthAck{}, err
|
||||
}
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.BaseURL+"/mesh/v1/health", bytes.NewReader(payload))
|
||||
if err != nil {
|
||||
return HealthAck{}, err
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
httpClient := c.HTTPClient
|
||||
if httpClient == nil {
|
||||
httpClient = http.DefaultClient
|
||||
}
|
||||
resp, err := httpClient.Do(req)
|
||||
if err != nil {
|
||||
return HealthAck{}, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||
return HealthAck{}, fmt.Errorf("mesh health rejected with status %d", resp.StatusCode)
|
||||
}
|
||||
var ack HealthAck
|
||||
if err := json.NewDecoder(resp.Body).Decode(&ack); err != nil {
|
||||
return HealthAck{}, err
|
||||
}
|
||||
return ack, nil
|
||||
}
|
||||
|
||||
func (c Client) SendSynthetic(ctx context.Context, envelope SyntheticEnvelope) (SyntheticEnvelope, error) {
|
||||
payload, err := json.Marshal(envelope)
|
||||
if err != nil {
|
||||
return SyntheticEnvelope{}, err
|
||||
}
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.BaseURL+"/mesh/v1/synthetic/probe", bytes.NewReader(payload))
|
||||
if err != nil {
|
||||
return SyntheticEnvelope{}, err
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
httpClient := c.HTTPClient
|
||||
if httpClient == nil {
|
||||
httpClient = http.DefaultClient
|
||||
}
|
||||
resp, err := httpClient.Do(req)
|
||||
if err != nil {
|
||||
return SyntheticEnvelope{}, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||
return SyntheticEnvelope{}, fmt.Errorf("mesh synthetic probe rejected with status %d", resp.StatusCode)
|
||||
}
|
||||
var ack SyntheticEnvelope
|
||||
if err := json.NewDecoder(resp.Body).Decode(&ack); err != nil {
|
||||
return SyntheticEnvelope{}, err
|
||||
}
|
||||
return ack, nil
|
||||
}
|
||||
|
||||
func (c Client) SendProduction(ctx context.Context, envelope ProductionEnvelope) (ProductionForwardResult, error) {
|
||||
payload, err := json.Marshal(envelope)
|
||||
if err != nil {
|
||||
return ProductionForwardResult{}, err
|
||||
}
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.BaseURL+"/mesh/v1/forward", bytes.NewReader(payload))
|
||||
if err != nil {
|
||||
return ProductionForwardResult{}, err
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
httpClient := c.HTTPClient
|
||||
if httpClient == nil {
|
||||
httpClient = http.DefaultClient
|
||||
}
|
||||
resp, err := httpClient.Do(req)
|
||||
if err != nil {
|
||||
return ProductionForwardResult{}, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||
return ProductionForwardResult{}, fmt.Errorf("mesh production forward rejected with status %d", resp.StatusCode)
|
||||
}
|
||||
var result ProductionForwardResult
|
||||
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
|
||||
return ProductionForwardResult{}, err
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
@@ -70,7 +70,7 @@ const (
|
||||
FabricServiceChannelReliable = "reliable"
|
||||
FabricServiceChannelDroppable = "droppable"
|
||||
MaxProductionEnvelopePayloadBytes = 4096
|
||||
MaxProductionVPNPacketPayloadBytes = 256 * 1024
|
||||
MaxProductionVPNPacketPayloadBytes = 8 * 1024 * 1024
|
||||
MaxProductionEnvelopeFutureSkew = time.Minute
|
||||
ProductionForwardQUICStreamID = 1
|
||||
WebIngressForwardQUICStreamID = 2
|
||||
@@ -203,22 +203,6 @@ type SyntheticRelayQueueMetrics struct {
|
||||
QueueDepths map[string]int `json:"queue_depths"`
|
||||
}
|
||||
|
||||
type HealthMessage struct {
|
||||
ProtocolVersion string `json:"protocol_version"`
|
||||
From PeerIdentity `json:"from"`
|
||||
To PeerIdentity `json:"to"`
|
||||
ObservedAt time.Time `json:"observed_at"`
|
||||
LinkStatus string `json:"link_status"`
|
||||
LatencyMs *int `json:"latency_ms,omitempty"`
|
||||
QualityScore *int `json:"quality_score,omitempty"`
|
||||
}
|
||||
|
||||
type HealthAck struct {
|
||||
ProtocolVersion string `json:"protocol_version"`
|
||||
Accepted bool `json:"accepted"`
|
||||
By PeerIdentity `json:"by"`
|
||||
}
|
||||
|
||||
type ProductionEnvelope struct {
|
||||
FabricProtocolVersion string `json:"fabric_protocol_version"`
|
||||
MessageID string `json:"message_id"`
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
@@ -9,6 +10,9 @@ import (
|
||||
type EndpointCandidateScoreOptions struct {
|
||||
ChannelClass string
|
||||
PreferredRegion string
|
||||
SiteID string
|
||||
LocalityGroupID string
|
||||
LocalNATGroupID string
|
||||
Now time.Time
|
||||
MaxVerificationAge time.Duration
|
||||
Observations map[string]EndpointCandidateHealthObservation
|
||||
@@ -21,6 +25,7 @@ type EndpointCandidateHealthObservation struct {
|
||||
EndpointID string `json:"endpoint_id"`
|
||||
Source string `json:"source,omitempty"`
|
||||
ReporterNodeID string `json:"reporter_node_id,omitempty"`
|
||||
ReporterRegion string `json:"reporter_region,omitempty"`
|
||||
LastLatencyMs int64 `json:"last_latency_ms,omitempty"`
|
||||
SuccessCount uint64 `json:"success_count,omitempty"`
|
||||
FailureCount uint64 `json:"failure_count,omitempty"`
|
||||
@@ -114,6 +119,9 @@ func scorePeerEndpointCandidate(candidate PeerEndpointCandidate, opts EndpointCa
|
||||
case "direct":
|
||||
score += 30
|
||||
reasons = append(reasons, "connectivity:direct")
|
||||
case "private_lan":
|
||||
score += 36
|
||||
reasons = append(reasons, "connectivity:private_lan")
|
||||
case "outbound_only":
|
||||
score += 5
|
||||
reasons = append(reasons, "connectivity:outbound_only")
|
||||
@@ -167,6 +175,7 @@ func scorePeerEndpointCandidate(candidate PeerEndpointCandidate, opts EndpointCa
|
||||
score += 18
|
||||
reasons = append(reasons, "policy:private-lan")
|
||||
}
|
||||
score, reasons = applyLocalityPreferences(candidate, opts, score, reasons)
|
||||
if hasPolicyTag(candidate.PolicyTags, "costly") {
|
||||
score -= 10
|
||||
reasons = append(reasons, "policy:costly")
|
||||
@@ -193,7 +202,7 @@ func scorePeerEndpointCandidate(candidate PeerEndpointCandidate, opts EndpointCa
|
||||
}
|
||||
}
|
||||
if observation, ok := opts.Observations[candidate.EndpointID]; ok {
|
||||
observationScore, observationReasons := scoreEndpointCandidateObservation(observation, opts)
|
||||
observationScore, observationReasons := scoreEndpointCandidateObservation(candidate, observation, opts)
|
||||
score += observationScore
|
||||
reasons = append(reasons, observationReasons...)
|
||||
}
|
||||
@@ -225,7 +234,7 @@ func scoreEndpointCandidateCapacityPressure(pressure EndpointCandidateCapacityPr
|
||||
return -penalty, []string{"capacity:pressure"}
|
||||
}
|
||||
|
||||
func scoreEndpointCandidateObservation(observation EndpointCandidateHealthObservation, opts EndpointCandidateScoreOptions) (int, []string) {
|
||||
func scoreEndpointCandidateObservation(candidate PeerEndpointCandidate, observation EndpointCandidateHealthObservation, opts EndpointCandidateScoreOptions) (int, []string) {
|
||||
score := 0
|
||||
reasons := []string{"observation:present"}
|
||||
if !opts.Now.IsZero() && !observation.ObservedAt.IsZero() && opts.MaxObservationAge > 0 {
|
||||
@@ -236,6 +245,18 @@ func scoreEndpointCandidateObservation(observation EndpointCandidateHealthObserv
|
||||
score += 6
|
||||
reasons = append(reasons, "observation:fresh")
|
||||
}
|
||||
observationScope := endpointCandidateObservationScope(candidate, observation, opts)
|
||||
if observationScope != "" {
|
||||
reasons = append(reasons, "observation_scope:"+observationScope)
|
||||
}
|
||||
if endpointRequiresExternalNetworkVerification(candidate) && (observationScope == "self" || observationScope == "same_area") {
|
||||
reasons = append(reasons, "observation:non_authoritative_same_area_public")
|
||||
if strings.TrimSpace(observation.LastFailureReason) == "capacity_limited" {
|
||||
score -= 4
|
||||
reasons = append(reasons, "capacity:limited")
|
||||
}
|
||||
return score, reasons
|
||||
}
|
||||
switch {
|
||||
case observation.LastLatencyMs > 0 && observation.LastLatencyMs <= 50:
|
||||
score += 24
|
||||
@@ -286,6 +307,118 @@ func scoreEndpointCandidateObservation(observation EndpointCandidateHealthObserv
|
||||
return score, reasons
|
||||
}
|
||||
|
||||
func endpointCandidateObservationScope(candidate PeerEndpointCandidate, observation EndpointCandidateHealthObservation, opts EndpointCandidateScoreOptions) string {
|
||||
if strings.TrimSpace(observation.ReporterNodeID) != "" &&
|
||||
strings.TrimSpace(candidate.NodeID) != "" &&
|
||||
strings.EqualFold(strings.TrimSpace(observation.ReporterNodeID), strings.TrimSpace(candidate.NodeID)) {
|
||||
return "self"
|
||||
}
|
||||
reporterRegion := strings.TrimSpace(observation.ReporterRegion)
|
||||
if reporterRegion == "" && strings.EqualFold(strings.TrimSpace(observation.Source), "local_vpn_fabric_session") {
|
||||
reporterRegion = strings.TrimSpace(opts.PreferredRegion)
|
||||
}
|
||||
candidateRegion := strings.TrimSpace(candidate.Region)
|
||||
if reporterRegion == "" || candidateRegion == "" {
|
||||
return ""
|
||||
}
|
||||
if strings.EqualFold(reporterRegion, candidateRegion) {
|
||||
return "same_area"
|
||||
}
|
||||
return "cross_area"
|
||||
}
|
||||
|
||||
func endpointRequiresExternalNetworkVerification(candidate PeerEndpointCandidate) bool {
|
||||
if !strings.EqualFold(strings.TrimSpace(candidate.Reachability), "public") {
|
||||
return false
|
||||
}
|
||||
if len(candidate.Metadata) == 0 || !json.Valid(candidate.Metadata) {
|
||||
return false
|
||||
}
|
||||
var metadata struct {
|
||||
VerificationScope string `json:"verification_scope,omitempty"`
|
||||
}
|
||||
if err := json.Unmarshal(candidate.Metadata, &metadata); err != nil {
|
||||
return false
|
||||
}
|
||||
return strings.EqualFold(strings.TrimSpace(metadata.VerificationScope), "external-network-required")
|
||||
}
|
||||
|
||||
func applyLocalityPreferences(candidate PeerEndpointCandidate, opts EndpointCandidateScoreOptions, score int, reasons []string) (int, []string) {
|
||||
locality := endpointCandidateLocality(candidate, opts)
|
||||
switch locality {
|
||||
case "local_segment":
|
||||
score += 65
|
||||
reasons = append(reasons, "locality:local_segment")
|
||||
case "same_nat":
|
||||
score += 45
|
||||
reasons = append(reasons, "locality:same_nat")
|
||||
case "private_scoped":
|
||||
score += 20
|
||||
reasons = append(reasons, "locality:private_scoped")
|
||||
case "private_unscoped":
|
||||
score -= 35
|
||||
reasons = append(reasons, "locality:private_unscoped")
|
||||
case "private_foreign":
|
||||
score -= 90
|
||||
reasons = append(reasons, "locality:private_foreign")
|
||||
case "public_fallback":
|
||||
score -= 5
|
||||
reasons = append(reasons, "locality:public_fallback")
|
||||
}
|
||||
return score, reasons
|
||||
}
|
||||
|
||||
func endpointCandidateLocality(candidate PeerEndpointCandidate, opts EndpointCandidateScoreOptions) string {
|
||||
reachability := strings.ToLower(strings.TrimSpace(candidate.Reachability))
|
||||
connectivity := strings.ToLower(strings.TrimSpace(candidate.ConnectivityMode))
|
||||
isPrivate := reachability == "private" || connectivity == "private_lan" || endpointHasPrivateHost(candidate.Address)
|
||||
if !isPrivate {
|
||||
if reachability == "public" && endpointRequiresExternalNetworkVerification(candidate) {
|
||||
return "public_fallback"
|
||||
}
|
||||
return ""
|
||||
}
|
||||
metadata := decodeEndpointCandidateLocalityMetadata(candidate.Metadata)
|
||||
localityGroupID := strings.TrimSpace(opts.LocalityGroupID)
|
||||
if localityGroupID != "" && strings.TrimSpace(metadata.LocalityGroupID) != "" &&
|
||||
strings.EqualFold(strings.TrimSpace(metadata.LocalityGroupID), localityGroupID) {
|
||||
return "local_segment"
|
||||
}
|
||||
if opts.LocalNATGroupID != "" && metadata.NATGroupID != "" && strings.EqualFold(metadata.NATGroupID, strings.TrimSpace(opts.LocalNATGroupID)) {
|
||||
return "same_nat"
|
||||
}
|
||||
if strings.TrimSpace(opts.SiteID) != "" && metadata.SiteID != "" && strings.EqualFold(metadata.SiteID, strings.TrimSpace(opts.SiteID)) {
|
||||
return "private_scoped"
|
||||
}
|
||||
if hasPolicyTag(candidate.PolicyTags, "private-lan") || hasPolicyTag(candidate.PolicyTags, "corp-lan") || hasPolicyTag(candidate.PolicyTags, "same-site") {
|
||||
return "private_scoped"
|
||||
}
|
||||
if metadata.LocalityGroupID != "" || metadata.SiteID != "" || metadata.NATGroupID != "" {
|
||||
return "private_foreign"
|
||||
}
|
||||
return "private_unscoped"
|
||||
}
|
||||
|
||||
// endpointCandidateLocalityMetadata is the subset of endpoint metadata that
// locality classification reads.
type endpointCandidateLocalityMetadata struct {
	SiteID          string `json:"site_id,omitempty"`
	LocalityGroupID string `json:"locality_group_id,omitempty"`
	NATGroupID      string `json:"nat_group_id,omitempty"`
}

// decodeEndpointCandidateLocalityMetadata extracts the locality fields from
// raw endpoint metadata and trims surrounding whitespace from each of them.
//
// Empty, malformed or wrongly typed metadata yields the zero value, so callers
// can treat "no metadata" and "unusable metadata" alike.
func decodeEndpointCandidateLocalityMetadata(raw json.RawMessage) endpointCandidateLocalityMetadata {
	if len(raw) == 0 {
		return endpointCandidateLocalityMetadata{}
	}
	var metadata endpointCandidateLocalityMetadata
	// json.Unmarshal rejects invalid input on its own, so a separate
	// json.Valid pass would only parse the document a second time.
	if err := json.Unmarshal(raw, &metadata); err != nil {
		return endpointCandidateLocalityMetadata{}
	}
	metadata.SiteID = strings.TrimSpace(metadata.SiteID)
	metadata.LocalityGroupID = strings.TrimSpace(metadata.LocalityGroupID)
	metadata.NATGroupID = strings.TrimSpace(metadata.NATGroupID)
	return metadata
}
|
||||
|
||||
func hasPolicyTag(tags []string, needle string) bool {
|
||||
for _, tag := range tags {
|
||||
if strings.EqualFold(strings.TrimSpace(tag), needle) {
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
@@ -526,6 +527,161 @@ func TestRankPeerEndpointCandidatesSpreadsFreshCapacityPressure(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestRankPeerEndpointCandidatesIgnoresSameAreaPublicVerificationFailures(t *testing.T) {
|
||||
now := time.Date(2026, 5, 19, 12, 0, 0, 0, time.UTC)
|
||||
candidate := PeerEndpointCandidate{
|
||||
EndpointID: "test-1-public",
|
||||
NodeID: "test-1",
|
||||
Transport: "direct_quic",
|
||||
Address: "quic://94.141.118.222:19191",
|
||||
Reachability: "public",
|
||||
NATType: "port_restricted",
|
||||
ConnectivityMode: "direct",
|
||||
Region: "home-test",
|
||||
Priority: 2,
|
||||
Metadata: json.RawMessage(`{"verification_scope":"external-network-required"}`),
|
||||
}
|
||||
ranked := RankPeerEndpointCandidates([]PeerEndpointCandidate{candidate}, EndpointCandidateScoreOptions{
|
||||
PreferredRegion: "home-test",
|
||||
Now: now,
|
||||
MaxObservationAge: time.Minute,
|
||||
Observations: map[string]EndpointCandidateHealthObservation{
|
||||
"test-1-public": {
|
||||
EndpointID: "test-1-public",
|
||||
ReporterNodeID: "home-1",
|
||||
ReporterRegion: "home-test",
|
||||
FailureCount: 4,
|
||||
LastFailureReason: "context_deadline_exceeded",
|
||||
ReliabilityScore: 20,
|
||||
ObservedAt: now,
|
||||
},
|
||||
},
|
||||
})
|
||||
if len(ranked) != 1 {
|
||||
t.Fatalf("ranked length = %d, want 1", len(ranked))
|
||||
}
|
||||
if !containsReason(ranked[0].Reasons, "observation:non_authoritative_same_area_public") {
|
||||
t.Fatalf("same-area public observation should be non-authoritative: %+v", ranked[0].Reasons)
|
||||
}
|
||||
if containsReason(ranked[0].Reasons, "history:failure") || containsReason(ranked[0].Reasons, "failure:recent") {
|
||||
t.Fatalf("same-area public failures should not demote candidate: %+v", ranked[0].Reasons)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRankPeerEndpointCandidatesUsesCrossAreaPublicVerificationFailures(t *testing.T) {
|
||||
now := time.Date(2026, 5, 19, 12, 0, 0, 0, time.UTC)
|
||||
candidate := PeerEndpointCandidate{
|
||||
EndpointID: "test-1-public",
|
||||
NodeID: "test-1",
|
||||
Transport: "direct_quic",
|
||||
Address: "quic://94.141.118.222:19191",
|
||||
Reachability: "public",
|
||||
NATType: "port_restricted",
|
||||
ConnectivityMode: "direct",
|
||||
Region: "home-test",
|
||||
Priority: 2,
|
||||
Metadata: json.RawMessage(`{"verification_scope":"external-network-required"}`),
|
||||
}
|
||||
ranked := RankPeerEndpointCandidates([]PeerEndpointCandidate{candidate}, EndpointCandidateScoreOptions{
|
||||
PreferredRegion: "usa",
|
||||
Now: now,
|
||||
MaxObservationAge: time.Minute,
|
||||
Observations: map[string]EndpointCandidateHealthObservation{
|
||||
"test-1-public": {
|
||||
EndpointID: "test-1-public",
|
||||
ReporterNodeID: "usa-los-1",
|
||||
ReporterRegion: "usa",
|
||||
FailureCount: 4,
|
||||
LastFailureReason: "context_deadline_exceeded",
|
||||
ReliabilityScore: 20,
|
||||
ObservedAt: now,
|
||||
},
|
||||
},
|
||||
})
|
||||
if len(ranked) != 1 {
|
||||
t.Fatalf("ranked length = %d, want 1", len(ranked))
|
||||
}
|
||||
if !containsReason(ranked[0].Reasons, "observation_scope:cross_area") {
|
||||
t.Fatalf("cross-area scope missing: %+v", ranked[0].Reasons)
|
||||
}
|
||||
if !containsReason(ranked[0].Reasons, "history:failure") || !containsReason(ranked[0].Reasons, "failure:recent") {
|
||||
t.Fatalf("cross-area public failures should demote candidate: %+v", ranked[0].Reasons)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRankPeerEndpointCandidatesPrefersScopedPrivateLANOverPublic(t *testing.T) {
|
||||
now := time.Date(2026, 5, 19, 13, 0, 0, 0, time.UTC)
|
||||
ranked := RankPeerEndpointCandidates([]PeerEndpointCandidate{
|
||||
{
|
||||
EndpointID: "node-b-public",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_quic",
|
||||
Address: "quic://94.141.118.222:19191",
|
||||
Reachability: "public",
|
||||
ConnectivityMode: "direct",
|
||||
NATType: "port_restricted",
|
||||
Priority: 2,
|
||||
},
|
||||
{
|
||||
EndpointID: "node-b-private",
|
||||
NodeID: "node-b",
|
||||
Transport: "lan_quic",
|
||||
Address: "quic://192.168.200.61:19134",
|
||||
Reachability: "private",
|
||||
ConnectivityMode: "private_lan",
|
||||
Priority: 1,
|
||||
Metadata: json.RawMessage(`{"locality_group_id":"home-test","nat_group_id":"home-router"}`),
|
||||
},
|
||||
}, EndpointCandidateScoreOptions{
|
||||
PreferredRegion: "home-test",
|
||||
LocalityGroupID: "home-test",
|
||||
LocalNATGroupID: "home-router",
|
||||
Now: now,
|
||||
})
|
||||
if ranked[0].Candidate.EndpointID != "node-b-private" {
|
||||
t.Fatalf("top endpoint = %q, want node-b-private: %+v", ranked[0].Candidate.EndpointID, ranked)
|
||||
}
|
||||
if !containsReason(ranked[0].Reasons, "locality:local_segment") {
|
||||
t.Fatalf("missing locality group reason: %+v", ranked[0].Reasons)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRankPeerEndpointCandidatesPenalizesForeignPrivateEndpoint(t *testing.T) {
|
||||
now := time.Date(2026, 5, 19, 13, 0, 0, 0, time.UTC)
|
||||
ranked := RankPeerEndpointCandidates([]PeerEndpointCandidate{
|
||||
{
|
||||
EndpointID: "node-b-public",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_quic",
|
||||
Address: "quic://94.141.118.222:19191",
|
||||
Reachability: "public",
|
||||
ConnectivityMode: "direct",
|
||||
Priority: 2,
|
||||
},
|
||||
{
|
||||
EndpointID: "node-b-private-foreign",
|
||||
NodeID: "node-b",
|
||||
Transport: "lan_quic",
|
||||
Address: "quic://10.24.10.20:19443",
|
||||
Reachability: "private",
|
||||
ConnectivityMode: "private_lan",
|
||||
Priority: 1,
|
||||
Metadata: json.RawMessage(`{"locality_group_id":"other-site","nat_group_id":"other-nat"}`),
|
||||
},
|
||||
}, EndpointCandidateScoreOptions{
|
||||
PreferredRegion: "home-test",
|
||||
LocalityGroupID: "home-test",
|
||||
LocalNATGroupID: "home-router",
|
||||
Now: now,
|
||||
})
|
||||
if ranked[0].Candidate.EndpointID != "node-b-public" {
|
||||
t.Fatalf("top endpoint = %q, want node-b-public: %+v", ranked[0].Candidate.EndpointID, ranked)
|
||||
}
|
||||
if !containsReason(ranked[1].Reasons, "locality:private_foreign") {
|
||||
t.Fatalf("missing foreign private reason: %+v", ranked[1].Reasons)
|
||||
}
|
||||
}
|
||||
|
||||
func containsReason(reasons []string, reason string) bool {
|
||||
for _, item := range reasons {
|
||||
if item == reason {
|
||||
|
||||
@@ -23,7 +23,7 @@ func FabricTransportTargetFromRegistryEndpoint(endpoint FabricRegistryEndpoint)
|
||||
return FabricTransportTarget{
|
||||
EndpointID: strings.TrimSpace(endpoint.EndpointID),
|
||||
PeerID: strings.TrimSpace(endpoint.EndpointID),
|
||||
Endpoint: strings.TrimSpace(endpoint.Address),
|
||||
Endpoint: fabricControlEndpointAddress(endpoint),
|
||||
Transport: strings.TrimSpace(endpoint.Transport),
|
||||
PeerCertSHA256: strings.TrimSpace(endpoint.PeerCertSHA256),
|
||||
Timeout: 5 * time.Second,
|
||||
@@ -32,6 +32,28 @@ func FabricTransportTargetFromRegistryEndpoint(endpoint FabricRegistryEndpoint)
|
||||
}
|
||||
}
|
||||
|
||||
func fabricControlEndpointAddress(endpoint FabricRegistryEndpoint) string {
|
||||
if mapped := fabricControlMetadataString(endpoint.Metadata, "maps_to"); mapped != "" {
|
||||
if strings.Contains(mapped, "://") {
|
||||
return mapped
|
||||
}
|
||||
return "quic://" + mapped
|
||||
}
|
||||
return strings.TrimSpace(endpoint.Address)
|
||||
}
|
||||
|
||||
// fabricControlMetadataString returns the whitespace-trimmed string stored
// under key in the JSON object raw.
//
// It returns "" when raw is empty, is not a JSON object, lacks key, or holds a
// non-string value under key.
func fabricControlMetadataString(raw json.RawMessage, key string) string {
	if len(raw) == 0 {
		return ""
	}
	var fields map[string]any
	if json.Unmarshal(raw, &fields) != nil {
		return ""
	}
	text, isString := fields[key].(string)
	if !isString {
		return ""
	}
	return strings.TrimSpace(text)
}
|
||||
|
||||
func SendFabricControlForward(ctx context.Context, transport FabricTransport, endpoint FabricRegistryEndpoint, payload []byte, timeout time.Duration) (FabricControlForwardResult, error) {
|
||||
if transport == nil {
|
||||
return FabricControlForwardResult{}, fmt.Errorf("fabric control transport is unavailable")
|
||||
|
||||
@@ -137,7 +137,7 @@ type FabricAdjacency struct {
|
||||
PressurePercent int
|
||||
Healthy bool
|
||||
PassiveOutbound bool
|
||||
LocalSegmentID string
|
||||
LocalityGroupID string
|
||||
NATGroupID string
|
||||
LastObservedAt time.Time
|
||||
LastFailureReason string
|
||||
|
||||
@@ -0,0 +1,74 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
|
||||
)
|
||||
|
||||
func ProbeFabricTarget(ctx context.Context, target FabricTransportTarget) (time.Duration, error) {
|
||||
target.Timeout = positiveDurationOr(target.Timeout, 2*time.Second)
|
||||
target.InboundBuffer = positiveIntOr(target.InboundBuffer, 2)
|
||||
target.ErrorBuffer = positiveIntOr(target.ErrorBuffer, 2)
|
||||
|
||||
transport, normalizedTarget, err := FabricTransportForTarget(target, nil)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
session, err := transport.Connect(ctx, normalizedTarget)
|
||||
if err != nil {
|
||||
_ = transport.Close()
|
||||
return 0, err
|
||||
}
|
||||
defer func() {
|
||||
_ = session.Close()
|
||||
_ = transport.Close()
|
||||
}()
|
||||
|
||||
startedAt := time.Now()
|
||||
sequence := uint64(startedAt.UnixNano())
|
||||
if err := session.Send(ctx, fabricproto.Frame{
|
||||
Type: fabricproto.FramePing,
|
||||
TrafficClass: fabricproto.TrafficClassReliable,
|
||||
Sequence: sequence,
|
||||
Payload: []byte("fabric-live-probe"),
|
||||
}); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
for {
|
||||
select {
|
||||
case frame, ok := <-session.Frames():
|
||||
if !ok {
|
||||
return 0, fmt.Errorf("fabric live probe session closed")
|
||||
}
|
||||
if frame.Type == fabricproto.FramePong && frame.Sequence == sequence {
|
||||
return time.Since(startedAt), nil
|
||||
}
|
||||
case err, ok := <-session.Errors():
|
||||
if !ok {
|
||||
return 0, fmt.Errorf("fabric live probe error channel closed")
|
||||
}
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
case <-ctx.Done():
|
||||
return 0, ctx.Err()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// positiveDurationOr returns value when it is strictly positive and fallback
// otherwise.
func positiveDurationOr(value time.Duration, fallback time.Duration) time.Duration {
	if value <= 0 {
		return fallback
	}
	return value
}
|
||||
|
||||
// positiveIntOr returns value when it is strictly positive and fallback
// otherwise.
func positiveIntOr(value int, fallback int) int {
	if value <= 0 {
		return fallback
	}
	return value
}
|
||||
@@ -59,7 +59,7 @@ func StartQUICFabricServer(ctx context.Context, cfg QUICFabricServerConfig) (*QU
|
||||
if len(tlsConfig.NextProtos) == 0 {
|
||||
tlsConfig.NextProtos = []string{fabricQUICNextProto}
|
||||
}
|
||||
listener, err := quic.ListenAddr(cfg.ListenAddr, tlsConfig, cfg.QUICConfig)
|
||||
listener, err := quic.ListenAddr(cfg.ListenAddr, tlsConfig, defaultQUICFabricConfig(cfg.QUICConfig))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -132,7 +132,7 @@ func (s *QUICFabricServer) handleConn(ctx context.Context, conn *quic.Conn) {
|
||||
|
||||
func (s *QUICFabricServer) handleStream(ctx context.Context, conn *quic.Conn, stream *quic.Stream) {
|
||||
session := fabricproto.NewSession(fabricproto.SessionConfig{})
|
||||
sender := quicStreamFrameSender{stream: stream}
|
||||
sender := &quicStreamFrameSender{stream: stream}
|
||||
defer func() { _ = stream.Close() }()
|
||||
s.logFabricSession(FabricSessionEventLogEntry{
|
||||
Event: "fabric_session_quic_stream_opened",
|
||||
@@ -207,7 +207,7 @@ type quicStreamFrameSender struct {
|
||||
mu sync.Mutex
|
||||
}
|
||||
|
||||
func (s quicStreamFrameSender) SendFrame(ctx context.Context, frame fabricproto.Frame) error {
|
||||
func (s *quicStreamFrameSender) SendFrame(ctx context.Context, frame fabricproto.Frame) error {
|
||||
if s.stream == nil {
|
||||
return fmt.Errorf("quic fabric stream is closed")
|
||||
}
|
||||
|
||||
@@ -22,6 +22,9 @@ const fabricQUICNextProto = "rap-fabric-data-session-v1"
|
||||
const (
	// fabricQUICReverseHelloPrefix is a versioned marker string; presumably it
	// prefixes the hello sent on reverse connections — confirm at its use site.
	fabricQUICReverseHelloPrefix = "rap-fabric-reverse-hello-v1:"

	// Defaults that NewQUICFabricTransport applies to a new transport.
	defaultQUICFabricConnIdleTTL       = 5 * time.Minute
	defaultQUICFabricMaxStreamsPerConn = 64

	// Defaults that defaultQUICFabricConfig fills into a quic.Config for
	// fields left at a non-positive value.
	defaultQUICFabricHandshakeIdleTimeout = 8 * time.Second
	defaultQUICFabricMaxIdleTimeout       = 90 * time.Second
	defaultQUICFabricKeepAlivePeriod      = 15 * time.Second
)
|
||||
// ErrQUICFabricStreamLimitReached is a constant error value of type
// quicFabricError with the message "quic fabric stream limit reached".
const ErrQUICFabricStreamLimitReached quicFabricError = "quic fabric stream limit reached"
|
||||
|
||||
type quicFabricError string
|
||||
@@ -31,20 +34,20 @@ func (e quicFabricError) Error() string {
|
||||
}
|
||||
|
||||
type QUICFabricTransport struct {
|
||||
Config *quic.Config
|
||||
LocalPeerID string
|
||||
IdleTTL time.Duration
|
||||
MaxStreamsPerConn int
|
||||
DialAddr func(context.Context, string, *tls.Config, *quic.Config) (*quic.Conn, error)
|
||||
mu sync.Mutex
|
||||
conns map[string]*quicFabricConnEntry
|
||||
reverseConns map[string]*quicFabricConnEntry
|
||||
inboundProductionHandler func(context.Context, ProductionEnvelope) (ProductionForwardResult, error)
|
||||
inboundWebIngressHandler func(context.Context, []byte) ([]byte, error)
|
||||
Config *quic.Config
|
||||
LocalPeerID string
|
||||
IdleTTL time.Duration
|
||||
MaxStreamsPerConn int
|
||||
DialAddr func(context.Context, string, *tls.Config, *quic.Config) (*quic.Conn, error)
|
||||
mu sync.Mutex
|
||||
conns map[string]*quicFabricConnEntry
|
||||
reverseConns map[string]*quicFabricConnEntry
|
||||
inboundProductionHandler func(context.Context, ProductionEnvelope) (ProductionForwardResult, error)
|
||||
inboundWebIngressHandler func(context.Context, []byte) ([]byte, error)
|
||||
inboundFabricControlHandler func(context.Context, []byte) ([]byte, error)
|
||||
inboundSyntheticHandler func(context.Context, SyntheticEnvelope) (SyntheticEnvelope, error)
|
||||
logger FabricSessionEventLogger
|
||||
stats QUICFabricTransportStats
|
||||
inboundSyntheticHandler func(context.Context, SyntheticEnvelope) (SyntheticEnvelope, error)
|
||||
logger FabricSessionEventLogger
|
||||
stats QUICFabricTransportStats
|
||||
}
|
||||
|
||||
type QUICFabricTransportStats struct {
|
||||
@@ -109,7 +112,25 @@ type quicFabricConnEntry struct {
|
||||
}
|
||||
|
||||
func NewQUICFabricTransport(config *quic.Config) *QUICFabricTransport {
|
||||
return &QUICFabricTransport{Config: config, IdleTTL: defaultQUICFabricConnIdleTTL, MaxStreamsPerConn: defaultQUICFabricMaxStreamsPerConn, conns: map[string]*quicFabricConnEntry{}, reverseConns: map[string]*quicFabricConnEntry{}}
|
||||
return &QUICFabricTransport{Config: defaultQUICFabricConfig(config), IdleTTL: defaultQUICFabricConnIdleTTL, MaxStreamsPerConn: defaultQUICFabricMaxStreamsPerConn, conns: map[string]*quicFabricConnEntry{}, reverseConns: map[string]*quicFabricConnEntry{}}
|
||||
}
|
||||
|
||||
func defaultQUICFabricConfig(config *quic.Config) *quic.Config {
|
||||
out := &quic.Config{}
|
||||
if config != nil {
|
||||
clone := *config
|
||||
out = &clone
|
||||
}
|
||||
if out.HandshakeIdleTimeout <= 0 {
|
||||
out.HandshakeIdleTimeout = defaultQUICFabricHandshakeIdleTimeout
|
||||
}
|
||||
if out.MaxIdleTimeout <= 0 {
|
||||
out.MaxIdleTimeout = defaultQUICFabricMaxIdleTimeout
|
||||
}
|
||||
if out.KeepAlivePeriod <= 0 {
|
||||
out.KeepAlivePeriod = defaultQUICFabricKeepAlivePeriod
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func (t *QUICFabricTransport) SetInboundHandlers(production func(context.Context, ProductionEnvelope) (ProductionForwardResult, error), synthetic func(context.Context, SyntheticEnvelope) (SyntheticEnvelope, error), logger FabricSessionEventLogger) {
|
||||
@@ -150,6 +171,7 @@ func quicTLSConfigForTarget(target FabricTransportTarget) *tls.Config {
|
||||
expectedFingerprint := normalizeCertSHA256(target.PeerCertSHA256)
|
||||
config := &tls.Config{NextProtos: []string{fabricQUICNextProto}}
|
||||
if expectedFingerprint == "" {
|
||||
config.InsecureSkipVerify = true
|
||||
return config
|
||||
}
|
||||
config.InsecureSkipVerify = true
|
||||
@@ -198,9 +220,12 @@ func (t *QUICFabricTransport) Connect(ctx context.Context, target FabricTranspor
|
||||
stream, err := conn.OpenStreamSync(ctx)
|
||||
if err != nil {
|
||||
t.releaseStream(connKey)
|
||||
t.evictConnByKey(connKey, conn)
|
||||
t.evictConn(target, conn)
|
||||
if closeConn {
|
||||
_ = conn.CloseWithError(1, "open stream failed")
|
||||
} else {
|
||||
_ = conn.CloseWithError(1, "cached stream open failed")
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
@@ -680,8 +705,28 @@ func (t *QUICFabricTransport) evictConn(target FabricTransportTarget, conn *quic
|
||||
t.mu.Unlock()
|
||||
}
|
||||
|
||||
func (t *QUICFabricTransport) evictConnByKey(key string, conn *quic.Conn) {
|
||||
if t == nil || key == "" || conn == nil {
|
||||
return
|
||||
}
|
||||
t.mu.Lock()
|
||||
defer t.mu.Unlock()
|
||||
if strings.HasPrefix(key, "reverse\x00") {
|
||||
peerID := strings.TrimPrefix(key, "reverse\x00")
|
||||
if entry := t.reverseConns[peerID]; entry != nil && entry.conn == conn {
|
||||
delete(t.reverseConns, peerID)
|
||||
t.stats.ClosedEvicted++
|
||||
}
|
||||
return
|
||||
}
|
||||
if entry := t.conns[key]; entry != nil && entry.conn == conn {
|
||||
delete(t.conns, key)
|
||||
t.stats.ClosedEvicted++
|
||||
}
|
||||
}
|
||||
|
||||
func (t *QUICFabricTransport) pruneIdleLocked(now time.Time) {
|
||||
if t == nil || len(t.conns) == 0 {
|
||||
if t == nil {
|
||||
return
|
||||
}
|
||||
ttl := t.IdleTTL
|
||||
@@ -897,7 +942,13 @@ func (s *quicFabricSession) Send(ctx context.Context, frame fabricproto.Frame) e
|
||||
s.writeMu.Lock()
|
||||
defer s.writeMu.Unlock()
|
||||
s.applyWriteDeadline(ctx)
|
||||
return fabricproto.WriteFrame(s.stream, frame)
|
||||
if err := fabricproto.WriteFrame(s.stream, frame); err != nil {
|
||||
if s.transport != nil && s.conn != nil {
|
||||
s.transport.evictConnByKey(s.connKey, s.conn)
|
||||
}
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *quicFabricSession) Frames() <-chan fabricproto.Frame {
|
||||
|
||||
@@ -21,7 +21,7 @@ const (
|
||||
type FabricRoutePlannerConfig struct {
|
||||
ClusterID string
|
||||
LocalNodeID string
|
||||
LocalSegmentID string
|
||||
LocalityGroupID string
|
||||
LocalNATGroupID string
|
||||
DefaultCapacity int
|
||||
RelayCapacity int
|
||||
@@ -34,13 +34,13 @@ type FabricRoutePlannerConfig struct {
|
||||
}
|
||||
|
||||
type FabricCandidateMetadata struct {
|
||||
LocalSegmentID string `json:"local_segment_id,omitempty"`
|
||||
NATGroupID string `json:"nat_group_id,omitempty"`
|
||||
RelayNodeID string `json:"relay_node_id,omitempty"`
|
||||
RelayEndpoint string `json:"relay_endpoint,omitempty"`
|
||||
ViaNodeID string `json:"via_node_id,omitempty"`
|
||||
STUNServer string `json:"stun_server,omitempty"`
|
||||
ICEFoundation string `json:"ice_foundation,omitempty"`
|
||||
LocalityGroupID string `json:"locality_group_id,omitempty"`
|
||||
NATGroupID string `json:"nat_group_id,omitempty"`
|
||||
RelayNodeID string `json:"relay_node_id,omitempty"`
|
||||
RelayEndpoint string `json:"relay_endpoint,omitempty"`
|
||||
ViaNodeID string `json:"via_node_id,omitempty"`
|
||||
STUNServer string `json:"stun_server,omitempty"`
|
||||
ICEFoundation string `json:"ice_foundation,omitempty"`
|
||||
}
|
||||
|
||||
func FabricRouteSetForPeerEndpointCandidates(targetNodeID string, candidates []PeerEndpointCandidate, cfg FabricRoutePlannerConfig) FabricRouteSet {
|
||||
@@ -141,7 +141,7 @@ func fabricRouteModeForPeerEndpointCandidate(candidate PeerEndpointCandidate, me
|
||||
}
|
||||
reachability := strings.ToLower(strings.TrimSpace(candidate.Reachability))
|
||||
connectivity := strings.ToLower(strings.TrimSpace(candidate.ConnectivityMode))
|
||||
if sameLocalSegment(metadata, cfg) || sameNATGroup(metadata, cfg) {
|
||||
if sameLocalityGroup(metadata, cfg) || sameNATGroup(metadata, cfg) {
|
||||
return FabricRouteLAN
|
||||
}
|
||||
if reachability == FabricCandidateReachabilityRelay || connectivity == FabricConnectivityRelayRequired || strings.TrimSpace(metadata.RelayEndpoint) != "" {
|
||||
@@ -240,12 +240,12 @@ func candidatePressureCount(endpointID string, cfg FabricRoutePlannerConfig) int
|
||||
return 0
|
||||
}
|
||||
|
||||
func sameLocalSegment(metadata FabricCandidateMetadata, cfg FabricRoutePlannerConfig) bool {
|
||||
localSegment := strings.TrimSpace(cfg.LocalSegmentID)
|
||||
if localSegment == "" {
|
||||
func sameLocalityGroup(metadata FabricCandidateMetadata, cfg FabricRoutePlannerConfig) bool {
|
||||
localityGroup := strings.TrimSpace(cfg.LocalityGroupID)
|
||||
if localityGroup == "" {
|
||||
return false
|
||||
}
|
||||
return strings.EqualFold(strings.TrimSpace(metadata.LocalSegmentID), localSegment)
|
||||
return strings.EqualFold(strings.TrimSpace(metadata.LocalityGroupID), localityGroup)
|
||||
}
|
||||
|
||||
func sameNATGroup(metadata FabricCandidateMetadata, cfg FabricRoutePlannerConfig) bool {
|
||||
|
||||
@@ -7,7 +7,7 @@ import (
|
||||
)
|
||||
|
||||
func TestFabricRouteSetForPeerEndpointCandidatesPrefersLocalLAN(t *testing.T) {
|
||||
metadata, _ := json.Marshal(FabricCandidateMetadata{LocalSegmentID: "site-a", NATGroupID: "nat-a"})
|
||||
metadata, _ := json.Marshal(FabricCandidateMetadata{LocalityGroupID: "home-lan", NATGroupID: "nat-a"})
|
||||
routeSet := FabricRouteSetForPeerEndpointCandidates("node-b", []PeerEndpointCandidate{
|
||||
{
|
||||
EndpointID: "node-b-public",
|
||||
@@ -31,7 +31,7 @@ func TestFabricRouteSetForPeerEndpointCandidatesPrefersLocalLAN(t *testing.T) {
|
||||
}, FabricRoutePlannerConfig{
|
||||
ClusterID: "cluster-1",
|
||||
LocalNodeID: "node-a",
|
||||
LocalSegmentID: "site-a",
|
||||
LocalityGroupID: "home-lan",
|
||||
DefaultCapacity: 200,
|
||||
Now: time.Unix(100, 0).UTC(),
|
||||
})
|
||||
@@ -172,7 +172,7 @@ func TestFabricRouteSetForPeerEndpointCandidatesRejectsNonQUIC(t *testing.T) {
|
||||
ConnectivityMode: "direct",
|
||||
},
|
||||
{
|
||||
EndpointID: "node-b-legacy-relay",
|
||||
EndpointID: "node-b-compat-relay",
|
||||
NodeID: "node-b",
|
||||
Transport: "relay",
|
||||
Address: "quic://node-r:19443",
|
||||
@@ -180,7 +180,7 @@ func TestFabricRouteSetForPeerEndpointCandidatesRejectsNonQUIC(t *testing.T) {
|
||||
ConnectivityMode: "relay_required",
|
||||
},
|
||||
{
|
||||
EndpointID: "node-b-legacy-reverse",
|
||||
EndpointID: "node-b-compat-reverse",
|
||||
NodeID: "node-b",
|
||||
Transport: "outbound_reverse",
|
||||
Address: "quic://node-b:19443",
|
||||
|
||||
@@ -4,7 +4,6 @@ import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
@@ -30,7 +29,6 @@ type FabricTransportTarget struct {
|
||||
Endpoint string
|
||||
Transport string
|
||||
Token string
|
||||
Header http.Header
|
||||
TLSConfig *tls.Config
|
||||
PeerCertSHA256 string
|
||||
Timeout time.Duration
|
||||
|
||||
@@ -11,6 +11,8 @@ const DefaultWarmPeerLimit = 8
|
||||
|
||||
type PeerCacheConfig struct {
|
||||
Local PeerIdentity
|
||||
LocalityGroupID string
|
||||
LocalNATGroupID string
|
||||
PeerEndpoints map[string]string
|
||||
PeerEndpointCandidates map[string][]PeerEndpointCandidate
|
||||
PeerEndpointObservations map[string]EndpointCandidateHealthObservation
|
||||
@@ -59,11 +61,12 @@ type PeerCacheEntry struct {
|
||||
BestCandidateScore int `json:"best_candidate_score,omitempty"`
|
||||
BestScoreReasons []string `json:"best_score_reasons,omitempty"`
|
||||
BestPeerCertSHA256 string `json:"best_peer_cert_sha256,omitempty"`
|
||||
PublicIngressCount int `json:"public_ingress_count,omitempty"`
|
||||
EndpointCandidates []PeerEndpointCandidate `json:"endpoint_candidates,omitempty"`
|
||||
RendezvousLeaseID string `json:"rendezvous_lease_id,omitempty"`
|
||||
RelayNodeID string `json:"relay_node_id,omitempty"`
|
||||
RelayEndpoint string `json:"relay_endpoint,omitempty"`
|
||||
RelayControl bool `json:"relay_control"`
|
||||
RelayQUIC bool `json:"relay_quic"`
|
||||
}
|
||||
|
||||
type peerCacheBuildEntry struct {
|
||||
@@ -119,6 +122,8 @@ func NewPeerCache(cfg PeerCacheConfig) *PeerCache {
|
||||
scored := RankPeerEndpointCandidates(candidates, EndpointCandidateScoreOptions{
|
||||
ChannelClass: SyntheticChannelFabricControl,
|
||||
PreferredRegion: cfg.PreferredRegion,
|
||||
LocalityGroupID: cfg.LocalityGroupID,
|
||||
LocalNATGroupID: cfg.LocalNATGroupID,
|
||||
Now: now,
|
||||
MaxVerificationAge: time.Hour,
|
||||
Observations: cfg.PeerEndpointObservations,
|
||||
@@ -129,6 +134,7 @@ func NewPeerCache(cfg PeerCacheConfig) *PeerCache {
|
||||
for _, scoredCandidate := range scored {
|
||||
entry.EndpointCandidates = append(entry.EndpointCandidates, scoredCandidate.Candidate)
|
||||
}
|
||||
entry.PublicIngressCount = publicIngressCountFromCandidates(entry.EndpointCandidates)
|
||||
entry.BestCandidateID = scored[0].Candidate.EndpointID
|
||||
entry.BestCandidateAddr = scored[0].Candidate.Address
|
||||
entry.BestTransport = scored[0].Candidate.Transport
|
||||
@@ -197,9 +203,9 @@ func NewPeerCache(cfg PeerCacheConfig) *PeerCache {
|
||||
entry.RendezvousLeaseID = lease.LeaseID
|
||||
entry.RelayNodeID = lease.RelayNodeID
|
||||
entry.RelayEndpoint = strings.TrimRight(strings.TrimSpace(lease.RelayEndpoint), "/")
|
||||
entry.RelayControl = true
|
||||
entry.RelayQUIC = true
|
||||
entry.CandidateCount = maxInt(entry.CandidateCount, 1)
|
||||
entry.ConnectivityModes = mergeStrings(entry.ConnectivityModes, []string{firstNonEmpty(lease.ConnectivityMode, "relay_required"), "relay_control"})
|
||||
entry.ConnectivityModes = mergeStrings(entry.ConnectivityModes, []string{firstNonEmpty(lease.ConnectivityMode, "relay_required"), "relay_quic"})
|
||||
if useLeaseEndpoint {
|
||||
if localRelay {
|
||||
entry.BestTransport = "reverse_quic"
|
||||
@@ -225,7 +231,7 @@ func NewPeerCache(cfg PeerCacheConfig) *PeerCache {
|
||||
entry.Endpoint = strings.TrimRight(strings.TrimSpace(lease.RelayEndpoint), "/")
|
||||
}
|
||||
entry.EndpointCount = maxInt(entry.EndpointCount, 1)
|
||||
entry.ConnectivityModes = mergeStrings(entry.ConnectivityModes, []string{"relay_control"})
|
||||
entry.ConnectivityModes = mergeStrings(entry.ConnectivityModes, []string{"relay_quic"})
|
||||
}
|
||||
}
|
||||
out := make([]peerCacheBuildEntry, 0, len(entries))
|
||||
@@ -334,13 +340,37 @@ func warmPeerPriority(entry peerCacheBuildEntry) int {
|
||||
if entry.bestScore > 0 {
|
||||
score += entry.bestScore
|
||||
}
|
||||
if entry.RelayControl {
|
||||
if entry.RelayQUIC {
|
||||
score += 300
|
||||
}
|
||||
if entry.PublicIngressCount > 0 {
|
||||
score += entry.PublicIngressCount * 75
|
||||
}
|
||||
score += entry.CandidateCount
|
||||
return score
|
||||
}
|
||||
|
||||
// publicIngressCountFromCandidates returns the number of distinct addresses
// among candidates that qualify as public QUIC ingress points.
//
// A candidate is counted when, after trimming whitespace and lower-casing,
// its Reachability equals "public" and its Transport contains "quic", and its
// trimmed Address is non-empty. Addresses are de-duplicated by their exact
// trimmed string, so two candidates sharing an address count once. An empty
// or nil slice yields 0.
func publicIngressCountFromCandidates(candidates []PeerEndpointCandidate) int {
|
||||
if len(candidates) == 0 {
|
||||
return 0
|
||||
}
|
||||
distinct := map[string]struct{}{}
|
||||
for _, candidate := range candidates {
|
||||
if strings.ToLower(strings.TrimSpace(candidate.Reachability)) != "public" {
|
||||
continue
|
||||
}
|
||||
if !strings.Contains(strings.ToLower(strings.TrimSpace(candidate.Transport)), "quic") {
|
||||
continue
|
||||
}
|
||||
address := strings.TrimSpace(candidate.Address)
|
||||
if address == "" {
|
||||
continue
|
||||
}
|
||||
distinct[address] = struct{}{}
|
||||
}
|
||||
return len(distinct)
|
||||
}
|
||||
|
||||
func warmPeerReason(entry peerCacheBuildEntry) string {
|
||||
if entry.adjacentRoutePeer {
|
||||
return "route_adjacent"
|
||||
@@ -348,7 +378,7 @@ func warmPeerReason(entry peerCacheBuildEntry) string {
|
||||
if entry.RecoverySeed {
|
||||
return "recovery_seed"
|
||||
}
|
||||
if entry.RelayControl {
|
||||
if entry.RelayQUIC {
|
||||
return "rendezvous_lease"
|
||||
}
|
||||
if entry.BestCandidateID != "" {
|
||||
|
||||
@@ -98,6 +98,9 @@ func TestPeerCacheUsesBestEndpointCandidate(t *testing.T) {
|
||||
if entry.BestCandidateID != "node-b-public" || !entry.Warm {
|
||||
t.Fatalf("unexpected candidate selection: %+v", entry)
|
||||
}
|
||||
if entry.PublicIngressCount != 1 {
|
||||
t.Fatalf("public ingress count = %d, want 1", entry.PublicIngressCount)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPeerCacheAppliesEndpointHealthObservations(t *testing.T) {
|
||||
@@ -224,3 +227,12 @@ func peerCacheEntryByID(snapshot PeerCacheSnapshot, nodeID string) (PeerCacheEnt
|
||||
}
|
||||
return PeerCacheEntry{}, false
|
||||
}
|
||||
|
||||
// containsString reports whether want occurs in values, using exact
// (case-sensitive) string comparison. It returns false for an empty or nil
// slice.
func containsString(values []string, want string) bool {
|
||||
for _, value := range values {
|
||||
if value == want {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -21,7 +21,7 @@ const (
|
||||
PeerTransportModeCorporateLAN = "corporate_lan"
|
||||
PeerTransportModeOutboundOnly = "outbound_only"
|
||||
PeerTransportModeRelayRequired = "relay_required"
|
||||
PeerTransportModeRelayControl = "relay_control"
|
||||
PeerTransportModeRelayQUIC = "relay_quic"
|
||||
PeerTransportModeUnknown = "unknown"
|
||||
)
|
||||
|
||||
@@ -44,7 +44,7 @@ type PeerConnectionIntentPlan struct {
|
||||
CorporateLANCount int `json:"corporate_lan_count"`
|
||||
OutboundOnlyCount int `json:"outbound_only_count"`
|
||||
RelayRequiredCount int `json:"relay_required_count"`
|
||||
RelayControlCount int `json:"relay_control_count"`
|
||||
RelayQUICCount int `json:"relay_quic_count"`
|
||||
RendezvousRequiredCount int `json:"rendezvous_required_count"`
|
||||
RendezvousResolvedCount int `json:"rendezvous_resolved_count"`
|
||||
RendezvousLeaseCount int `json:"rendezvous_lease_count"`
|
||||
@@ -113,8 +113,8 @@ func PlanPeerConnectionIntents(cfg PeerConnectionIntentPlanConfig) PeerConnectio
|
||||
RendezvousLeaseID: entry.RendezvousLeaseID,
|
||||
RelayNodeID: entry.RelayNodeID,
|
||||
RelayEndpoint: entry.RelayEndpoint,
|
||||
RelayCandidate: entry.RelayControl,
|
||||
ControlPlaneOnly: entry.RelayControl,
|
||||
RelayCandidate: entry.RelayQUIC,
|
||||
ControlPlaneOnly: entry.RelayQUIC,
|
||||
RecoverySeed: candidate.RecoverySeed || entry.RecoverySeed,
|
||||
Priority: candidate.Priority,
|
||||
GeneratedAt: now,
|
||||
@@ -163,8 +163,8 @@ func PlanPeerConnectionIntents(cfg PeerConnectionIntentPlanConfig) PeerConnectio
|
||||
plan.OutboundOnlyCount++
|
||||
case PeerTransportModeRelayRequired:
|
||||
plan.RelayRequiredCount++
|
||||
case PeerTransportModeRelayControl:
|
||||
plan.RelayControlCount++
|
||||
case PeerTransportModeRelayQUIC:
|
||||
plan.RelayQUICCount++
|
||||
}
|
||||
if intent.RequiresRendezvous {
|
||||
plan.RendezvousRequiredCount++
|
||||
@@ -266,7 +266,7 @@ func applyRendezvousLease(intent *PeerConnectionIntent, lease PeerRendezvousLeas
|
||||
} else {
|
||||
intent.Transport = firstNonEmpty(lease.Transport, "relay_quic")
|
||||
}
|
||||
intent.TransportMode = PeerTransportModeRelayControl
|
||||
intent.TransportMode = PeerTransportModeRelayQUIC
|
||||
intent.RequiresRendezvous = false
|
||||
intent.RendezvousResolved = true
|
||||
intent.DirectCandidate = false
|
||||
|
||||
@@ -170,11 +170,11 @@ func TestPeerConnectionIntentsResolveRendezvousWithRelayLease(t *testing.T) {
|
||||
Now: now,
|
||||
})
|
||||
|
||||
if plan.IntentCount != 1 || plan.RelayControlCount != 1 || plan.RendezvousResolvedCount != 1 || plan.RendezvousRequiredCount != 0 {
|
||||
if plan.IntentCount != 1 || plan.RelayQUICCount != 1 || plan.RendezvousResolvedCount != 1 || plan.RendezvousRequiredCount != 0 {
|
||||
t.Fatalf("unexpected relay-control plan counts: %+v", plan)
|
||||
}
|
||||
intent := plan.Intents[0]
|
||||
if intent.TransportMode != PeerTransportModeRelayControl ||
|
||||
if intent.TransportMode != PeerTransportModeRelayQUIC ||
|
||||
intent.Endpoint != "quic://node-r:19443" ||
|
||||
intent.RelayNodeID != "node-r" ||
|
||||
intent.RendezvousLeaseID != "lease-node-b-via-node-r" ||
|
||||
@@ -239,7 +239,7 @@ func TestPeerConnectionIntentsSkipExpiredRendezvousLeaseAndReselect(t *testing.T
|
||||
Now: now,
|
||||
})
|
||||
|
||||
if plan.RendezvousResolvedCount != 1 || plan.RelayControlCount != 1 || plan.RendezvousRequiredCount != 0 {
|
||||
if plan.RendezvousResolvedCount != 1 || plan.RelayQUICCount != 1 || plan.RendezvousRequiredCount != 0 {
|
||||
t.Fatalf("unexpected reselected plan counts: %+v", plan)
|
||||
}
|
||||
intent := plan.Intents[0]
|
||||
|
||||
@@ -3,7 +3,6 @@ package mesh
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
@@ -25,7 +24,6 @@ type PeerConnectionManagerConfig struct {
|
||||
PeerCache *PeerCache
|
||||
Tracker *PeerConnectionTracker
|
||||
RendezvousLeases []PeerRendezvousLease
|
||||
HTTPClient *http.Client
|
||||
QUICTransport *QUICFabricTransport
|
||||
PreferredRegion string
|
||||
ProbeTimeout time.Duration
|
||||
@@ -37,7 +35,6 @@ type PeerConnectionManager struct {
|
||||
peerCache *PeerCache
|
||||
tracker *PeerConnectionTracker
|
||||
rendezvousLeases []PeerRendezvousLease
|
||||
httpClient *http.Client
|
||||
quicTransport *QUICFabricTransport
|
||||
preferredRegion string
|
||||
probeTimeout time.Duration
|
||||
@@ -60,7 +57,7 @@ type PeerConnectionManagerCycle struct {
|
||||
Skipped int `json:"skipped"`
|
||||
RendezvousRequiredCount int `json:"rendezvous_required_count"`
|
||||
RendezvousResolvedCount int `json:"rendezvous_resolved_count"`
|
||||
RelayControlCount int `json:"relay_control_count"`
|
||||
RelayQUICCount int `json:"relay_quic_count"`
|
||||
RecoveryPlan PeerRecoveryPlan `json:"recovery_plan"`
|
||||
IntentPlan PeerConnectionIntentPlan `json:"intent_plan"`
|
||||
Results []PeerConnectionProbeResult `json:"results,omitempty"`
|
||||
@@ -117,17 +114,6 @@ func NewPeerConnectionManager(cfg PeerConnectionManagerConfig) *PeerConnectionMa
|
||||
if probeTimeout <= 0 {
|
||||
probeTimeout = DefaultPeerConnectionProbeTimeout
|
||||
}
|
||||
httpClient := cfg.HTTPClient
|
||||
if httpClient == nil {
|
||||
httpClient = &http.Client{
|
||||
Transport: &http.Transport{
|
||||
MaxIdleConns: 64,
|
||||
MaxIdleConnsPerHost: 8,
|
||||
IdleConnTimeout: 90 * time.Second,
|
||||
},
|
||||
Timeout: probeTimeout + time.Second,
|
||||
}
|
||||
}
|
||||
now := cfg.Now
|
||||
if now == nil {
|
||||
now = func() time.Time { return time.Now().UTC() }
|
||||
@@ -137,7 +123,6 @@ func NewPeerConnectionManager(cfg PeerConnectionManagerConfig) *PeerConnectionMa
|
||||
peerCache: cfg.PeerCache,
|
||||
tracker: cfg.Tracker,
|
||||
rendezvousLeases: append([]PeerRendezvousLease{}, cfg.RendezvousLeases...),
|
||||
httpClient: httpClient,
|
||||
quicTransport: cfg.QUICTransport,
|
||||
preferredRegion: strings.TrimSpace(cfg.PreferredRegion),
|
||||
probeTimeout: probeTimeout,
|
||||
@@ -157,6 +142,7 @@ func (m *PeerConnectionManager) ProbeOnce(ctx context.Context) PeerConnectionMan
|
||||
Connections: m.tracker.Snapshot(),
|
||||
TargetReadyPeers: DefaultStablePeerTarget,
|
||||
MaxProbeCandidates: DefaultRecoveryProbeLimit,
|
||||
PreferredRegion: m.preferredRegion,
|
||||
Now: startedAt,
|
||||
})
|
||||
intentPlan := PlanPeerConnectionIntents(PeerConnectionIntentPlanConfig{
|
||||
@@ -177,7 +163,7 @@ func (m *PeerConnectionManager) ProbeOnce(ctx context.Context) PeerConnectionMan
|
||||
IntentCount: intentPlan.IntentCount,
|
||||
RendezvousRequiredCount: intentPlan.RendezvousRequiredCount,
|
||||
RendezvousResolvedCount: intentPlan.RendezvousResolvedCount,
|
||||
RelayControlCount: intentPlan.RelayControlCount,
|
||||
RelayQUICCount: intentPlan.RelayQUICCount,
|
||||
RecoveryPlan: recoveryPlan,
|
||||
IntentPlan: intentPlan,
|
||||
Results: make([]PeerConnectionProbeResult, 0, len(intentPlan.Intents)),
|
||||
@@ -270,7 +256,7 @@ func (m *PeerConnectionManager) probeIntent(ctx context.Context, intent PeerConn
|
||||
RendezvousLeaseID: intent.RendezvousLeaseID,
|
||||
RelayNodeID: intent.RelayNodeID,
|
||||
RelayEndpoint: intent.RelayEndpoint,
|
||||
RelayControl: intent.RelayCandidate,
|
||||
RelayQUIC: intent.RelayCandidate,
|
||||
BestPeerCertSHA256: firstNonEmpty(intent.BestPeerCertSHA256, cacheEntry.BestPeerCertSHA256),
|
||||
}
|
||||
if intent.RequiresRendezvous {
|
||||
@@ -385,7 +371,7 @@ func peerConnectionProbeTargetNodeID(intent PeerConnectionIntent, localNodeID st
|
||||
func (m *PeerConnectionManager) probePeerTarget(ctx context.Context, probePeer PeerCacheEntry, target PeerIdentity) error {
|
||||
endpoint := strings.TrimRight(strings.TrimSpace(probePeer.Endpoint), "/")
|
||||
transport := strings.TrimSpace(probePeer.BestTransport)
|
||||
if hasLegacyEndpointScheme(endpoint) {
|
||||
if hasUnsupportedEndpointScheme(endpoint) {
|
||||
return fmt.Errorf("non_quic_probe_rejected")
|
||||
}
|
||||
if peerConnectionTargetIsQUIC(transport, endpoint) {
|
||||
@@ -445,7 +431,7 @@ func peerConnectionProbeTargets(intent PeerConnectionIntent, cacheEntry PeerCach
|
||||
}
|
||||
add(candidate.EndpointID, candidate.Address, candidate.Transport, candidatePeerCertSHA256(candidate))
|
||||
}
|
||||
add(intent.BestCandidateID, intent.Endpoint, intent.Transport, cacheEntry.BestPeerCertSHA256)
|
||||
add(intent.BestCandidateID, intent.Endpoint, intent.Transport, intent.BestPeerCertSHA256)
|
||||
return out
|
||||
}
|
||||
|
||||
@@ -455,7 +441,7 @@ func peerConnectionShouldProbeDirectUpgrade(intent PeerConnectionIntent, cacheEn
|
||||
}
|
||||
if strings.TrimSpace(intent.ConnectionState) != PeerConnectionRelayReady &&
|
||||
!intent.RelayCandidate &&
|
||||
strings.TrimSpace(intent.TransportMode) != PeerTransportModeRelayControl {
|
||||
strings.TrimSpace(intent.TransportMode) != PeerTransportModeRelayQUIC {
|
||||
return false
|
||||
}
|
||||
for _, candidate := range cacheEntry.EndpointCandidates {
|
||||
@@ -509,8 +495,3 @@ func (m *PeerConnectionManager) connectionState(nodeID string) PeerConnectionSta
|
||||
}
|
||||
return PeerConnectionState{NodeID: nodeID, State: PeerConnectionDisconnected}
|
||||
}
|
||||
|
||||
func (c Client) withHTTPClient(httpClient *http.Client) Client {
|
||||
c.HTTPClient = httpClient
|
||||
return c
|
||||
}
|
||||
|
||||
@@ -3,7 +3,6 @@ package mesh
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
@@ -90,7 +89,7 @@ func TestPeerConnectionManagerRecordsFailureAndSuppressesActiveBackoff(t *testin
|
||||
cache := NewPeerCache(PeerCacheConfig{
|
||||
Local: local,
|
||||
PeerEndpoints: map[string]string{
|
||||
"node-b": "http://127.0.0.1:1",
|
||||
"node-b": "quic://127.0.0.1:1",
|
||||
},
|
||||
WarmPeerLimit: 1,
|
||||
Now: now,
|
||||
@@ -100,7 +99,6 @@ func TestPeerConnectionManagerRecordsFailureAndSuppressesActiveBackoff(t *testin
|
||||
Local: local,
|
||||
PeerCache: cache,
|
||||
Tracker: tracker,
|
||||
HTTPClient: &http.Client{Timeout: 20 * time.Millisecond},
|
||||
ProbeTimeout: 20 * time.Millisecond,
|
||||
Now: func() time.Time {
|
||||
current = current.Add(10 * time.Millisecond)
|
||||
@@ -121,7 +119,7 @@ func TestPeerConnectionManagerRecordsFailureAndSuppressesActiveBackoff(t *testin
|
||||
}
|
||||
}
|
||||
|
||||
func TestPeerConnectionManagerProbesRelayControlLease(t *testing.T) {
|
||||
func TestPeerConnectionManagerProbesRelayQUICLease(t *testing.T) {
|
||||
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
||||
current := now
|
||||
tlsConfig := testQUICTLSConfig(t)
|
||||
@@ -188,7 +186,7 @@ func TestPeerConnectionManagerProbesRelayControlLease(t *testing.T) {
|
||||
if cycle.Attempted != 1 ||
|
||||
cycle.Succeeded != 1 ||
|
||||
cycle.Deferred != 0 ||
|
||||
cycle.RelayControlCount != 1 ||
|
||||
cycle.RelayQUICCount != 1 ||
|
||||
cycle.RendezvousResolvedCount != 1 ||
|
||||
cycle.RendezvousRequiredCount != 0 {
|
||||
t.Fatalf("unexpected relay-control cycle: %+v", cycle)
|
||||
@@ -227,11 +225,11 @@ func TestPeerConnectionProbeTargetsFallsBackToBestPeerCertSHA256(t *testing.T) {
|
||||
BestPeerCertSHA256: "intent-cert",
|
||||
}
|
||||
cacheEntry := PeerCacheEntry{
|
||||
NodeID: "node-b",
|
||||
BestPeerCertSHA256: "cache-cert",
|
||||
BestCandidateID: "node-b-best",
|
||||
BestTransport: "direct_quic",
|
||||
Endpoint: "quic://94.141.118.222:19199",
|
||||
NodeID: "node-b",
|
||||
BestPeerCertSHA256: "cache-cert",
|
||||
BestCandidateID: "node-b-best",
|
||||
BestTransport: "direct_quic",
|
||||
Endpoint: "quic://94.141.118.222:19199",
|
||||
EndpointCandidates: []PeerEndpointCandidate{
|
||||
{
|
||||
EndpointID: "node-b-public",
|
||||
@@ -259,6 +257,49 @@ func TestPeerConnectionProbeTargetsFallsBackToBestPeerCertSHA256(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestPeerConnectionProbeTargetsUsesRelayLeaseCertForRelayEndpoint checks that
// peerConnectionProbeTargets pairs the intent's relay endpoint with the
// certificate fingerprint carried by the intent ("relay-cert") rather than the
// cache entry's BestPeerCertSHA256 ("direct-cert").
//
// The intent describes a relay_quic endpoint and the cache entry contributes
// one private direct_quic candidate, so exactly two probe targets are
// expected.
func TestPeerConnectionProbeTargetsUsesRelayLeaseCertForRelayEndpoint(t *testing.T) {
|
||||
intent := PeerConnectionIntent{
|
||||
NodeID: "node-b",
|
||||
BestCandidateID: "lease-node-b-via-node-r",
|
||||
Endpoint: "quic://195.123.240.88:19131",
|
||||
Transport: "relay_quic",
|
||||
BestPeerCertSHA256: "relay-cert",
|
||||
RelayCandidate: true,
|
||||
ConnectionState: PeerConnectionBackoff,
|
||||
}
|
||||
cacheEntry := PeerCacheEntry{
|
||||
NodeID: "node-b",
|
||||
BestPeerCertSHA256: "direct-cert",
|
||||
EndpointCandidates: []PeerEndpointCandidate{
|
||||
{
|
||||
EndpointID: "node-b-private",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_quic",
|
||||
Address: "quic://192.168.200.61:19132",
|
||||
Reachability: "private",
|
||||
ConnectivityMode: "private_lan",
|
||||
Priority: 1,
|
||||
Metadata: peerConnectionProbeMetadata(t, "direct-cert"),
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
targets := peerConnectionProbeTargets(intent, cacheEntry)
|
||||
if len(targets) != 2 {
|
||||
t.Fatalf("target count = %d, want 2", len(targets))
|
||||
}
|
||||
// Locate the relay endpoint among the targets and verify which certificate
// fingerprint it was paired with; the test passes on the first match.
for _, target := range targets {
|
||||
if target.Endpoint != "quic://195.123.240.88:19131" {
|
||||
continue
|
||||
}
|
||||
if target.PeerCertSHA256 != "relay-cert" {
|
||||
t.Fatalf("relay endpoint cert = %q, want relay-cert", target.PeerCertSHA256)
|
||||
}
|
||||
return
|
||||
}
|
||||
// Reaching this point means no target carried the relay endpoint at all.
t.Fatalf("relay endpoint target not found: %+v", targets)
|
||||
}
|
||||
|
||||
func TestPeerConnectionProbeTargetsUpgradeRelayReadyPeerToDirectQUIC(t *testing.T) {
|
||||
now := time.Date(2026, 5, 18, 12, 0, 0, 0, time.UTC)
|
||||
current := now
|
||||
|
||||
@@ -36,7 +36,7 @@ type PeerConnectionState struct {
|
||||
RendezvousLeaseID string `json:"rendezvous_lease_id,omitempty"`
|
||||
RelayNodeID string `json:"relay_node_id,omitempty"`
|
||||
RelayEndpoint string `json:"relay_endpoint,omitempty"`
|
||||
RelayControl bool `json:"relay_control"`
|
||||
RelayQUIC bool `json:"relay_quic"`
|
||||
ConsecutiveSuccesses int `json:"consecutive_successes"`
|
||||
ConsecutiveFailures int `json:"consecutive_failures"`
|
||||
LastLatencyMs int `json:"last_latency_ms,omitempty"`
|
||||
@@ -287,7 +287,7 @@ func (t *PeerConnectionTracker) entry(peer PeerCacheEntry, now time.Time) PeerCo
|
||||
entry.RendezvousLeaseID = peer.RendezvousLeaseID
|
||||
entry.RelayNodeID = peer.RelayNodeID
|
||||
entry.RelayEndpoint = peer.RelayEndpoint
|
||||
entry.RelayControl = peer.RelayControl
|
||||
entry.RelayQUIC = peer.RelayQUIC
|
||||
return entry
|
||||
}
|
||||
|
||||
|
||||
@@ -21,6 +21,7 @@ type PeerRecoveryPlanConfig struct {
|
||||
Connections PeerConnectionSnapshot
|
||||
TargetReadyPeers int
|
||||
MaxProbeCandidates int
|
||||
PreferredRegion string
|
||||
Now time.Time
|
||||
}
|
||||
|
||||
@@ -42,6 +43,7 @@ type PeerRecoveryPlan struct {
|
||||
type PeerRecoveryCandidate struct {
|
||||
NodeID string `json:"node_id"`
|
||||
Endpoint string `json:"endpoint,omitempty"`
|
||||
Region string `json:"region,omitempty"`
|
||||
Warm bool `json:"warm"`
|
||||
WarmReason string `json:"warm_reason,omitempty"`
|
||||
RecoverySeed bool `json:"recovery_seed"`
|
||||
@@ -57,6 +59,7 @@ type PeerRecoveryCandidate struct {
|
||||
|
||||
type peerRecoveryCandidateBuild struct {
|
||||
PeerRecoveryCandidate
|
||||
PublicIngressCount int
|
||||
}
|
||||
|
||||
func PlanPeerRecovery(cfg PeerRecoveryPlanConfig) PeerRecoveryPlan {
|
||||
@@ -96,6 +99,7 @@ func PlanPeerRecovery(cfg PeerRecoveryPlanConfig) PeerRecoveryPlan {
|
||||
ready := 0
|
||||
degraded := 0
|
||||
backoff := 0
|
||||
readyExternalRegions := map[string]struct{}{}
|
||||
for nodeID, connection := range connectionByNode {
|
||||
entry, ok := entryByNode[nodeID]
|
||||
if !ok || strings.TrimSpace(entry.Endpoint) == "" {
|
||||
@@ -104,6 +108,10 @@ func PlanPeerRecovery(cfg PeerRecoveryPlanConfig) PeerRecoveryPlan {
|
||||
switch connection.State {
|
||||
case PeerConnectionReady:
|
||||
ready++
|
||||
region := strings.TrimSpace(entry.BestRegion)
|
||||
if region != "" && (strings.TrimSpace(cfg.PreferredRegion) == "" || !strings.EqualFold(region, cfg.PreferredRegion)) {
|
||||
readyExternalRegions[strings.ToLower(region)] = struct{}{}
|
||||
}
|
||||
case PeerConnectionRelayReady:
|
||||
// Relay-ready peers remain valuable for control-plane reachability,
|
||||
// but they do not satisfy the target for direct-ready transport paths.
|
||||
@@ -125,6 +133,7 @@ func PlanPeerRecovery(cfg PeerRecoveryPlanConfig) PeerRecoveryPlan {
|
||||
if mode == PeerRecoveryModeSteady {
|
||||
limit = target
|
||||
}
|
||||
missingExternalRegions := missingPeerRecoveryExternalRegions(cfg.PeerCache, cfg.PreferredRegion, readyExternalRegions, target)
|
||||
|
||||
candidates := make([]peerRecoveryCandidateBuild, 0, len(cfg.PeerCache.Entries))
|
||||
for _, entry := range cfg.PeerCache.Entries {
|
||||
@@ -138,13 +147,14 @@ func PlanPeerRecovery(cfg PeerRecoveryPlanConfig) PeerRecoveryPlan {
|
||||
if connection.State == PeerConnectionBackoff && connection.BackoffUntil.After(now) {
|
||||
continue
|
||||
}
|
||||
reason, ok := peerRecoveryCandidateReason(mode, entry, connection)
|
||||
reason, ok := peerRecoveryCandidateReason(mode, entry, connection, missingExternalRegions, cfg.PreferredRegion)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
candidate := PeerRecoveryCandidate{
|
||||
NodeID: entry.NodeID,
|
||||
Endpoint: strings.TrimSpace(entry.Endpoint),
|
||||
Region: strings.TrimSpace(entry.BestRegion),
|
||||
Warm: entry.Warm,
|
||||
WarmReason: entry.WarmReason,
|
||||
RecoverySeed: entry.RecoverySeed,
|
||||
@@ -155,9 +165,12 @@ func PlanPeerRecovery(cfg PeerRecoveryPlanConfig) PeerRecoveryPlan {
|
||||
LastLatencyMs: connection.LastLatencyMs,
|
||||
BackoffUntil: connection.BackoffUntil,
|
||||
Reason: reason,
|
||||
Priority: peerRecoveryCandidatePriority(entry, connection, reason),
|
||||
Priority: peerRecoveryCandidatePriority(entry, connection, reason, cfg.PreferredRegion),
|
||||
}
|
||||
candidates = append(candidates, peerRecoveryCandidateBuild{PeerRecoveryCandidate: candidate})
|
||||
candidates = append(candidates, peerRecoveryCandidateBuild{
|
||||
PeerRecoveryCandidate: candidate,
|
||||
PublicIngressCount: entry.PublicIngressCount,
|
||||
})
|
||||
}
|
||||
sort.SliceStable(candidates, func(i, j int) bool {
|
||||
if candidates[i].Priority != candidates[j].Priority {
|
||||
@@ -166,7 +179,7 @@ func PlanPeerRecovery(cfg PeerRecoveryPlanConfig) PeerRecoveryPlan {
|
||||
return candidates[i].NodeID < candidates[j].NodeID
|
||||
})
|
||||
if len(candidates) > limit {
|
||||
candidates = candidates[:limit]
|
||||
candidates = trimPeerRecoveryCandidates(candidates, limit, cfg.PreferredRegion)
|
||||
}
|
||||
|
||||
outCandidates := make([]PeerRecoveryCandidate, 0, len(candidates))
|
||||
@@ -194,11 +207,143 @@ func PlanPeerRecovery(cfg PeerRecoveryPlanConfig) PeerRecoveryPlan {
|
||||
}
|
||||
}
|
||||
|
||||
func peerRecoveryCandidateReason(mode string, entry PeerCacheEntry, connection PeerConnectionState) (string, bool) {
|
||||
// missingPeerRecoveryExternalRegions returns the set of external regions that
// are present in snapshot but not yet covered by a ready connection.
//
// A region is "external" when an entry's trimmed BestRegion is non-empty and,
// if preferredRegion is non-empty, differs from it case-insensitively. Region
// keys in the result are lower-cased; readyExternalRegions is looked up with
// those lower-cased keys, so callers are expected to supply lower-cased keys
// as well.
//
// The function returns nil when the snapshot has no external regions, when
// the number of ready external regions already meets the desired count, or
// when no external region is missing. Otherwise it returns every available
// external region that is absent from readyExternalRegions.
func missingPeerRecoveryExternalRegions(snapshot PeerCacheSnapshot, preferredRegion string, readyExternalRegions map[string]struct{}, target int) map[string]struct{} {
|
||||
preferredRegion = strings.TrimSpace(preferredRegion)
|
||||
availableExternalRegions := map[string]struct{}{}
|
||||
for _, entry := range snapshot.Entries {
|
||||
region := strings.TrimSpace(entry.BestRegion)
|
||||
if region == "" {
|
||||
continue
|
||||
}
|
||||
if preferredRegion != "" && strings.EqualFold(region, preferredRegion) {
|
||||
continue
|
||||
}
|
||||
availableExternalRegions[strings.ToLower(region)] = struct{}{}
|
||||
}
|
||||
if len(availableExternalRegions) == 0 {
|
||||
return nil
|
||||
}
|
||||
// Desired external coverage is the number of available external regions,
// capped at two and, when target is positive, at target.
desiredExternal := len(availableExternalRegions)
|
||||
if desiredExternal > 2 {
|
||||
desiredExternal = 2
|
||||
}
|
||||
if target > 0 && desiredExternal > target {
|
||||
desiredExternal = target
|
||||
}
|
||||
if len(readyExternalRegions) >= desiredExternal {
|
||||
return nil
|
||||
}
|
||||
missing := map[string]struct{}{}
|
||||
for region := range availableExternalRegions {
|
||||
if _, ok := readyExternalRegions[region]; ok {
|
||||
continue
|
||||
}
|
||||
missing[region] = struct{}{}
|
||||
}
|
||||
if len(missing) == 0 {
|
||||
return nil
|
||||
}
|
||||
return missing
|
||||
}
|
||||
|
||||
func trimPeerRecoveryCandidates(candidates []peerRecoveryCandidateBuild, limit int, preferredRegion string) []peerRecoveryCandidateBuild {
|
||||
if len(candidates) <= limit || limit <= 0 {
|
||||
return candidates
|
||||
}
|
||||
preferredRegion = strings.TrimSpace(preferredRegion)
|
||||
externalRegions := map[string]struct{}{}
|
||||
for _, candidate := range candidates {
|
||||
region := strings.TrimSpace(candidate.Region)
|
||||
if region == "" || (preferredRegion != "" && strings.EqualFold(region, preferredRegion)) {
|
||||
continue
|
||||
}
|
||||
externalRegions[strings.ToLower(region)] = struct{}{}
|
||||
}
|
||||
if len(externalRegions) < 2 {
|
||||
return candidates[:limit]
|
||||
}
|
||||
selected := make([]peerRecoveryCandidateBuild, 0, limit)
|
||||
selectedNodeIDs := map[string]struct{}{}
|
||||
selectedRegions := map[string]struct{}{}
|
||||
for _, candidate := range candidates {
|
||||
if len(selected) >= limit {
|
||||
break
|
||||
}
|
||||
region := strings.TrimSpace(candidate.Region)
|
||||
if region == "" || (preferredRegion != "" && strings.EqualFold(region, preferredRegion)) {
|
||||
continue
|
||||
}
|
||||
regionKey := strings.ToLower(region)
|
||||
if _, exists := selectedRegions[regionKey]; exists {
|
||||
continue
|
||||
}
|
||||
selected = append(selected, candidate)
|
||||
selectedNodeIDs[candidate.NodeID] = struct{}{}
|
||||
selectedRegions[regionKey] = struct{}{}
|
||||
}
|
||||
if len(selected) < limit && !selectedHasPublicIngress(selected) {
|
||||
for _, candidate := range candidates {
|
||||
if len(selected) >= limit {
|
||||
break
|
||||
}
|
||||
if _, exists := selectedNodeIDs[candidate.NodeID]; exists {
|
||||
continue
|
||||
}
|
||||
if candidatePublicIngressCount(candidate) <= 0 {
|
||||
continue
|
||||
}
|
||||
selected = append(selected, candidate)
|
||||
selectedNodeIDs[candidate.NodeID] = struct{}{}
|
||||
break
|
||||
}
|
||||
}
|
||||
for _, candidate := range candidates {
|
||||
if len(selected) >= limit {
|
||||
break
|
||||
}
|
||||
if _, exists := selectedNodeIDs[candidate.NodeID]; exists {
|
||||
continue
|
||||
}
|
||||
selected = append(selected, candidate)
|
||||
selectedNodeIDs[candidate.NodeID] = struct{}{}
|
||||
}
|
||||
if len(selected) > limit {
|
||||
selected = selected[:limit]
|
||||
}
|
||||
return selected
|
||||
}
|
||||
|
||||
func selectedHasPublicIngress(candidates []peerRecoveryCandidateBuild) bool {
|
||||
for _, candidate := range candidates {
|
||||
if candidatePublicIngressCount(candidate) > 0 {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func candidatePublicIngressCount(candidate peerRecoveryCandidateBuild) int {
|
||||
return candidate.PublicIngressCount
|
||||
}
|
||||
|
||||
func peerRecoveryCandidateReason(mode string, entry PeerCacheEntry, connection PeerConnectionState, missingExternalRegions map[string]struct{}, preferredRegion string) (string, bool) {
|
||||
if mode == PeerRecoveryModeSteady {
|
||||
if connection.State == PeerConnectionReady || connection.State == PeerConnectionRelayReady {
|
||||
return "maintain_ready", true
|
||||
}
|
||||
region := strings.ToLower(strings.TrimSpace(entry.BestRegion))
|
||||
if region != "" && len(missingExternalRegions) > 0 {
|
||||
if _, ok := missingExternalRegions[region]; ok {
|
||||
if preferredRegion == "" || !strings.EqualFold(strings.TrimSpace(entry.BestRegion), preferredRegion) {
|
||||
if connection.State == PeerConnectionDegraded {
|
||||
return "recover_external_area", true
|
||||
}
|
||||
if entry.Warm || entry.RecoverySeed || connection.State == PeerConnectionDisconnected || connection.State == PeerConnectionConnecting {
|
||||
return "recover_external_area", true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
if connection.State == PeerConnectionReady || connection.State == PeerConnectionRelayReady {
|
||||
@@ -216,7 +361,7 @@ func peerRecoveryCandidateReason(mode string, entry PeerCacheEntry, connection P
|
||||
return "recover_peer", true
|
||||
}
|
||||
|
||||
func peerRecoveryCandidatePriority(entry PeerCacheEntry, connection PeerConnectionState, reason string) int {
|
||||
func peerRecoveryCandidatePriority(entry PeerCacheEntry, connection PeerConnectionState, reason string, preferredRegion string) int {
|
||||
score := 0
|
||||
if entry.Warm {
|
||||
score += 1000
|
||||
@@ -237,6 +382,17 @@ func peerRecoveryCandidatePriority(entry PeerCacheEntry, connection PeerConnecti
|
||||
if entry.BestCandidateID != "" {
|
||||
score += 150
|
||||
}
|
||||
if entry.PublicIngressCount > 0 {
|
||||
score += entry.PublicIngressCount * 90
|
||||
}
|
||||
preferredRegion = strings.TrimSpace(preferredRegion)
|
||||
entryRegion := strings.TrimSpace(entry.BestRegion)
|
||||
switch {
|
||||
case preferredRegion != "" && entryRegion != "" && !strings.EqualFold(entryRegion, preferredRegion):
|
||||
score += 275
|
||||
case preferredRegion != "" && entryRegion != "" && strings.EqualFold(entryRegion, preferredRegion):
|
||||
score += 25
|
||||
}
|
||||
score += entry.BestCandidateScore / 10
|
||||
switch connection.State {
|
||||
case PeerConnectionReady, PeerConnectionRelayReady:
|
||||
@@ -251,6 +407,8 @@ func peerRecoveryCandidatePriority(entry PeerCacheEntry, connection PeerConnecti
|
||||
switch reason {
|
||||
case "maintain_ready":
|
||||
score += 500
|
||||
case "recover_external_area":
|
||||
score += 450
|
||||
case "recover_degraded":
|
||||
score += 300
|
||||
case "recover_seed":
|
||||
|
||||
@@ -82,7 +82,7 @@ func TestPeerRecoveryPlanTreatsRelayReadyPeersAsRecoveryGap(t *testing.T) {
|
||||
RendezvousLeaseID: "lease-1",
|
||||
RelayNodeID: "node-r",
|
||||
RelayEndpoint: "quic://relay:19443",
|
||||
RelayControl: true,
|
||||
RelayQUIC: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -121,6 +121,129 @@ func TestPeerRecoveryPlanCapsTargetByConnectablePeers(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestPeerRecoveryPlanPrefersExternalRegionsWhenTrimmingReadyPeers(t *testing.T) {
|
||||
now := time.Date(2026, 5, 18, 12, 0, 0, 0, time.UTC)
|
||||
plan := PlanPeerRecovery(PeerRecoveryPlanConfig{
|
||||
PeerCache: PeerCacheSnapshot{
|
||||
Entries: []PeerCacheEntry{
|
||||
{NodeID: "node-home-a", Endpoint: "quic://node-home-a:19443", Warm: true, WarmReason: "route_adjacent", BestRegion: "home"},
|
||||
{NodeID: "node-home-b", Endpoint: "quic://node-home-b:19443", Warm: true, WarmReason: "route_adjacent", BestRegion: "home"},
|
||||
{NodeID: "node-usa", Endpoint: "quic://node-usa:19443", Warm: true, WarmReason: "route_adjacent", BestRegion: "usa"},
|
||||
{NodeID: "node-ifcm", Endpoint: "quic://node-ifcm:19443", Warm: true, WarmReason: "route_adjacent", BestRegion: "ifcm"},
|
||||
},
|
||||
},
|
||||
Connections: PeerConnectionSnapshot{Entries: []PeerConnectionState{
|
||||
{NodeID: "node-home-a", State: PeerConnectionReady, LastLatencyMs: 20},
|
||||
{NodeID: "node-home-b", State: PeerConnectionReady, LastLatencyMs: 20},
|
||||
{NodeID: "node-usa", State: PeerConnectionReady, LastLatencyMs: 20},
|
||||
{NodeID: "node-ifcm", State: PeerConnectionReady, LastLatencyMs: 20},
|
||||
}},
|
||||
PreferredRegion: "home",
|
||||
Now: now,
|
||||
})
|
||||
|
||||
if len(plan.Candidates) != DefaultStablePeerTarget {
|
||||
t.Fatalf("candidate count = %d, want %d", len(plan.Candidates), DefaultStablePeerTarget)
|
||||
}
|
||||
if !recoveryPlanHasCandidate(plan, "node-usa", "maintain_ready") || !recoveryPlanHasCandidate(plan, "node-ifcm", "maintain_ready") {
|
||||
t.Fatalf("expected external-region peers to be retained: %+v", plan.Candidates)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPeerRecoveryPlanPrefersPublicIngressAtSameRegion(t *testing.T) {
|
||||
now := time.Date(2026, 5, 18, 12, 0, 0, 0, time.UTC)
|
||||
plan := PlanPeerRecovery(PeerRecoveryPlanConfig{
|
||||
PeerCache: PeerCacheSnapshot{
|
||||
Entries: []PeerCacheEntry{
|
||||
{NodeID: "node-home-private-a", Endpoint: "quic://10.0.0.2:19443", Warm: true, WarmReason: "route_adjacent", BestRegion: "home"},
|
||||
{NodeID: "node-home-private-b", Endpoint: "quic://10.0.0.3:19443", Warm: true, WarmReason: "route_adjacent", BestRegion: "home"},
|
||||
{NodeID: "node-home-public", Endpoint: "quic://94.141.118.222:19199", Warm: true, WarmReason: "route_adjacent", BestRegion: "home", PublicIngressCount: 1},
|
||||
{NodeID: "node-usa", Endpoint: "quic://195.123.240.88:19131", Warm: true, WarmReason: "route_adjacent", BestRegion: "usa", PublicIngressCount: 1},
|
||||
},
|
||||
},
|
||||
Connections: PeerConnectionSnapshot{Entries: []PeerConnectionState{
|
||||
{NodeID: "node-home-private-a", State: PeerConnectionReady, LastLatencyMs: 20},
|
||||
{NodeID: "node-home-private-b", State: PeerConnectionReady, LastLatencyMs: 20},
|
||||
{NodeID: "node-home-public", State: PeerConnectionReady, LastLatencyMs: 20},
|
||||
{NodeID: "node-usa", State: PeerConnectionReady, LastLatencyMs: 20},
|
||||
}},
|
||||
PreferredRegion: "home",
|
||||
Now: now,
|
||||
})
|
||||
|
||||
if len(plan.Candidates) != DefaultStablePeerTarget {
|
||||
t.Fatalf("candidate count = %d, want %d", len(plan.Candidates), DefaultStablePeerTarget)
|
||||
}
|
||||
if !recoveryPlanHasCandidate(plan, "node-home-public", "maintain_ready") {
|
||||
t.Fatalf("expected public-ingress home peer to be retained: %+v", plan.Candidates)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPeerRecoveryPlanRetainsDistinctExternalRegionsWhenAvailable(t *testing.T) {
|
||||
now := time.Date(2026, 5, 19, 12, 0, 0, 0, time.UTC)
|
||||
plan := PlanPeerRecovery(PeerRecoveryPlanConfig{
|
||||
PeerCache: PeerCacheSnapshot{
|
||||
Entries: []PeerCacheEntry{
|
||||
{NodeID: "node-home-a", Endpoint: "quic://node-home-a:19443", Warm: true, WarmReason: "route_adjacent", BestRegion: "home"},
|
||||
{NodeID: "node-home-b", Endpoint: "quic://node-home-b:19443", Warm: true, WarmReason: "route_adjacent", BestRegion: "home"},
|
||||
{NodeID: "node-home-c", Endpoint: "quic://node-home-c:19443", Warm: true, WarmReason: "route_adjacent", BestRegion: "home"},
|
||||
{NodeID: "node-usa-a", Endpoint: "quic://node-usa-a:19443", Warm: true, WarmReason: "route_adjacent", BestRegion: "usa", PublicIngressCount: 1},
|
||||
{NodeID: "node-usa-b", Endpoint: "quic://node-usa-b:19443", Warm: true, WarmReason: "route_adjacent", BestRegion: "usa", PublicIngressCount: 1},
|
||||
{NodeID: "node-ifcm", Endpoint: "quic://node-ifcm:19443", Warm: true, WarmReason: "route_adjacent", BestRegion: "ifcm", PublicIngressCount: 1},
|
||||
},
|
||||
},
|
||||
Connections: PeerConnectionSnapshot{Entries: []PeerConnectionState{
|
||||
{NodeID: "node-home-a", State: PeerConnectionReady, LastLatencyMs: 20},
|
||||
{NodeID: "node-home-b", State: PeerConnectionReady, LastLatencyMs: 20},
|
||||
{NodeID: "node-home-c", State: PeerConnectionReady, LastLatencyMs: 20},
|
||||
{NodeID: "node-usa-a", State: PeerConnectionReady, LastLatencyMs: 20},
|
||||
{NodeID: "node-usa-b", State: PeerConnectionReady, LastLatencyMs: 20},
|
||||
{NodeID: "node-ifcm", State: PeerConnectionReady, LastLatencyMs: 20},
|
||||
}},
|
||||
PreferredRegion: "home",
|
||||
Now: now,
|
||||
})
|
||||
|
||||
if len(plan.Candidates) != DefaultStablePeerTarget {
|
||||
t.Fatalf("candidate count = %d, want %d", len(plan.Candidates), DefaultStablePeerTarget)
|
||||
}
|
||||
if !recoveryPlanHasCandidate(plan, "node-usa-a", "maintain_ready") && !recoveryPlanHasCandidate(plan, "node-usa-b", "maintain_ready") {
|
||||
t.Fatalf("expected at least one usa candidate to be retained: %+v", plan.Candidates)
|
||||
}
|
||||
if !recoveryPlanHasCandidate(plan, "node-ifcm", "maintain_ready") {
|
||||
t.Fatalf("expected ifcm candidate to be retained for area diversity: %+v", plan.Candidates)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPeerRecoveryPlanSteadyModeAddsMissingExternalAreaCandidate(t *testing.T) {
|
||||
now := time.Date(2026, 5, 19, 12, 0, 0, 0, time.UTC)
|
||||
plan := PlanPeerRecovery(PeerRecoveryPlanConfig{
|
||||
PeerCache: PeerCacheSnapshot{
|
||||
Entries: []PeerCacheEntry{
|
||||
{NodeID: "node-test-a", Endpoint: "quic://node-test-a:19443", Warm: true, WarmReason: "route_adjacent", BestRegion: "test"},
|
||||
{NodeID: "node-test-b", Endpoint: "quic://node-test-b:19443", Warm: true, WarmReason: "route_adjacent", BestRegion: "test"},
|
||||
{NodeID: "node-usa", Endpoint: "quic://node-usa:19443", Warm: true, WarmReason: "route_adjacent", BestRegion: "usa", PublicIngressCount: 1},
|
||||
{NodeID: "node-home", Endpoint: "quic://node-home:19443", Warm: true, WarmReason: "route_adjacent", BestRegion: "home", PublicIngressCount: 1},
|
||||
},
|
||||
},
|
||||
Connections: PeerConnectionSnapshot{Entries: []PeerConnectionState{
|
||||
{NodeID: "node-test-a", State: PeerConnectionReady, LastLatencyMs: 10},
|
||||
{NodeID: "node-test-b", State: PeerConnectionReady, LastLatencyMs: 10},
|
||||
{NodeID: "node-usa", State: PeerConnectionReady, LastLatencyMs: 10},
|
||||
{NodeID: "node-home", State: PeerConnectionDegraded, LastLatencyMs: 20},
|
||||
}},
|
||||
PreferredRegion: "test",
|
||||
Now: now,
|
||||
})
|
||||
|
||||
if len(plan.Candidates) != DefaultStablePeerTarget {
|
||||
t.Fatalf("candidate count = %d, want %d", len(plan.Candidates), DefaultStablePeerTarget)
|
||||
}
|
||||
if !recoveryPlanHasCandidate(plan, "node-home", "recover_external_area") {
|
||||
t.Fatalf("expected missing external area candidate to be retained: %+v", plan.Candidates)
|
||||
}
|
||||
}
|
||||
|
||||
func recoveryPlanPeer(nodeID string, warm bool, recoverySeed bool, warmReason string) PeerCacheEntry {
|
||||
return PeerCacheEntry{
|
||||
NodeID: nodeID,
|
||||
|
||||
@@ -280,6 +280,9 @@ func (t *QUICProductionForwardTransport) sendProductionOnSession(ctx context.Con
|
||||
return fabricproto.Frame{}, 0, ErrForwardPeerUnavailable
|
||||
}
|
||||
if err != nil {
|
||||
if frame, ok := drainProductionResponseFrame(session, sequence); ok {
|
||||
return frame, time.Since(started).Milliseconds(), nil
|
||||
}
|
||||
return fabricproto.Frame{}, 0, err
|
||||
}
|
||||
case frame, ok := <-session.Frames():
|
||||
@@ -294,6 +297,25 @@ func (t *QUICProductionForwardTransport) sendProductionOnSession(ctx context.Con
|
||||
}
|
||||
}
|
||||
|
||||
func drainProductionResponseFrame(session FabricTransportSession, sequence uint64) (fabricproto.Frame, bool) {
|
||||
if session == nil {
|
||||
return fabricproto.Frame{}, false
|
||||
}
|
||||
for {
|
||||
select {
|
||||
case frame, ok := <-session.Frames():
|
||||
if !ok {
|
||||
return fabricproto.Frame{}, false
|
||||
}
|
||||
if frame.Type == fabricproto.FrameData && frame.StreamID == ProductionForwardQUICStreamID && frame.Sequence == sequence {
|
||||
return frame, true
|
||||
}
|
||||
default:
|
||||
return fabricproto.Frame{}, false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func decodeQUICProductionForwardResponse(payload []byte) (ProductionForwardResult, error) {
|
||||
var response quicProductionForwardResponse
|
||||
if err := json.Unmarshal(payload, &response); err != nil {
|
||||
|
||||
@@ -283,12 +283,28 @@ func (r *FabricRegistry) ResolveService(req FabricRegistryResolveRequest) Fabric
|
||||
return FabricRegistryResolvedService{Found: false, Reason: "service_required"}
|
||||
}
|
||||
scopeOrder := fabricRegistryScopeResolutionOrder(req.Scope, req.OrganizationID)
|
||||
if resolved := r.resolveServiceFromRecords(req, service, scopeOrder, false); resolved.Found || resolved.Reason == "no_usable_endpoints" {
|
||||
return resolved
|
||||
}
|
||||
if resolved := r.resolveServiceFromRecords(req, service, scopeOrder, true); resolved.Found || resolved.Reason == "no_usable_endpoints" {
|
||||
return resolved
|
||||
}
|
||||
return FabricRegistryResolvedService{Found: false, Service: service, Reason: "no_active_record"}
|
||||
}
|
||||
|
||||
func (r *FabricRegistry) resolveServiceFromRecords(req FabricRegistryResolveRequest, service string, scopeOrder []string, candidateOnly bool) FabricRegistryResolvedService {
|
||||
for _, scope := range scopeOrder {
|
||||
organizationID := strings.TrimSpace(req.OrganizationID)
|
||||
if scope != FabricRegistryScopeOrganization {
|
||||
organizationID = ""
|
||||
}
|
||||
record, ok := r.Active(req.ClusterID, service, scope, organizationID, req.Now)
|
||||
var record FabricRegistryGossipRecord
|
||||
var ok bool
|
||||
if candidateOnly {
|
||||
record, ok = r.Candidate(req.ClusterID, service, scope, organizationID, req.Now)
|
||||
} else {
|
||||
record, ok = r.Active(req.ClusterID, service, scope, organizationID, req.Now)
|
||||
}
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
@@ -306,9 +322,28 @@ func (r *FabricRegistry) ResolveService(req FabricRegistryResolveRequest) Fabric
|
||||
RecordEpoch: record.Epoch,
|
||||
RecordHash: hex.EncodeToString(sum[:]),
|
||||
Endpoints: endpoints,
|
||||
Reason: fabricRegistryResolveReason(candidateOnly),
|
||||
}
|
||||
}
|
||||
return FabricRegistryResolvedService{Found: false, Service: service, Reason: "no_active_record"}
|
||||
return FabricRegistryResolvedService{Found: false, Service: service}
|
||||
}
|
||||
|
||||
func (r *FabricRegistry) Candidate(clusterID, service, scope, organizationID string, now time.Time) (FabricRegistryGossipRecord, bool) {
|
||||
if r == nil {
|
||||
return FabricRegistryGossipRecord{}, false
|
||||
}
|
||||
entry, ok := r.candidates[fabricRegistryKey(clusterID, service, scope, organizationID)]
|
||||
if !ok || entry.State != FabricRegistryCandidate || !entry.Record.ExpiresAt.After(registryNow(now)) {
|
||||
return FabricRegistryGossipRecord{}, false
|
||||
}
|
||||
return entry.Record, true
|
||||
}
|
||||
|
||||
// fabricRegistryResolveReason returns the reason string attached to a
// resolved service: a pending-verification marker when the result came from a
// candidate-only lookup, and the empty string otherwise.
func fabricRegistryResolveReason(candidateOnly bool) string {
	reason := ""
	if candidateOnly {
		reason = "candidate_record_pending_live_verification"
	}
	return reason
}
|
||||
|
||||
func (r *FabricRegistry) Snapshot(now time.Time) FabricRegistrySnapshot {
|
||||
@@ -507,7 +542,7 @@ func validateFabricRegistryGossipRecord(record FabricRegistryGossipRecord, polic
|
||||
if strings.TrimSpace(endpoint.EndpointID) == "" || strings.TrimSpace(endpoint.Address) == "" || strings.TrimSpace(endpoint.Transport) == "" {
|
||||
return fmt.Errorf("fabric registry gossip record contains invalid endpoint")
|
||||
}
|
||||
if !isQUICOnlyCandidateTransport(endpoint.Transport) || hasLegacyEndpointScheme(endpoint.Address) {
|
||||
if !isQUICOnlyCandidateTransport(endpoint.Transport) || hasUnsupportedEndpointScheme(endpoint.Address) {
|
||||
return fmt.Errorf("fabric registry gossip endpoint must be QUIC-only")
|
||||
}
|
||||
if len(endpoint.Metadata) > 0 && !json.Valid(endpoint.Metadata) {
|
||||
@@ -605,7 +640,7 @@ func selectFabricRegistryEndpoints(endpoints []FabricRegistryEndpoint, preferred
|
||||
preferredRegion = strings.TrimSpace(preferredRegion)
|
||||
out := make([]FabricRegistryEndpoint, 0, len(endpoints))
|
||||
for _, endpoint := range endpoints {
|
||||
if strings.TrimSpace(endpoint.Address) == "" || !isQUICOnlyCandidateTransport(endpoint.Transport) || hasLegacyEndpointScheme(endpoint.Address) {
|
||||
if strings.TrimSpace(endpoint.Address) == "" || !isQUICOnlyCandidateTransport(endpoint.Transport) || hasUnsupportedEndpointScheme(endpoint.Address) {
|
||||
continue
|
||||
}
|
||||
out = append(out, endpoint)
|
||||
@@ -636,16 +671,10 @@ func probeFabricRegistryEndpoint(ctx context.Context, transport FabricTransport,
|
||||
if timeout <= 0 {
|
||||
timeout = 2 * time.Second
|
||||
}
|
||||
target := FabricTransportTarget{
|
||||
EndpointID: endpoint.EndpointID,
|
||||
PeerID: endpoint.EndpointID,
|
||||
Endpoint: endpoint.Address,
|
||||
Transport: endpoint.Transport,
|
||||
PeerCertSHA256: endpoint.PeerCertSHA256,
|
||||
Timeout: timeout,
|
||||
InboundBuffer: 2,
|
||||
ErrorBuffer: 2,
|
||||
}
|
||||
target := FabricTransportTargetFromRegistryEndpoint(endpoint)
|
||||
target.Timeout = timeout
|
||||
target.InboundBuffer = 2
|
||||
target.ErrorBuffer = 2
|
||||
startedAt := time.Now()
|
||||
session, err := transport.Connect(ctx, target)
|
||||
if err != nil {
|
||||
|
||||
@@ -45,7 +45,7 @@ func TestFabricRegistryGossipRecordRequiresTrustedSignature(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricRegistryRejectsLegacyEndpointAndExpiredRecord(t *testing.T) {
|
||||
func TestFabricRegistryRejectsDisallowedEndpointAndExpiredRecord(t *testing.T) {
|
||||
now := time.Date(2026, 5, 18, 10, 0, 0, 0, time.UTC)
|
||||
publicKey, privateKey, err := ed25519.GenerateKey(nil)
|
||||
if err != nil {
|
||||
@@ -65,7 +65,7 @@ func TestFabricRegistryRejectsLegacyEndpointAndExpiredRecord(t *testing.T) {
|
||||
},
|
||||
Now: now,
|
||||
}); err == nil {
|
||||
t.Fatal("legacy HTTP endpoint was accepted")
|
||||
t.Fatal("compat HTTP endpoint was accepted")
|
||||
}
|
||||
expired := testFabricRegistryGossipRecord(now.Add(-2*time.Hour), 11)
|
||||
expired.ExpiresAt = now.Add(-time.Minute)
|
||||
|
||||
@@ -523,7 +523,7 @@ func (s *RemoteWorkspaceFrameProbeSink) AcceptRemoteWorkspaceFrameBatchProbe(_ c
|
||||
AckedFrames: acceptedFrames,
|
||||
Backpressure: false,
|
||||
DropPolicy: "drop_droppable_overflow_ack_accepted",
|
||||
DeliverySequence: s.sequence,
|
||||
DeliverySequence: uint64(s.sequence),
|
||||
DeliveredAt: now.Format(time.RFC3339Nano),
|
||||
}
|
||||
s.last = receipt
|
||||
@@ -695,6 +695,24 @@ func isValidRemoteWorkspaceAdapterSessionID(adapterSessionID string) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// isValidRemoteWorkspaceAdapterMailboxConsumerID reports whether consumerID,
// after surrounding whitespace is trimmed, is between 1 and 128 bytes long and
// consists solely of ASCII letters, ASCII digits, '-', '_', '.' and ':'.
func isValidRemoteWorkspaceAdapterMailboxConsumerID(consumerID string) bool {
	const maxLen = 128

	id := strings.TrimSpace(consumerID)
	if n := len(id); n == 0 || n > maxLen {
		return false
	}

	// disallowed is true for any rune outside the accepted alphabet.
	disallowed := func(r rune) bool {
		if ('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z') || ('0' <= r && r <= '9') {
			return false
		}
		return r != '-' && r != '_' && r != '.' && r != ':'
	}
	return strings.IndexFunc(id, disallowed) == -1
}
|
||||
|
||||
func actionToAdapterSessionState(action string) string {
|
||||
switch action {
|
||||
case "expire":
|
||||
|
||||
@@ -106,7 +106,7 @@ func (cfg ScopedSyntheticConfig) Validate(local PeerIdentity) error {
|
||||
if strings.TrimSpace(nodeID) == "" || strings.TrimSpace(endpoint) == "" {
|
||||
return fmt.Errorf("scoped synthetic mesh config contains empty peer endpoint")
|
||||
}
|
||||
if hasLegacyEndpointScheme(endpoint) {
|
||||
if hasUnsupportedEndpointScheme(endpoint) {
|
||||
return fmt.Errorf("scoped synthetic mesh config contains non-QUIC peer endpoint")
|
||||
}
|
||||
}
|
||||
@@ -124,7 +124,7 @@ func (cfg ScopedSyntheticConfig) Validate(local PeerIdentity) error {
|
||||
strings.TrimSpace(candidate.ConnectivityMode) == "" {
|
||||
return fmt.Errorf("scoped synthetic mesh config contains invalid peer endpoint candidate")
|
||||
}
|
||||
if !isQUICOnlyCandidateTransport(candidate.Transport) || hasLegacyEndpointScheme(candidate.Address) {
|
||||
if !isQUICOnlyCandidateTransport(candidate.Transport) || hasUnsupportedEndpointScheme(candidate.Address) {
|
||||
return fmt.Errorf("scoped synthetic mesh config contains non-QUIC peer endpoint candidate")
|
||||
}
|
||||
}
|
||||
@@ -185,12 +185,12 @@ func validatePeerDirectory(entries []PeerDirectoryEntry, localNodeID string) err
|
||||
return nil
|
||||
}
|
||||
|
||||
// hasUnsupportedEndpointScheme reports whether endpoint carries an explicit
// URL scheme other than quic://.
//
// The endpoint is trimmed and lower-cased before inspection. An empty value,
// or one without a "://" separator (for example a bare host:port), is treated
// as having no scheme and is not rejected here. Anything containing "://"
// must start with "quic://" to be accepted.
func hasUnsupportedEndpointScheme(endpoint string) bool {
	endpoint = strings.ToLower(strings.TrimSpace(endpoint))
	if endpoint == "" || !strings.Contains(endpoint, "://") {
		return false
	}
	return !strings.HasPrefix(endpoint, "quic://")
}
|
||||
|
||||
func validateRecoverySeeds(seeds []PeerRecoverySeed) error {
|
||||
@@ -205,7 +205,7 @@ func validateRecoverySeeds(seeds []PeerRecoverySeed) error {
|
||||
strings.TrimSpace(seed.Transport) == "" {
|
||||
return fmt.Errorf("scoped synthetic mesh config contains invalid recovery seed")
|
||||
}
|
||||
if !isQUICOnlyCandidateTransport(seed.Transport) || hasLegacyEndpointScheme(seed.Endpoint) {
|
||||
if !isQUICOnlyCandidateTransport(seed.Transport) || hasUnsupportedEndpointScheme(seed.Endpoint) {
|
||||
return fmt.Errorf("scoped synthetic mesh config contains non-QUIC recovery seed")
|
||||
}
|
||||
if _, duplicate := seen[key]; duplicate {
|
||||
@@ -241,7 +241,7 @@ func validateRendezvousLeases(leases []PeerRendezvousLease, routes []SyntheticRo
|
||||
(len(lease.Metadata) > 0 && !json.Valid(lease.Metadata)) {
|
||||
return fmt.Errorf("scoped synthetic mesh config contains invalid rendezvous lease")
|
||||
}
|
||||
if !isQUICOnlyCandidateTransport(lease.Transport) || hasLegacyEndpointScheme(lease.RelayEndpoint) {
|
||||
if !isQUICOnlyCandidateTransport(lease.Transport) || hasUnsupportedEndpointScheme(lease.RelayEndpoint) {
|
||||
return fmt.Errorf("scoped synthetic mesh config contains non-QUIC rendezvous lease")
|
||||
}
|
||||
if _, duplicate := seen[lease.LeaseID]; duplicate {
|
||||
|
||||
@@ -174,7 +174,7 @@ func TestLoadScopedSyntheticConfigRejectsInvalidPeerEndpointCandidate(t *testing
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadScopedSyntheticConfigRejectsLegacyPeerEndpoint(t *testing.T) {
|
||||
func TestLoadScopedSyntheticConfigRejectsDisallowedPeerEndpoint(t *testing.T) {
|
||||
path := writeScopedConfig(t, ScopedSyntheticConfig{
|
||||
SchemaVersion: "c17f.synthetic.v1",
|
||||
ClusterID: "cluster-1",
|
||||
@@ -189,7 +189,7 @@ func TestLoadScopedSyntheticConfigRejectsLegacyPeerEndpoint(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadScopedSyntheticConfigRejectsLegacyPeerEndpointCandidateTransport(t *testing.T) {
|
||||
func TestLoadScopedSyntheticConfigRejectsDisallowedPeerEndpointCandidateTransport(t *testing.T) {
|
||||
path := writeScopedConfig(t, ScopedSyntheticConfig{
|
||||
SchemaVersion: "c17f.synthetic.v1",
|
||||
ClusterID: "cluster-1",
|
||||
@@ -215,7 +215,7 @@ func TestLoadScopedSyntheticConfigRejectsLegacyPeerEndpointCandidateTransport(t
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadScopedSyntheticConfigRejectsLegacyPeerEndpointCandidateScheme(t *testing.T) {
|
||||
func TestLoadScopedSyntheticConfigRejectsDisallowedPeerEndpointCandidateScheme(t *testing.T) {
|
||||
path := writeScopedConfig(t, ScopedSyntheticConfig{
|
||||
SchemaVersion: "c17f.synthetic.v1",
|
||||
ClusterID: "cluster-1",
|
||||
@@ -295,7 +295,7 @@ func TestLoadScopedSyntheticConfigRejectsInvalidRecoverySeed(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadScopedSyntheticConfigRejectsLegacyRecoverySeed(t *testing.T) {
|
||||
func TestLoadScopedSyntheticConfigRejectsDisallowedRecoverySeed(t *testing.T) {
|
||||
path := writeScopedConfig(t, ScopedSyntheticConfig{
|
||||
SchemaVersion: "c17f.synthetic.v1",
|
||||
ClusterID: "cluster-1",
|
||||
@@ -337,7 +337,7 @@ func TestLoadScopedSyntheticConfigRejectsInvalidRendezvousLease(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadScopedSyntheticConfigRejectsLegacyRendezvousLease(t *testing.T) {
|
||||
func TestLoadScopedSyntheticConfigRejectsDisallowedRendezvousLease(t *testing.T) {
|
||||
path := writeScopedConfig(t, ScopedSyntheticConfig{
|
||||
SchemaVersion: "c17z12.synthetic.v1",
|
||||
ClusterID: "cluster-1",
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -12,6 +12,21 @@ import (
|
||||
type VPNPacketBatchPayload struct {
|
||||
SchemaVersion string `json:"schema_version"`
|
||||
VPNConnectionID string `json:"vpn_connection_id"`
|
||||
TunnelID string `json:"tunnel_id,omitempty"`
|
||||
PoolID string `json:"pool_id,omitempty"`
|
||||
ServiceID string `json:"service_id,omitempty"`
|
||||
LocalServiceID string `json:"local_service_id,omitempty"`
|
||||
RemoteServiceID string `json:"remote_service_id,omitempty"`
|
||||
ServiceKind string `json:"service_kind,omitempty"`
|
||||
ServiceClass string `json:"service_class,omitempty"`
|
||||
ServiceRole string `json:"service_role,omitempty"`
|
||||
RouteLeaseID string `json:"route_lease_id,omitempty"`
|
||||
RouteGeneration string `json:"route_generation,omitempty"`
|
||||
DataPlane string `json:"data_plane,omitempty"`
|
||||
TransportOwner string `json:"transport_owner,omitempty"`
|
||||
RouteVisibility string `json:"route_visibility,omitempty"`
|
||||
TrafficClasses []string `json:"traffic_classes,omitempty"`
|
||||
StreamShards int `json:"stream_shards,omitempty"`
|
||||
Direction string `json:"direction"`
|
||||
Packets [][]byte `json:"packets"`
|
||||
SentAt time.Time `json:"sent_at"`
|
||||
|
||||
Reference in New Issue
Block a user