Initial project snapshot
This commit is contained in:
@@ -0,0 +1,111 @@
|
||||
package mesh
|
||||
|
||||
import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"strings"
	"time"
)
|
||||
|
||||
// Client sends mesh control messages to a single peer over HTTP.
type Client struct {
	// BaseURL is the prefix the Send methods prepend to each /mesh/v1/... path.
	BaseURL string
	// HTTPClient performs the requests. When nil, the Send methods fall back
	// to http.DefaultClient.
	HTTPClient *http.Client
}
|
||||
|
||||
func NewClient(baseURL string) Client {
|
||||
return Client{
|
||||
BaseURL: baseURL,
|
||||
HTTPClient: &http.Client{
|
||||
Timeout: 5 * time.Second,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func (c Client) SendHealth(ctx context.Context, message HealthMessage) (HealthAck, error) {
|
||||
payload, err := json.Marshal(message)
|
||||
if err != nil {
|
||||
return HealthAck{}, err
|
||||
}
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.BaseURL+"/mesh/v1/health", bytes.NewReader(payload))
|
||||
if err != nil {
|
||||
return HealthAck{}, err
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
httpClient := c.HTTPClient
|
||||
if httpClient == nil {
|
||||
httpClient = http.DefaultClient
|
||||
}
|
||||
resp, err := httpClient.Do(req)
|
||||
if err != nil {
|
||||
return HealthAck{}, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||
return HealthAck{}, fmt.Errorf("mesh health rejected with status %d", resp.StatusCode)
|
||||
}
|
||||
var ack HealthAck
|
||||
if err := json.NewDecoder(resp.Body).Decode(&ack); err != nil {
|
||||
return HealthAck{}, err
|
||||
}
|
||||
return ack, nil
|
||||
}
|
||||
|
||||
func (c Client) SendSynthetic(ctx context.Context, envelope SyntheticEnvelope) (SyntheticEnvelope, error) {
|
||||
payload, err := json.Marshal(envelope)
|
||||
if err != nil {
|
||||
return SyntheticEnvelope{}, err
|
||||
}
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.BaseURL+"/mesh/v1/synthetic/probe", bytes.NewReader(payload))
|
||||
if err != nil {
|
||||
return SyntheticEnvelope{}, err
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
httpClient := c.HTTPClient
|
||||
if httpClient == nil {
|
||||
httpClient = http.DefaultClient
|
||||
}
|
||||
resp, err := httpClient.Do(req)
|
||||
if err != nil {
|
||||
return SyntheticEnvelope{}, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||
return SyntheticEnvelope{}, fmt.Errorf("mesh synthetic probe rejected with status %d", resp.StatusCode)
|
||||
}
|
||||
var ack SyntheticEnvelope
|
||||
if err := json.NewDecoder(resp.Body).Decode(&ack); err != nil {
|
||||
return SyntheticEnvelope{}, err
|
||||
}
|
||||
return ack, nil
|
||||
}
|
||||
|
||||
func (c Client) SendProduction(ctx context.Context, envelope ProductionEnvelope) (ProductionForwardResult, error) {
|
||||
payload, err := json.Marshal(envelope)
|
||||
if err != nil {
|
||||
return ProductionForwardResult{}, err
|
||||
}
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.BaseURL+"/mesh/v1/forward", bytes.NewReader(payload))
|
||||
if err != nil {
|
||||
return ProductionForwardResult{}, err
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
httpClient := c.HTTPClient
|
||||
if httpClient == nil {
|
||||
httpClient = http.DefaultClient
|
||||
}
|
||||
resp, err := httpClient.Do(req)
|
||||
if err != nil {
|
||||
return ProductionForwardResult{}, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||
return ProductionForwardResult{}, fmt.Errorf("mesh production forward rejected with status %d", resp.StatusCode)
|
||||
}
|
||||
var result ProductionForwardResult
|
||||
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
|
||||
return ProductionForwardResult{}, err
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
@@ -0,0 +1,288 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"time"
|
||||
)
|
||||
|
||||
// ProtocolVersion is the version identifier of the mesh control protocol.
// NOTE(review): presumably the value carried in the protocol_version JSON
// fields of the message types in this package — confirm against the handlers.
const ProtocolVersion = "mesh-control-v1"
|
||||
|
||||
// Sentinel errors of the mesh package. They are plain errors.New values, so
// callers can match them with errors.Is.
var (
	// Peer validation (returned by ValidatePeer).
	ErrClusterMismatch = errors.New("mesh peer cluster mismatch")
	ErrNodeMismatch    = errors.New("mesh peer node mismatch")

	// Production forwarding.
	ErrForwardDisabled           = errors.New("production payload forwarding is disabled by mesh production gate")
	ErrForwardRuntimeUnavailable = errors.New("production mesh forwarding runtime is unavailable for this route or stage")
	ErrForwardPeerUnavailable    = errors.New("production mesh next peer is unavailable")
	ErrForwardEnvelopeInvalid    = errors.New("production mesh envelope is invalid")
	ErrForwardObservationFailed  = errors.New("production mesh envelope observation failed")

	// Synthetic runtime, routing, relay queue and test service.
	ErrMeshRuntimeDisabled           = errors.New("mesh synthetic runtime is disabled")
	ErrUnsupportedSyntheticMessage   = errors.New("unsupported synthetic mesh message")
	ErrRouteIDRequired               = errors.New("mesh synthetic route id is required")
	ErrRouteNotFound                 = errors.New("mesh synthetic route not found")
	ErrInvalidRoutePath              = errors.New("mesh synthetic route path is invalid")
	ErrRouteExpired                  = errors.New("mesh synthetic route is expired")
	ErrTTLExhausted                  = errors.New("mesh synthetic route ttl exhausted")
	ErrLoopDetected                  = errors.New("mesh synthetic route loop detected")
	ErrUnauthorizedChannel           = errors.New("mesh synthetic channel is not authorized")
	ErrSyntheticPeerUnavailable      = errors.New("mesh synthetic next peer is unavailable")
	ErrNoHealthySyntheticRoute       = errors.New("mesh synthetic no healthy route available")
	ErrSyntheticRelayQueueFull       = errors.New("mesh synthetic relay queue is full")
	ErrSyntheticRelayQueueEmpty      = errors.New("mesh synthetic relay queue is empty")
	ErrSyntheticPayloadTooLarge      = errors.New("mesh synthetic payload is too large")
	ErrSyntheticOrganizationMismatch = errors.New("mesh synthetic organization mismatch")
	ErrUnsupportedSyntheticService   = errors.New("unsupported synthetic test service")
	ErrSyntheticRequestInvalid       = errors.New("mesh synthetic request is invalid")
)
|
||||
|
||||
// Well-known string identifiers and limits used by the synthetic and
// production mesh messages.
const (
	// Synthetic message type identifiers.
	SyntheticMessageProbe          = "fabric.probe"
	SyntheticMessageProbeAck       = "fabric.probe_ack"
	SyntheticMessageRouteHealth    = "fabric.route_health"
	SyntheticMessageRouteHealthAck = "fabric.route_health_ack"
	SyntheticMessageTelemetry      = "fabric.telemetry"
	SyntheticMessageTestService    = "fabric.test_service"
	SyntheticMessageTestServiceAck = "fabric.test_service_ack"

	// Synthetic test service defaults.
	SyntheticTestServiceType            = "synthetic.echo"
	SyntheticDefaultTestOrganizationID  = "org-test"
	SyntheticDefaultMaxTestPayloadBytes = 4096

	// Synthetic channel names. The fabric and route control channels are the
	// ones endpoint scoring treats as control traffic.
	SyntheticChannelFabricControl = "fabric_control"
	SyntheticChannelRouteControl  = "route_control"
	SyntheticChannelTelemetry     = "telemetry"

	// Synthetic route health states.
	SyntheticRouteStateUnknown  = "unknown"
	SyntheticRouteStateHealthy  = "healthy"
	SyntheticRouteStateDegraded = "degraded"
	SyntheticRouteStateFailed   = "failed"

	// Production envelope identifiers and limits.
	ProductionChannelFabricControl    = "fabric_control"
	ProductionMessageFabricControl    = "fabric.control"
	MaxProductionEnvelopePayloadBytes = 4096
	MaxProductionEnvelopeFutureSkew   = time.Minute
)
|
||||
|
||||
// PeerIdentity names a mesh peer by its cluster ID and node ID.
type PeerIdentity struct {
	ClusterID string `json:"cluster_id"`
	NodeID    string `json:"node_id"`
}
|
||||
|
||||
// SyntheticRoute is the JSON description of a synthetic route: its
// identifiers, source and destination nodes, hop list, allowed channels,
// expiry, TTL and hop limits, and optional version strings.
type SyntheticRoute struct {
	RouteID              string    `json:"route_id"`
	ClusterID            string    `json:"cluster_id"`
	SourceNodeID         string    `json:"source_node_id"`
	DestinationNodeID    string    `json:"destination_node_id"`
	Hops                 []string  `json:"hops"`
	AllowedChannels      []string  `json:"allowed_channels"`
	ExpiresAt            time.Time `json:"expires_at"`
	MaxTTL               int       `json:"max_ttl"`
	MaxHops              int       `json:"max_hops"`
	RouteVersion         string    `json:"route_version,omitempty"`
	PolicyVersion        string    `json:"policy_version,omitempty"`
	PeerDirectoryVersion string    `json:"peer_directory_version,omitempty"`
}
|
||||
|
||||
// SyntheticEnvelope is the JSON envelope for synthetic mesh messages. It is
// both the request and the response type of Client.SendSynthetic. Payload is
// kept as raw JSON so it can be decoded separately from the envelope.
type SyntheticEnvelope struct {
	ProtocolVersion string          `json:"protocol_version"`
	RouteID         string          `json:"route_id"`
	ClusterID       string          `json:"cluster_id"`
	From            PeerIdentity    `json:"from"`
	To              PeerIdentity    `json:"to"`
	Channel         string          `json:"channel"`
	MessageType     string          `json:"message_type"`
	TTL             int             `json:"ttl"`
	HopCount        int             `json:"hop_count"`
	Visited         []string        `json:"visited"`
	Sequence        uint64          `json:"sequence"`
	SentAt          time.Time       `json:"sent_at"`
	Payload         json.RawMessage `json:"payload,omitempty"`
}
|
||||
|
||||
// SyntheticProbePayload is the JSON payload of a probe: a probe ID and the
// time it was sent.
type SyntheticProbePayload struct {
	ProbeID string    `json:"probe_id"`
	SentAt  time.Time `json:"sent_at"`
}
|
||||
|
||||
// SyntheticProbeAckPayload is the JSON payload of a probe acknowledgement:
// the probe ID, a path, and the acceptance time.
type SyntheticProbeAckPayload struct {
	ProbeID    string    `json:"probe_id"`
	Path       []string  `json:"path"`
	AcceptedAt time.Time `json:"accepted_at"`
}
|
||||
|
||||
// SyntheticRouteObservation is the JSON record of a route's observed health:
// state, last success and failure details, counters, last latency, and
// optional version strings.
//
// NOTE: encoding/json never treats a time.Time value as empty, so the
// omitempty option on LastSuccessAt and LastFailureAt does not suppress zero
// timestamps; they are still emitted.
type SyntheticRouteObservation struct {
	RouteID              string    `json:"route_id"`
	State                string    `json:"state"`
	LastSuccessAt        time.Time `json:"last_success_at,omitempty"`
	LastFailureAt        time.Time `json:"last_failure_at,omitempty"`
	LastFailureReason    string    `json:"last_failure_reason,omitempty"`
	SuccessCount         uint64    `json:"success_count"`
	FailureCount         uint64    `json:"failure_count"`
	LastLatencyMs        int64     `json:"last_latency_ms,omitempty"`
	RouteVersion         string    `json:"route_version,omitempty"`
	PolicyVersion        string    `json:"policy_version,omitempty"`
	PeerDirectoryVersion string    `json:"peer_directory_version,omitempty"`
}
|
||||
|
||||
// SyntheticRouteHealthResult is the JSON result of a route health check: the
// requested and selected route IDs, whether a fallback was used, the
// acknowledgement envelope, and the resulting observation.
type SyntheticRouteHealthResult struct {
	RequestedRouteID string                    `json:"requested_route_id"`
	SelectedRouteID  string                    `json:"selected_route_id"`
	FallbackUsed     bool                      `json:"fallback_used"`
	Ack              SyntheticEnvelope         `json:"ack"`
	Observation      SyntheticRouteObservation `json:"observation"`
}
|
||||
|
||||
// SyntheticTestServiceRequest is the JSON request sent to the synthetic test
// service.
type SyntheticTestServiceRequest struct {
	RequestID      string    `json:"request_id"`
	OrganizationID string    `json:"organization_id"`
	ServiceType    string    `json:"service_type"`
	Payload        string    `json:"payload"`
	SentAt         time.Time `json:"sent_at"`
}
|
||||
|
||||
// SyntheticTestServiceResponse is the JSON response of the synthetic test
// service. It repeats the request identifiers and adds an echo payload, a
// path, and the acceptance time.
type SyntheticTestServiceResponse struct {
	RequestID      string    `json:"request_id"`
	OrganizationID string    `json:"organization_id"`
	ServiceType    string    `json:"service_type"`
	EchoPayload    string    `json:"echo_payload"`
	Path           []string  `json:"path"`
	AcceptedAt     time.Time `json:"accepted_at"`
}
|
||||
|
||||
// SyntheticTestServiceResult is the JSON result of a test service call: the
// requested and selected route IDs, whether a fallback was used, the
// acknowledgement envelope, the decoded response, and the route observation.
type SyntheticTestServiceResult struct {
	RequestedRouteID string                       `json:"requested_route_id"`
	SelectedRouteID  string                       `json:"selected_route_id"`
	FallbackUsed     bool                         `json:"fallback_used"`
	Ack              SyntheticEnvelope            `json:"ack"`
	Response         SyntheticTestServiceResponse `json:"response"`
	Observation      SyntheticRouteObservation    `json:"observation"`
}
|
||||
|
||||
// SyntheticRouteCacheVersion groups the three optional version strings
// (route, policy, peer directory); each is omitted from JSON when empty.
type SyntheticRouteCacheVersion struct {
	RouteVersion         string `json:"route_version,omitempty"`
	PolicyVersion        string `json:"policy_version,omitempty"`
	PeerDirectoryVersion string `json:"peer_directory_version,omitempty"`
}
|
||||
|
||||
// SyntheticRelayQueuePolicy is the JSON policy for one relay queue channel:
// its capacity and whether it is droppable.
type SyntheticRelayQueuePolicy struct {
	Channel   string `json:"channel"`
	Capacity  int    `json:"capacity"`
	Droppable bool   `json:"droppable"`
}
|
||||
|
||||
// SyntheticRelayEnqueueResult is the JSON outcome of a relay enqueue: queue
// depth and capacity for the channel, whether something was dropped, and the
// dropped and accepted sequence numbers. DroppedSequence is omitted from JSON
// when zero.
type SyntheticRelayEnqueueResult struct {
	Channel          string `json:"channel"`
	QueueDepth       int    `json:"queue_depth"`
	QueueCapacity    int    `json:"queue_capacity"`
	Dropped          bool   `json:"dropped"`
	DroppedSequence  uint64 `json:"dropped_sequence,omitempty"`
	AcceptedSequence uint64 `json:"accepted_sequence"`
}
|
||||
|
||||
// SyntheticRelayQueueMetrics is the JSON snapshot of relay queue counters
// plus a map of queue depths.
// NOTE(review): QueueDepths is presumably keyed by channel name — confirm.
type SyntheticRelayQueueMetrics struct {
	Enqueued         uint64         `json:"enqueued"`
	Dequeued         uint64         `json:"dequeued"`
	Dropped          uint64         `json:"dropped"`
	Rejected         uint64         `json:"rejected"`
	LastRejectReason string         `json:"last_reject_reason,omitempty"`
	QueueDepths      map[string]int `json:"queue_depths"`
}
|
||||
|
||||
// HealthMessage is the JSON body posted by Client.SendHealth. LatencyMs and
// QualityScore are pointers, so they are omitted from JSON when nil and an
// explicit zero can still be sent.
type HealthMessage struct {
	ProtocolVersion string       `json:"protocol_version"`
	From            PeerIdentity `json:"from"`
	To              PeerIdentity `json:"to"`
	ObservedAt      time.Time    `json:"observed_at"`
	LinkStatus      string       `json:"link_status"`
	LatencyMs       *int         `json:"latency_ms,omitempty"`
	QualityScore    *int         `json:"quality_score,omitempty"`
}
|
||||
|
||||
// HealthAck is the JSON acknowledgement decoded by Client.SendHealth.
type HealthAck struct {
	ProtocolVersion string       `json:"protocol_version"`
	Accepted        bool         `json:"accepted"`
	By              PeerIdentity `json:"by"`
}
|
||||
|
||||
// ProductionEnvelope is the JSON envelope posted by Client.SendProduction.
// It carries routing identifiers, hop bookkeeping, creation and expiry
// times, and a raw JSON payload together with its declared length and hash.
type ProductionEnvelope struct {
	FabricProtocolVersion string          `json:"fabric_protocol_version"`
	MessageID             string          `json:"message_id"`
	RouteID               string          `json:"route_id"`
	ClusterID             string          `json:"cluster_id"`
	SourceNodeID          string          `json:"source_node_id"`
	DestinationNodeID     string          `json:"destination_node_id"`
	CurrentHopNodeID      string          `json:"current_hop_node_id"`
	NextHopNodeID         string          `json:"next_hop_node_id"`
	RoutePath             []string        `json:"route_path,omitempty"`
	VisitedNodeIDs        []string        `json:"visited_node_ids,omitempty"`
	ChannelClass          string          `json:"channel_class"`
	MessageType           string          `json:"message_type"`
	TTL                   int             `json:"ttl"`
	HopCount              int             `json:"hop_count"`
	CreatedAt             time.Time       `json:"created_at"`
	ExpiresAt             time.Time       `json:"expires_at"`
	PayloadLength         int             `json:"payload_length"`
	PayloadHash           string          `json:"payload_hash"`
	Payload               json.RawMessage `json:"payload,omitempty"`
}
|
||||
|
||||
// ProductionEnvelopeObservation is the JSON record of an observed production
// envelope. It repeats the envelope's routing and payload metadata (length
// and hash) without the payload itself, and adds the observation time.
type ProductionEnvelopeObservation struct {
	MessageID         string    `json:"message_id"`
	RouteID           string    `json:"route_id"`
	ClusterID         string    `json:"cluster_id"`
	SourceNodeID      string    `json:"source_node_id"`
	DestinationNodeID string    `json:"destination_node_id"`
	CurrentHopNodeID  string    `json:"current_hop_node_id"`
	NextHopNodeID     string    `json:"next_hop_node_id"`
	RoutePath         []string  `json:"route_path,omitempty"`
	VisitedNodeIDs    []string  `json:"visited_node_ids,omitempty"`
	ChannelClass      string    `json:"channel_class"`
	MessageType       string    `json:"message_type"`
	TTL               int       `json:"ttl"`
	HopCount          int       `json:"hop_count"`
	PayloadLength     int       `json:"payload_length"`
	PayloadHash       string    `json:"payload_hash"`
	ObservedAt        time.Time `json:"observed_at"`
}
|
||||
|
||||
// ProductionForwardResult is the JSON result decoded by
// Client.SendProduction: acceptance, delivery and forwarding flags, the
// responding peer, and message and route identifiers. NextNodeID is omitted
// from JSON when empty.
type ProductionForwardResult struct {
	Accepted   bool         `json:"accepted"`
	Delivered  bool         `json:"delivered"`
	Forwarded  bool         `json:"forwarded"`
	By         PeerIdentity `json:"by"`
	MessageID  string       `json:"message_id"`
	RouteID    string       `json:"route_id"`
	NextNodeID string       `json:"next_node_id,omitempty"`
}
|
||||
|
||||
// ProductionForwardLogEntry is the JSON shape of a production forwarding log
// record. Only Event and OccurredAt are always emitted; every other field is
// omitted when it holds its zero value, so a zero TTL, hop count, or status
// code is indistinguishable from an absent one in the output.
type ProductionForwardLogEntry struct {
	Event             string    `json:"event"`
	RouteID           string    `json:"route_id,omitempty"`
	MessageID         string    `json:"message_id,omitempty"`
	ClusterID         string    `json:"cluster_id,omitempty"`
	LocalNodeID       string    `json:"local_node_id,omitempty"`
	SourceNodeID      string    `json:"source_node_id,omitempty"`
	DestinationNodeID string    `json:"destination_node_id,omitempty"`
	CurrentHopNodeID  string    `json:"current_hop_node_id,omitempty"`
	NextHopNodeID     string    `json:"next_hop_node_id,omitempty"`
	ChannelClass      string    `json:"channel_class,omitempty"`
	MessageType       string    `json:"message_type,omitempty"`
	Reason            string    `json:"reason,omitempty"`
	StatusCode        int       `json:"status_code,omitempty"`
	TTL               int       `json:"ttl,omitempty"`
	HopCount          int       `json:"hop_count,omitempty"`
	RoutePathLength   int       `json:"route_path_length,omitempty"`
	VisitedCount      int       `json:"visited_count,omitempty"`
	PayloadLength     int       `json:"payload_length,omitempty"`
	OccurredAt        time.Time `json:"occurred_at"`
}
|
||||
|
||||
func ValidatePeer(local PeerIdentity, remote PeerIdentity) error {
|
||||
if local.ClusterID == "" || remote.ClusterID == "" || local.ClusterID != remote.ClusterID {
|
||||
return ErrClusterMismatch
|
||||
}
|
||||
if remote.NodeID == "" {
|
||||
return ErrNodeMismatch
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,258 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// EndpointCandidateScoreOptions tunes how RankPeerEndpointCandidates scores
// candidates. The zero value disables every optional adjustment.
type EndpointCandidateScoreOptions struct {
	// ChannelClass, when it equals SyntheticChannelFabricControl or
	// SyntheticChannelRouteControl, adds a bonus for direct connectivity and
	// a penalty for relay transport.
	ChannelClass string
	// PreferredRegion is compared case-insensitively with the candidate's
	// region; the comparison is skipped when either side is empty.
	PreferredRegion string
	// Now is the reference time for freshness checks. A zero value disables
	// both the verification-age and the observation-age checks.
	Now time.Time
	// MaxVerificationAge bounds how old a candidate's LastVerifiedAt may be
	// to count as fresh. The check only runs when this is positive.
	MaxVerificationAge time.Duration
	// Observations holds health observations looked up by candidate
	// EndpointID.
	Observations map[string]EndpointCandidateHealthObservation
	// MaxObservationAge bounds how old an observation may be before it is
	// scored as stale. The check only runs when this is positive.
	MaxObservationAge time.Duration
}
|
||||
|
||||
// EndpointCandidateHealthObservation is a health sample for one endpoint that
// is overlaid on the static candidate score.
//
// NOTE: encoding/json never treats a time.Time value as empty, so the
// omitempty option on ObservedAt does not suppress a zero timestamp.
type EndpointCandidateHealthObservation struct {
	EndpointID        string    `json:"endpoint_id"`
	LastLatencyMs     int64     `json:"last_latency_ms,omitempty"`
	SuccessCount      uint64    `json:"success_count,omitempty"`
	FailureCount      uint64    `json:"failure_count,omitempty"`
	LastFailureReason string    `json:"last_failure_reason,omitempty"`
	ReliabilityScore  int       `json:"reliability_score,omitempty"`
	ObservedAt        time.Time `json:"observed_at,omitempty"`
}
|
||||
|
||||
// ScoredPeerEndpointCandidate pairs a candidate with its computed score and
// the reason labels that contributed to it.
type ScoredPeerEndpointCandidate struct {
	Candidate PeerEndpointCandidate `json:"candidate"`
	Score     int                   `json:"score"`
	Reasons   []string              `json:"reasons,omitempty"`
}
|
||||
|
||||
func RankPeerEndpointCandidates(candidates []PeerEndpointCandidate, opts EndpointCandidateScoreOptions) []ScoredPeerEndpointCandidate {
|
||||
if len(candidates) == 0 {
|
||||
return nil
|
||||
}
|
||||
out := make([]ScoredPeerEndpointCandidate, 0, len(candidates))
|
||||
for _, candidate := range candidates {
|
||||
out = append(out, scorePeerEndpointCandidate(candidate, opts))
|
||||
}
|
||||
sort.SliceStable(out, func(i, j int) bool {
|
||||
if out[i].Score != out[j].Score {
|
||||
return out[i].Score > out[j].Score
|
||||
}
|
||||
if out[i].Candidate.Priority != out[j].Candidate.Priority {
|
||||
return out[i].Candidate.Priority < out[j].Candidate.Priority
|
||||
}
|
||||
if out[i].Candidate.NodeID != out[j].Candidate.NodeID {
|
||||
return out[i].Candidate.NodeID < out[j].Candidate.NodeID
|
||||
}
|
||||
return out[i].Candidate.EndpointID < out[j].Candidate.EndpointID
|
||||
})
|
||||
return out
|
||||
}
|
||||
|
||||
func scorePeerEndpointCandidate(candidate PeerEndpointCandidate, opts EndpointCandidateScoreOptions) ScoredPeerEndpointCandidate {
|
||||
score := 100
|
||||
reasons := []string{"base"}
|
||||
|
||||
switch candidate.Transport {
|
||||
case "direct_tcp_tls":
|
||||
score += 35
|
||||
reasons = append(reasons, "transport:direct_tcp_tls")
|
||||
case "wss":
|
||||
score += 25
|
||||
reasons = append(reasons, "transport:wss")
|
||||
case "outbound_reverse":
|
||||
score += 10
|
||||
reasons = append(reasons, "transport:outbound_reverse")
|
||||
case "relay":
|
||||
score += 5
|
||||
reasons = append(reasons, "transport:relay")
|
||||
default:
|
||||
score -= 100
|
||||
reasons = append(reasons, "transport:unknown")
|
||||
}
|
||||
|
||||
switch candidate.Reachability {
|
||||
case "public":
|
||||
score += 30
|
||||
reasons = append(reasons, "reachability:public")
|
||||
case "private":
|
||||
score += 15
|
||||
reasons = append(reasons, "reachability:private")
|
||||
case "relay":
|
||||
score += 5
|
||||
reasons = append(reasons, "reachability:relay")
|
||||
case "outbound_only":
|
||||
score -= 5
|
||||
reasons = append(reasons, "reachability:outbound_only")
|
||||
default:
|
||||
score -= 15
|
||||
reasons = append(reasons, "reachability:unknown")
|
||||
}
|
||||
|
||||
switch candidate.ConnectivityMode {
|
||||
case "direct":
|
||||
score += 30
|
||||
reasons = append(reasons, "connectivity:direct")
|
||||
case "outbound_only":
|
||||
score += 5
|
||||
reasons = append(reasons, "connectivity:outbound_only")
|
||||
case "relay_required":
|
||||
score -= 5
|
||||
reasons = append(reasons, "connectivity:relay_required")
|
||||
default:
|
||||
score -= 10
|
||||
reasons = append(reasons, "connectivity:unknown")
|
||||
}
|
||||
|
||||
switch candidate.NATType {
|
||||
case "", "none":
|
||||
score += 15
|
||||
reasons = append(reasons, "nat:none")
|
||||
case "full_cone":
|
||||
score += 10
|
||||
reasons = append(reasons, "nat:full_cone")
|
||||
case "restricted", "port_restricted":
|
||||
score += 3
|
||||
reasons = append(reasons, "nat:restricted")
|
||||
case "symmetric":
|
||||
score -= 20
|
||||
reasons = append(reasons, "nat:symmetric")
|
||||
case "blocked":
|
||||
score -= 60
|
||||
reasons = append(reasons, "nat:blocked")
|
||||
default:
|
||||
score -= 8
|
||||
reasons = append(reasons, "nat:unknown")
|
||||
}
|
||||
|
||||
if candidate.Priority > 0 {
|
||||
score -= candidate.Priority
|
||||
reasons = append(reasons, "priority")
|
||||
}
|
||||
if opts.PreferredRegion != "" && candidate.Region != "" {
|
||||
if strings.EqualFold(candidate.Region, opts.PreferredRegion) {
|
||||
score += 12
|
||||
reasons = append(reasons, "region:preferred")
|
||||
} else {
|
||||
score -= 4
|
||||
reasons = append(reasons, "region:remote")
|
||||
}
|
||||
}
|
||||
if hasPolicyTag(candidate.PolicyTags, "fast-path") {
|
||||
score += 10
|
||||
reasons = append(reasons, "policy:fast-path")
|
||||
}
|
||||
if hasPolicyTag(candidate.PolicyTags, "private-lan") || hasPolicyTag(candidate.PolicyTags, "corp-lan") || hasPolicyTag(candidate.PolicyTags, "same-site") {
|
||||
score += 18
|
||||
reasons = append(reasons, "policy:private-lan")
|
||||
}
|
||||
if hasPolicyTag(candidate.PolicyTags, "costly") {
|
||||
score -= 10
|
||||
reasons = append(reasons, "policy:costly")
|
||||
}
|
||||
if opts.ChannelClass == SyntheticChannelFabricControl || opts.ChannelClass == SyntheticChannelRouteControl {
|
||||
if candidate.ConnectivityMode == "direct" {
|
||||
score += 8
|
||||
reasons = append(reasons, "channel:control-direct")
|
||||
}
|
||||
if candidate.Transport == "relay" {
|
||||
score -= 8
|
||||
reasons = append(reasons, "channel:control-relay-penalty")
|
||||
}
|
||||
}
|
||||
if !opts.Now.IsZero() && candidate.LastVerifiedAt != nil && opts.MaxVerificationAge > 0 {
|
||||
age := opts.Now.Sub(candidate.LastVerifiedAt.UTC())
|
||||
if age >= 0 && age <= opts.MaxVerificationAge {
|
||||
score += 8
|
||||
reasons = append(reasons, "verified:fresh")
|
||||
} else {
|
||||
score -= 12
|
||||
reasons = append(reasons, "verified:stale")
|
||||
}
|
||||
}
|
||||
if observation, ok := opts.Observations[candidate.EndpointID]; ok {
|
||||
observationScore, observationReasons := scoreEndpointCandidateObservation(observation, opts)
|
||||
score += observationScore
|
||||
reasons = append(reasons, observationReasons...)
|
||||
}
|
||||
|
||||
return ScoredPeerEndpointCandidate{
|
||||
Candidate: candidate,
|
||||
Score: score,
|
||||
Reasons: reasons,
|
||||
}
|
||||
}
|
||||
|
||||
func scoreEndpointCandidateObservation(observation EndpointCandidateHealthObservation, opts EndpointCandidateScoreOptions) (int, []string) {
|
||||
score := 0
|
||||
reasons := []string{"observation:present"}
|
||||
if !opts.Now.IsZero() && !observation.ObservedAt.IsZero() && opts.MaxObservationAge > 0 {
|
||||
age := opts.Now.Sub(observation.ObservedAt.UTC())
|
||||
if age < 0 || age > opts.MaxObservationAge {
|
||||
return -12, []string{"observation:stale"}
|
||||
}
|
||||
score += 6
|
||||
reasons = append(reasons, "observation:fresh")
|
||||
}
|
||||
switch {
|
||||
case observation.LastLatencyMs > 0 && observation.LastLatencyMs <= 50:
|
||||
score += 18
|
||||
reasons = append(reasons, "latency:low")
|
||||
case observation.LastLatencyMs <= 150:
|
||||
score += 8
|
||||
reasons = append(reasons, "latency:moderate")
|
||||
case observation.LastLatencyMs > 0:
|
||||
score -= 10
|
||||
reasons = append(reasons, "latency:high")
|
||||
}
|
||||
if observation.ReliabilityScore > 0 {
|
||||
switch {
|
||||
case observation.ReliabilityScore >= 90:
|
||||
score += 15
|
||||
reasons = append(reasons, "reliability:high")
|
||||
case observation.ReliabilityScore >= 70:
|
||||
score += 5
|
||||
reasons = append(reasons, "reliability:moderate")
|
||||
default:
|
||||
score -= 12
|
||||
reasons = append(reasons, "reliability:low")
|
||||
}
|
||||
}
|
||||
if observation.SuccessCount > 0 {
|
||||
score += boundedInt(int(observation.SuccessCount), 1, 10)
|
||||
reasons = append(reasons, "history:success")
|
||||
}
|
||||
if observation.FailureCount > 0 {
|
||||
score -= boundedInt(int(observation.FailureCount)*6, 6, 30)
|
||||
reasons = append(reasons, "history:failure")
|
||||
}
|
||||
if strings.TrimSpace(observation.LastFailureReason) != "" {
|
||||
score -= 8
|
||||
reasons = append(reasons, "failure:recent")
|
||||
}
|
||||
return score, reasons
|
||||
}
|
||||
|
||||
// hasPolicyTag reports whether tags contains needle. Each tag is stripped of
// surrounding whitespace and compared case-insensitively; needle itself is
// used as given.
func hasPolicyTag(tags []string, needle string) bool {
	for i := range tags {
		if trimmed := strings.TrimSpace(tags[i]); strings.EqualFold(trimmed, needle) {
			return true
		}
	}
	return false
}
|
||||
|
||||
// boundedInt clamps value to the range [minValue, maxValue]. The lower bound
// is checked first, so it wins if the bounds are inverted.
func boundedInt(value, minValue, maxValue int) int {
	switch {
	case value < minValue:
		return minValue
	case value > maxValue:
		return maxValue
	default:
		return value
	}
}
|
||||
@@ -0,0 +1,278 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestRankPeerEndpointCandidatesPrefersDirectFreshPublicPath(t *testing.T) {
|
||||
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
||||
fresh := now.Add(-time.Minute)
|
||||
stale := now.Add(-2 * time.Hour)
|
||||
candidates := []PeerEndpointCandidate{
|
||||
{
|
||||
EndpointID: "node-b-relay",
|
||||
NodeID: "node-b",
|
||||
Transport: "relay",
|
||||
Address: "relay.example.test/node-b",
|
||||
Reachability: "relay",
|
||||
NATType: "symmetric",
|
||||
ConnectivityMode: "relay_required",
|
||||
Region: "us",
|
||||
Priority: 1,
|
||||
LastVerifiedAt: &fresh,
|
||||
},
|
||||
{
|
||||
EndpointID: "node-b-public",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_tcp_tls",
|
||||
Address: "203.0.113.20:443",
|
||||
Reachability: "public",
|
||||
NATType: "none",
|
||||
ConnectivityMode: "direct",
|
||||
Region: "eu",
|
||||
Priority: 10,
|
||||
PolicyTags: []string{"fast-path"},
|
||||
LastVerifiedAt: &fresh,
|
||||
},
|
||||
{
|
||||
EndpointID: "node-b-private-stale",
|
||||
NodeID: "node-b",
|
||||
Transport: "wss",
|
||||
Address: "10.0.0.5:443",
|
||||
Reachability: "private",
|
||||
NATType: "restricted",
|
||||
ConnectivityMode: "direct",
|
||||
Region: "eu",
|
||||
Priority: 5,
|
||||
LastVerifiedAt: &stale,
|
||||
},
|
||||
}
|
||||
|
||||
ranked := RankPeerEndpointCandidates(candidates, EndpointCandidateScoreOptions{
|
||||
ChannelClass: SyntheticChannelFabricControl,
|
||||
PreferredRegion: "eu",
|
||||
Now: now,
|
||||
MaxVerificationAge: time.Hour,
|
||||
})
|
||||
if len(ranked) != 3 {
|
||||
t.Fatalf("ranked length = %d, want 3", len(ranked))
|
||||
}
|
||||
if ranked[0].Candidate.EndpointID != "node-b-public" {
|
||||
t.Fatalf("top endpoint = %q, want node-b-public: %+v", ranked[0].Candidate.EndpointID, ranked)
|
||||
}
|
||||
if ranked[0].Score <= ranked[1].Score {
|
||||
t.Fatalf("top score = %d, second = %d", ranked[0].Score, ranked[1].Score)
|
||||
}
|
||||
if !containsReason(ranked[0].Reasons, "policy:fast-path") || !containsReason(ranked[0].Reasons, "verified:fresh") {
|
||||
t.Fatalf("top reasons missing expected hints: %+v", ranked[0].Reasons)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRankPeerEndpointCandidatesUsesDeterministicTieBreak(t *testing.T) {
|
||||
candidates := []PeerEndpointCandidate{
|
||||
{
|
||||
EndpointID: "endpoint-b",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_tcp_tls",
|
||||
Address: "203.0.113.21:443",
|
||||
Reachability: "public",
|
||||
NATType: "none",
|
||||
ConnectivityMode: "direct",
|
||||
Priority: 10,
|
||||
},
|
||||
{
|
||||
EndpointID: "endpoint-a",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_tcp_tls",
|
||||
Address: "203.0.113.20:443",
|
||||
Reachability: "public",
|
||||
NATType: "none",
|
||||
ConnectivityMode: "direct",
|
||||
Priority: 10,
|
||||
},
|
||||
}
|
||||
|
||||
ranked := RankPeerEndpointCandidates(candidates, EndpointCandidateScoreOptions{})
|
||||
if ranked[0].Candidate.EndpointID != "endpoint-a" {
|
||||
t.Fatalf("tie top endpoint = %q, want endpoint-a", ranked[0].Candidate.EndpointID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRankPeerEndpointCandidatesPrefersCorporatePrivateEndpoint(t *testing.T) {
|
||||
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
||||
candidates := []PeerEndpointCandidate{
|
||||
{
|
||||
EndpointID: "node-b-public",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_tcp_tls",
|
||||
Address: "203.0.113.20:443",
|
||||
Reachability: "public",
|
||||
NATType: "none",
|
||||
ConnectivityMode: "direct",
|
||||
Region: "corp-eu",
|
||||
Priority: 10,
|
||||
},
|
||||
{
|
||||
EndpointID: "node-b-corp-lan",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_tcp_tls",
|
||||
Address: "10.24.10.20:19001",
|
||||
Reachability: "private",
|
||||
NATType: "none",
|
||||
ConnectivityMode: "direct",
|
||||
Region: "corp-eu",
|
||||
Priority: 1,
|
||||
PolicyTags: []string{"corp-lan", "same-site"},
|
||||
},
|
||||
}
|
||||
|
||||
ranked := RankPeerEndpointCandidates(candidates, EndpointCandidateScoreOptions{
|
||||
ChannelClass: SyntheticChannelFabricControl,
|
||||
PreferredRegion: "corp-eu",
|
||||
Now: now,
|
||||
})
|
||||
if ranked[0].Candidate.EndpointID != "node-b-corp-lan" {
|
||||
t.Fatalf("top endpoint = %q, want node-b-corp-lan: %+v", ranked[0].Candidate.EndpointID, ranked)
|
||||
}
|
||||
if !containsReason(ranked[0].Reasons, "policy:private-lan") || !containsReason(ranked[0].Reasons, "region:preferred") {
|
||||
t.Fatalf("corp LAN reasons missing: %+v", ranked[0].Reasons)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRankPeerEndpointCandidatesDoesNotDropRelayRequiredFallback(t *testing.T) {
|
||||
candidates := []PeerEndpointCandidate{
|
||||
{
|
||||
EndpointID: "node-b-outbound",
|
||||
NodeID: "node-b",
|
||||
Transport: "outbound_reverse",
|
||||
Address: "node-b.reverse.local",
|
||||
Reachability: "outbound_only",
|
||||
NATType: "symmetric",
|
||||
ConnectivityMode: "outbound_only",
|
||||
Priority: 20,
|
||||
},
|
||||
{
|
||||
EndpointID: "node-b-relay",
|
||||
NodeID: "node-b",
|
||||
Transport: "relay",
|
||||
Address: "relay.example.test/node-b",
|
||||
Reachability: "relay",
|
||||
NATType: "blocked",
|
||||
ConnectivityMode: "relay_required",
|
||||
Priority: 30,
|
||||
},
|
||||
}
|
||||
|
||||
ranked := RankPeerEndpointCandidates(candidates, EndpointCandidateScoreOptions{
|
||||
ChannelClass: SyntheticChannelRouteControl,
|
||||
})
|
||||
if len(ranked) != 2 {
|
||||
t.Fatalf("ranked length = %d, want 2", len(ranked))
|
||||
}
|
||||
for _, item := range ranked {
|
||||
if item.Candidate.EndpointID == "" {
|
||||
t.Fatalf("ranked candidate lost identity: %+v", item)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestRankPeerEndpointCandidatesUsesHealthObservationOverlay(t *testing.T) {
|
||||
now := time.Date(2026, 4, 28, 13, 0, 0, 0, time.UTC)
|
||||
candidates := []PeerEndpointCandidate{
|
||||
{
|
||||
EndpointID: "node-b-direct",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_tcp_tls",
|
||||
Address: "203.0.113.20:443",
|
||||
Reachability: "public",
|
||||
NATType: "none",
|
||||
ConnectivityMode: "direct",
|
||||
Priority: 10,
|
||||
},
|
||||
{
|
||||
EndpointID: "node-b-wss",
|
||||
NodeID: "node-b",
|
||||
Transport: "wss",
|
||||
Address: "node-b.example.test",
|
||||
Reachability: "public",
|
||||
NATType: "restricted",
|
||||
ConnectivityMode: "direct",
|
||||
Priority: 10,
|
||||
},
|
||||
}
|
||||
|
||||
ranked := RankPeerEndpointCandidates(candidates, EndpointCandidateScoreOptions{
|
||||
Now: now,
|
||||
MaxObservationAge: 5 * time.Minute,
|
||||
Observations: map[string]EndpointCandidateHealthObservation{
|
||||
"node-b-direct": {
|
||||
EndpointID: "node-b-direct",
|
||||
LastLatencyMs: 240,
|
||||
FailureCount: 3,
|
||||
LastFailureReason: "connect_timeout",
|
||||
ReliabilityScore: 50,
|
||||
ObservedAt: now.Add(-time.Minute),
|
||||
},
|
||||
"node-b-wss": {
|
||||
EndpointID: "node-b-wss",
|
||||
LastLatencyMs: 35,
|
||||
SuccessCount: 8,
|
||||
ReliabilityScore: 95,
|
||||
ObservedAt: now.Add(-time.Minute),
|
||||
},
|
||||
},
|
||||
})
|
||||
if ranked[0].Candidate.EndpointID != "node-b-wss" {
|
||||
t.Fatalf("top endpoint = %q, want node-b-wss: %+v", ranked[0].Candidate.EndpointID, ranked)
|
||||
}
|
||||
if !containsReason(ranked[0].Reasons, "latency:low") || !containsReason(ranked[0].Reasons, "reliability:high") {
|
||||
t.Fatalf("top reasons missing health hints: %+v", ranked[0].Reasons)
|
||||
}
|
||||
if !containsReason(ranked[1].Reasons, "history:failure") || !containsReason(ranked[1].Reasons, "failure:recent") {
|
||||
t.Fatalf("failed endpoint reasons missing failure hints: %+v", ranked[1].Reasons)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRankPeerEndpointCandidatesTreatsStaleObservationAsPenalty(t *testing.T) {
|
||||
now := time.Date(2026, 4, 28, 13, 0, 0, 0, time.UTC)
|
||||
candidates := []PeerEndpointCandidate{
|
||||
{
|
||||
EndpointID: "node-b-direct",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_tcp_tls",
|
||||
Address: "203.0.113.20:443",
|
||||
Reachability: "public",
|
||||
NATType: "none",
|
||||
ConnectivityMode: "direct",
|
||||
Priority: 10,
|
||||
},
|
||||
}
|
||||
|
||||
ranked := RankPeerEndpointCandidates(candidates, EndpointCandidateScoreOptions{
|
||||
Now: now,
|
||||
MaxObservationAge: 5 * time.Minute,
|
||||
Observations: map[string]EndpointCandidateHealthObservation{
|
||||
"node-b-direct": {
|
||||
EndpointID: "node-b-direct",
|
||||
LastLatencyMs: 20,
|
||||
ObservedAt: now.Add(-time.Hour),
|
||||
},
|
||||
},
|
||||
})
|
||||
if !containsReason(ranked[0].Reasons, "observation:stale") {
|
||||
t.Fatalf("reasons missing stale observation: %+v", ranked[0].Reasons)
|
||||
}
|
||||
if containsReason(ranked[0].Reasons, "latency:low") {
|
||||
t.Fatalf("stale observation should not contribute latency: %+v", ranked[0].Reasons)
|
||||
}
|
||||
}
|
||||
|
||||
// containsReason reports whether reason appears in reasons (exact match).
func containsReason(reasons []string, reason string) bool {
	for i := 0; i < len(reasons); i++ {
		if reasons[i] == reason {
			return true
		}
	}
	return false
}
|
||||
@@ -0,0 +1,42 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// HTTPPeerTransport delivers synthetic mesh envelopes over HTTP to peers whose
// base URLs were configured explicitly. Its scope is deliberately small:
// production forwarding stays disabled, and only SyntheticRuntime messages are
// meant to travel through it.
type HTTPPeerTransport struct {
	// PeerURLs maps a peer node ID to that peer's base URL.
	PeerURLs map[string]string
	// HTTPClient, when non-nil, replaces the client that SendSynthetic would
	// otherwise obtain from NewClient.
	HTTPClient *http.Client
}
|
||||
|
||||
func NewHTTPPeerTransport(peerURLs map[string]string) *HTTPPeerTransport {
|
||||
normalized := make(map[string]string, len(peerURLs))
|
||||
for nodeID, baseURL := range peerURLs {
|
||||
nodeID = strings.TrimSpace(nodeID)
|
||||
baseURL = strings.TrimRight(strings.TrimSpace(baseURL), "/")
|
||||
if nodeID != "" && baseURL != "" {
|
||||
normalized[nodeID] = baseURL
|
||||
}
|
||||
}
|
||||
return &HTTPPeerTransport{PeerURLs: normalized}
|
||||
}
|
||||
|
||||
func (t *HTTPPeerTransport) SendSynthetic(ctx context.Context, nextNodeID string, envelope SyntheticEnvelope) (SyntheticEnvelope, error) {
|
||||
if t == nil {
|
||||
return SyntheticEnvelope{}, ErrSyntheticPeerUnavailable
|
||||
}
|
||||
baseURL := strings.TrimRight(strings.TrimSpace(t.PeerURLs[nextNodeID]), "/")
|
||||
if baseURL == "" {
|
||||
return SyntheticEnvelope{}, ErrSyntheticPeerUnavailable
|
||||
}
|
||||
client := NewClient(baseURL)
|
||||
if t.HTTPClient != nil {
|
||||
client.HTTPClient = t.HTTPClient
|
||||
}
|
||||
return client.SendSynthetic(ctx, envelope)
|
||||
}
|
||||
@@ -0,0 +1,130 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestHTTPPeerTransportDirectSyntheticProbe(t *testing.T) {
|
||||
nodeA := newLiveSyntheticNode(t, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"})
|
||||
defer nodeA.Close()
|
||||
nodeB := newLiveSyntheticNode(t, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"})
|
||||
defer nodeB.Close()
|
||||
|
||||
route := liveSyntheticRoute("route-direct", []string{"node-a", "node-b"})
|
||||
routes := []SyntheticRoute{route}
|
||||
nodeA.Runtime = newLiveRuntime(nodeA.Local, routes, map[string]string{"node-b": nodeB.URL})
|
||||
nodeB.Runtime = newLiveRuntime(nodeB.Local, routes, map[string]string{})
|
||||
|
||||
ack, err := nodeA.Runtime.SendProbe(context.Background(), route.RouteID, SyntheticChannelFabricControl, "probe-live-direct")
|
||||
if err != nil {
|
||||
t.Fatalf("send live direct probe: %v", err)
|
||||
}
|
||||
if ack.MessageType != SyntheticMessageProbeAck {
|
||||
t.Fatalf("MessageType = %q, want %q", ack.MessageType, SyntheticMessageProbeAck)
|
||||
}
|
||||
payload := decodeAckPayload(t, ack)
|
||||
if got, want := payload.Path, []string{"node-a", "node-b"}; !sameStrings(got, want) {
|
||||
t.Fatalf("path = %v, want %v", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHTTPPeerTransportSingleRelaySyntheticProbe(t *testing.T) {
|
||||
nodeA := newLiveSyntheticNode(t, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"})
|
||||
defer nodeA.Close()
|
||||
nodeR := newLiveSyntheticNode(t, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-r"})
|
||||
defer nodeR.Close()
|
||||
nodeB := newLiveSyntheticNode(t, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"})
|
||||
defer nodeB.Close()
|
||||
|
||||
route := liveSyntheticRoute("route-relay", []string{"node-a", "node-r", "node-b"})
|
||||
routes := []SyntheticRoute{route}
|
||||
nodeA.Runtime = newLiveRuntime(nodeA.Local, routes, map[string]string{"node-r": nodeR.URL})
|
||||
nodeR.Runtime = newLiveRuntime(nodeR.Local, routes, map[string]string{"node-b": nodeB.URL})
|
||||
nodeB.Runtime = newLiveRuntime(nodeB.Local, routes, map[string]string{})
|
||||
|
||||
ack, err := nodeA.Runtime.SendProbe(context.Background(), route.RouteID, SyntheticChannelFabricControl, "probe-live-relay")
|
||||
if err != nil {
|
||||
t.Fatalf("send live relay probe: %v", err)
|
||||
}
|
||||
if ack.MessageType != SyntheticMessageProbeAck {
|
||||
t.Fatalf("MessageType = %q, want %q", ack.MessageType, SyntheticMessageProbeAck)
|
||||
}
|
||||
payload := decodeAckPayload(t, ack)
|
||||
if got, want := payload.Path, []string{"node-a", "node-r", "node-b"}; !sameStrings(got, want) {
|
||||
t.Fatalf("path = %v, want %v", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHTTPPeerTransportMissingPeer(t *testing.T) {
|
||||
transport := NewHTTPPeerTransport(map[string]string{})
|
||||
_, err := transport.SendSynthetic(context.Background(), "node-missing", SyntheticEnvelope{})
|
||||
if !errors.Is(err, ErrSyntheticPeerUnavailable) {
|
||||
t.Fatalf("err = %v, want ErrSyntheticPeerUnavailable", err)
|
||||
}
|
||||
}
|
||||
|
||||
type liveSyntheticNode struct {
|
||||
Local PeerIdentity
|
||||
Runtime *SyntheticRuntime
|
||||
URL string
|
||||
server *httptest.Server
|
||||
}
|
||||
|
||||
func newLiveSyntheticNode(t *testing.T, local PeerIdentity) *liveSyntheticNode {
|
||||
t.Helper()
|
||||
node := &liveSyntheticNode{Local: local}
|
||||
node.server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
Server{Local: node.Local, SyntheticRuntime: node.Runtime}.Handler().ServeHTTP(w, r)
|
||||
}))
|
||||
node.URL = node.server.URL
|
||||
return node
|
||||
}
|
||||
|
||||
func (n *liveSyntheticNode) Close() {
|
||||
if n.server != nil {
|
||||
n.server.Close()
|
||||
}
|
||||
}
|
||||
|
||||
func newLiveRuntime(local PeerIdentity, routes []SyntheticRoute, peers map[string]string) *SyntheticRuntime {
|
||||
return NewSyntheticRuntime(SyntheticRuntimeConfig{
|
||||
Enabled: true,
|
||||
Local: local,
|
||||
Routes: routes,
|
||||
Transport: NewHTTPPeerTransport(peers),
|
||||
})
|
||||
}
|
||||
|
||||
func liveSyntheticRoute(routeID string, hops []string) SyntheticRoute {
|
||||
return SyntheticRoute{
|
||||
RouteID: routeID,
|
||||
ClusterID: "cluster-1",
|
||||
SourceNodeID: hops[0],
|
||||
DestinationNodeID: hops[len(hops)-1],
|
||||
Hops: hops,
|
||||
AllowedChannels: []string{SyntheticChannelFabricControl},
|
||||
MaxTTL: 8,
|
||||
MaxHops: 8,
|
||||
ExpiresAt: time.Now().UTC().Add(time.Hour),
|
||||
RouteVersion: "route-v1",
|
||||
PolicyVersion: "policy-v1",
|
||||
PeerDirectoryVersion: "peers-v1",
|
||||
}
|
||||
}
|
||||
|
||||
// sameStrings reports whether left and right have the same length and equal
// elements at every index. A nil slice and an empty slice compare equal.
func sameStrings(left, right []string) bool {
	if len(left) != len(right) {
		return false
	}
	for idx, value := range left {
		if right[idx] != value {
			return false
		}
	}
	return true
}
|
||||
@@ -0,0 +1,374 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// DefaultWarmPeerLimit is the maximum number of warm peers NewPeerCache
// selects when PeerCacheConfig.WarmPeerLimit is zero or negative.
const DefaultWarmPeerLimit = 8
|
||||
|
||||
type PeerCacheConfig struct {
|
||||
Local PeerIdentity
|
||||
PeerEndpoints map[string]string
|
||||
PeerEndpointCandidates map[string][]PeerEndpointCandidate
|
||||
PeerDirectory []PeerDirectoryEntry
|
||||
RecoverySeeds []PeerRecoverySeed
|
||||
RendezvousLeases []PeerRendezvousLease
|
||||
Routes []SyntheticRoute
|
||||
WarmPeerLimit int
|
||||
PreferredRegion string
|
||||
Now time.Time
|
||||
}
|
||||
|
||||
type PeerCache struct {
|
||||
snapshot PeerCacheSnapshot
|
||||
}
|
||||
|
||||
type PeerCacheSnapshot struct {
|
||||
ClusterID string `json:"cluster_id"`
|
||||
LocalNodeID string `json:"local_node_id"`
|
||||
PeerCount int `json:"peer_count"`
|
||||
WarmPeerCount int `json:"warm_peer_count"`
|
||||
RecoverySeedCount int `json:"recovery_seed_count"`
|
||||
RendezvousLeaseCount int `json:"rendezvous_lease_count"`
|
||||
BuiltAt time.Time `json:"built_at"`
|
||||
Entries []PeerCacheEntry `json:"entries"`
|
||||
}
|
||||
|
||||
// PeerCacheEntry describes one remote peer as seen by the peer cache.
type PeerCacheEntry struct {
	NodeID string `json:"node_id"`
	// RouteIDs lists routes through the local node that also include this peer,
	// merged with route IDs from the peer directory.
	RouteIDs []string `json:"route_ids,omitempty"`
	// Endpoint is the address selected for the peer: the configured endpoint,
	// replaced by the best-ranked candidate address when one exists, or
	// otherwise filled from a recovery seed or a rendezvous relay.
	Endpoint          string   `json:"endpoint,omitempty"`
	EndpointCount     int      `json:"endpoint_count"`
	CandidateCount    int      `json:"candidate_count"`
	ConnectivityModes []string `json:"connectivity_modes,omitempty"`
	RecoverySeed      bool     `json:"recovery_seed"`

	// Warm marks the peer as selected within the warm-peer limit; WarmReason
	// records why (see warmPeerReason).
	Warm       bool   `json:"warm"`
	WarmReason string `json:"warm_reason,omitempty"`

	// Best* fields mirror the top-ranked endpoint candidate, or the
	// rendezvous lease when the lease endpoint is preferred.
	BestCandidateID    string   `json:"best_candidate_id,omitempty"`
	BestCandidateAddr  string   `json:"best_candidate_addr,omitempty"`
	BestTransport      string   `json:"best_transport,omitempty"`
	BestReachability   string   `json:"best_reachability,omitempty"`
	BestConnectivity   string   `json:"best_connectivity,omitempty"`
	BestNATType        string   `json:"best_nat_type,omitempty"`
	BestPolicyTags     []string `json:"best_policy_tags,omitempty"`
	BestCandidateScore int      `json:"best_candidate_score,omitempty"`

	// Rendezvous/relay details come from a usable rendezvous lease that
	// targets this peer.
	RendezvousLeaseID string `json:"rendezvous_lease_id,omitempty"`
	RelayNodeID       string `json:"relay_node_id,omitempty"`
	RelayEndpoint     string `json:"relay_endpoint,omitempty"`
	RelayControl      bool   `json:"relay_control"`
}
|
||||
|
||||
type peerCacheBuildEntry struct {
|
||||
PeerCacheEntry
|
||||
adjacentRoutePeer bool
|
||||
bestScore int
|
||||
}
|
||||
|
||||
func NewPeerCache(cfg PeerCacheConfig) *PeerCache {
|
||||
now := cfg.Now.UTC()
|
||||
if now.IsZero() {
|
||||
now = time.Now().UTC()
|
||||
}
|
||||
limit := cfg.WarmPeerLimit
|
||||
if limit <= 0 {
|
||||
limit = DefaultWarmPeerLimit
|
||||
}
|
||||
entries := map[string]*peerCacheBuildEntry{}
|
||||
for _, item := range cfg.PeerDirectory {
|
||||
nodeID := strings.TrimSpace(item.NodeID)
|
||||
if nodeID == "" || nodeID == cfg.Local.NodeID {
|
||||
continue
|
||||
}
|
||||
entry := peerCacheEntry(entries, nodeID)
|
||||
entry.RouteIDs = mergeStrings(entry.RouteIDs, item.RouteIDs)
|
||||
entry.EndpointCount = maxInt(entry.EndpointCount, item.EndpointCount)
|
||||
entry.CandidateCount = maxInt(entry.CandidateCount, item.CandidateCount)
|
||||
entry.ConnectivityModes = mergeStrings(entry.ConnectivityModes, item.ConnectivityModes)
|
||||
entry.RecoverySeed = entry.RecoverySeed || item.RecoverySeed
|
||||
}
|
||||
for nodeID, endpoint := range cfg.PeerEndpoints {
|
||||
nodeID = strings.TrimSpace(nodeID)
|
||||
endpoint = strings.TrimSpace(endpoint)
|
||||
if nodeID == "" || nodeID == cfg.Local.NodeID || endpoint == "" {
|
||||
continue
|
||||
}
|
||||
entry := peerCacheEntry(entries, nodeID)
|
||||
entry.Endpoint = endpoint
|
||||
entry.EndpointCount = maxInt(entry.EndpointCount, 1)
|
||||
}
|
||||
for nodeID, candidates := range cfg.PeerEndpointCandidates {
|
||||
nodeID = strings.TrimSpace(nodeID)
|
||||
if nodeID == "" || nodeID == cfg.Local.NodeID || len(candidates) == 0 {
|
||||
continue
|
||||
}
|
||||
entry := peerCacheEntry(entries, nodeID)
|
||||
entry.CandidateCount = maxInt(entry.CandidateCount, len(candidates))
|
||||
for _, candidate := range candidates {
|
||||
if strings.TrimSpace(candidate.ConnectivityMode) != "" {
|
||||
entry.ConnectivityModes = mergeStrings(entry.ConnectivityModes, []string{candidate.ConnectivityMode})
|
||||
}
|
||||
}
|
||||
scored := RankPeerEndpointCandidates(candidates, EndpointCandidateScoreOptions{
|
||||
ChannelClass: SyntheticChannelFabricControl,
|
||||
PreferredRegion: cfg.PreferredRegion,
|
||||
Now: now,
|
||||
MaxVerificationAge: time.Hour,
|
||||
})
|
||||
if len(scored) > 0 {
|
||||
entry.BestCandidateID = scored[0].Candidate.EndpointID
|
||||
entry.BestCandidateAddr = scored[0].Candidate.Address
|
||||
entry.BestTransport = scored[0].Candidate.Transport
|
||||
entry.BestReachability = scored[0].Candidate.Reachability
|
||||
entry.BestConnectivity = scored[0].Candidate.ConnectivityMode
|
||||
entry.BestNATType = scored[0].Candidate.NATType
|
||||
entry.BestPolicyTags = append([]string{}, scored[0].Candidate.PolicyTags...)
|
||||
entry.BestCandidateScore = scored[0].Score
|
||||
entry.bestScore = scored[0].Score
|
||||
if strings.TrimSpace(scored[0].Candidate.Address) != "" {
|
||||
entry.Endpoint = strings.TrimSpace(scored[0].Candidate.Address)
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, route := range cfg.Routes {
|
||||
path := routePath(route)
|
||||
localIndex := indexOf(path, cfg.Local.NodeID)
|
||||
if localIndex < 0 {
|
||||
continue
|
||||
}
|
||||
for _, nodeID := range path {
|
||||
if nodeID == "" || nodeID == cfg.Local.NodeID {
|
||||
continue
|
||||
}
|
||||
entry := peerCacheEntry(entries, nodeID)
|
||||
entry.RouteIDs = mergeStrings(entry.RouteIDs, []string{route.RouteID})
|
||||
}
|
||||
for _, adjacentIndex := range []int{localIndex - 1, localIndex + 1} {
|
||||
if adjacentIndex < 0 || adjacentIndex >= len(path) {
|
||||
continue
|
||||
}
|
||||
nodeID := path[adjacentIndex]
|
||||
if nodeID == "" || nodeID == cfg.Local.NodeID {
|
||||
continue
|
||||
}
|
||||
peerCacheEntry(entries, nodeID).adjacentRoutePeer = true
|
||||
}
|
||||
}
|
||||
for _, seed := range cfg.RecoverySeeds {
|
||||
nodeID := strings.TrimSpace(seed.NodeID)
|
||||
if nodeID == "" || nodeID == cfg.Local.NodeID {
|
||||
continue
|
||||
}
|
||||
entry := peerCacheEntry(entries, nodeID)
|
||||
entry.RecoverySeed = true
|
||||
if entry.Endpoint == "" {
|
||||
entry.Endpoint = strings.TrimSpace(seed.Endpoint)
|
||||
}
|
||||
if strings.TrimSpace(seed.ConnectivityMode) != "" {
|
||||
entry.ConnectivityModes = mergeStrings(entry.ConnectivityModes, []string{seed.ConnectivityMode})
|
||||
}
|
||||
}
|
||||
rendezvousLeases := 0
|
||||
for _, lease := range cfg.RendezvousLeases {
|
||||
if !leaseUsableForPeerCache(lease, cfg.Local.NodeID, now) {
|
||||
continue
|
||||
}
|
||||
rendezvousLeases++
|
||||
if lease.PeerNodeID != cfg.Local.NodeID {
|
||||
entry := peerCacheEntry(entries, lease.PeerNodeID)
|
||||
useLeaseEndpoint := shouldUseRendezvousEndpoint(*entry)
|
||||
entry.RendezvousLeaseID = lease.LeaseID
|
||||
entry.RelayNodeID = lease.RelayNodeID
|
||||
entry.RelayEndpoint = strings.TrimRight(strings.TrimSpace(lease.RelayEndpoint), "/")
|
||||
entry.RelayControl = true
|
||||
entry.CandidateCount = maxInt(entry.CandidateCount, 1)
|
||||
entry.ConnectivityModes = mergeStrings(entry.ConnectivityModes, []string{firstNonEmpty(lease.ConnectivityMode, "relay_required"), "relay_control"})
|
||||
if useLeaseEndpoint {
|
||||
entry.BestTransport = firstNonEmpty(lease.Transport, "relay_control")
|
||||
entry.BestReachability = "relay"
|
||||
entry.BestConnectivity = firstNonEmpty(lease.ConnectivityMode, "relay_required")
|
||||
entry.Endpoint = entry.RelayEndpoint
|
||||
entry.BestCandidateID = lease.LeaseID
|
||||
entry.BestCandidateAddr = entry.RelayEndpoint
|
||||
entry.bestScore = maxInt(entry.bestScore, 500)
|
||||
}
|
||||
}
|
||||
if lease.PeerNodeID == cfg.Local.NodeID && lease.RelayNodeID != "" && lease.RelayNodeID != cfg.Local.NodeID {
|
||||
entry := peerCacheEntry(entries, lease.RelayNodeID)
|
||||
if entry.Endpoint == "" {
|
||||
entry.Endpoint = strings.TrimRight(strings.TrimSpace(lease.RelayEndpoint), "/")
|
||||
}
|
||||
entry.EndpointCount = maxInt(entry.EndpointCount, 1)
|
||||
entry.ConnectivityModes = mergeStrings(entry.ConnectivityModes, []string{"relay_control"})
|
||||
}
|
||||
}
|
||||
out := make([]peerCacheBuildEntry, 0, len(entries))
|
||||
recoverySeeds := 0
|
||||
for _, entry := range entries {
|
||||
sort.Strings(entry.RouteIDs)
|
||||
sort.Strings(entry.ConnectivityModes)
|
||||
if entry.RecoverySeed {
|
||||
recoverySeeds++
|
||||
}
|
||||
out = append(out, *entry)
|
||||
}
|
||||
sort.SliceStable(out, func(i, j int) bool {
|
||||
left := warmPeerPriority(out[i])
|
||||
right := warmPeerPriority(out[j])
|
||||
if left != right {
|
||||
return left > right
|
||||
}
|
||||
return out[i].NodeID < out[j].NodeID
|
||||
})
|
||||
warm := 0
|
||||
for i := range out {
|
||||
if warm >= limit {
|
||||
break
|
||||
}
|
||||
if warmPeerPriority(out[i]) <= 0 {
|
||||
continue
|
||||
}
|
||||
out[i].Warm = true
|
||||
out[i].WarmReason = warmPeerReason(out[i])
|
||||
warm++
|
||||
}
|
||||
sort.SliceStable(out, func(i, j int) bool {
|
||||
return out[i].NodeID < out[j].NodeID
|
||||
})
|
||||
snapshotEntries := make([]PeerCacheEntry, 0, len(out))
|
||||
for _, entry := range out {
|
||||
snapshotEntries = append(snapshotEntries, entry.PeerCacheEntry)
|
||||
}
|
||||
return &PeerCache{snapshot: PeerCacheSnapshot{
|
||||
ClusterID: cfg.Local.ClusterID,
|
||||
LocalNodeID: cfg.Local.NodeID,
|
||||
PeerCount: len(snapshotEntries),
|
||||
WarmPeerCount: warm,
|
||||
RecoverySeedCount: recoverySeeds,
|
||||
RendezvousLeaseCount: rendezvousLeases,
|
||||
BuiltAt: now,
|
||||
Entries: snapshotEntries,
|
||||
}}
|
||||
}
|
||||
|
||||
func (c *PeerCache) Snapshot() PeerCacheSnapshot {
|
||||
if c == nil {
|
||||
return PeerCacheSnapshot{}
|
||||
}
|
||||
snapshot := c.snapshot
|
||||
snapshot.Entries = append([]PeerCacheEntry{}, c.snapshot.Entries...)
|
||||
return snapshot
|
||||
}
|
||||
|
||||
func (c *PeerCache) WarmPeerIDs() []string {
|
||||
snapshot := c.Snapshot()
|
||||
out := make([]string, 0, snapshot.WarmPeerCount)
|
||||
for _, entry := range snapshot.Entries {
|
||||
if entry.Warm {
|
||||
out = append(out, entry.NodeID)
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func peerCacheEntry(entries map[string]*peerCacheBuildEntry, nodeID string) *peerCacheBuildEntry {
|
||||
if entry, ok := entries[nodeID]; ok {
|
||||
return entry
|
||||
}
|
||||
entry := &peerCacheBuildEntry{PeerCacheEntry: PeerCacheEntry{NodeID: nodeID}}
|
||||
entries[nodeID] = entry
|
||||
return entry
|
||||
}
|
||||
|
||||
func warmPeerPriority(entry peerCacheBuildEntry) int {
|
||||
score := 0
|
||||
if entry.adjacentRoutePeer {
|
||||
score += 1000
|
||||
}
|
||||
if entry.RecoverySeed {
|
||||
score += 500
|
||||
}
|
||||
if entry.Endpoint != "" {
|
||||
score += 100
|
||||
}
|
||||
if entry.bestScore > 0 {
|
||||
score += entry.bestScore
|
||||
}
|
||||
if entry.RelayControl {
|
||||
score += 300
|
||||
}
|
||||
score += entry.CandidateCount
|
||||
return score
|
||||
}
|
||||
|
||||
func warmPeerReason(entry peerCacheBuildEntry) string {
|
||||
if entry.adjacentRoutePeer {
|
||||
return "route_adjacent"
|
||||
}
|
||||
if entry.RecoverySeed {
|
||||
return "recovery_seed"
|
||||
}
|
||||
if entry.RelayControl {
|
||||
return "rendezvous_lease"
|
||||
}
|
||||
if entry.BestCandidateID != "" {
|
||||
return "endpoint_candidate"
|
||||
}
|
||||
if entry.Endpoint != "" {
|
||||
return "peer_endpoint"
|
||||
}
|
||||
return "scoped_peer"
|
||||
}
|
||||
|
||||
func leaseUsableForPeerCache(lease PeerRendezvousLease, localNodeID string, now time.Time) bool {
|
||||
if strings.TrimSpace(lease.LeaseID) == "" ||
|
||||
strings.TrimSpace(lease.PeerNodeID) == "" ||
|
||||
strings.TrimSpace(lease.RelayNodeID) == "" ||
|
||||
strings.TrimSpace(lease.RelayEndpoint) == "" ||
|
||||
lease.ExpiresAt.IsZero() ||
|
||||
!lease.ExpiresAt.After(now) ||
|
||||
!lease.ControlPlaneOnly {
|
||||
return false
|
||||
}
|
||||
return lease.PeerNodeID != localNodeID || lease.RelayNodeID != localNodeID
|
||||
}
|
||||
|
||||
func shouldUseRendezvousEndpoint(entry peerCacheBuildEntry) bool {
|
||||
if strings.TrimSpace(entry.Endpoint) == "" {
|
||||
return true
|
||||
}
|
||||
transport := strings.ToLower(strings.TrimSpace(entry.BestTransport))
|
||||
reachability := strings.ToLower(strings.TrimSpace(entry.BestReachability))
|
||||
connectivity := strings.ToLower(strings.TrimSpace(entry.BestConnectivity))
|
||||
return strings.Contains(transport, "relay") ||
|
||||
strings.Contains(transport, "outbound") ||
|
||||
reachability == "relay" ||
|
||||
reachability == "outbound_only" ||
|
||||
connectivity == "relay_required" ||
|
||||
connectivity == "outbound_only"
|
||||
}
|
||||
|
||||
// mergeStrings returns the whitespace-trimmed, de-duplicated union of existing
// and incoming, preserving first-seen order (existing first). Blank values are
// dropped. The result is always a new, non-nil slice.
//
// Neither input is modified. The two slices are walked separately rather than
// via append(existing, incoming...), because that append writes into
// existing's backing array whenever it has spare capacity and would overwrite
// elements the caller may still reference through a longer slice.
func mergeStrings(existing []string, incoming []string) []string {
	total := len(existing) + len(incoming)
	seen := make(map[string]struct{}, total)
	out := make([]string, 0, total)
	for _, group := range [2][]string{existing, incoming} {
		for _, value := range group {
			value = strings.TrimSpace(value)
			if value == "" {
				continue
			}
			if _, dup := seen[value]; dup {
				continue
			}
			seen[value] = struct{}{}
			out = append(out, value)
		}
	}
	return out
}
|
||||
|
||||
// maxInt returns the larger of left and right.
func maxInt(left, right int) int {
	if right >= left {
		return right
	}
	return left
}
|
||||
@@ -0,0 +1,170 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestPeerCacheSelectsAdjacentWarmPeersWithinLimit(t *testing.T) {
|
||||
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"}
|
||||
cache := NewPeerCache(PeerCacheConfig{
|
||||
Local: local,
|
||||
PeerEndpoints: map[string]string{
|
||||
"node-a": "http://node-a:19000",
|
||||
"node-r": "http://node-r:19000",
|
||||
"node-c": "http://node-c:19000",
|
||||
},
|
||||
Routes: []SyntheticRoute{
|
||||
peerCacheRoute("route-1", []string{"node-a", local.NodeID, "node-r", "node-c"}),
|
||||
},
|
||||
RecoverySeeds: []PeerRecoverySeed{
|
||||
{NodeID: "node-seed", Endpoint: "https://seed.example.test", Transport: "direct_tcp_tls", Priority: 10},
|
||||
},
|
||||
WarmPeerLimit: 2,
|
||||
Now: time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC),
|
||||
})
|
||||
|
||||
warm := cache.WarmPeerIDs()
|
||||
if len(warm) != 2 || warm[0] != "node-a" || warm[1] != "node-r" {
|
||||
t.Fatalf("warm peers = %+v, want adjacent node-a/node-r", warm)
|
||||
}
|
||||
snapshot := cache.Snapshot()
|
||||
if snapshot.PeerCount != 4 || snapshot.RecoverySeedCount != 1 {
|
||||
t.Fatalf("unexpected snapshot counts: %+v", snapshot)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPeerCachePromotesRecoverySeedAfterRoutePeers(t *testing.T) {
|
||||
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"}
|
||||
cache := NewPeerCache(PeerCacheConfig{
|
||||
Local: local,
|
||||
Routes: []SyntheticRoute{
|
||||
peerCacheRoute("route-1", []string{"node-a", local.NodeID, "node-r"}),
|
||||
},
|
||||
RecoverySeeds: []PeerRecoverySeed{
|
||||
{NodeID: "node-seed", Endpoint: "wss://seed.example.test/mesh", Transport: "wss", ConnectivityMode: "direct", Priority: 1},
|
||||
},
|
||||
WarmPeerLimit: 3,
|
||||
Now: time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC),
|
||||
})
|
||||
|
||||
warm := cache.WarmPeerIDs()
|
||||
if len(warm) != 3 || warm[0] != "node-a" || warm[1] != "node-r" || warm[2] != "node-seed" {
|
||||
t.Fatalf("warm peers = %+v, want adjacent peers then seed", warm)
|
||||
}
|
||||
seed, ok := peerCacheEntryByID(cache.Snapshot(), "node-seed")
|
||||
if !ok || !seed.RecoverySeed || seed.WarmReason != "recovery_seed" {
|
||||
t.Fatalf("unexpected seed entry: %+v", seed)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPeerCacheUsesBestEndpointCandidate(t *testing.T) {
|
||||
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"}
|
||||
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
||||
cache := NewPeerCache(PeerCacheConfig{
|
||||
Local: local,
|
||||
PeerEndpointCandidates: map[string][]PeerEndpointCandidate{
|
||||
"node-b": {
|
||||
{
|
||||
EndpointID: "node-b-relay",
|
||||
NodeID: "node-b",
|
||||
Transport: "relay",
|
||||
Address: "relay.example.test",
|
||||
Reachability: "relay",
|
||||
ConnectivityMode: "relay_required",
|
||||
Priority: 20,
|
||||
},
|
||||
{
|
||||
EndpointID: "node-b-public",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_tcp_tls",
|
||||
Address: "203.0.113.20:443",
|
||||
Reachability: "public",
|
||||
NATType: "none",
|
||||
ConnectivityMode: "direct",
|
||||
Priority: 1,
|
||||
LastVerifiedAt: &now,
|
||||
},
|
||||
},
|
||||
},
|
||||
WarmPeerLimit: 1,
|
||||
Now: now,
|
||||
})
|
||||
|
||||
entry, ok := peerCacheEntryByID(cache.Snapshot(), "node-b")
|
||||
if !ok {
|
||||
t.Fatal("node-b missing from cache")
|
||||
}
|
||||
if entry.BestCandidateID != "node-b-public" || !entry.Warm {
|
||||
t.Fatalf("unexpected candidate selection: %+v", entry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPeerCacheUsesPreferredCorporateEndpointAddress(t *testing.T) {
|
||||
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"}
|
||||
cache := NewPeerCache(PeerCacheConfig{
|
||||
Local: local,
|
||||
PeerEndpoints: map[string]string{
|
||||
"node-b": "https://node-b.public.example.test:443",
|
||||
},
|
||||
PeerEndpointCandidates: map[string][]PeerEndpointCandidate{
|
||||
"node-b": {
|
||||
{
|
||||
EndpointID: "node-b-public",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_tcp_tls",
|
||||
Address: "https://node-b.public.example.test:443",
|
||||
Reachability: "public",
|
||||
NATType: "none",
|
||||
ConnectivityMode: "direct",
|
||||
Region: "corp-eu",
|
||||
Priority: 10,
|
||||
},
|
||||
{
|
||||
EndpointID: "node-b-corp-lan",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_tcp_tls",
|
||||
Address: "http://10.24.10.20:19001",
|
||||
Reachability: "private",
|
||||
NATType: "none",
|
||||
ConnectivityMode: "direct",
|
||||
Region: "corp-eu",
|
||||
Priority: 1,
|
||||
PolicyTags: []string{"corp-lan"},
|
||||
},
|
||||
},
|
||||
},
|
||||
PreferredRegion: "corp-eu",
|
||||
WarmPeerLimit: 1,
|
||||
Now: time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC),
|
||||
})
|
||||
|
||||
entry, ok := peerCacheEntryByID(cache.Snapshot(), "node-b")
|
||||
if !ok {
|
||||
t.Fatal("node-b missing from peer cache")
|
||||
}
|
||||
if entry.BestCandidateID != "node-b-corp-lan" || entry.Endpoint != "http://10.24.10.20:19001" {
|
||||
t.Fatalf("peer cache did not choose corp LAN endpoint: %+v", entry)
|
||||
}
|
||||
}
|
||||
|
||||
func peerCacheRoute(routeID string, hops []string) SyntheticRoute {
|
||||
return SyntheticRoute{
|
||||
RouteID: routeID,
|
||||
ClusterID: "cluster-1",
|
||||
SourceNodeID: hops[0],
|
||||
DestinationNodeID: hops[len(hops)-1],
|
||||
Hops: append([]string{}, hops...),
|
||||
AllowedChannels: []string{SyntheticChannelFabricControl},
|
||||
ExpiresAt: time.Now().UTC().Add(time.Hour),
|
||||
}
|
||||
}
|
||||
|
||||
func peerCacheEntryByID(snapshot PeerCacheSnapshot, nodeID string) (PeerCacheEntry, bool) {
|
||||
for _, entry := range snapshot.Entries {
|
||||
if entry.NodeID == nodeID {
|
||||
return entry, true
|
||||
}
|
||||
}
|
||||
return PeerCacheEntry{}, false
|
||||
}
|
||||
@@ -0,0 +1,303 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"net"
|
||||
"net/netip"
|
||||
"net/url"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Connection intent actions.
// NOTE(review): presumably the values of PeerConnectionIntent.Action; the
// function that selects them is not visible here.
const (
	PeerConnectionIntentMaintain = "maintain"
	PeerConnectionIntentProbe    = "probe"
	PeerConnectionIntentRecover  = "recover"
)
|
||||
|
||||
// Peer transport modes.
// NOTE(review): presumably the values of PeerConnectionIntent.TransportMode;
// the classification logic is not visible here.
const (
	PeerTransportModeDirect        = "direct"
	PeerTransportModePrivateLAN    = "private_lan"
	PeerTransportModeCorporateLAN  = "corporate_lan"
	PeerTransportModeOutboundOnly  = "outbound_only"
	PeerTransportModeRelayRequired = "relay_required"
	PeerTransportModeRelayControl  = "relay_control"
	PeerTransportModeUnknown       = "unknown"
)
|
||||
|
||||
type PeerConnectionIntentPlanConfig struct {
|
||||
PeerCache PeerCacheSnapshot
|
||||
RecoveryPlan PeerRecoveryPlan
|
||||
RendezvousLeases []PeerRendezvousLease
|
||||
Now time.Time
|
||||
}
|
||||
|
||||
type PeerConnectionIntentPlan struct {
|
||||
Mode string `json:"mode"`
|
||||
IntentCount int `json:"intent_count"`
|
||||
MaintainCount int `json:"maintain_count"`
|
||||
ProbeCount int `json:"probe_count"`
|
||||
RecoverCount int `json:"recover_count"`
|
||||
DirectCount int `json:"direct_count"`
|
||||
PrivateLANCount int `json:"private_lan_count"`
|
||||
CorporateLANCount int `json:"corporate_lan_count"`
|
||||
OutboundOnlyCount int `json:"outbound_only_count"`
|
||||
RelayRequiredCount int `json:"relay_required_count"`
|
||||
RelayControlCount int `json:"relay_control_count"`
|
||||
RendezvousRequiredCount int `json:"rendezvous_required_count"`
|
||||
RendezvousResolvedCount int `json:"rendezvous_resolved_count"`
|
||||
RendezvousLeaseCount int `json:"rendezvous_lease_count"`
|
||||
GeneratedAt time.Time `json:"generated_at"`
|
||||
Intents []PeerConnectionIntent `json:"intents,omitempty"`
|
||||
}
|
||||
|
||||
// PeerConnectionIntent describes what should be done about the connection to
// one peer. PlanPeerConnectionIntents fills it from a recovery plan candidate
// combined with the peer's cache entry.
type PeerConnectionIntent struct {
	NodeID string `json:"node_id"`
	// Action, Reason, Endpoint, ConnectionState and Priority are taken from
	// the recovery plan candidate.
	Action          string `json:"action"`
	Reason          string `json:"reason"`
	Endpoint        string `json:"endpoint,omitempty"`
	ConnectionState string `json:"connection_state"`

	// Transport prefers the candidate's best transport and falls back to the
	// cache entry's; the remaining transport details come from the cache entry.
	Transport        string   `json:"transport,omitempty"`
	TransportMode    string   `json:"transport_mode"`
	Reachability     string   `json:"reachability,omitempty"`
	ConnectivityMode string   `json:"connectivity_mode,omitempty"`
	NATType          string   `json:"nat_type,omitempty"`
	PolicyTags       []string `json:"policy_tags,omitempty"`

	// TransportMode (above), RequiresRendezvous and DirectCandidate are the
	// results of classifying the intent's transport.
	RequiresRendezvous bool `json:"requires_rendezvous"`
	RendezvousResolved bool `json:"rendezvous_resolved"`
	DirectCandidate    bool `json:"direct_candidate"`
	// RelayCandidate and ControlPlaneOnly both start from the cache entry's
	// RelayControl flag.
	RelayCandidate bool `json:"relay_candidate"`

	BestCandidateID   string `json:"best_candidate_id,omitempty"`
	RendezvousLeaseID string `json:"rendezvous_lease_id,omitempty"`
	RelayNodeID       string `json:"relay_node_id,omitempty"`
	RelayEndpoint     string `json:"relay_endpoint,omitempty"`
	ControlPlaneOnly  bool   `json:"control_plane_only"`
	// RecoverySeed is true when either the candidate or the cache entry is
	// flagged as a recovery seed.
	RecoverySeed bool      `json:"recovery_seed"`
	Priority     int       `json:"priority"`
	GeneratedAt  time.Time `json:"generated_at"`
}
|
||||
|
||||
func PlanPeerConnectionIntents(cfg PeerConnectionIntentPlanConfig) PeerConnectionIntentPlan {
|
||||
now := normalizedNow(cfg.Now)
|
||||
entryByNode := map[string]PeerCacheEntry{}
|
||||
for _, entry := range cfg.PeerCache.Entries {
|
||||
if strings.TrimSpace(entry.NodeID) == "" {
|
||||
continue
|
||||
}
|
||||
entryByNode[entry.NodeID] = entry
|
||||
}
|
||||
|
||||
intents := make([]PeerConnectionIntent, 0, len(cfg.RecoveryPlan.Candidates))
|
||||
for _, candidate := range cfg.RecoveryPlan.Candidates {
|
||||
if strings.TrimSpace(candidate.NodeID) == "" {
|
||||
continue
|
||||
}
|
||||
entry := entryByNode[candidate.NodeID]
|
||||
intent := PeerConnectionIntent{
|
||||
NodeID: candidate.NodeID,
|
||||
Action: connectionIntentAction(candidate),
|
||||
Reason: candidate.Reason,
|
||||
Endpoint: candidate.Endpoint,
|
||||
ConnectionState: candidate.ConnectionState,
|
||||
Transport: firstNonEmpty(candidate.BestTransport, entry.BestTransport),
|
||||
Reachability: entry.BestReachability,
|
||||
ConnectivityMode: entry.BestConnectivity,
|
||||
NATType: entry.BestNATType,
|
||||
PolicyTags: append([]string{}, entry.BestPolicyTags...),
|
||||
BestCandidateID: firstNonEmpty(candidate.BestCandidateID, entry.BestCandidateID),
|
||||
RendezvousLeaseID: entry.RendezvousLeaseID,
|
||||
RelayNodeID: entry.RelayNodeID,
|
||||
RelayEndpoint: entry.RelayEndpoint,
|
||||
RelayCandidate: entry.RelayControl,
|
||||
ControlPlaneOnly: entry.RelayControl,
|
||||
RecoverySeed: candidate.RecoverySeed || entry.RecoverySeed,
|
||||
Priority: candidate.Priority,
|
||||
GeneratedAt: now,
|
||||
}
|
||||
mode, requiresRendezvous, directCandidate := classifyPeerTransport(intent)
|
||||
intent.TransportMode = mode
|
||||
intent.RequiresRendezvous = requiresRendezvous
|
||||
intent.DirectCandidate = directCandidate
|
||||
if intent.RequiresRendezvous {
|
||||
if lease, ok := rendezvousLeaseForPeer(cfg.RendezvousLeases, intent.NodeID, now); ok {
|
||||
applyRendezvousLease(&intent, lease)
|
||||
}
|
||||
}
|
||||
intents = append(intents, intent)
|
||||
}
|
||||
sort.SliceStable(intents, func(i, j int) bool {
|
||||
if intents[i].Priority != intents[j].Priority {
|
||||
return intents[i].Priority > intents[j].Priority
|
||||
}
|
||||
return intents[i].NodeID < intents[j].NodeID
|
||||
})
|
||||
|
||||
plan := PeerConnectionIntentPlan{
|
||||
Mode: cfg.RecoveryPlan.Mode,
|
||||
IntentCount: len(intents),
|
||||
GeneratedAt: now,
|
||||
Intents: intents,
|
||||
}
|
||||
for _, intent := range intents {
|
||||
switch intent.Action {
|
||||
case PeerConnectionIntentMaintain:
|
||||
plan.MaintainCount++
|
||||
case PeerConnectionIntentProbe:
|
||||
plan.ProbeCount++
|
||||
case PeerConnectionIntentRecover:
|
||||
plan.RecoverCount++
|
||||
}
|
||||
switch intent.TransportMode {
|
||||
case PeerTransportModeDirect:
|
||||
plan.DirectCount++
|
||||
case PeerTransportModePrivateLAN:
|
||||
plan.PrivateLANCount++
|
||||
case PeerTransportModeCorporateLAN:
|
||||
plan.CorporateLANCount++
|
||||
case PeerTransportModeOutboundOnly:
|
||||
plan.OutboundOnlyCount++
|
||||
case PeerTransportModeRelayRequired:
|
||||
plan.RelayRequiredCount++
|
||||
case PeerTransportModeRelayControl:
|
||||
plan.RelayControlCount++
|
||||
}
|
||||
if intent.RequiresRendezvous {
|
||||
plan.RendezvousRequiredCount++
|
||||
}
|
||||
if intent.RendezvousResolved {
|
||||
plan.RendezvousResolvedCount++
|
||||
}
|
||||
if intent.RendezvousLeaseID != "" {
|
||||
plan.RendezvousLeaseCount++
|
||||
}
|
||||
}
|
||||
return plan
|
||||
}
|
||||
|
||||
func connectionIntentAction(candidate PeerRecoveryCandidate) string {
|
||||
switch candidate.Reason {
|
||||
case "maintain_ready":
|
||||
return PeerConnectionIntentMaintain
|
||||
case "recover_degraded", "recover_seed", "recover_warm", "recover_peer":
|
||||
return PeerConnectionIntentRecover
|
||||
default:
|
||||
return PeerConnectionIntentProbe
|
||||
}
|
||||
}
|
||||
|
||||
func classifyPeerTransport(intent PeerConnectionIntent) (string, bool, bool) {
|
||||
transport := strings.ToLower(strings.TrimSpace(intent.Transport))
|
||||
connectivity := strings.ToLower(strings.TrimSpace(intent.ConnectivityMode))
|
||||
reachability := strings.ToLower(strings.TrimSpace(intent.Reachability))
|
||||
tags := lowerStringSet(intent.PolicyTags)
|
||||
|
||||
if strings.Contains(transport, "relay") || connectivity == "relay_required" || reachability == "relay" {
|
||||
return PeerTransportModeRelayRequired, true, false
|
||||
}
|
||||
if connectivity == "outbound_only" || reachability == "outbound_only" {
|
||||
return PeerTransportModeOutboundOnly, true, false
|
||||
}
|
||||
if tags["corp-lan"] || tags["same-site"] {
|
||||
return PeerTransportModeCorporateLAN, false, true
|
||||
}
|
||||
if tags["private-lan"] || reachability == "private" || endpointHasPrivateHost(intent.Endpoint) {
|
||||
return PeerTransportModePrivateLAN, false, true
|
||||
}
|
||||
if strings.Contains(transport, "direct") || reachability == "public" || connectivity == "direct" {
|
||||
return PeerTransportModeDirect, false, true
|
||||
}
|
||||
return PeerTransportModeUnknown, false, false
|
||||
}
|
||||
|
||||
func rendezvousLeaseForPeer(leases []PeerRendezvousLease, peerNodeID string, now time.Time) (PeerRendezvousLease, bool) {
|
||||
now = normalizedNow(now)
|
||||
candidates := make([]PeerRendezvousLease, 0, len(leases))
|
||||
for _, lease := range leases {
|
||||
if strings.TrimSpace(lease.PeerNodeID) != peerNodeID ||
|
||||
strings.TrimSpace(lease.RelayEndpoint) == "" ||
|
||||
strings.TrimSpace(lease.RelayNodeID) == "" ||
|
||||
!lease.ControlPlaneOnly ||
|
||||
lease.ExpiresAt.IsZero() ||
|
||||
!lease.ExpiresAt.After(now) {
|
||||
continue
|
||||
}
|
||||
candidates = append(candidates, lease)
|
||||
}
|
||||
if len(candidates) == 0 {
|
||||
return PeerRendezvousLease{}, false
|
||||
}
|
||||
sort.SliceStable(candidates, func(i, j int) bool {
|
||||
leftPriority := candidates[i].Priority
|
||||
rightPriority := candidates[j].Priority
|
||||
if leftPriority <= 0 {
|
||||
leftPriority = 100
|
||||
}
|
||||
if rightPriority <= 0 {
|
||||
rightPriority = 100
|
||||
}
|
||||
if leftPriority != rightPriority {
|
||||
return leftPriority < rightPriority
|
||||
}
|
||||
if !candidates[i].ExpiresAt.Equal(candidates[j].ExpiresAt) {
|
||||
return candidates[i].ExpiresAt.After(candidates[j].ExpiresAt)
|
||||
}
|
||||
return candidates[i].LeaseID < candidates[j].LeaseID
|
||||
})
|
||||
return candidates[0], true
|
||||
}
|
||||
|
||||
func applyRendezvousLease(intent *PeerConnectionIntent, lease PeerRendezvousLease) {
|
||||
intent.Endpoint = strings.TrimRight(strings.TrimSpace(lease.RelayEndpoint), "/")
|
||||
intent.Transport = firstNonEmpty(lease.Transport, "relay_control")
|
||||
intent.TransportMode = PeerTransportModeRelayControl
|
||||
intent.RequiresRendezvous = false
|
||||
intent.RendezvousResolved = true
|
||||
intent.DirectCandidate = false
|
||||
intent.RelayCandidate = true
|
||||
intent.RendezvousLeaseID = lease.LeaseID
|
||||
intent.RelayNodeID = lease.RelayNodeID
|
||||
intent.RelayEndpoint = intent.Endpoint
|
||||
intent.ControlPlaneOnly = true
|
||||
if lease.ConnectivityMode != "" {
|
||||
intent.ConnectivityMode = lease.ConnectivityMode
|
||||
}
|
||||
}
|
||||
|
||||
// endpointHasPrivateHost reports whether rawEndpoint's host is a literal IP
// address that is private, loopback or link-local unicast.
//
// The endpoint may be a URL, a host:port pair or a bare address, optionally
// bracketed. Blank input and hosts that are not IP literals (for example DNS
// names) yield false; no name resolution is performed.
func endpointHasPrivateHost(rawEndpoint string) bool {
	endpoint := strings.TrimSpace(rawEndpoint)
	if endpoint == "" {
		return false
	}

	// Prefer the URL's host component; otherwise treat the whole input as the
	// host.
	host := endpoint
	if u, err := url.Parse(endpoint); err == nil && u.Host != "" {
		host = u.Host
	}
	// Drop the port when one is present.
	if h, _, err := net.SplitHostPort(host); err == nil {
		host = h
	}

	addr, err := netip.ParseAddr(strings.Trim(host, "[]"))
	if err != nil {
		return false
	}
	switch {
	case addr.IsPrivate(), addr.IsLoopback(), addr.IsLinkLocalUnicast():
		return true
	}
	return false
}
|
||||
|
||||
// lowerStringSet builds a membership set from values, with every entry
// trimmed and lower-cased. Entries that are blank after trimming are dropped.
// The result is never nil.
func lowerStringSet(values []string) map[string]bool {
	set := map[string]bool{}
	for _, raw := range values {
		key := strings.ToLower(strings.TrimSpace(raw))
		if key == "" {
			continue
		}
		set[key] = true
	}
	return set
}
|
||||
|
||||
// firstNonEmpty returns the first value that is not blank, with surrounding
// whitespace removed, or "" when every value is blank or none are given.
func firstNonEmpty(values ...string) string {
	for _, raw := range values {
		if trimmed := strings.TrimSpace(raw); trimmed != "" {
			return trimmed
		}
	}
	return ""
}
|
||||
@@ -0,0 +1,234 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestPeerConnectionIntentsClassifyCorporateDirect(t *testing.T) {
|
||||
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
||||
plan := PlanPeerConnectionIntents(PeerConnectionIntentPlanConfig{
|
||||
PeerCache: PeerCacheSnapshot{Entries: []PeerCacheEntry{
|
||||
{
|
||||
NodeID: "node-b",
|
||||
Endpoint: "http://10.24.10.20:19001",
|
||||
BestTransport: "direct_tcp_tls",
|
||||
BestReachability: "private",
|
||||
BestConnectivity: "direct",
|
||||
BestPolicyTags: []string{"corp-lan", "same-site"},
|
||||
},
|
||||
}},
|
||||
RecoveryPlan: PeerRecoveryPlan{
|
||||
Mode: PeerRecoveryModeSteady,
|
||||
Candidates: []PeerRecoveryCandidate{
|
||||
{
|
||||
NodeID: "node-b",
|
||||
Endpoint: "http://10.24.10.20:19001",
|
||||
ConnectionState: PeerConnectionReady,
|
||||
Reason: "maintain_ready",
|
||||
Priority: 100,
|
||||
},
|
||||
},
|
||||
},
|
||||
Now: now,
|
||||
})
|
||||
|
||||
if plan.IntentCount != 1 || plan.MaintainCount != 1 || plan.CorporateLANCount != 1 {
|
||||
t.Fatalf("unexpected plan counts: %+v", plan)
|
||||
}
|
||||
intent := plan.Intents[0]
|
||||
if intent.Action != PeerConnectionIntentMaintain || intent.TransportMode != PeerTransportModeCorporateLAN || intent.RequiresRendezvous {
|
||||
t.Fatalf("unexpected corporate intent: %+v", intent)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPeerConnectionIntentsClassifyOutboundAndRelayAsRendezvousRequired(t *testing.T) {
|
||||
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
||||
plan := PlanPeerConnectionIntents(PeerConnectionIntentPlanConfig{
|
||||
PeerCache: PeerCacheSnapshot{Entries: []PeerCacheEntry{
|
||||
{
|
||||
NodeID: "node-b",
|
||||
Endpoint: "https://node-b.example.test:443",
|
||||
BestTransport: "direct_tcp_tls",
|
||||
BestReachability: "outbound_only",
|
||||
BestConnectivity: "outbound_only",
|
||||
},
|
||||
{
|
||||
NodeID: "node-c",
|
||||
Endpoint: "relay://fabric-relay/node-c",
|
||||
BestTransport: "relay",
|
||||
BestReachability: "relay",
|
||||
BestConnectivity: "relay_required",
|
||||
},
|
||||
}},
|
||||
RecoveryPlan: PeerRecoveryPlan{
|
||||
Mode: PeerRecoveryModeRecovery,
|
||||
Candidates: []PeerRecoveryCandidate{
|
||||
{
|
||||
NodeID: "node-b",
|
||||
Endpoint: "https://node-b.example.test:443",
|
||||
ConnectionState: PeerConnectionDisconnected,
|
||||
Reason: "recover_warm",
|
||||
Priority: 90,
|
||||
},
|
||||
{
|
||||
NodeID: "node-c",
|
||||
Endpoint: "relay://fabric-relay/node-c",
|
||||
ConnectionState: PeerConnectionDisconnected,
|
||||
Reason: "recover_seed",
|
||||
Priority: 80,
|
||||
},
|
||||
},
|
||||
},
|
||||
Now: now,
|
||||
})
|
||||
|
||||
if plan.RecoverCount != 2 || plan.OutboundOnlyCount != 1 || plan.RelayRequiredCount != 1 || plan.RendezvousRequiredCount != 2 {
|
||||
t.Fatalf("unexpected rendezvous counts: %+v", plan)
|
||||
}
|
||||
if plan.Intents[0].Action != PeerConnectionIntentRecover || plan.Intents[1].Action != PeerConnectionIntentRecover {
|
||||
t.Fatalf("unexpected actions: %+v", plan.Intents)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPeerConnectionIntentsResolveRendezvousWithRelayLease(t *testing.T) {
|
||||
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
||||
plan := PlanPeerConnectionIntents(PeerConnectionIntentPlanConfig{
|
||||
PeerCache: PeerCacheSnapshot{Entries: []PeerCacheEntry{
|
||||
{
|
||||
NodeID: "node-b",
|
||||
Endpoint: "relay://fabric/node-b",
|
||||
BestTransport: "relay",
|
||||
BestReachability: "relay",
|
||||
BestConnectivity: "relay_required",
|
||||
},
|
||||
}},
|
||||
RecoveryPlan: PeerRecoveryPlan{
|
||||
Mode: PeerRecoveryModeRecovery,
|
||||
Candidates: []PeerRecoveryCandidate{
|
||||
{
|
||||
NodeID: "node-b",
|
||||
Endpoint: "relay://fabric/node-b",
|
||||
ConnectionState: PeerConnectionDisconnected,
|
||||
Reason: "recover_warm",
|
||||
Priority: 100,
|
||||
},
|
||||
},
|
||||
},
|
||||
RendezvousLeases: []PeerRendezvousLease{
|
||||
{
|
||||
LeaseID: "lease-node-b-via-node-r",
|
||||
PeerNodeID: "node-b",
|
||||
RelayNodeID: "node-r",
|
||||
RelayEndpoint: "http://node-r:19000",
|
||||
Transport: "relay_control",
|
||||
ConnectivityMode: "relay_required",
|
||||
Priority: 10,
|
||||
ControlPlaneOnly: true,
|
||||
IssuedAt: now.Add(-time.Minute),
|
||||
ExpiresAt: now.Add(time.Minute),
|
||||
},
|
||||
},
|
||||
Now: now,
|
||||
})
|
||||
|
||||
if plan.IntentCount != 1 || plan.RelayControlCount != 1 || plan.RendezvousResolvedCount != 1 || plan.RendezvousRequiredCount != 0 {
|
||||
t.Fatalf("unexpected relay-control plan counts: %+v", plan)
|
||||
}
|
||||
intent := plan.Intents[0]
|
||||
if intent.TransportMode != PeerTransportModeRelayControl ||
|
||||
intent.Endpoint != "http://node-r:19000" ||
|
||||
intent.RelayNodeID != "node-r" ||
|
||||
intent.RendezvousLeaseID != "lease-node-b-via-node-r" ||
|
||||
!intent.RelayCandidate ||
|
||||
!intent.RendezvousResolved ||
|
||||
intent.RequiresRendezvous {
|
||||
t.Fatalf("unexpected resolved rendezvous intent: %+v", intent)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPeerConnectionIntentsSkipExpiredRendezvousLeaseAndReselect(t *testing.T) {
|
||||
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
||||
plan := PlanPeerConnectionIntents(PeerConnectionIntentPlanConfig{
|
||||
PeerCache: PeerCacheSnapshot{Entries: []PeerCacheEntry{
|
||||
{
|
||||
NodeID: "node-b",
|
||||
Endpoint: "relay://fabric/node-b",
|
||||
BestTransport: "relay",
|
||||
BestReachability: "relay",
|
||||
BestConnectivity: "relay_required",
|
||||
},
|
||||
}},
|
||||
RecoveryPlan: PeerRecoveryPlan{
|
||||
Mode: PeerRecoveryModeRecovery,
|
||||
Candidates: []PeerRecoveryCandidate{
|
||||
{
|
||||
NodeID: "node-b",
|
||||
Endpoint: "relay://fabric/node-b",
|
||||
ConnectionState: PeerConnectionWaiting,
|
||||
Reason: "recover_warm",
|
||||
Priority: 100,
|
||||
},
|
||||
},
|
||||
},
|
||||
RendezvousLeases: []PeerRendezvousLease{
|
||||
{
|
||||
LeaseID: "lease-expired-preferred",
|
||||
PeerNodeID: "node-b",
|
||||
RelayNodeID: "node-r-old",
|
||||
RelayEndpoint: "http://node-r-old:19000",
|
||||
Transport: "relay_control",
|
||||
ConnectivityMode: "relay_required",
|
||||
Priority: 1,
|
||||
ControlPlaneOnly: true,
|
||||
IssuedAt: now.Add(-10 * time.Minute),
|
||||
ExpiresAt: now.Add(-time.Second),
|
||||
},
|
||||
{
|
||||
LeaseID: "lease-active-reselected",
|
||||
PeerNodeID: "node-b",
|
||||
RelayNodeID: "node-r-new",
|
||||
RelayEndpoint: "http://node-r-new:19000",
|
||||
Transport: "relay_control",
|
||||
ConnectivityMode: "relay_required",
|
||||
Priority: 20,
|
||||
ControlPlaneOnly: true,
|
||||
IssuedAt: now.Add(-time.Minute),
|
||||
ExpiresAt: now.Add(time.Minute),
|
||||
},
|
||||
},
|
||||
Now: now,
|
||||
})
|
||||
|
||||
if plan.RendezvousResolvedCount != 1 || plan.RelayControlCount != 1 || plan.RendezvousRequiredCount != 0 {
|
||||
t.Fatalf("unexpected reselected plan counts: %+v", plan)
|
||||
}
|
||||
intent := plan.Intents[0]
|
||||
if intent.RendezvousLeaseID != "lease-active-reselected" ||
|
||||
intent.RelayNodeID != "node-r-new" ||
|
||||
intent.Endpoint != "http://node-r-new:19000" {
|
||||
t.Fatalf("expired lease was not skipped: %+v", intent)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPeerConnectionIntentsClassifyPrivateEndpointWithoutCandidateHints(t *testing.T) {
|
||||
plan := PlanPeerConnectionIntents(PeerConnectionIntentPlanConfig{
|
||||
PeerCache: PeerCacheSnapshot{Entries: []PeerCacheEntry{
|
||||
{NodeID: "node-b", Endpoint: "http://192.168.10.20:19001"},
|
||||
}},
|
||||
RecoveryPlan: PeerRecoveryPlan{Candidates: []PeerRecoveryCandidate{
|
||||
{
|
||||
NodeID: "node-b",
|
||||
Endpoint: "http://192.168.10.20:19001",
|
||||
ConnectionState: PeerConnectionDisconnected,
|
||||
Reason: "recover_peer",
|
||||
Priority: 10,
|
||||
},
|
||||
}},
|
||||
Now: time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC),
|
||||
})
|
||||
|
||||
if plan.PrivateLANCount != 1 || plan.Intents[0].TransportMode != PeerTransportModePrivateLAN || !plan.Intents[0].DirectCandidate {
|
||||
t.Fatalf("unexpected private endpoint classification: %+v", plan)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,304 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Link status values stored in PeerConnectionProbeResult.LinkStatus.
const (
	// PeerConnectionProbeReachable: the health probe was sent and succeeded.
	PeerConnectionProbeReachable = "reachable"
	// PeerConnectionProbeUnreachable: the health probe was sent and failed.
	PeerConnectionProbeUnreachable = "unreachable"
	// PeerConnectionProbeDeferred: no probe was sent because the peer needs
	// rendezvous or has no usable direct/relay candidate.
	PeerConnectionProbeDeferred = "deferred"
	// PeerConnectionProbeSkipped: no probe was sent because the tracker
	// reported that the peer should not be probed yet.
	PeerConnectionProbeSkipped = "skipped"
)
|
||||
|
||||
// DefaultPeerConnectionProbeTimeout is the per-probe timeout used by
// NewPeerConnectionManager when the configured ProbeTimeout is not positive.
const DefaultPeerConnectionProbeTimeout = 2 * time.Second
|
||||
|
||||
type PeerConnectionManagerConfig struct {
|
||||
Local PeerIdentity
|
||||
PeerCache *PeerCache
|
||||
Tracker *PeerConnectionTracker
|
||||
RendezvousLeases []PeerRendezvousLease
|
||||
HTTPClient *http.Client
|
||||
ProbeTimeout time.Duration
|
||||
Now func() time.Time
|
||||
}
|
||||
|
||||
type PeerConnectionManager struct {
|
||||
local PeerIdentity
|
||||
peerCache *PeerCache
|
||||
tracker *PeerConnectionTracker
|
||||
rendezvousLeases []PeerRendezvousLease
|
||||
httpClient *http.Client
|
||||
probeTimeout time.Duration
|
||||
now func() time.Time
|
||||
|
||||
mu sync.Mutex
|
||||
lastCycle PeerConnectionManagerCycle
|
||||
}
|
||||
|
||||
type PeerConnectionManagerCycle struct {
|
||||
Mode string `json:"mode"`
|
||||
StartedAt time.Time `json:"started_at"`
|
||||
CompletedAt time.Time `json:"completed_at"`
|
||||
ProbeTimeoutMs int `json:"probe_timeout_ms"`
|
||||
IntentCount int `json:"intent_count"`
|
||||
Attempted int `json:"attempted"`
|
||||
Succeeded int `json:"succeeded"`
|
||||
Failed int `json:"failed"`
|
||||
Deferred int `json:"deferred"`
|
||||
Skipped int `json:"skipped"`
|
||||
RendezvousRequiredCount int `json:"rendezvous_required_count"`
|
||||
RendezvousResolvedCount int `json:"rendezvous_resolved_count"`
|
||||
RelayControlCount int `json:"relay_control_count"`
|
||||
RecoveryPlan PeerRecoveryPlan `json:"recovery_plan"`
|
||||
IntentPlan PeerConnectionIntentPlan `json:"intent_plan"`
|
||||
Results []PeerConnectionProbeResult `json:"results,omitempty"`
|
||||
}
|
||||
|
||||
type PeerConnectionManagerSnapshot struct {
|
||||
LastCycle PeerConnectionManagerCycle `json:"last_cycle"`
|
||||
}
|
||||
|
||||
type PeerConnectionProbeResult struct {
|
||||
NodeID string `json:"node_id"`
|
||||
LinkStatus string `json:"link_status"`
|
||||
Action string `json:"action"`
|
||||
Reason string `json:"reason"`
|
||||
Endpoint string `json:"endpoint,omitempty"`
|
||||
ConnectionState PeerConnectionState `json:"connection_state"`
|
||||
TransportMode string `json:"transport_mode"`
|
||||
RequiresRendezvous bool `json:"requires_rendezvous"`
|
||||
RendezvousResolved bool `json:"rendezvous_resolved"`
|
||||
DirectCandidate bool `json:"direct_candidate"`
|
||||
RelayCandidate bool `json:"relay_candidate"`
|
||||
RendezvousLeaseID string `json:"rendezvous_lease_id,omitempty"`
|
||||
RelayNodeID string `json:"relay_node_id,omitempty"`
|
||||
RelayEndpoint string `json:"relay_endpoint,omitempty"`
|
||||
LatencyMs int `json:"latency_ms,omitempty"`
|
||||
FailureReason string `json:"failure_reason,omitempty"`
|
||||
StartedAt time.Time `json:"started_at"`
|
||||
CompletedAt time.Time `json:"completed_at"`
|
||||
}
|
||||
|
||||
func NewPeerConnectionManager(cfg PeerConnectionManagerConfig) *PeerConnectionManager {
|
||||
probeTimeout := cfg.ProbeTimeout
|
||||
if probeTimeout <= 0 {
|
||||
probeTimeout = DefaultPeerConnectionProbeTimeout
|
||||
}
|
||||
httpClient := cfg.HTTPClient
|
||||
if httpClient == nil {
|
||||
httpClient = &http.Client{
|
||||
Transport: &http.Transport{
|
||||
MaxIdleConns: 64,
|
||||
MaxIdleConnsPerHost: 8,
|
||||
IdleConnTimeout: 90 * time.Second,
|
||||
},
|
||||
Timeout: probeTimeout + time.Second,
|
||||
}
|
||||
}
|
||||
now := cfg.Now
|
||||
if now == nil {
|
||||
now = func() time.Time { return time.Now().UTC() }
|
||||
}
|
||||
return &PeerConnectionManager{
|
||||
local: cfg.Local,
|
||||
peerCache: cfg.PeerCache,
|
||||
tracker: cfg.Tracker,
|
||||
rendezvousLeases: append([]PeerRendezvousLease{}, cfg.RendezvousLeases...),
|
||||
httpClient: httpClient,
|
||||
probeTimeout: probeTimeout,
|
||||
now: now,
|
||||
}
|
||||
}
|
||||
|
||||
func (m *PeerConnectionManager) ProbeOnce(ctx context.Context) PeerConnectionManagerCycle {
|
||||
peerCache, rendezvousLeases := m.peerConfigSnapshot()
|
||||
if m == nil || peerCache == nil || m.tracker == nil {
|
||||
return PeerConnectionManagerCycle{}
|
||||
}
|
||||
startedAt := normalizedNow(m.now())
|
||||
peerSnapshot := peerCache.Snapshot()
|
||||
recoveryPlan := PlanPeerRecovery(PeerRecoveryPlanConfig{
|
||||
PeerCache: peerSnapshot,
|
||||
Connections: m.tracker.Snapshot(),
|
||||
TargetReadyPeers: DefaultStablePeerTarget,
|
||||
MaxProbeCandidates: DefaultRecoveryProbeLimit,
|
||||
Now: startedAt,
|
||||
})
|
||||
intentPlan := PlanPeerConnectionIntents(PeerConnectionIntentPlanConfig{
|
||||
PeerCache: peerSnapshot,
|
||||
RecoveryPlan: recoveryPlan,
|
||||
RendezvousLeases: rendezvousLeases,
|
||||
Now: startedAt,
|
||||
})
|
||||
cycle := PeerConnectionManagerCycle{
|
||||
Mode: recoveryPlan.Mode,
|
||||
StartedAt: startedAt,
|
||||
ProbeTimeoutMs: int(m.probeTimeout.Milliseconds()),
|
||||
IntentCount: intentPlan.IntentCount,
|
||||
RendezvousRequiredCount: intentPlan.RendezvousRequiredCount,
|
||||
RendezvousResolvedCount: intentPlan.RendezvousResolvedCount,
|
||||
RelayControlCount: intentPlan.RelayControlCount,
|
||||
RecoveryPlan: recoveryPlan,
|
||||
IntentPlan: intentPlan,
|
||||
Results: make([]PeerConnectionProbeResult, 0, len(intentPlan.Intents)),
|
||||
}
|
||||
for _, intent := range intentPlan.Intents {
|
||||
result := m.probeIntent(ctx, intent)
|
||||
cycle.Results = append(cycle.Results, result)
|
||||
switch result.LinkStatus {
|
||||
case PeerConnectionProbeReachable:
|
||||
cycle.Attempted++
|
||||
cycle.Succeeded++
|
||||
case PeerConnectionProbeUnreachable:
|
||||
cycle.Attempted++
|
||||
cycle.Failed++
|
||||
case PeerConnectionProbeDeferred:
|
||||
cycle.Deferred++
|
||||
case PeerConnectionProbeSkipped:
|
||||
cycle.Skipped++
|
||||
}
|
||||
}
|
||||
cycle.CompletedAt = normalizedNow(m.now())
|
||||
m.mu.Lock()
|
||||
m.lastCycle = cycle
|
||||
m.mu.Unlock()
|
||||
return cycle
|
||||
}
|
||||
|
||||
func (m *PeerConnectionManager) Snapshot() PeerConnectionManagerSnapshot {
|
||||
if m == nil {
|
||||
return PeerConnectionManagerSnapshot{}
|
||||
}
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
return PeerConnectionManagerSnapshot{LastCycle: m.lastCycle}
|
||||
}
|
||||
|
||||
func (m *PeerConnectionManager) UpdatePeerConfig(peerCache *PeerCache, rendezvousLeases []PeerRendezvousLease) {
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
m.peerCache = peerCache
|
||||
m.rendezvousLeases = append([]PeerRendezvousLease{}, rendezvousLeases...)
|
||||
}
|
||||
|
||||
func (m *PeerConnectionManager) peerConfigSnapshot() (*PeerCache, []PeerRendezvousLease) {
|
||||
if m == nil {
|
||||
return nil, nil
|
||||
}
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
return m.peerCache, append([]PeerRendezvousLease{}, m.rendezvousLeases...)
|
||||
}
|
||||
|
||||
func (m *PeerConnectionManager) probeIntent(ctx context.Context, intent PeerConnectionIntent) PeerConnectionProbeResult {
|
||||
startedAt := normalizedNow(m.now())
|
||||
result := PeerConnectionProbeResult{
|
||||
NodeID: intent.NodeID,
|
||||
Action: intent.Action,
|
||||
Reason: intent.Reason,
|
||||
Endpoint: intent.Endpoint,
|
||||
TransportMode: intent.TransportMode,
|
||||
RequiresRendezvous: intent.RequiresRendezvous,
|
||||
RendezvousResolved: intent.RendezvousResolved,
|
||||
DirectCandidate: intent.DirectCandidate,
|
||||
RelayCandidate: intent.RelayCandidate,
|
||||
RendezvousLeaseID: intent.RendezvousLeaseID,
|
||||
RelayNodeID: intent.RelayNodeID,
|
||||
RelayEndpoint: intent.RelayEndpoint,
|
||||
StartedAt: startedAt,
|
||||
}
|
||||
peer := PeerCacheEntry{
|
||||
NodeID: intent.NodeID,
|
||||
Endpoint: intent.Endpoint,
|
||||
Warm: true,
|
||||
WarmReason: intent.Reason,
|
||||
RecoverySeed: intent.RecoverySeed,
|
||||
BestCandidateID: intent.BestCandidateID,
|
||||
BestTransport: intent.Transport,
|
||||
RendezvousLeaseID: intent.RendezvousLeaseID,
|
||||
RelayNodeID: intent.RelayNodeID,
|
||||
RelayEndpoint: intent.RelayEndpoint,
|
||||
RelayControl: intent.RelayCandidate,
|
||||
}
|
||||
if intent.RequiresRendezvous {
|
||||
result.LinkStatus = PeerConnectionProbeDeferred
|
||||
result.FailureReason = "rendezvous_required"
|
||||
result.ConnectionState = m.tracker.RecordDeferred(peer, result.FailureReason, startedAt)
|
||||
result.CompletedAt = normalizedNow(m.now())
|
||||
return result
|
||||
}
|
||||
if strings.TrimSpace(intent.Endpoint) == "" || (!intent.DirectCandidate && !intent.RelayCandidate) {
|
||||
result.LinkStatus = PeerConnectionProbeDeferred
|
||||
result.FailureReason = "direct_candidate_unavailable"
|
||||
if intent.RelayCandidate {
|
||||
result.FailureReason = "relay_candidate_unavailable"
|
||||
}
|
||||
result.ConnectionState = m.tracker.RecordDeferred(peer, result.FailureReason, startedAt)
|
||||
result.CompletedAt = normalizedNow(m.now())
|
||||
return result
|
||||
}
|
||||
if !m.tracker.ShouldProbe(intent.NodeID, startedAt) {
|
||||
result.LinkStatus = PeerConnectionProbeSkipped
|
||||
result.FailureReason = "backoff_active"
|
||||
result.ConnectionState = m.connectionState(intent.NodeID)
|
||||
result.CompletedAt = normalizedNow(m.now())
|
||||
return result
|
||||
}
|
||||
m.tracker.BeginProbe(peer, startedAt)
|
||||
probeCtx, cancel := context.WithTimeout(ctx, m.probeTimeout)
|
||||
defer cancel()
|
||||
target := PeerIdentity{
|
||||
ClusterID: m.local.ClusterID,
|
||||
NodeID: intent.NodeID,
|
||||
}
|
||||
if intent.RelayCandidate && intent.RelayNodeID != "" {
|
||||
target.NodeID = intent.RelayNodeID
|
||||
}
|
||||
_, err := NewClient(strings.TrimRight(intent.Endpoint, "/")).withHTTPClient(m.httpClient).SendHealth(probeCtx, NewHealthMessage(m.local, target))
|
||||
completedAt := normalizedNow(m.now())
|
||||
if err != nil {
|
||||
result.LinkStatus = PeerConnectionProbeUnreachable
|
||||
result.FailureReason = err.Error()
|
||||
result.ConnectionState = m.tracker.RecordFailure(intent.NodeID, err.Error(), completedAt)
|
||||
result.CompletedAt = completedAt
|
||||
return result
|
||||
}
|
||||
latency := int(completedAt.Sub(startedAt).Milliseconds())
|
||||
if latency < 0 {
|
||||
latency = 0
|
||||
}
|
||||
result.LinkStatus = PeerConnectionProbeReachable
|
||||
result.LatencyMs = latency
|
||||
if intent.RelayCandidate {
|
||||
result.ConnectionState = m.tracker.RecordRelayReady(peer, latency, completedAt)
|
||||
} else {
|
||||
result.ConnectionState = m.tracker.RecordSuccess(intent.NodeID, latency, completedAt)
|
||||
}
|
||||
result.CompletedAt = completedAt
|
||||
return result
|
||||
}
|
||||
|
||||
func (m *PeerConnectionManager) connectionState(nodeID string) PeerConnectionState {
|
||||
snapshot := m.tracker.Snapshot()
|
||||
for _, entry := range snapshot.Entries {
|
||||
if entry.NodeID == nodeID {
|
||||
return entry
|
||||
}
|
||||
}
|
||||
return PeerConnectionState{NodeID: nodeID, State: PeerConnectionDisconnected}
|
||||
}
|
||||
|
||||
func (c Client) withHTTPClient(httpClient *http.Client) Client {
|
||||
c.HTTPClient = httpClient
|
||||
return c
|
||||
}
|
||||
@@ -0,0 +1,190 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestPeerConnectionManagerProbesDirectAndDefersRendezvous(t *testing.T) {
|
||||
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
||||
current := now
|
||||
server := httptest.NewServer(Server{
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"},
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"}
|
||||
cache := NewPeerCache(PeerCacheConfig{
|
||||
Local: local,
|
||||
PeerEndpointCandidates: map[string][]PeerEndpointCandidate{
|
||||
"node-b": {
|
||||
{
|
||||
EndpointID: "node-b-direct",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_tcp_tls",
|
||||
Address: server.URL,
|
||||
Reachability: "private",
|
||||
ConnectivityMode: "direct",
|
||||
PolicyTags: []string{"corp-lan", "same-site"},
|
||||
Priority: 1,
|
||||
},
|
||||
},
|
||||
"node-c": {
|
||||
{
|
||||
EndpointID: "node-c-relay",
|
||||
NodeID: "node-c",
|
||||
Transport: "relay",
|
||||
Address: "relay://fabric/node-c",
|
||||
Reachability: "relay",
|
||||
ConnectivityMode: "relay_required",
|
||||
Priority: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
WarmPeerLimit: 2,
|
||||
Now: now,
|
||||
})
|
||||
tracker := NewPeerConnectionTracker(cache.Snapshot(), now)
|
||||
manager := NewPeerConnectionManager(PeerConnectionManagerConfig{
|
||||
Local: local,
|
||||
PeerCache: cache,
|
||||
Tracker: tracker,
|
||||
ProbeTimeout: time.Second,
|
||||
Now: func() time.Time {
|
||||
current = current.Add(10 * time.Millisecond)
|
||||
return current
|
||||
},
|
||||
})
|
||||
|
||||
cycle := manager.ProbeOnce(context.Background())
|
||||
if cycle.Attempted != 1 || cycle.Succeeded != 1 || cycle.Deferred != 1 || cycle.RendezvousRequiredCount != 1 {
|
||||
t.Fatalf("unexpected cycle: %+v", cycle)
|
||||
}
|
||||
snapshot := tracker.Snapshot()
|
||||
if snapshot.Ready != 1 || snapshot.Waiting != 1 {
|
||||
t.Fatalf("unexpected tracker snapshot: %+v", snapshot)
|
||||
}
|
||||
if cycle.Results[0].NodeID != "node-b" || cycle.Results[0].LinkStatus != PeerConnectionProbeReachable {
|
||||
t.Fatalf("direct peer was not probed first: %+v", cycle.Results)
|
||||
}
|
||||
if cycle.Results[1].NodeID != "node-c" || cycle.Results[1].LinkStatus != PeerConnectionProbeDeferred {
|
||||
t.Fatalf("relay peer was not deferred: %+v", cycle.Results)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPeerConnectionManagerRecordsFailureAndSuppressesActiveBackoff(t *testing.T) {
|
||||
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
||||
current := now
|
||||
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"}
|
||||
cache := NewPeerCache(PeerCacheConfig{
|
||||
Local: local,
|
||||
PeerEndpoints: map[string]string{
|
||||
"node-b": "http://127.0.0.1:1",
|
||||
},
|
||||
WarmPeerLimit: 1,
|
||||
Now: now,
|
||||
})
|
||||
tracker := NewPeerConnectionTracker(cache.Snapshot(), now)
|
||||
manager := NewPeerConnectionManager(PeerConnectionManagerConfig{
|
||||
Local: local,
|
||||
PeerCache: cache,
|
||||
Tracker: tracker,
|
||||
HTTPClient: &http.Client{Timeout: 20 * time.Millisecond},
|
||||
ProbeTimeout: 20 * time.Millisecond,
|
||||
Now: func() time.Time {
|
||||
current = current.Add(10 * time.Millisecond)
|
||||
return current
|
||||
},
|
||||
})
|
||||
|
||||
for i := 0; i < 3; i++ {
|
||||
manager.ProbeOnce(context.Background())
|
||||
}
|
||||
backoff := tracker.Snapshot()
|
||||
if backoff.Backoff != 1 {
|
||||
t.Fatalf("expected backoff after repeated failures: %+v", backoff)
|
||||
}
|
||||
cycle := manager.ProbeOnce(context.Background())
|
||||
if cycle.Attempted != 0 || len(cycle.Results) != 0 {
|
||||
t.Fatalf("active backoff peer should not be attempted: %+v", cycle)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPeerConnectionManagerProbesRelayControlLease(t *testing.T) {
|
||||
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
||||
current := now
|
||||
server := httptest.NewServer(Server{
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-r"},
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"}
|
||||
leases := []PeerRendezvousLease{
|
||||
{
|
||||
LeaseID: "lease-node-b-via-node-r",
|
||||
PeerNodeID: "node-b",
|
||||
RelayNodeID: "node-r",
|
||||
RelayEndpoint: server.URL,
|
||||
Transport: "relay_control",
|
||||
ConnectivityMode: "relay_required",
|
||||
Priority: 10,
|
||||
ControlPlaneOnly: true,
|
||||
IssuedAt: now.Add(-time.Minute),
|
||||
ExpiresAt: now.Add(time.Minute),
|
||||
},
|
||||
}
|
||||
cache := NewPeerCache(PeerCacheConfig{
|
||||
Local: local,
|
||||
PeerEndpointCandidates: map[string][]PeerEndpointCandidate{
|
||||
"node-b": {
|
||||
{
|
||||
EndpointID: "node-b-relay",
|
||||
NodeID: "node-b",
|
||||
Transport: "relay",
|
||||
Address: "relay://fabric/node-b",
|
||||
Reachability: "relay",
|
||||
ConnectivityMode: "relay_required",
|
||||
Priority: 10,
|
||||
},
|
||||
},
|
||||
},
|
||||
RendezvousLeases: leases,
|
||||
WarmPeerLimit: 1,
|
||||
Now: now,
|
||||
})
|
||||
tracker := NewPeerConnectionTracker(cache.Snapshot(), now)
|
||||
manager := NewPeerConnectionManager(PeerConnectionManagerConfig{
|
||||
Local: local,
|
||||
PeerCache: cache,
|
||||
Tracker: tracker,
|
||||
RendezvousLeases: leases,
|
||||
ProbeTimeout: time.Second,
|
||||
Now: func() time.Time {
|
||||
current = current.Add(10 * time.Millisecond)
|
||||
return current
|
||||
},
|
||||
})
|
||||
|
||||
cycle := manager.ProbeOnce(context.Background())
|
||||
if cycle.Attempted != 1 ||
|
||||
cycle.Succeeded != 1 ||
|
||||
cycle.Deferred != 0 ||
|
||||
cycle.RelayControlCount != 1 ||
|
||||
cycle.RendezvousResolvedCount != 1 ||
|
||||
cycle.RendezvousRequiredCount != 0 {
|
||||
t.Fatalf("unexpected relay-control cycle: %+v", cycle)
|
||||
}
|
||||
if len(cycle.Results) != 1 ||
|
||||
cycle.Results[0].NodeID != "node-b" ||
|
||||
cycle.Results[0].RelayNodeID != "node-r" ||
|
||||
cycle.Results[0].ConnectionState.State != PeerConnectionRelayReady {
|
||||
t.Fatalf("unexpected relay-control result: %+v", cycle.Results)
|
||||
}
|
||||
snapshot := tracker.Snapshot()
|
||||
if snapshot.RelayReady != 1 || snapshot.Waiting != 0 {
|
||||
t.Fatalf("unexpected tracker snapshot: %+v", snapshot)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,284 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Connection lifecycle states stored in PeerConnectionState.State and used as
// keys of PeerConnectionSnapshot.StateCounts.
const (
	// PeerConnectionDisconnected is the initial state of a tracked peer.
	PeerConnectionDisconnected = "disconnected"
	// PeerConnectionConnecting is set by BeginProbe while a probe is in flight.
	PeerConnectionConnecting = "connecting"
	// PeerConnectionReady is set by RecordSuccess for latencies below 500ms.
	PeerConnectionReady = "ready"
	// PeerConnectionRelayReady is set by RecordRelayReady.
	PeerConnectionRelayReady = "relay_ready"
	// PeerConnectionDegraded is set by RecordSuccess for latencies of 500ms or
	// more, and by RecordFailure before the third consecutive failure.
	PeerConnectionDegraded = "degraded"
	// PeerConnectionBackoff is set by RecordFailure from the third consecutive
	// failure onward.
	PeerConnectionBackoff = "backoff"
	// PeerConnectionWaiting is set by RecordDeferred.
	PeerConnectionWaiting = "waiting_rendezvous"
)
|
||||
|
||||
// Backoff tuning consumed by peerConnectionBackoffDuration.
const (
	// peerConnectionBackoffBase is the backoff applied at the third
	// consecutive failure; each further failure adds another multiple of it.
	peerConnectionBackoffBase = 5 * time.Second
	// peerConnectionBackoffMax is the upper bound on any computed backoff.
	peerConnectionBackoffMax = time.Minute
)
|
||||
|
||||
type PeerConnectionTracker struct {
|
||||
mu sync.Mutex
|
||||
entries map[string]PeerConnectionState
|
||||
}
|
||||
|
||||
// PeerConnectionState is the tracker's view of one peer: its lifecycle state,
// the peer-cache attributes last seen for it, and the outcome of recent probes.
//
// NOTE: encoding/json's omitempty never omits a struct value, so a zero
// LastProbeAt or BackoffUntil is still serialized.
type PeerConnectionState struct {
	// Identity and lifecycle state (one of the PeerConnection* constants).
	NodeID string `json:"node_id"`
	State  string `json:"state"`

	// Attributes copied from the PeerCacheEntry supplied to the tracker.
	Warm              bool   `json:"warm"`
	WarmReason        string `json:"warm_reason,omitempty"`
	Endpoint          string `json:"endpoint,omitempty"`
	BestCandidateID   string `json:"best_candidate_id,omitempty"`
	RendezvousLeaseID string `json:"rendezvous_lease_id,omitempty"`
	RelayNodeID       string `json:"relay_node_id,omitempty"`
	RelayEndpoint     string `json:"relay_endpoint,omitempty"`
	RelayControl      bool   `json:"relay_control"`

	// Probe outcomes. A success resets the failure streak and vice versa.
	ConsecutiveSuccesses int    `json:"consecutive_successes"`
	ConsecutiveFailures  int    `json:"consecutive_failures"`
	LastLatencyMs        int    `json:"last_latency_ms,omitempty"`
	LastFailureReason    string `json:"last_failure_reason,omitempty"`

	// Timestamps, normalized to UTC by the tracker before being stored.
	LastTransitionAt time.Time `json:"last_transition_at"`
	LastProbeAt      time.Time `json:"last_probe_at,omitempty"`
	BackoffUntil     time.Time `json:"backoff_until,omitempty"`
}
|
||||
|
||||
type PeerConnectionSnapshot struct {
|
||||
Total int `json:"total"`
|
||||
Ready int `json:"ready"`
|
||||
RelayReady int `json:"relay_ready"`
|
||||
Degraded int `json:"degraded"`
|
||||
Backoff int `json:"backoff"`
|
||||
Waiting int `json:"waiting_rendezvous"`
|
||||
Connecting int `json:"connecting"`
|
||||
Disconnected int `json:"disconnected"`
|
||||
StateCounts map[string]int `json:"state_counts"`
|
||||
Entries []PeerConnectionState `json:"entries"`
|
||||
LastTransitionAt time.Time `json:"last_transition_at,omitempty"`
|
||||
}
|
||||
|
||||
func NewPeerConnectionTracker(peerSnapshot PeerCacheSnapshot, now time.Time) *PeerConnectionTracker {
|
||||
now = normalizedNow(now)
|
||||
tracker := &PeerConnectionTracker{entries: map[string]PeerConnectionState{}}
|
||||
for _, peer := range peerSnapshot.Entries {
|
||||
if !peer.Warm || peer.NodeID == "" {
|
||||
continue
|
||||
}
|
||||
tracker.entries[peer.NodeID] = PeerConnectionState{
|
||||
NodeID: peer.NodeID,
|
||||
State: PeerConnectionDisconnected,
|
||||
Warm: peer.Warm,
|
||||
WarmReason: peer.WarmReason,
|
||||
Endpoint: peer.Endpoint,
|
||||
BestCandidateID: peer.BestCandidateID,
|
||||
LastTransitionAt: now,
|
||||
}
|
||||
}
|
||||
return tracker
|
||||
}
|
||||
|
||||
func (t *PeerConnectionTracker) ShouldProbe(nodeID string, now time.Time) bool {
|
||||
if t == nil {
|
||||
return true
|
||||
}
|
||||
t.mu.Lock()
|
||||
defer t.mu.Unlock()
|
||||
entry, ok := t.entries[nodeID]
|
||||
if !ok {
|
||||
return true
|
||||
}
|
||||
now = normalizedNow(now)
|
||||
return entry.State != PeerConnectionBackoff || entry.BackoffUntil.IsZero() || !entry.BackoffUntil.After(now)
|
||||
}
|
||||
|
||||
func (t *PeerConnectionTracker) BeginProbe(peer PeerCacheEntry, now time.Time) PeerConnectionState {
|
||||
if t == nil {
|
||||
return PeerConnectionState{}
|
||||
}
|
||||
t.mu.Lock()
|
||||
defer t.mu.Unlock()
|
||||
now = normalizedNow(now)
|
||||
entry := t.entry(peer, now)
|
||||
if entry.State != PeerConnectionReady && entry.State != PeerConnectionDegraded {
|
||||
entry.State = PeerConnectionConnecting
|
||||
entry.LastTransitionAt = now
|
||||
}
|
||||
entry.LastProbeAt = now
|
||||
t.entries[peer.NodeID] = entry
|
||||
return entry
|
||||
}
|
||||
|
||||
func (t *PeerConnectionTracker) RecordSuccess(nodeID string, latencyMs int, now time.Time) PeerConnectionState {
|
||||
if t == nil {
|
||||
return PeerConnectionState{}
|
||||
}
|
||||
t.mu.Lock()
|
||||
defer t.mu.Unlock()
|
||||
now = normalizedNow(now)
|
||||
entry := t.entries[nodeID]
|
||||
entry.NodeID = nodeID
|
||||
entry.ConsecutiveSuccesses++
|
||||
entry.ConsecutiveFailures = 0
|
||||
entry.LastLatencyMs = latencyMs
|
||||
entry.LastFailureReason = ""
|
||||
entry.LastProbeAt = now
|
||||
entry.BackoffUntil = time.Time{}
|
||||
nextState := PeerConnectionReady
|
||||
if latencyMs >= 500 {
|
||||
nextState = PeerConnectionDegraded
|
||||
}
|
||||
if entry.State != nextState {
|
||||
entry.State = nextState
|
||||
entry.LastTransitionAt = now
|
||||
}
|
||||
t.entries[nodeID] = entry
|
||||
return entry
|
||||
}
|
||||
|
||||
func (t *PeerConnectionTracker) RecordRelayReady(peer PeerCacheEntry, latencyMs int, now time.Time) PeerConnectionState {
|
||||
if t == nil {
|
||||
return PeerConnectionState{}
|
||||
}
|
||||
t.mu.Lock()
|
||||
defer t.mu.Unlock()
|
||||
now = normalizedNow(now)
|
||||
entry := t.entry(peer, now)
|
||||
entry.ConsecutiveSuccesses++
|
||||
entry.ConsecutiveFailures = 0
|
||||
entry.LastLatencyMs = latencyMs
|
||||
entry.LastFailureReason = ""
|
||||
entry.LastProbeAt = now
|
||||
entry.BackoffUntil = time.Time{}
|
||||
if entry.State != PeerConnectionRelayReady {
|
||||
entry.State = PeerConnectionRelayReady
|
||||
entry.LastTransitionAt = now
|
||||
}
|
||||
t.entries[peer.NodeID] = entry
|
||||
return entry
|
||||
}
|
||||
|
||||
func (t *PeerConnectionTracker) RecordFailure(nodeID string, reason string, now time.Time) PeerConnectionState {
|
||||
if t == nil {
|
||||
return PeerConnectionState{}
|
||||
}
|
||||
t.mu.Lock()
|
||||
defer t.mu.Unlock()
|
||||
now = normalizedNow(now)
|
||||
entry := t.entries[nodeID]
|
||||
entry.NodeID = nodeID
|
||||
entry.ConsecutiveFailures++
|
||||
entry.ConsecutiveSuccesses = 0
|
||||
entry.LastFailureReason = reason
|
||||
entry.LastProbeAt = now
|
||||
nextState := PeerConnectionDegraded
|
||||
if entry.ConsecutiveFailures >= 3 {
|
||||
nextState = PeerConnectionBackoff
|
||||
entry.BackoffUntil = now.Add(peerConnectionBackoffDuration(entry.ConsecutiveFailures))
|
||||
}
|
||||
if entry.State != nextState {
|
||||
entry.State = nextState
|
||||
entry.LastTransitionAt = now
|
||||
}
|
||||
t.entries[nodeID] = entry
|
||||
return entry
|
||||
}
|
||||
|
||||
func (t *PeerConnectionTracker) RecordDeferred(peer PeerCacheEntry, reason string, now time.Time) PeerConnectionState {
|
||||
if t == nil {
|
||||
return PeerConnectionState{}
|
||||
}
|
||||
t.mu.Lock()
|
||||
defer t.mu.Unlock()
|
||||
now = normalizedNow(now)
|
||||
entry := t.entry(peer, now)
|
||||
entry.State = PeerConnectionWaiting
|
||||
entry.LastFailureReason = reason
|
||||
entry.LastProbeAt = time.Time{}
|
||||
entry.LastTransitionAt = now
|
||||
entry.BackoffUntil = time.Time{}
|
||||
t.entries[peer.NodeID] = entry
|
||||
return entry
|
||||
}
|
||||
|
||||
func (t *PeerConnectionTracker) Snapshot() PeerConnectionSnapshot {
|
||||
if t == nil {
|
||||
return PeerConnectionSnapshot{StateCounts: map[string]int{}}
|
||||
}
|
||||
t.mu.Lock()
|
||||
defer t.mu.Unlock()
|
||||
entries := make([]PeerConnectionState, 0, len(t.entries))
|
||||
counts := map[string]int{
|
||||
PeerConnectionDisconnected: 0,
|
||||
PeerConnectionConnecting: 0,
|
||||
PeerConnectionReady: 0,
|
||||
PeerConnectionRelayReady: 0,
|
||||
PeerConnectionDegraded: 0,
|
||||
PeerConnectionBackoff: 0,
|
||||
PeerConnectionWaiting: 0,
|
||||
}
|
||||
var lastTransition time.Time
|
||||
for _, entry := range t.entries {
|
||||
entries = append(entries, entry)
|
||||
counts[entry.State]++
|
||||
if entry.LastTransitionAt.After(lastTransition) {
|
||||
lastTransition = entry.LastTransitionAt
|
||||
}
|
||||
}
|
||||
sort.SliceStable(entries, func(i, j int) bool {
|
||||
return entries[i].NodeID < entries[j].NodeID
|
||||
})
|
||||
return PeerConnectionSnapshot{
|
||||
Total: len(entries),
|
||||
Ready: counts[PeerConnectionReady],
|
||||
RelayReady: counts[PeerConnectionRelayReady],
|
||||
Degraded: counts[PeerConnectionDegraded],
|
||||
Backoff: counts[PeerConnectionBackoff],
|
||||
Waiting: counts[PeerConnectionWaiting],
|
||||
Connecting: counts[PeerConnectionConnecting],
|
||||
Disconnected: counts[PeerConnectionDisconnected],
|
||||
StateCounts: counts,
|
||||
Entries: entries,
|
||||
LastTransitionAt: lastTransition,
|
||||
}
|
||||
}
|
||||
|
||||
func (t *PeerConnectionTracker) entry(peer PeerCacheEntry, now time.Time) PeerConnectionState {
|
||||
entry, ok := t.entries[peer.NodeID]
|
||||
if !ok {
|
||||
entry = PeerConnectionState{
|
||||
NodeID: peer.NodeID,
|
||||
State: PeerConnectionDisconnected,
|
||||
LastTransitionAt: now,
|
||||
}
|
||||
}
|
||||
entry.Warm = peer.Warm
|
||||
entry.WarmReason = peer.WarmReason
|
||||
entry.Endpoint = peer.Endpoint
|
||||
entry.BestCandidateID = peer.BestCandidateID
|
||||
entry.RendezvousLeaseID = peer.RendezvousLeaseID
|
||||
entry.RelayNodeID = peer.RelayNodeID
|
||||
entry.RelayEndpoint = peer.RelayEndpoint
|
||||
entry.RelayControl = peer.RelayControl
|
||||
return entry
|
||||
}
|
||||
|
||||
func peerConnectionBackoffDuration(failures int) time.Duration {
|
||||
if failures < 3 {
|
||||
return 0
|
||||
}
|
||||
backoff := peerConnectionBackoffBase * time.Duration(failures-2)
|
||||
if backoff > peerConnectionBackoffMax {
|
||||
return peerConnectionBackoffMax
|
||||
}
|
||||
return backoff
|
||||
}
|
||||
|
||||
// normalizedNow converts now to UTC, substituting the current wall-clock time
// when now is the zero time.
func normalizedNow(now time.Time) time.Time {
	if !now.IsZero() {
		return now.UTC()
	}
	return time.Now().UTC()
}
|
||||
@@ -0,0 +1,76 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestPeerConnectionTrackerTransitionsReadyAndDegraded(t *testing.T) {
|
||||
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
||||
tracker := NewPeerConnectionTracker(PeerCacheSnapshot{
|
||||
Entries: []PeerCacheEntry{
|
||||
{NodeID: "node-b", Warm: true, WarmReason: "route_adjacent", Endpoint: "http://node-b:19000"},
|
||||
},
|
||||
}, now)
|
||||
|
||||
begin := tracker.BeginProbe(PeerCacheEntry{NodeID: "node-b", Warm: true}, now.Add(time.Second))
|
||||
if begin.State != PeerConnectionConnecting {
|
||||
t.Fatalf("begin state = %q, want connecting", begin.State)
|
||||
}
|
||||
ready := tracker.RecordSuccess("node-b", 42, now.Add(2*time.Second))
|
||||
if ready.State != PeerConnectionReady || ready.ConsecutiveSuccesses != 1 || ready.ConsecutiveFailures != 0 {
|
||||
t.Fatalf("ready state unexpected: %+v", ready)
|
||||
}
|
||||
degraded := tracker.RecordSuccess("node-b", 800, now.Add(3*time.Second))
|
||||
if degraded.State != PeerConnectionDegraded || degraded.LastLatencyMs != 800 {
|
||||
t.Fatalf("degraded state unexpected: %+v", degraded)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPeerConnectionTrackerBackoffAfterRepeatedFailures(t *testing.T) {
|
||||
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
||||
tracker := NewPeerConnectionTracker(PeerCacheSnapshot{
|
||||
Entries: []PeerCacheEntry{{NodeID: "node-b", Warm: true}},
|
||||
}, now)
|
||||
|
||||
first := tracker.RecordFailure("node-b", "timeout", now.Add(time.Second))
|
||||
if first.State != PeerConnectionDegraded {
|
||||
t.Fatalf("first failure state = %q, want degraded", first.State)
|
||||
}
|
||||
_ = tracker.RecordFailure("node-b", "timeout", now.Add(2*time.Second))
|
||||
third := tracker.RecordFailure("node-b", "timeout", now.Add(3*time.Second))
|
||||
if third.State != PeerConnectionBackoff || third.BackoffUntil.IsZero() {
|
||||
t.Fatalf("third failure did not enter backoff: %+v", third)
|
||||
}
|
||||
if tracker.ShouldProbe("node-b", now.Add(4*time.Second)) {
|
||||
t.Fatal("ShouldProbe returned true during backoff")
|
||||
}
|
||||
if !tracker.ShouldProbe("node-b", third.BackoffUntil.Add(time.Millisecond)) {
|
||||
t.Fatal("ShouldProbe returned false after backoff")
|
||||
}
|
||||
recovered := tracker.RecordSuccess("node-b", 12, third.BackoffUntil.Add(time.Second))
|
||||
if recovered.State != PeerConnectionReady || recovered.ConsecutiveFailures != 0 || !recovered.BackoffUntil.IsZero() {
|
||||
t.Fatalf("success did not recover from backoff: %+v", recovered)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPeerConnectionTrackerSnapshotCountsStates(t *testing.T) {
|
||||
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
||||
tracker := NewPeerConnectionTracker(PeerCacheSnapshot{
|
||||
Entries: []PeerCacheEntry{
|
||||
{NodeID: "node-a", Warm: true},
|
||||
{NodeID: "node-b", Warm: true},
|
||||
{NodeID: "node-c", Warm: true},
|
||||
},
|
||||
}, now)
|
||||
tracker.RecordSuccess("node-a", 25, now.Add(time.Second))
|
||||
tracker.RecordFailure("node-b", "timeout", now.Add(time.Second))
|
||||
tracker.RecordFailure("node-c", "timeout", now.Add(time.Second))
|
||||
tracker.RecordFailure("node-c", "timeout", now.Add(2*time.Second))
|
||||
tracker.RecordFailure("node-c", "timeout", now.Add(3*time.Second))
|
||||
|
||||
snapshot := tracker.Snapshot()
|
||||
if snapshot.Total != 3 || snapshot.Ready != 1 || snapshot.Degraded != 1 || snapshot.Backoff != 1 {
|
||||
t.Fatalf("unexpected snapshot: %+v", snapshot)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,276 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Planner modes reported in PeerRecoveryPlan.Mode.
const (
	// PeerRecoveryModeSteady is used when the ready-peer target is met.
	PeerRecoveryModeSteady = "steady"
	// PeerRecoveryModeRecovery is used when fewer peers are ready than targeted.
	PeerRecoveryModeRecovery = "recovery"
)
|
||||
|
||||
// Defaults applied by PlanPeerRecovery when the config leaves a value unset
// (zero or negative).
const (
	// DefaultStablePeerTarget is the default number of ready peers to aim for.
	DefaultStablePeerTarget = 3
	// DefaultRecoveryProbeLimit is the default cap on planned probe candidates.
	DefaultRecoveryProbeLimit = 6
)
|
||||
|
||||
type PeerRecoveryPlanConfig struct {
|
||||
PeerCache PeerCacheSnapshot
|
||||
Connections PeerConnectionSnapshot
|
||||
TargetReadyPeers int
|
||||
MaxProbeCandidates int
|
||||
Now time.Time
|
||||
}
|
||||
|
||||
type PeerRecoveryPlan struct {
|
||||
Mode string `json:"mode"`
|
||||
Healthy bool `json:"healthy"`
|
||||
TargetReadyPeers int `json:"target_ready_peers"`
|
||||
ReadyPeerCount int `json:"ready_peer_count"`
|
||||
DegradedPeerCount int `json:"degraded_peer_count"`
|
||||
BackoffPeerCount int `json:"backoff_peer_count"`
|
||||
ConnectablePeerCount int `json:"connectable_peer_count"`
|
||||
Deficit int `json:"deficit"`
|
||||
ProbeCandidateCount int `json:"probe_candidate_count"`
|
||||
RecoverySeedCandidateCount int `json:"recovery_seed_candidate_count"`
|
||||
GeneratedAt time.Time `json:"generated_at"`
|
||||
Candidates []PeerRecoveryCandidate `json:"candidates,omitempty"`
|
||||
}
|
||||
|
||||
// PeerRecoveryCandidate is one peer that PlanPeerRecovery proposes to probe.
//
// NOTE: encoding/json's omitempty never omits a struct value, so a zero
// BackoffUntil is still serialized.
type PeerRecoveryCandidate struct {
	// Peer-cache attributes; Endpoint is stored with surrounding whitespace
	// trimmed.
	NodeID          string `json:"node_id"`
	Endpoint        string `json:"endpoint,omitempty"`
	Warm            bool   `json:"warm"`
	WarmReason      string `json:"warm_reason,omitempty"`
	RecoverySeed    bool   `json:"recovery_seed"`
	BestCandidateID string `json:"best_candidate_id,omitempty"`
	BestTransport   string `json:"best_transport,omitempty"`

	// Connection attributes; ConnectionState defaults to "disconnected" for
	// peers without a tracked connection.
	ConnectionState     string    `json:"connection_state"`
	ConsecutiveFailures int       `json:"consecutive_failures,omitempty"`
	LastLatencyMs       int       `json:"last_latency_ms,omitempty"`
	BackoffUntil        time.Time `json:"backoff_until,omitempty"`

	// Reason is why the peer was selected (for example "maintain_ready");
	// Priority is its sort score, higher first.
	Reason   string `json:"reason"`
	Priority int    `json:"priority"`
}
|
||||
|
||||
type peerRecoveryCandidateBuild struct {
|
||||
PeerRecoveryCandidate
|
||||
}
|
||||
|
||||
func PlanPeerRecovery(cfg PeerRecoveryPlanConfig) PeerRecoveryPlan {
|
||||
now := normalizedNow(cfg.Now)
|
||||
target := cfg.TargetReadyPeers
|
||||
if target <= 0 {
|
||||
target = DefaultStablePeerTarget
|
||||
}
|
||||
limit := cfg.MaxProbeCandidates
|
||||
if limit <= 0 {
|
||||
limit = DefaultRecoveryProbeLimit
|
||||
}
|
||||
connectable := connectablePeerCount(cfg.PeerCache)
|
||||
if target > connectable {
|
||||
target = connectable
|
||||
}
|
||||
if limit < target {
|
||||
limit = target
|
||||
}
|
||||
|
||||
connectionByNode := map[string]PeerConnectionState{}
|
||||
for _, connection := range cfg.Connections.Entries {
|
||||
if strings.TrimSpace(connection.NodeID) == "" {
|
||||
continue
|
||||
}
|
||||
connectionByNode[connection.NodeID] = connection
|
||||
}
|
||||
|
||||
entryByNode := map[string]PeerCacheEntry{}
|
||||
for _, entry := range cfg.PeerCache.Entries {
|
||||
if strings.TrimSpace(entry.NodeID) == "" {
|
||||
continue
|
||||
}
|
||||
entryByNode[entry.NodeID] = entry
|
||||
}
|
||||
|
||||
ready := 0
|
||||
degraded := 0
|
||||
backoff := 0
|
||||
for nodeID, connection := range connectionByNode {
|
||||
entry, ok := entryByNode[nodeID]
|
||||
if !ok || strings.TrimSpace(entry.Endpoint) == "" {
|
||||
continue
|
||||
}
|
||||
switch connection.State {
|
||||
case PeerConnectionReady, PeerConnectionRelayReady:
|
||||
ready++
|
||||
case PeerConnectionDegraded:
|
||||
degraded++
|
||||
case PeerConnectionBackoff:
|
||||
backoff++
|
||||
}
|
||||
}
|
||||
|
||||
deficit := target - ready
|
||||
if deficit < 0 {
|
||||
deficit = 0
|
||||
}
|
||||
mode := PeerRecoveryModeSteady
|
||||
if deficit > 0 {
|
||||
mode = PeerRecoveryModeRecovery
|
||||
}
|
||||
if mode == PeerRecoveryModeSteady {
|
||||
limit = target
|
||||
}
|
||||
|
||||
candidates := make([]peerRecoveryCandidateBuild, 0, len(cfg.PeerCache.Entries))
|
||||
for _, entry := range cfg.PeerCache.Entries {
|
||||
if strings.TrimSpace(entry.NodeID) == "" || strings.TrimSpace(entry.Endpoint) == "" {
|
||||
continue
|
||||
}
|
||||
connection := connectionByNode[entry.NodeID]
|
||||
if connection.State == "" {
|
||||
connection.State = PeerConnectionDisconnected
|
||||
}
|
||||
if connection.State == PeerConnectionBackoff && connection.BackoffUntil.After(now) {
|
||||
continue
|
||||
}
|
||||
reason, ok := peerRecoveryCandidateReason(mode, entry, connection)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
candidate := PeerRecoveryCandidate{
|
||||
NodeID: entry.NodeID,
|
||||
Endpoint: strings.TrimSpace(entry.Endpoint),
|
||||
Warm: entry.Warm,
|
||||
WarmReason: entry.WarmReason,
|
||||
RecoverySeed: entry.RecoverySeed,
|
||||
BestCandidateID: entry.BestCandidateID,
|
||||
BestTransport: entry.BestTransport,
|
||||
ConnectionState: connection.State,
|
||||
ConsecutiveFailures: connection.ConsecutiveFailures,
|
||||
LastLatencyMs: connection.LastLatencyMs,
|
||||
BackoffUntil: connection.BackoffUntil,
|
||||
Reason: reason,
|
||||
Priority: peerRecoveryCandidatePriority(entry, connection, reason),
|
||||
}
|
||||
candidates = append(candidates, peerRecoveryCandidateBuild{PeerRecoveryCandidate: candidate})
|
||||
}
|
||||
sort.SliceStable(candidates, func(i, j int) bool {
|
||||
if candidates[i].Priority != candidates[j].Priority {
|
||||
return candidates[i].Priority > candidates[j].Priority
|
||||
}
|
||||
return candidates[i].NodeID < candidates[j].NodeID
|
||||
})
|
||||
if len(candidates) > limit {
|
||||
candidates = candidates[:limit]
|
||||
}
|
||||
|
||||
outCandidates := make([]PeerRecoveryCandidate, 0, len(candidates))
|
||||
recoverySeedCandidates := 0
|
||||
for _, candidate := range candidates {
|
||||
outCandidates = append(outCandidates, candidate.PeerRecoveryCandidate)
|
||||
if candidate.RecoverySeed {
|
||||
recoverySeedCandidates++
|
||||
}
|
||||
}
|
||||
|
||||
return PeerRecoveryPlan{
|
||||
Mode: mode,
|
||||
Healthy: deficit == 0,
|
||||
TargetReadyPeers: target,
|
||||
ReadyPeerCount: ready,
|
||||
DegradedPeerCount: degraded,
|
||||
BackoffPeerCount: backoff,
|
||||
ConnectablePeerCount: connectable,
|
||||
Deficit: deficit,
|
||||
ProbeCandidateCount: len(outCandidates),
|
||||
RecoverySeedCandidateCount: recoverySeedCandidates,
|
||||
GeneratedAt: now,
|
||||
Candidates: outCandidates,
|
||||
}
|
||||
}
|
||||
|
||||
func peerRecoveryCandidateReason(mode string, entry PeerCacheEntry, connection PeerConnectionState) (string, bool) {
|
||||
if mode == PeerRecoveryModeSteady {
|
||||
if connection.State == PeerConnectionReady || connection.State == PeerConnectionRelayReady {
|
||||
return "maintain_ready", true
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
if connection.State == PeerConnectionReady || connection.State == PeerConnectionRelayReady {
|
||||
return "maintain_ready", true
|
||||
}
|
||||
if connection.State == PeerConnectionDegraded {
|
||||
return "recover_degraded", true
|
||||
}
|
||||
if entry.Warm {
|
||||
return "recover_warm", true
|
||||
}
|
||||
if entry.RecoverySeed {
|
||||
return "recover_seed", true
|
||||
}
|
||||
return "recover_peer", true
|
||||
}
|
||||
|
||||
func peerRecoveryCandidatePriority(entry PeerCacheEntry, connection PeerConnectionState, reason string) int {
|
||||
score := 0
|
||||
if entry.Warm {
|
||||
score += 1000
|
||||
}
|
||||
switch entry.WarmReason {
|
||||
case "route_adjacent":
|
||||
score += 500
|
||||
case "recovery_seed":
|
||||
score += 350
|
||||
case "endpoint_candidate":
|
||||
score += 200
|
||||
case "peer_endpoint":
|
||||
score += 100
|
||||
}
|
||||
if entry.RecoverySeed {
|
||||
score += 250
|
||||
}
|
||||
if entry.BestCandidateID != "" {
|
||||
score += 150
|
||||
}
|
||||
score += entry.BestCandidateScore / 10
|
||||
switch connection.State {
|
||||
case PeerConnectionReady, PeerConnectionRelayReady:
|
||||
score += 600
|
||||
case PeerConnectionDegraded:
|
||||
score += 350
|
||||
case PeerConnectionConnecting:
|
||||
score += 200
|
||||
case PeerConnectionDisconnected:
|
||||
score += 100
|
||||
}
|
||||
switch reason {
|
||||
case "maintain_ready":
|
||||
score += 500
|
||||
case "recover_degraded":
|
||||
score += 300
|
||||
case "recover_seed":
|
||||
score += 250
|
||||
case "recover_warm":
|
||||
score += 150
|
||||
}
|
||||
if connection.LastLatencyMs > 0 {
|
||||
score -= connection.LastLatencyMs / 10
|
||||
}
|
||||
if score < 0 {
|
||||
return 0
|
||||
}
|
||||
return score
|
||||
}
|
||||
|
||||
func connectablePeerCount(snapshot PeerCacheSnapshot) int {
|
||||
count := 0
|
||||
for _, entry := range snapshot.Entries {
|
||||
if strings.TrimSpace(entry.NodeID) == "" || strings.TrimSpace(entry.Endpoint) == "" {
|
||||
continue
|
||||
}
|
||||
count++
|
||||
}
|
||||
return count
|
||||
}
|
||||
@@ -0,0 +1,139 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestPeerRecoveryPlanMaintainsBoundedReadyPeers(t *testing.T) {
|
||||
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
||||
plan := PlanPeerRecovery(PeerRecoveryPlanConfig{
|
||||
PeerCache: PeerCacheSnapshot{
|
||||
Entries: []PeerCacheEntry{
|
||||
recoveryPlanPeer("node-a", true, false, "route_adjacent"),
|
||||
recoveryPlanPeer("node-b", true, false, "route_adjacent"),
|
||||
recoveryPlanPeer("node-c", true, false, "peer_endpoint"),
|
||||
recoveryPlanPeer("node-d", true, false, "peer_endpoint"),
|
||||
},
|
||||
},
|
||||
Connections: PeerConnectionSnapshot{Entries: []PeerConnectionState{
|
||||
{NodeID: "node-a", State: PeerConnectionReady, LastLatencyMs: 40},
|
||||
{NodeID: "node-b", State: PeerConnectionReady, LastLatencyMs: 20},
|
||||
{NodeID: "node-c", State: PeerConnectionReady, LastLatencyMs: 30},
|
||||
{NodeID: "node-d", State: PeerConnectionReady, LastLatencyMs: 10},
|
||||
}},
|
||||
Now: now,
|
||||
})
|
||||
|
||||
if plan.Mode != PeerRecoveryModeSteady || !plan.Healthy {
|
||||
t.Fatalf("unexpected plan health: %+v", plan)
|
||||
}
|
||||
if plan.TargetReadyPeers != DefaultStablePeerTarget || len(plan.Candidates) != DefaultStablePeerTarget {
|
||||
t.Fatalf("unexpected bounded candidates: %+v", plan)
|
||||
}
|
||||
for _, candidate := range plan.Candidates {
|
||||
if candidate.Reason != "maintain_ready" {
|
||||
t.Fatalf("unexpected candidate reason: %+v", candidate)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestPeerRecoveryPlanAddsRecoverySeedWhenReadyDeficit(t *testing.T) {
|
||||
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
||||
plan := PlanPeerRecovery(PeerRecoveryPlanConfig{
|
||||
PeerCache: PeerCacheSnapshot{
|
||||
Entries: []PeerCacheEntry{
|
||||
recoveryPlanPeer("node-a", true, false, "route_adjacent"),
|
||||
recoveryPlanPeer("node-b", true, false, "route_adjacent"),
|
||||
recoveryPlanPeer("node-seed", false, true, ""),
|
||||
},
|
||||
},
|
||||
Connections: PeerConnectionSnapshot{Entries: []PeerConnectionState{
|
||||
{NodeID: "node-a", State: PeerConnectionReady, LastLatencyMs: 20},
|
||||
{NodeID: "node-b", State: PeerConnectionBackoff, BackoffUntil: now.Add(time.Minute)},
|
||||
}},
|
||||
Now: now,
|
||||
})
|
||||
|
||||
if plan.Mode != PeerRecoveryModeRecovery || plan.Healthy {
|
||||
t.Fatalf("unexpected recovery mode: %+v", plan)
|
||||
}
|
||||
if plan.Deficit != 2 || plan.RecoverySeedCandidateCount != 1 {
|
||||
t.Fatalf("unexpected deficit/seed count: %+v", plan)
|
||||
}
|
||||
if !recoveryPlanHasCandidate(plan, "node-seed", "recover_seed") {
|
||||
t.Fatalf("recovery seed was not selected: %+v", plan.Candidates)
|
||||
}
|
||||
if recoveryPlanHasCandidate(plan, "node-b", "") {
|
||||
t.Fatalf("active backoff peer should not be selected: %+v", plan.Candidates)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPeerRecoveryPlanMaintainsRelayReadyPeersInSteadyMode(t *testing.T) {
|
||||
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
||||
plan := PlanPeerRecovery(PeerRecoveryPlanConfig{
|
||||
PeerCache: PeerCacheSnapshot{
|
||||
Entries: []PeerCacheEntry{
|
||||
{
|
||||
NodeID: "node-c",
|
||||
Endpoint: "http://relay:19001",
|
||||
Warm: true,
|
||||
WarmReason: "rendezvous_lease",
|
||||
RendezvousLeaseID: "lease-1",
|
||||
RelayNodeID: "node-r",
|
||||
RelayEndpoint: "http://relay:19001",
|
||||
RelayControl: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
Connections: PeerConnectionSnapshot{Entries: []PeerConnectionState{
|
||||
{NodeID: "node-c", State: PeerConnectionRelayReady, LastLatencyMs: 15},
|
||||
}},
|
||||
Now: now,
|
||||
})
|
||||
|
||||
if plan.Mode != PeerRecoveryModeSteady || !plan.Healthy {
|
||||
t.Fatalf("unexpected steady plan: %+v", plan)
|
||||
}
|
||||
if !recoveryPlanHasCandidate(plan, "node-c", "maintain_ready") {
|
||||
t.Fatalf("relay-ready peer was not maintained: %+v", plan.Candidates)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPeerRecoveryPlanCapsTargetByConnectablePeers(t *testing.T) {
|
||||
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
|
||||
plan := PlanPeerRecovery(PeerRecoveryPlanConfig{
|
||||
PeerCache: PeerCacheSnapshot{Entries: []PeerCacheEntry{
|
||||
{NodeID: "node-a", Warm: true, WarmReason: "route_adjacent"},
|
||||
recoveryPlanPeer("node-b", true, false, "route_adjacent"),
|
||||
}},
|
||||
Connections: PeerConnectionSnapshot{Entries: []PeerConnectionState{
|
||||
{NodeID: "node-b", State: PeerConnectionReady},
|
||||
}},
|
||||
Now: now,
|
||||
})
|
||||
|
||||
if plan.TargetReadyPeers != 1 || !plan.Healthy {
|
||||
t.Fatalf("target should be capped by connectable peers: %+v", plan)
|
||||
}
|
||||
}
|
||||
|
||||
func recoveryPlanPeer(nodeID string, warm bool, recoverySeed bool, warmReason string) PeerCacheEntry {
|
||||
return PeerCacheEntry{
|
||||
NodeID: nodeID,
|
||||
Endpoint: "http://" + nodeID + ":19001",
|
||||
Warm: warm,
|
||||
WarmReason: warmReason,
|
||||
RecoverySeed: recoverySeed,
|
||||
}
|
||||
}
|
||||
|
||||
func recoveryPlanHasCandidate(plan PeerRecoveryPlan, nodeID string, reason string) bool {
|
||||
for _, candidate := range plan.Candidates {
|
||||
if candidate.NodeID != nodeID {
|
||||
continue
|
||||
}
|
||||
return reason == "" || candidate.Reason == reason
|
||||
}
|
||||
return false
|
||||
}
|
||||
@@ -0,0 +1,149 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"time"
|
||||
)
|
||||
|
||||
func ValidateProductionEnvelope(local PeerIdentity, envelope ProductionEnvelope, now time.Time) error {
|
||||
if envelope.FabricProtocolVersion != ProtocolVersion {
|
||||
return fmt.Errorf("%w: unsupported fabric_protocol_version", ErrForwardEnvelopeInvalid)
|
||||
}
|
||||
if envelope.MessageID == "" {
|
||||
return fmt.Errorf("%w: message_id is required", ErrForwardEnvelopeInvalid)
|
||||
}
|
||||
if envelope.RouteID == "" {
|
||||
return fmt.Errorf("%w: route_id is required", ErrForwardEnvelopeInvalid)
|
||||
}
|
||||
if envelope.ClusterID == "" || envelope.ClusterID != local.ClusterID {
|
||||
return ErrClusterMismatch
|
||||
}
|
||||
if envelope.SourceNodeID == "" || envelope.DestinationNodeID == "" {
|
||||
return fmt.Errorf("%w: source_node_id and destination_node_id are required", ErrForwardEnvelopeInvalid)
|
||||
}
|
||||
if envelope.CurrentHopNodeID != local.NodeID {
|
||||
return ErrNodeMismatch
|
||||
}
|
||||
if envelope.NextHopNodeID == "" {
|
||||
return fmt.Errorf("%w: next_hop_node_id is required", ErrForwardEnvelopeInvalid)
|
||||
}
|
||||
if len(envelope.RoutePath) > 0 {
|
||||
if err := validateProductionRoutePath(local, envelope); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if envelope.ChannelClass != ProductionChannelFabricControl {
|
||||
return ErrUnauthorizedChannel
|
||||
}
|
||||
if envelope.MessageType != ProductionMessageFabricControl {
|
||||
return fmt.Errorf("%w: unsupported message_type", ErrForwardEnvelopeInvalid)
|
||||
}
|
||||
if envelope.TTL <= 0 {
|
||||
return ErrTTLExhausted
|
||||
}
|
||||
if envelope.HopCount < 0 {
|
||||
return fmt.Errorf("%w: hop_count must not be negative", ErrForwardEnvelopeInvalid)
|
||||
}
|
||||
if envelope.CreatedAt.IsZero() || envelope.ExpiresAt.IsZero() {
|
||||
return fmt.Errorf("%w: created_at and expires_at are required", ErrForwardEnvelopeInvalid)
|
||||
}
|
||||
if envelope.CreatedAt.After(now.UTC().Add(MaxProductionEnvelopeFutureSkew)) {
|
||||
return fmt.Errorf("%w: created_at exceeds allowed future skew", ErrForwardEnvelopeInvalid)
|
||||
}
|
||||
if !envelope.ExpiresAt.After(now.UTC()) {
|
||||
return ErrRouteExpired
|
||||
}
|
||||
if envelope.PayloadLength != len(envelope.Payload) {
|
||||
return fmt.Errorf("%w: payload_length mismatch", ErrForwardEnvelopeInvalid)
|
||||
}
|
||||
if envelope.PayloadLength > MaxProductionEnvelopePayloadBytes {
|
||||
return fmt.Errorf("%w: payload exceeds fabric-control limit", ErrForwardEnvelopeInvalid)
|
||||
}
|
||||
if envelope.PayloadHash == "" {
|
||||
return fmt.Errorf("%w: payload_hash is required", ErrForwardEnvelopeInvalid)
|
||||
}
|
||||
sum := sha256.Sum256(envelope.Payload)
|
||||
if envelope.PayloadHash != hex.EncodeToString(sum[:]) {
|
||||
return fmt.Errorf("%w: payload_hash mismatch", ErrForwardEnvelopeInvalid)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func validateProductionRoutePath(local PeerIdentity, envelope ProductionEnvelope) error {
|
||||
if len(envelope.RoutePath) < 2 {
|
||||
return ErrInvalidRoutePath
|
||||
}
|
||||
if envelope.RoutePath[0] != envelope.SourceNodeID || envelope.RoutePath[len(envelope.RoutePath)-1] != envelope.DestinationNodeID {
|
||||
return ErrInvalidRoutePath
|
||||
}
|
||||
currentIndex := -1
|
||||
seen := map[string]struct{}{}
|
||||
for index, nodeID := range envelope.RoutePath {
|
||||
if nodeID == "" {
|
||||
return ErrInvalidRoutePath
|
||||
}
|
||||
if _, duplicate := seen[nodeID]; duplicate {
|
||||
return ErrLoopDetected
|
||||
}
|
||||
seen[nodeID] = struct{}{}
|
||||
if nodeID == local.NodeID {
|
||||
currentIndex = index
|
||||
}
|
||||
}
|
||||
if currentIndex < 0 || envelope.CurrentHopNodeID != local.NodeID {
|
||||
return ErrNodeMismatch
|
||||
}
|
||||
if containsProductionNodeID(envelope.VisitedNodeIDs, local.NodeID) {
|
||||
return ErrLoopDetected
|
||||
}
|
||||
for _, visitedNodeID := range envelope.VisitedNodeIDs {
|
||||
if visitedNodeID == "" || !containsProductionNodeID(envelope.RoutePath, visitedNodeID) {
|
||||
return ErrInvalidRoutePath
|
||||
}
|
||||
}
|
||||
if envelope.DestinationNodeID == local.NodeID {
|
||||
if envelope.NextHopNodeID != local.NodeID {
|
||||
return ErrInvalidRoutePath
|
||||
}
|
||||
return nil
|
||||
}
|
||||
if currentIndex >= len(envelope.RoutePath)-1 {
|
||||
return ErrInvalidRoutePath
|
||||
}
|
||||
if envelope.NextHopNodeID != envelope.RoutePath[currentIndex+1] {
|
||||
return ErrInvalidRoutePath
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// containsProductionNodeID reports whether needle is an element of values.
// A nil or empty slice contains nothing, not even the empty string.
func containsProductionNodeID(values []string, needle string) bool {
	found := false
	for i := 0; i < len(values) && !found; i++ {
		found = values[i] == needle
	}
	return found
}
|
||||
|
||||
func NewProductionEnvelopeObservation(envelope ProductionEnvelope, observedAt time.Time) ProductionEnvelopeObservation {
|
||||
return ProductionEnvelopeObservation{
|
||||
MessageID: envelope.MessageID,
|
||||
RouteID: envelope.RouteID,
|
||||
ClusterID: envelope.ClusterID,
|
||||
SourceNodeID: envelope.SourceNodeID,
|
||||
DestinationNodeID: envelope.DestinationNodeID,
|
||||
CurrentHopNodeID: envelope.CurrentHopNodeID,
|
||||
NextHopNodeID: envelope.NextHopNodeID,
|
||||
RoutePath: append([]string{}, envelope.RoutePath...),
|
||||
VisitedNodeIDs: append([]string{}, envelope.VisitedNodeIDs...),
|
||||
ChannelClass: envelope.ChannelClass,
|
||||
MessageType: envelope.MessageType,
|
||||
TTL: envelope.TTL,
|
||||
HopCount: envelope.HopCount,
|
||||
PayloadLength: envelope.PayloadLength,
|
||||
PayloadHash: envelope.PayloadHash,
|
||||
ObservedAt: observedAt.UTC(),
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,81 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
)
|
||||
|
||||
type ProductionEnvelopeObservationSink struct {
|
||||
mu sync.Mutex
|
||||
capacity int
|
||||
items []ProductionEnvelopeObservation
|
||||
accepted uint64
|
||||
dropped uint64
|
||||
}
|
||||
|
||||
// ProductionEnvelopeObservationSinkMetrics is a point-in-time, JSON-encodable
// summary of a ProductionEnvelopeObservationSink.
type ProductionEnvelopeObservationSinkMetrics struct {
	// Buffer sizing: configured limit and number of entries currently held.
	Capacity     int `json:"capacity"`
	CurrentDepth int `json:"current_depth"`

	// Lifetime counters: observations accepted and oldest entries evicted.
	AcceptedTotal uint64 `json:"accepted_total"`
	DroppedOldest uint64 `json:"dropped_oldest"`
}
|
||||
|
||||
func NewProductionEnvelopeObservationSink(capacity int) *ProductionEnvelopeObservationSink {
|
||||
if capacity < 1 {
|
||||
capacity = 1
|
||||
}
|
||||
return &ProductionEnvelopeObservationSink{
|
||||
capacity: capacity,
|
||||
items: make([]ProductionEnvelopeObservation, 0, capacity),
|
||||
}
|
||||
}
|
||||
|
||||
func (s *ProductionEnvelopeObservationSink) Observe(_ context.Context, observation ProductionEnvelopeObservation) error {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
s.accepted++
|
||||
if len(s.items) == s.capacity {
|
||||
copy(s.items, s.items[1:])
|
||||
s.items[len(s.items)-1] = observation
|
||||
s.dropped++
|
||||
return nil
|
||||
}
|
||||
s.items = append(s.items, observation)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *ProductionEnvelopeObservationSink) Snapshot() []ProductionEnvelopeObservation {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
out := make([]ProductionEnvelopeObservation, len(s.items))
|
||||
copy(out, s.items)
|
||||
return out
|
||||
}
|
||||
|
||||
func (s *ProductionEnvelopeObservationSink) Len() int {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
return len(s.items)
|
||||
}
|
||||
|
||||
func (s *ProductionEnvelopeObservationSink) Capacity() int {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
return s.capacity
|
||||
}
|
||||
|
||||
func (s *ProductionEnvelopeObservationSink) Metrics() ProductionEnvelopeObservationSinkMetrics {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
return ProductionEnvelopeObservationSinkMetrics{
|
||||
Capacity: s.capacity,
|
||||
CurrentDepth: len(s.items),
|
||||
AcceptedTotal: s.accepted,
|
||||
DroppedOldest: s.dropped,
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,80 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
)
|
||||
|
||||
func ValidateProductionEnvelopeRouteConfig(local PeerIdentity, envelope ProductionEnvelope, routes []SyntheticRoute, now time.Time) error {
|
||||
if len(routes) == 0 {
|
||||
return nil
|
||||
}
|
||||
route, ok := productionRouteByID(routes, envelope.RouteID)
|
||||
if !ok {
|
||||
return ErrRouteNotFound
|
||||
}
|
||||
if route.ClusterID != envelope.ClusterID || route.ClusterID != local.ClusterID {
|
||||
return ErrClusterMismatch
|
||||
}
|
||||
if route.SourceNodeID != envelope.SourceNodeID || route.DestinationNodeID != envelope.DestinationNodeID {
|
||||
return ErrInvalidRoutePath
|
||||
}
|
||||
if route.ExpiresAt.IsZero() || !route.ExpiresAt.After(now.UTC()) || envelope.ExpiresAt.After(route.ExpiresAt) {
|
||||
return ErrRouteExpired
|
||||
}
|
||||
if !contains(route.AllowedChannels, ProductionChannelFabricControl) {
|
||||
return ErrUnauthorizedChannel
|
||||
}
|
||||
path := routePath(route)
|
||||
if len(path) < 2 || path[0] != route.SourceNodeID || path[len(path)-1] != route.DestinationNodeID {
|
||||
return ErrInvalidRoutePath
|
||||
}
|
||||
if len(envelope.RoutePath) > 0 && !sameNodePath(envelope.RoutePath, path) {
|
||||
return ErrInvalidRoutePath
|
||||
}
|
||||
if len(path) > 2 && len(envelope.RoutePath) == 0 {
|
||||
return ErrInvalidRoutePath
|
||||
}
|
||||
currentIndex := indexOf(path, local.NodeID)
|
||||
if currentIndex < 0 || envelope.CurrentHopNodeID != local.NodeID {
|
||||
return ErrNodeMismatch
|
||||
}
|
||||
expectedNextHop := local.NodeID
|
||||
if local.NodeID != envelope.DestinationNodeID {
|
||||
if currentIndex >= len(path)-1 {
|
||||
return ErrInvalidRoutePath
|
||||
}
|
||||
expectedNextHop = path[currentIndex+1]
|
||||
}
|
||||
if envelope.NextHopNodeID != expectedNextHop {
|
||||
return ErrInvalidRoutePath
|
||||
}
|
||||
if route.MaxTTL > 0 && envelope.TTL > route.MaxTTL {
|
||||
return fmt.Errorf("%w: ttl exceeds configured route max_ttl", ErrForwardEnvelopeInvalid)
|
||||
}
|
||||
if route.MaxHops > 0 && envelope.HopCount > route.MaxHops {
|
||||
return fmt.Errorf("%w: hop_count exceeds configured route max_hops", ErrForwardEnvelopeInvalid)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func productionRouteByID(routes []SyntheticRoute, routeID string) (SyntheticRoute, bool) {
|
||||
for _, route := range routes {
|
||||
if route.RouteID == routeID {
|
||||
return route, true
|
||||
}
|
||||
}
|
||||
return SyntheticRoute{}, false
|
||||
}
|
||||
|
||||
// sameNodePath reports whether a and b list the same node IDs in the same
// order. Nil and empty slices compare equal to each other.
func sameNodePath(a []string, b []string) bool {
	n := len(a)
	if n != len(b) {
		return false
	}
	// Advance while elements agree; reaching n means no mismatch was found.
	i := 0
	for i < n && a[i] == b[i] {
		i++
	}
	return i == n
}
|
||||
@@ -0,0 +1,43 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type ProductionForwardTransport interface {
|
||||
SendProduction(ctx context.Context, nextNodeID string, envelope ProductionEnvelope) (ProductionForwardResult, error)
|
||||
}
|
||||
|
||||
// HTTPProductionForwardTransport is a ProductionForwardTransport that sends
// envelopes over HTTP to per-node base URLs.
type HTTPProductionForwardTransport struct {
	// PeerURLs maps a node ID to the base URL used to reach that node.
	PeerURLs map[string]string

	// HTTPClient, when non-nil, replaces the client that NewClient would
	// otherwise supply for each send.
	HTTPClient *http.Client
}
|
||||
|
||||
func NewHTTPProductionForwardTransport(peerURLs map[string]string) *HTTPProductionForwardTransport {
|
||||
normalized := make(map[string]string, len(peerURLs))
|
||||
for nodeID, baseURL := range peerURLs {
|
||||
nodeID = strings.TrimSpace(nodeID)
|
||||
baseURL = strings.TrimRight(strings.TrimSpace(baseURL), "/")
|
||||
if nodeID != "" && baseURL != "" {
|
||||
normalized[nodeID] = baseURL
|
||||
}
|
||||
}
|
||||
return &HTTPProductionForwardTransport{PeerURLs: normalized}
|
||||
}
|
||||
|
||||
func (t *HTTPProductionForwardTransport) SendProduction(ctx context.Context, nextNodeID string, envelope ProductionEnvelope) (ProductionForwardResult, error) {
|
||||
if t == nil {
|
||||
return ProductionForwardResult{}, ErrForwardPeerUnavailable
|
||||
}
|
||||
baseURL := strings.TrimRight(strings.TrimSpace(t.PeerURLs[nextNodeID]), "/")
|
||||
if baseURL == "" {
|
||||
return ProductionForwardResult{}, ErrForwardPeerUnavailable
|
||||
}
|
||||
client := NewClient(baseURL)
|
||||
if t.HTTPClient != nil {
|
||||
client.HTTPClient = t.HTTPClient
|
||||
}
|
||||
return client.SendProduction(ctx, envelope)
|
||||
}
|
||||
@@ -0,0 +1,241 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
type ScopedSyntheticConfig struct {
|
||||
SchemaVersion string `json:"schema_version"`
|
||||
ClusterID string `json:"cluster_id"`
|
||||
LocalNodeID string `json:"local_node_id"`
|
||||
ConfigVersion string `json:"config_version,omitempty"`
|
||||
PeerDirectoryVersion string `json:"peer_directory_version,omitempty"`
|
||||
PolicyVersion string `json:"policy_version,omitempty"`
|
||||
PeerEndpoints map[string]string `json:"peer_endpoints"`
|
||||
PeerEndpointCandidates map[string][]PeerEndpointCandidate `json:"peer_endpoint_candidates,omitempty"`
|
||||
PeerDirectory []PeerDirectoryEntry `json:"peer_directory,omitempty"`
|
||||
RecoverySeeds []PeerRecoverySeed `json:"recovery_seeds,omitempty"`
|
||||
RendezvousLeases []PeerRendezvousLease `json:"rendezvous_leases,omitempty"`
|
||||
Routes []SyntheticRoute `json:"routes"`
|
||||
}
|
||||
|
||||
// PeerDirectoryEntry is one peer's summary row in a scoped config's peer
// directory.
type PeerDirectoryEntry struct {
	// NodeID identifies the peer; validation rejects blank IDs, the local
	// node's own ID, and duplicates.
	NodeID   string   `json:"node_id"`
	RouteIDs []string `json:"route_ids,omitempty"`

	// Counts must not be negative.
	EndpointCount  int `json:"endpoint_count"`
	CandidateCount int `json:"candidate_count"`

	ConnectivityModes []string `json:"connectivity_modes,omitempty"`
	RecoverySeed      bool     `json:"recovery_seed"`
}
|
||||
|
||||
// PeerRecoverySeed describes one recovery endpoint for a peer in a scoped
// config.
type PeerRecoverySeed struct {
	// Required by validation: none of these may be blank, and the
	// (NodeID, Endpoint) pair must be unique within the config.
	NodeID    string `json:"node_id"`
	Endpoint  string `json:"endpoint"`
	Transport string `json:"transport"`

	ConnectivityMode string `json:"connectivity_mode,omitempty"`
	Region           string `json:"region,omitempty"`
	Priority         int    `json:"priority"`

	// Optional extras; omitted from JSON when unset.
	LastVerifiedAt *time.Time      `json:"last_verified_at,omitempty"`
	Metadata       json.RawMessage `json:"metadata,omitempty"`
}
|
||||
|
||||
// PeerRendezvousLease describes a time-limited lease for reaching a peer via
// a relay node in a scoped config.
type PeerRendezvousLease struct {
	// Required by validation: none of these may be blank, LeaseID must be
	// unique, and PeerNodeID must differ from RelayNodeID.
	LeaseID       string `json:"lease_id"`
	PeerNodeID    string `json:"peer_node_id"`
	RelayNodeID   string `json:"relay_node_id"`
	RelayEndpoint string `json:"relay_endpoint"`
	Transport     string `json:"transport"`

	// Scope of the lease. Validation rejects leases whose ControlPlaneOnly
	// flag is false, and requires every listed route ID to be known.
	ConnectivityMode string   `json:"connectivity_mode,omitempty"`
	RouteIDs         []string `json:"route_ids,omitempty"`
	AllowedChannels  []string `json:"allowed_channels,omitempty"`
	Priority         int      `json:"priority"`
	ControlPlaneOnly bool     `json:"control_plane_only"`

	// Lifetime. ExpiresAt must be set and in the future at validation time.
	IssuedAt  time.Time `json:"issued_at"`
	ExpiresAt time.Time `json:"expires_at"`

	// Optional extras. Non-empty Metadata must be valid JSON.
	Reason   string          `json:"reason,omitempty"`
	Metadata json.RawMessage `json:"metadata,omitempty"`
}
|
||||
|
||||
// PeerEndpointCandidate describes one candidate endpoint for reaching a peer.
// In a scoped config, candidates are grouped by node ID, and validation
// requires each candidate's NodeID to equal the key it is listed under.
type PeerEndpointCandidate struct {
	// Addressing. EndpointID, NodeID, Transport and Address must be non-blank.
	EndpointID    string `json:"endpoint_id"`
	NodeID        string `json:"node_id"`
	Transport     string `json:"transport"`
	Address       string `json:"address"`
	AddressFamily string `json:"address_family,omitempty"`

	// Connectivity. Reachability and ConnectivityMode must be non-blank.
	Reachability     string `json:"reachability"`
	NATType          string `json:"nat_type,omitempty"`
	ConnectivityMode string `json:"connectivity_mode"`
	Region           string `json:"region,omitempty"`

	Priority   int      `json:"priority"`
	PolicyTags []string `json:"policy_tags,omitempty"`

	// Optional extras; omitted from JSON when unset.
	LastVerifiedAt *time.Time      `json:"last_verified_at,omitempty"`
	Metadata       json.RawMessage `json:"metadata,omitempty"`
}
|
||||
|
||||
func LoadScopedSyntheticConfig(path string, local PeerIdentity) (ScopedSyntheticConfig, error) {
|
||||
payload, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return ScopedSyntheticConfig{}, err
|
||||
}
|
||||
var cfg ScopedSyntheticConfig
|
||||
if err := json.Unmarshal(payload, &cfg); err != nil {
|
||||
return ScopedSyntheticConfig{}, fmt.Errorf("parse scoped synthetic mesh config: %w", err)
|
||||
}
|
||||
if err := cfg.Validate(local); err != nil {
|
||||
return ScopedSyntheticConfig{}, err
|
||||
}
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
func (cfg ScopedSyntheticConfig) Validate(local PeerIdentity) error {
|
||||
if cfg.SchemaVersion == "" {
|
||||
return fmt.Errorf("scoped synthetic mesh config schema_version is required")
|
||||
}
|
||||
if cfg.ClusterID == "" || cfg.ClusterID != local.ClusterID {
|
||||
return ErrClusterMismatch
|
||||
}
|
||||
if cfg.LocalNodeID == "" || cfg.LocalNodeID != local.NodeID {
|
||||
return ErrNodeMismatch
|
||||
}
|
||||
for nodeID, endpoint := range cfg.PeerEndpoints {
|
||||
if strings.TrimSpace(nodeID) == "" || strings.TrimSpace(endpoint) == "" {
|
||||
return fmt.Errorf("scoped synthetic mesh config contains empty peer endpoint")
|
||||
}
|
||||
}
|
||||
for nodeID, candidates := range cfg.PeerEndpointCandidates {
|
||||
if strings.TrimSpace(nodeID) == "" {
|
||||
return fmt.Errorf("scoped synthetic mesh config contains empty peer endpoint candidate node")
|
||||
}
|
||||
for _, candidate := range candidates {
|
||||
if strings.TrimSpace(candidate.EndpointID) == "" ||
|
||||
strings.TrimSpace(candidate.NodeID) == "" ||
|
||||
candidate.NodeID != nodeID ||
|
||||
strings.TrimSpace(candidate.Transport) == "" ||
|
||||
strings.TrimSpace(candidate.Address) == "" ||
|
||||
strings.TrimSpace(candidate.Reachability) == "" ||
|
||||
strings.TrimSpace(candidate.ConnectivityMode) == "" {
|
||||
return fmt.Errorf("scoped synthetic mesh config contains invalid peer endpoint candidate")
|
||||
}
|
||||
}
|
||||
}
|
||||
if err := validatePeerDirectory(cfg.PeerDirectory, cfg.LocalNodeID); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := validateRecoverySeeds(cfg.RecoverySeeds); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := validateRendezvousLeases(cfg.RendezvousLeases, cfg.Routes, cfg.LocalNodeID); err != nil {
|
||||
return err
|
||||
}
|
||||
for _, route := range cfg.Routes {
|
||||
if route.ClusterID != cfg.ClusterID {
|
||||
return ErrClusterMismatch
|
||||
}
|
||||
path := routePath(route)
|
||||
if len(path) < 2 {
|
||||
return ErrInvalidRoutePath
|
||||
}
|
||||
if !contains(path, cfg.LocalNodeID) {
|
||||
return ErrNodeMismatch
|
||||
}
|
||||
if route.ExpiresAt.IsZero() {
|
||||
return fmt.Errorf("scoped synthetic route %q expires_at is required", route.RouteID)
|
||||
}
|
||||
if !route.ExpiresAt.After(time.Now().UTC()) {
|
||||
return ErrRouteExpired
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func validatePeerDirectory(entries []PeerDirectoryEntry, localNodeID string) error {
|
||||
seen := map[string]struct{}{}
|
||||
for _, entry := range entries {
|
||||
nodeID := strings.TrimSpace(entry.NodeID)
|
||||
if nodeID == "" || nodeID == localNodeID {
|
||||
return fmt.Errorf("scoped synthetic mesh config contains invalid peer directory entry")
|
||||
}
|
||||
if _, duplicate := seen[nodeID]; duplicate {
|
||||
return fmt.Errorf("scoped synthetic mesh config contains duplicate peer directory entry")
|
||||
}
|
||||
seen[nodeID] = struct{}{}
|
||||
if entry.EndpointCount < 0 || entry.CandidateCount < 0 {
|
||||
return fmt.Errorf("scoped synthetic mesh config contains invalid peer directory count")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func validateRecoverySeeds(seeds []PeerRecoverySeed) error {
|
||||
if len(seeds) > 20 {
|
||||
return fmt.Errorf("scoped synthetic mesh config contains too many recovery seeds")
|
||||
}
|
||||
seen := map[string]struct{}{}
|
||||
for _, seed := range seeds {
|
||||
key := strings.TrimSpace(seed.NodeID) + "\x00" + strings.TrimSpace(seed.Endpoint)
|
||||
if strings.TrimSpace(seed.NodeID) == "" ||
|
||||
strings.TrimSpace(seed.Endpoint) == "" ||
|
||||
strings.TrimSpace(seed.Transport) == "" {
|
||||
return fmt.Errorf("scoped synthetic mesh config contains invalid recovery seed")
|
||||
}
|
||||
if _, duplicate := seen[key]; duplicate {
|
||||
return fmt.Errorf("scoped synthetic mesh config contains duplicate recovery seed")
|
||||
}
|
||||
seen[key] = struct{}{}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func validateRendezvousLeases(leases []PeerRendezvousLease, routes []SyntheticRoute, localNodeID string) error {
|
||||
if len(leases) > 20 {
|
||||
return fmt.Errorf("scoped synthetic mesh config contains too many rendezvous leases")
|
||||
}
|
||||
routesByID := map[string]SyntheticRoute{}
|
||||
for _, route := range routes {
|
||||
if strings.TrimSpace(route.RouteID) != "" {
|
||||
routesByID[route.RouteID] = route
|
||||
}
|
||||
}
|
||||
seen := map[string]struct{}{}
|
||||
now := time.Now().UTC()
|
||||
for _, lease := range leases {
|
||||
if strings.TrimSpace(lease.LeaseID) == "" ||
|
||||
strings.TrimSpace(lease.PeerNodeID) == "" ||
|
||||
strings.TrimSpace(lease.RelayNodeID) == "" ||
|
||||
strings.TrimSpace(lease.RelayEndpoint) == "" ||
|
||||
strings.TrimSpace(lease.Transport) == "" ||
|
||||
lease.PeerNodeID == lease.RelayNodeID ||
|
||||
!lease.ControlPlaneOnly ||
|
||||
lease.ExpiresAt.IsZero() ||
|
||||
!lease.ExpiresAt.After(now) ||
|
||||
(len(lease.Metadata) > 0 && !json.Valid(lease.Metadata)) {
|
||||
return fmt.Errorf("scoped synthetic mesh config contains invalid rendezvous lease")
|
||||
}
|
||||
if _, duplicate := seen[lease.LeaseID]; duplicate {
|
||||
return fmt.Errorf("scoped synthetic mesh config contains duplicate rendezvous lease")
|
||||
}
|
||||
seen[lease.LeaseID] = struct{}{}
|
||||
if len(lease.RouteIDs) == 0 {
|
||||
continue
|
||||
}
|
||||
visible := false
|
||||
for _, routeID := range lease.RouteIDs {
|
||||
route, ok := routesByID[routeID]
|
||||
if !ok {
|
||||
return fmt.Errorf("scoped synthetic mesh config contains rendezvous lease for unknown route")
|
||||
}
|
||||
path := routePath(route)
|
||||
if contains(path, localNodeID) && contains(path, lease.PeerNodeID) && contains(path, lease.RelayNodeID) {
|
||||
visible = true
|
||||
}
|
||||
}
|
||||
if !visible {
|
||||
return fmt.Errorf("scoped synthetic mesh config contains out-of-scope rendezvous lease")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,235 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestLoadScopedSyntheticConfig(t *testing.T) {
|
||||
expiresAt := time.Now().UTC().Add(time.Hour)
|
||||
path := writeScopedConfig(t, ScopedSyntheticConfig{
|
||||
SchemaVersion: "c17f.synthetic.v1",
|
||||
ClusterID: "cluster-1",
|
||||
LocalNodeID: "node-a",
|
||||
ConfigVersion: "config-v1",
|
||||
PeerDirectoryVersion: "peers-v1",
|
||||
PolicyVersion: "policy-v1",
|
||||
PeerEndpoints: map[string]string{"node-b": "http://127.0.0.1:19002"},
|
||||
PeerEndpointCandidates: map[string][]PeerEndpointCandidate{
|
||||
"node-b": {
|
||||
{
|
||||
EndpointID: "node-b-public",
|
||||
NodeID: "node-b",
|
||||
Transport: "direct_tcp_tls",
|
||||
Address: "203.0.113.20:443",
|
||||
Reachability: "public",
|
||||
NATType: "restricted",
|
||||
ConnectivityMode: "direct",
|
||||
Priority: 10,
|
||||
},
|
||||
},
|
||||
},
|
||||
PeerDirectory: []PeerDirectoryEntry{
|
||||
{
|
||||
NodeID: "node-b",
|
||||
RouteIDs: []string{"route-a-b"},
|
||||
EndpointCount: 1,
|
||||
CandidateCount: 1,
|
||||
ConnectivityModes: []string{"direct"},
|
||||
RecoverySeed: true,
|
||||
},
|
||||
},
|
||||
RecoverySeeds: []PeerRecoverySeed{
|
||||
{
|
||||
NodeID: "node-b",
|
||||
Endpoint: "https://node-b.example.test:443",
|
||||
Transport: "direct_tcp_tls",
|
||||
ConnectivityMode: "direct",
|
||||
Priority: 10,
|
||||
},
|
||||
},
|
||||
RendezvousLeases: []PeerRendezvousLease{
|
||||
{
|
||||
LeaseID: "lease-node-b-via-node-r",
|
||||
PeerNodeID: "node-b",
|
||||
RelayNodeID: "node-r",
|
||||
RelayEndpoint: "http://node-r:19000",
|
||||
Transport: "relay_control",
|
||||
ConnectivityMode: "relay_required",
|
||||
RouteIDs: []string{"route-a-b"},
|
||||
AllowedChannels: []string{"fabric_control", "route_control"},
|
||||
Priority: 10,
|
||||
ControlPlaneOnly: true,
|
||||
IssuedAt: expiresAt.Add(-time.Minute),
|
||||
ExpiresAt: expiresAt,
|
||||
},
|
||||
},
|
||||
Routes: []SyntheticRoute{liveSyntheticRoute("route-a-b", []string{"node-a", "node-r", "node-b"})},
|
||||
})
|
||||
|
||||
cfg, err := LoadScopedSyntheticConfig(path, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"})
|
||||
if err != nil {
|
||||
t.Fatalf("load scoped config: %v", err)
|
||||
}
|
||||
if cfg.ConfigVersion != "config-v1" || cfg.PeerEndpoints["node-b"] == "" || len(cfg.Routes) != 1 {
|
||||
t.Fatalf("unexpected config: %+v", cfg)
|
||||
}
|
||||
if got := cfg.PeerEndpointCandidates["node-b"]; len(got) != 1 || got[0].EndpointID != "node-b-public" {
|
||||
t.Fatalf("unexpected endpoint candidates: %+v", cfg.PeerEndpointCandidates)
|
||||
}
|
||||
if len(cfg.PeerDirectory) != 1 || cfg.PeerDirectory[0].NodeID != "node-b" || !cfg.PeerDirectory[0].RecoverySeed {
|
||||
t.Fatalf("unexpected peer directory: %+v", cfg.PeerDirectory)
|
||||
}
|
||||
if len(cfg.RecoverySeeds) != 1 || cfg.RecoverySeeds[0].NodeID != "node-b" {
|
||||
t.Fatalf("unexpected recovery seeds: %+v", cfg.RecoverySeeds)
|
||||
}
|
||||
if len(cfg.RendezvousLeases) != 1 || cfg.RendezvousLeases[0].RelayNodeID != "node-r" {
|
||||
t.Fatalf("unexpected rendezvous leases: %+v", cfg.RendezvousLeases)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadScopedSyntheticConfigRejectsWrongCluster(t *testing.T) {
|
||||
path := writeScopedConfig(t, ScopedSyntheticConfig{
|
||||
SchemaVersion: "c17f.synthetic.v1",
|
||||
ClusterID: "cluster-2",
|
||||
LocalNodeID: "node-a",
|
||||
Routes: []SyntheticRoute{liveSyntheticRoute("route-a-b", []string{"node-a", "node-b"})},
|
||||
})
|
||||
|
||||
_, err := LoadScopedSyntheticConfig(path, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"})
|
||||
if !errors.Is(err, ErrClusterMismatch) {
|
||||
t.Fatalf("err = %v, want ErrClusterMismatch", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadScopedSyntheticConfigRejectsWrongNode(t *testing.T) {
|
||||
path := writeScopedConfig(t, ScopedSyntheticConfig{
|
||||
SchemaVersion: "c17f.synthetic.v1",
|
||||
ClusterID: "cluster-1",
|
||||
LocalNodeID: "node-x",
|
||||
Routes: []SyntheticRoute{liveSyntheticRoute("route-a-b", []string{"node-a", "node-b"})},
|
||||
})
|
||||
|
||||
_, err := LoadScopedSyntheticConfig(path, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"})
|
||||
if !errors.Is(err, ErrNodeMismatch) {
|
||||
t.Fatalf("err = %v, want ErrNodeMismatch", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadScopedSyntheticConfigRejectsExpiredRoute(t *testing.T) {
|
||||
route := liveSyntheticRoute("route-a-b", []string{"node-a", "node-b"})
|
||||
route.ExpiresAt = time.Now().UTC().Add(-time.Minute)
|
||||
path := writeScopedConfig(t, ScopedSyntheticConfig{
|
||||
SchemaVersion: "c17f.synthetic.v1",
|
||||
ClusterID: "cluster-1",
|
||||
LocalNodeID: "node-a",
|
||||
Routes: []SyntheticRoute{route},
|
||||
})
|
||||
|
||||
_, err := LoadScopedSyntheticConfig(path, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"})
|
||||
if !errors.Is(err, ErrRouteExpired) {
|
||||
t.Fatalf("err = %v, want ErrRouteExpired", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadScopedSyntheticConfigRejectsInvalidPeerEndpointCandidate(t *testing.T) {
|
||||
path := writeScopedConfig(t, ScopedSyntheticConfig{
|
||||
SchemaVersion: "c17f.synthetic.v1",
|
||||
ClusterID: "cluster-1",
|
||||
LocalNodeID: "node-a",
|
||||
PeerEndpointCandidates: map[string][]PeerEndpointCandidate{
|
||||
"node-b": {
|
||||
{
|
||||
EndpointID: "node-b-public",
|
||||
NodeID: "node-c",
|
||||
Transport: "direct_tcp_tls",
|
||||
Address: "203.0.113.20:443",
|
||||
Reachability: "public",
|
||||
ConnectivityMode: "direct",
|
||||
},
|
||||
},
|
||||
},
|
||||
Routes: []SyntheticRoute{liveSyntheticRoute("route-a-b", []string{"node-a", "node-b"})},
|
||||
})
|
||||
|
||||
_, err := LoadScopedSyntheticConfig(path, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"})
|
||||
if err == nil {
|
||||
t.Fatal("expected invalid peer endpoint candidate error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadScopedSyntheticConfigRejectsInvalidPeerDirectory(t *testing.T) {
|
||||
path := writeScopedConfig(t, ScopedSyntheticConfig{
|
||||
SchemaVersion: "c17f.synthetic.v1",
|
||||
ClusterID: "cluster-1",
|
||||
LocalNodeID: "node-a",
|
||||
PeerDirectory: []PeerDirectoryEntry{
|
||||
{NodeID: "node-a"},
|
||||
},
|
||||
Routes: []SyntheticRoute{liveSyntheticRoute("route-a-b", []string{"node-a", "node-b"})},
|
||||
})
|
||||
|
||||
_, err := LoadScopedSyntheticConfig(path, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"})
|
||||
if err == nil {
|
||||
t.Fatal("expected invalid peer directory error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadScopedSyntheticConfigRejectsInvalidRecoverySeed(t *testing.T) {
|
||||
path := writeScopedConfig(t, ScopedSyntheticConfig{
|
||||
SchemaVersion: "c17f.synthetic.v1",
|
||||
ClusterID: "cluster-1",
|
||||
LocalNodeID: "node-a",
|
||||
RecoverySeeds: []PeerRecoverySeed{
|
||||
{NodeID: "node-b", Endpoint: "", Transport: "direct_tcp_tls"},
|
||||
},
|
||||
Routes: []SyntheticRoute{liveSyntheticRoute("route-a-b", []string{"node-a", "node-b"})},
|
||||
})
|
||||
|
||||
_, err := LoadScopedSyntheticConfig(path, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"})
|
||||
if err == nil {
|
||||
t.Fatal("expected invalid recovery seed error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadScopedSyntheticConfigRejectsInvalidRendezvousLease(t *testing.T) {
|
||||
path := writeScopedConfig(t, ScopedSyntheticConfig{
|
||||
SchemaVersion: "c17z12.synthetic.v1",
|
||||
ClusterID: "cluster-1",
|
||||
LocalNodeID: "node-a",
|
||||
RendezvousLeases: []PeerRendezvousLease{
|
||||
{
|
||||
LeaseID: "lease-node-b-via-node-r",
|
||||
PeerNodeID: "node-b",
|
||||
RelayNodeID: "node-r",
|
||||
RelayEndpoint: "http://node-r:19000",
|
||||
Transport: "relay_control",
|
||||
RouteIDs: []string{"route-a-b"},
|
||||
ExpiresAt: time.Now().UTC().Add(time.Hour),
|
||||
},
|
||||
},
|
||||
Routes: []SyntheticRoute{liveSyntheticRoute("route-a-b", []string{"node-a", "node-r", "node-b"})},
|
||||
})
|
||||
|
||||
_, err := LoadScopedSyntheticConfig(path, PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"})
|
||||
if err == nil {
|
||||
t.Fatal("expected invalid rendezvous lease error")
|
||||
}
|
||||
}
|
||||
|
||||
func writeScopedConfig(t *testing.T, cfg ScopedSyntheticConfig) string {
|
||||
t.Helper()
|
||||
payload, err := json.Marshal(cfg)
|
||||
if err != nil {
|
||||
t.Fatalf("marshal config: %v", err)
|
||||
}
|
||||
path := filepath.Join(t.TempDir(), "mesh-config.json")
|
||||
if err := os.WriteFile(path, payload, 0o600); err != nil {
|
||||
t.Fatalf("write config: %v", err)
|
||||
}
|
||||
return path
|
||||
}
|
||||
@@ -0,0 +1,291 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"time"
|
||||
)
|
||||
|
||||
type ProductionEnvelopeObserver func(context.Context, ProductionEnvelopeObservation) error
|
||||
type ProductionForwardLogger func(ProductionForwardLogEntry)
|
||||
|
||||
type Server struct {
|
||||
Local PeerIdentity
|
||||
SyntheticRuntime *SyntheticRuntime
|
||||
ProductionForwardingEnabled bool
|
||||
ProductionEnvelopeObserver ProductionEnvelopeObserver
|
||||
ProductionForwardTransport ProductionForwardTransport
|
||||
ProductionForwardLogger ProductionForwardLogger
|
||||
ProductionRoutes []SyntheticRoute
|
||||
}
|
||||
|
||||
func (s Server) Handler() http.Handler {
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/mesh/v1/health", s.handleHealth)
|
||||
mux.HandleFunc("/mesh/v1/forward", s.handleForward)
|
||||
mux.HandleFunc("/mesh/v1/synthetic/probe", s.handleSyntheticProbe)
|
||||
return mux
|
||||
}
|
||||
|
||||
func (s Server) handleHealth(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodPost {
|
||||
w.WriteHeader(http.StatusMethodNotAllowed)
|
||||
return
|
||||
}
|
||||
var message HealthMessage
|
||||
if err := json.NewDecoder(r.Body).Decode(&message); err != nil {
|
||||
http.Error(w, "invalid health message", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
if message.ProtocolVersion != ProtocolVersion {
|
||||
http.Error(w, "unsupported mesh protocol version", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
if err := ValidatePeer(s.Local, message.From); err != nil {
|
||||
http.Error(w, err.Error(), http.StatusForbidden)
|
||||
return
|
||||
}
|
||||
if message.To.NodeID != "" && message.To.NodeID != s.Local.NodeID {
|
||||
http.Error(w, ErrNodeMismatch.Error(), http.StatusForbidden)
|
||||
return
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_ = json.NewEncoder(w).Encode(HealthAck{
|
||||
ProtocolVersion: ProtocolVersion,
|
||||
Accepted: true,
|
||||
By: s.Local,
|
||||
})
|
||||
}
|
||||
|
||||
func (s Server) handleForward(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodPost {
|
||||
w.WriteHeader(http.StatusMethodNotAllowed)
|
||||
return
|
||||
}
|
||||
if !s.ProductionForwardingEnabled {
|
||||
s.logProductionForward(ProductionForwardLogEntry{
|
||||
Event: "production_forward_rejected",
|
||||
ClusterID: s.Local.ClusterID,
|
||||
LocalNodeID: s.Local.NodeID,
|
||||
Reason: ErrForwardDisabled.Error(),
|
||||
StatusCode: http.StatusNotImplemented,
|
||||
OccurredAt: time.Now().UTC(),
|
||||
})
|
||||
http.Error(w, ErrForwardDisabled.Error(), http.StatusNotImplemented)
|
||||
return
|
||||
}
|
||||
var envelope ProductionEnvelope
|
||||
if err := json.NewDecoder(r.Body).Decode(&envelope); err != nil {
|
||||
s.logProductionForward(ProductionForwardLogEntry{
|
||||
Event: "production_forward_rejected",
|
||||
ClusterID: s.Local.ClusterID,
|
||||
LocalNodeID: s.Local.NodeID,
|
||||
Reason: "invalid production mesh envelope",
|
||||
StatusCode: http.StatusBadRequest,
|
||||
OccurredAt: time.Now().UTC(),
|
||||
})
|
||||
http.Error(w, "invalid production mesh envelope", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
if err := ValidateProductionEnvelope(s.Local, envelope, time.Now().UTC()); err != nil {
|
||||
s.rejectProductionForward(w, envelope, err, forwardStatusCode(err))
|
||||
return
|
||||
}
|
||||
if err := ValidateProductionEnvelopeRouteConfig(s.Local, envelope, s.ProductionRoutes, time.Now().UTC()); err != nil {
|
||||
s.rejectProductionForward(w, envelope, err, forwardStatusCode(err))
|
||||
return
|
||||
}
|
||||
s.logProductionForward(productionForwardLogEntry("production_forward_accepted", s.Local, envelope, "", 0))
|
||||
if s.ProductionEnvelopeObserver != nil {
|
||||
observation := NewProductionEnvelopeObservation(envelope, time.Now().UTC())
|
||||
if err := observeProductionEnvelope(r.Context(), s.ProductionEnvelopeObserver, observation); err != nil {
|
||||
s.logProductionForward(productionForwardLogEntry("production_forward_rejected", s.Local, envelope, ErrForwardObservationFailed.Error(), http.StatusInternalServerError))
|
||||
http.Error(w, ErrForwardObservationFailed.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
}
|
||||
if envelope.DestinationNodeID == s.Local.NodeID {
|
||||
s.logProductionForward(productionForwardLogEntry("production_forward_delivered", s.Local, envelope, "", http.StatusOK))
|
||||
writeProductionForwardResult(w, ProductionForwardResult{
|
||||
Accepted: true,
|
||||
Delivered: true,
|
||||
By: s.Local,
|
||||
MessageID: envelope.MessageID,
|
||||
RouteID: envelope.RouteID,
|
||||
})
|
||||
return
|
||||
}
|
||||
if envelope.NextHopNodeID == s.Local.NodeID {
|
||||
s.rejectProductionForward(w, envelope, ErrLoopDetected, forwardStatusCode(ErrLoopDetected))
|
||||
return
|
||||
}
|
||||
if len(envelope.RoutePath) == 0 && envelope.NextHopNodeID != envelope.DestinationNodeID {
|
||||
s.rejectProductionForward(w, envelope, ErrForwardRuntimeUnavailable, http.StatusNotImplemented)
|
||||
return
|
||||
}
|
||||
if s.ProductionForwardTransport == nil {
|
||||
s.rejectProductionForward(w, envelope, ErrForwardRuntimeUnavailable, http.StatusNotImplemented)
|
||||
return
|
||||
}
|
||||
if envelope.TTL <= 1 {
|
||||
s.rejectProductionForward(w, envelope, ErrTTLExhausted, forwardStatusCode(ErrTTLExhausted))
|
||||
return
|
||||
}
|
||||
forwarded := envelope
|
||||
forwarded.CurrentHopNodeID = envelope.NextHopNodeID
|
||||
forwarded.NextHopNodeID = nextProductionHopAfter(envelope.RoutePath, envelope.NextHopNodeID, envelope.DestinationNodeID)
|
||||
forwarded.TTL = envelope.TTL - 1
|
||||
forwarded.HopCount = envelope.HopCount + 1
|
||||
forwarded.VisitedNodeIDs = append(append([]string{}, envelope.VisitedNodeIDs...), s.Local.NodeID)
|
||||
result, err := s.ProductionForwardTransport.SendProduction(r.Context(), envelope.NextHopNodeID, forwarded)
|
||||
if err != nil {
|
||||
s.rejectProductionForward(w, envelope, err, forwardStatusCode(err))
|
||||
return
|
||||
}
|
||||
s.logProductionForward(productionForwardLogEntry("production_forward_forwarded", s.Local, envelope, "", http.StatusOK))
|
||||
result.Accepted = true
|
||||
result.Forwarded = true
|
||||
result.By = s.Local
|
||||
result.MessageID = envelope.MessageID
|
||||
result.RouteID = envelope.RouteID
|
||||
result.NextNodeID = envelope.NextHopNodeID
|
||||
writeProductionForwardResult(w, result)
|
||||
}
|
||||
|
||||
func (s Server) rejectProductionForward(w http.ResponseWriter, envelope ProductionEnvelope, err error, statusCode int) {
|
||||
s.logProductionForward(productionForwardLogEntry("production_forward_rejected", s.Local, envelope, err.Error(), statusCode))
|
||||
http.Error(w, err.Error(), statusCode)
|
||||
}
|
||||
|
||||
func (s Server) logProductionForward(entry ProductionForwardLogEntry) {
|
||||
if s.ProductionForwardLogger == nil {
|
||||
return
|
||||
}
|
||||
if entry.OccurredAt.IsZero() {
|
||||
entry.OccurredAt = time.Now().UTC()
|
||||
}
|
||||
s.ProductionForwardLogger(entry)
|
||||
}
|
||||
|
||||
func productionForwardLogEntry(event string, local PeerIdentity, envelope ProductionEnvelope, reason string, statusCode int) ProductionForwardLogEntry {
|
||||
return ProductionForwardLogEntry{
|
||||
Event: event,
|
||||
RouteID: envelope.RouteID,
|
||||
MessageID: envelope.MessageID,
|
||||
ClusterID: envelope.ClusterID,
|
||||
LocalNodeID: local.NodeID,
|
||||
SourceNodeID: envelope.SourceNodeID,
|
||||
DestinationNodeID: envelope.DestinationNodeID,
|
||||
CurrentHopNodeID: envelope.CurrentHopNodeID,
|
||||
NextHopNodeID: envelope.NextHopNodeID,
|
||||
ChannelClass: envelope.ChannelClass,
|
||||
MessageType: envelope.MessageType,
|
||||
Reason: reason,
|
||||
StatusCode: statusCode,
|
||||
TTL: envelope.TTL,
|
||||
HopCount: envelope.HopCount,
|
||||
RoutePathLength: len(envelope.RoutePath),
|
||||
VisitedCount: len(envelope.VisitedNodeIDs),
|
||||
PayloadLength: envelope.PayloadLength,
|
||||
OccurredAt: time.Now().UTC(),
|
||||
}
|
||||
}
|
||||
|
||||
// nextProductionHopAfter returns the node that follows currentNodeID in
// routePath, using the first occurrence of currentNodeID.
//
// If currentNodeID is the last element of the path it is returned unchanged.
// If the path is empty or does not contain currentNodeID, destinationNodeID is
// returned.
func nextProductionHopAfter(routePath []string, currentNodeID string, destinationNodeID string) string {
	for i := range routePath {
		if routePath[i] != currentNodeID {
			continue
		}
		if last := len(routePath) - 1; i < last {
			return routePath[i+1]
		}
		// Already at the end of the path: there is nothing beyond it.
		return currentNodeID
	}
	return destinationNodeID
}
|
||||
|
||||
func writeProductionForwardResult(w http.ResponseWriter, result ProductionForwardResult) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_ = json.NewEncoder(w).Encode(result)
|
||||
}
|
||||
|
||||
func observeProductionEnvelope(ctx context.Context, observer ProductionEnvelopeObserver, observation ProductionEnvelopeObservation) (err error) {
|
||||
if observer == nil {
|
||||
return nil
|
||||
}
|
||||
defer func() {
|
||||
if recover() != nil {
|
||||
err = ErrForwardObservationFailed
|
||||
}
|
||||
}()
|
||||
return observer(ctx, observation)
|
||||
}
|
||||
|
||||
func (s Server) handleSyntheticProbe(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodPost {
|
||||
w.WriteHeader(http.StatusMethodNotAllowed)
|
||||
return
|
||||
}
|
||||
if s.SyntheticRuntime == nil {
|
||||
http.Error(w, ErrMeshRuntimeDisabled.Error(), http.StatusServiceUnavailable)
|
||||
return
|
||||
}
|
||||
var envelope SyntheticEnvelope
|
||||
if err := json.NewDecoder(r.Body).Decode(&envelope); err != nil {
|
||||
http.Error(w, "invalid synthetic mesh envelope", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
ack, err := s.SyntheticRuntime.Receive(r.Context(), envelope)
|
||||
if err != nil {
|
||||
http.Error(w, err.Error(), syntheticStatusCode(err))
|
||||
return
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_ = json.NewEncoder(w).Encode(ack)
|
||||
}
|
||||
|
||||
func NewHealthMessage(from, to PeerIdentity) HealthMessage {
|
||||
status := "reachable"
|
||||
return HealthMessage{
|
||||
ProtocolVersion: ProtocolVersion,
|
||||
From: from,
|
||||
To: to,
|
||||
ObservedAt: time.Now().UTC(),
|
||||
LinkStatus: status,
|
||||
}
|
||||
}
|
||||
|
||||
func syntheticStatusCode(err error) int {
|
||||
switch err {
|
||||
case ErrClusterMismatch, ErrNodeMismatch, ErrUnauthorizedChannel, ErrLoopDetected:
|
||||
return http.StatusForbidden
|
||||
case ErrMeshRuntimeDisabled:
|
||||
return http.StatusServiceUnavailable
|
||||
case ErrRouteExpired, ErrTTLExhausted, ErrInvalidRoutePath, ErrUnsupportedSyntheticMessage, ErrRouteIDRequired:
|
||||
return http.StatusBadRequest
|
||||
case ErrRouteNotFound, ErrSyntheticPeerUnavailable:
|
||||
return http.StatusNotFound
|
||||
default:
|
||||
return http.StatusBadRequest
|
||||
}
|
||||
}
|
||||
|
||||
func forwardStatusCode(err error) int {
|
||||
switch err {
|
||||
case ErrClusterMismatch, ErrNodeMismatch, ErrUnauthorizedChannel, ErrLoopDetected:
|
||||
return http.StatusForbidden
|
||||
case ErrRouteExpired, ErrTTLExhausted, ErrInvalidRoutePath, ErrRouteIDRequired:
|
||||
return http.StatusBadRequest
|
||||
case ErrForwardRuntimeUnavailable:
|
||||
return http.StatusNotImplemented
|
||||
case ErrRouteNotFound:
|
||||
return http.StatusNotFound
|
||||
case ErrForwardPeerUnavailable:
|
||||
return http.StatusBadGateway
|
||||
default:
|
||||
return http.StatusBadRequest
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,802 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestMeshHealthAcceptsSameCluster(t *testing.T) {
|
||||
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"}
|
||||
server := httptest.NewServer(Server{Local: local}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
client := NewClient(server.URL)
|
||||
ack, err := client.SendHealth(context.Background(), NewHealthMessage(
|
||||
PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
local,
|
||||
))
|
||||
if err != nil {
|
||||
t.Fatalf("send health: %v", err)
|
||||
}
|
||||
if !ack.Accepted || ack.By.NodeID != "node-b" {
|
||||
t.Fatalf("unexpected ack: %+v", ack)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMeshHealthRejectsClusterMismatch(t *testing.T) {
|
||||
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"}
|
||||
server := httptest.NewServer(Server{Local: local}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
message := NewHealthMessage(PeerIdentity{ClusterID: "cluster-2", NodeID: "node-a"}, local)
|
||||
payload, err := json.Marshal(message)
|
||||
if err != nil {
|
||||
t.Fatalf("marshal message: %v", err)
|
||||
}
|
||||
resp, err := http.Post(server.URL+"/mesh/v1/health", "application/json", bytes.NewReader(payload))
|
||||
if err != nil {
|
||||
t.Fatalf("post health: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusForbidden {
|
||||
t.Fatalf("status = %d, want %d", resp.StatusCode, http.StatusForbidden)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMeshForwardingDisabled(t *testing.T) {
|
||||
server := httptest.NewServer(Server{Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"}}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
resp, err := http.Post(server.URL+"/mesh/v1/forward", "application/octet-stream", bytes.NewReader([]byte("payload")))
|
||||
if err != nil {
|
||||
t.Fatalf("post forward: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusNotImplemented {
|
||||
t.Fatalf("status = %d, want %d", resp.StatusCode, http.StatusNotImplemented)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMeshForwardingGateEnabledStillHasNoProductionRuntime(t *testing.T) {
|
||||
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"}
|
||||
server := httptest.NewServer(Server{
|
||||
Local: local,
|
||||
ProductionForwardingEnabled: true,
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
payload, err := json.Marshal(validProductionEnvelope(local))
|
||||
if err != nil {
|
||||
t.Fatalf("marshal envelope: %v", err)
|
||||
}
|
||||
resp, err := http.Post(server.URL+"/mesh/v1/forward", "application/json", bytes.NewReader(payload))
|
||||
if err != nil {
|
||||
t.Fatalf("post forward: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusNotImplemented {
|
||||
t.Fatalf("status = %d, want %d", resp.StatusCode, http.StatusNotImplemented)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMeshForwardingGateDeliversFabricControlAtDestination(t *testing.T) {
|
||||
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-c"}
|
||||
var events []ProductionForwardLogEntry
|
||||
server := httptest.NewServer(Server{
|
||||
Local: local,
|
||||
ProductionForwardingEnabled: true,
|
||||
ProductionForwardLogger: func(entry ProductionForwardLogEntry) {
|
||||
events = append(events, entry)
|
||||
},
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
envelope := validProductionEnvelope(local)
|
||||
envelope.SourceNodeID = "node-a"
|
||||
envelope.DestinationNodeID = local.NodeID
|
||||
envelope.CurrentHopNodeID = local.NodeID
|
||||
envelope.NextHopNodeID = local.NodeID
|
||||
payload, err := json.Marshal(envelope)
|
||||
if err != nil {
|
||||
t.Fatalf("marshal envelope: %v", err)
|
||||
}
|
||||
resp, err := http.Post(server.URL+"/mesh/v1/forward", "application/json", bytes.NewReader(payload))
|
||||
if err != nil {
|
||||
t.Fatalf("post forward: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
t.Fatalf("status = %d, want %d", resp.StatusCode, http.StatusOK)
|
||||
}
|
||||
var result ProductionForwardResult
|
||||
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
|
||||
t.Fatalf("decode result: %v", err)
|
||||
}
|
||||
if !result.Accepted || !result.Delivered || result.Forwarded || result.By.NodeID != local.NodeID {
|
||||
t.Fatalf("unexpected result: %+v", result)
|
||||
}
|
||||
if !hasProductionForwardEvent(events, "production_forward_accepted") || !hasProductionForwardEvent(events, "production_forward_delivered") {
|
||||
t.Fatalf("missing production forward events: %+v", events)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMeshForwardingGateForwardsDirectFabricControlToNextHop(t *testing.T) {
|
||||
nodeC := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-c"}
|
||||
var deliveredObservation ProductionEnvelopeObservation
|
||||
serverC := httptest.NewServer(Server{
|
||||
Local: nodeC,
|
||||
ProductionForwardingEnabled: true,
|
||||
ProductionEnvelopeObserver: func(_ context.Context, observation ProductionEnvelopeObservation) error {
|
||||
deliveredObservation = observation
|
||||
return nil
|
||||
},
|
||||
}.Handler())
|
||||
defer serverC.Close()
|
||||
|
||||
nodeB := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"}
|
||||
serverB := httptest.NewServer(Server{
|
||||
Local: nodeB,
|
||||
ProductionForwardingEnabled: true,
|
||||
ProductionForwardTransport: NewHTTPProductionForwardTransport(map[string]string{
|
||||
nodeC.NodeID: serverC.URL,
|
||||
}),
|
||||
}.Handler())
|
||||
defer serverB.Close()
|
||||
|
||||
envelope := validProductionEnvelope(nodeB)
|
||||
envelope.SourceNodeID = "node-a"
|
||||
envelope.DestinationNodeID = nodeC.NodeID
|
||||
envelope.CurrentHopNodeID = nodeB.NodeID
|
||||
envelope.NextHopNodeID = nodeC.NodeID
|
||||
payload, err := json.Marshal(envelope)
|
||||
if err != nil {
|
||||
t.Fatalf("marshal envelope: %v", err)
|
||||
}
|
||||
resp, err := http.Post(serverB.URL+"/mesh/v1/forward", "application/json", bytes.NewReader(payload))
|
||||
if err != nil {
|
||||
t.Fatalf("post forward: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
t.Fatalf("status = %d, want %d", resp.StatusCode, http.StatusOK)
|
||||
}
|
||||
var result ProductionForwardResult
|
||||
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
|
||||
t.Fatalf("decode result: %v", err)
|
||||
}
|
||||
if !result.Accepted || !result.Forwarded || !result.Delivered || result.NextNodeID != nodeC.NodeID || result.By.NodeID != nodeB.NodeID {
|
||||
t.Fatalf("unexpected forward result: %+v", result)
|
||||
}
|
||||
if deliveredObservation.CurrentHopNodeID != nodeC.NodeID || deliveredObservation.MessageID != envelope.MessageID {
|
||||
t.Fatalf("destination did not observe forwarded envelope: %+v", deliveredObservation)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMeshForwardingGateForwardsMultiHopFabricControlByRoutePath(t *testing.T) {
|
||||
nodeC := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-c"}
|
||||
var deliveredObservation ProductionEnvelopeObservation
|
||||
var nodeREvents []ProductionForwardLogEntry
|
||||
var nodeBEvents []ProductionForwardLogEntry
|
||||
serverC := httptest.NewServer(Server{
|
||||
Local: nodeC,
|
||||
ProductionForwardingEnabled: true,
|
||||
ProductionEnvelopeObserver: func(_ context.Context, observation ProductionEnvelopeObservation) error {
|
||||
deliveredObservation = observation
|
||||
return nil
|
||||
},
|
||||
}.Handler())
|
||||
defer serverC.Close()
|
||||
|
||||
nodeR := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-r"}
|
||||
serverR := httptest.NewServer(Server{
|
||||
Local: nodeR,
|
||||
ProductionForwardingEnabled: true,
|
||||
ProductionForwardTransport: NewHTTPProductionForwardTransport(map[string]string{
|
||||
nodeC.NodeID: serverC.URL,
|
||||
}),
|
||||
ProductionForwardLogger: func(entry ProductionForwardLogEntry) {
|
||||
nodeREvents = append(nodeREvents, entry)
|
||||
},
|
||||
}.Handler())
|
||||
defer serverR.Close()
|
||||
|
||||
nodeB := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"}
|
||||
serverB := httptest.NewServer(Server{
|
||||
Local: nodeB,
|
||||
ProductionForwardingEnabled: true,
|
||||
ProductionForwardTransport: NewHTTPProductionForwardTransport(map[string]string{
|
||||
nodeR.NodeID: serverR.URL,
|
||||
}),
|
||||
ProductionForwardLogger: func(entry ProductionForwardLogEntry) {
|
||||
nodeBEvents = append(nodeBEvents, entry)
|
||||
},
|
||||
}.Handler())
|
||||
defer serverB.Close()
|
||||
|
||||
envelope := validProductionEnvelope(nodeB)
|
||||
envelope.SourceNodeID = "node-a"
|
||||
envelope.DestinationNodeID = nodeC.NodeID
|
||||
envelope.CurrentHopNodeID = nodeB.NodeID
|
||||
envelope.NextHopNodeID = nodeR.NodeID
|
||||
envelope.RoutePath = []string{"node-a", nodeB.NodeID, nodeR.NodeID, nodeC.NodeID}
|
||||
payload, err := json.Marshal(envelope)
|
||||
if err != nil {
|
||||
t.Fatalf("marshal envelope: %v", err)
|
||||
}
|
||||
resp, err := http.Post(serverB.URL+"/mesh/v1/forward", "application/json", bytes.NewReader(payload))
|
||||
if err != nil {
|
||||
t.Fatalf("post forward: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
t.Fatalf("status = %d, want %d", resp.StatusCode, http.StatusOK)
|
||||
}
|
||||
var result ProductionForwardResult
|
||||
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
|
||||
t.Fatalf("decode result: %v", err)
|
||||
}
|
||||
if !result.Accepted || !result.Forwarded || !result.Delivered || result.NextNodeID != nodeR.NodeID || result.By.NodeID != nodeB.NodeID {
|
||||
t.Fatalf("unexpected multi-hop result: %+v", result)
|
||||
}
|
||||
if deliveredObservation.CurrentHopNodeID != nodeC.NodeID || deliveredObservation.NextHopNodeID != nodeC.NodeID {
|
||||
t.Fatalf("destination did not observe final hop: %+v", deliveredObservation)
|
||||
}
|
||||
if len(deliveredObservation.VisitedNodeIDs) != 2 || deliveredObservation.VisitedNodeIDs[0] != nodeB.NodeID || deliveredObservation.VisitedNodeIDs[1] != nodeR.NodeID {
|
||||
t.Fatalf("visited path not propagated: %+v", deliveredObservation.VisitedNodeIDs)
|
||||
}
|
||||
if !hasProductionForwardEvent(nodeBEvents, "production_forward_forwarded") || !hasProductionForwardEvent(nodeREvents, "production_forward_forwarded") {
|
||||
t.Fatalf("missing relay forward events: nodeB=%+v nodeR=%+v", nodeBEvents, nodeREvents)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMeshForwardingGateForwardsConfiguredProductionRoute(t *testing.T) {
|
||||
nodeC := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-c"}
|
||||
route := configuredProductionRoute("route-1", []string{"node-a", "node-b", "node-r", nodeC.NodeID})
|
||||
var deliveredObservation ProductionEnvelopeObservation
|
||||
serverC := httptest.NewServer(Server{
|
||||
Local: nodeC,
|
||||
ProductionForwardingEnabled: true,
|
||||
ProductionRoutes: []SyntheticRoute{route},
|
||||
ProductionEnvelopeObserver: func(_ context.Context, observation ProductionEnvelopeObservation) error {
|
||||
deliveredObservation = observation
|
||||
return nil
|
||||
},
|
||||
}.Handler())
|
||||
defer serverC.Close()
|
||||
|
||||
nodeR := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-r"}
|
||||
serverR := httptest.NewServer(Server{
|
||||
Local: nodeR,
|
||||
ProductionForwardingEnabled: true,
|
||||
ProductionRoutes: []SyntheticRoute{route},
|
||||
ProductionForwardTransport: NewHTTPProductionForwardTransport(map[string]string{
|
||||
nodeC.NodeID: serverC.URL,
|
||||
}),
|
||||
}.Handler())
|
||||
defer serverR.Close()
|
||||
|
||||
nodeB := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"}
|
||||
serverB := httptest.NewServer(Server{
|
||||
Local: nodeB,
|
||||
ProductionForwardingEnabled: true,
|
||||
ProductionRoutes: []SyntheticRoute{route},
|
||||
ProductionForwardTransport: NewHTTPProductionForwardTransport(map[string]string{
|
||||
nodeR.NodeID: serverR.URL,
|
||||
}),
|
||||
}.Handler())
|
||||
defer serverB.Close()
|
||||
|
||||
envelope := validProductionEnvelope(nodeB)
|
||||
envelope.SourceNodeID = "node-a"
|
||||
envelope.DestinationNodeID = nodeC.NodeID
|
||||
envelope.CurrentHopNodeID = nodeB.NodeID
|
||||
envelope.NextHopNodeID = nodeR.NodeID
|
||||
envelope.RoutePath = route.Hops
|
||||
payload, err := json.Marshal(envelope)
|
||||
if err != nil {
|
||||
t.Fatalf("marshal envelope: %v", err)
|
||||
}
|
||||
resp, err := http.Post(serverB.URL+"/mesh/v1/forward", "application/json", bytes.NewReader(payload))
|
||||
if err != nil {
|
||||
t.Fatalf("post forward: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
t.Fatalf("status = %d, want %d", resp.StatusCode, http.StatusOK)
|
||||
}
|
||||
if deliveredObservation.RouteID != route.RouteID || deliveredObservation.CurrentHopNodeID != nodeC.NodeID {
|
||||
t.Fatalf("configured route was not delivered: %+v", deliveredObservation)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMeshForwardingGateRejectsUnknownConfiguredProductionRoute(t *testing.T) {
|
||||
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"}
|
||||
server := httptest.NewServer(Server{
|
||||
Local: local,
|
||||
ProductionForwardingEnabled: true,
|
||||
ProductionRoutes: []SyntheticRoute{
|
||||
configuredProductionRoute("route-other", []string{"node-a", local.NodeID, "node-c"}),
|
||||
},
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
envelope := validProductionEnvelope(local)
|
||||
payload, err := json.Marshal(envelope)
|
||||
if err != nil {
|
||||
t.Fatalf("marshal envelope: %v", err)
|
||||
}
|
||||
resp, err := http.Post(server.URL+"/mesh/v1/forward", "application/json", bytes.NewReader(payload))
|
||||
if err != nil {
|
||||
t.Fatalf("post forward: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusNotFound {
|
||||
t.Fatalf("status = %d, want %d", resp.StatusCode, http.StatusNotFound)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMeshForwardingGateRejectsConfiguredProductionRouteWrongNextHop(t *testing.T) {
|
||||
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"}
|
||||
route := configuredProductionRoute("route-1", []string{"node-a", local.NodeID, "node-r", "node-c"})
|
||||
server := httptest.NewServer(Server{
|
||||
Local: local,
|
||||
ProductionForwardingEnabled: true,
|
||||
ProductionRoutes: []SyntheticRoute{route},
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
envelope := validProductionEnvelope(local)
|
||||
envelope.SourceNodeID = "node-a"
|
||||
envelope.DestinationNodeID = "node-c"
|
||||
envelope.CurrentHopNodeID = local.NodeID
|
||||
envelope.NextHopNodeID = "node-c"
|
||||
envelope.RoutePath = route.Hops
|
||||
payload, err := json.Marshal(envelope)
|
||||
if err != nil {
|
||||
t.Fatalf("marshal envelope: %v", err)
|
||||
}
|
||||
resp, err := http.Post(server.URL+"/mesh/v1/forward", "application/json", bytes.NewReader(payload))
|
||||
if err != nil {
|
||||
t.Fatalf("post forward: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusBadRequest {
|
||||
t.Fatalf("status = %d, want %d", resp.StatusCode, http.StatusBadRequest)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMeshForwardingGateRejectsRoutePathWrongNextHop(t *testing.T) {
|
||||
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"}
|
||||
var events []ProductionForwardLogEntry
|
||||
server := httptest.NewServer(Server{
|
||||
Local: local,
|
||||
ProductionForwardingEnabled: true,
|
||||
ProductionForwardLogger: func(entry ProductionForwardLogEntry) {
|
||||
events = append(events, entry)
|
||||
},
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
envelope := validProductionEnvelope(local)
|
||||
envelope.SourceNodeID = "node-a"
|
||||
envelope.DestinationNodeID = "node-c"
|
||||
envelope.CurrentHopNodeID = local.NodeID
|
||||
envelope.NextHopNodeID = "node-x"
|
||||
envelope.RoutePath = []string{"node-a", local.NodeID, "node-r", "node-c"}
|
||||
payload, err := json.Marshal(envelope)
|
||||
if err != nil {
|
||||
t.Fatalf("marshal envelope: %v", err)
|
||||
}
|
||||
resp, err := http.Post(server.URL+"/mesh/v1/forward", "application/json", bytes.NewReader(payload))
|
||||
if err != nil {
|
||||
t.Fatalf("post forward: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusBadRequest {
|
||||
t.Fatalf("status = %d, want %d", resp.StatusCode, http.StatusBadRequest)
|
||||
}
|
||||
if !hasProductionForwardEvent(events, "production_forward_rejected") {
|
||||
t.Fatalf("missing reject event: %+v", events)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMeshForwardingGateRejectsRoutePathLoop(t *testing.T) {
|
||||
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"}
|
||||
server := httptest.NewServer(Server{
|
||||
Local: local,
|
||||
ProductionForwardingEnabled: true,
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
envelope := validProductionEnvelope(local)
|
||||
envelope.SourceNodeID = "node-a"
|
||||
envelope.DestinationNodeID = "node-c"
|
||||
envelope.CurrentHopNodeID = local.NodeID
|
||||
envelope.NextHopNodeID = "node-r"
|
||||
envelope.RoutePath = []string{"node-a", local.NodeID, "node-r", local.NodeID, "node-c"}
|
||||
payload, err := json.Marshal(envelope)
|
||||
if err != nil {
|
||||
t.Fatalf("marshal envelope: %v", err)
|
||||
}
|
||||
resp, err := http.Post(server.URL+"/mesh/v1/forward", "application/json", bytes.NewReader(payload))
|
||||
if err != nil {
|
||||
t.Fatalf("post forward: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusForbidden {
|
||||
t.Fatalf("status = %d, want %d", resp.StatusCode, http.StatusForbidden)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMeshForwardingGateRejectsInvalidProductionEnvelope(t *testing.T) {
|
||||
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"}
|
||||
server := httptest.NewServer(Server{
|
||||
Local: local,
|
||||
ProductionForwardingEnabled: true,
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
envelope := validProductionEnvelope(local)
|
||||
envelope.PayloadHash = "bad-hash"
|
||||
payload, err := json.Marshal(envelope)
|
||||
if err != nil {
|
||||
t.Fatalf("marshal envelope: %v", err)
|
||||
}
|
||||
resp, err := http.Post(server.URL+"/mesh/v1/forward", "application/json", bytes.NewReader(payload))
|
||||
if err != nil {
|
||||
t.Fatalf("post forward: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusBadRequest {
|
||||
t.Fatalf("status = %d, want %d", resp.StatusCode, http.StatusBadRequest)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMeshForwardingGateRejectsOversizedProductionEnvelopePayload(t *testing.T) {
|
||||
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"}
|
||||
observed := false
|
||||
server := httptest.NewServer(Server{
|
||||
Local: local,
|
||||
ProductionForwardingEnabled: true,
|
||||
ProductionEnvelopeObserver: func(context.Context, ProductionEnvelopeObservation) error {
|
||||
observed = true
|
||||
return nil
|
||||
},
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
envelope := validProductionEnvelope(local)
|
||||
envelope.Payload = json.RawMessage(`"` + string(bytes.Repeat([]byte("a"), MaxProductionEnvelopePayloadBytes+1)) + `"`)
|
||||
sum := sha256.Sum256(envelope.Payload)
|
||||
envelope.PayloadLength = len(envelope.Payload)
|
||||
envelope.PayloadHash = hex.EncodeToString(sum[:])
|
||||
payload, err := json.Marshal(envelope)
|
||||
if err != nil {
|
||||
t.Fatalf("marshal envelope: %v", err)
|
||||
}
|
||||
resp, err := http.Post(server.URL+"/mesh/v1/forward", "application/json", bytes.NewReader(payload))
|
||||
if err != nil {
|
||||
t.Fatalf("post forward: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusBadRequest {
|
||||
t.Fatalf("status = %d, want %d", resp.StatusCode, http.StatusBadRequest)
|
||||
}
|
||||
if observed {
|
||||
t.Fatal("observer called for oversized envelope")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMeshForwardingGateRejectsFutureCreatedAt(t *testing.T) {
|
||||
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"}
|
||||
observed := false
|
||||
server := httptest.NewServer(Server{
|
||||
Local: local,
|
||||
ProductionForwardingEnabled: true,
|
||||
ProductionEnvelopeObserver: func(context.Context, ProductionEnvelopeObservation) error {
|
||||
observed = true
|
||||
return nil
|
||||
},
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
envelope := validProductionEnvelope(local)
|
||||
envelope.CreatedAt = time.Now().UTC().Add(MaxProductionEnvelopeFutureSkew + time.Second)
|
||||
envelope.ExpiresAt = envelope.CreatedAt.Add(time.Minute)
|
||||
payload, err := json.Marshal(envelope)
|
||||
if err != nil {
|
||||
t.Fatalf("marshal envelope: %v", err)
|
||||
}
|
||||
resp, err := http.Post(server.URL+"/mesh/v1/forward", "application/json", bytes.NewReader(payload))
|
||||
if err != nil {
|
||||
t.Fatalf("post forward: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusBadRequest {
|
||||
t.Fatalf("status = %d, want %d", resp.StatusCode, http.StatusBadRequest)
|
||||
}
|
||||
if observed {
|
||||
t.Fatal("observer called for future-created envelope")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMeshForwardingGateObservesValidEnvelopeWithoutPayload(t *testing.T) {
|
||||
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"}
|
||||
var observed ProductionEnvelopeObservation
|
||||
server := httptest.NewServer(Server{
|
||||
Local: local,
|
||||
ProductionForwardingEnabled: true,
|
||||
ProductionEnvelopeObserver: func(_ context.Context, observation ProductionEnvelopeObservation) error {
|
||||
observed = observation
|
||||
return nil
|
||||
},
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
envelope := validProductionEnvelope(local)
|
||||
payload, err := json.Marshal(envelope)
|
||||
if err != nil {
|
||||
t.Fatalf("marshal envelope: %v", err)
|
||||
}
|
||||
resp, err := http.Post(server.URL+"/mesh/v1/forward", "application/json", bytes.NewReader(payload))
|
||||
if err != nil {
|
||||
t.Fatalf("post forward: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusNotImplemented {
|
||||
t.Fatalf("status = %d, want %d", resp.StatusCode, http.StatusNotImplemented)
|
||||
}
|
||||
if observed.MessageID != envelope.MessageID || observed.RouteID != envelope.RouteID {
|
||||
t.Fatalf("unexpected observation: %+v", observed)
|
||||
}
|
||||
if observed.PayloadHash != envelope.PayloadHash || observed.PayloadLength != envelope.PayloadLength {
|
||||
t.Fatalf("payload metadata missing from observation: %+v", observed)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMeshForwardingGateDoesNotObserveRejectedEnvelope(t *testing.T) {
|
||||
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"}
|
||||
observed := false
|
||||
server := httptest.NewServer(Server{
|
||||
Local: local,
|
||||
ProductionForwardingEnabled: true,
|
||||
ProductionEnvelopeObserver: func(context.Context, ProductionEnvelopeObservation) error {
|
||||
observed = true
|
||||
return nil
|
||||
},
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
envelope := validProductionEnvelope(local)
|
||||
envelope.ClusterID = "wrong-cluster"
|
||||
payload, err := json.Marshal(envelope)
|
||||
if err != nil {
|
||||
t.Fatalf("marshal envelope: %v", err)
|
||||
}
|
||||
resp, err := http.Post(server.URL+"/mesh/v1/forward", "application/json", bytes.NewReader(payload))
|
||||
if err != nil {
|
||||
t.Fatalf("post forward: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusForbidden {
|
||||
t.Fatalf("status = %d, want %d", resp.StatusCode, http.StatusForbidden)
|
||||
}
|
||||
if observed {
|
||||
t.Fatal("observer called for rejected envelope")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMeshForwardingGateFailsClosedWhenObservationFails(t *testing.T) {
|
||||
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"}
|
||||
server := httptest.NewServer(Server{
|
||||
Local: local,
|
||||
ProductionForwardingEnabled: true,
|
||||
ProductionEnvelopeObserver: func(context.Context, ProductionEnvelopeObservation) error {
|
||||
return errors.New("observer down")
|
||||
},
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
payload, err := json.Marshal(validProductionEnvelope(local))
|
||||
if err != nil {
|
||||
t.Fatalf("marshal envelope: %v", err)
|
||||
}
|
||||
resp, err := http.Post(server.URL+"/mesh/v1/forward", "application/json", bytes.NewReader(payload))
|
||||
if err != nil {
|
||||
t.Fatalf("post forward: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusInternalServerError {
|
||||
t.Fatalf("status = %d, want %d", resp.StatusCode, http.StatusInternalServerError)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMeshForwardingGateFailsClosedWhenObservationPanics(t *testing.T) {
|
||||
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"}
|
||||
server := httptest.NewServer(Server{
|
||||
Local: local,
|
||||
ProductionForwardingEnabled: true,
|
||||
ProductionEnvelopeObserver: func(context.Context, ProductionEnvelopeObservation) error {
|
||||
panic("observer panic")
|
||||
},
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
payload, err := json.Marshal(validProductionEnvelope(local))
|
||||
if err != nil {
|
||||
t.Fatalf("marshal envelope: %v", err)
|
||||
}
|
||||
resp, err := http.Post(server.URL+"/mesh/v1/forward", "application/json", bytes.NewReader(payload))
|
||||
if err != nil {
|
||||
t.Fatalf("post forward: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusInternalServerError {
|
||||
t.Fatalf("status = %d, want %d", resp.StatusCode, http.StatusInternalServerError)
|
||||
}
|
||||
}
|
||||
|
||||
func TestObserveProductionEnvelopeAllowsNilObserver(t *testing.T) {
|
||||
if err := observeProductionEnvelope(context.Background(), nil, ProductionEnvelopeObservation{}); err != nil {
|
||||
t.Fatalf("observeProductionEnvelope nil observer err = %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestProductionEnvelopeObservationSinkKeepsBoundedMetadata(t *testing.T) {
|
||||
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"}
|
||||
sink := NewProductionEnvelopeObservationSink(2)
|
||||
server := httptest.NewServer(Server{
|
||||
Local: local,
|
||||
ProductionForwardingEnabled: true,
|
||||
ProductionEnvelopeObserver: sink.Observe,
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
for i := 1; i <= 3; i++ {
|
||||
envelope := validProductionEnvelope(local)
|
||||
envelope.MessageID = "message-" + string(rune('0'+i))
|
||||
payload, err := json.Marshal(envelope)
|
||||
if err != nil {
|
||||
t.Fatalf("marshal envelope: %v", err)
|
||||
}
|
||||
resp, err := http.Post(server.URL+"/mesh/v1/forward", "application/json", bytes.NewReader(payload))
|
||||
if err != nil {
|
||||
t.Fatalf("post forward: %v", err)
|
||||
}
|
||||
resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusNotImplemented {
|
||||
t.Fatalf("status = %d, want %d", resp.StatusCode, http.StatusNotImplemented)
|
||||
}
|
||||
}
|
||||
|
||||
observations := sink.Snapshot()
|
||||
if len(observations) != 2 {
|
||||
t.Fatalf("observation count = %d, want 2", len(observations))
|
||||
}
|
||||
if observations[0].MessageID != "message-2" || observations[1].MessageID != "message-3" {
|
||||
t.Fatalf("unexpected bounded observations: %+v", observations)
|
||||
}
|
||||
if observations[0].PayloadHash == "" || observations[0].PayloadLength == 0 {
|
||||
t.Fatalf("payload metadata missing from bounded observation: %+v", observations[0])
|
||||
}
|
||||
metrics := sink.Metrics()
|
||||
if metrics.Capacity != 2 || metrics.CurrentDepth != 2 || metrics.AcceptedTotal != 3 || metrics.DroppedOldest != 1 {
|
||||
t.Fatalf("unexpected sink metrics: %+v", metrics)
|
||||
}
|
||||
}
|
||||
|
||||
func TestProductionEnvelopeObservationSinkMetricsStartEmpty(t *testing.T) {
|
||||
sink := NewProductionEnvelopeObservationSink(3)
|
||||
metrics := sink.Metrics()
|
||||
if metrics.Capacity != 3 || metrics.CurrentDepth != 0 || metrics.AcceptedTotal != 0 || metrics.DroppedOldest != 0 {
|
||||
t.Fatalf("unexpected empty metrics: %+v", metrics)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMeshForwardingGateRejectsServiceChannel(t *testing.T) {
|
||||
local := PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"}
|
||||
server := httptest.NewServer(Server{
|
||||
Local: local,
|
||||
ProductionForwardingEnabled: true,
|
||||
}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
envelope := validProductionEnvelope(local)
|
||||
envelope.ChannelClass = "render"
|
||||
payload, err := json.Marshal(envelope)
|
||||
if err != nil {
|
||||
t.Fatalf("marshal envelope: %v", err)
|
||||
}
|
||||
resp, err := http.Post(server.URL+"/mesh/v1/forward", "application/json", bytes.NewReader(payload))
|
||||
if err != nil {
|
||||
t.Fatalf("post forward: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusForbidden {
|
||||
t.Fatalf("status = %d, want %d", resp.StatusCode, http.StatusForbidden)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMeshForwardingRequiresPost(t *testing.T) {
|
||||
server := httptest.NewServer(Server{Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"}}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
resp, err := http.Get(server.URL + "/mesh/v1/forward")
|
||||
if err != nil {
|
||||
t.Fatalf("get forward: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusMethodNotAllowed {
|
||||
t.Fatalf("status = %d, want %d", resp.StatusCode, http.StatusMethodNotAllowed)
|
||||
}
|
||||
}
|
||||
|
||||
func validProductionEnvelope(local PeerIdentity) ProductionEnvelope {
|
||||
payload := json.RawMessage(`{"kind":"control"}`)
|
||||
sum := sha256.Sum256(payload)
|
||||
now := time.Now().UTC()
|
||||
return ProductionEnvelope{
|
||||
FabricProtocolVersion: ProtocolVersion,
|
||||
MessageID: "message-1",
|
||||
RouteID: "route-1",
|
||||
ClusterID: local.ClusterID,
|
||||
SourceNodeID: "node-a",
|
||||
DestinationNodeID: "node-c",
|
||||
CurrentHopNodeID: local.NodeID,
|
||||
NextHopNodeID: "node-c",
|
||||
ChannelClass: ProductionChannelFabricControl,
|
||||
MessageType: ProductionMessageFabricControl,
|
||||
TTL: 4,
|
||||
HopCount: 1,
|
||||
CreatedAt: now,
|
||||
ExpiresAt: now.Add(time.Minute),
|
||||
PayloadLength: len(payload),
|
||||
PayloadHash: hex.EncodeToString(sum[:]),
|
||||
Payload: payload,
|
||||
}
|
||||
}
|
||||
|
||||
func configuredProductionRoute(routeID string, hops []string) SyntheticRoute {
|
||||
return SyntheticRoute{
|
||||
RouteID: routeID,
|
||||
ClusterID: "cluster-1",
|
||||
SourceNodeID: hops[0],
|
||||
DestinationNodeID: hops[len(hops)-1],
|
||||
Hops: append([]string{}, hops...),
|
||||
AllowedChannels: []string{ProductionChannelFabricControl},
|
||||
ExpiresAt: time.Now().UTC().Add(time.Hour),
|
||||
MaxTTL: 8,
|
||||
MaxHops: 8,
|
||||
}
|
||||
}
|
||||
|
||||
func hasProductionForwardEvent(events []ProductionForwardLogEntry, event string) bool {
|
||||
for _, item := range events {
|
||||
if item.Event == event {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func TestSyntheticEndpointDisabledByDefault(t *testing.T) {
|
||||
server := httptest.NewServer(Server{Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"}}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
resp, err := http.Post(server.URL+"/mesh/v1/synthetic/probe", "application/json", bytes.NewReader([]byte(`{}`)))
|
||||
if err != nil {
|
||||
t.Fatalf("post synthetic probe: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusServiceUnavailable {
|
||||
t.Fatalf("status = %d, want %d", resp.StatusCode, http.StatusServiceUnavailable)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,280 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
type SyntheticRelaySchedulerConfig struct {
|
||||
Enabled bool
|
||||
Local PeerIdentity
|
||||
QueuePolicies []SyntheticRelayQueuePolicy
|
||||
AllowedChannels []string
|
||||
AllowedMessageTypes []string
|
||||
Now func() time.Time
|
||||
Logger func(SyntheticLogEntry)
|
||||
}
|
||||
|
||||
type SyntheticRelayScheduler struct {
|
||||
enabled bool
|
||||
local PeerIdentity
|
||||
policies map[string]SyntheticRelayQueuePolicy
|
||||
allowedChannels map[string]struct{}
|
||||
allowedMessageTypes map[string]struct{}
|
||||
priorityOrder []string
|
||||
now func() time.Time
|
||||
logger func(SyntheticLogEntry)
|
||||
|
||||
mu sync.Mutex
|
||||
queues map[string][]SyntheticEnvelope
|
||||
metrics SyntheticRelayQueueMetrics
|
||||
}
|
||||
|
||||
func NewSyntheticRelayScheduler(cfg SyntheticRelaySchedulerConfig) *SyntheticRelayScheduler {
|
||||
policies := cfg.QueuePolicies
|
||||
if len(policies) == 0 {
|
||||
policies = []SyntheticRelayQueuePolicy{
|
||||
{Channel: SyntheticChannelFabricControl, Capacity: 64, Droppable: false},
|
||||
{Channel: SyntheticChannelRouteControl, Capacity: 64, Droppable: false},
|
||||
{Channel: SyntheticChannelTelemetry, Capacity: 16, Droppable: true},
|
||||
}
|
||||
}
|
||||
policyMap := map[string]SyntheticRelayQueuePolicy{}
|
||||
allowedChannels := map[string]struct{}{}
|
||||
priorityOrder := make([]string, 0, len(policies))
|
||||
for _, policy := range policies {
|
||||
if policy.Channel == "" {
|
||||
continue
|
||||
}
|
||||
if policy.Capacity <= 0 {
|
||||
policy.Capacity = 1
|
||||
}
|
||||
policyMap[policy.Channel] = policy
|
||||
allowedChannels[policy.Channel] = struct{}{}
|
||||
priorityOrder = append(priorityOrder, policy.Channel)
|
||||
}
|
||||
for _, channel := range cfg.AllowedChannels {
|
||||
if channel != "" {
|
||||
allowedChannels[channel] = struct{}{}
|
||||
}
|
||||
}
|
||||
messageTypes := cfg.AllowedMessageTypes
|
||||
if len(messageTypes) == 0 {
|
||||
messageTypes = []string{
|
||||
SyntheticMessageProbe,
|
||||
SyntheticMessageProbeAck,
|
||||
SyntheticMessageRouteHealth,
|
||||
SyntheticMessageRouteHealthAck,
|
||||
SyntheticMessageTelemetry,
|
||||
SyntheticMessageTestService,
|
||||
SyntheticMessageTestServiceAck,
|
||||
}
|
||||
}
|
||||
allowedMessageTypes := map[string]struct{}{}
|
||||
for _, messageType := range messageTypes {
|
||||
if messageType != "" {
|
||||
allowedMessageTypes[messageType] = struct{}{}
|
||||
}
|
||||
}
|
||||
now := cfg.Now
|
||||
if now == nil {
|
||||
now = func() time.Time { return time.Now().UTC() }
|
||||
}
|
||||
return &SyntheticRelayScheduler{
|
||||
enabled: cfg.Enabled,
|
||||
local: cfg.Local,
|
||||
policies: policyMap,
|
||||
allowedChannels: allowedChannels,
|
||||
allowedMessageTypes: allowedMessageTypes,
|
||||
priorityOrder: priorityOrder,
|
||||
now: now,
|
||||
logger: cfg.Logger,
|
||||
queues: map[string][]SyntheticEnvelope{},
|
||||
metrics: SyntheticRelayQueueMetrics{
|
||||
QueueDepths: map[string]int{},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func (s *SyntheticRelayScheduler) Enqueue(envelope SyntheticEnvelope) (SyntheticRelayEnqueueResult, error) {
|
||||
if err := s.validateEnvelope(envelope); err != nil {
|
||||
s.reject(envelope, err)
|
||||
return SyntheticRelayEnqueueResult{}, err
|
||||
}
|
||||
policy := s.policies[envelope.Channel]
|
||||
result := SyntheticRelayEnqueueResult{
|
||||
Channel: envelope.Channel,
|
||||
QueueCapacity: policy.Capacity,
|
||||
AcceptedSequence: envelope.Sequence,
|
||||
}
|
||||
s.mu.Lock()
|
||||
queue := s.queues[envelope.Channel]
|
||||
if len(queue) >= policy.Capacity {
|
||||
if !policy.Droppable {
|
||||
s.metrics.Rejected++
|
||||
s.metrics.LastRejectReason = ErrSyntheticRelayQueueFull.Error()
|
||||
s.mu.Unlock()
|
||||
s.log(SyntheticLogEntry{
|
||||
Event: "fabric_relay_rejected",
|
||||
RouteID: envelope.RouteID,
|
||||
ClusterID: envelope.ClusterID,
|
||||
LocalNodeID: s.local.NodeID,
|
||||
Channel: envelope.Channel,
|
||||
MessageType: envelope.MessageType,
|
||||
Reason: ErrSyntheticRelayQueueFull.Error(),
|
||||
QueueDepth: len(queue),
|
||||
QueueCapacity: policy.Capacity,
|
||||
OccurredAt: s.now(),
|
||||
})
|
||||
return SyntheticRelayEnqueueResult{}, ErrSyntheticRelayQueueFull
|
||||
}
|
||||
result.Dropped = true
|
||||
result.DroppedSequence = queue[0].Sequence
|
||||
queue = queue[1:]
|
||||
s.metrics.Dropped++
|
||||
}
|
||||
queue = append(queue, envelope)
|
||||
s.queues[envelope.Channel] = queue
|
||||
result.QueueDepth = len(queue)
|
||||
s.metrics.Enqueued++
|
||||
s.metrics.QueueDepths[envelope.Channel] = len(queue)
|
||||
s.mu.Unlock()
|
||||
s.log(SyntheticLogEntry{
|
||||
Event: "fabric_relay_enqueued",
|
||||
RouteID: envelope.RouteID,
|
||||
ClusterID: envelope.ClusterID,
|
||||
LocalNodeID: s.local.NodeID,
|
||||
Channel: envelope.Channel,
|
||||
MessageType: envelope.MessageType,
|
||||
QueueDepth: result.QueueDepth,
|
||||
QueueCapacity: result.QueueCapacity,
|
||||
Dropped: result.Dropped,
|
||||
DroppedSequence: result.DroppedSequence,
|
||||
OccurredAt: s.now(),
|
||||
})
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (s *SyntheticRelayScheduler) Dequeue() (SyntheticEnvelope, error) {
|
||||
if !s.enabled {
|
||||
return SyntheticEnvelope{}, ErrMeshRuntimeDisabled
|
||||
}
|
||||
s.mu.Lock()
|
||||
for _, channel := range s.priorityOrder {
|
||||
queue := s.queues[channel]
|
||||
if len(queue) == 0 {
|
||||
continue
|
||||
}
|
||||
envelope := queue[0]
|
||||
queue = queue[1:]
|
||||
s.queues[channel] = queue
|
||||
s.metrics.Dequeued++
|
||||
s.metrics.QueueDepths[channel] = len(queue)
|
||||
s.mu.Unlock()
|
||||
s.log(SyntheticLogEntry{
|
||||
Event: "fabric_relay_dequeued",
|
||||
RouteID: envelope.RouteID,
|
||||
ClusterID: envelope.ClusterID,
|
||||
LocalNodeID: s.local.NodeID,
|
||||
Channel: envelope.Channel,
|
||||
MessageType: envelope.MessageType,
|
||||
QueueDepth: len(queue),
|
||||
QueueCapacity: s.policies[channel].Capacity,
|
||||
OccurredAt: s.now(),
|
||||
})
|
||||
return envelope, nil
|
||||
}
|
||||
s.mu.Unlock()
|
||||
return SyntheticEnvelope{}, ErrSyntheticRelayQueueEmpty
|
||||
}
|
||||
|
||||
func (s *SyntheticRelayScheduler) SnapshotQueueMetrics() SyntheticRelayQueueMetrics {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
depths := map[string]int{}
|
||||
for channel, depth := range s.metrics.QueueDepths {
|
||||
depths[channel] = depth
|
||||
}
|
||||
for channel, queue := range s.queues {
|
||||
depths[channel] = len(queue)
|
||||
}
|
||||
return SyntheticRelayQueueMetrics{
|
||||
Enqueued: s.metrics.Enqueued,
|
||||
Dequeued: s.metrics.Dequeued,
|
||||
Dropped: s.metrics.Dropped,
|
||||
Rejected: s.metrics.Rejected,
|
||||
LastRejectReason: s.metrics.LastRejectReason,
|
||||
QueueDepths: depths,
|
||||
}
|
||||
}
|
||||
|
||||
func (s *SyntheticRelayScheduler) validateEnvelope(envelope SyntheticEnvelope) error {
|
||||
if s == nil || !s.enabled {
|
||||
return ErrMeshRuntimeDisabled
|
||||
}
|
||||
if envelope.ProtocolVersion != ProtocolVersion {
|
||||
return ErrUnsupportedSyntheticMessage
|
||||
}
|
||||
if envelope.RouteID == "" {
|
||||
return ErrRouteIDRequired
|
||||
}
|
||||
if envelope.ClusterID == "" || envelope.ClusterID != s.local.ClusterID {
|
||||
return ErrClusterMismatch
|
||||
}
|
||||
if envelope.From.ClusterID != s.local.ClusterID || envelope.From.NodeID == "" {
|
||||
return ErrNodeMismatch
|
||||
}
|
||||
if envelope.To.ClusterID != s.local.ClusterID || envelope.To.NodeID != s.local.NodeID {
|
||||
return ErrNodeMismatch
|
||||
}
|
||||
if envelope.TTL <= 0 {
|
||||
return ErrTTLExhausted
|
||||
}
|
||||
if envelope.HopCount <= 0 {
|
||||
return ErrInvalidRoutePath
|
||||
}
|
||||
if contains(envelope.Visited, s.local.NodeID) {
|
||||
return ErrLoopDetected
|
||||
}
|
||||
if _, ok := s.allowedChannels[envelope.Channel]; !ok {
|
||||
return ErrUnauthorizedChannel
|
||||
}
|
||||
if _, ok := s.policies[envelope.Channel]; !ok {
|
||||
return ErrUnauthorizedChannel
|
||||
}
|
||||
if _, ok := s.allowedMessageTypes[envelope.MessageType]; !ok {
|
||||
return ErrUnsupportedSyntheticMessage
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *SyntheticRelayScheduler) reject(envelope SyntheticEnvelope, err error) {
|
||||
reason := ""
|
||||
if err != nil {
|
||||
reason = err.Error()
|
||||
}
|
||||
if s != nil {
|
||||
s.mu.Lock()
|
||||
s.metrics.Rejected++
|
||||
s.metrics.LastRejectReason = reason
|
||||
s.mu.Unlock()
|
||||
}
|
||||
if s != nil {
|
||||
s.log(SyntheticLogEntry{
|
||||
Event: "fabric_relay_rejected",
|
||||
RouteID: envelope.RouteID,
|
||||
ClusterID: envelope.ClusterID,
|
||||
LocalNodeID: s.local.NodeID,
|
||||
Channel: envelope.Channel,
|
||||
MessageType: envelope.MessageType,
|
||||
Reason: reason,
|
||||
OccurredAt: s.now(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func (s *SyntheticRelayScheduler) log(entry SyntheticLogEntry) {
|
||||
if s.logger != nil {
|
||||
s.logger(entry)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,213 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestSyntheticRelaySchedulerDequeuesByQoSPriority(t *testing.T) {
|
||||
scheduler := testRelayScheduler()
|
||||
telemetry := testRelayEnvelope(SyntheticChannelTelemetry, SyntheticMessageTelemetry, 1)
|
||||
routeControl := testRelayEnvelope(SyntheticChannelRouteControl, SyntheticMessageRouteHealth, 2)
|
||||
fabricControl := testRelayEnvelope(SyntheticChannelFabricControl, SyntheticMessageProbe, 3)
|
||||
|
||||
if _, err := scheduler.Enqueue(telemetry); err != nil {
|
||||
t.Fatalf("enqueue telemetry: %v", err)
|
||||
}
|
||||
if _, err := scheduler.Enqueue(routeControl); err != nil {
|
||||
t.Fatalf("enqueue route control: %v", err)
|
||||
}
|
||||
if _, err := scheduler.Enqueue(fabricControl); err != nil {
|
||||
t.Fatalf("enqueue fabric control: %v", err)
|
||||
}
|
||||
|
||||
first, err := scheduler.Dequeue()
|
||||
if err != nil {
|
||||
t.Fatalf("dequeue first: %v", err)
|
||||
}
|
||||
second, err := scheduler.Dequeue()
|
||||
if err != nil {
|
||||
t.Fatalf("dequeue second: %v", err)
|
||||
}
|
||||
third, err := scheduler.Dequeue()
|
||||
if err != nil {
|
||||
t.Fatalf("dequeue third: %v", err)
|
||||
}
|
||||
if first.Channel != SyntheticChannelFabricControl {
|
||||
t.Fatalf("first channel = %q, want fabric_control", first.Channel)
|
||||
}
|
||||
if second.Channel != SyntheticChannelRouteControl {
|
||||
t.Fatalf("second channel = %q, want route_control", second.Channel)
|
||||
}
|
||||
if third.Channel != SyntheticChannelTelemetry {
|
||||
t.Fatalf("third channel = %q, want telemetry", third.Channel)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSyntheticRelaySchedulerDropsOldestTelemetryOnly(t *testing.T) {
|
||||
scheduler := testRelayScheduler()
|
||||
first := testRelayEnvelope(SyntheticChannelTelemetry, SyntheticMessageTelemetry, 1)
|
||||
second := testRelayEnvelope(SyntheticChannelTelemetry, SyntheticMessageTelemetry, 2)
|
||||
|
||||
if result, err := scheduler.Enqueue(first); err != nil || result.Dropped {
|
||||
t.Fatalf("enqueue first result=%+v err=%v", result, err)
|
||||
}
|
||||
result, err := scheduler.Enqueue(second)
|
||||
if err != nil {
|
||||
t.Fatalf("enqueue second: %v", err)
|
||||
}
|
||||
if !result.Dropped || result.DroppedSequence != 1 {
|
||||
t.Fatalf("result = %+v, want dropped sequence 1", result)
|
||||
}
|
||||
dequeued, err := scheduler.Dequeue()
|
||||
if err != nil {
|
||||
t.Fatalf("dequeue: %v", err)
|
||||
}
|
||||
if dequeued.Sequence != 2 {
|
||||
t.Fatalf("dequeued sequence = %d, want 2", dequeued.Sequence)
|
||||
}
|
||||
metrics := scheduler.SnapshotQueueMetrics()
|
||||
if metrics.Dropped != 1 || metrics.Enqueued != 2 {
|
||||
t.Fatalf("metrics = %+v, want one drop and two enqueues", metrics)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSyntheticRelaySchedulerRejectsFullReliableQueue(t *testing.T) {
|
||||
scheduler := testRelayScheduler()
|
||||
first := testRelayEnvelope(SyntheticChannelFabricControl, SyntheticMessageProbe, 1)
|
||||
second := testRelayEnvelope(SyntheticChannelFabricControl, SyntheticMessageProbe, 2)
|
||||
|
||||
if _, err := scheduler.Enqueue(first); err != nil {
|
||||
t.Fatalf("enqueue first: %v", err)
|
||||
}
|
||||
_, err := scheduler.Enqueue(second)
|
||||
if !errors.Is(err, ErrSyntheticRelayQueueFull) {
|
||||
t.Fatalf("err = %v, want ErrSyntheticRelayQueueFull", err)
|
||||
}
|
||||
dequeued, err := scheduler.Dequeue()
|
||||
if err != nil {
|
||||
t.Fatalf("dequeue: %v", err)
|
||||
}
|
||||
if dequeued.Sequence != 1 {
|
||||
t.Fatalf("dequeued sequence = %d, want 1", dequeued.Sequence)
|
||||
}
|
||||
metrics := scheduler.SnapshotQueueMetrics()
|
||||
if metrics.Dropped != 0 || metrics.Rejected != 1 {
|
||||
t.Fatalf("metrics = %+v, want no drop and one rejection", metrics)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSyntheticRelaySchedulerRejectsInvalidEnvelopes(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
mutate func(*SyntheticEnvelope)
|
||||
want error
|
||||
}{
|
||||
{
|
||||
name: "wrong cluster",
|
||||
mutate: func(envelope *SyntheticEnvelope) {
|
||||
envelope.ClusterID = "cluster-2"
|
||||
},
|
||||
want: ErrClusterMismatch,
|
||||
},
|
||||
{
|
||||
name: "wrong node",
|
||||
mutate: func(envelope *SyntheticEnvelope) {
|
||||
envelope.To.NodeID = "node-x"
|
||||
},
|
||||
want: ErrNodeMismatch,
|
||||
},
|
||||
{
|
||||
name: "unauthorized channel",
|
||||
mutate: func(envelope *SyntheticEnvelope) {
|
||||
envelope.Channel = "rdp_render"
|
||||
},
|
||||
want: ErrUnauthorizedChannel,
|
||||
},
|
||||
{
|
||||
name: "unsupported message",
|
||||
mutate: func(envelope *SyntheticEnvelope) {
|
||||
envelope.MessageType = "rdp.input"
|
||||
},
|
||||
want: ErrUnsupportedSyntheticMessage,
|
||||
},
|
||||
{
|
||||
name: "ttl exhausted",
|
||||
mutate: func(envelope *SyntheticEnvelope) {
|
||||
envelope.TTL = 0
|
||||
},
|
||||
want: ErrTTLExhausted,
|
||||
},
|
||||
{
|
||||
name: "loop detected",
|
||||
mutate: func(envelope *SyntheticEnvelope) {
|
||||
envelope.Visited = append(envelope.Visited, "node-r")
|
||||
},
|
||||
want: ErrLoopDetected,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
scheduler := testRelayScheduler()
|
||||
envelope := testRelayEnvelope(SyntheticChannelFabricControl, SyntheticMessageProbe, 1)
|
||||
tt.mutate(&envelope)
|
||||
_, err := scheduler.Enqueue(envelope)
|
||||
if !errors.Is(err, tt.want) {
|
||||
t.Fatalf("err = %v, want %v", err, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestSyntheticRelaySchedulerDisabledRejects(t *testing.T) {
|
||||
scheduler := NewSyntheticRelayScheduler(SyntheticRelaySchedulerConfig{
|
||||
Enabled: false,
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-r"},
|
||||
})
|
||||
_, err := scheduler.Enqueue(testRelayEnvelope(SyntheticChannelFabricControl, SyntheticMessageProbe, 1))
|
||||
if !errors.Is(err, ErrMeshRuntimeDisabled) {
|
||||
t.Fatalf("err = %v, want ErrMeshRuntimeDisabled", err)
|
||||
}
|
||||
if _, err := scheduler.Dequeue(); !errors.Is(err, ErrMeshRuntimeDisabled) {
|
||||
t.Fatalf("dequeue err = %v, want ErrMeshRuntimeDisabled", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSyntheticRelaySchedulerQueueDepthSnapshot(t *testing.T) {
|
||||
scheduler := testRelayScheduler()
|
||||
if _, err := scheduler.Enqueue(testRelayEnvelope(SyntheticChannelFabricControl, SyntheticMessageProbe, 1)); err != nil {
|
||||
t.Fatalf("enqueue fabric control: %v", err)
|
||||
}
|
||||
if _, err := scheduler.Enqueue(testRelayEnvelope(SyntheticChannelRouteControl, SyntheticMessageRouteHealth, 2)); err != nil {
|
||||
t.Fatalf("enqueue route control: %v", err)
|
||||
}
|
||||
metrics := scheduler.SnapshotQueueMetrics()
|
||||
if metrics.QueueDepths[SyntheticChannelFabricControl] != 1 {
|
||||
t.Fatalf("fabric_control depth = %d, want 1", metrics.QueueDepths[SyntheticChannelFabricControl])
|
||||
}
|
||||
if metrics.QueueDepths[SyntheticChannelRouteControl] != 1 {
|
||||
t.Fatalf("route_control depth = %d, want 1", metrics.QueueDepths[SyntheticChannelRouteControl])
|
||||
}
|
||||
}
|
||||
|
||||
func testRelayScheduler() *SyntheticRelayScheduler {
|
||||
return NewSyntheticRelayScheduler(SyntheticRelaySchedulerConfig{
|
||||
Enabled: true,
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-r"},
|
||||
QueuePolicies: []SyntheticRelayQueuePolicy{
|
||||
{Channel: SyntheticChannelFabricControl, Capacity: 1, Droppable: false},
|
||||
{Channel: SyntheticChannelRouteControl, Capacity: 1, Droppable: false},
|
||||
{Channel: SyntheticChannelTelemetry, Capacity: 1, Droppable: true},
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
func testRelayEnvelope(channel string, messageType string, sequence uint64) SyntheticEnvelope {
|
||||
route := testRoute("route-relay-scheduler", []string{"node-a", "node-r", "node-b"})
|
||||
envelope := testEnvelope(route, "node-a", "node-r")
|
||||
envelope.Channel = channel
|
||||
envelope.MessageType = messageType
|
||||
envelope.Sequence = sequence
|
||||
return envelope
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,432 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
type syntheticTestTransport struct {
|
||||
nodes map[string]*SyntheticRuntime
|
||||
}
|
||||
|
||||
func (t syntheticTestTransport) SendSynthetic(ctx context.Context, nextNodeID string, envelope SyntheticEnvelope) (SyntheticEnvelope, error) {
|
||||
next := t.nodes[nextNodeID]
|
||||
if next == nil {
|
||||
return SyntheticEnvelope{}, ErrSyntheticPeerUnavailable
|
||||
}
|
||||
return next.Receive(ctx, envelope)
|
||||
}
|
||||
|
||||
func TestSyntheticRuntimeDirectProbe(t *testing.T) {
|
||||
route := testRoute("route-direct", []string{"node-a", "node-b"})
|
||||
transport := syntheticTestTransport{nodes: map[string]*SyntheticRuntime{}}
|
||||
nodeA := testRuntime("node-a", transport, route)
|
||||
nodeB := testRuntime("node-b", transport, route)
|
||||
transport.nodes["node-a"] = nodeA
|
||||
transport.nodes["node-b"] = nodeB
|
||||
|
||||
ack, err := nodeA.SendProbe(context.Background(), route.RouteID, SyntheticChannelFabricControl, "probe-direct")
|
||||
if err != nil {
|
||||
t.Fatalf("send probe: %v", err)
|
||||
}
|
||||
if ack.MessageType != SyntheticMessageProbeAck {
|
||||
t.Fatalf("MessageType = %q, want %q", ack.MessageType, SyntheticMessageProbeAck)
|
||||
}
|
||||
if ack.From.NodeID != "node-b" || ack.To.NodeID != "node-a" {
|
||||
t.Fatalf("unexpected ack peers: from=%+v to=%+v", ack.From, ack.To)
|
||||
}
|
||||
payload := decodeAckPayload(t, ack)
|
||||
if len(payload.Path) != 2 || payload.Path[0] != "node-a" || payload.Path[1] != "node-b" {
|
||||
t.Fatalf("Path = %#v, want node-a -> node-b", payload.Path)
|
||||
}
|
||||
if nodeB.SnapshotMetrics().ProbeAcksCreated != 1 {
|
||||
t.Fatalf("ProbeAcksCreated = %d, want 1", nodeB.SnapshotMetrics().ProbeAcksCreated)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSyntheticRuntimeSingleRelayProbe(t *testing.T) {
|
||||
route := testRoute("route-relay", []string{"node-a", "node-r", "node-b"})
|
||||
transport := syntheticTestTransport{nodes: map[string]*SyntheticRuntime{}}
|
||||
nodeA := testRuntime("node-a", transport, route)
|
||||
nodeR := testRuntime("node-r", transport, route)
|
||||
nodeB := testRuntime("node-b", transport, route)
|
||||
transport.nodes["node-a"] = nodeA
|
||||
transport.nodes["node-r"] = nodeR
|
||||
transport.nodes["node-b"] = nodeB
|
||||
|
||||
ack, err := nodeA.SendProbe(context.Background(), route.RouteID, SyntheticChannelFabricControl, "probe-relay")
|
||||
if err != nil {
|
||||
t.Fatalf("send probe: %v", err)
|
||||
}
|
||||
payload := decodeAckPayload(t, ack)
|
||||
if len(payload.Path) != 3 || payload.Path[0] != "node-a" || payload.Path[1] != "node-r" || payload.Path[2] != "node-b" {
|
||||
t.Fatalf("Path = %#v, want node-a -> node-r -> node-b", payload.Path)
|
||||
}
|
||||
if nodeR.SnapshotMetrics().ProbesForwarded != 1 {
|
||||
t.Fatalf("ProbesForwarded = %d, want 1", nodeR.SnapshotMetrics().ProbesForwarded)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSyntheticRuntimeDisabledRejectsProbe(t *testing.T) {
|
||||
route := testRoute("route-disabled", []string{"node-a", "node-b"})
|
||||
nodeA := NewSyntheticRuntime(SyntheticRuntimeConfig{
|
||||
Enabled: false,
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
Routes: []SyntheticRoute{route},
|
||||
})
|
||||
_, err := nodeA.SendProbe(context.Background(), route.RouteID, SyntheticChannelFabricControl, "probe-disabled")
|
||||
if !errors.Is(err, ErrMeshRuntimeDisabled) {
|
||||
t.Fatalf("err = %v, want ErrMeshRuntimeDisabled", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSyntheticRuntimeRejectsWrongCluster(t *testing.T) {
|
||||
route := testRoute("route-wrong-cluster", []string{"node-a", "node-b"})
|
||||
nodeB := testRuntime("node-b", syntheticTestTransport{}, route)
|
||||
envelope := testEnvelope(route, "node-a", "node-b")
|
||||
envelope.ClusterID = "cluster-2"
|
||||
|
||||
_, err := nodeB.Receive(context.Background(), envelope)
|
||||
if !errors.Is(err, ErrClusterMismatch) {
|
||||
t.Fatalf("err = %v, want ErrClusterMismatch", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSyntheticRuntimeRejectsWrongNode(t *testing.T) {
|
||||
route := testRoute("route-wrong-node", []string{"node-a", "node-b"})
|
||||
nodeB := testRuntime("node-b", syntheticTestTransport{}, route)
|
||||
envelope := testEnvelope(route, "node-a", "node-c")
|
||||
|
||||
_, err := nodeB.Receive(context.Background(), envelope)
|
||||
if !errors.Is(err, ErrNodeMismatch) {
|
||||
t.Fatalf("err = %v, want ErrNodeMismatch", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSyntheticRuntimeRejectsUnauthorizedChannel(t *testing.T) {
|
||||
route := testRoute("route-unauthorized", []string{"node-a", "node-b"})
|
||||
nodeA := testRuntime("node-a", syntheticTestTransport{}, route)
|
||||
|
||||
_, err := nodeA.SendProbe(context.Background(), route.RouteID, "rdp_render", "probe-unauthorized")
|
||||
if !errors.Is(err, ErrUnauthorizedChannel) {
|
||||
t.Fatalf("err = %v, want ErrUnauthorizedChannel", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSyntheticRuntimeRejectsExpiredRoute(t *testing.T) {
|
||||
route := testRoute("route-expired", []string{"node-a", "node-b"})
|
||||
route.ExpiresAt = time.Now().UTC().Add(-time.Minute)
|
||||
nodeA := testRuntime("node-a", syntheticTestTransport{}, route)
|
||||
|
||||
_, err := nodeA.SendProbe(context.Background(), route.RouteID, SyntheticChannelFabricControl, "probe-expired")
|
||||
if !errors.Is(err, ErrRouteExpired) {
|
||||
t.Fatalf("err = %v, want ErrRouteExpired", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSyntheticRuntimeRejectsTTLExhaustion(t *testing.T) {
|
||||
route := testRoute("route-ttl", []string{"node-a", "node-r", "node-b"})
|
||||
route.MaxTTL = 1
|
||||
transport := syntheticTestTransport{nodes: map[string]*SyntheticRuntime{}}
|
||||
nodeA := testRuntime("node-a", transport, route)
|
||||
nodeR := testRuntime("node-r", transport, route)
|
||||
transport.nodes["node-a"] = nodeA
|
||||
transport.nodes["node-r"] = nodeR
|
||||
|
||||
_, err := nodeA.SendProbe(context.Background(), route.RouteID, SyntheticChannelFabricControl, "probe-ttl")
|
||||
if !errors.Is(err, ErrTTLExhausted) {
|
||||
t.Fatalf("err = %v, want ErrTTLExhausted", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSyntheticRuntimeRejectsLoop(t *testing.T) {
|
||||
route := testRoute("route-loop", []string{"node-a", "node-b"})
|
||||
nodeB := testRuntime("node-b", syntheticTestTransport{}, route)
|
||||
envelope := testEnvelope(route, "node-a", "node-b")
|
||||
envelope.Visited = []string{"node-a", "node-b"}
|
||||
|
||||
_, err := nodeB.Receive(context.Background(), envelope)
|
||||
if !errors.Is(err, ErrLoopDetected) {
|
||||
t.Fatalf("err = %v, want ErrLoopDetected", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSyntheticRuntimeRejectsUnavailablePeer(t *testing.T) {
|
||||
route := testRoute("route-missing-peer", []string{"node-a", "node-b"})
|
||||
nodeA := testRuntime("node-a", syntheticTestTransport{nodes: map[string]*SyntheticRuntime{}}, route)
|
||||
|
||||
_, err := nodeA.SendProbe(context.Background(), route.RouteID, SyntheticChannelFabricControl, "probe-missing-peer")
|
||||
if !errors.Is(err, ErrSyntheticPeerUnavailable) {
|
||||
t.Fatalf("err = %v, want ErrSyntheticPeerUnavailable", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSyntheticRuntimeRouteHealthProbeRecordsSuccess(t *testing.T) {
|
||||
route := testRoute("route-health", []string{"node-a", "node-b"})
|
||||
transport := syntheticTestTransport{nodes: map[string]*SyntheticRuntime{}}
|
||||
nodeA := testRuntime("node-a", transport, route)
|
||||
nodeB := testRuntime("node-b", transport, route)
|
||||
transport.nodes["node-a"] = nodeA
|
||||
transport.nodes["node-b"] = nodeB
|
||||
|
||||
result, err := nodeA.SendRouteHealthProbe(context.Background(), route.RouteID, SyntheticChannelFabricControl, "probe-health")
|
||||
if err != nil {
|
||||
t.Fatalf("send route health probe: %v", err)
|
||||
}
|
||||
if result.Ack.MessageType != SyntheticMessageRouteHealthAck {
|
||||
t.Fatalf("MessageType = %q, want %q", result.Ack.MessageType, SyntheticMessageRouteHealthAck)
|
||||
}
|
||||
if result.FallbackUsed {
|
||||
t.Fatal("FallbackUsed = true, want false")
|
||||
}
|
||||
observation, ok := nodeA.SnapshotRouteObservation(route.RouteID)
|
||||
if !ok {
|
||||
t.Fatal("route observation missing")
|
||||
}
|
||||
if observation.State != SyntheticRouteStateHealthy || observation.SuccessCount != 1 {
|
||||
t.Fatalf("observation = %+v, want healthy success", observation)
|
||||
}
|
||||
if observation.PolicyVersion != "policy-v1" || observation.PeerDirectoryVersion != "peers-v1" || observation.RouteVersion != "route-v1" {
|
||||
t.Fatalf("observation versions = %+v", observation)
|
||||
}
|
||||
metrics := nodeA.SnapshotMetrics()
|
||||
if metrics.RouteHealthProbesSent != 1 || metrics.RouteDeliveriesSucceeded != 1 {
|
||||
t.Fatalf("metrics = %+v, want health probe success", metrics)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSyntheticRuntimeRouteHealthUsesDedicatedRouteConfig(t *testing.T) {
|
||||
base := testRoute("route-effective-health", []string{"node-a", "node-old", "node-b"})
|
||||
effective := testRoute("route-effective-health", []string{"node-a", "node-new", "node-b"})
|
||||
effective.RouteVersion = "decision-v1"
|
||||
transport := syntheticTestTransport{nodes: map[string]*SyntheticRuntime{}}
|
||||
nodeA := testRuntimeWithRouteHealth("node-a", transport, []SyntheticRoute{base}, []SyntheticRoute{effective})
|
||||
nodeOld := testRuntimeWithRouteHealth("node-old", transport, []SyntheticRoute{base}, []SyntheticRoute{effective})
|
||||
nodeNew := testRuntimeWithRouteHealth("node-new", transport, []SyntheticRoute{base}, []SyntheticRoute{effective})
|
||||
nodeB := testRuntimeWithRouteHealth("node-b", transport, []SyntheticRoute{base}, []SyntheticRoute{effective})
|
||||
transport.nodes["node-a"] = nodeA
|
||||
transport.nodes["node-old"] = nodeOld
|
||||
transport.nodes["node-new"] = nodeNew
|
||||
transport.nodes["node-b"] = nodeB
|
||||
|
||||
health, err := nodeA.SendRouteHealthProbe(context.Background(), base.RouteID, SyntheticChannelFabricControl, "probe-health-effective")
|
||||
if err != nil {
|
||||
t.Fatalf("send route health probe: %v", err)
|
||||
}
|
||||
healthPayload := decodeAckPayload(t, health.Ack)
|
||||
if got, want := healthPayload.Path, []string{"node-a", "node-new", "node-b"}; !sameStrings(got, want) {
|
||||
t.Fatalf("route health path = %v, want %v", got, want)
|
||||
}
|
||||
if nodeNew.SnapshotMetrics().ProbesForwarded != 1 {
|
||||
t.Fatalf("node-new forwarded = %d, want 1", nodeNew.SnapshotMetrics().ProbesForwarded)
|
||||
}
|
||||
if nodeOld.SnapshotMetrics().ProbesForwarded != 0 {
|
||||
t.Fatalf("node-old forwarded = %d, want 0 before regular probe", nodeOld.SnapshotMetrics().ProbesForwarded)
|
||||
}
|
||||
observation, ok := nodeA.SnapshotRouteObservation(base.RouteID)
|
||||
if !ok || observation.RouteVersion != "decision-v1" {
|
||||
t.Fatalf("route health observation = %+v, want decision route version", observation)
|
||||
}
|
||||
|
||||
probe, err := nodeA.SendProbe(context.Background(), base.RouteID, SyntheticChannelFabricControl, "probe-regular")
|
||||
if err != nil {
|
||||
t.Fatalf("send regular probe: %v", err)
|
||||
}
|
||||
probePayload := decodeAckPayload(t, probe)
|
||||
if got, want := probePayload.Path, []string{"node-a", "node-old", "node-b"}; !sameStrings(got, want) {
|
||||
t.Fatalf("regular probe path = %v, want %v", got, want)
|
||||
}
|
||||
if nodeOld.SnapshotMetrics().ProbesForwarded != 1 {
|
||||
t.Fatalf("node-old forwarded = %d, want 1 after regular probe", nodeOld.SnapshotMetrics().ProbesForwarded)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSyntheticRuntimeRouteHealthUsesFallbackWhenPreferredUnavailable(t *testing.T) {
|
||||
preferred := testRoute("route-preferred", []string{"node-a", "node-r", "node-b"})
|
||||
fallback := testRoute("route-fallback", []string{"node-a", "node-b"})
|
||||
transport := syntheticTestTransport{nodes: map[string]*SyntheticRuntime{}}
|
||||
nodeA := testRuntime("node-a", transport, preferred, fallback)
|
||||
nodeB := testRuntime("node-b", transport, preferred, fallback)
|
||||
transport.nodes["node-a"] = nodeA
|
||||
transport.nodes["node-b"] = nodeB
|
||||
|
||||
result, err := nodeA.SendRouteHealthProbeWithFallback(
|
||||
context.Background(),
|
||||
preferred.RouteID,
|
||||
[]string{fallback.RouteID},
|
||||
SyntheticChannelFabricControl,
|
||||
"probe-fallback",
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatalf("send route health probe with fallback: %v", err)
|
||||
}
|
||||
if !result.FallbackUsed {
|
||||
t.Fatal("FallbackUsed = false, want true")
|
||||
}
|
||||
if result.SelectedRouteID != fallback.RouteID {
|
||||
t.Fatalf("SelectedRouteID = %q, want %q", result.SelectedRouteID, fallback.RouteID)
|
||||
}
|
||||
preferredObservation, ok := nodeA.SnapshotRouteObservation(preferred.RouteID)
|
||||
if !ok {
|
||||
t.Fatal("preferred route observation missing")
|
||||
}
|
||||
if preferredObservation.State != SyntheticRouteStateFailed || preferredObservation.FailureCount != 1 {
|
||||
t.Fatalf("preferred observation = %+v, want failed", preferredObservation)
|
||||
}
|
||||
fallbackObservation, ok := nodeA.SnapshotRouteObservation(fallback.RouteID)
|
||||
if !ok {
|
||||
t.Fatal("fallback route observation missing")
|
||||
}
|
||||
if fallbackObservation.State != SyntheticRouteStateHealthy || fallbackObservation.SuccessCount != 1 {
|
||||
t.Fatalf("fallback observation = %+v, want healthy", fallbackObservation)
|
||||
}
|
||||
metrics := nodeA.SnapshotMetrics()
|
||||
if metrics.FallbackRoutesUsed != 1 || metrics.WarmRoutesPromoted != 1 || metrics.RouteDeliveriesFailed != 1 {
|
||||
t.Fatalf("metrics = %+v, want fallback promotion and one failed delivery", metrics)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSyntheticRuntimeRouteCacheInvalidatesOnVersionChange(t *testing.T) {
|
||||
route := testRoute("route-cache", []string{"node-a", "node-b"})
|
||||
transport := syntheticTestTransport{nodes: map[string]*SyntheticRuntime{}}
|
||||
nodeA := testRuntime("node-a", transport, route)
|
||||
nodeB := testRuntime("node-b", transport, route)
|
||||
transport.nodes["node-a"] = nodeA
|
||||
transport.nodes["node-b"] = nodeB
|
||||
|
||||
if _, err := nodeA.SendRouteHealthProbe(context.Background(), route.RouteID, SyntheticChannelFabricControl, "probe-cache"); err != nil {
|
||||
t.Fatalf("send route health probe: %v", err)
|
||||
}
|
||||
if _, ok := nodeA.SnapshotRouteObservation(route.RouteID); !ok {
|
||||
t.Fatal("route observation missing before invalidation")
|
||||
}
|
||||
|
||||
invalidated := nodeA.InvalidateRouteCache("policy_changed", SyntheticRouteCacheVersion{PolicyVersion: "policy-v2"})
|
||||
if invalidated != 1 {
|
||||
t.Fatalf("invalidated = %d, want 1", invalidated)
|
||||
}
|
||||
if _, ok := nodeA.SnapshotRouteObservation(route.RouteID); ok {
|
||||
t.Fatal("route observation still present after invalidation")
|
||||
}
|
||||
if nodeA.SnapshotMetrics().RouteCacheInvalidations != 1 {
|
||||
t.Fatalf("RouteCacheInvalidations = %d, want 1", nodeA.SnapshotMetrics().RouteCacheInvalidations)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSyntheticRuntimeRouteCacheKeepsCurrentVersion(t *testing.T) {
|
||||
route := testRoute("route-cache-current", []string{"node-a", "node-b"})
|
||||
transport := syntheticTestTransport{nodes: map[string]*SyntheticRuntime{}}
|
||||
nodeA := testRuntime("node-a", transport, route)
|
||||
nodeB := testRuntime("node-b", transport, route)
|
||||
transport.nodes["node-a"] = nodeA
|
||||
transport.nodes["node-b"] = nodeB
|
||||
|
||||
if _, err := nodeA.SendRouteHealthProbe(context.Background(), route.RouteID, SyntheticChannelFabricControl, "probe-cache-current"); err != nil {
|
||||
t.Fatalf("send route health probe: %v", err)
|
||||
}
|
||||
invalidated := nodeA.InvalidateRouteCache("same_versions", SyntheticRouteCacheVersion{
|
||||
RouteVersion: "route-v1",
|
||||
PolicyVersion: "policy-v1",
|
||||
PeerDirectoryVersion: "peers-v1",
|
||||
})
|
||||
if invalidated != 0 {
|
||||
t.Fatalf("invalidated = %d, want 0", invalidated)
|
||||
}
|
||||
if _, ok := nodeA.SnapshotRouteObservation(route.RouteID); !ok {
|
||||
t.Fatal("route observation missing after same-version invalidation")
|
||||
}
|
||||
}
|
||||
|
||||
func TestSyntheticRuntimeRouteHealthDisabledRejects(t *testing.T) {
|
||||
route := testRoute("route-health-disabled", []string{"node-a", "node-b"})
|
||||
nodeA := NewSyntheticRuntime(SyntheticRuntimeConfig{
|
||||
Enabled: false,
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
Routes: []SyntheticRoute{route},
|
||||
})
|
||||
|
||||
_, err := nodeA.SendRouteHealthProbeWithFallback(
|
||||
context.Background(),
|
||||
route.RouteID,
|
||||
[]string{"route-fallback"},
|
||||
SyntheticChannelFabricControl,
|
||||
"probe-disabled-health",
|
||||
)
|
||||
if !errors.Is(err, ErrMeshRuntimeDisabled) {
|
||||
t.Fatalf("err = %v, want ErrMeshRuntimeDisabled", err)
|
||||
}
|
||||
}
|
||||
|
||||
func testRuntime(nodeID string, transport SyntheticTransport, routes ...SyntheticRoute) *SyntheticRuntime {
|
||||
return NewSyntheticRuntime(SyntheticRuntimeConfig{
|
||||
Enabled: true,
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: nodeID},
|
||||
Routes: routes,
|
||||
Transport: transport,
|
||||
MaxTTL: 8,
|
||||
MaxHops: 8,
|
||||
})
|
||||
}
|
||||
|
||||
func testRuntimeWithRouteHealth(nodeID string, transport SyntheticTransport, routes []SyntheticRoute, routeHealthRoutes []SyntheticRoute) *SyntheticRuntime {
|
||||
return NewSyntheticRuntime(SyntheticRuntimeConfig{
|
||||
Enabled: true,
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: nodeID},
|
||||
Routes: routes,
|
||||
RouteHealthRoutes: routeHealthRoutes,
|
||||
Transport: transport,
|
||||
MaxTTL: 8,
|
||||
MaxHops: 8,
|
||||
})
|
||||
}
|
||||
|
||||
func testRoute(routeID string, hops []string) SyntheticRoute {
|
||||
return SyntheticRoute{
|
||||
RouteID: routeID,
|
||||
ClusterID: "cluster-1",
|
||||
SourceNodeID: hops[0],
|
||||
DestinationNodeID: hops[len(hops)-1],
|
||||
Hops: hops,
|
||||
AllowedChannels: []string{SyntheticChannelFabricControl},
|
||||
ExpiresAt: time.Now().UTC().Add(time.Hour),
|
||||
MaxTTL: 8,
|
||||
MaxHops: 8,
|
||||
RouteVersion: "route-v1",
|
||||
PolicyVersion: "policy-v1",
|
||||
PeerDirectoryVersion: "peers-v1",
|
||||
}
|
||||
}
|
||||
|
||||
func testEnvelope(route SyntheticRoute, fromNodeID string, toNodeID string) SyntheticEnvelope {
|
||||
payload, _ := json.Marshal(SyntheticProbePayload{
|
||||
ProbeID: "probe-test",
|
||||
SentAt: time.Now().UTC(),
|
||||
})
|
||||
return SyntheticEnvelope{
|
||||
ProtocolVersion: ProtocolVersion,
|
||||
RouteID: route.RouteID,
|
||||
ClusterID: route.ClusterID,
|
||||
From: PeerIdentity{ClusterID: route.ClusterID, NodeID: fromNodeID},
|
||||
To: PeerIdentity{ClusterID: route.ClusterID, NodeID: toNodeID},
|
||||
Channel: SyntheticChannelFabricControl,
|
||||
MessageType: SyntheticMessageProbe,
|
||||
TTL: 8,
|
||||
HopCount: 1,
|
||||
Visited: []string{fromNodeID},
|
||||
Sequence: 1,
|
||||
SentAt: time.Now().UTC(),
|
||||
Payload: payload,
|
||||
}
|
||||
}
|
||||
|
||||
func decodeAckPayload(t *testing.T, envelope SyntheticEnvelope) SyntheticProbeAckPayload {
|
||||
t.Helper()
|
||||
var payload SyntheticProbeAckPayload
|
||||
if err := json.Unmarshal(envelope.Payload, &payload); err != nil {
|
||||
t.Fatalf("decode ack payload: %v", err)
|
||||
}
|
||||
return payload
|
||||
}
|
||||
@@ -0,0 +1,235 @@
|
||||
package mesh
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestSyntheticRuntimeTestServiceDirectRoute(t *testing.T) {
|
||||
route := testServiceRoute("route-test-service-direct", []string{"node-a", "node-b"})
|
||||
transport := syntheticTestTransport{nodes: map[string]*SyntheticRuntime{}}
|
||||
nodeA := testRuntime("node-a", transport, route)
|
||||
nodeB := testRuntime("node-b", transport, route)
|
||||
transport.nodes["node-a"] = nodeA
|
||||
transport.nodes["node-b"] = nodeB
|
||||
|
||||
result, err := nodeA.SendTestService(context.Background(), route.RouteID, SyntheticChannelRouteControl, testServiceRequest("request-direct", "hello"))
|
||||
if err != nil {
|
||||
t.Fatalf("send test service: %v", err)
|
||||
}
|
||||
if result.Ack.MessageType != SyntheticMessageTestServiceAck {
|
||||
t.Fatalf("MessageType = %q, want %q", result.Ack.MessageType, SyntheticMessageTestServiceAck)
|
||||
}
|
||||
if result.Response.EchoPayload != "hello" {
|
||||
t.Fatalf("EchoPayload = %q, want hello", result.Response.EchoPayload)
|
||||
}
|
||||
if len(result.Response.Path) != 2 || result.Response.Path[0] != "node-a" || result.Response.Path[1] != "node-b" {
|
||||
t.Fatalf("Path = %#v, want node-a -> node-b", result.Response.Path)
|
||||
}
|
||||
metrics := nodeA.SnapshotMetrics()
|
||||
if metrics.TestServiceRequestsSent != 1 || metrics.TestServiceDeliveriesSucceeded != 1 {
|
||||
t.Fatalf("metrics = %+v, want one test service success", metrics)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSyntheticRuntimeTestServiceSingleRelayRoute(t *testing.T) {
|
||||
route := testServiceRoute("route-test-service-relay", []string{"node-a", "node-r", "node-b"})
|
||||
transport := syntheticTestTransport{nodes: map[string]*SyntheticRuntime{}}
|
||||
nodeA := testRuntime("node-a", transport, route)
|
||||
nodeR := testRuntime("node-r", transport, route)
|
||||
nodeB := testRuntime("node-b", transport, route)
|
||||
transport.nodes["node-a"] = nodeA
|
||||
transport.nodes["node-r"] = nodeR
|
||||
transport.nodes["node-b"] = nodeB
|
||||
|
||||
result, err := nodeA.SendTestService(context.Background(), route.RouteID, SyntheticChannelRouteControl, testServiceRequest("request-relay", "relay"))
|
||||
if err != nil {
|
||||
t.Fatalf("send test service: %v", err)
|
||||
}
|
||||
if len(result.Response.Path) != 3 || result.Response.Path[0] != "node-a" || result.Response.Path[1] != "node-r" || result.Response.Path[2] != "node-b" {
|
||||
t.Fatalf("Path = %#v, want node-a -> node-r -> node-b", result.Response.Path)
|
||||
}
|
||||
if nodeR.SnapshotMetrics().ProbesForwarded != 1 {
|
||||
t.Fatalf("ProbesForwarded = %d, want 1", nodeR.SnapshotMetrics().ProbesForwarded)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSyntheticRuntimeTestServiceUsesForcedFallback(t *testing.T) {
|
||||
preferred := testServiceRoute("route-test-service-preferred", []string{"node-a", "node-r", "node-b"})
|
||||
fallback := testServiceRoute("route-test-service-fallback", []string{"node-a", "node-b"})
|
||||
transport := syntheticTestTransport{nodes: map[string]*SyntheticRuntime{}}
|
||||
nodeA := testRuntime("node-a", transport, preferred, fallback)
|
||||
nodeB := testRuntime("node-b", transport, preferred, fallback)
|
||||
transport.nodes["node-a"] = nodeA
|
||||
transport.nodes["node-b"] = nodeB
|
||||
|
||||
result, err := nodeA.SendTestServiceWithFallback(
|
||||
context.Background(),
|
||||
preferred.RouteID,
|
||||
[]string{fallback.RouteID},
|
||||
SyntheticChannelRouteControl,
|
||||
testServiceRequest("request-fallback", "fallback"),
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatalf("send test service with fallback: %v", err)
|
||||
}
|
||||
if !result.FallbackUsed {
|
||||
t.Fatal("FallbackUsed = false, want true")
|
||||
}
|
||||
if result.SelectedRouteID != fallback.RouteID {
|
||||
t.Fatalf("SelectedRouteID = %q, want %q", result.SelectedRouteID, fallback.RouteID)
|
||||
}
|
||||
if result.Response.EchoPayload != "fallback" {
|
||||
t.Fatalf("EchoPayload = %q, want fallback", result.Response.EchoPayload)
|
||||
}
|
||||
metrics := nodeA.SnapshotMetrics()
|
||||
if metrics.TestServiceFallbacksUsed != 1 || metrics.TestServiceDeliveriesFailed != 1 || metrics.TestServiceDeliveriesSucceeded != 1 {
|
||||
t.Fatalf("metrics = %+v, want fallback success with one preferred failure", metrics)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSyntheticRuntimeTestServiceRejectsWrongOrganization(t *testing.T) {
|
||||
route := testServiceRoute("route-test-service-wrong-org", []string{"node-a", "node-b"})
|
||||
nodeA := testRuntime("node-a", syntheticTestTransport{}, route)
|
||||
request := testServiceRequest("request-wrong-org", "hello")
|
||||
request.OrganizationID = "org-other"
|
||||
|
||||
_, err := nodeA.SendTestService(context.Background(), route.RouteID, SyntheticChannelRouteControl, request)
|
||||
if !errors.Is(err, ErrSyntheticOrganizationMismatch) {
|
||||
t.Fatalf("err = %v, want ErrSyntheticOrganizationMismatch", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSyntheticRuntimeTestServiceRejectsUnsupportedService(t *testing.T) {
|
||||
route := testServiceRoute("route-test-service-unsupported", []string{"node-a", "node-b"})
|
||||
nodeA := testRuntime("node-a", syntheticTestTransport{}, route)
|
||||
request := testServiceRequest("request-unsupported", "hello")
|
||||
request.ServiceType = "rdp"
|
||||
|
||||
_, err := nodeA.SendTestService(context.Background(), route.RouteID, SyntheticChannelRouteControl, request)
|
||||
if !errors.Is(err, ErrUnsupportedSyntheticService) {
|
||||
t.Fatalf("err = %v, want ErrUnsupportedSyntheticService", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSyntheticRuntimeTestServiceRejectsOversizedPayload(t *testing.T) {
|
||||
route := testServiceRoute("route-test-service-oversized", []string{"node-a", "node-b"})
|
||||
nodeA := NewSyntheticRuntime(SyntheticRuntimeConfig{
|
||||
Enabled: true,
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
Routes: []SyntheticRoute{route},
|
||||
MaxTestPayloadBytes: 4,
|
||||
})
|
||||
|
||||
_, err := nodeA.SendTestService(context.Background(), route.RouteID, SyntheticChannelRouteControl, testServiceRequest("request-oversized", "12345"))
|
||||
if !errors.Is(err, ErrSyntheticPayloadTooLarge) {
|
||||
t.Fatalf("err = %v, want ErrSyntheticPayloadTooLarge", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSyntheticRuntimeTestServiceRejectsUnauthorizedChannel(t *testing.T) {
|
||||
route := testServiceRoute("route-test-service-channel", []string{"node-a", "node-b"})
|
||||
nodeA := testRuntime("node-a", syntheticTestTransport{}, route)
|
||||
|
||||
_, err := nodeA.SendTestService(context.Background(), route.RouteID, SyntheticChannelFabricControl, testServiceRequest("request-channel", "hello"))
|
||||
if !errors.Is(err, ErrUnauthorizedChannel) {
|
||||
t.Fatalf("err = %v, want ErrUnauthorizedChannel", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSyntheticRuntimeTestServiceDisabledRejects(t *testing.T) {
|
||||
route := testServiceRoute("route-test-service-disabled", []string{"node-a", "node-b"})
|
||||
nodeA := NewSyntheticRuntime(SyntheticRuntimeConfig{
|
||||
Enabled: false,
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
Routes: []SyntheticRoute{route},
|
||||
})
|
||||
|
||||
_, err := nodeA.SendTestService(context.Background(), route.RouteID, SyntheticChannelRouteControl, testServiceRequest("request-disabled", "hello"))
|
||||
if !errors.Is(err, ErrMeshRuntimeDisabled) {
|
||||
t.Fatalf("err = %v, want ErrMeshRuntimeDisabled", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSyntheticRelaySchedulerAcceptsTestServiceMessage(t *testing.T) {
|
||||
scheduler := testRelayScheduler()
|
||||
envelope := testRelayEnvelope(SyntheticChannelRouteControl, SyntheticMessageTestService, 42)
|
||||
envelope.Payload = mustMarshalTestServiceRequest(testServiceRequest("request-relay-scheduler", "hello"))
|
||||
|
||||
if _, err := scheduler.Enqueue(envelope); err != nil {
|
||||
t.Fatalf("enqueue test service: %v", err)
|
||||
}
|
||||
dequeued, err := scheduler.Dequeue()
|
||||
if err != nil {
|
||||
t.Fatalf("dequeue test service: %v", err)
|
||||
}
|
||||
if dequeued.MessageType != SyntheticMessageTestService {
|
||||
t.Fatalf("MessageType = %q, want %q", dequeued.MessageType, SyntheticMessageTestService)
|
||||
}
|
||||
}
|
||||
|
||||
func testServiceRoute(routeID string, hops []string) SyntheticRoute {
|
||||
route := testRoute(routeID, hops)
|
||||
route.AllowedChannels = []string{SyntheticChannelRouteControl}
|
||||
return route
|
||||
}
|
||||
|
||||
func testServiceRequest(requestID string, payload string) SyntheticTestServiceRequest {
|
||||
return SyntheticTestServiceRequest{
|
||||
RequestID: requestID,
|
||||
OrganizationID: SyntheticDefaultTestOrganizationID,
|
||||
ServiceType: SyntheticTestServiceType,
|
||||
Payload: payload,
|
||||
}
|
||||
}
|
||||
|
||||
func mustMarshalTestServiceRequest(request SyntheticTestServiceRequest) []byte {
|
||||
payload, err := json.Marshal(request)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return payload
|
||||
}
|
||||
|
||||
func TestSyntheticRuntimeTestServiceRejectsMissingRequestID(t *testing.T) {
|
||||
route := testServiceRoute("route-test-service-missing-request", []string{"node-a", "node-b"})
|
||||
nodeA := testRuntime("node-a", syntheticTestTransport{}, route)
|
||||
request := testServiceRequest("", "hello")
|
||||
|
||||
_, err := nodeA.SendTestService(context.Background(), route.RouteID, SyntheticChannelRouteControl, request)
|
||||
if !errors.Is(err, ErrSyntheticRequestInvalid) {
|
||||
t.Fatalf("err = %v, want ErrSyntheticRequestInvalid", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSyntheticRuntimeTestServiceAllowsMaxPayloadBoundary(t *testing.T) {
|
||||
route := testServiceRoute("route-test-service-max", []string{"node-a", "node-b"})
|
||||
transport := syntheticTestTransport{nodes: map[string]*SyntheticRuntime{}}
|
||||
nodeA := NewSyntheticRuntime(SyntheticRuntimeConfig{
|
||||
Enabled: true,
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-a"},
|
||||
Routes: []SyntheticRoute{route},
|
||||
Transport: transport,
|
||||
MaxTestPayloadBytes: 8,
|
||||
})
|
||||
nodeB := NewSyntheticRuntime(SyntheticRuntimeConfig{
|
||||
Enabled: true,
|
||||
Local: PeerIdentity{ClusterID: "cluster-1", NodeID: "node-b"},
|
||||
Routes: []SyntheticRoute{route},
|
||||
Transport: transport,
|
||||
MaxTestPayloadBytes: 8,
|
||||
})
|
||||
transport.nodes["node-a"] = nodeA
|
||||
transport.nodes["node-b"] = nodeB
|
||||
|
||||
result, err := nodeA.SendTestService(context.Background(), route.RouteID, SyntheticChannelRouteControl, testServiceRequest("request-max", strings.Repeat("a", 8)))
|
||||
if err != nil {
|
||||
t.Fatalf("send test service: %v", err)
|
||||
}
|
||||
if result.Response.EchoPayload != strings.Repeat("a", 8) {
|
||||
t.Fatalf("EchoPayload = %q", result.Response.EchoPayload)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user