|
|
|
@@ -22,6 +22,7 @@ import (
|
|
|
|
|
"runtime"
|
|
|
|
|
"sort"
|
|
|
|
|
"strings"
|
|
|
|
|
"sync"
|
|
|
|
|
"sync/atomic"
|
|
|
|
|
"syscall"
|
|
|
|
|
"time"
|
|
|
|
@@ -363,6 +364,7 @@ type syntheticMeshState struct {
|
|
|
|
|
VPNFabricSessionPeers *mesh.FabricSessionPeerManager
|
|
|
|
|
VPNFabricTransport *mesh.WebSocketFabricTransport
|
|
|
|
|
VPNFabricSessionDialStats *vpnFabricSessionDialStats
|
|
|
|
|
VPNFabricEndpointObservations *vpnFabricEndpointObservationStore
|
|
|
|
|
PeerEndpoints map[string]string
|
|
|
|
|
PeerEndpointCandidates map[string][]mesh.PeerEndpointCandidate
|
|
|
|
|
VPNGateway *vpnruntime.Gateway
|
|
|
|
@@ -419,10 +421,65 @@ type vpnFabricSessionDialStats struct {
|
|
|
|
|
LastFailureUnixSec atomic.Int64
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
type vpnFabricEndpointObservationStore struct {
|
|
|
|
|
mu sync.Mutex
|
|
|
|
|
observations map[string]mesh.EndpointCandidateHealthObservation
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func newVPNFabricSessionDialStats() *vpnFabricSessionDialStats {
|
|
|
|
|
return &vpnFabricSessionDialStats{}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func newVPNFabricEndpointObservationStore() *vpnFabricEndpointObservationStore {
|
|
|
|
|
return &vpnFabricEndpointObservationStore{
|
|
|
|
|
observations: map[string]mesh.EndpointCandidateHealthObservation{},
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (s *vpnFabricEndpointObservationStore) Snapshot() map[string]mesh.EndpointCandidateHealthObservation {
|
|
|
|
|
if s == nil {
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
s.mu.Lock()
|
|
|
|
|
defer s.mu.Unlock()
|
|
|
|
|
out := make(map[string]mesh.EndpointCandidateHealthObservation, len(s.observations))
|
|
|
|
|
for key, value := range s.observations {
|
|
|
|
|
out[key] = value
|
|
|
|
|
}
|
|
|
|
|
return out
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (s *vpnFabricEndpointObservationStore) ObserveSuccess(endpointID string, latency time.Duration) {
|
|
|
|
|
if s == nil || strings.TrimSpace(endpointID) == "" {
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
s.mu.Lock()
|
|
|
|
|
defer s.mu.Unlock()
|
|
|
|
|
observation := s.observations[endpointID]
|
|
|
|
|
observation.EndpointID = endpointID
|
|
|
|
|
observation.SuccessCount++
|
|
|
|
|
observation.LastLatencyMs = latency.Milliseconds()
|
|
|
|
|
observation.ReliabilityScore = 100
|
|
|
|
|
observation.LastFailureReason = ""
|
|
|
|
|
observation.ObservedAt = time.Now().UTC()
|
|
|
|
|
s.observations[endpointID] = observation
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (s *vpnFabricEndpointObservationStore) ObserveFailure(endpointID string, reason string) {
|
|
|
|
|
if s == nil || strings.TrimSpace(endpointID) == "" {
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
s.mu.Lock()
|
|
|
|
|
defer s.mu.Unlock()
|
|
|
|
|
observation := s.observations[endpointID]
|
|
|
|
|
observation.EndpointID = endpointID
|
|
|
|
|
observation.FailureCount++
|
|
|
|
|
observation.LastFailureReason = strings.TrimSpace(reason)
|
|
|
|
|
observation.ReliabilityScore = 35
|
|
|
|
|
observation.ObservedAt = time.Now().UTC()
|
|
|
|
|
s.observations[endpointID] = observation
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func fabricTransportLabelIsQUIC(label string) bool {
|
|
|
|
|
switch strings.ToLower(strings.TrimSpace(label)) {
|
|
|
|
|
case "quic", "direct_quic", "udp_quic", "quic_udp":
|
|
|
|
@@ -945,6 +1002,7 @@ func startSyntheticMeshEndpoint(ctx context.Context, _ context.CancelFunc, cfg c
|
|
|
|
|
VPNFabricSessionPeers: vpnFabricSessionPeers,
|
|
|
|
|
VPNFabricTransport: mesh.NewWebSocketFabricTransport(vpnFabricSessionPeers),
|
|
|
|
|
VPNFabricSessionDialStats: newVPNFabricSessionDialStats(),
|
|
|
|
|
VPNFabricEndpointObservations: newVPNFabricEndpointObservationStore(),
|
|
|
|
|
PeerEndpoints: copyStringMap(peerEndpoints),
|
|
|
|
|
PeerEndpointCandidates: copyPeerEndpointCandidatesMap(loadedConfig.PeerEndpointCandidates),
|
|
|
|
|
VPNGateway: vpnGateway,
|
|
|
|
@@ -1841,6 +1899,9 @@ func applyRefreshedSyntheticMeshConfig(ctx context.Context, cfg config.Config, i
|
|
|
|
|
if meshState.VPNFabricSessionDialStats == nil {
|
|
|
|
|
meshState.VPNFabricSessionDialStats = newVPNFabricSessionDialStats()
|
|
|
|
|
}
|
|
|
|
|
if meshState.VPNFabricEndpointObservations == nil {
|
|
|
|
|
meshState.VPNFabricEndpointObservations = newVPNFabricEndpointObservationStore()
|
|
|
|
|
}
|
|
|
|
|
meshState.PeerEndpoints = copyStringMap(loadedConfig.PeerEndpoints)
|
|
|
|
|
meshState.PeerEndpointCandidates = copyPeerEndpointCandidatesMap(loadedConfig.PeerEndpointCandidates)
|
|
|
|
|
if productionForwardingEnabled {
|
|
|
|
@@ -2776,6 +2837,9 @@ func heartbeatPayload(cfg config.Config, identity state.Identity, meshState *syn
|
|
|
|
|
if meshState != nil && meshState.VPNFabricSessionDialStats != nil {
|
|
|
|
|
report["dial_stats"] = meshState.VPNFabricSessionDialStats.Report(observedAt)
|
|
|
|
|
}
|
|
|
|
|
if meshState != nil && meshState.VPNFabricEndpointObservations != nil {
|
|
|
|
|
report["endpoint_observations"] = meshState.VPNFabricEndpointObservations.Snapshot()
|
|
|
|
|
}
|
|
|
|
|
payload.Metadata["vpn_fabric_session_transport_report"] = report
|
|
|
|
|
payload.Capabilities["vpn_fabric_session_transport"] = true
|
|
|
|
|
payload.Capabilities["vpn_packet_batch_binary_frames"] = true
|
|
|
|
@@ -4769,6 +4833,9 @@ func fabricSessionGatewayTransportForAssignment(ctx context.Context, identity st
|
|
|
|
|
if meshState.VPNFabricSessionDialStats == nil {
|
|
|
|
|
meshState.VPNFabricSessionDialStats = newVPNFabricSessionDialStats()
|
|
|
|
|
}
|
|
|
|
|
if meshState.VPNFabricEndpointObservations == nil {
|
|
|
|
|
meshState.VPNFabricEndpointObservations = newVPNFabricEndpointObservationStore()
|
|
|
|
|
}
|
|
|
|
|
meshState.VPNFabricSessionDialStats.Attempts.Add(1)
|
|
|
|
|
if meshState.VPNFabricSessionPeers == nil {
|
|
|
|
|
meshState.VPNFabricSessionPeers = mesh.NewFabricSessionPeerManager()
|
|
|
|
@@ -4778,6 +4845,7 @@ func fabricSessionGatewayTransportForAssignment(ctx context.Context, identity st
|
|
|
|
|
}
|
|
|
|
|
token := fabricSessionGatewayToken(identity, assignment, nextHop)
|
|
|
|
|
for index, target := range targets {
|
|
|
|
|
startedAt := time.Now()
|
|
|
|
|
dialCtx, cancel := context.WithTimeout(ctx, 3*time.Second)
|
|
|
|
|
target.PeerID = nextHop
|
|
|
|
|
target.Token = token
|
|
|
|
@@ -4789,6 +4857,7 @@ func fabricSessionGatewayTransportForAssignment(ctx context.Context, identity st
|
|
|
|
|
if err != nil {
|
|
|
|
|
cancel()
|
|
|
|
|
meshState.VPNFabricSessionDialStats.ObserveCandidateFailure("transport_select_failed")
|
|
|
|
|
meshState.VPNFabricEndpointObservations.ObserveFailure(target.EndpointID, "transport_select_failed")
|
|
|
|
|
log.Printf("vpn fabric session candidate skipped: vpn_connection_id=%s next_hop=%s candidate=%d endpoint=%s transport=%s reason=transport_select_failed error=%v", assignment.VPNConnectionID, nextHop, index, target.Endpoint, target.Transport, err)
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
@@ -4796,6 +4865,7 @@ func fabricSessionGatewayTransportForAssignment(ctx context.Context, identity st
|
|
|
|
|
if err != nil {
|
|
|
|
|
cancel()
|
|
|
|
|
meshState.VPNFabricSessionDialStats.ObserveCandidateFailure("session_open_failed")
|
|
|
|
|
meshState.VPNFabricEndpointObservations.ObserveFailure(selectedTarget.EndpointID, "session_open_failed")
|
|
|
|
|
log.Printf("vpn fabric session candidate skipped: vpn_connection_id=%s next_hop=%s candidate=%d endpoint=%s transport=%s reason=session_open_failed error=%v", assignment.VPNConnectionID, nextHop, index, selectedTarget.Endpoint, selectedTarget.Transport, err)
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
@@ -4811,11 +4881,13 @@ func fabricSessionGatewayTransportForAssignment(ctx context.Context, identity st
|
|
|
|
|
cancel()
|
|
|
|
|
_ = session.Close()
|
|
|
|
|
meshState.VPNFabricSessionDialStats.ObserveCandidateFailure("stream_open_failed")
|
|
|
|
|
meshState.VPNFabricEndpointObservations.ObserveFailure(selectedTarget.EndpointID, "stream_open_failed")
|
|
|
|
|
log.Printf("vpn fabric session candidate skipped: vpn_connection_id=%s next_hop=%s candidate=%d endpoint=%s transport=%s reason=stream_open_failed error=%v", assignment.VPNConnectionID, nextHop, index, selectedTarget.Endpoint, selectedTarget.Transport, err)
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
cancel()
|
|
|
|
|
meshState.VPNFabricSessionDialStats.ObserveSelected(selectedTarget)
|
|
|
|
|
meshState.VPNFabricEndpointObservations.ObserveSuccess(selectedTarget.EndpointID, time.Since(startedAt))
|
|
|
|
|
log.Printf("vpn fabric session transport selected: vpn_connection_id=%s next_hop=%s candidate=%d endpoint=%s transport=%s pinned_cert=%t fallback_candidates=%d", assignment.VPNConnectionID, nextHop, index, selectedTarget.Endpoint, selectedTarget.Transport, selectedTarget.PeerCertSHA256 != "", len(targets)-index-1)
|
|
|
|
|
return &vpnruntime.FabricSessionPacketTransport{
|
|
|
|
|
Sender: session,
|
|
|
|
@@ -4852,6 +4924,8 @@ func vpnFabricSessionTargets(meshState *syntheticMeshState, nextHop string) []me
|
|
|
|
|
ChannelClass: mesh.SyntheticChannelFabricControl,
|
|
|
|
|
Now: time.Now().UTC(),
|
|
|
|
|
MaxVerificationAge: 5 * time.Minute,
|
|
|
|
|
Observations: meshState.VPNFabricEndpointObservations.Snapshot(),
|
|
|
|
|
MaxObservationAge: 5 * time.Minute,
|
|
|
|
|
})
|
|
|
|
|
for _, item := range ranked {
|
|
|
|
|
endpoint := strings.TrimRight(strings.TrimSpace(item.Candidate.Address), "/")
|
|
|
|
@@ -4864,6 +4938,7 @@ func vpnFabricSessionTargets(meshState *syntheticMeshState, nextHop string) []me
|
|
|
|
|
}
|
|
|
|
|
seen[key] = struct{}{}
|
|
|
|
|
out = append(out, mesh.FabricTransportTarget{
|
|
|
|
|
EndpointID: item.Candidate.EndpointID,
|
|
|
|
|
Endpoint: endpoint,
|
|
|
|
|
Transport: item.Candidate.Transport,
|
|
|
|
|
PeerCertSHA256: endpointCandidateTLSCertSHA256(item.Candidate),
|
|
|
|
|