This commit is contained in:
2026-05-14 23:30:34 +03:00
parent 26cb65e936
commit 04c46042d9
239 changed files with 34102 additions and 438 deletions
+146 -53
View File
@@ -4015,8 +4015,8 @@ func (s *Service) IssueFabricServiceChannelLease(ctx context.Context, input Issu
if ttl <= 0 {
ttl = time.Minute
}
if ttl > 5*time.Minute {
ttl = 5 * time.Minute
if ttl > 6*time.Hour {
ttl = 6 * time.Hour
}
now := s.now().UTC()
expiresAt := now.Add(ttl)
@@ -4031,6 +4031,9 @@ func (s *Service) IssueFabricServiceChannelLease(ctx context.Context, input Issu
return FabricServiceChannelLease{}, err
}
poolPolicy := fabricServiceChannelPoolPolicyFromCluster(cluster)
if input.BackendFallbackAllowed != nil {
poolPolicy.BackendFallbackAllowed = *input.BackendFallbackAllowed
}
entryNodeIDs := fabricServiceChannelEffectivePool(input.EntryNodeIDs, poolPolicy.EntryPoolNodeIDs)
exitNodeIDs := fabricServiceChannelEffectivePool(input.ExitNodeIDs, poolPolicy.ExitPoolNodeIDs)
if len(entryNodeIDs) == 0 || len(exitNodeIDs) == 0 {
@@ -7303,7 +7306,9 @@ func (s *Service) GetNodeSyntheticMeshConfig(ctx context.Context, input GetNodeS
if feedback, ok := serviceChannelFeedback[route.RouteID]; ok && feedback.Fenced {
replacementDecision := s.serviceChannelRouteReplacementDecision(input, route, intents, serviceChannelFeedback, cfg.ConfigVersion)
routePathDecisions = append(routePathDecisions, replacementDecision)
continue
if replacementDecision.DecisionSource != "service_channel_feedback_no_alternate_keep_primary" {
continue
}
}
reportedPeers, reportedCandidates, err := s.reportedEndpointConfig(ctx, input.ClusterID, input.NodeID, route.Hops, localPerspective)
if err != nil {
@@ -8700,6 +8705,98 @@ func (s *Service) RenewNodeVPNAssignmentLease(ctx context.Context, input RenewNo
return item, nil
}
// AcquireNodeVPNAssignmentLease acquires the lease of a VPN connection on
// behalf of the node identified by input.OwnerNodeID.
//
// The cluster, connection and owner identifiers are whitespace-trimmed and
// must all be non-empty, otherwise ErrInvalidPayload is returned. The
// connection must exist (ErrInvalidVPNConnection otherwise), use the
// single-active mode and be enabled. The node must pass
// ensureVPNLeaseOwnerEligible and must see the connection among its own
// assignments:
//
//   - if an assignment already reports the node as the active owner, a lease
//     value rebuilt from that assignment is returned and nothing new is
//     acquired;
//   - if an assignment reports the node as an eligible candidate, a new lease
//     is acquired from the store;
//   - otherwise ErrVPNLeaseOwnerNotAllowed is returned.
//
// A non-positive input.TTL is replaced by two minutes, and input.Metadata is
// passed through defaultJSON with `{}` as the default before being checked
// for JSON validity.
func (s *Service) AcquireNodeVPNAssignmentLease(ctx context.Context, input AcquireNodeVPNAssignmentLeaseInput) (VPNConnectionLease, error) {
	// Zero value returned alongside every error.
	var none VPNConnectionLease

	clusterID := strings.TrimSpace(input.ClusterID)
	connectionID := strings.TrimSpace(input.VPNConnectionID)
	ownerNodeID := strings.TrimSpace(input.OwnerNodeID)
	if clusterID == "" || connectionID == "" || ownerNodeID == "" {
		return none, ErrInvalidPayload
	}

	connection, err := s.store.GetVPNConnection(ctx, clusterID, connectionID)
	switch {
	case errors.Is(err, pgx.ErrNoRows):
		return none, ErrInvalidVPNConnection
	case err != nil:
		return none, err
	}
	if connection.Mode != VPNConnectionModeSingleActive || connection.DesiredState != VPNConnectionDesiredEnabled {
		return none, errors.New("vpn connection must be enabled single_active before lease acquisition")
	}
	if err := s.ensureVPNLeaseOwnerEligible(ctx, clusterID, connectionID, ownerNodeID); err != nil {
		return none, err
	}

	nodeAssignments, err := s.store.ListNodeVPNAssignments(ctx, clusterID, ownerNodeID)
	if err != nil {
		return none, err
	}
	eligible := false
	for _, candidate := range nodeAssignments {
		if candidate.VPNConnectionID != connectionID {
			continue
		}
		// An assignment with an explicit, non-enabled desired state rejects the node.
		if state := candidate.DesiredState; state != "" && state != VPNConnectionDesiredEnabled {
			return none, ErrVPNLeaseOwnerNotAllowed
		}
		// The node is already reported as the active owner: hand back that lease.
		if active := candidate.ActiveLease; active != nil &&
			candidate.AssignmentReason == "active_owner" &&
			active.OwnerNodeID == ownerNodeID {
			return VPNConnectionLease{
				ID:              active.LeaseID,
				VPNConnectionID: candidate.VPNConnectionID,
				ClusterID:       candidate.ClusterID,
				OwnerNodeID:     active.OwnerNodeID,
				LeaseGeneration: active.LeaseGeneration,
				Status:          active.Status,
				RenewedAt:       active.RenewedAt,
				ExpiresAt:       active.ExpiresAt,
			}, nil
		}
		if candidate.AssignmentReason == "eligible_candidate" {
			eligible = true
			break
		}
	}
	if !eligible {
		return none, ErrVPNLeaseOwnerNotAllowed
	}

	// Normalise the optional request fields before handing them to the store.
	if input.TTL <= 0 {
		input.TTL = 2 * time.Minute
	}
	input.Metadata = defaultJSON(input.Metadata, `{}`)
	if !json.Valid(input.Metadata) {
		return none, errors.New("lease metadata must be valid json")
	}

	fencingToken, err := generateFencingToken()
	if err != nil {
		return none, err
	}
	expiresAt := s.now().Add(input.TTL)
	request := AcquireVPNConnectionLeaseInput{
		ClusterID:       clusterID,
		VPNConnectionID: connectionID,
		OwnerNodeID:     ownerNodeID,
		TTL:             input.TTL,
		Metadata:        input.Metadata,
	}
	lease, err := s.store.AcquireVPNConnectionLease(ctx, request, expiresAt, fencingToken)
	switch {
	case errors.Is(err, pgx.ErrNoRows):
		return none, ErrInvalidVPNLease
	case errors.Is(err, ErrVPNLeaseAlreadyActive):
		// Return the bare sentinel even if the store wrapped it.
		return none, ErrVPNLeaseAlreadyActive
	case err != nil:
		return none, err
	}

	// The audit write is best-effort: its error is discarded and the acquired
	// lease is returned regardless.
	_ = s.store.RecordAudit(ctx, ClusterAuditEvent{
		ClusterID:  &clusterID,
		EventType:  "vpn_connection.lease_acquired_by_node",
		TargetType: "vpn_connection",
		TargetID:   &connectionID,
		Payload:    json.RawMessage(`{"node_agent_runtime_requested":true}`),
		CreatedAt:  s.now(),
	})
	return lease, nil
}
func (s *Service) ReleaseVPNConnectionLease(ctx context.Context, input ReleaseVPNConnectionLeaseInput) (VPNConnectionLease, error) {
if err := s.ensurePlatformAdmin(ctx, input.ActorUserID); err != nil {
return VPNConnectionLease{}, err
@@ -8910,18 +9007,20 @@ func (s *Service) attachVPNFabricServiceChannelLeases(ctx context.Context, profi
if len(exitPool) == 0 {
exitPool = dedupeStrings(append([]string{route.SelectedExitNodeID, connection.ExitNodeID}, connection.AllowedNodeIDs...))
}
backendFallbackAllowed := false
lease, err := s.IssueFabricServiceChannelLease(ctx, IssueFabricServiceChannelLeaseInput{
ClusterID: profile.ClusterID,
OrganizationID: profile.OrganizationID,
UserID: profile.UserID,
ResourceID: connection.ID,
ServiceClass: FabricServiceClassVPNPackets,
EntryNodeIDs: entryPool,
ExitNodeIDs: exitPool,
PreferredEntryNodeID: route.SelectedEntryNodeID,
PreferredExitNodeID: route.SelectedExitNodeID,
AllowedChannels: []string{"vpn_packet", "fabric_control", FabricChannelBulk, FabricChannelControl},
TTL: time.Minute,
ClusterID: profile.ClusterID,
OrganizationID: profile.OrganizationID,
UserID: profile.UserID,
ResourceID: connection.ID,
ServiceClass: FabricServiceClassVPNPackets,
EntryNodeIDs: entryPool,
ExitNodeIDs: exitPool,
PreferredEntryNodeID: route.SelectedEntryNodeID,
PreferredExitNodeID: route.SelectedExitNodeID,
AllowedChannels: []string{"vpn_packet", "fabric_control", FabricChannelBulk, FabricChannelControl},
TTL: 6 * time.Hour,
BackendFallbackAllowed: &backendFallbackAllowed,
})
if err != nil {
profile.Connections[i].ClientConfig = attachVPNFabricServiceChannelError(connection.ClientConfig, err)
@@ -8985,19 +9084,21 @@ func enrichVPNDataplaneSession(profile VPNClientProfile, connection VPNClientCon
status = "ready_for_entry_listener"
}
cfg["vpn_dataplane_session"] = map[string]any{
"schema_version": "rap.vpn_dataplane_session.v1",
"session_id": sessionID,
"status": status,
"issued_at": now,
"expires_at": expiresAt,
"cluster_id": profile.ClusterID,
"organization_id": profile.OrganizationID,
"user_id": profile.UserID,
"vpn_connection_id": connection.ID,
"entry_node_id": route.SelectedEntryNodeID,
"exit_node_id": route.SelectedExitNodeID,
"preferred_transport": "fabric_packet_quic_v1",
"fallback_transport": "backend_http_packet_relay",
"schema_version": "rap.vpn_dataplane_session.v1",
"session_id": sessionID,
"status": status,
"issued_at": now,
"expires_at": expiresAt,
"cluster_id": profile.ClusterID,
"organization_id": profile.OrganizationID,
"user_id": profile.UserID,
"vpn_connection_id": connection.ID,
"entry_node_id": route.SelectedEntryNodeID,
"exit_node_id": route.SelectedExitNodeID,
"preferred_transport": "fabric_service_channel_v1",
"fallback_transport": "none",
"route_authority": "fabric_farm",
"backend_relay_allowed": false,
"packet_contract": map[string]any{
"tunnel_type": "universal_ip_packet",
"application_protocol_agnostic": true,
@@ -9089,10 +9190,12 @@ func vpnConcreteEntryCandidatesFromClientConfig(cfg map[string]any) []map[string
func vpnDataplaneTransportCandidates(route vpnClientFabricRoute, entryCandidates []map[string]any) []map[string]any {
candidates := []map[string]any{
{
"type": "fabric_packet_quic_v1",
"type": "fabric_service_channel_v1",
"status": "contract_ready_listener_pending",
"entry_node_id": route.SelectedEntryNodeID,
"exit_node_id": route.SelectedExitNodeID,
"route_authority": "fabric_farm",
"backend_relay_allowed": false,
"entry_candidates": entryCandidates,
"application_protocols": []string{"ip"},
},
@@ -9100,11 +9203,6 @@ func vpnDataplaneTransportCandidates(route vpnClientFabricRoute, entryCandidates
if direct := vpnDirectHTTPEntryTransportCandidate(route, entryCandidates); direct != nil {
candidates = append(candidates, direct)
}
candidates = append(candidates, map[string]any{
"type": "backend_http_packet_relay",
"status": "active_fallback",
"description": "current safe dataplane until entry listener is available",
})
return candidates
}
@@ -9112,7 +9210,6 @@ func vpnDirectHTTPEntryTransportCandidate(route vpnClientFabricRoute, entryCandi
var selected []map[string]any
hasPublic := false
hasHTTP := false
hasLocalGatewayShortcut := false
for _, candidate := range entryCandidates {
nodeID, _ := candidate["node_id"].(string)
if route.SelectedEntryNodeID != "" && nodeID != route.SelectedEntryNodeID {
@@ -9132,9 +9229,6 @@ func vpnDirectHTTPEntryTransportCandidate(route vpnClientFabricRoute, entryCandi
if strings.EqualFold(reachability, "public") {
hasPublic = true
}
if value, ok := candidate["local_gateway_shortcut"].(bool); ok && value {
hasLocalGatewayShortcut = true
}
selected = append(selected, candidate)
}
if len(selected) == 0 {
@@ -9148,13 +9242,8 @@ func vpnDirectHTTPEntryTransportCandidate(route vpnClientFabricRoute, entryCandi
}
safeClientSwitch := hasPublic
if route.SelectedEntryNodeID != "" && route.SelectedEntryNodeID == route.SelectedExitNodeID {
if hasPublic && hasLocalGatewayShortcut {
status = "available_local_gateway_shortcut"
safeClientSwitch = true
} else {
status = "available_local_gateway_shortcut_pending"
safeClientSwitch = false
}
status = "available_farm_local_route"
safeClientSwitch = hasPublic
}
return map[string]any{
"type": "entry_direct_http_v1",
@@ -9275,9 +9364,13 @@ func vpnFabricRouteIntentPolicy(sourceNodeID, destinationNodeID string, expiresA
"route_version": version,
"policy_version": version,
"peer_directory_version": version,
"backend_relay_fallback": true,
"data_plane_preference": "fabric_mesh",
"route_owner": "vpn_client_profile",
"backend_relay_fallback": false,
"data_plane_preference": "fabric_service_channel",
"route_owner": "fabric_farm",
"vpn_builds_routes": false,
"vpn_builds_tunnels": false,
"farm_builds_routes": true,
"farm_builds_tunnels": true,
"route_refresh_required": true,
"route_refresh_threshold": "24h",
}
@@ -11387,11 +11480,11 @@ func (s *Service) serviceChannelRouteReplacementDecision(input GetNodeSyntheticM
SourceNodeID: fencedRoute.SourceNodeID,
DestinationNodeID: fencedRoute.DestinationNodeID,
OriginalHops: append([]string{}, fencedRoute.Hops...),
EffectiveHops: []string{},
DecisionSource: "service_channel_feedback_no_alternate",
EffectiveHops: append([]string{}, fencedRoute.Hops...),
DecisionSource: "service_channel_feedback_no_alternate_keep_primary",
Generation: generation,
PathScore: 0,
ScoreReasons: []string{"service_channel_fenced_route", "no_unfenced_alternate_route"},
PathScore: serviceChannelReplacementRouteScore(fencedRoute),
ScoreReasons: []string{"service_channel_fenced_route", "no_unfenced_alternate_route", "primary_route_retained_until_rebuild"},
ControlPlaneOnly: true,
ProductionForwarding: false,
ExpiresAt: fencedRoute.ExpiresAt.UTC(),
@@ -11399,10 +11492,10 @@ func (s *Service) serviceChannelRouteReplacementDecision(input GetNodeSyntheticM
applyServiceChannelFeedbackCorrelationToDecision(&decision, routeFeedback)
if serviceChannelFeedbackRequestsRebuild(routeFeedback) {
decision.RebuildRequestID = serviceChannelRebuildRequestID(fencedRoute.RouteID, input.NodeID, generation)
decision.RebuildStatus = "pending_degraded_fallback"
decision.RebuildStatus = "requested"
decision.RebuildReason = "service_channel_feedback_rebuild_requested"
decision.RebuildAttempt = routeFeedback.ConsecutiveFailures
decision.ScoreReasons = append(decision.ScoreReasons, "service_channel_rebuild_requested", "backend_relay_degraded_fallback_until_rebuild")
decision.ScoreReasons = append(decision.ScoreReasons, "service_channel_rebuild_requested")
if routeFeedback.DegradedFallbackRecommended {
decision.ScoreReasons = append(decision.ScoreReasons, "service_channel_degraded_fallback_recommended")
}