Fix VPN fabric-only routing guard

This commit is contained in:
2026-05-14 23:26:19 +03:00
parent 8f69d53193
commit 26cb65e936
5 changed files with 260 additions and 79 deletions
+35 -29
View File
@@ -7,7 +7,7 @@ import (
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/state"
)
const Version = "0.2.256-c18z82"
const Version = "0.2.267-vpnfarmonly"
func EnrollmentPayload(clusterID, joinToken string, identity state.Identity) client.EnrollRequest {
return client.EnrollRequest{
@@ -17,26 +17,29 @@ func EnrollmentPayload(clusterID, joinToken string, identity state.Identity) cli
NodeFingerprint: identity.NodeFingerprint,
PublicKey: identity.PublicKey,
ReportedCapabilities: map[string]any{
"can_accept_client_ingress": false,
"can_accept_node_ingress": false,
"can_route_mesh": false,
"can_run_rdp_worker": true,
"can_run_vnc_worker": false,
"can_run_vpn_exit": true,
"can_run_vpn_connector": true,
"can_run_file_cache": false,
"can_run_update_cache": false,
"can_run_video_relay": false,
"native_node_agent_version": Version,
"node_update_plan_contract": "rap.node_update_plan.v1",
"node_update_status_report": true,
"host_agent_update_required": true,
"service_supervision_enabled": false,
"vpn_assignment_status": true,
"vpn_packet_forwarding": true,
"vpn_fabric_packet_transport": true,
"vpn_local_gateway_shortcut": true,
"external_backend_entry_proxy": true,
"can_accept_client_ingress": false,
"can_accept_node_ingress": false,
"can_route_mesh": false,
"can_run_rdp_worker": true,
"can_run_vnc_worker": false,
"can_run_vpn_exit": true,
"can_run_vpn_connector": true,
"can_run_file_cache": false,
"can_run_update_cache": false,
"can_run_video_relay": false,
"native_node_agent_version": Version,
"node_update_plan_contract": "rap.node_update_plan.v1",
"node_update_status_report": true,
"host_agent_update_required": true,
"service_supervision_enabled": false,
"vpn_assignment_status": true,
"vpn_packet_forwarding": true,
"vpn_fabric_packet_transport": true,
"vpn_local_gateway_shortcut": false,
"vpn_farm_owned_dataplane": true,
"vpn_backend_relay_fallback": false,
"fabric_service_channel_required": true,
"external_backend_entry_proxy": true,
},
ReportedFacts: map[string]any{
"os": runtime.GOOS,
@@ -53,14 +56,17 @@ func HeartbeatPayload() client.HeartbeatRequest {
HealthStatus: "healthy",
ReportedVersion: Version,
Capabilities: map[string]any{
"native_node_agent": true,
"node_update_plan_contract": "rap.node_update_plan.v1",
"node_update_status_report": true,
"vpn_assignment_status": true,
"vpn_packet_forwarding": true,
"vpn_fabric_packet_transport": true,
"vpn_local_gateway_shortcut": true,
"external_backend_entry_proxy": true,
"native_node_agent": true,
"node_update_plan_contract": "rap.node_update_plan.v1",
"node_update_status_report": true,
"vpn_assignment_status": true,
"vpn_packet_forwarding": true,
"vpn_fabric_packet_transport": true,
"vpn_local_gateway_shortcut": false,
"vpn_farm_owned_dataplane": true,
"vpn_backend_relay_fallback": false,
"fabric_service_channel_required": true,
"external_backend_entry_proxy": true,
},
ServiceStates: map[string]any{
"workload_supervision": "not_implemented_c3",
@@ -44,14 +44,16 @@ type FabricPacketTransport struct {
}
type FabricClientPacketIngress struct {
ForwardTransport mesh.ProductionForwardTransport
Inbox *FabricPacketInbox
Routes func() []mesh.SyntheticRoute
LocalGateway func(vpnConnectionID string) bool
FlowScheduler *FabricFlowScheduler
MaxParallelFlowSends int
RecoveryPolicyFingerprint string
AdaptivePolicyFingerprint string
ForwardTransport mesh.ProductionForwardTransport
Inbox *FabricPacketInbox
Routes func() []mesh.SyntheticRoute
LocalGateway func(vpnConnectionID string) bool
AllowLegacyLocalGatewayFallback bool
FlowScheduler *FabricFlowScheduler
MaxParallelFlowSends int
RecoveryPolicyFingerprint string
AdaptivePolicyFingerprint string
PreventLastRouteWithdrawal bool
ClusterID string
LocalNodeID string
@@ -1623,7 +1625,7 @@ func (i *FabricClientPacketIngress) ReceiveClientPacketBatch(ctx context.Context
}
func (i *FabricClientPacketIngress) localGatewayReady(vpnConnectionID string) bool {
if i == nil || i.inbox() == nil || vpnConnectionID == "" {
if i == nil || !i.AllowLegacyLocalGatewayFallback || i.inbox() == nil || vpnConnectionID == "" {
return false
}
localGateway := i.localGateway()
@@ -1669,6 +1671,7 @@ func (i *FabricClientPacketIngress) routeCandidatesWithPreference(clusterID stri
var preferred []fabricClientRouteCandidate
var alternates []fabricClientRouteCandidate
var deferred []fabricClientRouteCandidate
var withdrawn []fabricClientRouteCandidate
manager := i.routeManager()
if preferredRouteID != "" && manager.isWithdrawn(preferredRouteID) {
if replacementRouteID := manager.replacementRouteID(preferredRouteID); replacementRouteID != "" {
@@ -1684,9 +1687,6 @@ func (i *FabricClientPacketIngress) routeCandidatesWithPreference(clusterID stri
if route.ClusterID != clusterID || route.SourceNodeID != localNodeID || !containsString(route.AllowedChannels, mesh.ProductionChannelVPNPacket) {
continue
}
if manager.isWithdrawn(route.RouteID) {
continue
}
if !route.ExpiresAt.IsZero() && !route.ExpiresAt.After(now) {
continue
}
@@ -1695,6 +1695,10 @@ func (i *FabricClientPacketIngress) routeCandidatesWithPreference(clusterID stri
continue
}
candidate := fabricClientRouteCandidate{Route: route, NextHop: nextHop}
if manager.isWithdrawn(route.RouteID) {
withdrawn = append(withdrawn, candidate)
continue
}
if preferredRouteID != "" && route.RouteID == preferredRouteID {
preferred = append(preferred, candidate)
} else if avoidRouteID != "" && route.RouteID == avoidRouteID {
@@ -1703,9 +1707,32 @@ func (i *FabricClientPacketIngress) routeCandidatesWithPreference(clusterID stri
alternates = append(alternates, candidate)
}
}
if len(preferred) > 0 {
destinationNodeID := strings.TrimSpace(preferred[0].Route.DestinationNodeID)
alternates = filterRouteCandidatesByDestination(alternates, destinationNodeID)
deferred = filterRouteCandidatesByDestination(deferred, destinationNodeID)
}
out := append(preferred, alternates...)
out = i.applyRouteQualityPreferences(out, preferredRouteID)
return append(out, deferred...)
out = append(out, deferred...)
if len(out) == 0 && i.preventLastRouteWithdrawal() {
return withdrawn
}
return out
}
// filterRouteCandidatesByDestination keeps only the candidates whose route
// destination node ID matches destinationNodeID. Both sides of the comparison
// are whitespace-trimmed first.
//
// When the trimmed destinationNodeID is empty, or candidates is empty, the
// input slice is returned untouched. Otherwise the result reuses the backing
// array of candidates (filter-in-place), so callers must not keep using the
// original slice contents after the call.
func filterRouteCandidatesByDestination(candidates []fabricClientRouteCandidate, destinationNodeID string) []fabricClientRouteCandidate {
	want := strings.TrimSpace(destinationNodeID)
	if want == "" || len(candidates) == 0 {
		return candidates
	}
	// Zero-length view over the same storage: survivors are compacted to the front.
	kept := candidates[:0]
	for idx := range candidates {
		if strings.TrimSpace(candidates[idx].Route.DestinationNodeID) != want {
			continue
		}
		kept = append(kept, candidates[idx])
	}
	return kept
}
func (i *FabricClientPacketIngress) applyRouteQualityPreferences(candidates []fabricClientRouteCandidate, preferredRouteID string) []fabricClientRouteCandidate {
@@ -1744,6 +1771,15 @@ func (i *FabricClientPacketIngress) applyRouteQualityPreferences(candidates []fa
return out
}
// preventLastRouteWithdrawal reports the current value of the
// PreventLastRouteWithdrawal flag. The field is read while holding i.mu.
// A nil receiver reports false.
func (i *FabricClientPacketIngress) preventLastRouteWithdrawal() bool {
	if i == nil {
		return false
	}
	i.mu.Lock()
	enabled := i.PreventLastRouteWithdrawal
	i.mu.Unlock()
	return enabled
}
func (t *FabricPacketTransport) ReceiveGatewayPacketBatch(ctx context.Context, timeout time.Duration) ([][]byte, error) {
if t == nil || t.Inbox == nil {
return nil, mesh.ErrForwardRuntimeUnavailable
@@ -524,6 +524,52 @@ func TestFabricClientPacketIngressTriesAlternateRouteBeforeBackendFallback(t *te
}
}
// TestFabricClientPacketIngressDoesNotFailOverPreferredRouteToDifferentDestination
// checks that a failed send on the preferred route is not retried on a route
// that ends at a different destination node.
//
// Two routes are offered from entry-1: route-other (destination ifcm-1 via
// relay-ifcm) and route-home (destination home-1 via relay-home). route-home
// is marked preferred and the transport is configured with
// failNextHop "relay-home" (presumably making sends to that hop fail; the
// transport helper is defined elsewhere). The send must return an error, with
// relay-home as the only attempted hop and route-other never used.
func TestFabricClientPacketIngressDoesNotFailOverPreferredRouteToDifferentDestination(t *testing.T) {
transport := &failoverProductionTransport{failNextHop: "relay-home"}
ingress := &FabricClientPacketIngress{
ForwardTransport: transport,
Inbox: NewFabricPacketInbox(4),
ClusterID: "cluster-1",
LocalNodeID: "entry-1",
Routes: func() []mesh.SyntheticRoute {
return []mesh.SyntheticRoute{
// Healthy route, but it terminates at a different destination (ifcm-1).
{
RouteID: "route-other",
ClusterID: "cluster-1",
SourceNodeID: "entry-1",
DestinationNodeID: "ifcm-1",
Hops: []string{"entry-1", "relay-ifcm", "ifcm-1"},
AllowedChannels: []string{mesh.ProductionChannelVPNPacket},
ExpiresAt: time.Now().UTC().Add(time.Minute),
MaxTTL: 8,
},
// Preferred route; its next hop is the one the transport is set to fail.
{
RouteID: "route-home",
ClusterID: "cluster-1",
SourceNodeID: "entry-1",
DestinationNodeID: "home-1",
Hops: []string{"entry-1", "relay-home", "home-1"},
AllowedChannels: []string{mesh.ProductionChannelVPNPacket},
ExpiresAt: time.Now().UTC().Add(time.Minute),
MaxTTL: 8,
},
}
},
}
ingress.PreferClientRoute("route-home")
err := ingress.SendClientPacketBatch(context.Background(), "cluster-1", "vpn-1", [][]byte{[]byte("packet")})
// The send is expected to fail outright instead of succeeding via route-other.
if err == nil {
t.Fatal("send client packet batch succeeded after preferred route failure; want failure without cross-destination fallback")
}
// Exactly one attempt, and only against the preferred route's next hop.
if len(transport.calls) != 1 || transport.calls[0] != "relay-home" {
t.Fatalf("route attempts = %#v, want only relay-home", transport.calls)
}
if transport.envelope.RouteID == "route-other" {
t.Fatalf("cross-destination route was used: %+v", transport.envelope)
}
}
func TestFabricClientPacketIngressAvoidsChannelFailedRouteOnNextSend(t *testing.T) {
transport := &captureManyProductionTransport{}
scheduler := NewFabricFlowScheduler(8, 16)
@@ -822,6 +868,44 @@ func TestFabricClientPacketIngressPendingDegradedFallbackWithdrawsRouteWithoutAl
}
}
// TestFabricClientPacketIngressKeepsLastRouteWhenWithdrawalPreventionEnabled
// checks that, with PreventLastRouteWithdrawal set, the only available route
// stays usable after the route manager receives a
// "pending_degraded_fallback" decision for it.
//
// The send must succeed, the single captured envelope must carry RouteID
// "route-only", and the snapshot must still report one route candidate.
func TestFabricClientPacketIngressKeepsLastRouteWhenWithdrawalPreventionEnabled(t *testing.T) {
transport := &captureManyProductionTransport{}
ingress := &FabricClientPacketIngress{
ForwardTransport: transport,
Inbox: NewFabricPacketInbox(4),
ClusterID: "cluster-1",
LocalNodeID: "entry-1",
PreventLastRouteWithdrawal: true,
Routes: func() []mesh.SyntheticRoute {
// A single direct route: there is no alternate to fall back to.
return []mesh.SyntheticRoute{{
RouteID: "route-only",
ClusterID: "cluster-1",
SourceNodeID: "entry-1",
DestinationNodeID: "exit-1",
Hops: []string{"entry-1", "exit-1"},
AllowedChannels: []string{mesh.ProductionChannelVPNPacket},
ExpiresAt: time.Now().UTC().Add(time.Minute),
MaxTTL: 8,
}}
},
}
// Feed the route manager a decision for the only route (presumably this marks
// it withdrawn; UpdateRouteManager is defined elsewhere).
ingress.UpdateRouteManager([]FabricServiceChannelRouteManagerDecision{{
RouteID: "route-only",
RebuildStatus: "pending_degraded_fallback",
DecisionSource: "service_channel_feedback_no_alternate",
}}, "config-v2", time.Now().UTC())
if err := ingress.SendClientPacketBatch(context.Background(), "cluster-1", "vpn-1", [][]byte{[]byte("packet")}); err != nil {
t.Fatalf("send client packet batch: %v", err)
}
// The packet must have gone out on the preserved route.
if len(transport.envelopes) != 1 || transport.envelopes[0].RouteID != "route-only" {
t.Fatalf("envelopes = %+v, want preserved last route", transport.envelopes)
}
if snapshot := ingress.Snapshot("cluster-1"); snapshot.RouteCandidateCount != 1 {
t.Fatalf("route candidate count = %d, want last withdrawn route preserved", snapshot.RouteCandidateCount)
}
}
func TestFabricClientPacketIngressMarksChannelForRebuildAfterRepeatedRouteFailures(t *testing.T) {
transport := &failoverProductionTransport{failNextHop: "relay-bad"}
scheduler := NewFabricFlowScheduler(8, 16)
@@ -1930,9 +2014,10 @@ func TestFabricClientPacketIngressBoundedLoadReportsPerChannelDrops(t *testing.T
func TestFabricClientPacketIngressUsesLocalGatewayShortcutWithoutRoute(t *testing.T) {
inbox := NewFabricPacketInbox(4)
ingress := &FabricClientPacketIngress{
Inbox: inbox,
ClusterID: "cluster-1",
LocalNodeID: "entry-1",
Inbox: inbox,
ClusterID: "cluster-1",
LocalNodeID: "entry-1",
AllowLegacyLocalGatewayFallback: true,
LocalGateway: func(vpnConnectionID string) bool {
return vpnConnectionID == "vpn-1"
},
@@ -1954,9 +2039,10 @@ func TestFabricClientPacketIngressUsesLocalGatewayShortcutWithoutRoute(t *testin
func TestFabricClientPacketIngressReceivesLocalGatewayReplyWithoutRoute(t *testing.T) {
inbox := NewFabricPacketInbox(4)
ingress := &FabricClientPacketIngress{
Inbox: inbox,
ClusterID: "cluster-1",
LocalNodeID: "entry-1",
Inbox: inbox,
ClusterID: "cluster-1",
LocalNodeID: "entry-1",
AllowLegacyLocalGatewayFallback: true,
LocalGateway: func(vpnConnectionID string) bool {
return vpnConnectionID == "vpn-1"
},