Fix VPN fabric-only routing guard

This commit is contained in:
2026-05-14 23:26:19 +03:00
parent 8f69d53193
commit 26cb65e936
5 changed files with 260 additions and 79 deletions
@@ -85,7 +85,15 @@ func main() {
}
defer stopMeshEndpoint()
supervisor := supervisor.StubSupervisor{Version: agent.Version}
supervisor := supervisor.StubSupervisor{
Version: agent.Version,
RemoteWorkspaceRealAdapter: supervisor.RemoteWorkspaceRealAdapterConfig{
EnabledRequested: cfg.RemoteWorkspaceRealAdapterEnabled,
Command: cfg.RemoteWorkspaceRealAdapterCommand,
ArgsJSON: cfg.RemoteWorkspaceRealAdapterArgsJSON,
WorkDir: cfg.RemoteWorkspaceRealAdapterWorkDir,
},
}
startedAt := time.Now().UTC()
ticker := time.NewTicker(cfg.HeartbeatInterval)
defer ticker.Stop()
@@ -805,6 +813,7 @@ func newVPNFabricIngress(meshState *syntheticMeshState, identity state.Identity,
if ingress == nil {
ingress = &vpnruntime.FabricClientPacketIngress{}
}
ingress.PreventLastRouteWithdrawal = true
ingress.UpdateRuntime(
meshState.ProductionForwardTransport,
meshState.VPNFabricInbox,
@@ -2484,8 +2493,9 @@ func fabricServiceChannelRuntimeReport(meshState *syntheticMeshState, identity s
"service_class": "vpn_packets",
"channel_class": mesh.ProductionChannelVPNPacket,
"route_manager": "primary_sticky_with_alternate_route_failover",
"backend_relay_fallback": true,
"backend_relay_fallback_position": "after_all_fabric_routes_fail",
"backend_relay_fallback": false,
"backend_relay_fallback_position": "disabled_farm_owned_dataplane",
"route_authority": "fabric_farm",
"application_protocol_agnostic": true,
"observed_at": observedAt.UTC().Format(time.RFC3339Nano),
}
@@ -4191,6 +4201,24 @@ func ensureVPNGatewayRuntime(ctx context.Context, api *client.Client, identity s
}
activeOwner := false
for _, assignment := range assignments {
if assignment.AssignmentReason == "eligible_candidate" && assignment.DesiredState == "enabled" {
lease, err := api.AcquireNodeVPNAssignmentLease(ctx, identity.ClusterID, identity.NodeID, assignment.VPNConnectionID, client.NodeVPNAssignmentLeaseAcquireRequest{
TTLSeconds: 300,
Metadata: map[string]any{
"reason": "node_agent_auto_acquire",
"node_id": identity.NodeID,
"agent": "rap-node-agent",
"acquired_at": time.Now().UTC().Format(time.RFC3339Nano),
},
})
if err != nil {
log.Printf("vpn assignment lease auto-acquire skipped: vpn_connection_id=%s error=%v", assignment.VPNConnectionID, err)
} else if lease != nil {
assignment.AssignmentReason = "active_owner"
assignment.ActiveLease = lease
log.Printf("vpn assignment lease auto-acquired: vpn_connection_id=%s lease_id=%s", assignment.VPNConnectionID, lease.LeaseID)
}
}
if assignment.AssignmentReason != "active_owner" {
continue
}
@@ -4220,6 +4248,11 @@ func ensureVPNGatewayRuntime(ctx context.Context, api *client.Client, identity s
} else if _, ok := gateway.Transport.(*vpnruntime.AdaptivePacketTransport); ok {
gateway.Stop()
gateway.Transport = nil
} else {
gateway.Stop()
gateway.Transport = nil
log.Printf("vpn gateway runtime skipped: vpn_connection_id=%s reason=fabric_packet_transport_unavailable", assignment.VPNConnectionID)
return nil
}
if err := gateway.EnsureStarted(ctx); err != nil {
return err
@@ -4236,29 +4269,17 @@ func ensureVPNGatewayRuntime(ctx context.Context, api *client.Client, identity s
return nil
}
func localGatewayTransportForAssignment(identity state.Identity, assignment client.NodeVPNAssignment, meshState *syntheticMeshState, api *client.Client) vpnruntime.PacketTransport {
func localGatewayTransportForAssignment(identity state.Identity, assignment client.NodeVPNAssignment, meshState *syntheticMeshState, _ *client.Client) vpnruntime.PacketTransport {
if meshState == nil || meshState.VPNFabricInbox == nil || assignment.VPNConnectionID == "" {
return nil
}
local := &vpnruntime.LocalPacketTransport{
return &vpnruntime.LocalPacketTransport{
Inbox: meshState.VPNFabricInbox,
VPNConnectionID: assignment.VPNConnectionID,
}
if api == nil {
return local
}
return &vpnruntime.AdaptivePacketTransport{
Primary: local,
Fallback: vpnruntime.BackendPacketTransport{
API: api,
ClusterID: identity.ClusterID,
VPNConnectionID: assignment.VPNConnectionID,
},
PrimaryTimeout: 50 * time.Millisecond,
}
}
func fabricGatewayTransportForAssignment(identity state.Identity, assignment client.NodeVPNAssignment, meshState *syntheticMeshState, api *client.Client) vpnruntime.PacketTransport {
func fabricGatewayTransportForAssignment(identity state.Identity, assignment client.NodeVPNAssignment, meshState *syntheticMeshState, _ *client.Client) vpnruntime.PacketTransport {
if meshState == nil || meshState.ProductionForwardTransport == nil || meshState.VPNFabricInbox == nil {
return nil
}
@@ -4266,7 +4287,7 @@ func fabricGatewayTransportForAssignment(identity state.Identity, assignment cli
if !ok {
return nil
}
fabric := &vpnruntime.FabricPacketTransport{
return &vpnruntime.FabricPacketTransport{
ForwardTransport: meshState.ProductionForwardTransport,
Inbox: meshState.VPNFabricInbox,
ClusterID: identity.ClusterID,
@@ -4279,18 +4300,6 @@ func fabricGatewayTransportForAssignment(identity state.Identity, assignment cli
SendDirection: vpnruntime.FabricDirectionGatewayToClient,
ReceiveDirection: vpnruntime.FabricDirectionClientToGateway,
}
if api == nil {
return fabric
}
return &vpnruntime.AdaptivePacketTransport{
Primary: fabric,
Fallback: vpnruntime.BackendPacketTransport{
API: api,
ClusterID: identity.ClusterID,
VPNConnectionID: assignment.VPNConnectionID,
},
PrimaryTimeout: 50 * time.Millisecond,
}
}
func selectVPNPacketRoute(routes []mesh.SyntheticRoute, clusterID string, localNodeID string) (mesh.SyntheticRoute, string, bool) {
@@ -20,6 +20,7 @@ import (
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/config"
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/mesh"
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/state"
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/vpnruntime"
)
func TestLoadSyntheticMeshConfigPrefersScopedFile(t *testing.T) {
@@ -197,6 +198,49 @@ func TestRouteManagerDecisionsFromControlPlaneRejectsGuardedRemediationCommand(t
}
}
// TestGatewayTransportForAssignmentUsesFabricWithoutBackendFallback checks that
// fabricGatewayTransportForAssignment hands back a bare
// *vpnruntime.FabricPacketTransport rather than a wrapper around it.
//
// A non-nil API client is passed on purpose: a backend-relay fallback wrapper
// can only be built when a client is available, so a nil client could never
// detect such a wrapper being (re)introduced.
func TestGatewayTransportForAssignmentUsesFabricWithoutBackendFallback(t *testing.T) {
	inbox := vpnruntime.NewFabricPacketInbox(4)
	transport := fabricGatewayTransportForAssignment(
		state.Identity{ClusterID: "cluster-1", NodeID: "exit-1"},
		client.NodeVPNAssignment{VPNConnectionID: "vpn-1"},
		&syntheticMeshState{
			ProductionForwardTransport: noopProductionForwardTransport{},
			VPNFabricInbox:             inbox,
			// One unexpired route from the local node that allows the VPN
			// packet channel, so route selection has a candidate to pick.
			Routes: []mesh.SyntheticRoute{{
				RouteID:           "route-exit-entry",
				ClusterID:         "cluster-1",
				SourceNodeID:      "exit-1",
				DestinationNodeID: "entry-1",
				Hops:              []string{"exit-1", "entry-1"},
				AllowedChannels:   []string{mesh.ProductionChannelVPNPacket},
				ExpiresAt:         time.Now().UTC().Add(time.Minute),
			}},
		},
		// Zero-value client: it must not influence the returned transport.
		new(client.Client),
	)
	if _, ok := transport.(*vpnruntime.FabricPacketTransport); !ok {
		t.Fatalf("transport = %T, want fabric packet transport without backend fallback", transport)
	}
}
// TestLocalGatewayTransportForAssignmentUsesLocalInboxWithoutBackendFallback
// checks that localGatewayTransportForAssignment hands back a bare
// *vpnruntime.LocalPacketTransport rather than a wrapper around it.
//
// A non-nil API client is passed on purpose: a backend-relay fallback wrapper
// can only be built when a client is available, so a nil client could never
// detect such a wrapper being (re)introduced.
func TestLocalGatewayTransportForAssignmentUsesLocalInboxWithoutBackendFallback(t *testing.T) {
	transport := localGatewayTransportForAssignment(
		state.Identity{ClusterID: "cluster-1", NodeID: "exit-1"},
		client.NodeVPNAssignment{VPNConnectionID: "vpn-1"},
		&syntheticMeshState{VPNFabricInbox: vpnruntime.NewFabricPacketInbox(4)},
		// Zero-value client: it must not influence the returned transport.
		new(client.Client),
	)
	if _, ok := transport.(*vpnruntime.LocalPacketTransport); !ok {
		t.Fatalf("transport = %T, want local packet transport without backend fallback", transport)
	}
}
// noopProductionForwardTransport is a stateless test double that the transport
// tests plug in as the mesh state's ProductionForwardTransport.
type noopProductionForwardTransport struct{}

// SendProduction ignores every argument and reports success with a zero-value
// forward result.
func (noopProductionForwardTransport) SendProduction(_ context.Context, _ string, _ mesh.ProductionEnvelope) (mesh.ProductionForwardResult, error) {
	var result mesh.ProductionForwardResult
	return result, nil
}
func TestRouteManagerDecisionsFromControlPlaneKeepsExplicitRemediationCommand(t *testing.T) {
now := time.Now().UTC()
report := &client.RoutePathDecisionReport{Decisions: []client.RoutePathDecision{{