Stabilize VPN farm startup path

This commit is contained in:
2026-05-15 10:31:29 +03:00
parent 96566cbe55
commit e3f21d591f
6 changed files with 113 additions and 16 deletions
@@ -4202,6 +4202,10 @@ func ensureVPNGatewayRuntime(ctx context.Context, api *client.Client, identity s
activeOwner := false
for _, assignment := range assignments {
if assignment.AssignmentReason == "eligible_candidate" && assignment.DesiredState == "enabled" {
if !vpnAssignmentLeaseAutoAcquireAllowed(identity.NodeID, assignment) {
log.Printf("vpn assignment lease auto-acquire skipped: vpn_connection_id=%s reason=local_node_is_not_selected_exit", assignment.VPNConnectionID)
continue
}
lease, err := api.AcquireNodeVPNAssignmentLease(ctx, identity.ClusterID, identity.NodeID, assignment.VPNConnectionID, client.NodeVPNAssignmentLeaseAcquireRequest{
TTLSeconds: 300,
Metadata: map[string]any{
@@ -4269,6 +4273,29 @@ func ensureVPNGatewayRuntime(ctx context.Context, api *client.Client, identity s
return nil
}
// vpnAssignmentLeaseAutoAcquireAllowed reports whether the node identified by
// localNodeID may auto-acquire the lease for assignment, judged by the exit
// node selection found in the assignment's placement policy.
//
// Rules, in order:
//   - a blank localNodeID is never allowed;
//   - an absent or undecodable placement policy imposes no restriction;
//   - a non-blank "exit_node_id" must equal localNodeID and takes precedence
//     over "exit_node_ids";
//   - otherwise localNodeID must appear in "exit_node_ids", unless that list
//     is empty, in which case any node is allowed.
//
// IDs are compared after trimming surrounding whitespace.
func vpnAssignmentLeaseAutoAcquireAllowed(localNodeID string, assignment client.NodeVPNAssignment) bool {
	self := strings.TrimSpace(localNodeID)
	if self == "" {
		return false
	}

	// No policy at all means there is nothing to restrict against.
	if len(assignment.PlacementPolicy) == 0 {
		return true
	}

	var placement struct {
		ExitNodeID  string   `json:"exit_node_id"`
		ExitNodeIDs []string `json:"exit_node_ids"`
	}
	// A policy that cannot be decoded is treated the same as no policy.
	if err := json.Unmarshal(assignment.PlacementPolicy, &placement); err != nil {
		return true
	}

	// A single pinned exit node wins over the pool.
	if pinned := strings.TrimSpace(placement.ExitNodeID); pinned != "" {
		return pinned == self
	}

	// An empty pool places no constraint on who may acquire the lease.
	if len(placement.ExitNodeIDs) == 0 {
		return true
	}
	for i := range placement.ExitNodeIDs {
		if strings.TrimSpace(placement.ExitNodeIDs[i]) == self {
			return true
		}
	}
	return false
}
func localGatewayTransportForAssignment(identity state.Identity, assignment client.NodeVPNAssignment, meshState *syntheticMeshState, _ *client.Client) vpnruntime.PacketTransport {
if meshState == nil || meshState.VPNFabricInbox == nil || assignment.VPNConnectionID == "" {
return nil
@@ -235,6 +235,35 @@ func TestLocalGatewayTransportForAssignmentUsesLocalInboxWithoutBackendFallback(
}
}
// TestVPNAssignmentLeaseAutoAcquireAllowedRequiresSelectedExit checks that when
// the placement policy names a single exit node, only that node is allowed to
// auto-acquire the lease; a node listed only as an entry node is refused.
func TestVPNAssignmentLeaseAutoAcquireAllowedRequiresSelectedExit(t *testing.T) {
	pinnedExit := client.NodeVPNAssignment{
		VPNConnectionID: "vpn-1",
		PlacementPolicy: json.RawMessage(`{
"entry_node_ids":["entry-1"],
"exit_node_id":"exit-1"
}`),
	}

	// Cases run in order; the first mismatch aborts the test.
	cases := []struct {
		nodeID  string
		want    bool
		failure string
	}{
		{nodeID: "entry-1", want: false, failure: "entry node must not auto-acquire the gateway lease"},
		{nodeID: "exit-1", want: true, failure: "selected exit node should auto-acquire the gateway lease"},
	}
	for _, tc := range cases {
		if got := vpnAssignmentLeaseAutoAcquireAllowed(tc.nodeID, pinnedExit); got != tc.want {
			t.Fatal(tc.failure)
		}
	}
}
// TestVPNAssignmentLeaseAutoAcquireAllowedSupportsExitPool checks that when the
// placement policy lists a pool of exit nodes, a member of the pool is allowed
// to auto-acquire the lease and a node outside the pool is not.
func TestVPNAssignmentLeaseAutoAcquireAllowedSupportsExitPool(t *testing.T) {
	pooledExits := client.NodeVPNAssignment{
		VPNConnectionID: "vpn-1",
		PlacementPolicy: json.RawMessage(`{"exit_node_ids":["exit-1","exit-2"]}`),
	}
	allowed := func(nodeID string) bool {
		return vpnAssignmentLeaseAutoAcquireAllowed(nodeID, pooledExits)
	}

	// Pool member first, then an outsider; each check is fatal on mismatch.
	if got := allowed("exit-2"); got != true {
		t.Fatal("node from exit pool should auto-acquire the gateway lease")
	}
	if got := allowed("entry-1"); got != false {
		t.Fatal("node outside exit pool must not auto-acquire the gateway lease")
	}
}
// noopProductionForwardTransport is a field-less type that carries the
// SendProduction method declared directly below it.
// NOTE(review): presumably a do-nothing test double for the production forward
// transport — confirm against the method body, which is not visible here.
type noopProductionForwardTransport struct{}
func (noopProductionForwardTransport) SendProduction(context.Context, string, mesh.ProductionEnvelope) (mesh.ProductionForwardResult, error) {
@@ -7,7 +7,7 @@ import (
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/state"
)
const Version = "0.2.269-vpnwsfarm"
// Version is the version string for this build.
// NOTE(review): presumably the node agent's release identifier — confirm where
// it is read before relying on its format.
const Version = "0.2.271-vpnwsfarm"
func EnrollmentPayload(clusterID, joinToken string, identity state.Identity) client.EnrollRequest {
return client.EnrollRequest{
@@ -955,6 +955,9 @@ func (s Server) readVPNPacketWebSocket(ctx context.Context, conn *websocket.Conn
if sendErr != nil {
if !backendFallbackAllowed {
s.logFabricServiceChannelViolation(nil, clusterID, channelID, vpnConnectionID, backendRelayPolicy, "fabric_route_send_failed_backend_fallback_blocked", sendErr.Error())
if isRetryableVPNPacketIngressError(sendErr) {
continue
}
return sendErr
}
if proxyErr := s.backendVPNPacketPost(ctx, clusterID, vpnConnectionID, payload); proxyErr != nil {