Files
rdp-proxy/agents/rap-node-agent/mobile/fabricvpn/fabricvpn.go
T
2026-05-18 21:33:39 +03:00

592 lines
17 KiB
Go

package fabricvpn
import (
"context"
"crypto/tls"
"encoding/json"
"fmt"
"net"
"strings"
"sync"
"sync/atomic"
"syscall"
"time"
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/fabricproto"
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/mesh"
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/vpnruntime"
"github.com/quic-go/quic-go"
)
// endpointConfig is the JSON shape of one dialable fabric endpoint candidate.
type endpointConfig struct {
	// EndpointID names the candidate; the address is used instead when empty.
	EndpointID string `json:"endpoint_id"`
	// NodeID is the peer behind the endpoint; the exit node ID is used when empty.
	NodeID string `json:"node_id"`
	// Transport selects the carrier; "direct_quic" is assumed when empty.
	Transport string `json:"transport"`
	// Address is the endpoint handed to the transport when dialing.
	Address string `json:"address"`
	// PeerCertSHA256 is the preferred certificate pin for the peer.
	PeerCertSHA256 string `json:"peer_cert_sha256"`
	// TLSCertSHA256 is used as the pin only when PeerCertSHA256 is empty.
	TLSCertSHA256 string `json:"tls_cert_sha256"`
	// Priority is decoded but not consulted by the code in this file.
	Priority int `json:"priority"`
}
// runtimeConfig is the JSON document accepted by Manager.Start.
type runtimeConfig struct {
	// ClusterID is required; Start rejects the config when it is blank.
	ClusterID string `json:"cluster_id"`
	// LocalNodeID is required and is used as the local peer ID of the transport.
	LocalNodeID string `json:"local_node_id"`
	// ExitNodeID is optional; Start falls back to the route bundle's target node.
	ExitNodeID string `json:"exit_node_id"`
	// VPNConnectionID is required and is attached to the packet transport.
	VPNConnectionID string `json:"vpn_connection_id"`
	// Endpoints is the last-resort candidate list; Start replaces it with the
	// candidates chosen from RouteBundle when any are present.
	Endpoints []endpointConfig `json:"endpoints"`
	// RouteBundle carries the route lease and its endpoint candidates.
	RouteBundle routeBundleConfig `json:"route_bundle"`
	// ServiceChannelRequest must carry a non-blank schema version.
	ServiceChannelRequest serviceChannelRequest `json:"service_channel_request"`
	// StreamShards is the number of streams opened per traffic class; Start
	// defaults non-positive values to 4 and caps the value at 32.
	StreamShards int `json:"stream_shards"`
}
// controlForwardResponse is the JSON reply decoded from a control-forward
// frame: either an opaque payload or an error message.
type controlForwardResponse struct {
	// Payload is returned to the caller verbatim as a string.
	Payload json.RawMessage `json:"payload,omitempty"`
	// Error, when non-empty, is turned into the returned error.
	Error string `json:"error,omitempty"`
}
// routeBundleConfig is the JSON shape of the route bundle embedded in the
// runtime config.
type routeBundleConfig struct {
	SchemaVersion string `json:"schema_version"`
	RouteAuthority string `json:"route_authority"`
	// SelectedTargetNode is the last fallback when resolving the exit node ID.
	SelectedTargetNode string `json:"selected_target_node_id"`
	// EndpointCandidates is consulted after the route lease paths.
	EndpointCandidates []endpointConfig `json:"endpoint_candidates"`
	// TargetCandidates is consulted after EndpointCandidates.
	TargetCandidates []endpointConfig `json:"target_candidates"`
	// RouteLease holds the primary and warm-standby paths, which are consulted
	// first when choosing endpoint candidates.
	RouteLease routeLeaseConfig `json:"route_lease"`
}
// routeLeaseConfig is the JSON shape of a route lease inside a route bundle.
type routeLeaseConfig struct {
	SchemaVersion string `json:"schema_version"`
	LeaseID string `json:"lease_id"`
	// SelectedTargetNode is used for the exit node ID when the primary path
	// does not name a target node.
	SelectedTargetNode string `json:"selected_target_node"`
	// PrimaryPath is the first source of endpoint candidates.
	PrimaryPath routeLeasePath `json:"primary_path"`
	// WarmStandbyPaths are scanned in order when the primary path has no
	// candidates.
	WarmStandbyPaths []routeLeasePath `json:"warm_standby_paths"`
	// Multipath and RebuildPolicy are decoded as free-form objects; nothing in
	// this file reads them.
	Multipath map[string]any `json:"multipath"`
	RebuildPolicy map[string]any `json:"rebuild_policy"`
}
// routeLeasePath is the JSON shape of one path (primary or warm standby) of a
// route lease.
type routeLeasePath struct {
	PathID string `json:"path_id"`
	// TargetNodeID of the primary path is the preferred exit node fallback.
	TargetNodeID string `json:"target_node_id"`
	// Status is decoded but not consulted by the code in this file.
	Status string `json:"status"`
	// EndpointCandidates are the dialable endpoints offered by this path.
	EndpointCandidates []endpointConfig `json:"endpoint_candidates"`
}
// serviceChannelRequest is the JSON shape of the service channel request that
// must accompany a runtime config.
type serviceChannelRequest struct {
	// SchemaVersion must be non-blank for Start to accept the config.
	SchemaVersion string `json:"schema_version"`
	ChannelID string `json:"channel_id"`
	ServiceClass string `json:"service_class"`
	SourceRole string `json:"source_role"`
}
// SocketProtector is implemented by the host application. Protect is called
// with the file descriptor of each UDP socket the Manager opens for QUIC and
// reports whether the socket was successfully protected; a false result makes
// the dial fail.
type SocketProtector interface {
	// Protect returns true when the socket identified by fd was protected.
	Protect(fd int64) bool
}
// Manager owns one fabric VPN session: the QUIC transport, the session opened
// on it, and the packet transport layered on top.
//
// Two mutexes are used. opMu is held by Stop, SendPacket, ReceivePacket,
// ControlRequest and Reconnect for their whole duration, so those operations
// never overlap. mu is held only briefly to read or swap the fields listed
// under it. Where both are taken, opMu is acquired first.
type Manager struct {
	// opMu serializes the long-running operations listed above.
	opMu sync.Mutex

	// mu guards every non-atomic field below.
	mu sync.Mutex
	// cancel is non-nil while the Manager is started; a nil value is treated
	// as "stopped" by the reconnect logic.
	cancel context.CancelFunc
	transport *mesh.QUICFabricTransport
	session mesh.FabricTransportSession
	// packet is non-nil only while a session is established; it is what
	// "connected" means in SnapshotJSON.
	packet *vpnruntime.FabricSessionPacketTransport
	inbox *vpnruntime.FabricPacketInbox
	// cfg is the validated config of the last successful connect.
	cfg runtimeConfig
	// lastErr is the text of the most recent recorded error; a successful
	// connect resets it to "".
	lastErr string
	// endpoint is the address of the candidate the current session was
	// established with.
	endpoint string
	// protector, when set, is applied to every UDP socket dialed for QUIC.
	protector SocketProtector

	// Traffic counters, updated without holding mu.
	uplinkPackets atomic.Uint64
	uplinkBytes atomic.Uint64
	downlinkPackets atomic.Uint64
	downlinkBytes atomic.Uint64
}
// NewManager returns a zero-valued Manager. Nothing is connected until Start
// is called.
func NewManager() *Manager {
	return new(Manager)
}
// SetSocketProtector installs the protector applied to UDP sockets opened for
// QUIC. The value is read each time a connection is set up, so a change takes
// effect on the next connect or reconnect. Passing nil removes the protector.
func (m *Manager) SetSocketProtector(protector SocketProtector) {
	m.mu.Lock()
	defer m.mu.Unlock()
	m.protector = protector
}
// Start decodes configJSON, validates it, tears down any running session and
// establishes a new one.
//
// The config must carry a cluster ID, local node ID, VPN connection ID, a
// service channel request with a schema version, and at least one endpoint
// candidate. StreamShards defaults to 4 when non-positive and is capped at 32.
//
// It returns an error when the config cannot be decoded or is incomplete, or
// when no endpoint candidate could be connected. Connection errors are also
// recorded as the Manager's last error.
func (m *Manager) Start(configJSON string) error {
	var cfg runtimeConfig
	if err := json.Unmarshal([]byte(configJSON), &cfg); err != nil {
		return fmt.Errorf("decoding fabric vpn runtime config: %w", err)
	}
	cfg.ClusterID = strings.TrimSpace(cfg.ClusterID)
	cfg.LocalNodeID = strings.TrimSpace(cfg.LocalNodeID)
	cfg.ExitNodeID = strings.TrimSpace(cfg.ExitNodeID)
	cfg.VPNConnectionID = strings.TrimSpace(cfg.VPNConnectionID)
	cfg.Endpoints = fabricRuntimeEndpoints(cfg)
	cfg.ExitNodeID = firstNonEmpty(cfg.ExitNodeID, fabricRuntimeTargetNodeID(cfg))
	if cfg.ClusterID == "" || cfg.LocalNodeID == "" || cfg.VPNConnectionID == "" {
		return fmt.Errorf("cluster, local node and vpn connection id are required")
	}
	if strings.TrimSpace(cfg.ServiceChannelRequest.SchemaVersion) == "" {
		return fmt.Errorf("fabric service channel request is required")
	}
	if len(cfg.Endpoints) == 0 {
		return fmt.Errorf("fabric route lease has no QUIC candidates")
	}
	if cfg.StreamShards <= 0 {
		cfg.StreamShards = 4
	}
	if cfg.StreamShards > 32 {
		cfg.StreamShards = 32
	}

	// Hold opMu across both the teardown and the connect. Releasing it in
	// between would let a second Start (or a reconnect triggered by a packet
	// operation) connect concurrently; whichever finished last would overwrite
	// the other's session and transport without closing them.
	m.opMu.Lock()
	defer m.opMu.Unlock()
	m.stopLocked()

	ctx, cancel := context.WithCancel(context.Background())
	if err := m.connect(ctx, cfg, cancel); err != nil {
		cancel()
		m.setErr(err)
		return err
	}
	return nil
}
// fabricRuntimeEndpoints picks the endpoint candidate list to dial. The first
// non-empty list wins, in this order: the lease's primary path, each warm
// standby path in turn, the bundle's endpoint candidates, the bundle's target
// candidates, and finally cfg.Endpoints (which may itself be empty).
func fabricRuntimeEndpoints(cfg runtimeConfig) []endpointConfig {
	lease := cfg.RouteBundle.RouteLease
	if primary := lease.PrimaryPath.EndpointCandidates; len(primary) > 0 {
		return primary
	}
	for i := range lease.WarmStandbyPaths {
		if standby := lease.WarmStandbyPaths[i].EndpointCandidates; len(standby) > 0 {
			return standby
		}
	}
	switch {
	case len(cfg.RouteBundle.EndpointCandidates) > 0:
		return cfg.RouteBundle.EndpointCandidates
	case len(cfg.RouteBundle.TargetCandidates) > 0:
		return cfg.RouteBundle.TargetCandidates
	default:
		return cfg.Endpoints
	}
}
// fabricRuntimeTargetNodeID resolves the target node named by the route
// bundle. It returns the first non-blank value, trimmed, from: the lease's
// primary path target, the lease's selected target, and the bundle's selected
// target. It returns "" when all three are blank.
//
// Blank means empty after trimming whitespace, so a whitespace-only value in
// an earlier field no longer hides a usable value in a later one.
func fabricRuntimeTargetNodeID(cfg runtimeConfig) string {
	lease := cfg.RouteBundle.RouteLease
	return firstNonEmpty(
		lease.PrimaryPath.TargetNodeID,
		lease.SelectedTargetNode,
		cfg.RouteBundle.SelectedTargetNode,
	)
}
// connect builds a fresh QUIC fabric transport and tries each endpoint in
// cfg.Endpoints in order until one yields a session with all streams opened.
//
// On success the transport, session, inbox, packet transport, cfg, endpoint
// address and cancel are stored on the Manager under mu, the last error is
// cleared, and nil is returned. cancel is only stored, never invoked here.
//
// On failure the Manager's fields are left untouched, the transport created
// for this attempt is closed, and the error of the last candidate tried is
// returned wrapped with the number of candidates.
func (m *Manager) connect(ctx context.Context, cfg runtimeConfig, cancel context.CancelFunc) error {
	quicTransport := mesh.NewQUICFabricTransport(nil)
	quicTransport.SetLocalPeerID(cfg.LocalNodeID)
	quicTransport.DialAddr = m.protectedQUICDialer()
	inbox := vpnruntime.NewFabricPacketInbox(4096)
	// Inbound production envelopes are routed into the inbox that the packet
	// transport reads from.
	quicTransport.SetInboundHandlers(func(inboundCtx context.Context, envelope mesh.ProductionEnvelope) (mesh.ProductionForwardResult, error) {
		if err := inbox.DeliverProductionEnvelope(inboundCtx, envelope); err != nil {
			return mesh.ProductionForwardResult{}, err
		}
		return mesh.ProductionForwardResult{Delivered: true, MessageID: envelope.MessageID}, nil
	}, nil, nil)

	var lastErr error
	for _, endpoint := range cfg.Endpoints {
		target := mesh.FabricTransportTarget{
			EndpointID:     firstNonEmpty(endpoint.EndpointID, endpoint.Address),
			PeerID:         firstNonEmpty(endpoint.NodeID, cfg.ExitNodeID),
			Endpoint:       endpoint.Address,
			Transport:      firstNonEmpty(endpoint.Transport, "direct_quic"),
			PeerCertSHA256: firstNonEmpty(endpoint.PeerCertSHA256, endpoint.TLSCertSHA256),
			Timeout:        5 * time.Second,
			OutboundBuffer: 512,
			InboundBuffer:  512,
			ErrorBuffer:    32,
		}
		carrier, selected, err := mesh.FabricTransportForTarget(target, quicTransport)
		if err != nil {
			lastErr = err
			continue
		}
		// One 5s budget covers both the dial and the stream opens for this
		// candidate; it is released explicitly on every path because a defer
		// would pile up across loop iterations.
		dialCtx, dialCancel := context.WithTimeout(ctx, 5*time.Second)
		session, err := carrier.Connect(dialCtx, selected)
		if err != nil {
			dialCancel()
			lastErr = err
			continue
		}
		streamIDs, streamID, err := openStreams(dialCtx, session, cfg.StreamShards)
		dialCancel()
		if err != nil {
			_ = session.Close()
			lastErr = err
			continue
		}
		m.mu.Lock()
		m.cancel = cancel
		m.transport = quicTransport
		m.session = session
		m.inbox = inbox
		m.cfg = cfg
		m.endpoint = endpoint.Address
		m.lastErr = ""
		m.packet = &vpnruntime.FabricSessionPacketTransport{
			Sender:                  session,
			Receiver:                session,
			Inbox:                   inbox,
			StreamID:                streamID,
			StreamIDsByTrafficClass: streamIDs,
			VPNConnectionID:         cfg.VPNConnectionID,
			SendDirection:           vpnruntime.FabricDirectionClientToGateway,
			ReceiveDirection:        vpnruntime.FabricDirectionGatewayToClient,
		}
		m.mu.Unlock()
		return nil
	}

	// No candidate produced a session. The transport was never handed to the
	// Manager, so nobody else will close it; release it here so repeated
	// failed reconnects do not accumulate transports. The close error is
	// ignored, as in stopLocked: the dial error is the one worth reporting.
	_ = quicTransport.Close()

	if lastErr == nil {
		lastErr = fmt.Errorf("no QUIC exit endpoints available")
	}
	return fmt.Errorf("fabric bootstrap failed after %d endpoint candidates: %w", len(cfg.Endpoints), lastErr)
}
// protectedQUICDialer returns a QUIC dial function that opens its own UDP
// socket, passes the socket's file descriptor to the configured
// SocketProtector, and then dials the endpoint over that socket.
//
// It returns nil when no protector is configured; the caller assigns the
// result to the transport's DialAddr either way. The protector is captured at
// call time, so a later SetSocketProtector does not affect a dialer that was
// already handed out.
//
// The returned function fails when the socket cannot be created, does not
// expose a raw connection, or the protector reports failure. In every failure
// case, including a failed QUIC dial, the socket is closed before returning.
func (m *Manager) protectedQUICDialer() func(context.Context, string, *tls.Config, *quic.Config) (*quic.Conn, error) {
	m.mu.Lock()
	protector := m.protector
	m.mu.Unlock()
	if protector == nil {
		return nil
	}
	return func(ctx context.Context, endpoint string, tlsConfig *tls.Config, config *quic.Config) (*quic.Conn, error) {
		// A bracketed host is taken to mean an IPv6 literal.
		network := "udp4"
		if strings.Contains(endpoint, "[") {
			network = "udp6"
		}
		conn, err := net.ListenPacket(network, ":0")
		if err != nil {
			return nil, err
		}
		raw, ok := conn.(interface {
			SyscallConn() (syscall.RawConn, error)
		})
		if !ok {
			_ = conn.Close()
			return nil, fmt.Errorf("udp socket does not expose raw connection for vpn protection")
		}
		rawConn, err := raw.SyscallConn()
		if err != nil {
			_ = conn.Close()
			return nil, err
		}
		// Control cannot return the callback's result, so the protect outcome
		// is carried out through protectErr.
		var protectErr error
		if err := rawConn.Control(func(fd uintptr) {
			if !protector.Protect(int64(fd)) {
				protectErr = fmt.Errorf("android vpn socket protect failed")
			}
		}); err != nil {
			_ = conn.Close()
			return nil, err
		}
		if protectErr != nil {
			_ = conn.Close()
			return nil, protectErr
		}
		quicConn, err := mesh.DialQUICAddrWithPacketConn(ctx, endpoint, conn, tlsConfig, config)
		if err != nil {
			// The socket was created here, so release it when the dial fails;
			// otherwise every failed attempt leaks a UDP socket. A second
			// Close, should the helper already have closed it, only returns an
			// error that is ignored.
			// NOTE(review): on success the socket's lifetime is left to the
			// helper / QUIC connection — confirm it is closed with the session.
			_ = conn.Close()
			return nil, err
		}
		return quicConn, nil
	}
}
// Stop tears down the current session, if any, and marks the Manager as
// stopped. It waits for any in-flight packet, control or reconnect operation
// to finish first, because those hold opMu.
func (m *Manager) Stop() {
	m.opMu.Lock()
	defer m.opMu.Unlock()

	m.stopLocked()
}
// stopLocked detaches the cancel function, session, transport and packet
// transport from the Manager under mu, then releases them outside the lock:
// cancel is invoked, and the session and transport are closed (close errors
// are ignored). The "Locked" suffix refers to opMu, which Stop holds when
// calling this.
func (m *Manager) stopLocked() {
	m.mu.Lock()
	cancel, session, transport := m.cancel, m.session, m.transport
	m.cancel, m.session, m.transport, m.packet = nil, nil, nil, nil
	m.mu.Unlock()

	if cancel != nil {
		cancel()
	}
	if session != nil {
		_ = session.Close()
	}
	if transport != nil {
		_ = transport.Close()
	}
}
// SendPacket sends one packet towards the gateway. An empty packet is a no-op.
//
// If the Manager is started but currently disconnected, a reconnect is
// attempted first. If the send itself fails, the error is recorded, one
// reconnect is attempted and the send is retried once. When the reconnect
// fails, the original send error is returned; when the retry fails, the retry
// error is returned. The uplink counters are bumped only after a successful
// send.
//
// The packet is copied before being handed to the transport, so the caller
// may reuse its buffer.
func (m *Manager) SendPacket(packet []byte) error {
	if len(packet) == 0 {
		return nil
	}
	m.opMu.Lock()
	defer m.opMu.Unlock()
	if err := m.ensureConnectedLocked(); err != nil {
		return err
	}

	m.mu.Lock()
	sender := m.packet
	m.mu.Unlock()
	if sender == nil {
		return fmt.Errorf("fabric vpn runtime is not connected")
	}

	sendCtx, sendCancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer sendCancel()
	sendErr := sender.SendGatewayPacketBatch(sendCtx, [][]byte{append([]byte(nil), packet...)})
	if sendErr != nil {
		m.setErr(sendErr)
		if m.reconnectLocked() != nil {
			return sendErr
		}
		// The reconnect installed a new packet transport; pick it up.
		m.mu.Lock()
		sender = m.packet
		m.mu.Unlock()
		if sender == nil {
			return sendErr
		}
		retryCtx, retryCancel := context.WithTimeout(context.Background(), 5*time.Second)
		defer retryCancel()
		if retryErr := sender.SendGatewayPacketBatch(retryCtx, [][]byte{append([]byte(nil), packet...)}); retryErr != nil {
			m.setErr(retryErr)
			return retryErr
		}
	}

	m.uplinkPackets.Add(1)
	m.uplinkBytes.Add(uint64(len(packet)))
	return nil
}
// ReceivePacket waits up to timeoutMillis for a packet from the gateway and
// returns a copy of it. A non-positive timeout is replaced by 100ms. It
// returns (nil, nil) when the batch came back empty.
//
// If the Manager is started but disconnected, a reconnect is attempted first.
// When the receive fails, the error is recorded, a reconnect is attempted
// (its result is ignored) and the receive error is returned.
//
// opMu is held for the whole wait, so SendPacket, ControlRequest, Reconnect
// and Stop block until this call returns.
func (m *Manager) ReceivePacket(timeoutMillis int) ([]byte, error) {
	m.opMu.Lock()
	defer m.opMu.Unlock()
	if err := m.ensureConnectedLocked(); err != nil {
		return nil, err
	}

	m.mu.Lock()
	receiver := m.packet
	m.mu.Unlock()
	if receiver == nil {
		return nil, fmt.Errorf("fabric vpn runtime is not connected")
	}

	wait := time.Duration(timeoutMillis) * time.Millisecond
	if wait <= 0 {
		wait = 100 * time.Millisecond
	}
	// The context outlives the receive timeout by one second so the receive's
	// own timeout is the one that normally fires.
	ctx, cancel := context.WithTimeout(context.Background(), wait+time.Second)
	defer cancel()

	batch, err := receiver.ReceiveGatewayPacketBatch(ctx, wait)
	if err != nil {
		m.setErr(err)
		_ = m.reconnectLocked()
		return nil, err
	}
	if len(batch) == 0 {
		return nil, nil
	}
	// NOTE(review): only the first packet of the batch is returned; any
	// further entries are dropped here. Confirm ReceiveGatewayPacketBatch
	// never yields more than one packet, or buffer the remainder.
	first := append([]byte(nil), batch[0]...)
	m.downlinkPackets.Add(1)
	m.downlinkBytes.Add(uint64(len(first)))
	return first, nil
}
// ControlRequest sends payloadJSON as a single reliable data frame on the
// control-forward stream of a dedicated session to the currently connected
// endpoint, and returns the payload of the first matching response frame as a
// string.
//
// A reconnect is attempted first if the Manager is started but disconnected.
// The whole exchange is bounded by a 10s context. An error is returned when
// the Manager is not connected, the session cannot be opened, the send fails,
// the session reports an error or closes, the response is not valid JSON, or
// the response carries a non-empty error field.
//
// opMu is held for the whole exchange, so packet operations block meanwhile.
func (m *Manager) ControlRequest(payloadJSON string) (string, error) {
	m.opMu.Lock()
	defer m.opMu.Unlock()
	if err := m.ensureConnectedLocked(); err != nil {
		return "", err
	}
	m.mu.Lock()
	transport := m.transport
	cfg := m.cfg
	endpointAddress := m.endpoint
	m.mu.Unlock()
	if transport == nil || endpointAddress == "" {
		return "", fmt.Errorf("fabric control runtime is not connected")
	}

	// Recover the full candidate (node ID, transport, certificate pin) for the
	// connected address. The stored address is the candidate's raw Address, so
	// both sides are trimmed before comparing; trimming only one side would
	// miss a padded address and fall back to the bare config below, dropping
	// the certificate pin.
	endpoint := endpointConfig{Address: endpointAddress}
	wantAddress := strings.TrimSpace(endpointAddress)
	for _, candidate := range cfg.Endpoints {
		if strings.TrimSpace(candidate.Address) == wantAddress {
			endpoint = candidate
			break
		}
	}
	target := mesh.FabricTransportTarget{
		EndpointID:     firstNonEmpty(endpoint.EndpointID, endpoint.Address),
		PeerID:         firstNonEmpty(endpoint.NodeID, cfg.ExitNodeID),
		Endpoint:       endpoint.Address,
		Transport:      firstNonEmpty(endpoint.Transport, "direct_quic"),
		PeerCertSHA256: firstNonEmpty(endpoint.PeerCertSHA256, endpoint.TLSCertSHA256),
		Timeout:        8 * time.Second,
		OutboundBuffer: 16,
		InboundBuffer:  16,
		ErrorBuffer:    8,
	}
	carrier, selected, err := mesh.FabricTransportForTarget(target, transport)
	if err != nil {
		return "", err
	}
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()
	session, err := carrier.Connect(ctx, selected)
	if err != nil {
		return "", err
	}
	defer session.Close()
	if err := session.Send(ctx, fabricproto.Frame{
		Type:         fabricproto.FrameData,
		TrafficClass: fabricproto.TrafficClassReliable,
		StreamID:     mesh.FabricControlForwardQUICStreamID,
		Sequence:     uint64(time.Now().UnixNano()),
		Payload:      []byte(payloadJSON),
	}); err != nil {
		return "", err
	}
	for {
		select {
		case <-ctx.Done():
			return "", ctx.Err()
		case err, ok := <-session.Errors():
			if !ok {
				return "", fmt.Errorf("fabric control error stream closed")
			}
			if err != nil {
				return "", err
			}
		case frame, ok := <-session.Frames():
			if !ok {
				return "", fmt.Errorf("fabric control stream closed")
			}
			// Skip anything that is not a data frame on the control stream.
			if frame.Type != fabricproto.FrameData || frame.StreamID != mesh.FabricControlForwardQUICStreamID {
				continue
			}
			var response controlForwardResponse
			if err := json.Unmarshal(frame.Payload, &response); err != nil {
				return "", err
			}
			if response.Error != "" {
				return "", fmt.Errorf("%s", response.Error)
			}
			return string(response.Payload), nil
		}
	}
}
// Reconnect drops the current session and establishes a new one with the
// stored config. It returns an error when the Manager is stopped or no
// endpoint candidate could be connected.
func (m *Manager) Reconnect() error {
	m.opMu.Lock()
	defer m.opMu.Unlock()

	return m.reconnectLocked()
}
// ensureConnectedLocked returns nil when a packet transport is installed. When
// none is installed it reconnects if the Manager is started (cancel is set),
// and otherwise reports that the runtime is stopped. Callers in this file hold
// opMu.
func (m *Manager) ensureConnectedLocked() error {
	m.mu.Lock()
	hasPacket, started := m.packet != nil, m.cancel != nil
	m.mu.Unlock()

	switch {
	case hasPacket:
		return nil
	case !started:
		return fmt.Errorf("fabric vpn runtime is stopped")
	default:
		return m.reconnectLocked()
	}
}
// reconnectLocked discards the current session and transport (closing both,
// ignoring close errors) and, if the Manager is started, connects again with
// the stored config under an 8s budget. The stored cancel function is kept and
// passed back to connect, so the Manager stays "started" even if the attempt
// fails. Connect errors are recorded and returned. Callers in this file hold
// opMu.
func (m *Manager) reconnectLocked() error {
	m.mu.Lock()
	cfg, cancel := m.cfg, m.cancel
	staleSession, staleTransport := m.session, m.transport
	m.session, m.transport, m.packet = nil, nil, nil
	m.mu.Unlock()

	if staleSession != nil {
		_ = staleSession.Close()
	}
	if staleTransport != nil {
		_ = staleTransport.Close()
	}
	if cancel == nil {
		return fmt.Errorf("fabric vpn runtime is stopped")
	}

	ctx, release := context.WithTimeout(context.Background(), 8*time.Second)
	defer release()
	err := m.connect(ctx, cfg, cancel)
	if err != nil {
		m.setErr(err)
		return err
	}
	return nil
}
// SnapshotJSON returns the Manager's current state as a JSON object: schema
// version, whether a packet transport is installed, the connected endpoint,
// the last recorded error, the connection and node IDs, and the four traffic
// counters. The mutex-guarded fields are read together under mu; the counters
// are read afterwards, so they may be slightly newer than the rest.
func (m *Manager) SnapshotJSON() string {
	m.mu.Lock()
	var (
		isConnected = m.packet != nil
		address     = m.endpoint
		errText     = m.lastErr
		connID      = m.cfg.VPNConnectionID
		localID     = m.cfg.LocalNodeID
		exitID      = m.cfg.ExitNodeID
	)
	m.mu.Unlock()

	snapshot := map[string]any{
		"schema_version":   "rap.android_fabric_vpn_runtime.v1",
		"connected":        isConnected,
		"endpoint":         address,
		"last_error":       errText,
		"vpn_connection":   connID,
		"local_node_id":    localID,
		"exit_node_id":     exitID,
		"uplink_packets":   m.uplinkPackets.Load(),
		"uplink_bytes":     m.uplinkBytes.Load(),
		"downlink_packets": m.downlinkPackets.Load(),
		"downlink_bytes":   m.downlinkBytes.Load(),
	}
	// The map holds only strings, a bool and unsigned integers, so the marshal
	// error is not inspected.
	encoded, _ := json.Marshal(snapshot)
	return string(encoded)
}
// setErr records err's message as the Manager's last error. A nil err is
// ignored, so it never clears a previously recorded error.
func (m *Manager) setErr(err error) {
	if err == nil {
		return
	}
	msg := err.Error()
	m.mu.Lock()
	defer m.mu.Unlock()
	m.lastErr = msg
}
// openStreams sends an open-stream frame for `shards` streams in each of the
// interactive and bulk traffic classes. Stream IDs are consecutive, starting
// at the current Unix time in nanoseconds.
//
// It returns the stream IDs grouped by traffic class name and the first ID
// opened as the primary stream. The first failed send aborts the whole call
// with (nil, 0, err). With shards <= 0 nothing is sent and the result is an
// empty map and a primary ID of 0.
func openStreams(ctx context.Context, session mesh.FabricTransportSession, shards int) (map[string][]uint64, uint64, error) {
	firstID := uint64(time.Now().UnixNano())
	classes := []struct {
		name         string
		trafficClass fabricproto.TrafficClass
	}{
		{name: vpnruntime.FabricTrafficClassInteractive, trafficClass: fabricproto.TrafficClassInteractive},
		{name: vpnruntime.FabricTrafficClassBulk, trafficClass: fabricproto.TrafficClassBulk},
	}

	byClass := make(map[string][]uint64, len(classes))
	var primary uint64
	for classIndex, class := range classes {
		// Each class occupies its own run of `shards` consecutive IDs.
		offset := classIndex * shards
		for shard := 0; shard < shards; shard++ {
			id := firstID + uint64(offset+shard)
			open := fabricproto.Frame{
				Type:         fabricproto.FrameOpenStream,
				StreamID:     id,
				TrafficClass: class.trafficClass,
			}
			if err := session.Send(ctx, open); err != nil {
				return nil, 0, err
			}
			if primary == 0 {
				primary = id
			}
			byClass[class.name] = append(byClass[class.name], id)
		}
	}
	return byClass, primary, nil
}
// firstNonEmpty returns the first argument that is non-empty after trimming
// surrounding whitespace, in its trimmed form. It returns "" when every
// argument is blank or no arguments are given.
func firstNonEmpty(values ...string) string {
	for i := range values {
		if trimmed := strings.TrimSpace(values[i]); trimmed != "" {
			return trimmed
		}
	}
	return ""
}