Pin QUIC fabric endpoint certificates

This commit is contained in:
2026-05-16 10:51:06 +03:00
parent 3386a5e9b2
commit 4ebc6629e6
6 changed files with 157 additions and 18 deletions
@@ -4,9 +4,11 @@ import (
"context" "context"
"crypto/rand" "crypto/rand"
"crypto/rsa" "crypto/rsa"
"crypto/sha256"
"crypto/tls" "crypto/tls"
"crypto/x509" "crypto/x509"
"crypto/x509/pkix" "crypto/x509/pkix"
"encoding/hex"
"encoding/json" "encoding/json"
"fmt" "fmt"
"log" "log"
@@ -373,6 +375,7 @@ type syntheticMeshState struct {
StopListener func() StopListener func()
QUICFabricServer *mesh.QUICFabricServer QUICFabricServer *mesh.QUICFabricServer
QUICFabricListenAddr string QUICFabricListenAddr string
QUICFabricCertSHA256 string
QUICFabricError string QUICFabricError string
ConfigLoadError string ConfigLoadError string
} }
@@ -788,7 +791,7 @@ func startSyntheticMeshEndpoint(ctx context.Context, _ context.CancelFunc, cfg c
listenerCfg := meshListenerRuntimeConfig(cfg, loadedConfig.MeshListener) listenerCfg := meshListenerRuntimeConfig(cfg, loadedConfig.MeshListener)
listenerReport, stopListener := startSyntheticMeshHTTPServer(ctx, listenerCfg, identity, dynamicListenerHandler, len(peerEndpoints), len(routes), gateEnabled, runtimeEnabled) listenerReport, stopListener := startSyntheticMeshHTTPServer(ctx, listenerCfg, identity, dynamicListenerHandler, len(peerEndpoints), len(routes), gateEnabled, runtimeEnabled)
vpnFabricSessionPeers := mesh.NewFabricSessionPeerManager() vpnFabricSessionPeers := mesh.NewFabricSessionPeerManager()
quicFabricServer, quicFabricAddr, quicFabricErr := startQUICFabricEndpoint(ctx, cfg, identity) quicFabricServer, quicFabricAddr, quicFabricCertSHA256, quicFabricErr := startQUICFabricEndpoint(ctx, cfg, identity)
return &syntheticMeshState{ return &syntheticMeshState{
Runtime: runtime, Runtime: runtime,
Routes: routes, Routes: routes,
@@ -827,6 +830,7 @@ func startSyntheticMeshEndpoint(ctx context.Context, _ context.CancelFunc, cfg c
StopListener: stopListener, StopListener: stopListener,
QUICFabricServer: quicFabricServer, QUICFabricServer: quicFabricServer,
QUICFabricListenAddr: quicFabricAddr, QUICFabricListenAddr: quicFabricAddr,
QUICFabricCertSHA256: quicFabricCertSHA256,
QUICFabricError: errorString(quicFabricErr), QUICFabricError: errorString(quicFabricErr),
ConfigLoadError: errorString(err), ConfigLoadError: errorString(err),
}, stopListener, nil }, stopListener, nil
@@ -1166,16 +1170,16 @@ func bindSyntheticMeshListener(cfg config.Config) (net.Listener, string, bool, e
return nil, "", false, err return nil, "", false, err
} }
func startQUICFabricEndpoint(ctx context.Context, cfg config.Config, identity state.Identity) (*mesh.QUICFabricServer, string, error) { func startQUICFabricEndpoint(ctx context.Context, cfg config.Config, identity state.Identity) (*mesh.QUICFabricServer, string, string, error) {
if !cfg.MeshQUICFabricEnabled { if !cfg.MeshQUICFabricEnabled {
return nil, "", nil return nil, "", "", nil
} }
if strings.TrimSpace(cfg.MeshQUICFabricListenAddr) == "" { if strings.TrimSpace(cfg.MeshQUICFabricListenAddr) == "" {
return nil, "", fmt.Errorf("quic fabric enabled but listen addr is empty") return nil, "", "", fmt.Errorf("quic fabric enabled but listen addr is empty")
} }
tlsConfig, err := quicFabricTLSConfig(identity) tlsConfig, certSHA256, err := quicFabricTLSConfig(identity)
if err != nil { if err != nil {
return nil, "", err return nil, "", "", err
} }
server, err := mesh.StartQUICFabricServer(ctx, mesh.QUICFabricServerConfig{ server, err := mesh.StartQUICFabricServer(ctx, mesh.QUICFabricServerConfig{
ListenAddr: cfg.MeshQUICFabricListenAddr, ListenAddr: cfg.MeshQUICFabricListenAddr,
@@ -1190,20 +1194,20 @@ func startQUICFabricEndpoint(ctx context.Context, cfg config.Config, identity st
}, },
}) })
if err != nil { if err != nil {
return nil, "", err return nil, "", "", err
} }
addr := "" addr := ""
if server.Addr() != nil { if server.Addr() != nil {
addr = server.Addr().String() addr = server.Addr().String()
} }
log.Printf("quic fabric endpoint enabled: listen_addr=%s effective_addr=%s node_id=%s cluster_id=%s", cfg.MeshQUICFabricListenAddr, addr, identity.NodeID, identity.ClusterID) log.Printf("quic fabric endpoint enabled: listen_addr=%s effective_addr=%s node_id=%s cluster_id=%s", cfg.MeshQUICFabricListenAddr, addr, identity.NodeID, identity.ClusterID)
return server, addr, nil return server, addr, certSHA256, nil
} }
func quicFabricTLSConfig(identity state.Identity) (*tls.Config, error) { func quicFabricTLSConfig(identity state.Identity) (*tls.Config, string, error) {
key, err := rsa.GenerateKey(rand.Reader, 2048) key, err := rsa.GenerateKey(rand.Reader, 2048)
if err != nil { if err != nil {
return nil, err return nil, "", err
} }
commonName := firstNonEmpty(identity.NodeID, "rap-fabric-node") commonName := firstNonEmpty(identity.NodeID, "rap-fabric-node")
template := x509.Certificate{ template := x509.Certificate{
@@ -1217,15 +1221,16 @@ func quicFabricTLSConfig(identity state.Identity) (*tls.Config, error) {
} }
certDER, err := x509.CreateCertificate(rand.Reader, &template, &template, &key.PublicKey, key) certDER, err := x509.CreateCertificate(rand.Reader, &template, &template, &key.PublicKey, key)
if err != nil { if err != nil {
return nil, err return nil, "", err
} }
sum := sha256.Sum256(certDER)
return &tls.Config{ return &tls.Config{
Certificates: []tls.Certificate{{ Certificates: []tls.Certificate{{
Certificate: [][]byte{certDER}, Certificate: [][]byte{certDER},
PrivateKey: key, PrivateKey: key,
}}, }},
NextProtos: []string{"rap-fabric-data-session-v1"}, NextProtos: []string{"rap-fabric-data-session-v1"},
}, nil }, hex.EncodeToString(sum[:]), nil
} }
func isAddressInUse(err error) bool { func isAddressInUse(err error) bool {
@@ -1825,17 +1830,20 @@ func applyQUICFabricConfigIfChanged(ctx context.Context, cfg config.Config, iden
_ = meshState.QUICFabricServer.Close() _ = meshState.QUICFabricServer.Close()
meshState.QUICFabricServer = nil meshState.QUICFabricServer = nil
meshState.QUICFabricListenAddr = "" meshState.QUICFabricListenAddr = ""
meshState.QUICFabricCertSHA256 = ""
} }
if !cfg.MeshQUICFabricEnabled { if !cfg.MeshQUICFabricEnabled {
meshState.QUICFabricError = "" meshState.QUICFabricError = ""
meshState.QUICFabricCertSHA256 = ""
return return
} }
if meshState.QUICFabricServer != nil { if meshState.QUICFabricServer != nil {
return return
} }
server, addr, err := startQUICFabricEndpoint(ctx, cfg, identity) server, addr, certSHA256, err := startQUICFabricEndpoint(ctx, cfg, identity)
meshState.QUICFabricServer = server meshState.QUICFabricServer = server
meshState.QUICFabricListenAddr = addr meshState.QUICFabricListenAddr = addr
meshState.QUICFabricCertSHA256 = certSHA256
meshState.QUICFabricError = errorString(err) meshState.QUICFabricError = errorString(err)
if err != nil { if err != nil {
log.Printf("quic fabric endpoint unavailable: listen_addr=%s node_id=%s cluster_id=%s err=%v", cfg.MeshQUICFabricListenAddr, identity.NodeID, identity.ClusterID, err) log.Printf("quic fabric endpoint unavailable: listen_addr=%s node_id=%s cluster_id=%s err=%v", cfg.MeshQUICFabricListenAddr, identity.NodeID, identity.ClusterID, err)
@@ -2610,6 +2618,7 @@ func heartbeatPayload(cfg config.Config, identity state.Identity, meshState *syn
"enabled": meshState.QUICFabricServer != nil, "enabled": meshState.QUICFabricServer != nil,
"listen_addr": cfg.MeshQUICFabricListenAddr, "listen_addr": cfg.MeshQUICFabricListenAddr,
"effective_listen_addr": meshState.QUICFabricListenAddr, "effective_listen_addr": meshState.QUICFabricListenAddr,
"tls_cert_sha256": meshState.QUICFabricCertSHA256,
"error": meshState.QUICFabricError, "error": meshState.QUICFabricError,
} }
} }
@@ -3860,6 +3869,7 @@ func advertisedEndpointCandidates(cfg config.Config, identity state.Identity, me
Region: cfg.MeshRegion, Region: cfg.MeshRegion,
Priority: 5, Priority: 5,
PolicyTags: []string{"fast-path"}, PolicyTags: []string{"fast-path"},
Metadata: quicFabricEndpointMetadata(meshState.QUICFabricCertSHA256),
}) })
} }
candidates = append(candidates, interfaceEndpointCandidates(cfg, identity, meshState, observedAt)...) candidates = append(candidates, interfaceEndpointCandidates(cfg, identity, meshState, observedAt)...)
@@ -3916,6 +3926,18 @@ func advertisedEndpointCandidates(cfg config.Config, identity state.Identity, me
return candidates, nil return candidates, nil
} }
// quicFabricEndpointMetadata encodes the listener certificate fingerprint as
// the JSON metadata object attached to an advertised QUIC endpoint candidate.
//
// Surrounding whitespace is stripped from certSHA256 first. A blank
// fingerprint (or a marshalling failure) yields nil so that no metadata is
// attached at all; otherwise the result is a single-key object of the form
// {"tls_cert_sha256":"<fingerprint>"}.
func quicFabricEndpointMetadata(certSHA256 string) json.RawMessage {
	fingerprint := strings.TrimSpace(certSHA256)
	if fingerprint == "" {
		return nil
	}
	document := struct {
		TLSCertSHA256 string `json:"tls_cert_sha256"`
	}{TLSCertSHA256: fingerprint}
	encoded, err := json.Marshal(document)
	if err != nil {
		return nil
	}
	return encoded
}
func interfaceEndpointCandidates(cfg config.Config, identity state.Identity, meshState *syntheticMeshState, observedAt time.Time) []mesh.PeerEndpointCandidate { func interfaceEndpointCandidates(cfg config.Config, identity state.Identity, meshState *syntheticMeshState, observedAt time.Time) []mesh.PeerEndpointCandidate {
if meshState == nil { if meshState == nil {
return nil return nil
@@ -4677,6 +4699,7 @@ func vpnFabricSessionTarget(meshState *syntheticMeshState, nextHop string) (mesh
return mesh.FabricTransportTarget{ return mesh.FabricTransportTarget{
Endpoint: endpoint, Endpoint: endpoint,
Transport: item.Candidate.Transport, Transport: item.Candidate.Transport,
PeerCertSHA256: endpointCandidateTLSCertSHA256(item.Candidate),
}, true }, true
} }
} }
@@ -4687,6 +4710,19 @@ func vpnFabricSessionTarget(meshState *syntheticMeshState, nextHop string) (mesh
return mesh.FabricTransportTarget{Endpoint: endpoint}, true return mesh.FabricTransportTarget{Endpoint: endpoint}, true
} }
// endpointCandidateTLSCertSHA256 extracts the "tls_cert_sha256" value from a
// peer endpoint candidate's JSON metadata.
//
// It returns the empty string when the candidate carries no metadata or when
// the metadata cannot be decoded as JSON; otherwise it returns the stored
// value with surrounding whitespace removed.
func endpointCandidateTLSCertSHA256(candidate mesh.PeerEndpointCandidate) string {
	raw := candidate.Metadata
	if len(raw) == 0 {
		return ""
	}
	decoded := struct {
		TLSCertSHA256 string `json:"tls_cert_sha256"`
	}{}
	err := json.Unmarshal(raw, &decoded)
	if err != nil {
		// Malformed metadata is treated the same as absent metadata.
		return ""
	}
	return strings.TrimSpace(decoded.TLSCertSHA256)
}
func fabricSessionGatewayToken(identity state.Identity, assignment client.NodeVPNAssignment, nextHop string) string { func fabricSessionGatewayToken(identity state.Identity, assignment client.NodeVPNAssignment, nextHop string) string {
tokenParts := []string{ tokenParts := []string{
"rap_fsn_vpn", "rap_fsn_vpn",
@@ -782,6 +782,7 @@ func TestVPNFabricSessionTargetPrefersRankedQUICCandidate(t *testing.T) {
ConnectivityMode: "direct", ConnectivityMode: "direct",
Priority: 10, Priority: 10,
LastVerifiedAt: &now, LastVerifiedAt: &now,
Metadata: json.RawMessage(`{"tls_cert_sha256":"abcdef"}`),
}, },
}, },
}, },
@@ -789,7 +790,7 @@ func TestVPNFabricSessionTargetPrefersRankedQUICCandidate(t *testing.T) {
if !ok { if !ok {
t.Fatal("target missing") t.Fatal("target missing")
} }
if target.Endpoint != "quic://node-b.example.test:19443" || target.Transport != "direct_quic" { if target.Endpoint != "quic://node-b.example.test:19443" || target.Transport != "direct_quic" || target.PeerCertSHA256 != "abcdef" {
t.Fatalf("target = %+v, want direct quic candidate", target) t.Fatalf("target = %+v, want direct quic candidate", target)
} }
} }
@@ -2,7 +2,10 @@ package mesh
import ( import (
"context" "context"
"crypto/sha256"
"crypto/tls" "crypto/tls"
"crypto/x509"
"encoding/hex"
"fmt" "fmt"
"strings" "strings"
"sync" "sync"
@@ -34,6 +37,34 @@ func NewQUICFabricTransport(config *quic.Config) *QUICFabricTransport {
return &QUICFabricTransport{Config: config} return &QUICFabricTransport{Config: config}
} }
// quicTLSConfigForTarget builds the client-side TLS configuration used when
// the target does not supply its own TLSConfig.
//
// Without a pinned fingerprint the configuration only sets the fabric ALPN
// protocol. With a fingerprint, InsecureSkipVerify is enabled and a
// VerifyPeerCertificate callback takes over: it hashes the first certificate
// presented by the peer with SHA-256 and rejects the handshake unless the hex
// digest equals the normalized pin.
func quicTLSConfigForTarget(target FabricTransportTarget) *tls.Config {
	pin := normalizeCertSHA256(target.PeerCertSHA256)
	if pin == "" {
		return &tls.Config{NextProtos: []string{fabricQUICNextProto}}
	}

	verifyPin := func(rawCerts [][]byte, _ [][]*x509.Certificate) error {
		if len(rawCerts) == 0 {
			return fmt.Errorf("quic peer certificate missing")
		}
		// Only the first certificate sent by the peer is compared with the pin.
		digest := sha256.Sum256(rawCerts[0])
		if hex.EncodeToString(digest[:]) != pin {
			return fmt.Errorf("quic peer certificate fingerprint mismatch")
		}
		return nil
	}

	return &tls.Config{
		NextProtos:            []string{fabricQUICNextProto},
		InsecureSkipVerify:    true,
		VerifyPeerCertificate: verifyPin,
	}
}
// normalizeCertSHA256 canonicalizes a certificate fingerprint so that
// differently formatted spellings compare equal: surrounding whitespace is
// trimmed, letters are lower-cased, every "sha256:" occurrence is removed and
// all remaining colons are dropped. The result is not validated as hex.
func normalizeCertSHA256(value string) string {
	lowered := strings.ToLower(strings.TrimSpace(value))
	withoutLabel := strings.ReplaceAll(lowered, "sha256:", "")
	return strings.ReplaceAll(withoutLabel, ":", "")
}
func (t *QUICFabricTransport) Connect(ctx context.Context, target FabricTransportTarget) (FabricTransportSession, error) { func (t *QUICFabricTransport) Connect(ctx context.Context, target FabricTransportTarget) (FabricTransportSession, error) {
if target.Endpoint == "" { if target.Endpoint == "" {
return nil, fmt.Errorf("quic fabric endpoint is required") return nil, fmt.Errorf("quic fabric endpoint is required")
@@ -41,7 +72,7 @@ func (t *QUICFabricTransport) Connect(ctx context.Context, target FabricTranspor
target.Endpoint = strings.TrimPrefix(strings.TrimSpace(target.Endpoint), "quic://") target.Endpoint = strings.TrimPrefix(strings.TrimSpace(target.Endpoint), "quic://")
tlsConfig := target.TLSConfig tlsConfig := target.TLSConfig
if tlsConfig == nil { if tlsConfig == nil {
tlsConfig = &tls.Config{NextProtos: []string{fabricQUICNextProto}} tlsConfig = quicTLSConfigForTarget(target)
} else { } else {
tlsConfig = tlsConfig.Clone() tlsConfig = tlsConfig.Clone()
if len(tlsConfig.NextProtos) == 0 { if len(tlsConfig.NextProtos) == 0 {
@@ -4,11 +4,14 @@ import (
"context" "context"
"crypto/rand" "crypto/rand"
"crypto/rsa" "crypto/rsa"
"crypto/sha256"
"crypto/tls" "crypto/tls"
"crypto/x509" "crypto/x509"
"crypto/x509/pkix" "crypto/x509/pkix"
"encoding/hex"
"encoding/pem" "encoding/pem"
"math/big" "math/big"
"strings"
"testing" "testing"
"time" "time"
@@ -102,6 +105,56 @@ func TestQUICFabricTransportDataAck(t *testing.T) {
} }
} }
// TestQUICFabricTransportVerifiesPinnedCertificate connects to an echo server
// while pinning the SHA-256 fingerprint of that server's own certificate, and
// checks that a ping sent over the session comes back as a matching pong.
func TestQUICFabricTransportVerifiesPinnedCertificate(t *testing.T) {
	serverTLS := testQUICTLSConfig(t)
	echo := startQUICFabricEchoServerWithTLS(t, serverTLS)
	defer echo.Close()

	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
	defer cancel()

	target := FabricTransportTarget{
		Endpoint:       echo.Addr().String(),
		PeerCertSHA256: testQUICCertSHA256(t, serverTLS),
		Timeout:        time.Second,
		InboundBuffer:  4,
		ErrorBuffer:    4,
	}
	session, err := NewQUICFabricTransport(nil).Connect(ctx, target)
	if err != nil {
		t.Fatalf("connect quic fabric with pinned certificate: %v", err)
	}
	defer session.Close()

	ping := fabricproto.Frame{Type: fabricproto.FramePing, Sequence: 43, Payload: []byte("pin")}
	if sendErr := session.Send(ctx, ping); sendErr != nil {
		t.Fatalf("send ping: %v", sendErr)
	}

	select {
	case pong := <-session.Frames():
		echoed := pong.Type == fabricproto.FramePong && pong.Sequence == 43 && string(pong.Payload) == "pin"
		if !echoed {
			t.Fatalf("frame = %+v", pong)
		}
	case sessionErr := <-session.Errors():
		t.Fatalf("session error: %v", sessionErr)
	case <-ctx.Done():
		t.Fatal(ctx.Err())
	}
}
// TestQUICFabricTransportRejectsPinnedCertificateMismatch dials the echo
// server with a pin of sixty-four zeros, which cannot match the server's
// freshly generated certificate, and expects Connect to fail.
func TestQUICFabricTransportRejectsPinnedCertificateMismatch(t *testing.T) {
	echo := startQUICFabricEchoServer(t)
	defer echo.Close()

	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
	defer cancel()

	wrongPin := strings.Repeat("0", 64)
	target := FabricTransportTarget{
		Endpoint:       echo.Addr().String(),
		PeerCertSHA256: wrongPin,
		Timeout:        time.Second,
	}
	if _, err := NewQUICFabricTransport(nil).Connect(ctx, target); err == nil {
		t.Fatal("connect succeeded with mismatched certificate pin")
	}
}
func TestQUICFabricServerHandlesFabricFrames(t *testing.T) { func TestQUICFabricServerHandlesFabricFrames(t *testing.T) {
var events []FabricSessionEventLogEntry var events []FabricSessionEventLogEntry
server, err := StartQUICFabricServer(context.Background(), QUICFabricServerConfig{ server, err := StartQUICFabricServer(context.Background(), QUICFabricServerConfig{
@@ -152,7 +205,12 @@ func TestQUICFabricServerHandlesFabricFrames(t *testing.T) {
func startQUICFabricEchoServer(t *testing.T) *quic.Listener { func startQUICFabricEchoServer(t *testing.T) *quic.Listener {
t.Helper() t.Helper()
listener, err := quic.ListenAddr("127.0.0.1:0", testQUICTLSConfig(t), &quic.Config{EnableDatagrams: true}) return startQUICFabricEchoServerWithTLS(t, testQUICTLSConfig(t))
}
func startQUICFabricEchoServerWithTLS(t *testing.T, tlsConfig *tls.Config) *quic.Listener {
t.Helper()
listener, err := quic.ListenAddr("127.0.0.1:0", tlsConfig, &quic.Config{EnableDatagrams: true})
if err != nil { if err != nil {
t.Fatalf("listen quic: %v", err) t.Fatalf("listen quic: %v", err)
} }
@@ -189,6 +247,15 @@ func startQUICFabricEchoServer(t *testing.T) *quic.Listener {
return listener return listener
} }
// testQUICCertSHA256 returns the lower-case hex SHA-256 digest of the first
// DER certificate in the first certificate chain of tlsConfig. The test is
// failed immediately when the configuration carries no certificate.
func testQUICCertSHA256(t *testing.T, tlsConfig *tls.Config) string {
	t.Helper()
	chains := tlsConfig.Certificates
	if len(chains) == 0 || len(chains[0].Certificate) == 0 {
		t.Fatal("test tls config has no certificate")
	}
	leafDER := chains[0].Certificate[0]
	digest := sha256.Sum256(leafDER)
	return hex.EncodeToString(digest[:])
}
func testQUICTLSConfig(t *testing.T) *tls.Config { func testQUICTLSConfig(t *testing.T) *tls.Config {
t.Helper() t.Helper()
key, err := rsa.GenerateKey(rand.Reader, 2048) key, err := rsa.GenerateKey(rand.Reader, 2048)
@@ -31,6 +31,7 @@ type FabricTransportTarget struct {
Token string Token string
Header http.Header Header http.Header
TLSConfig *tls.Config TLSConfig *tls.Config
PeerCertSHA256 string
Timeout time.Duration Timeout time.Duration
MaxPayload int MaxPayload int
OutboundBuffer int OutboundBuffer int
@@ -313,6 +313,9 @@ compatibility candidates for fabric sessions.
VPN fabric-session gateway transport now consumes ranked endpoint candidates, VPN fabric-session gateway transport now consumes ranked endpoint candidates,
so dataplane sessions can select QUIC fast-path candidates and fall back to so dataplane sessions can select QUIC fast-path candidates and fall back to
legacy peer endpoints when the control plane has not published candidates yet. legacy peer endpoints when the control plane has not published candidates yet.
The temporary self-signed QUIC listener advertises the SHA-256 fingerprint of
its certificate in endpoint metadata (`tls_cert_sha256`). When a candidate
carries that fingerprint, the QUIC client pins it and rejects any peer
certificate whose digest differs, instead of disabling verification outright,
while the cluster CA path is being finished.
Deliverables: Deliverables: