Pin QUIC fabric endpoint certificates

This commit is contained in:
2026-05-16 10:51:06 +03:00
parent 3386a5e9b2
commit 4ebc6629e6
6 changed files with 157 additions and 18 deletions
@@ -4,9 +4,11 @@ import (
"context"
"crypto/rand"
"crypto/rsa"
"crypto/sha256"
"crypto/tls"
"crypto/x509"
"crypto/x509/pkix"
"encoding/hex"
"encoding/json"
"fmt"
"log"
@@ -373,6 +375,7 @@ type syntheticMeshState struct {
StopListener func()
QUICFabricServer *mesh.QUICFabricServer
QUICFabricListenAddr string
QUICFabricCertSHA256 string
QUICFabricError string
ConfigLoadError string
}
@@ -788,7 +791,7 @@ func startSyntheticMeshEndpoint(ctx context.Context, _ context.CancelFunc, cfg c
listenerCfg := meshListenerRuntimeConfig(cfg, loadedConfig.MeshListener)
listenerReport, stopListener := startSyntheticMeshHTTPServer(ctx, listenerCfg, identity, dynamicListenerHandler, len(peerEndpoints), len(routes), gateEnabled, runtimeEnabled)
vpnFabricSessionPeers := mesh.NewFabricSessionPeerManager()
quicFabricServer, quicFabricAddr, quicFabricErr := startQUICFabricEndpoint(ctx, cfg, identity)
quicFabricServer, quicFabricAddr, quicFabricCertSHA256, quicFabricErr := startQUICFabricEndpoint(ctx, cfg, identity)
return &syntheticMeshState{
Runtime: runtime,
Routes: routes,
@@ -827,6 +830,7 @@ func startSyntheticMeshEndpoint(ctx context.Context, _ context.CancelFunc, cfg c
StopListener: stopListener,
QUICFabricServer: quicFabricServer,
QUICFabricListenAddr: quicFabricAddr,
QUICFabricCertSHA256: quicFabricCertSHA256,
QUICFabricError: errorString(quicFabricErr),
ConfigLoadError: errorString(err),
}, stopListener, nil
@@ -1166,16 +1170,16 @@ func bindSyntheticMeshListener(cfg config.Config) (net.Listener, string, bool, e
return nil, "", false, err
}
func startQUICFabricEndpoint(ctx context.Context, cfg config.Config, identity state.Identity) (*mesh.QUICFabricServer, string, error) {
func startQUICFabricEndpoint(ctx context.Context, cfg config.Config, identity state.Identity) (*mesh.QUICFabricServer, string, string, error) {
if !cfg.MeshQUICFabricEnabled {
return nil, "", nil
return nil, "", "", nil
}
if strings.TrimSpace(cfg.MeshQUICFabricListenAddr) == "" {
return nil, "", fmt.Errorf("quic fabric enabled but listen addr is empty")
return nil, "", "", fmt.Errorf("quic fabric enabled but listen addr is empty")
}
tlsConfig, err := quicFabricTLSConfig(identity)
tlsConfig, certSHA256, err := quicFabricTLSConfig(identity)
if err != nil {
return nil, "", err
return nil, "", "", err
}
server, err := mesh.StartQUICFabricServer(ctx, mesh.QUICFabricServerConfig{
ListenAddr: cfg.MeshQUICFabricListenAddr,
@@ -1190,20 +1194,20 @@ func startQUICFabricEndpoint(ctx context.Context, cfg config.Config, identity st
},
})
if err != nil {
return nil, "", err
return nil, "", "", err
}
addr := ""
if server.Addr() != nil {
addr = server.Addr().String()
}
log.Printf("quic fabric endpoint enabled: listen_addr=%s effective_addr=%s node_id=%s cluster_id=%s", cfg.MeshQUICFabricListenAddr, addr, identity.NodeID, identity.ClusterID)
return server, addr, nil
return server, addr, certSHA256, nil
}
func quicFabricTLSConfig(identity state.Identity) (*tls.Config, error) {
func quicFabricTLSConfig(identity state.Identity) (*tls.Config, string, error) {
key, err := rsa.GenerateKey(rand.Reader, 2048)
if err != nil {
return nil, err
return nil, "", err
}
commonName := firstNonEmpty(identity.NodeID, "rap-fabric-node")
template := x509.Certificate{
@@ -1217,15 +1221,16 @@ func quicFabricTLSConfig(identity state.Identity) (*tls.Config, error) {
}
certDER, err := x509.CreateCertificate(rand.Reader, &template, &template, &key.PublicKey, key)
if err != nil {
return nil, err
return nil, "", err
}
sum := sha256.Sum256(certDER)
return &tls.Config{
Certificates: []tls.Certificate{{
Certificate: [][]byte{certDER},
PrivateKey: key,
}},
NextProtos: []string{"rap-fabric-data-session-v1"},
}, nil
}, hex.EncodeToString(sum[:]), nil
}
func isAddressInUse(err error) bool {
@@ -1825,17 +1830,20 @@ func applyQUICFabricConfigIfChanged(ctx context.Context, cfg config.Config, iden
_ = meshState.QUICFabricServer.Close()
meshState.QUICFabricServer = nil
meshState.QUICFabricListenAddr = ""
meshState.QUICFabricCertSHA256 = ""
}
if !cfg.MeshQUICFabricEnabled {
meshState.QUICFabricError = ""
meshState.QUICFabricCertSHA256 = ""
return
}
if meshState.QUICFabricServer != nil {
return
}
server, addr, err := startQUICFabricEndpoint(ctx, cfg, identity)
server, addr, certSHA256, err := startQUICFabricEndpoint(ctx, cfg, identity)
meshState.QUICFabricServer = server
meshState.QUICFabricListenAddr = addr
meshState.QUICFabricCertSHA256 = certSHA256
meshState.QUICFabricError = errorString(err)
if err != nil {
log.Printf("quic fabric endpoint unavailable: listen_addr=%s node_id=%s cluster_id=%s err=%v", cfg.MeshQUICFabricListenAddr, identity.NodeID, identity.ClusterID, err)
@@ -2610,6 +2618,7 @@ func heartbeatPayload(cfg config.Config, identity state.Identity, meshState *syn
"enabled": meshState.QUICFabricServer != nil,
"listen_addr": cfg.MeshQUICFabricListenAddr,
"effective_listen_addr": meshState.QUICFabricListenAddr,
"tls_cert_sha256": meshState.QUICFabricCertSHA256,
"error": meshState.QUICFabricError,
}
}
@@ -3860,6 +3869,7 @@ func advertisedEndpointCandidates(cfg config.Config, identity state.Identity, me
Region: cfg.MeshRegion,
Priority: 5,
PolicyTags: []string{"fast-path"},
Metadata: quicFabricEndpointMetadata(meshState.QUICFabricCertSHA256),
})
}
candidates = append(candidates, interfaceEndpointCandidates(cfg, identity, meshState, observedAt)...)
@@ -3916,6 +3926,18 @@ func advertisedEndpointCandidates(cfg config.Config, identity state.Identity, me
return candidates, nil
}
func quicFabricEndpointMetadata(certSHA256 string) json.RawMessage {
certSHA256 = strings.TrimSpace(certSHA256)
if certSHA256 == "" {
return nil
}
payload, err := json.Marshal(map[string]string{"tls_cert_sha256": certSHA256})
if err != nil {
return nil
}
return payload
}
func interfaceEndpointCandidates(cfg config.Config, identity state.Identity, meshState *syntheticMeshState, observedAt time.Time) []mesh.PeerEndpointCandidate {
if meshState == nil {
return nil
@@ -4677,6 +4699,7 @@ func vpnFabricSessionTarget(meshState *syntheticMeshState, nextHop string) (mesh
return mesh.FabricTransportTarget{
Endpoint: endpoint,
Transport: item.Candidate.Transport,
PeerCertSHA256: endpointCandidateTLSCertSHA256(item.Candidate),
}, true
}
}
@@ -4687,6 +4710,19 @@ func vpnFabricSessionTarget(meshState *syntheticMeshState, nextHop string) (mesh
return mesh.FabricTransportTarget{Endpoint: endpoint}, true
}
func endpointCandidateTLSCertSHA256(candidate mesh.PeerEndpointCandidate) string {
if len(candidate.Metadata) == 0 {
return ""
}
var metadata struct {
TLSCertSHA256 string `json:"tls_cert_sha256"`
}
if err := json.Unmarshal(candidate.Metadata, &metadata); err != nil {
return ""
}
return strings.TrimSpace(metadata.TLSCertSHA256)
}
func fabricSessionGatewayToken(identity state.Identity, assignment client.NodeVPNAssignment, nextHop string) string {
tokenParts := []string{
"rap_fsn_vpn",
@@ -782,6 +782,7 @@ func TestVPNFabricSessionTargetPrefersRankedQUICCandidate(t *testing.T) {
ConnectivityMode: "direct",
Priority: 10,
LastVerifiedAt: &now,
Metadata: json.RawMessage(`{"tls_cert_sha256":"abcdef"}`),
},
},
},
@@ -789,7 +790,7 @@ func TestVPNFabricSessionTargetPrefersRankedQUICCandidate(t *testing.T) {
if !ok {
t.Fatal("target missing")
}
if target.Endpoint != "quic://node-b.example.test:19443" || target.Transport != "direct_quic" {
if target.Endpoint != "quic://node-b.example.test:19443" || target.Transport != "direct_quic" || target.PeerCertSHA256 != "abcdef" {
t.Fatalf("target = %+v, want direct quic candidate", target)
}
}
@@ -2,7 +2,10 @@ package mesh
import (
"context"
"crypto/sha256"
"crypto/tls"
"crypto/x509"
"encoding/hex"
"fmt"
"strings"
"sync"
@@ -34,6 +37,34 @@ func NewQUICFabricTransport(config *quic.Config) *QUICFabricTransport {
return &QUICFabricTransport{Config: config}
}
func quicTLSConfigForTarget(target FabricTransportTarget) *tls.Config {
expectedFingerprint := normalizeCertSHA256(target.PeerCertSHA256)
config := &tls.Config{NextProtos: []string{fabricQUICNextProto}}
if expectedFingerprint == "" {
return config
}
config.InsecureSkipVerify = true
config.VerifyPeerCertificate = func(rawCerts [][]byte, _ [][]*x509.Certificate) error {
if len(rawCerts) == 0 {
return fmt.Errorf("quic peer certificate missing")
}
sum := sha256.Sum256(rawCerts[0])
actual := hex.EncodeToString(sum[:])
if actual != expectedFingerprint {
return fmt.Errorf("quic peer certificate fingerprint mismatch")
}
return nil
}
return config
}
func normalizeCertSHA256(value string) string {
value = strings.ToLower(strings.TrimSpace(value))
value = strings.ReplaceAll(value, "sha256:", "")
value = strings.ReplaceAll(value, ":", "")
return value
}
func (t *QUICFabricTransport) Connect(ctx context.Context, target FabricTransportTarget) (FabricTransportSession, error) {
if target.Endpoint == "" {
return nil, fmt.Errorf("quic fabric endpoint is required")
@@ -41,7 +72,7 @@ func (t *QUICFabricTransport) Connect(ctx context.Context, target FabricTranspor
target.Endpoint = strings.TrimPrefix(strings.TrimSpace(target.Endpoint), "quic://")
tlsConfig := target.TLSConfig
if tlsConfig == nil {
tlsConfig = &tls.Config{NextProtos: []string{fabricQUICNextProto}}
tlsConfig = quicTLSConfigForTarget(target)
} else {
tlsConfig = tlsConfig.Clone()
if len(tlsConfig.NextProtos) == 0 {
@@ -4,11 +4,14 @@ import (
"context"
"crypto/rand"
"crypto/rsa"
"crypto/sha256"
"crypto/tls"
"crypto/x509"
"crypto/x509/pkix"
"encoding/hex"
"encoding/pem"
"math/big"
"strings"
"testing"
"time"
@@ -102,6 +105,56 @@ func TestQUICFabricTransportDataAck(t *testing.T) {
}
}
func TestQUICFabricTransportVerifiesPinnedCertificate(t *testing.T) {
tlsConfig := testQUICTLSConfig(t)
listener := startQUICFabricEchoServerWithTLS(t, tlsConfig)
defer listener.Close()
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
defer cancel()
session, err := NewQUICFabricTransport(nil).Connect(ctx, FabricTransportTarget{
Endpoint: listener.Addr().String(),
PeerCertSHA256: testQUICCertSHA256(t, tlsConfig),
Timeout: time.Second,
InboundBuffer: 4,
ErrorBuffer: 4,
})
if err != nil {
t.Fatalf("connect quic fabric with pinned certificate: %v", err)
}
defer session.Close()
if err := session.Send(ctx, fabricproto.Frame{Type: fabricproto.FramePing, Sequence: 43, Payload: []byte("pin")}); err != nil {
t.Fatalf("send ping: %v", err)
}
select {
case frame := <-session.Frames():
if frame.Type != fabricproto.FramePong || frame.Sequence != 43 || string(frame.Payload) != "pin" {
t.Fatalf("frame = %+v", frame)
}
case err := <-session.Errors():
t.Fatalf("session error: %v", err)
case <-ctx.Done():
t.Fatal(ctx.Err())
}
}
func TestQUICFabricTransportRejectsPinnedCertificateMismatch(t *testing.T) {
listener := startQUICFabricEchoServer(t)
defer listener.Close()
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
defer cancel()
_, err := NewQUICFabricTransport(nil).Connect(ctx, FabricTransportTarget{
Endpoint: listener.Addr().String(),
PeerCertSHA256: strings.Repeat("0", 64),
Timeout: time.Second,
})
if err == nil {
t.Fatal("connect succeeded with mismatched certificate pin")
}
}
func TestQUICFabricServerHandlesFabricFrames(t *testing.T) {
var events []FabricSessionEventLogEntry
server, err := StartQUICFabricServer(context.Background(), QUICFabricServerConfig{
@@ -152,7 +205,12 @@ func TestQUICFabricServerHandlesFabricFrames(t *testing.T) {
func startQUICFabricEchoServer(t *testing.T) *quic.Listener {
t.Helper()
listener, err := quic.ListenAddr("127.0.0.1:0", testQUICTLSConfig(t), &quic.Config{EnableDatagrams: true})
return startQUICFabricEchoServerWithTLS(t, testQUICTLSConfig(t))
}
func startQUICFabricEchoServerWithTLS(t *testing.T, tlsConfig *tls.Config) *quic.Listener {
t.Helper()
listener, err := quic.ListenAddr("127.0.0.1:0", tlsConfig, &quic.Config{EnableDatagrams: true})
if err != nil {
t.Fatalf("listen quic: %v", err)
}
@@ -189,6 +247,15 @@ func startQUICFabricEchoServer(t *testing.T) *quic.Listener {
return listener
}
func testQUICCertSHA256(t *testing.T, tlsConfig *tls.Config) string {
t.Helper()
if len(tlsConfig.Certificates) == 0 || len(tlsConfig.Certificates[0].Certificate) == 0 {
t.Fatal("test tls config has no certificate")
}
sum := sha256.Sum256(tlsConfig.Certificates[0].Certificate[0])
return hex.EncodeToString(sum[:])
}
func testQUICTLSConfig(t *testing.T) *tls.Config {
t.Helper()
key, err := rsa.GenerateKey(rand.Reader, 2048)
@@ -31,6 +31,7 @@ type FabricTransportTarget struct {
Token string
Header http.Header
TLSConfig *tls.Config
PeerCertSHA256 string
Timeout time.Duration
MaxPayload int
OutboundBuffer int
@@ -313,6 +313,9 @@ compatibility candidates for fabric sessions.
VPN fabric-session gateway transport now consumes ranked endpoint candidates,
so dataplane sessions can select QUIC fast-path candidates and fall back to
legacy peer endpoints when the control plane has not published candidates yet.
The temporary self-signed QUIC listener advertises its SHA-256 certificate
fingerprint in endpoint metadata, and the QUIC client can pin that fingerprint
instead of disabling verification while the cluster CA path is being finished.
Deliverables: