Record project continuation changes
This commit is contained in:
@@ -66,24 +66,44 @@ type PeerConnectionManagerSnapshot struct {
|
||||
}
|
||||
|
||||
type PeerConnectionProbeResult struct {
|
||||
NodeID string `json:"node_id"`
|
||||
LinkStatus string `json:"link_status"`
|
||||
Action string `json:"action"`
|
||||
Reason string `json:"reason"`
|
||||
Endpoint string `json:"endpoint,omitempty"`
|
||||
ConnectionState PeerConnectionState `json:"connection_state"`
|
||||
TransportMode string `json:"transport_mode"`
|
||||
RequiresRendezvous bool `json:"requires_rendezvous"`
|
||||
RendezvousResolved bool `json:"rendezvous_resolved"`
|
||||
DirectCandidate bool `json:"direct_candidate"`
|
||||
RelayCandidate bool `json:"relay_candidate"`
|
||||
RendezvousLeaseID string `json:"rendezvous_lease_id,omitempty"`
|
||||
RelayNodeID string `json:"relay_node_id,omitempty"`
|
||||
RelayEndpoint string `json:"relay_endpoint,omitempty"`
|
||||
LatencyMs int `json:"latency_ms,omitempty"`
|
||||
FailureReason string `json:"failure_reason,omitempty"`
|
||||
StartedAt time.Time `json:"started_at"`
|
||||
CompletedAt time.Time `json:"completed_at"`
|
||||
NodeID string `json:"node_id"`
|
||||
LinkStatus string `json:"link_status"`
|
||||
Action string `json:"action"`
|
||||
Reason string `json:"reason"`
|
||||
Endpoint string `json:"endpoint,omitempty"`
|
||||
SelectedCandidateID string `json:"selected_candidate_id,omitempty"`
|
||||
SelectedEndpoint string `json:"selected_endpoint,omitempty"`
|
||||
ConnectionState PeerConnectionState `json:"connection_state"`
|
||||
TransportMode string `json:"transport_mode"`
|
||||
RequiresRendezvous bool `json:"requires_rendezvous"`
|
||||
RendezvousResolved bool `json:"rendezvous_resolved"`
|
||||
DirectCandidate bool `json:"direct_candidate"`
|
||||
RelayCandidate bool `json:"relay_candidate"`
|
||||
RendezvousLeaseID string `json:"rendezvous_lease_id,omitempty"`
|
||||
RelayNodeID string `json:"relay_node_id,omitempty"`
|
||||
RelayEndpoint string `json:"relay_endpoint,omitempty"`
|
||||
LatencyMs int `json:"latency_ms,omitempty"`
|
||||
FailureReason string `json:"failure_reason,omitempty"`
|
||||
CandidateResults []PeerConnectionCandidateProbeResult `json:"candidate_results,omitempty"`
|
||||
StartedAt time.Time `json:"started_at"`
|
||||
CompletedAt time.Time `json:"completed_at"`
|
||||
}
|
||||
|
||||
// PeerConnectionCandidateProbeResult records a single probe attempt against
// one endpoint candidate of a peer.
type PeerConnectionCandidateProbeResult struct {
	// CandidateID, Endpoint and Transport describe the candidate that was
	// probed.
	CandidateID string `json:"candidate_id,omitempty"`
	Endpoint    string `json:"endpoint"`
	Transport   string `json:"transport,omitempty"`

	// LinkStatus is the outcome of this attempt. LatencyMs is filled in for
	// successful attempts and FailureReason for failed ones.
	LinkStatus    string `json:"link_status"`
	LatencyMs     int    `json:"latency_ms,omitempty"`
	FailureReason string `json:"failure_reason,omitempty"`

	// StartedAt and CompletedAt bracket this attempt only, not the whole
	// probe of the peer.
	StartedAt   time.Time `json:"started_at"`
	CompletedAt time.Time `json:"completed_at"`
}
|
||||
|
||||
// peerConnectionProbeTarget is one endpoint to try while probing a peer.
type peerConnectionProbeTarget struct {
	CandidateID string // identifier of the endpoint candidate; may be empty
	Endpoint    string // address the health probe is sent to
	Transport   string // transport label associated with the candidate
}
|
||||
|
||||
func NewPeerConnectionManager(cfg PeerConnectionManagerConfig) *PeerConnectionManager {
|
||||
@@ -137,6 +157,10 @@ func (m *PeerConnectionManager) ProbeOnce(ctx context.Context) PeerConnectionMan
|
||||
RendezvousLeases: rendezvousLeases,
|
||||
Now: startedAt,
|
||||
})
|
||||
entriesByNode := map[string]PeerCacheEntry{}
|
||||
for _, entry := range peerSnapshot.Entries {
|
||||
entriesByNode[entry.NodeID] = entry
|
||||
}
|
||||
cycle := PeerConnectionManagerCycle{
|
||||
Mode: recoveryPlan.Mode,
|
||||
StartedAt: startedAt,
|
||||
@@ -150,7 +174,7 @@ func (m *PeerConnectionManager) ProbeOnce(ctx context.Context) PeerConnectionMan
|
||||
Results: make([]PeerConnectionProbeResult, 0, len(intentPlan.Intents)),
|
||||
}
|
||||
for _, intent := range intentPlan.Intents {
|
||||
result := m.probeIntent(ctx, intent)
|
||||
result := m.probeIntent(ctx, intent, entriesByNode[intent.NodeID])
|
||||
cycle.Results = append(cycle.Results, result)
|
||||
switch result.LinkStatus {
|
||||
case PeerConnectionProbeReachable:
|
||||
@@ -200,7 +224,7 @@ func (m *PeerConnectionManager) peerConfigSnapshot() (*PeerCache, []PeerRendezvo
|
||||
return m.peerCache, append([]PeerRendezvousLease{}, m.rendezvousLeases...)
|
||||
}
|
||||
|
||||
func (m *PeerConnectionManager) probeIntent(ctx context.Context, intent PeerConnectionIntent) PeerConnectionProbeResult {
|
||||
func (m *PeerConnectionManager) probeIntent(ctx context.Context, intent PeerConnectionIntent, cacheEntry PeerCacheEntry) PeerConnectionProbeResult {
|
||||
startedAt := normalizedNow(m.now())
|
||||
result := PeerConnectionProbeResult{
|
||||
NodeID: intent.NodeID,
|
||||
@@ -254,9 +278,6 @@ func (m *PeerConnectionManager) probeIntent(ctx context.Context, intent PeerConn
|
||||
result.CompletedAt = normalizedNow(m.now())
|
||||
return result
|
||||
}
|
||||
m.tracker.BeginProbe(peer, startedAt)
|
||||
probeCtx, cancel := context.WithTimeout(ctx, m.probeTimeout)
|
||||
defer cancel()
|
||||
target := PeerIdentity{
|
||||
ClusterID: m.local.ClusterID,
|
||||
NodeID: intent.NodeID,
|
||||
@@ -264,30 +285,118 @@ func (m *PeerConnectionManager) probeIntent(ctx context.Context, intent PeerConn
|
||||
if intent.RelayCandidate && intent.RelayNodeID != "" {
|
||||
target.NodeID = intent.RelayNodeID
|
||||
}
|
||||
_, err := NewClient(strings.TrimRight(intent.Endpoint, "/")).withHTTPClient(m.httpClient).SendHealth(probeCtx, NewHealthMessage(m.local, target))
|
||||
completedAt := normalizedNow(m.now())
|
||||
if err != nil {
|
||||
result.LinkStatus = PeerConnectionProbeUnreachable
|
||||
result.FailureReason = err.Error()
|
||||
result.ConnectionState = m.tracker.RecordFailure(intent.NodeID, err.Error(), completedAt)
|
||||
targets := []peerConnectionProbeTarget{{
|
||||
CandidateID: intent.BestCandidateID,
|
||||
Endpoint: intent.Endpoint,
|
||||
Transport: intent.Transport,
|
||||
}}
|
||||
if intent.DirectCandidate {
|
||||
targets = peerConnectionProbeTargets(intent, cacheEntry)
|
||||
}
|
||||
var lastFailure string
|
||||
for _, probeTarget := range targets {
|
||||
probePeer := peer
|
||||
probePeer.Endpoint = strings.TrimRight(strings.TrimSpace(probeTarget.Endpoint), "/")
|
||||
probePeer.BestCandidateID = strings.TrimSpace(probeTarget.CandidateID)
|
||||
probePeer.BestCandidateAddr = probePeer.Endpoint
|
||||
probePeer.BestTransport = strings.TrimSpace(probeTarget.Transport)
|
||||
if probePeer.Endpoint == "" {
|
||||
continue
|
||||
}
|
||||
candidateStartedAt := normalizedNow(m.now())
|
||||
m.tracker.BeginProbe(probePeer, candidateStartedAt)
|
||||
probeCtx, cancel := context.WithTimeout(ctx, m.probeTimeout)
|
||||
_, err := NewClient(probePeer.Endpoint).withHTTPClient(m.httpClient).SendHealth(probeCtx, NewHealthMessage(m.local, target))
|
||||
cancel()
|
||||
completedAt := normalizedNow(m.now())
|
||||
candidateResult := PeerConnectionCandidateProbeResult{
|
||||
CandidateID: probePeer.BestCandidateID,
|
||||
Endpoint: probePeer.Endpoint,
|
||||
Transport: probePeer.BestTransport,
|
||||
StartedAt: candidateStartedAt,
|
||||
CompletedAt: completedAt,
|
||||
}
|
||||
if err != nil {
|
||||
lastFailure = err.Error()
|
||||
candidateResult.LinkStatus = PeerConnectionProbeUnreachable
|
||||
candidateResult.FailureReason = lastFailure
|
||||
result.CandidateResults = append(result.CandidateResults, candidateResult)
|
||||
continue
|
||||
}
|
||||
latency := int(completedAt.Sub(candidateStartedAt).Milliseconds())
|
||||
if latency < 0 {
|
||||
latency = 0
|
||||
}
|
||||
candidateResult.LinkStatus = PeerConnectionProbeReachable
|
||||
candidateResult.LatencyMs = latency
|
||||
result.CandidateResults = append(result.CandidateResults, candidateResult)
|
||||
result.LinkStatus = PeerConnectionProbeReachable
|
||||
result.Endpoint = probePeer.Endpoint
|
||||
result.SelectedCandidateID = probePeer.BestCandidateID
|
||||
result.SelectedEndpoint = probePeer.Endpoint
|
||||
result.LatencyMs = latency
|
||||
if intent.RelayCandidate {
|
||||
result.ConnectionState = m.tracker.RecordRelayReady(probePeer, latency, completedAt)
|
||||
} else {
|
||||
result.ConnectionState = m.tracker.RecordSuccessForPeer(probePeer, latency, completedAt)
|
||||
}
|
||||
result.CompletedAt = completedAt
|
||||
return result
|
||||
}
|
||||
latency := int(completedAt.Sub(startedAt).Milliseconds())
|
||||
if latency < 0 {
|
||||
latency = 0
|
||||
}
|
||||
result.LinkStatus = PeerConnectionProbeReachable
|
||||
result.LatencyMs = latency
|
||||
if intent.RelayCandidate {
|
||||
result.ConnectionState = m.tracker.RecordRelayReady(peer, latency, completedAt)
|
||||
} else {
|
||||
result.ConnectionState = m.tracker.RecordSuccess(intent.NodeID, latency, completedAt)
|
||||
completedAt := normalizedNow(m.now())
|
||||
if lastFailure == "" {
|
||||
lastFailure = "no_probe_endpoint_available"
|
||||
}
|
||||
result.LinkStatus = PeerConnectionProbeUnreachable
|
||||
result.FailureReason = lastFailure
|
||||
result.ConnectionState = m.tracker.RecordFailure(intent.NodeID, lastFailure, completedAt)
|
||||
result.CompletedAt = completedAt
|
||||
return result
|
||||
}
|
||||
|
||||
func peerConnectionProbeTargets(intent PeerConnectionIntent, cacheEntry PeerCacheEntry) []peerConnectionProbeTarget {
|
||||
seen := map[string]struct{}{}
|
||||
out := make([]peerConnectionProbeTarget, 0, len(cacheEntry.EndpointCandidates)+1)
|
||||
add := func(candidateID, endpoint, transport string) {
|
||||
endpoint = strings.TrimRight(strings.TrimSpace(endpoint), "/")
|
||||
if endpoint == "" {
|
||||
return
|
||||
}
|
||||
key := candidateID + "|" + endpoint
|
||||
if _, ok := seen[key]; ok {
|
||||
return
|
||||
}
|
||||
seen[key] = struct{}{}
|
||||
out = append(out, peerConnectionProbeTarget{
|
||||
CandidateID: strings.TrimSpace(candidateID),
|
||||
Endpoint: endpoint,
|
||||
Transport: strings.TrimSpace(transport),
|
||||
})
|
||||
}
|
||||
for _, candidate := range cacheEntry.EndpointCandidates {
|
||||
if !candidateUsableForDirectProbe(candidate) {
|
||||
continue
|
||||
}
|
||||
add(candidate.EndpointID, candidate.Address, candidate.Transport)
|
||||
}
|
||||
add(intent.BestCandidateID, intent.Endpoint, intent.Transport)
|
||||
return out
|
||||
}
|
||||
|
||||
func candidateUsableForDirectProbe(candidate PeerEndpointCandidate) bool {
|
||||
endpoint := strings.TrimSpace(candidate.Address)
|
||||
if endpoint == "" || strings.HasPrefix(endpoint, "relay://") || strings.HasPrefix(endpoint, "outbound://") {
|
||||
return false
|
||||
}
|
||||
connectivity := strings.ToLower(strings.TrimSpace(candidate.ConnectivityMode))
|
||||
reachability := strings.ToLower(strings.TrimSpace(candidate.Reachability))
|
||||
transport := strings.ToLower(strings.TrimSpace(candidate.Transport))
|
||||
if connectivity == "outbound_only" || connectivity == "relay_required" || reachability == "outbound_only" || reachability == "relay" {
|
||||
return false
|
||||
}
|
||||
return transport == "" || strings.Contains(transport, "direct") || transport == "wss" || strings.HasPrefix(endpoint, "http://") || strings.HasPrefix(endpoint, "https://")
|
||||
}
|
||||
|
||||
func (m *PeerConnectionManager) connectionState(nodeID string) PeerConnectionState {
|
||||
snapshot := m.tracker.Snapshot()
|
||||
for _, entry := range snapshot.Entries {
|
||||
|
||||
Reference in New Issue
Block a user