This commit is contained in:
2026-05-18 21:33:39 +03:00
parent 5096155d83
commit 469fa0e860
94 changed files with 8761 additions and 8003 deletions
+126 -17
View File
@@ -166,6 +166,7 @@ type DockerInstallProfile struct {
BackendURL string `json:"backend_url"`
ControlPlaneEndpoints []string `json:"control_plane_endpoints,omitempty"`
ArtifactEndpoints []string `json:"artifact_endpoints,omitempty"`
FabricRegistryRecords json.RawMessage `json:"fabric_registry_records,omitempty"`
DockerImageArtifact *DockerArtifact `json:"docker_image_artifact,omitempty"`
JoinToken string `json:"join_token"`
NodeName string `json:"node_name"`
@@ -203,6 +204,7 @@ type WindowsInstallProfile struct {
BackendURL string `json:"backend_url"`
ControlPlaneEndpoints []string `json:"control_plane_endpoints,omitempty"`
ArtifactEndpoints []string `json:"artifact_endpoints,omitempty"`
FabricRegistryRecords json.RawMessage `json:"fabric_registry_records,omitempty"`
NodeAgentArtifact *DockerArtifact `json:"node_agent_artifact,omitempty"`
JoinToken string `json:"join_token"`
NodeName string `json:"node_name"`
@@ -235,6 +237,7 @@ type LinuxInstallProfile struct {
BackendURL string `json:"backend_url"`
ControlPlaneEndpoints []string `json:"control_plane_endpoints,omitempty"`
ArtifactEndpoints []string `json:"artifact_endpoints,omitempty"`
FabricRegistryRecords json.RawMessage `json:"fabric_registry_records,omitempty"`
NodeAgentArtifact *DockerArtifact `json:"node_agent_artifact,omitempty"`
JoinToken string `json:"join_token"`
NodeName string `json:"node_name"`
@@ -372,6 +375,28 @@ type NodeUpdatePlan struct {
ProductionForwarding bool `json:"production_forwarding"`
}
// NodeBridgeReplayProductPlan is the per-product entry listed under
// NodeBridgeReplayPlan.Products. It combines the product's recovery-bridge
// fields with the NodeUpdatePlan for that product.
type NodeBridgeReplayProductPlan struct {
Product string `json:"product"`
// Dropped from the JSON when empty.
RecoveryBridgeMode string `json:"recovery_bridge_mode,omitempty"`
// Always serialized, including when false.
RecoveryBridgeReplayReady bool `json:"recovery_bridge_replay_ready"`
// Dropped from the JSON when empty.
LastStatusReason string `json:"last_status_reason,omitempty"`
// Embedded by value, so an "update_plan" object is always present.
UpdatePlan NodeUpdatePlan `json:"update_plan"`
}
// NodeBridgeReplayPlan is the JSON document describing the bridge hold and
// replay state of one node, identified by ClusterID and NodeID, with one
// NodeBridgeReplayProductPlan per product in Products.
// NOTE(review): presumably the value Service.GetNodeBridgeReplayPlan returns
// and the handler wraps under "node_bridge_replay_plan" — confirm in the service.
type NodeBridgeReplayPlan struct {
SchemaVersion string `json:"schema_version"`
ClusterID string `json:"cluster_id"`
NodeID string `json:"node_id"`
NodeName string `json:"node_name,omitempty"`
HealthStatus string `json:"health_status,omitempty"`
// The three flags below carry no omitempty, so false is serialized explicitly.
HeartbeatStale bool `json:"heartbeat_stale"`
BridgeHoldRequired bool `json:"bridge_hold_required"`
RecoveryBridgeReplayReady bool `json:"recovery_bridge_replay_ready"`
// The slices below are dropped from the JSON when empty.
BridgeHoldReasons []string `json:"bridge_hold_reasons,omitempty"`
BridgeActions []string `json:"bridge_actions,omitempty"`
Products []NodeBridgeReplayProductPlan `json:"products,omitempty"`
}
type NodeUpdateStatus struct {
ID string `json:"id"`
ClusterID string `json:"cluster_id"`
@@ -388,6 +413,77 @@ type NodeUpdateStatus struct {
ObservedAt time.Time `json:"observed_at"`
}
// StaleNodeRiskReport is the cluster-wide stale-node risk document: report-level
// removal/hold flags, one StaleNodeRiskNode per node in Nodes, and aggregate
// counters in Summary. It is also carried as the Report field of
// LegacyRemovalBlockedError, from which legacyRemovalBlockedErrorDetails builds
// the HTTP error details.
// NOTE(review): presumably the value Service.GetStaleNodeRiskReport returns — confirm.
type StaleNodeRiskReport struct {
ClusterID string `json:"cluster_id"`
GeneratedAt time.Time `json:"generated_at"`
HeartbeatStaleAfterSeconds int `json:"heartbeat_stale_after_seconds"`
LegacyRemovalAllowed bool `json:"legacy_removal_allowed"`
BridgeHoldRequired bool `json:"bridge_hold_required"`
// The three slices below are dropped from the JSON when empty.
BridgeHoldNodeIDs []string `json:"bridge_hold_node_ids,omitempty"`
BridgeHoldReasons []string `json:"bridge_hold_reasons,omitempty"`
BlockedOperations []string `json:"blocked_operations,omitempty"`
// No omitempty: a nil Nodes slice is encoded as JSON null rather than omitted.
Nodes []StaleNodeRiskNode `json:"nodes"`
Summary StaleNodeRiskSummary `json:"summary"`
}
// StaleNodeRiskSummary holds the aggregate node counters of a
// StaleNodeRiskReport (its Summary field). No field uses omitempty, so every
// counter is serialized even when it is zero.
// NOTE(review): how each counter is derived from the per-node entries is not
// visible here — see the service code that builds the report.
type StaleNodeRiskSummary struct {
TotalNodes int `json:"total_nodes"`
StaleNodes int `json:"stale_nodes"`
BlockedNodes int `json:"blocked_nodes"`
DirectPeerAlertNodes int `json:"direct_peer_alert_nodes"`
ArtifactGapNodes int `json:"artifact_gap_nodes"`
UnknownProfileNodes int `json:"unknown_profile_nodes"`
WaitingUpdateStatusNodes int `json:"waiting_update_status_nodes"`
UnknownVersionNodes int `json:"unknown_version_nodes"`
LegacyRecoveryContractNodes int `json:"legacy_recovery_contract_nodes"`
RecoveryBridgeRequiredNodes int `json:"recovery_bridge_required_nodes"`
RecoveryBridgeReplayReadyNodes int `json:"recovery_bridge_replay_ready_nodes"`
WaitingRecoveryHeartbeatNodes int `json:"waiting_recovery_heartbeat_nodes"`
}
// StaleNodeRiskNode is the per-node entry of StaleNodeRiskReport.Nodes. The
// Blocked flag and NodeID are what legacyRemovalBlockedErrorDetails reads to
// build the "blocked_node_ids" error detail.
type StaleNodeRiskNode struct {
NodeID string `json:"node_id"`
Name string `json:"name"`
RegistrationStatus string `json:"registration_status"`
HealthStatus string `json:"health_status"`
// Pointer fields: omitted from the JSON when nil.
ReportedVersion *string `json:"reported_version,omitempty"`
LastSeenAt *time.Time `json:"last_seen_at,omitempty"`
HeartbeatStale bool `json:"heartbeat_stale"`
Blocked bool `json:"blocked"`
DirectPeerAlert bool `json:"direct_peer_alert"`
// The direct-peer counters use omitempty, so a zero count is not serialized.
DirectPeerReadyCount int `json:"direct_peer_ready_count,omitempty"`
DirectPeerTargetCount int `json:"direct_peer_target_count,omitempty"`
DirectPeerDeficit int `json:"direct_peer_deficit,omitempty"`
Alerts []string `json:"alerts,omitempty"`
RecoveryBridgeRequired bool `json:"recovery_bridge_required"`
RecoveryBridgeReplayReady bool `json:"recovery_bridge_replay_ready"`
RecoveryBridgeActions []string `json:"recovery_bridge_actions,omitempty"`
Risks []string `json:"risks,omitempty"`
// Per-product breakdown for this node; dropped from the JSON when empty.
Products []StaleNodeRiskProduct `json:"products,omitempty"`
}
// StaleNodeRiskProduct is the per-product entry of StaleNodeRiskNode.Products.
// Only Product, Enabled, CompatibleArtifactFound, RecoveryBridgeRequired and
// RecoveryBridgeReplayReady are always serialized; every other field uses
// omitempty and disappears from the JSON when empty or nil.
type StaleNodeRiskProduct struct {
Product string `json:"product"`
CurrentVersion string `json:"current_version,omitempty"`
// Pointer: omitted when nil.
TargetVersion *string `json:"target_version,omitempty"`
Channel string `json:"channel,omitempty"`
Strategy string `json:"strategy,omitempty"`
Enabled bool `json:"enabled"`
DetectedOS string `json:"detected_os,omitempty"`
DetectedArch string `json:"detected_arch,omitempty"`
DetectedInstallType string `json:"detected_install_type,omitempty"`
CompatibleArtifactFound bool `json:"compatible_artifact_found"`
MatchingReleaseVersion string `json:"matching_release_version,omitempty"`
// Pointer: omitted when nil.
LastStatusObservedAt *time.Time `json:"last_status_observed_at,omitempty"`
LastStatusPhase string `json:"last_status_phase,omitempty"`
LastStatusValue string `json:"last_status_value,omitempty"`
LastStatusReason string `json:"last_status_reason,omitempty"`
RecoveryBridgeRequired bool `json:"recovery_bridge_required"`
RecoveryBridgeReplayReady bool `json:"recovery_bridge_replay_ready"`
RecoveryBridgeMode string `json:"recovery_bridge_mode,omitempty"`
Risks []string `json:"risks,omitempty"`
}
type NodeBootstrap struct {
NodeID string `json:"node_id"`
ClusterID string `json:"cluster_id"`
@@ -761,23 +857,25 @@ type NodeSyntheticMeshConfig struct {
}
type NodeMeshListenerConfig struct {
SchemaVersion string `json:"schema_version"`
Source string `json:"source"`
DesiredState string `json:"desired_state"`
ListenAddr string `json:"listen_addr"`
ListenPortMode string `json:"listen_port_mode"`
AutoPortStart int `json:"auto_port_start,omitempty"`
AutoPortEnd int `json:"auto_port_end,omitempty"`
AdvertiseEndpoint string `json:"advertise_endpoint,omitempty"`
AdvertiseTransport string `json:"advertise_transport,omitempty"`
ConnectivityMode string `json:"connectivity_mode,omitempty"`
NATType string `json:"nat_type,omitempty"`
Region string `json:"region,omitempty"`
ConfigVersion string `json:"config_version,omitempty"`
UpdatedByUserID string `json:"updated_by_user_id,omitempty"`
UpdatedAt string `json:"updated_at,omitempty"`
ControlPlaneOnly bool `json:"control_plane_only"`
ProductionForwarding bool `json:"production_forwarding"`
SchemaVersion string `json:"schema_version"`
Source string `json:"source"`
DesiredState string `json:"desired_state"`
ListenAddr string `json:"listen_addr"`
ListenPortMode string `json:"listen_port_mode"`
AutoPortStart int `json:"auto_port_start,omitempty"`
AutoPortEnd int `json:"auto_port_end,omitempty"`
AdvertiseEndpoint string `json:"advertise_endpoint,omitempty"`
AdvertiseEndpoints []string `json:"advertise_endpoints,omitempty"`
EndpointCandidates []PeerEndpointCandidate `json:"endpoint_candidates,omitempty"`
AdvertiseTransport string `json:"advertise_transport,omitempty"`
ConnectivityMode string `json:"connectivity_mode,omitempty"`
NATType string `json:"nat_type,omitempty"`
Region string `json:"region,omitempty"`
ConfigVersion string `json:"config_version,omitempty"`
UpdatedByUserID string `json:"updated_by_user_id,omitempty"`
UpdatedAt string `json:"updated_at,omitempty"`
ControlPlaneOnly bool `json:"control_plane_only"`
ProductionForwarding bool `json:"production_forwarding"`
}
type MeshQoSPolicy struct {
@@ -2027,6 +2125,17 @@ type GetNodeUpdatePlanInput struct {
ArtifactOrigin string
}
// GetStaleNodeRiskReportInput carries the arguments passed to
// Service.GetStaleNodeRiskReport. The HTTP handler getStaleNodeRiskReport fills
// ActorUserID from the "actor_user_id" query parameter and ClusterID from the
// "clusterID" route parameter.
type GetStaleNodeRiskReportInput struct {
ActorUserID string
ClusterID string
}
// GetNodeBridgeReplayPlanInput carries the arguments passed to
// Service.GetNodeBridgeReplayPlan. The HTTP handler getNodeBridgeReplayPlan
// fills ActorUserID from the "actor_user_id" query parameter and ClusterID /
// NodeID from the "clusterID" / "nodeID" route parameters.
type GetNodeBridgeReplayPlanInput struct {
ActorUserID string
ClusterID string
NodeID string
}
type ReportNodeUpdateStatusInput struct {
ClusterID string
NodeID string
@@ -84,8 +84,10 @@ func (m *Module) RegisterRoutes(router chi.Router) {
r.Post("/{clusterID}/updates/releases", m.createReleaseVersion)
r.Put("/{clusterID}/nodes/{nodeID}/updates/policy", m.upsertNodeUpdatePolicy)
r.Get("/{clusterID}/nodes/{nodeID}/updates/plan", m.getNodeUpdatePlan)
r.Get("/{clusterID}/nodes/{nodeID}/updates/bridge-replay-plan", m.getNodeBridgeReplayPlan)
r.Post("/{clusterID}/nodes/{nodeID}/updates/status", m.reportNodeUpdateStatus)
r.Get("/{clusterID}/nodes/{nodeID}/updates/statuses", m.listNodeUpdateStatuses)
r.Get("/{clusterID}/updates/stale-node-risk-report", m.getStaleNodeRiskReport)
r.Get("/{clusterID}/nodes/{nodeID}/testing-flags", m.getEffectiveNodeTestingFlags)
r.Get("/{clusterID}/nodes/{nodeID}/mesh/synthetic-config", m.getNodeSyntheticMeshConfig)
r.Post("/{clusterID}/nodes/{nodeID}/telemetry", m.recordNodeTelemetry)
@@ -843,6 +845,29 @@ func (m *Module) listNodeUpdateStatuses(w http.ResponseWriter, r *http.Request)
httpx.WriteJSON(w, http.StatusOK, map[string]any{"node_update_statuses": items})
}
// getStaleNodeRiskReport serves the GET route registered as
// "/{clusterID}/updates/stale-node-risk-report". It reads the cluster from the
// route and the acting user from the "actor_user_id" query parameter, asks the
// service for the report, and replies with 200 and the report wrapped under
// "stale_node_risk_report". Service errors are translated by writeServiceError.
func (m *Module) getStaleNodeRiskReport(w http.ResponseWriter, r *http.Request) {
	ctx := r.Context()
	input := GetStaleNodeRiskReportInput{
		ActorUserID: r.URL.Query().Get("actor_user_id"),
		ClusterID:   chi.URLParam(r, "clusterID"),
	}

	report, err := m.service.GetStaleNodeRiskReport(ctx, input)
	if failed := writeServiceError(w, err); failed {
		// writeServiceError has already written the error response.
		return
	}

	body := map[string]any{"stale_node_risk_report": report}
	httpx.WriteJSON(w, http.StatusOK, body)
}
// getNodeBridgeReplayPlan serves the GET route registered as
// "/{clusterID}/nodes/{nodeID}/updates/bridge-replay-plan". It reads the cluster
// and node from the route and the acting user from the "actor_user_id" query
// parameter, asks the service for the plan, and replies with 200 and the plan
// wrapped under "node_bridge_replay_plan". Service errors are translated by
// writeServiceError.
func (m *Module) getNodeBridgeReplayPlan(w http.ResponseWriter, r *http.Request) {
	ctx := r.Context()
	input := GetNodeBridgeReplayPlanInput{
		ActorUserID: r.URL.Query().Get("actor_user_id"),
		ClusterID:   chi.URLParam(r, "clusterID"),
		NodeID:      chi.URLParam(r, "nodeID"),
	}

	plan, err := m.service.GetNodeBridgeReplayPlan(ctx, input)
	if failed := writeServiceError(w, err); failed {
		// writeServiceError has already written the error response.
		return
	}

	body := map[string]any{"node_bridge_replay_plan": plan}
	httpx.WriteJSON(w, http.StatusOK, body)
}
func (m *Module) getEffectiveNodeTestingFlags(w http.ResponseWriter, r *http.Request) {
item, err := m.service.GetEffectiveNodeTestingFlags(r.Context(), chi.URLParam(r, "clusterID"), chi.URLParam(r, "nodeID"))
if writeServiceError(w, err) {
@@ -3386,6 +3411,7 @@ func writeServiceError(w http.ResponseWriter, err error) bool {
if err == nil {
return false
}
var legacyRemovalBlocked *LegacyRemovalBlockedError
switch {
case errors.Is(err, ErrAccessDenied):
httpx.WriteError(w, http.StatusForbidden, err.Error())
@@ -3393,6 +3419,12 @@ func writeServiceError(w http.ResponseWriter, err error) bool {
httpx.WriteError(w, http.StatusForbidden, err.Error())
case errors.Is(err, ErrClusterReadOnly):
httpx.WriteError(w, http.StatusConflict, err.Error())
case errors.As(err, &legacyRemovalBlocked):
httpx.WriteErrorMessage(w, http.StatusConflict, httpx.ErrorResponse{
Error: httpx.NewErrorMessage(http.StatusConflict, err.Error(), legacyRemovalBlockedErrorDetails(*legacyRemovalBlocked), ""),
})
case errors.Is(err, ErrLegacyRemovalBlocked):
httpx.WriteError(w, http.StatusConflict, err.Error())
case errors.Is(err, ErrVPNLeaseAlreadyActive):
httpx.WriteError(w, http.StatusConflict, err.Error())
case errors.Is(err, ErrInvalidPayload), errors.Is(err, ErrInvalidJoinToken), errors.Is(err, ErrInvalidNodeRole):
@@ -3404,3 +3436,37 @@ func writeServiceError(w http.ResponseWriter, err error) bool {
}
return true
}
// legacyRemovalBlockedErrorDetails flattens a LegacyRemovalBlockedError into the
// details map that writeServiceError attaches to its 409 response.
//
// The map always contains the blocked operation, the report-level flags and
// lists, the stale-heartbeat threshold and the summary counters listed below
// (total and direct-peer-alert counts are not included). "blocked_node_ids"
// (IDs of nodes whose Blocked flag is set, in report order) and
// "bridge_hold_node_ids" are added only when they are non-empty.
func legacyRemovalBlockedErrorDetails(err LegacyRemovalBlockedError) map[string]any {
	report := err.Report
	summary := report.Summary

	details := map[string]any{
		// Operation and report-level state.
		"blocked_operation":             err.BlockedOperation,
		"legacy_removal_allowed":        report.LegacyRemovalAllowed,
		"bridge_hold_required":          report.BridgeHoldRequired,
		"bridge_hold_reasons":           report.BridgeHoldReasons,
		"blocked_operations":            report.BlockedOperations,
		"heartbeat_stale_after_seconds": report.HeartbeatStaleAfterSeconds,
		// Summary counters.
		"stale_nodes":                        summary.StaleNodes,
		"blocked_nodes":                      summary.BlockedNodes,
		"artifact_gap_nodes":                 summary.ArtifactGapNodes,
		"unknown_profile_nodes":              summary.UnknownProfileNodes,
		"waiting_update_status_nodes":        summary.WaitingUpdateStatusNodes,
		"unknown_version_nodes":              summary.UnknownVersionNodes,
		"legacy_recovery_contract_nodes":     summary.LegacyRecoveryContractNodes,
		"recovery_bridge_required_nodes":     summary.RecoveryBridgeRequiredNodes,
		"recovery_bridge_replay_ready_nodes": summary.RecoveryBridgeReplayReadyNodes,
		"waiting_recovery_heartbeat_nodes":   summary.WaitingRecoveryHeartbeatNodes,
	}

	// Collect the IDs of blocked nodes, preserving the order of report.Nodes.
	blocked := make([]string, 0, len(report.Nodes))
	for i := range report.Nodes {
		if !report.Nodes[i].Blocked {
			continue
		}
		blocked = append(blocked, report.Nodes[i].NodeID)
	}
	if len(blocked) != 0 {
		details["blocked_node_ids"] = blocked
	}

	if holdIDs := report.BridgeHoldNodeIDs; len(holdIDs) != 0 {
		details["bridge_hold_node_ids"] = holdIDs
	}
	return details
}
@@ -0,0 +1,68 @@
package cluster
import (
"encoding/json"
"net/http"
"net/http/httptest"
"testing"
)
func TestWriteServiceErrorLegacyRemovalBlockedIncludesBreakdownDetails(t *testing.T) {
recorder := httptest.NewRecorder()
handled := writeServiceError(recorder, &LegacyRemovalBlockedError{
BlockedOperation: "create_breaking_release",
Report: StaleNodeRiskReport{
HeartbeatStaleAfterSeconds: 900,
LegacyRemovalAllowed: false,
BridgeHoldRequired: true,
BridgeHoldNodeIDs: []string{"node-1"},
BridgeHoldReasons: []string{"legacy_contract_overlap"},
BlockedOperations: []string{"create_breaking_release", "target_breaking_update_policy", "remove_recovery_bridge_overlap"},
Nodes: []StaleNodeRiskNode{
{NodeID: "node-1", Blocked: true, RecoveryBridgeRequired: true},
{NodeID: "node-2", Blocked: false},
},
Summary: StaleNodeRiskSummary{
StaleNodes: 1,
BlockedNodes: 1,
ArtifactGapNodes: 0,
UnknownProfileNodes: 0,
WaitingUpdateStatusNodes: 0,
UnknownVersionNodes: 0,
LegacyRecoveryContractNodes: 0,
WaitingRecoveryHeartbeatNodes: 1,
},
},
})
if !handled {
t.Fatalf("writeServiceError returned false")
}
if recorder.Code != http.StatusConflict {
t.Fatalf("status = %d, want %d", recorder.Code, http.StatusConflict)
}
var payload struct {
Error struct {
Details map[string]any `json:"details"`
} `json:"error"`
}
if err := json.Unmarshal(recorder.Body.Bytes(), &payload); err != nil {
t.Fatalf("unmarshal response: %v", err)
}
if payload.Error.Details["blocked_operation"] != "create_breaking_release" {
t.Fatalf("blocked_operation = %v", payload.Error.Details["blocked_operation"])
}
if payload.Error.Details["waiting_recovery_heartbeat_nodes"] != float64(1) {
t.Fatalf("waiting_recovery_heartbeat_nodes = %v", payload.Error.Details["waiting_recovery_heartbeat_nodes"])
}
if payload.Error.Details["bridge_hold_required"] != true {
t.Fatalf("bridge_hold_required = %v", payload.Error.Details["bridge_hold_required"])
}
blockedNodeIDs, ok := payload.Error.Details["blocked_node_ids"].([]any)
if !ok || len(blockedNodeIDs) != 1 || blockedNodeIDs[0] != "node-1" {
t.Fatalf("blocked_node_ids = %#v", payload.Error.Details["blocked_node_ids"])
}
bridgeHoldNodeIDs, ok := payload.Error.Details["bridge_hold_node_ids"].([]any)
if !ok || len(bridgeHoldNodeIDs) != 1 || bridgeHoldNodeIDs[0] != "node-1" {
t.Fatalf("bridge_hold_node_ids = %#v", payload.Error.Details["bridge_hold_node_ids"])
}
}
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
@@ -158,6 +158,7 @@ func (m *Module) bootstrapEnrollment(w http.ResponseWriter, r *http.Request) {
func (m *Module) registerAgent(w http.ResponseWriter, r *http.Request) {
var payload struct {
ClusterID string `json:"cluster_id"`
NodeKey string `json:"node_key"`
Name string `json:"name"`
OwnershipType string `json:"ownership_type"`
@@ -197,6 +198,19 @@ func (m *Module) registerAgent(w http.ResponseWriter, r *http.Request) {
httpx.WriteError(w, http.StatusInternalServerError, err.Error())
return
}
if payload.ClusterID != "" {
if _, err := m.db.Exec(r.Context(), `
INSERT INTO cluster_memberships (cluster_id, node_id, membership_status, joined_at, last_seen_at, metadata)
VALUES ($1::uuid, $2::uuid, 'active', $3, $3, $4::jsonb)
ON CONFLICT (cluster_id, node_id) DO UPDATE SET
membership_status = 'active',
last_seen_at = EXCLUDED.last_seen_at,
metadata = cluster_memberships.metadata || EXCLUDED.metadata
`, payload.ClusterID, nodeID, now, []byte(`{"source":"fabric_control_candidate_registration"}`)); err != nil {
httpx.WriteError(w, http.StatusInternalServerError, err.Error())
return
}
}
httpx.WriteJSON(w, http.StatusOK, map[string]any{
"node_id": nodeID,
"status": "registered",