1
This commit is contained in:
@@ -2,6 +2,7 @@ package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
@@ -58,6 +59,14 @@ func main() {
|
||||
if err := runUpdateLoop(ctx, os.Args[2:]); err != nil {
|
||||
log.Fatalf("update-loop failed: %v", err)
|
||||
}
|
||||
case "monitor-loop":
|
||||
if err := runMonitorLoop(ctx, os.Args[2:]); err != nil {
|
||||
log.Fatalf("monitor-loop failed: %v", err)
|
||||
}
|
||||
case "monitor-once":
|
||||
if err := runMonitorOnce(ctx, os.Args[2:]); err != nil {
|
||||
log.Fatalf("monitor-once failed: %v", err)
|
||||
}
|
||||
case "install-updater":
|
||||
if err := runInstallUpdater(ctx, os.Args[2:]); err != nil {
|
||||
log.Fatalf("install-updater failed: %v", err)
|
||||
@@ -288,6 +297,9 @@ func runInstall(ctx context.Context, args []string) error {
|
||||
return err
|
||||
}
|
||||
fmt.Print(result.Unit)
|
||||
if result.MonitorUnit != "" {
|
||||
fmt.Print(result.MonitorUnit)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -304,7 +316,7 @@ func runInstall(ctx context.Context, args []string) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
fmt.Printf("updater_service=%s unit=%s binary=%s started=%t\n", serviceResult.UnitName, serviceResult.UnitPath, serviceResult.BinaryPath, serviceResult.Started)
|
||||
fmt.Printf("updater_service=%s unit=%s binary=%s started=%t monitor_service=%s\n", serviceResult.UnitName, serviceResult.UnitPath, serviceResult.BinaryPath, serviceResult.Started, serviceResult.MonitorUnitName)
|
||||
}
|
||||
fmt.Println("next: approve the join request in the platform admin panel, then the node-agent will finish bootstrap and start heartbeats")
|
||||
return nil
|
||||
@@ -429,6 +441,75 @@ func runUpdateLoop(ctx context.Context, args []string) error {
|
||||
return (hostagent.DockerManager{}).RunUpdateLoop(ctx, cfg)
|
||||
}
|
||||
|
||||
func runMonitorLoop(ctx context.Context, args []string) error {
|
||||
cfg, err := parseMonitor(args)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return hostagent.RunMonitorLoop(ctx, cfg)
|
||||
}
|
||||
|
||||
func runMonitorOnce(ctx context.Context, args []string) error {
|
||||
cfg, err := parseMonitor(args)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
cfg.MaxRuns = 1
|
||||
result := hostagent.RunMonitorOnce(ctx, cfg)
|
||||
if err := json.NewEncoder(os.Stdout).Encode(result); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func parseMonitor(args []string) (hostagent.MonitorConfig, error) {
|
||||
fs := flag.NewFlagSet("monitor-loop", flag.ContinueOnError)
|
||||
cfg := hostagent.MonitorConfig{}
|
||||
var intervalSeconds int
|
||||
var initialDelaySeconds int
|
||||
var maxRuns int
|
||||
var restartCooldownSeconds int
|
||||
var staleRestartingSeconds int
|
||||
var tmpMinAgeMinutes int
|
||||
watchContainers := repeatedFlag{}
|
||||
fs.StringVar(&cfg.BackendURL, "backend-url", getenv("RAP_BACKEND_URL", ""), "Control Plane API base URL used for monitor status reports.")
|
||||
fs.StringVar(&cfg.ClusterID, "cluster-id", getenv("RAP_CLUSTER_ID", ""), "Cluster ID.")
|
||||
fs.StringVar(&cfg.NodeID, "node-id", getenv("RAP_NODE_ID", ""), "Already enrolled node ID.")
|
||||
fs.StringVar(&cfg.StateDir, "state-dir", getenv("RAP_NODE_STATE_DIR", hostagent.DefaultStateDir), "Host path containing node-agent identity.json.")
|
||||
fs.StringVar(&cfg.Product, "product", getenv("RAP_MONITOR_PRODUCT", hostagent.DefaultMonitorProduct), "Status product name.")
|
||||
fs.StringVar(&cfg.CurrentVersion, "current-version", getenv("RAP_HOST_AGENT_VERSION", agent.Version), "Current rap-host-agent version.")
|
||||
fs.StringVar(&cfg.DockerBinary, "docker-binary", getenv("RAP_DOCKER_BINARY", "docker"), "Docker CLI binary.")
|
||||
fs.StringVar(&cfg.DiskPath, "disk-path", getenv("RAP_MONITOR_DISK_PATH", "/"), "Filesystem path used for disk usage checks.")
|
||||
fs.StringVar(&cfg.TmpDir, "tmp-dir", getenv("RAP_MONITOR_TMP_DIR", "/tmp"), "Temporary directory cleaned under pressure.")
|
||||
fs.StringVar(&cfg.StatusFile, "status-file", getenv("RAP_MONITOR_STATUS_FILE", ""), "Optional JSON status file written after every run.")
|
||||
fs.IntVar(&intervalSeconds, "interval-seconds", getenvInt("RAP_MONITOR_INTERVAL_SECONDS", hostagent.DefaultMonitorIntervalSeconds), "Seconds between monitor checks.")
|
||||
fs.IntVar(&initialDelaySeconds, "initial-delay-seconds", getenvInt("RAP_MONITOR_INITIAL_DELAY_SECONDS", 0), "Seconds to wait before first monitor check.")
|
||||
fs.IntVar(&maxRuns, "max-runs", getenvInt("RAP_MONITOR_MAX_RUNS", 0), "Maximum monitor iterations. Use 0 to run until stopped.")
|
||||
fs.IntVar(&cfg.DiskWarnPercent, "disk-warn-percent", getenvInt("RAP_MONITOR_DISK_WARN_PERCENT", hostagent.DefaultMonitorDiskWarnPercent), "Disk used percent that reports warning.")
|
||||
fs.IntVar(&cfg.DiskCleanupPercent, "disk-cleanup-percent", getenvInt("RAP_MONITOR_DISK_CLEANUP_PERCENT", hostagent.DefaultMonitorDiskCleanupPercent), "Disk used percent that triggers cleanup.")
|
||||
fs.IntVar(&cfg.DiskCriticalPercent, "disk-critical-percent", getenvInt("RAP_MONITOR_DISK_CRITICAL_PERCENT", hostagent.DefaultMonitorDiskCriticalPercent), "Disk used percent that reports failure after cleanup.")
|
||||
fs.IntVar(&restartCooldownSeconds, "restart-cooldown-seconds", getenvInt("RAP_MONITOR_RESTART_COOLDOWN_SECONDS", hostagent.DefaultMonitorRestartCooldownSec), "Minimum seconds between repeated restarts of the same target.")
|
||||
fs.IntVar(&staleRestartingSeconds, "stale-restarting-seconds", getenvInt("RAP_MONITOR_STALE_RESTARTING_SECONDS", hostagent.DefaultMonitorStaleRestartingSec), "Seconds after which docker restarting state is considered stuck.")
|
||||
fs.IntVar(&tmpMinAgeMinutes, "tmp-min-age-minutes", getenvInt("RAP_MONITOR_TMP_MIN_AGE_MINUTES", hostagent.DefaultMonitorTmpMinAgeMinutes), "Minimum age for /tmp rap-* and go-build* cleanup.")
|
||||
fs.BoolVar(&cfg.RestartContainers, "restart-containers", getenvBool("RAP_MONITOR_RESTART_CONTAINERS", true), "Start/restart watched containers when they are stopped, unhealthy, or stuck restarting.")
|
||||
fs.BoolVar(&cfg.CleanupDocker, "cleanup-docker", getenvBool("RAP_MONITOR_CLEANUP_DOCKER", true), "Run safe docker prune cleanup when disk is above cleanup threshold.")
|
||||
fs.Var(&watchContainers, "watch-container", "Docker container to watch and heal; may be repeated.")
|
||||
if err := fs.Parse(args); err != nil {
|
||||
return hostagent.MonitorConfig{}, err
|
||||
}
|
||||
cfg.WatchContainers = watchContainers
|
||||
cfg.Interval = time.Duration(intervalSeconds) * time.Second
|
||||
cfg.InitialDelay = time.Duration(initialDelaySeconds) * time.Second
|
||||
cfg.MaxRuns = maxRuns
|
||||
cfg.RestartCooldown = time.Duration(restartCooldownSeconds) * time.Second
|
||||
cfg.StaleRestartingAfter = time.Duration(staleRestartingSeconds) * time.Second
|
||||
cfg.TmpMinAge = time.Duration(tmpMinAgeMinutes) * time.Minute
|
||||
cfg.Logf = func(format string, args ...any) {
|
||||
fmt.Printf(format+"\n", args...)
|
||||
}
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
func firstNonEmptyLocal(values ...string) string {
|
||||
for _, value := range values {
|
||||
if strings.TrimSpace(value) != "" {
|
||||
@@ -444,6 +525,8 @@ func runInstallUpdater(ctx context.Context, args []string) error {
|
||||
service := hostagent.UpdateServiceConfig{}
|
||||
var dryRun bool
|
||||
var selfUpdater bool
|
||||
var monitorEnabled bool
|
||||
monitorContainers := repeatedFlag{}
|
||||
fs.StringVar(&runtimeCfg.BackendURL, "backend-url", getenv("RAP_BACKEND_URL", ""), "Control Plane API base URL.")
|
||||
fs.StringVar(&runtimeCfg.ClusterID, "cluster-id", getenv("RAP_CLUSTER_ID", ""), "Cluster ID.")
|
||||
fs.StringVar(&runtimeCfg.ContainerName, "container-name", getenv("RAP_NODE_AGENT_CONTAINER", hostagent.DefaultContainerName), "Docker container name to update.")
|
||||
@@ -456,6 +539,14 @@ func runInstallUpdater(ctx context.Context, args []string) error {
|
||||
fs.IntVar(&service.HealthTimeoutSec, "health-timeout-seconds", getenvInt("RAP_UPDATE_HEALTH_TIMEOUT_SECONDS", 30), "Updated container running-state timeout in seconds.")
|
||||
fs.StringVar(&service.BinaryInstallPath, "binary-path", getenv("RAP_HOST_AGENT_BINARY_PATH", hostagent.DefaultHostAgentInstallPath), "Persistent host path for rap-host-agent binary used by the service.")
|
||||
fs.BoolVar(&selfUpdater, "self-updater-enabled", getenvBool("RAP_HOST_AGENT_SELF_UPDATE_ENABLED", true), "Install and start one global host-agent binary self-updater service.")
|
||||
fs.BoolVar(&monitorEnabled, "monitor-enabled", getenvBool("RAP_HOST_AGENT_MONITOR_ENABLED", true), "Install and start the local host monitor service.")
|
||||
fs.IntVar(&service.MonitorIntervalSec, "monitor-interval-seconds", getenvInt("RAP_MONITOR_INTERVAL_SECONDS", hostagent.DefaultMonitorIntervalSeconds), "Seconds between monitor checks.")
|
||||
fs.StringVar(&service.MonitorStatusFile, "monitor-status-file", getenv("RAP_MONITOR_STATUS_FILE", ""), "Optional JSON status file written by the monitor.")
|
||||
fs.IntVar(&service.MonitorDiskWarn, "monitor-disk-warn-percent", getenvInt("RAP_MONITOR_DISK_WARN_PERCENT", hostagent.DefaultMonitorDiskWarnPercent), "Disk used percent that reports warning.")
|
||||
fs.IntVar(&service.MonitorDiskCleanup, "monitor-disk-cleanup-percent", getenvInt("RAP_MONITOR_DISK_CLEANUP_PERCENT", hostagent.DefaultMonitorDiskCleanupPercent), "Disk used percent that triggers cleanup.")
|
||||
fs.IntVar(&service.MonitorDiskCritical, "monitor-disk-critical-percent", getenvInt("RAP_MONITOR_DISK_CRITICAL_PERCENT", hostagent.DefaultMonitorDiskCriticalPercent), "Disk used percent that reports failure after cleanup.")
|
||||
fs.BoolVar(&service.MonitorCleanupDocker, "monitor-cleanup-docker", getenvBool("RAP_MONITOR_CLEANUP_DOCKER", true), "Run safe docker prune cleanup when disk is above cleanup threshold.")
|
||||
fs.Var(&monitorContainers, "monitor-container", "Extra Docker container watched by monitor; may be repeated.")
|
||||
fs.BoolVar(&dryRun, "dry-run", false, "Print the systemd unit without installing it.")
|
||||
if err := fs.Parse(args); err != nil {
|
||||
return err
|
||||
@@ -465,6 +556,8 @@ func runInstallUpdater(ctx context.Context, args []string) error {
|
||||
service.DryRun = dryRun
|
||||
service.InstallSelfUpdater = selfUpdater
|
||||
service.SelfUpdateVersion = agent.Version
|
||||
service.InstallMonitor = monitorEnabled
|
||||
service.MonitorContainers = monitorContainers
|
||||
result, err := (hostagent.DockerManager{}).InstallUpdateService(ctx, service)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -474,9 +567,12 @@ func runInstallUpdater(ctx context.Context, args []string) error {
|
||||
if result.SelfUnit != "" {
|
||||
fmt.Print(result.SelfUnit)
|
||||
}
|
||||
if result.MonitorUnit != "" {
|
||||
fmt.Print(result.MonitorUnit)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
fmt.Printf("updater_service=%s unit=%s binary=%s started=%t self_updater=%s\n", result.UnitName, result.UnitPath, result.BinaryPath, result.Started, result.SelfUnitName)
|
||||
fmt.Printf("updater_service=%s unit=%s binary=%s started=%t self_updater=%s monitor_service=%s\n", result.UnitName, result.UnitPath, result.BinaryPath, result.Started, result.SelfUnitName, result.MonitorUnitName)
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -572,6 +668,7 @@ func parseInstall(args []string) (installCommandConfig, error) {
|
||||
var installToken string
|
||||
var autoUpdateEnabled bool
|
||||
autoUpdate := hostagent.UpdateServiceConfig{}
|
||||
monitorContainers := repeatedFlag{}
|
||||
fs.StringVar(&cfg.BackendURL, "backend-url", getenv("RAP_BACKEND_URL", ""), "Control Plane API base URL.")
|
||||
fs.StringVar(&cfg.ClusterID, "cluster-id", getenv("RAP_CLUSTER_ID", ""), "Cluster ID.")
|
||||
fs.StringVar(&cfg.JoinToken, "join-token", getenv("RAP_JOIN_TOKEN", ""), "One-time join token for first enrollment.")
|
||||
@@ -591,6 +688,7 @@ func parseInstall(args []string) (installCommandConfig, error) {
|
||||
fs.BoolVar(&dryRun, "dry-run", false, "Print the docker command with secrets redacted.")
|
||||
fs.BoolVar(&autoUpdateEnabled, "auto-update-enabled", getenvBool("RAP_AUTO_UPDATE_ENABLED", true), "Install and start the local update-loop service.")
|
||||
fs.BoolVar(&autoUpdate.InstallSelfUpdater, "host-agent-self-update-enabled", getenvBool("RAP_HOST_AGENT_SELF_UPDATE_ENABLED", true), "Install and start one global host-agent binary self-updater service.")
|
||||
fs.BoolVar(&autoUpdate.InstallMonitor, "host-agent-monitor-enabled", getenvBool("RAP_HOST_AGENT_MONITOR_ENABLED", true), "Install and start the local host monitor service.")
|
||||
fs.StringVar(&autoUpdate.CurrentVersion, "auto-update-current-version", getenv("RAP_NODE_AGENT_VERSION", agent.Version), "Initial node-agent version used by update-loop before the first successful update.")
|
||||
fs.StringVar(&autoUpdate.SelfUpdateVersion, "host-agent-current-version", getenv("RAP_HOST_AGENT_VERSION", agent.Version), "Initial host-agent binary version used by the self-updater.")
|
||||
fs.StringVar(&autoUpdate.Channel, "auto-update-channel", getenv("RAP_UPDATE_CHANNEL", ""), "Optional update channel override for update-loop.")
|
||||
@@ -599,6 +697,12 @@ func parseInstall(args []string) (installCommandConfig, error) {
|
||||
fs.Float64Var(&autoUpdate.Jitter, "auto-update-jitter", getenvFloat("RAP_UPDATE_JITTER", 0.15), "Update-loop interval jitter, 0..1.")
|
||||
fs.IntVar(&autoUpdate.HealthTimeoutSec, "auto-update-health-timeout-seconds", getenvInt("RAP_UPDATE_HEALTH_TIMEOUT_SECONDS", 30), "Updated container running-state timeout in seconds.")
|
||||
fs.StringVar(&autoUpdate.BinaryInstallPath, "auto-update-binary-path", getenv("RAP_HOST_AGENT_BINARY_PATH", hostagent.DefaultHostAgentInstallPath), "Persistent host path for rap-host-agent binary used by the service.")
|
||||
fs.IntVar(&autoUpdate.MonitorIntervalSec, "monitor-interval-seconds", getenvInt("RAP_MONITOR_INTERVAL_SECONDS", hostagent.DefaultMonitorIntervalSeconds), "Seconds between monitor checks.")
|
||||
fs.StringVar(&autoUpdate.MonitorStatusFile, "monitor-status-file", getenv("RAP_MONITOR_STATUS_FILE", ""), "Optional JSON status file written by the monitor.")
|
||||
fs.IntVar(&autoUpdate.MonitorDiskWarn, "monitor-disk-warn-percent", getenvInt("RAP_MONITOR_DISK_WARN_PERCENT", hostagent.DefaultMonitorDiskWarnPercent), "Disk used percent that reports warning.")
|
||||
fs.IntVar(&autoUpdate.MonitorDiskCleanup, "monitor-disk-cleanup-percent", getenvInt("RAP_MONITOR_DISK_CLEANUP_PERCENT", hostagent.DefaultMonitorDiskCleanupPercent), "Disk used percent that triggers cleanup.")
|
||||
fs.IntVar(&autoUpdate.MonitorDiskCritical, "monitor-disk-critical-percent", getenvInt("RAP_MONITOR_DISK_CRITICAL_PERCENT", hostagent.DefaultMonitorDiskCriticalPercent), "Disk used percent that reports failure after cleanup.")
|
||||
fs.BoolVar(&autoUpdate.MonitorCleanupDocker, "monitor-cleanup-docker", getenvBool("RAP_MONITOR_CLEANUP_DOCKER", true), "Run safe docker prune cleanup when disk is above cleanup threshold.")
|
||||
fs.BoolVar(&cfg.WorkloadSupervisionEnabled, "workload-supervision-enabled", getenvBool("RAP_WORKLOAD_SUPERVISION_ENABLED", false), "Enable node-agent workload status reporting.")
|
||||
fs.BoolVar(&cfg.MeshSyntheticRuntimeEnabled, "mesh-synthetic-runtime-enabled", getenvBool("RAP_MESH_SYNTHETIC_RUNTIME_ENABLED", false), "Enable synthetic mesh runtime.")
|
||||
fs.BoolVar(&cfg.MeshProductionForwardingEnabled, "mesh-production-forwarding-enabled", getenvBool("RAP_MESH_PRODUCTION_FORWARDING_ENABLED", false), "Enable production forwarding gate; runtime still fail-closed if unavailable.")
|
||||
@@ -622,12 +726,14 @@ func parseInstall(args []string) (installCommandConfig, error) {
|
||||
fs.Var(&extraEnv, "env", "Extra KEY=VALUE env passed to node-agent container; may be repeated.")
|
||||
fs.Var(&extraRunArg, "docker-run-arg", "Extra raw docker run argument; may be repeated.")
|
||||
fs.Var(&imageArtifactURL, "image-artifact-url", "Docker image tar artifact URL to docker load before running; may be repeated.")
|
||||
fs.Var(&monitorContainers, "monitor-container", "Extra Docker container watched by monitor; may be repeated.")
|
||||
if err := fs.Parse(args); err != nil {
|
||||
return installCommandConfig{}, err
|
||||
}
|
||||
cfg.ExtraEnv = extraEnv
|
||||
cfg.AdditionalDockerRunArgs = extraRunArg
|
||||
cfg.ImageArtifactURLs = append(cfg.ImageArtifactURLs, imageArtifactURL...)
|
||||
autoUpdate.MonitorContainers = monitorContainers
|
||||
if strings.TrimSpace(profileURL) != "" || strings.TrimSpace(installToken) != "" {
|
||||
profile, err := hostagent.FetchDockerInstallProfile(context.Background(), hostagent.ProfileRequest{
|
||||
URL: profileURL,
|
||||
@@ -738,6 +844,8 @@ func usage() {
|
||||
rap-host-agent install-updater -backend-url URL -cluster-id ID -state-dir DIR -container-name NAME
|
||||
rap-host-agent update-host-agent -backend-url URL -cluster-id ID -state-dir DIR
|
||||
rap-host-agent update-host-agent-loop -backend-url URL -cluster-id ID -state-dir DIR
|
||||
rap-host-agent monitor-loop -backend-url URL -cluster-id ID -state-dir DIR --watch-container NAME
|
||||
rap-host-agent monitor-once -backend-url URL -cluster-id ID -state-dir DIR --watch-container NAME
|
||||
rap-host-agent update -backend-url URL -cluster-id ID -node-id ID [-container-name NAME]
|
||||
rap-host-agent update-loop -backend-url URL -cluster-id ID -node-id ID [-container-name NAME]
|
||||
rap-host-agent status [-container-name NAME]`)
|
||||
|
||||
@@ -222,6 +222,11 @@ type NodeVPNAssignmentLeaseRenewRequest struct {
|
||||
TTLSeconds int `json:"ttl_seconds"`
|
||||
}
|
||||
|
||||
// NodeVPNAssignmentLeaseAcquireRequest is the JSON body sent by
// Client.AcquireNodeVPNAssignmentLease.
type NodeVPNAssignmentLeaseAcquireRequest struct {
	// TTLSeconds is always serialized as "ttl_seconds", including when zero.
	TTLSeconds int `json:"ttl_seconds"`

	// Metadata is free-form data serialized as "metadata"; the key is
	// omitted from the payload when the map is empty or nil.
	Metadata map[string]any `json:"metadata,omitempty"`
}
|
||||
|
||||
type MeshLinkObservationRequest struct {
|
||||
SourceNodeID string `json:"source_node_id"`
|
||||
TargetNodeID string `json:"target_node_id"`
|
||||
@@ -658,6 +663,17 @@ func (c *Client) ReportNodeVPNAssignmentStatus(ctx context.Context, clusterID, n
|
||||
return c.postJSON(ctx, path, request, nil)
|
||||
}
|
||||
|
||||
func (c *Client) AcquireNodeVPNAssignmentLease(ctx context.Context, clusterID, nodeID, vpnConnectionID string, request NodeVPNAssignmentLeaseAcquireRequest) (*NodeVPNAssignmentLease, error) {
|
||||
var response struct {
|
||||
Lease NodeVPNAssignmentLease `json:"lease"`
|
||||
}
|
||||
path := fmt.Sprintf("/clusters/%s/nodes/%s/vpn/assignments/%s/lease/acquire", clusterID, nodeID, vpnConnectionID)
|
||||
if err := c.postJSON(ctx, path, request, &response); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &response.Lease, nil
|
||||
}
|
||||
|
||||
func (c *Client) RenewNodeVPNAssignmentLease(ctx context.Context, clusterID, nodeID, vpnConnectionID, leaseID string, request NodeVPNAssignmentLeaseRenewRequest) error {
|
||||
path := fmt.Sprintf("/clusters/%s/nodes/%s/vpn/assignments/%s/lease/%s/renew", clusterID, nodeID, vpnConnectionID, leaseID)
|
||||
return c.postJSON(ctx, path, request, nil)
|
||||
|
||||
@@ -40,6 +40,10 @@ type Config struct {
|
||||
MeshSyntheticConfigPath string
|
||||
MeshPeerEndpointsJSON string
|
||||
MeshSyntheticRoutesJSON string
|
||||
RemoteWorkspaceRealAdapterEnabled bool
|
||||
RemoteWorkspaceRealAdapterCommand string
|
||||
RemoteWorkspaceRealAdapterArgsJSON string
|
||||
RemoteWorkspaceRealAdapterWorkDir string
|
||||
}
|
||||
|
||||
func Load(args []string, env map[string]string) (Config, error) {
|
||||
@@ -73,6 +77,10 @@ func Load(args []string, env map[string]string) (Config, error) {
|
||||
fs.StringVar(&cfg.MeshSyntheticConfigPath, "mesh-synthetic-config", getEnv(env, "RAP_MESH_SYNTHETIC_CONFIG", ""), "Path to scoped synthetic mesh config snapshot. Preferred over debug JSON env.")
|
||||
fs.StringVar(&cfg.MeshPeerEndpointsJSON, "mesh-peer-endpoints-json", getEnv(env, "RAP_MESH_PEER_ENDPOINTS_JSON", ""), "JSON object mapping peer node_id to synthetic mesh endpoint URL.")
|
||||
fs.StringVar(&cfg.MeshSyntheticRoutesJSON, "mesh-synthetic-routes-json", getEnv(env, "RAP_MESH_SYNTHETIC_ROUTES_JSON", ""), "JSON array of synthetic mesh routes for test-only runtime.")
|
||||
fs.BoolVar(&cfg.RemoteWorkspaceRealAdapterEnabled, "remote-workspace-real-adapter-enabled", getEnvBool(env, "RAP_REMOTE_WORKSPACE_REAL_ADAPTER_ENABLED", false), "Request future real remote workspace adapter supervision. Disabled until the real runtime stage is implemented.")
|
||||
fs.StringVar(&cfg.RemoteWorkspaceRealAdapterCommand, "remote-workspace-real-adapter-command", getEnv(env, "RAP_REMOTE_WORKSPACE_REAL_ADAPTER_COMMAND", ""), "Future real remote workspace adapter command path. Redacted from status payloads.")
|
||||
fs.StringVar(&cfg.RemoteWorkspaceRealAdapterArgsJSON, "remote-workspace-real-adapter-args-json", getEnv(env, "RAP_REMOTE_WORKSPACE_REAL_ADAPTER_ARGS_JSON", ""), "Future real remote workspace adapter args JSON. Redacted from status payloads.")
|
||||
fs.StringVar(&cfg.RemoteWorkspaceRealAdapterWorkDir, "remote-workspace-real-adapter-workdir", getEnv(env, "RAP_REMOTE_WORKSPACE_REAL_ADAPTER_WORKDIR", ""), "Future real remote workspace adapter working directory. Redacted from status payloads.")
|
||||
heartbeatSeconds := getEnvInt(env, "RAP_HEARTBEAT_INTERVAL_SECONDS", 15)
|
||||
fs.DurationVar(&cfg.HeartbeatInterval, "heartbeat-interval", time.Duration(heartbeatSeconds)*time.Second, "Heartbeat interval.")
|
||||
enrollmentPollIntervalSeconds := getEnvInt(env, "RAP_ENROLLMENT_POLL_INTERVAL_SECONDS", 5)
|
||||
@@ -100,6 +108,9 @@ func Load(args []string, env map[string]string) (Config, error) {
|
||||
cfg.MeshSyntheticConfigPath = strings.TrimSpace(cfg.MeshSyntheticConfigPath)
|
||||
cfg.MeshPeerEndpointsJSON = strings.TrimSpace(cfg.MeshPeerEndpointsJSON)
|
||||
cfg.MeshSyntheticRoutesJSON = strings.TrimSpace(cfg.MeshSyntheticRoutesJSON)
|
||||
cfg.RemoteWorkspaceRealAdapterCommand = strings.TrimSpace(cfg.RemoteWorkspaceRealAdapterCommand)
|
||||
cfg.RemoteWorkspaceRealAdapterArgsJSON = strings.TrimSpace(cfg.RemoteWorkspaceRealAdapterArgsJSON)
|
||||
cfg.RemoteWorkspaceRealAdapterWorkDir = strings.TrimSpace(cfg.RemoteWorkspaceRealAdapterWorkDir)
|
||||
if cfg.BackendURL == "" {
|
||||
return Config{}, errors.New("backend URL is required")
|
||||
}
|
||||
|
||||
@@ -34,6 +34,10 @@ func TestLoadConfigFromEnvAndArgs(t *testing.T) {
|
||||
"RAP_MESH_SYNTHETIC_CONFIG": "/tmp/rap-node/mesh-synthetic.json",
|
||||
"RAP_MESH_PEER_ENDPOINTS_JSON": `{"node-b":"http://127.0.0.1:19002"}`,
|
||||
"RAP_MESH_SYNTHETIC_ROUTES_JSON": `[{"route_id":"route-1"}]`,
|
||||
"RAP_REMOTE_WORKSPACE_REAL_ADAPTER_ENABLED": "true",
|
||||
"RAP_REMOTE_WORKSPACE_REAL_ADAPTER_COMMAND": " /opt/rap/bin/rdp-worker ",
|
||||
"RAP_REMOTE_WORKSPACE_REAL_ADAPTER_ARGS_JSON": ` ["--future-probe"] `,
|
||||
"RAP_REMOTE_WORKSPACE_REAL_ADAPTER_WORKDIR": " /var/lib/rap-node-agent/rdp-worker ",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("load config: %v", err)
|
||||
@@ -85,6 +89,12 @@ func TestLoadConfigFromEnvAndArgs(t *testing.T) {
|
||||
if cfg.MeshPeerEndpointsJSON == "" || cfg.MeshSyntheticRoutesJSON == "" {
|
||||
t.Fatalf("mesh live synthetic config was not loaded: %+v", cfg)
|
||||
}
|
||||
if !cfg.RemoteWorkspaceRealAdapterEnabled ||
|
||||
cfg.RemoteWorkspaceRealAdapterCommand != "/opt/rap/bin/rdp-worker" ||
|
||||
cfg.RemoteWorkspaceRealAdapterArgsJSON != `["--future-probe"]` ||
|
||||
cfg.RemoteWorkspaceRealAdapterWorkDir != "/var/lib/rap-node-agent/rdp-worker" {
|
||||
t.Fatalf("unexpected remote workspace real adapter config: %+v", cfg)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadConfigDefaultsEnrollmentPollingToNoTimeout(t *testing.T) {
|
||||
@@ -98,6 +108,12 @@ func TestLoadConfigDefaultsEnrollmentPollingToNoTimeout(t *testing.T) {
|
||||
if cfg.EnrollmentPollTimeout != 0 {
|
||||
t.Fatalf("EnrollmentPollTimeout = %s, want no timeout", cfg.EnrollmentPollTimeout)
|
||||
}
|
||||
if cfg.RemoteWorkspaceRealAdapterEnabled ||
|
||||
cfg.RemoteWorkspaceRealAdapterCommand != "" ||
|
||||
cfg.RemoteWorkspaceRealAdapterArgsJSON != "" ||
|
||||
cfg.RemoteWorkspaceRealAdapterWorkDir != "" {
|
||||
t.Fatalf("real adapter config should default disabled and empty: %+v", cfg)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadConfigRejectsNegativeProductionObservationSinkCapacity(t *testing.T) {
|
||||
|
||||
@@ -0,0 +1,27 @@
|
||||
//go:build !windows
|
||||
|
||||
package hostagent
|
||||
|
||||
import "syscall"
|
||||
|
||||
func diskUsage(path string) (DiskUsage, error) {
|
||||
var stat syscall.Statfs_t
|
||||
if err := syscall.Statfs(path, &stat); err != nil {
|
||||
return DiskUsage{}, err
|
||||
}
|
||||
total := stat.Blocks * uint64(stat.Bsize)
|
||||
free := stat.Bavail * uint64(stat.Bsize)
|
||||
used := total - free
|
||||
percent := 0
|
||||
if total > 0 {
|
||||
percent = int((used*100 + total - 1) / total)
|
||||
}
|
||||
return DiskUsage{
|
||||
Path: path,
|
||||
TotalBytes: total,
|
||||
FreeBytes: free,
|
||||
UsedBytes: used,
|
||||
UsedPercent: percent,
|
||||
AvailablePercent: 100 - percent,
|
||||
}, nil
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
//go:build windows
|
||||
|
||||
package hostagent
|
||||
|
||||
import "fmt"
|
||||
|
||||
func diskUsage(path string) (DiskUsage, error) {
|
||||
return DiskUsage{Path: path}, fmt.Errorf("disk usage monitor is not implemented on windows")
|
||||
}
|
||||
@@ -0,0 +1,494 @@
|
||||
package hostagent
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/state"
|
||||
)
|
||||
|
||||
// Defaults for the host monitor. normalizeMonitorConfig substitutes most of
// these when the corresponding MonitorConfig field is left at its zero value.
const (
	// Reporting identity.
	DefaultMonitorProduct = "rap-host-agent"
	// NOTE(review): presumably the phase label attached to status reports;
	// confirm against the reporting code.
	DefaultMonitorPhase = "host_monitor"

	// Scheduling: seconds between two monitor passes.
	DefaultMonitorIntervalSeconds = 60

	// Disk thresholds, in percent used. RunMonitorOnce compares them with
	// ">=": warn marks the result "warning", cleanup triggers a cleanup
	// pass, critical marks the result "failed".
	DefaultMonitorDiskWarnPercent     = 80
	DefaultMonitorDiskCleanupPercent  = 85
	DefaultMonitorDiskCriticalPercent = 95

	// Remediation timing.
	DefaultMonitorRestartCooldownSec = 300 // seconds
	DefaultMonitorTmpMinAgeMinutes   = 240 // minutes
	DefaultMonitorStaleRestartingSec = 180 // seconds

	// Host resources.
	DefaultMonitorDockerBinary = "docker"
	DefaultMonitorDiskPath     = "/"
	DefaultMonitorTmpDir       = "/tmp"

	// DefaultMonitorStatusSchemaVersion is stamped on every MonitorResult.
	DefaultMonitorStatusSchemaVersion = "rap.host_monitor_status.v1"
	// NOTE(review): looks like a status label for a successful remediation;
	// confirm where it is emitted.
	DefaultMonitorRemediationSucceeded = "remediated"
)
|
||||
|
||||
type MonitorConfig struct {
|
||||
BackendURL string
|
||||
ClusterID string
|
||||
NodeID string
|
||||
StateDir string
|
||||
Product string
|
||||
CurrentVersion string
|
||||
Interval time.Duration
|
||||
InitialDelay time.Duration
|
||||
MaxRuns int
|
||||
DockerBinary string
|
||||
WatchContainers []string
|
||||
RestartContainers bool
|
||||
RestartCooldown time.Duration
|
||||
StaleRestartingAfter time.Duration
|
||||
DiskPath string
|
||||
TmpDir string
|
||||
DiskWarnPercent int
|
||||
DiskCleanupPercent int
|
||||
DiskCriticalPercent int
|
||||
TmpMinAge time.Duration
|
||||
CleanupDocker bool
|
||||
StatusFile string
|
||||
Runner CommandRunner
|
||||
Logf func(format string, args ...any)
|
||||
restartHistory map[string]time.Time
|
||||
}
|
||||
|
||||
// DiskUsage is a point-in-time measurement of one filesystem, produced by
// diskUsage and embedded in MonitorResult.
type DiskUsage struct {
	// Path is the path that was measured, as passed to diskUsage.
	Path string `json:"path"`

	// Sizes in bytes. UsedBytes is TotalBytes minus FreeBytes.
	TotalBytes uint64 `json:"total_bytes"`
	FreeBytes  uint64 `json:"free_bytes"`
	UsedBytes  uint64 `json:"used_bytes"`

	// UsedPercent is the used share of TotalBytes; AvailablePercent is
	// 100 minus UsedPercent.
	UsedPercent      int `json:"used_percent"`
	AvailablePercent int `json:"available_percent"`
}
|
||||
|
||||
// MonitorContainerStatus is the per-container entry of a MonitorResult.
// RunMonitorOnce treats a container as degraded when it is not Running, is
// Restarting, has Health "unhealthy", or carries a LastActionError.
type MonitorContainerStatus struct {
	// Name is the container name the monitor was asked to watch.
	Name string `json:"name"`

	// Observed state. NOTE(review): these appear to mirror the fields of
	// the same name in docker inspect output; confirm in
	// inspectMonitorContainer.
	Status       string `json:"status,omitempty"`
	Running      bool   `json:"running"`
	Restarting   bool   `json:"restarting"`
	ExitCode     int    `json:"exit_code,omitempty"`
	Health       string `json:"health,omitempty"`
	RestartCount int    `json:"restart_count,omitempty"`
	StartedAt    string `json:"started_at,omitempty"`
	FinishedAt   string `json:"finished_at,omitempty"`

	// Outcome of the most recent action on this container.
	// LastActionError is also set when inspecting the container fails.
	LastAction      string `json:"last_action,omitempty"`
	LastActionOK    bool   `json:"last_action_ok,omitempty"`
	LastActionError string `json:"last_action_error,omitempty"`
}
|
||||
|
||||
// MonitorAction records one remediation step taken during a monitor pass.
// Any action with Success == false makes RunMonitorOnce mark the whole result
// "failed" and, when Error is non-empty, copy Error into the result's Errors.
type MonitorAction struct {
	Kind    string `json:"kind"`             // what was done
	Target  string `json:"target,omitempty"` // what it was done to, if anything
	Reason  string `json:"reason,omitempty"` // why it was triggered
	Success bool   `json:"success"`          // always serialized
	Error   string `json:"error,omitempty"`  // failure detail
}
|
||||
|
||||
type MonitorResult struct {
|
||||
SchemaVersion string `json:"schema_version"`
|
||||
Status string `json:"status"`
|
||||
ObservedAt time.Time `json:"observed_at"`
|
||||
Disk *DiskUsage `json:"disk,omitempty"`
|
||||
Containers []MonitorContainerStatus `json:"containers,omitempty"`
|
||||
Actions []MonitorAction `json:"actions,omitempty"`
|
||||
Errors []string `json:"errors,omitempty"`
|
||||
}
|
||||
|
||||
// monitorDockerInspect is the subset of one `docker inspect` JSON element
// that the monitor decodes. Field names follow the keys of the CLI output.
type monitorDockerInspect struct {
	Name         string `json:"Name"`
	RestartCount int    `json:"RestartCount"`

	State struct {
		Status     string `json:"Status"`
		Running    bool   `json:"Running"`
		Restarting bool   `json:"Restarting"`
		ExitCode   int    `json:"ExitCode"`
		Error      string `json:"Error"`
		StartedAt  string `json:"StartedAt"`
		FinishedAt string `json:"FinishedAt"`

		// Health stays nil when the inspected JSON has no "Health" object.
		Health *struct {
			Status string `json:"Status"`
		} `json:"Health"`
	} `json:"State"`
}
|
||||
|
||||
func RunMonitorLoop(ctx context.Context, cfg MonitorConfig) error {
|
||||
cfg = normalizeMonitorConfig(cfg)
|
||||
if cfg.InitialDelay > 0 {
|
||||
if err := sleepContext(ctx, cfg.InitialDelay); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
runs := 0
|
||||
restartHistory := map[string]time.Time{}
|
||||
for {
|
||||
cfg.restartHistory = restartHistory
|
||||
result := RunMonitorOnce(ctx, cfg)
|
||||
logMonitorResult(cfg, result)
|
||||
if err := writeMonitorStatusFile(cfg.StatusFile, result); err != nil && cfg.Logf != nil {
|
||||
cfg.Logf("monitor status-file failed: %v", err)
|
||||
}
|
||||
if err := reportMonitorStatus(ctx, cfg, result); err != nil && cfg.Logf != nil {
|
||||
cfg.Logf("monitor report failed: %v", err)
|
||||
}
|
||||
runs++
|
||||
if cfg.MaxRuns > 0 && runs >= cfg.MaxRuns {
|
||||
return nil
|
||||
}
|
||||
if err := sleepContext(ctx, cfg.Interval); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func RunMonitorOnce(ctx context.Context, cfg MonitorConfig) MonitorResult {
|
||||
cfg = normalizeMonitorConfig(cfg)
|
||||
result := MonitorResult{
|
||||
SchemaVersion: DefaultMonitorStatusSchemaVersion,
|
||||
Status: "ok",
|
||||
ObservedAt: time.Now().UTC(),
|
||||
}
|
||||
if usage, err := diskUsage(cfg.DiskPath); err != nil {
|
||||
result.Errors = append(result.Errors, fmt.Sprintf("disk usage %s: %v", cfg.DiskPath, err))
|
||||
} else {
|
||||
result.Disk = &usage
|
||||
if usage.UsedPercent >= cfg.DiskWarnPercent {
|
||||
result.Status = "warning"
|
||||
}
|
||||
if usage.UsedPercent >= cfg.DiskCleanupPercent {
|
||||
runCleanup(ctx, cfg, &result, fmt.Sprintf("disk_used_%d_percent", usage.UsedPercent))
|
||||
if refreshed, err := diskUsage(cfg.DiskPath); err == nil {
|
||||
result.Disk = &refreshed
|
||||
}
|
||||
}
|
||||
if result.Disk != nil && result.Disk.UsedPercent >= cfg.DiskCriticalPercent {
|
||||
result.Status = "failed"
|
||||
result.Errors = append(result.Errors, fmt.Sprintf("disk %s critical: %d%% used", cfg.DiskPath, result.Disk.UsedPercent))
|
||||
}
|
||||
}
|
||||
for _, name := range uniqueTrimmed(cfg.WatchContainers) {
|
||||
status := inspectMonitorContainer(ctx, cfg, name)
|
||||
if cfg.RestartContainers {
|
||||
remediateMonitorContainer(ctx, cfg, &status, &result)
|
||||
}
|
||||
if !status.Running || status.Health == "unhealthy" || status.Restarting || status.LastActionError != "" {
|
||||
if result.Status == "ok" {
|
||||
result.Status = "warning"
|
||||
}
|
||||
}
|
||||
result.Containers = append(result.Containers, status)
|
||||
}
|
||||
for _, action := range result.Actions {
|
||||
if !action.Success {
|
||||
result.Status = "failed"
|
||||
if action.Error != "" {
|
||||
result.Errors = append(result.Errors, action.Error)
|
||||
}
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func normalizeMonitorConfig(cfg MonitorConfig) MonitorConfig {
|
||||
cfg.BackendURL = strings.TrimRight(strings.TrimSpace(cfg.BackendURL), "/")
|
||||
cfg.ClusterID = strings.TrimSpace(cfg.ClusterID)
|
||||
cfg.NodeID = strings.TrimSpace(cfg.NodeID)
|
||||
cfg.StateDir = strings.TrimSpace(cfg.StateDir)
|
||||
cfg.Product = firstNonEmpty(cfg.Product, DefaultMonitorProduct)
|
||||
if cfg.Interval <= 0 {
|
||||
cfg.Interval = time.Duration(DefaultMonitorIntervalSeconds) * time.Second
|
||||
}
|
||||
if cfg.DockerBinary == "" {
|
||||
cfg.DockerBinary = DefaultMonitorDockerBinary
|
||||
}
|
||||
if cfg.DiskPath == "" {
|
||||
cfg.DiskPath = DefaultMonitorDiskPath
|
||||
}
|
||||
if cfg.TmpDir == "" {
|
||||
cfg.TmpDir = DefaultMonitorTmpDir
|
||||
}
|
||||
if cfg.DiskWarnPercent == 0 {
|
||||
cfg.DiskWarnPercent = DefaultMonitorDiskWarnPercent
|
||||
}
|
||||
if cfg.DiskCleanupPercent == 0 {
|
||||
cfg.DiskCleanupPercent = DefaultMonitorDiskCleanupPercent
|
||||
}
|
||||
if cfg.DiskCriticalPercent == 0 {
|
||||
cfg.DiskCriticalPercent = DefaultMonitorDiskCriticalPercent
|
||||
}
|
||||
if cfg.RestartCooldown == 0 {
|
||||
cfg.RestartCooldown = time.Duration(DefaultMonitorRestartCooldownSec) * time.Second
|
||||
}
|
||||
if cfg.StaleRestartingAfter == 0 {
|
||||
cfg.StaleRestartingAfter = time.Duration(DefaultMonitorStaleRestartingSec) * time.Second
|
||||
}
|
||||
if cfg.TmpMinAge == 0 {
|
||||
cfg.TmpMinAge = time.Duration(DefaultMonitorTmpMinAgeMinutes) * time.Minute
|
||||
}
|
||||
if cfg.Runner == nil {
|
||||
cfg.Runner = ExecRunner{}
|
||||
}
|
||||
return cfg
|
||||
}
|
||||
|
||||
func inspectMonitorContainer(ctx context.Context, cfg MonitorConfig, name string) MonitorContainerStatus {
|
||||
out := MonitorContainerStatus{Name: name}
|
||||
raw, err := cfg.Runner.Run(ctx, cfg.DockerBinary, "inspect", name)
|
||||
if err != nil {
|
||||
out.LastActionError = strings.TrimSpace(err.Error())
|
||||
return out
|
||||
}
|
||||
var inspected []monitorDockerInspect
|
||||
if err := json.Unmarshal([]byte(raw), &inspected); err != nil {
|
||||
out.LastActionError = fmt.Sprintf("parse docker inspect: %v", err)
|
||||
return out
|
||||
}
|
||||
if len(inspected) == 0 {
|
||||
out.LastActionError = "docker inspect returned no containers"
|
||||
return out
|
||||
}
|
||||
item := inspected[0]
|
||||
out.Name = strings.TrimPrefix(firstNonEmpty(item.Name, name), "/")
|
||||
out.Status = item.State.Status
|
||||
out.Running = item.State.Running
|
||||
out.Restarting = item.State.Restarting
|
||||
out.ExitCode = item.State.ExitCode
|
||||
out.RestartCount = item.RestartCount
|
||||
out.StartedAt = item.State.StartedAt
|
||||
out.FinishedAt = item.State.FinishedAt
|
||||
if item.State.Health != nil {
|
||||
out.Health = strings.TrimSpace(item.State.Health.Status)
|
||||
}
|
||||
if item.State.Error != "" {
|
||||
out.LastActionError = item.State.Error
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func remediateMonitorContainer(ctx context.Context, cfg MonitorConfig, status *MonitorContainerStatus, result *MonitorResult) {
|
||||
if status.Name == "" {
|
||||
return
|
||||
}
|
||||
action := ""
|
||||
reason := ""
|
||||
switch {
|
||||
case status.LastActionError != "" && status.Status == "":
|
||||
action = "start"
|
||||
reason = "inspect_failed_or_missing"
|
||||
case status.Health == "unhealthy":
|
||||
action = "restart"
|
||||
reason = "health_unhealthy"
|
||||
case status.Restarting && restartingIsStale(status.StartedAt, status.FinishedAt, cfg.StaleRestartingAfter):
|
||||
action = "restart"
|
||||
reason = "restarting_stale"
|
||||
case !status.Running && status.Status != "":
|
||||
action = "start"
|
||||
reason = "not_running"
|
||||
default:
|
||||
return
|
||||
}
|
||||
if cfg.restartHistory != nil {
|
||||
if last, ok := cfg.restartHistory[status.Name]; ok && time.Since(last) < cfg.RestartCooldown {
|
||||
result.Actions = append(result.Actions, MonitorAction{
|
||||
Kind: "docker_" + action + "_skipped",
|
||||
Target: status.Name,
|
||||
Reason: "restart_cooldown",
|
||||
Success: true,
|
||||
})
|
||||
return
|
||||
}
|
||||
}
|
||||
args := []string{action, status.Name}
|
||||
_, err := cfg.Runner.Run(ctx, cfg.DockerBinary, args...)
|
||||
monitorAction := MonitorAction{Kind: "docker_" + action, Target: status.Name, Reason: reason, Success: err == nil}
|
||||
status.LastAction = action
|
||||
status.LastActionOK = err == nil
|
||||
if err != nil {
|
||||
monitorAction.Error = strings.TrimSpace(err.Error())
|
||||
status.LastActionError = monitorAction.Error
|
||||
} else {
|
||||
if cfg.restartHistory != nil {
|
||||
cfg.restartHistory[status.Name] = time.Now()
|
||||
}
|
||||
status.LastActionError = ""
|
||||
status.Running = true
|
||||
status.Restarting = false
|
||||
status.Status = DefaultMonitorRemediationSucceeded
|
||||
}
|
||||
result.Actions = append(result.Actions, monitorAction)
|
||||
}
|
||||
|
||||
// restartingIsStale reports whether a restarting container has been in that
// state for at least threshold.
//
// finishedAt is consulted first, then startedAt; the first value that parses
// as RFC 3339 (with optional nanoseconds) to a non-zero time decides the
// answer. If neither timestamp is usable the container is treated as stale.
func restartingIsStale(startedAt, finishedAt string, threshold time.Duration) bool {
	candidates := [...]string{finishedAt, startedAt}
	for i := 0; i < len(candidates); i++ {
		stamp, err := time.Parse(time.RFC3339Nano, strings.TrimSpace(candidates[i]))
		if err != nil || stamp.IsZero() {
			continue
		}
		return time.Since(stamp) >= threshold
	}
	return true
}
|
||||
|
||||
func runCleanup(ctx context.Context, cfg MonitorConfig, result *MonitorResult, reason string) {
|
||||
if cfg.CleanupDocker {
|
||||
for _, args := range [][]string{
|
||||
{"builder", "prune", "-af"},
|
||||
{"image", "prune", "-f"},
|
||||
{"container", "prune", "-f"},
|
||||
} {
|
||||
_, err := cfg.Runner.Run(ctx, cfg.DockerBinary, args...)
|
||||
action := MonitorAction{Kind: "docker_" + strings.Join(args[:len(args)-1], "_"), Reason: reason, Success: err == nil}
|
||||
if err != nil {
|
||||
action.Error = strings.TrimSpace(err.Error())
|
||||
}
|
||||
result.Actions = append(result.Actions, action)
|
||||
}
|
||||
}
|
||||
removed, err := cleanupTmpBuildDirs(cfg.TmpDir, cfg.TmpMinAge)
|
||||
action := MonitorAction{Kind: "tmp_cleanup", Target: cfg.TmpDir, Reason: reason, Success: err == nil}
|
||||
if err != nil {
|
||||
action.Error = err.Error()
|
||||
} else {
|
||||
action.Target = fmt.Sprintf("%s removed=%d", cfg.TmpDir, removed)
|
||||
}
|
||||
result.Actions = append(result.Actions, action)
|
||||
}
|
||||
|
||||
// cleanupTmpBuildDirs removes stale build leftovers directly under tmpDir and
// returns how many entries were removed.
//
// Only entries whose name starts with "rap-" or "go-build" and whose
// modification time is at least minAge old are removed. Entries whose file
// info cannot be read are skipped. An empty, ".", or root tmpDir is rejected
// as unsafe.
//
// Removal is best-effort: a failure to remove one entry does not stop the
// sweep, so the remaining stale entries are still reclaimed. The first
// removal error encountered is returned together with the count of entries
// that were removed successfully.
func cleanupTmpBuildDirs(tmpDir string, minAge time.Duration) (int, error) {
	tmpDir = filepath.Clean(strings.TrimSpace(tmpDir))
	if tmpDir == "" || tmpDir == "." || tmpDir == string(filepath.Separator) {
		return 0, fmt.Errorf("unsafe tmp dir: %q", tmpDir)
	}
	entries, err := os.ReadDir(tmpDir)
	if err != nil {
		return 0, err
	}

	now := time.Now()
	removed := 0
	var firstErr error
	for _, entry := range entries {
		name := entry.Name()
		if !strings.HasPrefix(name, "rap-") && !strings.HasPrefix(name, "go-build") {
			continue
		}
		info, err := entry.Info()
		if err != nil || now.Sub(info.ModTime()) < minAge {
			// Unreadable (possibly already gone) or still too fresh to touch.
			continue
		}
		if err := os.RemoveAll(filepath.Join(tmpDir, name)); err != nil {
			// Remember the first failure but keep freeing what we can.
			if firstErr == nil {
				firstErr = err
			}
			continue
		}
		removed++
	}
	return removed, firstErr
}
|
||||
|
||||
func reportMonitorStatus(ctx context.Context, cfg MonitorConfig, result MonitorResult) error {
|
||||
cfg = normalizeMonitorConfig(cfg)
|
||||
nodeID, clusterID, err := resolveMonitorIdentity(cfg)
|
||||
if err != nil {
|
||||
if errors.Is(err, ErrNodeIdentityNotReady) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
if cfg.BackendURL == "" || clusterID == "" || nodeID == "" {
|
||||
return nil
|
||||
}
|
||||
payload := map[string]any{
|
||||
"schema_version": result.SchemaVersion,
|
||||
"monitor_status": result.Status,
|
||||
"disk": result.Disk,
|
||||
"containers": result.Containers,
|
||||
"actions": result.Actions,
|
||||
"errors": result.Errors,
|
||||
}
|
||||
errText := ""
|
||||
if len(result.Errors) > 0 {
|
||||
errText = strings.Join(result.Errors, "; ")
|
||||
}
|
||||
req := NodeUpdateStatusRequest{
|
||||
Product: cfg.Product,
|
||||
CurrentVersion: cfg.CurrentVersion,
|
||||
Phase: DefaultMonitorPhase,
|
||||
Status: result.Status,
|
||||
Payload: payload,
|
||||
ObservedAt: result.ObservedAt,
|
||||
}
|
||||
if errText != "" {
|
||||
req.ErrorMessage = &errText
|
||||
}
|
||||
return ReportNodeUpdateStatus(ctx, cfg.BackendURL, clusterID, nodeID, req)
|
||||
}
|
||||
|
||||
func resolveMonitorIdentity(cfg MonitorConfig) (string, string, error) {
|
||||
nodeID := strings.TrimSpace(cfg.NodeID)
|
||||
clusterID := strings.TrimSpace(cfg.ClusterID)
|
||||
if nodeID != "" {
|
||||
return nodeID, clusterID, nil
|
||||
}
|
||||
if strings.TrimSpace(cfg.StateDir) == "" {
|
||||
return "", clusterID, ErrNodeIdentityNotReady
|
||||
}
|
||||
identity, err := state.Load(filepath.Join(cfg.StateDir, state.FileName))
|
||||
if err != nil {
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
return "", clusterID, ErrNodeIdentityNotReady
|
||||
}
|
||||
return "", clusterID, err
|
||||
}
|
||||
nodeID = strings.TrimSpace(identity.NodeID)
|
||||
if nodeID == "" {
|
||||
return "", clusterID, ErrNodeIdentityNotReady
|
||||
}
|
||||
if clusterID == "" {
|
||||
clusterID = strings.TrimSpace(identity.ClusterID)
|
||||
}
|
||||
return nodeID, clusterID, nil
|
||||
}
|
||||
|
||||
func writeMonitorStatusFile(path string, result MonitorResult) error {
|
||||
path = strings.TrimSpace(path)
|
||||
if path == "" {
|
||||
return nil
|
||||
}
|
||||
if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
payload, err := json.MarshalIndent(result, "", " ")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
tmp := path + ".tmp"
|
||||
if err := os.WriteFile(tmp, payload, 0o644); err != nil {
|
||||
return err
|
||||
}
|
||||
return os.Rename(tmp, path)
|
||||
}
|
||||
|
||||
func logMonitorResult(cfg MonitorConfig, result MonitorResult) {
|
||||
if cfg.Logf == nil {
|
||||
return
|
||||
}
|
||||
cfg.Logf("monitor status=%s containers=%d actions=%d errors=%d", result.Status, len(result.Containers), len(result.Actions), len(result.Errors))
|
||||
}
|
||||
|
||||
// uniqueTrimmed returns values with surrounding whitespace removed, blank
// entries dropped, and duplicates eliminated. The first occurrence of each
// value keeps its position. The result is always a non-nil slice.
func uniqueTrimmed(values []string) []string {
	result := make([]string, 0, len(values))
	known := map[string]struct{}{}
	for i := range values {
		candidate := strings.TrimSpace(values[i])
		if candidate == "" {
			continue
		}
		if _, dup := known[candidate]; !dup {
			known[candidate] = struct{}{}
			result = append(result, candidate)
		}
	}
	return result
}
|
||||
@@ -0,0 +1,87 @@
|
||||
package hostagent
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// monitorRunner is a test double for the command runner used by the monitor.
type monitorRunner struct {
	// inspect maps a container name to the canned "inspect" output for it.
	inspect map[string]string
	// calls records every command line passed to Run, in order.
	calls []string
}
|
||||
|
||||
func (r *monitorRunner) Run(_ context.Context, name string, args ...string) (string, error) {
|
||||
call := strings.TrimSpace(name + " " + strings.Join(args, " "))
|
||||
r.calls = append(r.calls, call)
|
||||
if len(args) >= 2 && args[0] == "inspect" {
|
||||
out, ok := r.inspect[args[1]]
|
||||
if !ok {
|
||||
return "", fmt.Errorf("not found")
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
return "", nil
|
||||
}
|
||||
|
||||
func TestRunMonitorOnceStartsExitedContainer(t *testing.T) {
|
||||
runner := &monitorRunner{inspect: map[string]string{
|
||||
"rap-node-agent": `[{"Name":"/rap-node-agent","State":{"Status":"exited","Running":false,"ExitCode":137,"StartedAt":"2026-05-13T00:00:00Z","FinishedAt":"2026-05-13T00:01:00Z"}}]`,
|
||||
}}
|
||||
result := RunMonitorOnce(context.Background(), MonitorConfig{
|
||||
WatchContainers: []string{"rap-node-agent"},
|
||||
RestartContainers: true,
|
||||
Runner: runner,
|
||||
DiskPath: t.TempDir(),
|
||||
DiskCleanupPercent: 101,
|
||||
DiskWarnPercent: 101,
|
||||
DiskCriticalPercent: 101,
|
||||
})
|
||||
if len(result.Actions) != 1 || result.Actions[0].Kind != "docker_start" || !result.Actions[0].Success {
|
||||
t.Fatalf("unexpected actions: %+v", result.Actions)
|
||||
}
|
||||
if !containsCall(runner.calls, "docker start rap-node-agent") {
|
||||
t.Fatalf("start call missing: %+v", runner.calls)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunMonitorOnceRestartsUnhealthyContainer(t *testing.T) {
|
||||
runner := &monitorRunner{inspect: map[string]string{
|
||||
"rap-backend": `[{"Name":"/rap-backend","State":{"Status":"running","Running":true,"StartedAt":"2026-05-13T00:00:00Z","Health":{"Status":"unhealthy"}}}]`,
|
||||
}}
|
||||
result := RunMonitorOnce(context.Background(), MonitorConfig{
|
||||
WatchContainers: []string{"rap-backend"},
|
||||
RestartContainers: true,
|
||||
Runner: runner,
|
||||
DiskPath: t.TempDir(),
|
||||
DiskCleanupPercent: 101,
|
||||
DiskWarnPercent: 101,
|
||||
DiskCriticalPercent: 101,
|
||||
})
|
||||
if len(result.Actions) != 1 || result.Actions[0].Kind != "docker_restart" || !result.Actions[0].Success {
|
||||
t.Fatalf("unexpected actions: %+v", result.Actions)
|
||||
}
|
||||
if !containsCall(runner.calls, "docker restart rap-backend") {
|
||||
t.Fatalf("restart call missing: %+v", runner.calls)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRestartingIsStale(t *testing.T) {
|
||||
if !restartingIsStale(time.Now().Add(-10*time.Minute).UTC().Format(time.RFC3339Nano), "", time.Minute) {
|
||||
t.Fatalf("old restarting container should be stale")
|
||||
}
|
||||
if restartingIsStale(time.Now().UTC().Format(time.RFC3339Nano), "", time.Hour) {
|
||||
t.Fatalf("fresh restarting container should not be stale")
|
||||
}
|
||||
}
|
||||
|
||||
// containsCall reports whether want appears verbatim in calls.
func containsCall(calls []string, want string) bool {
	for i := 0; i < len(calls); i++ {
		if calls[i] == want {
			return true
		}
	}
	return false
}
|
||||
@@ -16,33 +16,44 @@ const (
|
||||
)
|
||||
|
||||
type UpdateServiceConfig struct {
|
||||
RuntimeConfig RuntimeConfig
|
||||
Product string
|
||||
CurrentVersion string
|
||||
Channel string
|
||||
IntervalSeconds int
|
||||
InitialDelaySeconds int
|
||||
Jitter float64
|
||||
HealthTimeoutSec int
|
||||
BinaryInstallPath string
|
||||
SourceBinaryPath string
|
||||
UnitDir string
|
||||
ManageSystemd bool
|
||||
DryRun bool
|
||||
InstallSelfUpdater bool
|
||||
SelfUpdateVersion string
|
||||
RuntimeConfig RuntimeConfig
|
||||
Product string
|
||||
CurrentVersion string
|
||||
Channel string
|
||||
IntervalSeconds int
|
||||
InitialDelaySeconds int
|
||||
Jitter float64
|
||||
HealthTimeoutSec int
|
||||
BinaryInstallPath string
|
||||
SourceBinaryPath string
|
||||
UnitDir string
|
||||
ManageSystemd bool
|
||||
DryRun bool
|
||||
InstallSelfUpdater bool
|
||||
SelfUpdateVersion string
|
||||
InstallMonitor bool
|
||||
MonitorIntervalSec int
|
||||
MonitorContainers []string
|
||||
MonitorStatusFile string
|
||||
MonitorDiskWarn int
|
||||
MonitorDiskCleanup int
|
||||
MonitorDiskCritical int
|
||||
MonitorCleanupDocker bool
|
||||
}
|
||||
|
||||
// UpdateServiceResult reports what InstallUpdateService produced: the
// rendered unit text, name, and path for the updater unit, and for the
// self-updater and monitor units when they were requested.
//
// Each field is declared exactly once; the earlier text repeated the
// pre-change field list ahead of the current one, which does not compile.
type UpdateServiceResult struct {
	Installed  bool
	Started    bool
	UnitName   string
	UnitPath   string
	BinaryPath string
	Unit       string

	SelfUnitName string
	SelfUnitPath string
	SelfUnit     string

	// Monitor unit details; left empty when no monitor unit was built.
	MonitorUnitName string
	MonitorUnitPath string
	MonitorUnit     string
}
|
||||
|
||||
func (m DockerManager) InstallUpdateService(ctx context.Context, cfg UpdateServiceConfig) (UpdateServiceResult, error) {
|
||||
@@ -59,6 +70,9 @@ func (m DockerManager) InstallUpdateService(ctx context.Context, cfg UpdateServi
|
||||
if cfg.HealthTimeoutSec == 0 {
|
||||
cfg.HealthTimeoutSec = 30
|
||||
}
|
||||
if cfg.MonitorIntervalSec == 0 {
|
||||
cfg.MonitorIntervalSec = DefaultMonitorIntervalSeconds
|
||||
}
|
||||
cfg.BinaryInstallPath = firstNonEmpty(cfg.BinaryInstallPath, DefaultHostAgentInstallPath)
|
||||
cfg.UnitDir = firstNonEmpty(cfg.UnitDir, DefaultSystemdUnitDir)
|
||||
unitName := "rap-host-agent-updater-" + safeUnitSlug(cfg.RuntimeConfig.ContainerName) + ".service"
|
||||
@@ -82,6 +96,15 @@ func (m DockerManager) InstallUpdateService(ctx context.Context, cfg UpdateServi
|
||||
result.SelfUnitName = selfUnitName
|
||||
result.SelfUnitPath = selfUnitPath
|
||||
}
|
||||
if cfg.InstallMonitor {
|
||||
monitorUnit, monitorUnitName, monitorUnitPath, err := buildHostAgentMonitorUnit(cfg)
|
||||
if err != nil {
|
||||
return result, err
|
||||
}
|
||||
result.MonitorUnit = monitorUnit
|
||||
result.MonitorUnitName = monitorUnitName
|
||||
result.MonitorUnitPath = monitorUnitPath
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
if runtime.GOOS != "linux" && cfg.UnitDir == DefaultSystemdUnitDir {
|
||||
@@ -108,6 +131,18 @@ func (m DockerManager) InstallUpdateService(ctx context.Context, cfg UpdateServi
|
||||
result.SelfUnitName = selfUnitName
|
||||
result.SelfUnitPath = selfUnitPath
|
||||
}
|
||||
if cfg.InstallMonitor {
|
||||
monitorUnit, monitorUnitName, monitorUnitPath, err := buildHostAgentMonitorUnit(cfg)
|
||||
if err != nil {
|
||||
return result, err
|
||||
}
|
||||
if err := os.WriteFile(monitorUnitPath, []byte(monitorUnit), 0o644); err != nil {
|
||||
return result, err
|
||||
}
|
||||
result.MonitorUnit = monitorUnit
|
||||
result.MonitorUnitName = monitorUnitName
|
||||
result.MonitorUnitPath = monitorUnitPath
|
||||
}
|
||||
result.Installed = true
|
||||
if cfg.ManageSystemd {
|
||||
runner := m.Runner
|
||||
@@ -125,6 +160,11 @@ func (m DockerManager) InstallUpdateService(ctx context.Context, cfg UpdateServi
|
||||
return result, err
|
||||
}
|
||||
}
|
||||
if cfg.InstallMonitor && result.MonitorUnitName != "" {
|
||||
if _, err := runner.Run(ctx, "systemctl", "enable", "--now", result.MonitorUnitName); err != nil {
|
||||
return result, err
|
||||
}
|
||||
}
|
||||
result.Started = true
|
||||
}
|
||||
return result, nil
|
||||
@@ -223,6 +263,64 @@ WantedBy=multi-user.target
|
||||
`, systemdJoin(args)), unitName, unitPath, nil
|
||||
}
|
||||
|
||||
func buildHostAgentMonitorUnit(cfg UpdateServiceConfig) (string, string, string, error) {
|
||||
runtimeCfg := cfg.RuntimeConfig.Normalize()
|
||||
if runtimeCfg.BackendURL == "" || runtimeCfg.ClusterID == "" || runtimeCfg.StateDir == "" {
|
||||
return "", "", "", fmt.Errorf("backend-url, cluster-id, and state-dir are required for host monitor")
|
||||
}
|
||||
containers := uniqueTrimmed(append([]string{runtimeCfg.ContainerName}, cfg.MonitorContainers...))
|
||||
if len(containers) == 0 {
|
||||
return "", "", "", fmt.Errorf("at least one monitor container is required")
|
||||
}
|
||||
unitName := "rap-host-agent-monitor-" + safeUnitSlug(runtimeCfg.ContainerName) + ".service"
|
||||
unitPath := filepath.Join(firstNonEmpty(cfg.UnitDir, DefaultSystemdUnitDir), unitName)
|
||||
args := []string{
|
||||
cfg.BinaryInstallPath,
|
||||
"monitor-loop",
|
||||
"--backend-url", runtimeCfg.BackendURL,
|
||||
"--cluster-id", runtimeCfg.ClusterID,
|
||||
"--state-dir", runtimeCfg.StateDir,
|
||||
"--current-version", firstNonEmpty(cfg.SelfUpdateVersion, cfg.CurrentVersion),
|
||||
"--interval-seconds", fmt.Sprintf("%d", firstNonZero(cfg.MonitorIntervalSec, DefaultMonitorIntervalSeconds)),
|
||||
"--disk-warn-percent", fmt.Sprintf("%d", firstNonZero(cfg.MonitorDiskWarn, DefaultMonitorDiskWarnPercent)),
|
||||
"--disk-cleanup-percent", fmt.Sprintf("%d", firstNonZero(cfg.MonitorDiskCleanup, DefaultMonitorDiskCleanupPercent)),
|
||||
"--disk-critical-percent", fmt.Sprintf("%d", firstNonZero(cfg.MonitorDiskCritical, DefaultMonitorDiskCriticalPercent)),
|
||||
}
|
||||
if cfg.MonitorCleanupDocker {
|
||||
args = append(args, "--cleanup-docker")
|
||||
}
|
||||
if strings.TrimSpace(cfg.MonitorStatusFile) != "" {
|
||||
args = append(args, "--status-file", strings.TrimSpace(cfg.MonitorStatusFile))
|
||||
}
|
||||
for _, container := range containers {
|
||||
args = append(args, "--watch-container", container)
|
||||
}
|
||||
return fmt.Sprintf(`[Unit]
|
||||
Description=RAP host-agent monitor for %s
|
||||
After=network-online.target docker.service
|
||||
Wants=network-online.target
|
||||
Requires=docker.service
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
ExecStart=%s
|
||||
Restart=always
|
||||
RestartSec=30
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
`, runtimeCfg.ContainerName, systemdJoin(args)), unitName, unitPath, nil
|
||||
}
|
||||
|
||||
// firstNonZero returns the first argument that is not zero, or zero when
// every argument is zero or none are given.
func firstNonZero(values ...int) int {
	for i := 0; i < len(values); i++ {
		if candidate := values[i]; candidate != 0 {
			return candidate
		}
	}
	return 0
}
|
||||
|
||||
func installHostAgentBinary(sourcePath, targetPath string) error {
|
||||
sourcePath = strings.TrimSpace(sourcePath)
|
||||
targetPath = strings.TrimSpace(targetPath)
|
||||
|
||||
@@ -24,15 +24,18 @@ func TestInstallUpdateServiceWritesSystemdUnit(t *testing.T) {
|
||||
ContainerName: "rap-node-agent-node-a",
|
||||
StateDir: "/var/lib/rap/nodes/node-a",
|
||||
},
|
||||
CurrentVersion: "0.1.0-current",
|
||||
IntervalSeconds: 60,
|
||||
Jitter: 0.2,
|
||||
SourceBinaryPath: source,
|
||||
BinaryInstallPath: binaryPath,
|
||||
UnitDir: unitDir,
|
||||
ManageSystemd: false,
|
||||
InstallSelfUpdater: true,
|
||||
SelfUpdateVersion: "0.1.0-host",
|
||||
CurrentVersion: "0.1.0-current",
|
||||
IntervalSeconds: 60,
|
||||
Jitter: 0.2,
|
||||
SourceBinaryPath: source,
|
||||
BinaryInstallPath: binaryPath,
|
||||
UnitDir: unitDir,
|
||||
ManageSystemd: false,
|
||||
InstallSelfUpdater: true,
|
||||
SelfUpdateVersion: "0.1.0-host",
|
||||
InstallMonitor: true,
|
||||
MonitorContainers: []string{"rap-test-backend"},
|
||||
MonitorCleanupDocker: true,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("install update service: %v", err)
|
||||
@@ -73,6 +76,25 @@ func TestInstallUpdateServiceWritesSystemdUnit(t *testing.T) {
|
||||
if text := string(selfUnit); !strings.Contains(text, "update-host-agent-loop") || !strings.Contains(text, "--current-version 0.1.0-host") {
|
||||
t.Fatalf("unexpected self unit:\n%s", text)
|
||||
}
|
||||
if result.MonitorUnitName == "" || result.MonitorUnitPath == "" {
|
||||
t.Fatalf("monitor result = %+v", result)
|
||||
}
|
||||
monitorUnit, err := os.ReadFile(result.MonitorUnitPath)
|
||||
if err != nil {
|
||||
t.Fatalf("read monitor unit: %v", err)
|
||||
}
|
||||
monitorText := string(monitorUnit)
|
||||
for _, want := range []string{
|
||||
"monitor-loop",
|
||||
"--watch-container rap-node-agent-node-a",
|
||||
"--watch-container rap-test-backend",
|
||||
"--cleanup-docker",
|
||||
"Restart=always",
|
||||
} {
|
||||
if !strings.Contains(monitorText, want) {
|
||||
t.Fatalf("monitor unit missing %q:\n%s", want, monitorText)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestWindowsHostAgentUpdateScriptTargetsWindowsService(t *testing.T) {
|
||||
|
||||
@@ -313,6 +313,9 @@ func (m DockerManager) ApplyUpdate(ctx context.Context, req UpdateRequest) (Upda
|
||||
cfg.ClusterID = firstNonEmpty(cfg.ClusterID, req.ClusterID)
|
||||
cfg.ContainerName = req.ContainerName
|
||||
cfg.Image = artifactImage(*plan.Artifact, cfg.Image)
|
||||
if artifactDockerVPNGatewayEnabled(*plan.Artifact) {
|
||||
cfg.DockerVPNGatewayEnabled = true
|
||||
}
|
||||
cfg.ImageArtifactURLs = artifactURLsForBackend(*plan.Artifact, req.BackendURL)
|
||||
cfg.ImageArtifactSHA256 = plan.Artifact.SHA256
|
||||
cfg.ImageArtifactSizeBytes = plan.Artifact.SizeBytes
|
||||
@@ -681,6 +684,20 @@ func artifactImage(artifact ReleaseArtifact, fallback string) string {
|
||||
return firstNonEmpty(fallback, DefaultImage)
|
||||
}
|
||||
|
||||
func artifactDockerVPNGatewayEnabled(artifact ReleaseArtifact) bool {
|
||||
if len(artifact.Metadata) == 0 {
|
||||
return false
|
||||
}
|
||||
var metadata struct {
|
||||
DockerVPNGatewayEnabled bool `json:"docker_vpn_gateway_enabled"`
|
||||
VPNGatewayEnabled bool `json:"vpn_gateway_enabled"`
|
||||
}
|
||||
if err := json.Unmarshal(artifact.Metadata, &metadata); err != nil {
|
||||
return false
|
||||
}
|
||||
return metadata.DockerVPNGatewayEnabled || metadata.VPNGatewayEnabled
|
||||
}
|
||||
|
||||
func artifactURLs(artifact ReleaseArtifact) []string {
|
||||
out := make([]string, 0, 1+len(artifact.URLs))
|
||||
for _, raw := range append([]string{artifact.URL}, artifact.URLs...) {
|
||||
|
||||
@@ -596,6 +596,18 @@ func TestArtifactImageDerivesDockerTagFromProductAndVersion(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestArtifactDockerVPNGatewayEnabledFromMetadata(t *testing.T) {
|
||||
if !artifactDockerVPNGatewayEnabled(ReleaseArtifact{Metadata: json.RawMessage(`{"docker_vpn_gateway_enabled":true}`)}) {
|
||||
t.Fatal("expected docker vpn gateway metadata to enable gateway runtime")
|
||||
}
|
||||
if !artifactDockerVPNGatewayEnabled(ReleaseArtifact{Metadata: json.RawMessage(`{"vpn_gateway_enabled":true}`)}) {
|
||||
t.Fatal("expected legacy vpn gateway metadata to enable gateway runtime")
|
||||
}
|
||||
if artifactDockerVPNGatewayEnabled(ReleaseArtifact{Metadata: json.RawMessage(`{"docker_vpn_gateway_enabled":false}`)}) {
|
||||
t.Fatal("expected disabled metadata to remain disabled")
|
||||
}
|
||||
}
|
||||
|
||||
func serverArtifactURL(r *http.Request) string {
|
||||
scheme := "http"
|
||||
if r.TLS != nil {
|
||||
|
||||
@@ -16,119 +16,176 @@ const DefaultRemoteWorkspaceAdapterMailboxConsumerCapacity = 32
|
||||
const RemoteWorkspaceFrameProbeSinkRuntimeID = "node_agent_rdp_worker_contract_probe"
|
||||
|
||||
type RemoteWorkspaceFrameProbeSink struct {
|
||||
mu sync.Mutex
|
||||
sequence int64
|
||||
queueCapacity int
|
||||
sessionTTL time.Duration
|
||||
sessions map[string]*remoteWorkspaceAdapterProbeSession
|
||||
terminalSessions map[string]remoteWorkspaceAdapterProbeTerminalSession
|
||||
sessionCreatedTotal int64
|
||||
sessionBoundTotal int64
|
||||
sessionBackpressureTotal int64
|
||||
sessionExpiredTotal int64
|
||||
sessionClosedTotal int64
|
||||
sessionResetTotal int64
|
||||
sessionControlTotal int64
|
||||
mailboxEventSequence int64
|
||||
mailboxEnqueuedTotal int64
|
||||
mailboxDrainedTotal int64
|
||||
mailboxDroppedTotal int64
|
||||
mailboxReadTotal int64
|
||||
mailboxWaitTotal int64
|
||||
mailboxWaitTimeoutTotal int64
|
||||
mailboxEmptyReadTotal int64
|
||||
mailboxResumeReadTotal int64
|
||||
mailboxAfterSequenceReadTotal int64
|
||||
mailboxReturnedTotal int64
|
||||
mailboxSkippedTotal int64
|
||||
mailboxConsumerReadTotal int64
|
||||
mailboxConsumerAckTotal int64
|
||||
mailboxConsumerResetTotal int64
|
||||
mailboxConsumerEvictedTotal int64
|
||||
lastMailboxReadAt string
|
||||
lastMailboxAdapterSessionID string
|
||||
lastMailboxWaitMs int
|
||||
lastMailboxWaited bool
|
||||
lastMailboxWaitTimeout bool
|
||||
lastMailboxEmpty bool
|
||||
lastMailboxResumeFrom string
|
||||
lastMailboxResumeSequence int64
|
||||
lastMailboxResumeConsumerID string
|
||||
lastMailboxAfterSequence int64
|
||||
lastMailboxSkippedCount int
|
||||
lastMailboxReturnedCount int
|
||||
lastMailboxConsumerID string
|
||||
lastMailboxConsumerAdapterSessionID string
|
||||
lastMailboxConsumerReadAt string
|
||||
lastMailboxConsumerAckAt string
|
||||
lastMailboxConsumerCheckpoint int64
|
||||
lastMailboxConsumerAck int64
|
||||
acceptedFramesTotal int64
|
||||
droppedFramesTotal int64
|
||||
ackedFramesTotal int64
|
||||
backpressureCount int64
|
||||
lastBackpressureAt string
|
||||
lastBackpressureReason string
|
||||
lastRejectedFrameCount int
|
||||
lastRejectedAdapterSessionID string
|
||||
lastRejectedChannelClass string
|
||||
lastRejectedAdapterContractID string
|
||||
lastRejectedQueueCapacity int
|
||||
lastRejectedQueueDepth int
|
||||
lastControl RemoteWorkspaceAdapterSessionControlResult
|
||||
last RemoteWorkspaceFrameBatchDeliveryReceipt
|
||||
mu sync.Mutex
|
||||
sequence int64
|
||||
queueCapacity int
|
||||
sessionTTL time.Duration
|
||||
sessions map[string]*remoteWorkspaceAdapterProbeSession
|
||||
terminalSessions map[string]remoteWorkspaceAdapterProbeTerminalSession
|
||||
sessionCreatedTotal int64
|
||||
sessionBoundTotal int64
|
||||
sessionBackpressureTotal int64
|
||||
sessionExpiredTotal int64
|
||||
sessionClosedTotal int64
|
||||
sessionResetTotal int64
|
||||
sessionControlTotal int64
|
||||
mailboxEventSequence int64
|
||||
mailboxEnqueuedTotal int64
|
||||
mailboxDrainedTotal int64
|
||||
mailboxDroppedTotal int64
|
||||
mailboxReadTotal int64
|
||||
mailboxWaitTotal int64
|
||||
mailboxWaitTimeoutTotal int64
|
||||
mailboxEmptyReadTotal int64
|
||||
mailboxResumeReadTotal int64
|
||||
mailboxAfterSequenceReadTotal int64
|
||||
mailboxReturnedTotal int64
|
||||
mailboxSkippedTotal int64
|
||||
mailboxPreflightTotal int64
|
||||
mailboxPreflightAckTotal int64
|
||||
mailboxPreflightCheckpointTotal int64
|
||||
mailboxConsumerReadTotal int64
|
||||
mailboxConsumerAckTotal int64
|
||||
mailboxConsumerResetTotal int64
|
||||
mailboxConsumerEvictedTotal int64
|
||||
lastMailboxReadAt string
|
||||
lastMailboxAdapterSessionID string
|
||||
lastMailboxWaitMs int
|
||||
lastMailboxWaited bool
|
||||
lastMailboxWaitTimeout bool
|
||||
lastMailboxEmpty bool
|
||||
lastMailboxResumeFrom string
|
||||
lastMailboxResumeSequence int64
|
||||
lastMailboxResumeConsumerID string
|
||||
lastMailboxAfterSequence int64
|
||||
lastMailboxSkippedCount int
|
||||
lastMailboxReturnedCount int
|
||||
lastMailboxPreflightAt string
|
||||
lastMailboxPreflightAdapterSessionID string
|
||||
lastMailboxPreflightConsumerID string
|
||||
lastMailboxPreflightResumeFrom string
|
||||
lastMailboxPreflightResumeSequence int64
|
||||
lastMailboxPreflightAfterSequence int64
|
||||
lastMailboxPreflightAvailableCount int
|
||||
lastMailboxPreflightReturnedCount int
|
||||
lastMailboxPreflightSkippedCount int
|
||||
lastMailboxPreflightFirstSequence int64
|
||||
lastMailboxPreflightLastSequence int64
|
||||
lastMailboxPreflightFirstRetained int64
|
||||
lastMailboxPreflightLastRetained int64
|
||||
lastMailboxPreflightMailboxDropped int64
|
||||
lastMailboxPreflightDiagnosticState string
|
||||
lastMailboxPreflightStaleCursor bool
|
||||
lastMailboxPreflightMissingDropped int
|
||||
lastMailboxPreflightRecommendedAction string
|
||||
lastMailboxPreflightActionHints []string
|
||||
lastMailboxPreflightActionReason string
|
||||
lastMailboxPreflightActionContext map[string]any
|
||||
lastMailboxPreflightOperatorSummary string
|
||||
lastMailboxPreflightOperatorStatus string
|
||||
lastMailboxPreflightOperatorSeverity string
|
||||
lastMailboxPreflightOperatorFields map[string]any
|
||||
lastMailboxConsumerID string
|
||||
lastMailboxConsumerAdapterSessionID string
|
||||
lastMailboxConsumerReadAt string
|
||||
lastMailboxConsumerAckAt string
|
||||
lastMailboxConsumerCheckpoint int64
|
||||
lastMailboxConsumerAck int64
|
||||
acceptedFramesTotal int64
|
||||
droppedFramesTotal int64
|
||||
ackedFramesTotal int64
|
||||
backpressureCount int64
|
||||
lastBackpressureAt string
|
||||
lastBackpressureReason string
|
||||
lastRejectedFrameCount int
|
||||
lastRejectedAdapterSessionID string
|
||||
lastRejectedChannelClass string
|
||||
lastRejectedAdapterContractID string
|
||||
lastRejectedQueueCapacity int
|
||||
lastRejectedQueueDepth int
|
||||
lastControl RemoteWorkspaceAdapterSessionControlResult
|
||||
last RemoteWorkspaceFrameBatchDeliveryReceipt
|
||||
}
|
||||
|
||||
type remoteWorkspaceAdapterProbeSession struct {
|
||||
ID string
|
||||
State string
|
||||
CreatedAt time.Time
|
||||
BoundAt time.Time
|
||||
LastActivityAt time.Time
|
||||
LastBackpressureAt time.Time
|
||||
ClosedAt time.Time
|
||||
DeliveryCount int64
|
||||
BackpressureCount int64
|
||||
AcceptedFrames int64
|
||||
DroppedFrames int64
|
||||
AckedFrames int64
|
||||
Mailbox []RemoteWorkspaceAdapterMailboxEvent
|
||||
MailboxEnqueued int64
|
||||
MailboxDrained int64
|
||||
MailboxDropped int64
|
||||
MailboxRead int64
|
||||
MailboxWait int64
|
||||
MailboxWaitTimeout int64
|
||||
MailboxEmptyRead int64
|
||||
MailboxResumeRead int64
|
||||
MailboxAfterSequenceRead int64
|
||||
MailboxReturnedTotal int64
|
||||
MailboxSkippedTotal int64
|
||||
MailboxConsumers map[string]*remoteWorkspaceAdapterMailboxConsumerState
|
||||
MailboxConsumerReadTotal int64
|
||||
MailboxConsumerAckTotal int64
|
||||
MailboxConsumerResetTotal int64
|
||||
MailboxConsumerEvictedTotal int64
|
||||
LastMailboxConsumerID string
|
||||
LastMailboxConsumerReadAt time.Time
|
||||
LastMailboxConsumerAckAt time.Time
|
||||
LastMailboxConsumerCheckpoint int64
|
||||
LastMailboxConsumerAck int64
|
||||
LastMailboxReadAt time.Time
|
||||
LastMailboxWaitMs int
|
||||
LastMailboxWaited bool
|
||||
LastMailboxTimeout bool
|
||||
LastMailboxEmpty bool
|
||||
LastMailboxResumeFrom string
|
||||
LastMailboxResumeSequence int64
|
||||
LastMailboxResumeConsumerID string
|
||||
LastMailboxAfterSequence int64
|
||||
LastMailboxSkippedCount int
|
||||
LastMailboxReturnedCount int
|
||||
LastChannelID string
|
||||
LastResourceID string
|
||||
LastRouteID string
|
||||
LastReason string
|
||||
ID string
|
||||
State string
|
||||
CreatedAt time.Time
|
||||
BoundAt time.Time
|
||||
LastActivityAt time.Time
|
||||
LastBackpressureAt time.Time
|
||||
ClosedAt time.Time
|
||||
DeliveryCount int64
|
||||
BackpressureCount int64
|
||||
AcceptedFrames int64
|
||||
DroppedFrames int64
|
||||
AckedFrames int64
|
||||
Mailbox []RemoteWorkspaceAdapterMailboxEvent
|
||||
MailboxEnqueued int64
|
||||
MailboxDrained int64
|
||||
MailboxDropped int64
|
||||
MailboxRead int64
|
||||
MailboxWait int64
|
||||
MailboxWaitTimeout int64
|
||||
MailboxEmptyRead int64
|
||||
MailboxResumeRead int64
|
||||
MailboxAfterSequenceRead int64
|
||||
MailboxReturnedTotal int64
|
||||
MailboxSkippedTotal int64
|
||||
MailboxPreflightTotal int64
|
||||
MailboxPreflightAckTotal int64
|
||||
MailboxPreflightCheckpointTotal int64
|
||||
MailboxPreflightOperatorStatusCounts map[string]int64
|
||||
MailboxPreflightOperatorSeverityCounts map[string]int64
|
||||
MailboxConsumers map[string]*remoteWorkspaceAdapterMailboxConsumerState
|
||||
MailboxConsumerReadTotal int64
|
||||
MailboxConsumerAckTotal int64
|
||||
MailboxConsumerResetTotal int64
|
||||
MailboxConsumerEvictedTotal int64
|
||||
LastMailboxConsumerID string
|
||||
LastMailboxConsumerReadAt time.Time
|
||||
LastMailboxConsumerAckAt time.Time
|
||||
LastMailboxConsumerCheckpoint int64
|
||||
LastMailboxConsumerAck int64
|
||||
LastMailboxReadAt time.Time
|
||||
LastMailboxWaitMs int
|
||||
LastMailboxWaited bool
|
||||
LastMailboxTimeout bool
|
||||
LastMailboxEmpty bool
|
||||
LastMailboxResumeFrom string
|
||||
LastMailboxResumeSequence int64
|
||||
LastMailboxResumeConsumerID string
|
||||
LastMailboxAfterSequence int64
|
||||
LastMailboxSkippedCount int
|
||||
LastMailboxReturnedCount int
|
||||
LastMailboxPreflightAt time.Time
|
||||
LastMailboxPreflightConsumerID string
|
||||
LastMailboxPreflightResumeFrom string
|
||||
LastMailboxPreflightResumeSequence int64
|
||||
LastMailboxPreflightAfterSequence int64
|
||||
LastMailboxPreflightAvailableCount int
|
||||
LastMailboxPreflightReturnedCount int
|
||||
LastMailboxPreflightSkippedCount int
|
||||
LastMailboxPreflightFirstSequence int64
|
||||
LastMailboxPreflightLastSequence int64
|
||||
LastMailboxPreflightFirstRetained int64
|
||||
LastMailboxPreflightLastRetained int64
|
||||
LastMailboxPreflightMailboxDropped int64
|
||||
LastMailboxPreflightDiagnosticState string
|
||||
LastMailboxPreflightStaleCursor bool
|
||||
LastMailboxPreflightMissingDropped int
|
||||
LastMailboxPreflightRecommendedAction string
|
||||
LastMailboxPreflightActionHints []string
|
||||
LastMailboxPreflightActionReason string
|
||||
LastMailboxPreflightActionContext map[string]any
|
||||
LastMailboxPreflightOperatorSummary string
|
||||
LastMailboxPreflightOperatorStatus string
|
||||
LastMailboxPreflightOperatorSeverity string
|
||||
LastMailboxPreflightOperatorFields map[string]any
|
||||
LastChannelID string
|
||||
LastResourceID string
|
||||
LastRouteID string
|
||||
LastReason string
|
||||
}
|
||||
|
||||
type remoteWorkspaceAdapterMailboxConsumerState struct {
|
||||
@@ -251,29 +308,43 @@ type RemoteWorkspaceAdapterMailboxConsumer struct {
|
||||
}
|
||||
|
||||
// RemoteWorkspaceAdapterMailboxPreflightSnapshot is the JSON-serializable
// result of a mailbox consumer preflight: it describes what a read with the
// given cursor would be expected to return, the window of sequences currently
// retained in the mailbox, and a diagnosis with recommended operator actions.
type RemoteWorkspaceAdapterMailboxPreflightSnapshot struct {
	// Identity of the snapshot and the session it was taken for.
	SchemaVersion    string `json:"schema_version"`
	AdapterRuntimeID string `json:"adapter_runtime_id"`
	AdapterSessionID string `json:"adapter_session_id"`
	ObservedAt       string `json:"observed_at"`
	ReadOnly         bool   `json:"read_only"`

	// Cursor request being evaluated.
	ConsumerID     string `json:"consumer_id"`
	ResumeFrom     string `json:"resume_from"`
	ResumeSequence int64  `json:"resume_sequence"`
	AfterSequence  int64  `json:"after_sequence"`
	Limit          int    `json:"limit"`

	// Mailbox and consumer counters at observation time.
	MailboxDepth               int   `json:"mailbox_depth"`
	MailboxEnqueued            int64 `json:"mailbox_enqueued_total"`
	MailboxDropped             int64 `json:"mailbox_dropped_total"`
	MailboxReadTotal           int64 `json:"mailbox_read_total"`
	ConsumerReadTotal          int64 `json:"consumer_read_total"`
	ConsumerAckTotal           int64 `json:"consumer_ack_total"`
	ConsumerCheckpointSequence int64 `json:"consumer_checkpoint_sequence"`
	ConsumerAckSequence        int64 `json:"consumer_ack_sequence"`
	ConsumerLagCount           int   `json:"consumer_lag_count"`

	// Expected read window and the retained mailbox window. The sequence
	// bounds are omitted from JSON when zero.
	ExpectedAvailableCount int   `json:"expected_available_count"`
	ExpectedReturnedCount  int   `json:"expected_returned_count"`
	ExpectedSkippedCount   int   `json:"expected_skipped_count"`
	FirstExpectedSequence  int64 `json:"first_expected_sequence,omitempty"`
	LastExpectedSequence   int64 `json:"last_expected_sequence,omitempty"`
	FirstRetainedSequence  int64 `json:"first_retained_sequence,omitempty"`
	LastRetainedSequence   int64 `json:"last_retained_sequence,omitempty"`

	// Diagnosis and recommended action. The preflight code in this file sets
	// DiagnosticState to "ready", "stale_cursor_gap" or "caught_up".
	DiagnosticState     string         `json:"diagnostic_state"`
	StaleCursor         bool           `json:"stale_cursor"`
	MissingDroppedCount int            `json:"missing_dropped_count"`
	RecommendedAction   string         `json:"recommended_action"`
	ActionHints         []string       `json:"action_hints"`
	ActionReason        string         `json:"action_reason"`
	ActionContext       map[string]any `json:"action_context"`

	// Operator-facing summary. The preflight code in this file sets
	// OperatorStatus to "ready_to_resume", "resync_required" or "caught_up"
	// and OperatorSeverity to "ok", "warn" or "info".
	OperatorSummary       string         `json:"operator_summary"`
	OperatorStatus        string         `json:"operator_status"`
	OperatorSeverity      string         `json:"operator_severity"`
	OperatorSummaryFields map[string]any `json:"operator_summary_fields"`
}
|
||||
|
||||
type RemoteWorkspaceAdapterSessionSnapshot struct {
|
||||
@@ -646,11 +717,13 @@ func (s *RemoteWorkspaceFrameProbeSink) ensureSessionLocked(delivery RemoteWorks
|
||||
session := s.sessions[sessionID]
|
||||
if session == nil {
|
||||
session = &remoteWorkspaceAdapterProbeSession{
|
||||
ID: sessionID,
|
||||
State: "created",
|
||||
CreatedAt: now,
|
||||
LastActivityAt: now,
|
||||
MailboxConsumers: map[string]*remoteWorkspaceAdapterMailboxConsumerState{},
|
||||
ID: sessionID,
|
||||
State: "created",
|
||||
CreatedAt: now,
|
||||
LastActivityAt: now,
|
||||
MailboxConsumers: map[string]*remoteWorkspaceAdapterMailboxConsumerState{},
|
||||
MailboxPreflightOperatorStatusCounts: map[string]int64{},
|
||||
MailboxPreflightOperatorSeverityCounts: map[string]int64{},
|
||||
}
|
||||
s.sessions[sessionID] = session
|
||||
s.sessionCreatedTotal++
|
||||
@@ -1180,7 +1253,74 @@ func (s *RemoteWorkspaceFrameProbeSink) PreflightAdapterSessionMailboxConsumerRe
|
||||
firstExpected = session.Mailbox[startIndex].Sequence
|
||||
lastExpected = session.Mailbox[startIndex+returned-1].Sequence
|
||||
}
|
||||
return RemoteWorkspaceAdapterMailboxPreflightSnapshot{
|
||||
var firstRetained int64
|
||||
var lastRetained int64
|
||||
if len(session.Mailbox) > 0 {
|
||||
firstRetained = session.Mailbox[0].Sequence
|
||||
lastRetained = session.Mailbox[len(session.Mailbox)-1].Sequence
|
||||
}
|
||||
diagnosticState := "ready"
|
||||
staleCursor := false
|
||||
missingDropped := 0
|
||||
recommendedAction := "resume_from_cursor"
|
||||
actionHints := []string{"resume_from_requested_cursor"}
|
||||
actionReason := "cursor_window_available"
|
||||
if firstRetained > 0 && resumeSequence < firstRetained-1 {
|
||||
diagnosticState = "stale_cursor_gap"
|
||||
staleCursor = true
|
||||
missingDropped = int(firstRetained - resumeSequence - 1)
|
||||
recommendedAction = "reset_consumer_and_resync"
|
||||
actionHints = []string{"reset_consumer_cursor", "request_full_adapter_resync", "resume_from_checkpoint_after_resync"}
|
||||
actionReason = "consumer_cursor_before_first_retained_sequence"
|
||||
} else if returned == 0 {
|
||||
diagnosticState = "caught_up"
|
||||
recommendedAction = "wait_for_new_mailbox_events"
|
||||
actionHints = []string{"keep_consumer_cursor", "long_poll_after_sequence"}
|
||||
actionReason = "cursor_caught_up_to_retained_mailbox"
|
||||
}
|
||||
actionContext := map[string]any{
|
||||
"consumer_id": consumerID,
|
||||
"resume_from": resumeFrom,
|
||||
"resume_sequence": resumeSequence,
|
||||
"first_retained_sequence": firstRetained,
|
||||
"last_retained_sequence": lastRetained,
|
||||
"mailbox_depth": len(session.Mailbox),
|
||||
"mailbox_dropped_total": session.MailboxDropped,
|
||||
"missing_dropped_count": missingDropped,
|
||||
"expected_available_count": available,
|
||||
"expected_returned_count": returned,
|
||||
"expected_skipped_count": startIndex,
|
||||
"consumer_checkpoint_sequence": consumer.CheckpointSequence,
|
||||
"consumer_ack_sequence": consumer.AckSequence,
|
||||
}
|
||||
operatorSummary := "consumer cursor can resume from requested window"
|
||||
operatorStatus := "ready_to_resume"
|
||||
operatorSeverity := "ok"
|
||||
if diagnosticState == "stale_cursor_gap" {
|
||||
operatorSummary = "stale cursor gap: reset consumer and resync before resume"
|
||||
operatorStatus = "resync_required"
|
||||
operatorSeverity = "warn"
|
||||
} else if diagnosticState == "caught_up" {
|
||||
operatorSummary = "consumer cursor is caught up; wait for new mailbox events"
|
||||
operatorStatus = "caught_up"
|
||||
operatorSeverity = "info"
|
||||
}
|
||||
operatorSummaryFields := map[string]any{
|
||||
"diagnostic_state": diagnosticState,
|
||||
"recommended_action": recommendedAction,
|
||||
"action_reason": actionReason,
|
||||
"operator_status": operatorStatus,
|
||||
"operator_severity": operatorSeverity,
|
||||
"resume_from": resumeFrom,
|
||||
"resume_sequence": resumeSequence,
|
||||
"first_retained_sequence": firstRetained,
|
||||
"last_retained_sequence": lastRetained,
|
||||
"missing_dropped_count": missingDropped,
|
||||
"expected_available_count": available,
|
||||
"expected_returned_count": returned,
|
||||
"expected_skipped_count": startIndex,
|
||||
}
|
||||
snapshot := RemoteWorkspaceAdapterMailboxPreflightSnapshot{
|
||||
SchemaVersion: "rap.remote_workspace_adapter_mailbox_preflight.v1",
|
||||
AdapterRuntimeID: RemoteWorkspaceFrameProbeSinkRuntimeID,
|
||||
AdapterSessionID: adapterSessionID,
|
||||
@@ -1193,6 +1333,7 @@ func (s *RemoteWorkspaceFrameProbeSink) PreflightAdapterSessionMailboxConsumerRe
|
||||
Limit: limit,
|
||||
MailboxDepth: len(session.Mailbox),
|
||||
MailboxEnqueued: session.MailboxEnqueued,
|
||||
MailboxDropped: session.MailboxDropped,
|
||||
MailboxReadTotal: session.MailboxRead,
|
||||
ConsumerReadTotal: session.MailboxConsumerReadTotal,
|
||||
ConsumerAckTotal: session.MailboxConsumerAckTotal,
|
||||
@@ -1204,7 +1345,236 @@ func (s *RemoteWorkspaceFrameProbeSink) PreflightAdapterSessionMailboxConsumerRe
|
||||
ExpectedSkippedCount: startIndex,
|
||||
FirstExpectedSequence: firstExpected,
|
||||
LastExpectedSequence: lastExpected,
|
||||
}, nil
|
||||
FirstRetainedSequence: firstRetained,
|
||||
LastRetainedSequence: lastRetained,
|
||||
DiagnosticState: diagnosticState,
|
||||
StaleCursor: staleCursor,
|
||||
MissingDroppedCount: missingDropped,
|
||||
RecommendedAction: recommendedAction,
|
||||
ActionHints: actionHints,
|
||||
ActionReason: actionReason,
|
||||
ActionContext: actionContext,
|
||||
OperatorSummary: operatorSummary,
|
||||
OperatorStatus: operatorStatus,
|
||||
OperatorSeverity: operatorSeverity,
|
||||
OperatorSummaryFields: operatorSummaryFields,
|
||||
}
|
||||
s.recordAdapterSessionMailboxPreflightLocked(session, snapshot, now)
|
||||
return snapshot, nil
|
||||
}
|
||||
|
||||
func (s *RemoteWorkspaceFrameProbeSink) recordAdapterSessionMailboxPreflightLocked(session *remoteWorkspaceAdapterProbeSession, snapshot RemoteWorkspaceAdapterMailboxPreflightSnapshot, now time.Time) {
|
||||
s.mailboxPreflightTotal++
|
||||
if snapshot.ResumeFrom == "ack" {
|
||||
s.mailboxPreflightAckTotal++
|
||||
}
|
||||
if snapshot.ResumeFrom == "checkpoint" {
|
||||
s.mailboxPreflightCheckpointTotal++
|
||||
}
|
||||
s.lastMailboxPreflightAt = now.Format(time.RFC3339Nano)
|
||||
s.lastMailboxPreflightAdapterSessionID = snapshot.AdapterSessionID
|
||||
s.lastMailboxPreflightConsumerID = snapshot.ConsumerID
|
||||
s.lastMailboxPreflightResumeFrom = snapshot.ResumeFrom
|
||||
s.lastMailboxPreflightResumeSequence = snapshot.ResumeSequence
|
||||
s.lastMailboxPreflightAfterSequence = snapshot.AfterSequence
|
||||
s.lastMailboxPreflightAvailableCount = snapshot.ExpectedAvailableCount
|
||||
s.lastMailboxPreflightReturnedCount = snapshot.ExpectedReturnedCount
|
||||
s.lastMailboxPreflightSkippedCount = snapshot.ExpectedSkippedCount
|
||||
s.lastMailboxPreflightFirstSequence = snapshot.FirstExpectedSequence
|
||||
s.lastMailboxPreflightLastSequence = snapshot.LastExpectedSequence
|
||||
s.lastMailboxPreflightFirstRetained = snapshot.FirstRetainedSequence
|
||||
s.lastMailboxPreflightLastRetained = snapshot.LastRetainedSequence
|
||||
s.lastMailboxPreflightMailboxDropped = snapshot.MailboxDropped
|
||||
s.lastMailboxPreflightDiagnosticState = snapshot.DiagnosticState
|
||||
s.lastMailboxPreflightStaleCursor = snapshot.StaleCursor
|
||||
s.lastMailboxPreflightMissingDropped = snapshot.MissingDroppedCount
|
||||
s.lastMailboxPreflightRecommendedAction = snapshot.RecommendedAction
|
||||
s.lastMailboxPreflightActionHints = append([]string(nil), snapshot.ActionHints...)
|
||||
s.lastMailboxPreflightActionReason = snapshot.ActionReason
|
||||
s.lastMailboxPreflightActionContext = cloneStringAnyMap(snapshot.ActionContext)
|
||||
s.lastMailboxPreflightOperatorSummary = snapshot.OperatorSummary
|
||||
s.lastMailboxPreflightOperatorStatus = snapshot.OperatorStatus
|
||||
s.lastMailboxPreflightOperatorSeverity = snapshot.OperatorSeverity
|
||||
s.lastMailboxPreflightOperatorFields = cloneStringAnyMap(snapshot.OperatorSummaryFields)
|
||||
if session == nil {
|
||||
return
|
||||
}
|
||||
session.MailboxPreflightTotal++
|
||||
if snapshot.ResumeFrom == "ack" {
|
||||
session.MailboxPreflightAckTotal++
|
||||
}
|
||||
if snapshot.ResumeFrom == "checkpoint" {
|
||||
session.MailboxPreflightCheckpointTotal++
|
||||
}
|
||||
incrementStringInt64Map(&session.MailboxPreflightOperatorStatusCounts, snapshot.OperatorStatus)
|
||||
incrementStringInt64Map(&session.MailboxPreflightOperatorSeverityCounts, snapshot.OperatorSeverity)
|
||||
session.LastMailboxPreflightAt = now
|
||||
session.LastMailboxPreflightConsumerID = snapshot.ConsumerID
|
||||
session.LastMailboxPreflightResumeFrom = snapshot.ResumeFrom
|
||||
session.LastMailboxPreflightResumeSequence = snapshot.ResumeSequence
|
||||
session.LastMailboxPreflightAfterSequence = snapshot.AfterSequence
|
||||
session.LastMailboxPreflightAvailableCount = snapshot.ExpectedAvailableCount
|
||||
session.LastMailboxPreflightReturnedCount = snapshot.ExpectedReturnedCount
|
||||
session.LastMailboxPreflightSkippedCount = snapshot.ExpectedSkippedCount
|
||||
session.LastMailboxPreflightFirstSequence = snapshot.FirstExpectedSequence
|
||||
session.LastMailboxPreflightLastSequence = snapshot.LastExpectedSequence
|
||||
session.LastMailboxPreflightFirstRetained = snapshot.FirstRetainedSequence
|
||||
session.LastMailboxPreflightLastRetained = snapshot.LastRetainedSequence
|
||||
session.LastMailboxPreflightMailboxDropped = snapshot.MailboxDropped
|
||||
session.LastMailboxPreflightDiagnosticState = snapshot.DiagnosticState
|
||||
session.LastMailboxPreflightStaleCursor = snapshot.StaleCursor
|
||||
session.LastMailboxPreflightMissingDropped = snapshot.MissingDroppedCount
|
||||
session.LastMailboxPreflightRecommendedAction = snapshot.RecommendedAction
|
||||
session.LastMailboxPreflightActionHints = append([]string(nil), snapshot.ActionHints...)
|
||||
session.LastMailboxPreflightActionReason = snapshot.ActionReason
|
||||
session.LastMailboxPreflightActionContext = cloneStringAnyMap(snapshot.ActionContext)
|
||||
session.LastMailboxPreflightOperatorSummary = snapshot.OperatorSummary
|
||||
session.LastMailboxPreflightOperatorStatus = snapshot.OperatorStatus
|
||||
session.LastMailboxPreflightOperatorSeverity = snapshot.OperatorSeverity
|
||||
session.LastMailboxPreflightOperatorFields = cloneStringAnyMap(snapshot.OperatorSummaryFields)
|
||||
}
|
||||
|
||||
// cloneStringAnyMap returns a shallow copy of source. A nil source yields nil
// (not an empty map). Values are copied by assignment, so any nested maps or
// slices stay shared between source and the returned map.
func cloneStringAnyMap(source map[string]any) map[string]any {
	if source == nil {
		return nil
	}
	clone := make(map[string]any, len(source))
	for key, value := range source {
		clone[key] = value
	}
	return clone
}
|
||||
|
||||
// cloneStringInt64Map returns an independent copy of source. A nil source
// yields nil (not an empty map).
func cloneStringInt64Map(source map[string]int64) map[string]int64 {
	if source == nil {
		return nil
	}
	clone := make(map[string]int64, len(source))
	for key, value := range source {
		clone[key] = value
	}
	return clone
}
|
||||
|
||||
// incrementStringInt64Map adds one to (*target)[key], where key is first
// trimmed of surrounding whitespace. It does nothing when the trimmed key is
// empty or target is nil. If *target is a nil map it is allocated first, which
// is why the map is passed by pointer.
func incrementStringInt64Map(target *map[string]int64, key string) {
	key = strings.TrimSpace(key)
	if key == "" || target == nil {
		return
	}
	if *target == nil {
		*target = map[string]int64{}
	}
	(*target)[key]++
}
|
||||
|
||||
// remoteWorkspacePreflightAttentionStatus condenses per-status and
// per-severity preflight tallies into a single attention level:
//
//   - "repeated_resync_required" when "resync_required" or "warn" was counted
//     more than once,
//   - "needs_attention" when either was counted at least once,
//   - "clean" when only "ready_to_resume"/"caught_up" statuses or "ok"/"info"
//     severities were counted,
//   - "unknown" when none of those keys has a positive count.
//
// Nil maps are accepted and behave like empty maps.
func remoteWorkspacePreflightAttentionStatus(statusCounts map[string]int64, severityCounts map[string]int64) string {
	resyncCount := statusCounts["resync_required"]
	warnCount := severityCounts["warn"]
	switch {
	case resyncCount > 1 || warnCount > 1:
		return "repeated_resync_required"
	case resyncCount > 0 || warnCount > 0:
		return "needs_attention"
	case statusCounts["ready_to_resume"] > 0 || statusCounts["caught_up"] > 0 || severityCounts["ok"] > 0 || severityCounts["info"] > 0:
		return "clean"
	default:
		return "unknown"
	}
}
|
||||
|
||||
// remoteWorkspacePreflightAttentionReason maps an attention status (as
// produced by remoteWorkspacePreflightAttentionStatus) to a reason code. For
// "needs_attention" the tallies decide whether the cause is reported as a
// resync-required status or a warn severity; any status other than the three
// known ones yields "no_preflight_observed".
func remoteWorkspacePreflightAttentionReason(status string, statusCounts map[string]int64, severityCounts map[string]int64) string {
	switch status {
	case "repeated_resync_required":
		return "resync_required_preflight_repeated"
	case "needs_attention":
		// A resync-required status takes precedence over a bare warn severity.
		if statusCounts["resync_required"] > 0 {
			return "resync_required_preflight_observed"
		}
		if severityCounts["warn"] > 0 {
			return "warn_preflight_observed"
		}
		return "attention_preflight_observed"
	case "clean":
		return "no_resync_required_preflight_observed"
	default:
		return "no_preflight_observed"
	}
}
|
||||
|
||||
// remoteWorkspacePreflightRemediationChecklist builds the ordered list of
// remediation steps for the given operator status. Every item is a map with
// the keys "step", "required", "satisfied" and "source_hint"; "source_hint"
// reports whether the step was also present in actionHints.
//
//   - "resync_required": three unsatisfied steps (reset cursor, request full
//     resync, resume from checkpoint after resync).
//   - "ready_to_resume": a single, already satisfied resume step.
//   - anything else: a single unsatisfied "wait_for_new_mailbox_events" step,
//     whose hint flag is set by either "long_poll_after_sequence" or
//     "keep_consumer_cursor".
func remoteWorkspacePreflightRemediationChecklist(operatorStatus string, actionHints []string) []map[string]any {
	hinted := make(map[string]bool, len(actionHints))
	for _, hint := range actionHints {
		hinted[hint] = true
	}

	// All checklist items are required; only satisfaction and hint provenance vary.
	item := func(step string, satisfied bool, sourceHint bool) map[string]any {
		return map[string]any{
			"step":        step,
			"required":    true,
			"satisfied":   satisfied,
			"source_hint": sourceHint,
		}
	}

	switch operatorStatus {
	case "resync_required":
		return []map[string]any{
			item("reset_consumer_cursor", false, hinted["reset_consumer_cursor"]),
			item("request_full_adapter_resync", false, hinted["request_full_adapter_resync"]),
			item("resume_from_checkpoint_after_resync", false, hinted["resume_from_checkpoint_after_resync"]),
		}
	case "ready_to_resume":
		return []map[string]any{
			item("resume_from_requested_cursor", true, hinted["resume_from_requested_cursor"]),
		}
	default:
		return []map[string]any{
			item("wait_for_new_mailbox_events", false, hinted["long_poll_after_sequence"] || hinted["keep_consumer_cursor"]),
		}
	}
}
|
||||
|
||||
// remoteWorkspacePreflightRemediationChecklistSummary tallies a remediation
// checklist into "total_count", "required_count", "satisfied_count" and
// "pending_count" (all int) plus a "status" string:
//
//   - "action_required" when at least one required item is unsatisfied,
//   - "ready" when there are required items and all are satisfied,
//   - "not_required" when no item is required.
//
// Only required items contribute to the satisfied count. Items whose
// "required" or "satisfied" entry is missing or not a bool count as false.
func remoteWorkspacePreflightRemediationChecklistSummary(checklist []map[string]any) map[string]any {
	required := 0
	satisfied := 0
	for _, item := range checklist {
		// A failed type assertion leaves the zero value (false), which is the
		// intended reading for absent or malformed entries.
		itemRequired, _ := item["required"].(bool)
		itemSatisfied, _ := item["satisfied"].(bool)
		if !itemRequired {
			continue
		}
		required++
		if itemSatisfied {
			satisfied++
		}
	}

	// satisfied is only incremented for required items, so pending is never
	// negative and needs no clamping.
	pending := required - satisfied

	status := "not_required"
	switch {
	case pending > 0:
		status = "action_required"
	case required > 0:
		status = "ready"
	}

	return map[string]any{
		"status":          status,
		"total_count":     len(checklist),
		"required_count":  required,
		"satisfied_count": satisfied,
		"pending_count":   pending,
	}
}
|
||||
|
||||
func (s *RemoteWorkspaceFrameProbeSink) evictOldestMailboxConsumerLocked(session *remoteWorkspaceAdapterProbeSession) bool {
|
||||
@@ -1256,25 +1626,50 @@ func countMailboxConsumersLocked(sessions map[string]*remoteWorkspaceAdapterProb
|
||||
|
||||
func remoteWorkspaceAdapterRuntimeReadinessLocked(s *RemoteWorkspaceFrameProbeSink, session *remoteWorkspaceAdapterProbeSession, now time.Time) map[string]any {
|
||||
readiness := map[string]any{
|
||||
"schema_version": "rap.remote_workspace_adapter_runtime_readiness.v1",
|
||||
"adapter_runtime_id": RemoteWorkspaceFrameProbeSinkRuntimeID,
|
||||
"observed_at": now.UTC().Format(time.RFC3339Nano),
|
||||
"probe_only": true,
|
||||
"payload_traffic": "none",
|
||||
"status": "idle",
|
||||
"diagnostic_state": "waiting_for_session",
|
||||
"ready": false,
|
||||
"active_session_count": len(s.sessions),
|
||||
"terminal_session_count": len(s.terminalSessions),
|
||||
"mailbox_capacity": DefaultRemoteWorkspaceAdapterMailboxCapacity,
|
||||
"consumer_capacity": DefaultRemoteWorkspaceAdapterMailboxConsumerCapacity,
|
||||
"mailbox_read_total": s.mailboxReadTotal,
|
||||
"mailbox_resume_total": s.mailboxResumeReadTotal,
|
||||
"schema_version": "rap.remote_workspace_adapter_runtime_readiness.v1",
|
||||
"adapter_runtime_id": RemoteWorkspaceFrameProbeSinkRuntimeID,
|
||||
"observed_at": now.UTC().Format(time.RFC3339Nano),
|
||||
"probe_only": true,
|
||||
"payload_traffic": "none",
|
||||
"status": "idle",
|
||||
"diagnostic_state": "waiting_for_session",
|
||||
"ready": false,
|
||||
"active_session_count": len(s.sessions),
|
||||
"terminal_session_count": len(s.terminalSessions),
|
||||
"mailbox_capacity": DefaultRemoteWorkspaceAdapterMailboxCapacity,
|
||||
"consumer_capacity": DefaultRemoteWorkspaceAdapterMailboxConsumerCapacity,
|
||||
"mailbox_read_total": s.mailboxReadTotal,
|
||||
"mailbox_resume_total": s.mailboxResumeReadTotal,
|
||||
"mailbox_preflight_total": s.mailboxPreflightTotal,
|
||||
}
|
||||
if session == nil {
|
||||
if s.sequence == 0 {
|
||||
readiness["no_session_summary"] = map[string]any{
|
||||
"schema_version": "rap.remote_workspace_adapter_no_session_summary.v1",
|
||||
"summary_contract": []string{"status", "diagnostic_state", "active_session_count", "terminal_session_count"},
|
||||
"summary_features": map[string]bool{"status": true, "diagnostic_state": true, "active_session_count": true, "terminal_session_count": true},
|
||||
"status": "idle",
|
||||
"diagnostic_state": "waiting_for_session",
|
||||
"active_session_count": len(s.sessions),
|
||||
"terminal_session_count": len(s.terminalSessions),
|
||||
}
|
||||
}
|
||||
if s.sequence > 0 {
|
||||
readiness["last_adapter_session_id"] = s.last.AdapterSessionID
|
||||
readiness["last_session_state"] = s.last.SessionState
|
||||
lastSessionState := s.last.SessionState
|
||||
if terminal, ok := s.terminalSessions[s.last.AdapterSessionID]; ok {
|
||||
lastSessionState = terminal.State
|
||||
readiness["terminal_session_summary"] = map[string]any{
|
||||
"schema_version": "rap.remote_workspace_adapter_terminal_session_summary.v1",
|
||||
"summary_contract": []string{"adapter_session_id", "session_state", "reason", "controlled_at"},
|
||||
"summary_features": map[string]bool{"adapter_session_id": true, "session_state": true, "reason": true, "controlled_at": true},
|
||||
"adapter_session_id": s.last.AdapterSessionID,
|
||||
"session_state": terminal.State,
|
||||
"reason": terminal.Reason,
|
||||
"controlled_at": terminal.ControlledAt.Format(time.RFC3339Nano),
|
||||
}
|
||||
}
|
||||
readiness["last_session_state"] = lastSessionState
|
||||
readiness["diagnostic_state"] = "last_session_terminal_or_expired"
|
||||
}
|
||||
return readiness
|
||||
@@ -1299,6 +1694,13 @@ func remoteWorkspaceAdapterRuntimeReadinessLocked(s *RemoteWorkspaceFrameProbeSi
|
||||
readiness["mailbox_enqueued_total"] = session.MailboxEnqueued
|
||||
readiness["mailbox_read_total"] = session.MailboxRead
|
||||
readiness["mailbox_resume_read_total"] = session.MailboxResumeRead
|
||||
readiness["mailbox_preflight_total"] = session.MailboxPreflightTotal
|
||||
readiness["mailbox_preflight_operator_status_counts"] = cloneStringInt64Map(session.MailboxPreflightOperatorStatusCounts)
|
||||
readiness["mailbox_preflight_operator_severity_counts"] = cloneStringInt64Map(session.MailboxPreflightOperatorSeverityCounts)
|
||||
preflightAttentionStatus := remoteWorkspacePreflightAttentionStatus(session.MailboxPreflightOperatorStatusCounts, session.MailboxPreflightOperatorSeverityCounts)
|
||||
preflightAttentionReason := remoteWorkspacePreflightAttentionReason(preflightAttentionStatus, session.MailboxPreflightOperatorStatusCounts, session.MailboxPreflightOperatorSeverityCounts)
|
||||
readiness["preflight_attention_status"] = preflightAttentionStatus
|
||||
readiness["preflight_attention_reason"] = preflightAttentionReason
|
||||
readiness["mailbox_after_sequence_read_total"] = session.MailboxAfterSequenceRead
|
||||
readiness["mailbox_returned_total"] = session.MailboxReturnedTotal
|
||||
readiness["mailbox_skipped_total"] = session.MailboxSkippedTotal
|
||||
@@ -1315,6 +1717,66 @@ func remoteWorkspaceAdapterRuntimeReadinessLocked(s *RemoteWorkspaceFrameProbeSi
|
||||
readiness["last_after_sequence"] = session.LastMailboxAfterSequence
|
||||
readiness["last_returned_count"] = session.LastMailboxReturnedCount
|
||||
readiness["last_skipped_count"] = session.LastMailboxSkippedCount
|
||||
readiness["last_preflight_consumer_id"] = session.LastMailboxPreflightConsumerID
|
||||
readiness["last_preflight_resume_from"] = session.LastMailboxPreflightResumeFrom
|
||||
readiness["last_preflight_resume_sequence"] = session.LastMailboxPreflightResumeSequence
|
||||
readiness["last_preflight_available_count"] = session.LastMailboxPreflightAvailableCount
|
||||
readiness["last_preflight_returned_count"] = session.LastMailboxPreflightReturnedCount
|
||||
readiness["last_preflight_skipped_count"] = session.LastMailboxPreflightSkippedCount
|
||||
readiness["last_preflight_diagnostic_state"] = session.LastMailboxPreflightDiagnosticState
|
||||
readiness["last_preflight_stale_cursor"] = session.LastMailboxPreflightStaleCursor
|
||||
readiness["last_preflight_missing_dropped_count"] = session.LastMailboxPreflightMissingDropped
|
||||
readiness["last_preflight_recommended_action"] = session.LastMailboxPreflightRecommendedAction
|
||||
readiness["last_preflight_action_hints"] = append([]string(nil), session.LastMailboxPreflightActionHints...)
|
||||
readiness["last_preflight_action_reason"] = session.LastMailboxPreflightActionReason
|
||||
readiness["last_preflight_action_context"] = cloneStringAnyMap(session.LastMailboxPreflightActionContext)
|
||||
readiness["last_preflight_operator_summary"] = session.LastMailboxPreflightOperatorSummary
|
||||
readiness["last_preflight_operator_status"] = session.LastMailboxPreflightOperatorStatus
|
||||
readiness["last_preflight_operator_severity"] = session.LastMailboxPreflightOperatorSeverity
|
||||
readiness["last_preflight_operator_summary_fields"] = cloneStringAnyMap(session.LastMailboxPreflightOperatorFields)
|
||||
if session.MailboxPreflightTotal > 0 {
|
||||
remediationChecklist := remoteWorkspacePreflightRemediationChecklist(session.LastMailboxPreflightOperatorStatus, session.LastMailboxPreflightActionHints)
|
||||
remediationChecklistSummary := remoteWorkspacePreflightRemediationChecklistSummary(remediationChecklist)
|
||||
readiness["last_preflight"] = map[string]any{
|
||||
"diagnostics_schema_version": "rap.remote_workspace_adapter_mailbox_preflight_diagnostics.v1",
|
||||
"diagnostics_contract": []string{"retained_window", "remediation_checklist", "attention", "operator_counts"},
|
||||
"diagnostics_features": map[string]bool{"retained_window": true, "remediation_checklist": true, "attention": true, "operator_counts": true},
|
||||
"observed_at": session.LastMailboxPreflightAt.Format(time.RFC3339Nano),
|
||||
"consumer_id": session.LastMailboxPreflightConsumerID,
|
||||
"resume_from": session.LastMailboxPreflightResumeFrom,
|
||||
"resume_sequence": session.LastMailboxPreflightResumeSequence,
|
||||
"after_sequence": session.LastMailboxPreflightAfterSequence,
|
||||
"available_count": session.LastMailboxPreflightAvailableCount,
|
||||
"returned_count": session.LastMailboxPreflightReturnedCount,
|
||||
"skipped_count": session.LastMailboxPreflightSkippedCount,
|
||||
"first_sequence": session.LastMailboxPreflightFirstSequence,
|
||||
"last_sequence": session.LastMailboxPreflightLastSequence,
|
||||
"first_retained_sequence": session.LastMailboxPreflightFirstRetained,
|
||||
"last_retained_sequence": session.LastMailboxPreflightLastRetained,
|
||||
"mailbox_dropped_total": session.LastMailboxPreflightMailboxDropped,
|
||||
"diagnostic_state": session.LastMailboxPreflightDiagnosticState,
|
||||
"stale_cursor": session.LastMailboxPreflightStaleCursor,
|
||||
"missing_dropped_count": session.LastMailboxPreflightMissingDropped,
|
||||
"recommended_action": session.LastMailboxPreflightRecommendedAction,
|
||||
"action_hints": append([]string(nil), session.LastMailboxPreflightActionHints...),
|
||||
"action_reason": session.LastMailboxPreflightActionReason,
|
||||
"action_context": cloneStringAnyMap(session.LastMailboxPreflightActionContext),
|
||||
"remediation_checklist": remediationChecklist,
|
||||
"remediation_checklist_status": remediationChecklistSummary["status"],
|
||||
"remediation_checklist_counts": remediationChecklistSummary,
|
||||
"operator_summary": session.LastMailboxPreflightOperatorSummary,
|
||||
"operator_status": session.LastMailboxPreflightOperatorStatus,
|
||||
"operator_severity": session.LastMailboxPreflightOperatorSeverity,
|
||||
"operator_summary_fields": cloneStringAnyMap(session.LastMailboxPreflightOperatorFields),
|
||||
"mailbox_preflight_total": session.MailboxPreflightTotal,
|
||||
"mailbox_preflight_ack_total": session.MailboxPreflightAckTotal,
|
||||
"mailbox_preflight_checkpoint_total": session.MailboxPreflightCheckpointTotal,
|
||||
"preflight_attention_status": preflightAttentionStatus,
|
||||
"preflight_attention_reason": preflightAttentionReason,
|
||||
"operator_status_counts": cloneStringInt64Map(session.MailboxPreflightOperatorStatusCounts),
|
||||
"operator_severity_counts": cloneStringInt64Map(session.MailboxPreflightOperatorSeverityCounts),
|
||||
}
|
||||
}
|
||||
if !session.LastActivityAt.IsZero() {
|
||||
readiness["last_activity_at"] = session.LastActivityAt.Format(time.RFC3339Nano)
|
||||
}
|
||||
@@ -1327,6 +1789,9 @@ func remoteWorkspaceAdapterRuntimeReadinessLocked(s *RemoteWorkspaceFrameProbeSi
|
||||
if !session.LastMailboxConsumerAckAt.IsZero() {
|
||||
readiness["last_consumer_ack_at"] = session.LastMailboxConsumerAckAt.Format(time.RFC3339Nano)
|
||||
}
|
||||
if !session.LastMailboxPreflightAt.IsZero() {
|
||||
readiness["last_preflight_at"] = session.LastMailboxPreflightAt.Format(time.RFC3339Nano)
|
||||
}
|
||||
return readiness
|
||||
}
|
||||
|
||||
@@ -1445,6 +1910,9 @@ func (s *RemoteWorkspaceFrameProbeSink) Report(now time.Time) map[string]any {
|
||||
report["mailbox_after_sequence_read_total"] = s.mailboxAfterSequenceReadTotal
|
||||
report["mailbox_returned_total"] = s.mailboxReturnedTotal
|
||||
report["mailbox_skipped_total"] = s.mailboxSkippedTotal
|
||||
report["mailbox_preflight_total"] = s.mailboxPreflightTotal
|
||||
report["mailbox_preflight_ack_total"] = s.mailboxPreflightAckTotal
|
||||
report["mailbox_preflight_checkpoint_total"] = s.mailboxPreflightCheckpointTotal
|
||||
report["mailbox_consumer_capacity"] = DefaultRemoteWorkspaceAdapterMailboxConsumerCapacity
|
||||
report["mailbox_consumer_count"] = countMailboxConsumersLocked(s.sessions)
|
||||
report["mailbox_consumer_read_total"] = s.mailboxConsumerReadTotal
|
||||
@@ -1467,6 +1935,30 @@ func (s *RemoteWorkspaceFrameProbeSink) Report(now time.Time) map[string]any {
|
||||
report["last_mailbox_resume_sequence"] = s.lastMailboxResumeSequence
|
||||
report["last_mailbox_resume_consumer_id"] = s.lastMailboxResumeConsumerID
|
||||
}
|
||||
if s.mailboxPreflightTotal > 0 {
|
||||
report["last_mailbox_preflight_at"] = s.lastMailboxPreflightAt
|
||||
report["last_mailbox_preflight_adapter_session_id"] = s.lastMailboxPreflightAdapterSessionID
|
||||
report["last_mailbox_preflight_consumer_id"] = s.lastMailboxPreflightConsumerID
|
||||
report["last_mailbox_preflight_resume_from"] = s.lastMailboxPreflightResumeFrom
|
||||
report["last_mailbox_preflight_resume_sequence"] = s.lastMailboxPreflightResumeSequence
|
||||
report["last_mailbox_preflight_after_sequence"] = s.lastMailboxPreflightAfterSequence
|
||||
report["last_mailbox_preflight_available_count"] = s.lastMailboxPreflightAvailableCount
|
||||
report["last_mailbox_preflight_returned_count"] = s.lastMailboxPreflightReturnedCount
|
||||
report["last_mailbox_preflight_skipped_count"] = s.lastMailboxPreflightSkippedCount
|
||||
report["last_mailbox_preflight_first_sequence"] = s.lastMailboxPreflightFirstSequence
|
||||
report["last_mailbox_preflight_last_sequence"] = s.lastMailboxPreflightLastSequence
|
||||
report["last_mailbox_preflight_diagnostic_state"] = s.lastMailboxPreflightDiagnosticState
|
||||
report["last_mailbox_preflight_stale_cursor"] = s.lastMailboxPreflightStaleCursor
|
||||
report["last_mailbox_preflight_missing_dropped_count"] = s.lastMailboxPreflightMissingDropped
|
||||
report["last_mailbox_preflight_recommended_action"] = s.lastMailboxPreflightRecommendedAction
|
||||
report["last_mailbox_preflight_action_hints"] = append([]string(nil), s.lastMailboxPreflightActionHints...)
|
||||
report["last_mailbox_preflight_action_reason"] = s.lastMailboxPreflightActionReason
|
||||
report["last_mailbox_preflight_action_context"] = cloneStringAnyMap(s.lastMailboxPreflightActionContext)
|
||||
report["last_mailbox_preflight_operator_summary"] = s.lastMailboxPreflightOperatorSummary
|
||||
report["last_mailbox_preflight_operator_status"] = s.lastMailboxPreflightOperatorStatus
|
||||
report["last_mailbox_preflight_operator_severity"] = s.lastMailboxPreflightOperatorSeverity
|
||||
report["last_mailbox_preflight_operator_summary_fields"] = cloneStringAnyMap(s.lastMailboxPreflightOperatorFields)
|
||||
}
|
||||
if s.mailboxConsumerReadTotal > 0 {
|
||||
report["last_mailbox_consumer_id"] = s.lastMailboxConsumerID
|
||||
report["last_mailbox_consumer_read_at"] = s.lastMailboxConsumerReadAt
|
||||
@@ -1520,6 +2012,11 @@ func (s *RemoteWorkspaceFrameProbeSink) Report(now time.Time) map[string]any {
|
||||
report["current_session_mailbox_after_sequence_read_total"] = session.MailboxAfterSequenceRead
|
||||
report["current_session_mailbox_returned_total"] = session.MailboxReturnedTotal
|
||||
report["current_session_mailbox_skipped_total"] = session.MailboxSkippedTotal
|
||||
report["current_session_mailbox_preflight_total"] = session.MailboxPreflightTotal
|
||||
report["current_session_mailbox_preflight_ack_total"] = session.MailboxPreflightAckTotal
|
||||
report["current_session_mailbox_preflight_checkpoint_total"] = session.MailboxPreflightCheckpointTotal
|
||||
report["current_session_mailbox_preflight_operator_status_counts"] = cloneStringInt64Map(session.MailboxPreflightOperatorStatusCounts)
|
||||
report["current_session_mailbox_preflight_operator_severity_counts"] = cloneStringInt64Map(session.MailboxPreflightOperatorSeverityCounts)
|
||||
report["current_session_mailbox_consumer_count"] = len(session.MailboxConsumers)
|
||||
report["current_session_mailbox_consumer_read_total"] = session.MailboxConsumerReadTotal
|
||||
report["current_session_mailbox_consumer_ack_total"] = session.MailboxConsumerAckTotal
|
||||
@@ -1549,6 +2046,29 @@ func (s *RemoteWorkspaceFrameProbeSink) Report(now time.Time) map[string]any {
|
||||
report["current_session_last_mailbox_resume_sequence"] = session.LastMailboxResumeSequence
|
||||
report["current_session_last_mailbox_resume_consumer_id"] = session.LastMailboxResumeConsumerID
|
||||
}
|
||||
if session.MailboxPreflightTotal > 0 {
|
||||
report["current_session_last_mailbox_preflight_at"] = session.LastMailboxPreflightAt.Format(time.RFC3339Nano)
|
||||
report["current_session_last_mailbox_preflight_consumer_id"] = session.LastMailboxPreflightConsumerID
|
||||
report["current_session_last_mailbox_preflight_resume_from"] = session.LastMailboxPreflightResumeFrom
|
||||
report["current_session_last_mailbox_preflight_resume_sequence"] = session.LastMailboxPreflightResumeSequence
|
||||
report["current_session_last_mailbox_preflight_after_sequence"] = session.LastMailboxPreflightAfterSequence
|
||||
report["current_session_last_mailbox_preflight_available_count"] = session.LastMailboxPreflightAvailableCount
|
||||
report["current_session_last_mailbox_preflight_returned_count"] = session.LastMailboxPreflightReturnedCount
|
||||
report["current_session_last_mailbox_preflight_skipped_count"] = session.LastMailboxPreflightSkippedCount
|
||||
report["current_session_last_mailbox_preflight_first_sequence"] = session.LastMailboxPreflightFirstSequence
|
||||
report["current_session_last_mailbox_preflight_last_sequence"] = session.LastMailboxPreflightLastSequence
|
||||
report["current_session_last_mailbox_preflight_diagnostic_state"] = session.LastMailboxPreflightDiagnosticState
|
||||
report["current_session_last_mailbox_preflight_stale_cursor"] = session.LastMailboxPreflightStaleCursor
|
||||
report["current_session_last_mailbox_preflight_missing_dropped_count"] = session.LastMailboxPreflightMissingDropped
|
||||
report["current_session_last_mailbox_preflight_recommended_action"] = session.LastMailboxPreflightRecommendedAction
|
||||
report["current_session_last_mailbox_preflight_action_hints"] = append([]string(nil), session.LastMailboxPreflightActionHints...)
|
||||
report["current_session_last_mailbox_preflight_action_reason"] = session.LastMailboxPreflightActionReason
|
||||
report["current_session_last_mailbox_preflight_action_context"] = cloneStringAnyMap(session.LastMailboxPreflightActionContext)
|
||||
report["current_session_last_mailbox_preflight_operator_summary"] = session.LastMailboxPreflightOperatorSummary
|
||||
report["current_session_last_mailbox_preflight_operator_status"] = session.LastMailboxPreflightOperatorStatus
|
||||
report["current_session_last_mailbox_preflight_operator_severity"] = session.LastMailboxPreflightOperatorSeverity
|
||||
report["current_session_last_mailbox_preflight_operator_summary_fields"] = cloneStringAnyMap(session.LastMailboxPreflightOperatorFields)
|
||||
}
|
||||
if !session.LastBackpressureAt.IsZero() {
|
||||
report["current_session_last_backpressure_at"] = session.LastBackpressureAt.Format(time.RFC3339Nano)
|
||||
report["current_session_last_backpressure_reason"] = session.LastReason
|
||||
|
||||
@@ -1643,6 +1643,44 @@ func TestRemoteWorkspaceAdapterSessionControlEndpointClosesSession(t *testing.T)
|
||||
report["last_session_control_state"] != "closed" {
|
||||
t.Fatalf("control report = %+v", report)
|
||||
}
|
||||
readiness, ok := report["adapter_runtime_readiness"].(map[string]any)
|
||||
if !ok {
|
||||
t.Fatalf("adapter runtime readiness missing from control report = %+v", report)
|
||||
}
|
||||
if readiness["schema_version"] != "rap.remote_workspace_adapter_runtime_readiness.v1" ||
|
||||
readiness["status"] != "idle" ||
|
||||
readiness["diagnostic_state"] != "last_session_terminal_or_expired" ||
|
||||
readiness["ready"] != false ||
|
||||
readiness["active_session_count"] != 0 ||
|
||||
readiness["last_adapter_session_id"] != "rap-rw-adapter-session-aaaaaaaaaaaaaaaaaaaaaaaa" ||
|
||||
readiness["last_session_state"] != "closed" {
|
||||
t.Fatalf("invalid no-active-session readiness after close = %+v", readiness)
|
||||
}
|
||||
if _, ok := readiness["adapter_session_id"]; ok {
|
||||
t.Fatalf("adapter_session_id should be absent without active session = %+v", readiness)
|
||||
}
|
||||
if _, ok := readiness["last_preflight"]; ok {
|
||||
t.Fatalf("last_preflight should be absent without active session = %+v", readiness)
|
||||
}
|
||||
terminalSummary, ok := readiness["terminal_session_summary"].(map[string]any)
|
||||
if !ok {
|
||||
t.Fatalf("terminal session summary missing after close = %+v", readiness)
|
||||
}
|
||||
if terminalSummary["adapter_session_id"] != "rap-rw-adapter-session-aaaaaaaaaaaaaaaaaaaaaaaa" ||
|
||||
terminalSummary["schema_version"] != "rap.remote_workspace_adapter_terminal_session_summary.v1" ||
|
||||
!stringAnySliceContains(terminalSummary["summary_contract"], "adapter_session_id") ||
|
||||
!stringAnySliceContains(terminalSummary["summary_contract"], "session_state") ||
|
||||
!stringAnySliceContains(terminalSummary["summary_contract"], "reason") ||
|
||||
!stringAnySliceContains(terminalSummary["summary_contract"], "controlled_at") ||
|
||||
!boolMapValue(terminalSummary["summary_features"], "adapter_session_id") ||
|
||||
!boolMapValue(terminalSummary["summary_features"], "session_state") ||
|
||||
!boolMapValue(terminalSummary["summary_features"], "reason") ||
|
||||
!boolMapValue(terminalSummary["summary_features"], "controlled_at") ||
|
||||
terminalSummary["session_state"] != "closed" ||
|
||||
terminalSummary["reason"] != "unit test close" ||
|
||||
terminalSummary["controlled_at"] == "" {
|
||||
t.Fatalf("invalid terminal session summary after close = %+v", terminalSummary)
|
||||
}
|
||||
|
||||
resp, err = http.Post(controlURL, "application/json", bytes.NewReader([]byte(`{"action":"close","reason":"repeat close"}`)))
|
||||
if err != nil {
|
||||
@@ -1665,6 +1703,255 @@ func TestRemoteWorkspaceAdapterSessionControlEndpointClosesSession(t *testing.T)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRemoteWorkspaceAdapterReadinessBeforeAnySessionHasNoTerminalSummary(t *testing.T) {
|
||||
sink := NewRemoteWorkspaceFrameProbeSink()
|
||||
report := sink.Report(time.Now().UTC())
|
||||
readiness, ok := report["adapter_runtime_readiness"].(map[string]any)
|
||||
if !ok {
|
||||
t.Fatalf("adapter runtime readiness missing from report = %+v", report)
|
||||
}
|
||||
if readiness["schema_version"] != "rap.remote_workspace_adapter_runtime_readiness.v1" ||
|
||||
readiness["status"] != "idle" ||
|
||||
readiness["diagnostic_state"] != "waiting_for_session" ||
|
||||
readiness["ready"] != false ||
|
||||
readiness["active_session_count"] != 0 ||
|
||||
readiness["terminal_session_count"] != 0 {
|
||||
t.Fatalf("invalid empty readiness = %+v", readiness)
|
||||
}
|
||||
if _, ok := readiness["last_adapter_session_id"]; ok {
|
||||
t.Fatalf("last_adapter_session_id should be absent before any session = %+v", readiness)
|
||||
}
|
||||
if _, ok := readiness["last_session_state"]; ok {
|
||||
t.Fatalf("last_session_state should be absent before any session = %+v", readiness)
|
||||
}
|
||||
if _, ok := readiness["terminal_session_summary"]; ok {
|
||||
t.Fatalf("terminal_session_summary should be absent before terminal history = %+v", readiness)
|
||||
}
|
||||
noSessionSummary, ok := readiness["no_session_summary"].(map[string]any)
|
||||
if !ok {
|
||||
t.Fatalf("no_session_summary should be present before any session = %+v", readiness)
|
||||
}
|
||||
if noSessionSummary["schema_version"] != "rap.remote_workspace_adapter_no_session_summary.v1" ||
|
||||
!stringAnySliceContains(noSessionSummary["summary_contract"], "status") ||
|
||||
!stringAnySliceContains(noSessionSummary["summary_contract"], "diagnostic_state") ||
|
||||
!stringAnySliceContains(noSessionSummary["summary_contract"], "active_session_count") ||
|
||||
!stringAnySliceContains(noSessionSummary["summary_contract"], "terminal_session_count") ||
|
||||
!boolMapValue(noSessionSummary["summary_features"], "status") ||
|
||||
!boolMapValue(noSessionSummary["summary_features"], "diagnostic_state") ||
|
||||
!boolMapValue(noSessionSummary["summary_features"], "active_session_count") ||
|
||||
!boolMapValue(noSessionSummary["summary_features"], "terminal_session_count") ||
|
||||
noSessionSummary["status"] != "idle" ||
|
||||
noSessionSummary["diagnostic_state"] != "waiting_for_session" ||
|
||||
noSessionSummary["active_session_count"] != 0 ||
|
||||
noSessionSummary["terminal_session_count"] != 0 {
|
||||
t.Fatalf("invalid no-session summary before any session = %+v", noSessionSummary)
|
||||
}
|
||||
if _, ok := readiness["last_preflight"]; ok {
|
||||
t.Fatalf("last_preflight should be absent before any session = %+v", readiness)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRemoteWorkspaceAdapterReadinessSummaryExclusivity(t *testing.T) {
|
||||
sink := NewRemoteWorkspaceFrameProbeSink()
|
||||
freshReport := sink.Report(time.Now().UTC())
|
||||
freshReadiness, ok := freshReport["adapter_runtime_readiness"].(map[string]any)
|
||||
if !ok {
|
||||
t.Fatalf("fresh readiness missing from report = %+v", freshReport)
|
||||
}
|
||||
if _, ok := freshReadiness["no_session_summary"]; !ok {
|
||||
t.Fatalf("fresh readiness should include no_session_summary = %+v", freshReadiness)
|
||||
}
|
||||
if _, ok := freshReadiness["terminal_session_summary"]; ok {
|
||||
t.Fatalf("fresh readiness should not include terminal_session_summary = %+v", freshReadiness)
|
||||
}
|
||||
|
||||
sessionID := "rap-rw-adapter-session-d1d1d1d1d1d1d1d1d1d1d1d1"
|
||||
delivery := RemoteWorkspaceFrameBatchDelivery{
|
||||
ClusterID: "cluster-1",
|
||||
ChannelID: "channel-rw",
|
||||
ResourceID: "workspace-exclusivity",
|
||||
ServiceClass: FabricServiceClassRemoteWorkspace,
|
||||
ChannelClass: FabricServiceChannelInteractive,
|
||||
AdapterContractID: "rap.rdp_worker.remote_workspace_adapter_contract_probe.v1",
|
||||
AdapterSessionID: sessionID,
|
||||
Frames: []RemoteWorkspaceFrameProbeRecord{{
|
||||
Channel: "display",
|
||||
Direction: "adapter_to_client",
|
||||
Droppable: true,
|
||||
}},
|
||||
}
|
||||
if _, err := sink.AcceptRemoteWorkspaceFrameBatchProbe(context.Background(), delivery); err != nil {
|
||||
t.Fatalf("accept frame batch: %v", err)
|
||||
}
|
||||
activeReport := sink.Report(time.Now().UTC())
|
||||
activeReadiness, ok := activeReport["adapter_runtime_readiness"].(map[string]any)
|
||||
if !ok {
|
||||
t.Fatalf("active readiness missing from report = %+v", activeReport)
|
||||
}
|
||||
if activeReadiness["adapter_session_id"] != sessionID ||
|
||||
activeReadiness["active_session_count"] != 1 {
|
||||
t.Fatalf("invalid active readiness = %+v", activeReadiness)
|
||||
}
|
||||
if _, ok := activeReadiness["no_session_summary"]; ok {
|
||||
t.Fatalf("active readiness should not include no_session_summary = %+v", activeReadiness)
|
||||
}
|
||||
if _, ok := activeReadiness["terminal_session_summary"]; ok {
|
||||
t.Fatalf("active readiness should not include terminal_session_summary = %+v", activeReadiness)
|
||||
}
|
||||
|
||||
server := httptest.NewServer(Server{RemoteWorkspaceFrameSink: sink}.Handler())
|
||||
defer server.Close()
|
||||
body := bytes.NewReader([]byte(`{"action":"close","reason":"unit summary exclusivity close"}`))
|
||||
resp, err := http.Post(server.URL+"/mesh/v1/remote-workspace/adapter-sessions/"+sessionID+"/control", "application/json", body)
|
||||
if err != nil {
|
||||
t.Fatalf("post control: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
raw, _ := io.ReadAll(resp.Body)
|
||||
t.Fatalf("status = %d body=%s", resp.StatusCode, string(raw))
|
||||
}
|
||||
terminalReport := sink.Report(time.Now().UTC())
|
||||
terminalReadiness, ok := terminalReport["adapter_runtime_readiness"].(map[string]any)
|
||||
if !ok {
|
||||
t.Fatalf("terminal readiness missing from report = %+v", terminalReport)
|
||||
}
|
||||
if _, ok := terminalReadiness["terminal_session_summary"]; !ok {
|
||||
t.Fatalf("terminal readiness should include terminal_session_summary = %+v", terminalReadiness)
|
||||
}
|
||||
if _, ok := terminalReadiness["no_session_summary"]; ok {
|
||||
t.Fatalf("terminal readiness should not include no_session_summary = %+v", terminalReadiness)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRemoteWorkspaceAdapterSessionControlTerminalReadinessStates(t *testing.T) {
|
||||
tests := []struct {
|
||||
action string
|
||||
sessionID string
|
||||
wantState string
|
||||
wantClosed int64
|
||||
wantExpired int64
|
||||
wantReset int64
|
||||
wantPrevState string
|
||||
}{
|
||||
{
|
||||
action: "expire",
|
||||
sessionID: "rap-rw-adapter-session-b0b0b0b0b0b0b0b0b0b0b0b0",
|
||||
wantState: "expired",
|
||||
wantClosed: 1,
|
||||
wantExpired: 1,
|
||||
wantPrevState: "probe_bound",
|
||||
},
|
||||
{
|
||||
action: "reset",
|
||||
sessionID: "rap-rw-adapter-session-c0c0c0c0c0c0c0c0c0c0c0c0",
|
||||
wantState: "reset",
|
||||
wantClosed: 1,
|
||||
wantReset: 1,
|
||||
wantPrevState: "probe_bound",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.action, func(t *testing.T) {
|
||||
sink := NewRemoteWorkspaceFrameProbeSink()
|
||||
delivery := RemoteWorkspaceFrameBatchDelivery{
|
||||
ClusterID: "cluster-1",
|
||||
ChannelID: "channel-rw",
|
||||
ResourceID: "workspace-" + tt.action,
|
||||
ServiceClass: FabricServiceClassRemoteWorkspace,
|
||||
ChannelClass: FabricServiceChannelInteractive,
|
||||
AdapterContractID: "rap.rdp_worker.remote_workspace_adapter_contract_probe.v1",
|
||||
AdapterSessionID: tt.sessionID,
|
||||
Frames: []RemoteWorkspaceFrameProbeRecord{{
|
||||
Channel: "display",
|
||||
Direction: "adapter_to_client",
|
||||
Droppable: true,
|
||||
}},
|
||||
}
|
||||
if _, err := sink.AcceptRemoteWorkspaceFrameBatchProbe(context.Background(), delivery); err != nil {
|
||||
t.Fatalf("accept frame batch: %v", err)
|
||||
}
|
||||
server := httptest.NewServer(Server{RemoteWorkspaceFrameSink: sink}.Handler())
|
||||
defer server.Close()
|
||||
|
||||
body := bytes.NewReader([]byte(fmt.Sprintf(`{"action":%q,"reason":"unit terminal readiness"}`, tt.action)))
|
||||
resp, err := http.Post(server.URL+"/mesh/v1/remote-workspace/adapter-sessions/"+tt.sessionID+"/control", "application/json", body)
|
||||
if err != nil {
|
||||
t.Fatalf("post control: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
raw, _ := io.ReadAll(resp.Body)
|
||||
t.Fatalf("status = %d body=%s", resp.StatusCode, string(raw))
|
||||
}
|
||||
var result RemoteWorkspaceAdapterSessionControlResult
|
||||
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
|
||||
t.Fatalf("decode control result: %v", err)
|
||||
}
|
||||
if !result.Accepted ||
|
||||
result.Action != tt.action ||
|
||||
result.AdapterSessionID != tt.sessionID ||
|
||||
result.PreviousState != tt.wantPrevState ||
|
||||
result.SessionState != tt.wantState ||
|
||||
result.ActiveSessions != 0 {
|
||||
t.Fatalf("control result = %+v", result)
|
||||
}
|
||||
|
||||
report := sink.Report(time.Now().UTC())
|
||||
if report["active_session_count"] != 0 ||
|
||||
report["session_closed_total"] != tt.wantClosed ||
|
||||
report["session_expired_total"] != tt.wantExpired ||
|
||||
report["session_reset_total"] != tt.wantReset ||
|
||||
report["last_controlled_adapter_session_id"] != tt.sessionID ||
|
||||
report["last_session_control_action"] != tt.action ||
|
||||
report["last_session_control_state"] != tt.wantState {
|
||||
t.Fatalf("terminal control report = %+v", report)
|
||||
}
|
||||
readiness, ok := report["adapter_runtime_readiness"].(map[string]any)
|
||||
if !ok {
|
||||
t.Fatalf("adapter runtime readiness missing from report = %+v", report)
|
||||
}
|
||||
if readiness["status"] != "idle" ||
|
||||
readiness["diagnostic_state"] != "last_session_terminal_or_expired" ||
|
||||
readiness["ready"] != false ||
|
||||
readiness["active_session_count"] != 0 ||
|
||||
readiness["last_adapter_session_id"] != tt.sessionID ||
|
||||
readiness["last_session_state"] != tt.wantState {
|
||||
t.Fatalf("invalid terminal readiness = %+v", readiness)
|
||||
}
|
||||
if _, ok := readiness["adapter_session_id"]; ok {
|
||||
t.Fatalf("adapter_session_id should be absent without active session = %+v", readiness)
|
||||
}
|
||||
if _, ok := readiness["last_preflight"]; ok {
|
||||
t.Fatalf("last_preflight should be absent without active session = %+v", readiness)
|
||||
}
|
||||
if _, ok := readiness["no_session_summary"]; ok {
|
||||
t.Fatalf("no_session_summary should be absent for terminal session history = %+v", readiness)
|
||||
}
|
||||
terminalSummary, ok := readiness["terminal_session_summary"].(map[string]any)
|
||||
if !ok {
|
||||
t.Fatalf("terminal session summary missing = %+v", readiness)
|
||||
}
|
||||
if terminalSummary["adapter_session_id"] != tt.sessionID ||
|
||||
terminalSummary["schema_version"] != "rap.remote_workspace_adapter_terminal_session_summary.v1" ||
|
||||
!stringAnySliceContains(terminalSummary["summary_contract"], "adapter_session_id") ||
|
||||
!stringAnySliceContains(terminalSummary["summary_contract"], "session_state") ||
|
||||
!stringAnySliceContains(terminalSummary["summary_contract"], "reason") ||
|
||||
!stringAnySliceContains(terminalSummary["summary_contract"], "controlled_at") ||
|
||||
!boolMapValue(terminalSummary["summary_features"], "adapter_session_id") ||
|
||||
!boolMapValue(terminalSummary["summary_features"], "session_state") ||
|
||||
!boolMapValue(terminalSummary["summary_features"], "reason") ||
|
||||
!boolMapValue(terminalSummary["summary_features"], "controlled_at") ||
|
||||
terminalSummary["session_state"] != tt.wantState ||
|
||||
terminalSummary["reason"] != "unit terminal readiness" ||
|
||||
terminalSummary["controlled_at"] == "" {
|
||||
t.Fatalf("invalid terminal session summary = %+v", terminalSummary)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestRemoteWorkspaceAdapterSessionControlRejectsInvalidRequests(t *testing.T) {
|
||||
sink := NewRemoteWorkspaceFrameProbeSink()
|
||||
server := httptest.NewServer(Server{RemoteWorkspaceFrameSink: sink}.Handler())
|
||||
@@ -3064,6 +3351,19 @@ func TestRemoteWorkspaceAdapterSessionMailboxPreflightIsReadOnly(t *testing.T) {
|
||||
}
|
||||
if preflight.ResumeFrom != "checkpoint" ||
|
||||
preflight.ResumeSequence != 2 ||
|
||||
preflight.DiagnosticState != "ready" ||
|
||||
preflight.RecommendedAction != "resume_from_cursor" ||
|
||||
preflight.ActionReason != "cursor_window_available" ||
|
||||
preflight.OperatorSummary != "consumer cursor can resume from requested window" ||
|
||||
preflight.OperatorStatus != "ready_to_resume" ||
|
||||
preflight.OperatorSeverity != "ok" ||
|
||||
anyInt64(preflight.ActionContext["resume_sequence"]) != 2 ||
|
||||
anyInt64(preflight.ActionContext["first_retained_sequence"]) != 1 ||
|
||||
preflight.OperatorSummaryFields["diagnostic_state"] != "ready" ||
|
||||
preflight.OperatorSummaryFields["recommended_action"] != "resume_from_cursor" ||
|
||||
preflight.OperatorSummaryFields["operator_status"] != "ready_to_resume" ||
|
||||
preflight.OperatorSummaryFields["operator_severity"] != "ok" ||
|
||||
!stringSliceContains(preflight.ActionHints, "resume_from_requested_cursor") ||
|
||||
preflight.ExpectedAvailableCount != 1 ||
|
||||
preflight.ExpectedReturnedCount != 1 ||
|
||||
preflight.ExpectedSkippedCount != 2 ||
|
||||
@@ -3079,6 +3379,547 @@ func TestRemoteWorkspaceAdapterSessionMailboxPreflightIsReadOnly(t *testing.T) {
|
||||
reportAfter["current_session_mailbox_consumer_ack_total"] != reportBefore["current_session_mailbox_consumer_ack_total"] {
|
||||
t.Fatalf("preflight mutated report before=%+v after=%+v", reportBefore, reportAfter)
|
||||
}
|
||||
if reportAfter["mailbox_preflight_total"] != int64(2) ||
|
||||
reportAfter["mailbox_preflight_ack_total"] != int64(1) ||
|
||||
reportAfter["mailbox_preflight_checkpoint_total"] != int64(1) ||
|
||||
reportAfter["last_mailbox_preflight_adapter_session_id"] != sessionID ||
|
||||
reportAfter["last_mailbox_preflight_consumer_id"] != "rdp-worker-probe" ||
|
||||
reportAfter["last_mailbox_preflight_resume_from"] != "checkpoint" ||
|
||||
reportAfter["last_mailbox_preflight_resume_sequence"] != int64(2) ||
|
||||
reportAfter["last_mailbox_preflight_available_count"] != 1 ||
|
||||
reportAfter["last_mailbox_preflight_returned_count"] != 1 ||
|
||||
reportAfter["last_mailbox_preflight_skipped_count"] != 2 ||
|
||||
reportAfter["current_session_mailbox_preflight_total"] != int64(2) ||
|
||||
reportAfter["current_session_mailbox_preflight_ack_total"] != int64(1) ||
|
||||
reportAfter["current_session_mailbox_preflight_checkpoint_total"] != int64(1) ||
|
||||
mapInt64Value(reportAfter["current_session_mailbox_preflight_operator_status_counts"], "ready_to_resume") != 2 ||
|
||||
mapInt64Value(reportAfter["current_session_mailbox_preflight_operator_severity_counts"], "ok") != 2 ||
|
||||
reportAfter["current_session_last_mailbox_preflight_resume_from"] != "checkpoint" ||
|
||||
reportAfter["current_session_last_mailbox_preflight_resume_sequence"] != int64(2) ||
|
||||
reportAfter["current_session_last_mailbox_preflight_returned_count"] != 1 ||
|
||||
reportAfter["current_session_last_mailbox_preflight_recommended_action"] != "resume_from_cursor" ||
|
||||
reportAfter["last_mailbox_preflight_operator_summary"] != "consumer cursor can resume from requested window" ||
|
||||
reportAfter["last_mailbox_preflight_operator_status"] != "ready_to_resume" ||
|
||||
reportAfter["last_mailbox_preflight_operator_severity"] != "ok" ||
|
||||
reportAfter["current_session_last_mailbox_preflight_operator_summary"] != "consumer cursor can resume from requested window" ||
|
||||
reportAfter["current_session_last_mailbox_preflight_operator_status"] != "ready_to_resume" ||
|
||||
reportAfter["current_session_last_mailbox_preflight_operator_severity"] != "ok" {
|
||||
t.Fatalf("invalid preflight telemetry report = %+v", reportAfter)
|
||||
}
|
||||
readiness, ok := reportAfter["adapter_runtime_readiness"].(map[string]any)
|
||||
if !ok {
|
||||
t.Fatalf("adapter runtime readiness missing from report = %+v", reportAfter)
|
||||
}
|
||||
if readiness["mailbox_preflight_total"] != int64(2) ||
|
||||
readiness["last_preflight_consumer_id"] != "rdp-worker-probe" ||
|
||||
readiness["last_preflight_resume_from"] != "checkpoint" ||
|
||||
readiness["last_preflight_resume_sequence"] != int64(2) ||
|
||||
readiness["last_preflight_returned_count"] != 1 ||
|
||||
readiness["last_preflight_skipped_count"] != 2 ||
|
||||
readiness["last_preflight_recommended_action"] != "resume_from_cursor" ||
|
||||
readiness["last_preflight_action_reason"] != "cursor_window_available" ||
|
||||
readiness["last_preflight_operator_summary"] != "consumer cursor can resume from requested window" ||
|
||||
readiness["last_preflight_operator_status"] != "ready_to_resume" ||
|
||||
readiness["last_preflight_operator_severity"] != "ok" ||
|
||||
mapInt64Value(readiness["mailbox_preflight_operator_status_counts"], "ready_to_resume") != 2 ||
|
||||
mapInt64Value(readiness["mailbox_preflight_operator_severity_counts"], "ok") != 2 ||
|
||||
readiness["preflight_attention_status"] != "clean" ||
|
||||
readiness["preflight_attention_reason"] != "no_resync_required_preflight_observed" {
|
||||
t.Fatalf("invalid preflight readiness = %+v", readiness)
|
||||
}
|
||||
lastPreflight, ok := readiness["last_preflight"].(map[string]any)
|
||||
if !ok {
|
||||
t.Fatalf("last preflight rollup missing from readiness = %+v", readiness)
|
||||
}
|
||||
if lastPreflight["consumer_id"] != "rdp-worker-probe" ||
|
||||
lastPreflight["diagnostics_schema_version"] != "rap.remote_workspace_adapter_mailbox_preflight_diagnostics.v1" ||
|
||||
!stringAnySliceContains(lastPreflight["diagnostics_contract"], "retained_window") ||
|
||||
!stringAnySliceContains(lastPreflight["diagnostics_contract"], "remediation_checklist") ||
|
||||
!stringAnySliceContains(lastPreflight["diagnostics_contract"], "attention") ||
|
||||
!stringAnySliceContains(lastPreflight["diagnostics_contract"], "operator_counts") ||
|
||||
!boolMapValue(lastPreflight["diagnostics_features"], "retained_window") ||
|
||||
!boolMapValue(lastPreflight["diagnostics_features"], "remediation_checklist") ||
|
||||
!boolMapValue(lastPreflight["diagnostics_features"], "attention") ||
|
||||
!boolMapValue(lastPreflight["diagnostics_features"], "operator_counts") ||
|
||||
lastPreflight["resume_from"] != "checkpoint" ||
|
||||
lastPreflight["operator_status"] != "ready_to_resume" ||
|
||||
lastPreflight["operator_severity"] != "ok" ||
|
||||
lastPreflight["recommended_action"] != "resume_from_cursor" ||
|
||||
!preflightChecklistContains(lastPreflight["remediation_checklist"], "resume_from_requested_cursor", true, true) ||
|
||||
lastPreflight["remediation_checklist_status"] != "ready" ||
|
||||
anyInt64(preflightChecklistCountsValue(lastPreflight["remediation_checklist_counts"], "required_count")) != 1 ||
|
||||
anyInt64(preflightChecklistCountsValue(lastPreflight["remediation_checklist_counts"], "satisfied_count")) != 1 ||
|
||||
anyInt64(preflightChecklistCountsValue(lastPreflight["remediation_checklist_counts"], "pending_count")) != 0 ||
|
||||
mapInt64Value(lastPreflight["operator_status_counts"], "ready_to_resume") != 2 ||
|
||||
mapInt64Value(lastPreflight["operator_severity_counts"], "ok") != 2 ||
|
||||
lastPreflight["preflight_attention_status"] != "clean" ||
|
||||
lastPreflight["preflight_attention_reason"] != "no_resync_required_preflight_observed" ||
|
||||
anyInt64(lastPreflight["resume_sequence"]) != 2 ||
|
||||
anyInt64(lastPreflight["first_retained_sequence"]) != 1 ||
|
||||
anyInt64(lastPreflight["last_retained_sequence"]) != 3 ||
|
||||
anyInt64(lastPreflight["mailbox_dropped_total"]) != 0 ||
|
||||
anyInt64(lastPreflight["mailbox_preflight_total"]) != 2 {
|
||||
t.Fatalf("invalid last preflight rollup = %+v", lastPreflight)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRemoteWorkspaceAdapterSessionReadinessBeforePreflight(t *testing.T) {
|
||||
sink := NewRemoteWorkspaceFrameProbeSink()
|
||||
sessionID := "rap-rw-adapter-session-a0a0a0a0a0a0a0a0a0a0a0a0"
|
||||
delivery := RemoteWorkspaceFrameBatchDelivery{
|
||||
ClusterID: "cluster-1",
|
||||
ChannelID: "channel-rw",
|
||||
ResourceID: "workspace-before-preflight",
|
||||
ServiceClass: FabricServiceClassRemoteWorkspace,
|
||||
ChannelClass: FabricServiceChannelInteractive,
|
||||
AdapterContractID: "rap.rdp_worker.remote_workspace_adapter_contract_probe.v1",
|
||||
AdapterSessionID: sessionID,
|
||||
Frames: []RemoteWorkspaceFrameProbeRecord{{
|
||||
Channel: "display",
|
||||
Direction: "adapter_to_client",
|
||||
Droppable: true,
|
||||
}},
|
||||
}
|
||||
if _, err := sink.AcceptRemoteWorkspaceFrameBatchProbe(context.Background(), delivery); err != nil {
|
||||
t.Fatalf("accept frame batch: %v", err)
|
||||
}
|
||||
|
||||
report := sink.Report(time.Now().UTC())
|
||||
if report["mailbox_preflight_total"] != int64(0) ||
|
||||
report["current_session_mailbox_preflight_total"] != int64(0) {
|
||||
t.Fatalf("unexpected preflight totals before preflight = %+v", report)
|
||||
}
|
||||
readiness, ok := report["adapter_runtime_readiness"].(map[string]any)
|
||||
if !ok {
|
||||
t.Fatalf("adapter runtime readiness missing from report = %+v", report)
|
||||
}
|
||||
if readiness["adapter_session_id"] != sessionID ||
|
||||
readiness["mailbox_preflight_total"] != int64(0) ||
|
||||
readiness["preflight_attention_status"] != "unknown" ||
|
||||
readiness["preflight_attention_reason"] != "no_preflight_observed" {
|
||||
t.Fatalf("invalid no-preflight readiness = %+v", readiness)
|
||||
}
|
||||
if _, ok := readiness["last_preflight"]; ok {
|
||||
t.Fatalf("last preflight rollup should be absent before preflight = %+v", readiness["last_preflight"])
|
||||
}
|
||||
if readiness["last_preflight_diagnostic_state"] != "" ||
|
||||
readiness["last_preflight_recommended_action"] != "" ||
|
||||
len(readiness["last_preflight_action_hints"].([]string)) != 0 {
|
||||
t.Fatalf("last preflight flat fields should be empty before preflight = %+v", readiness)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRemoteWorkspaceAdapterSessionMailboxPreflightReportsStaleCursorGap drives
// a session into a stale-cursor state and checks how it is reported.
//
// Flow: accept one frame batch, seed the consumer's ack cursor through the
// mailbox endpoint (ack_sequence=1), accept
// DefaultRemoteWorkspaceAdapterMailboxCapacity+2 further batches, then request
// the mailbox preflight endpoint twice with resume_from=ack. The first
// preflight response, the sink report, the adapter_runtime_readiness section
// and its last_preflight rollup are each asserted to describe a
// "stale_cursor_gap" that requires "reset_consumer_and_resync".
func TestRemoteWorkspaceAdapterSessionMailboxPreflightReportsStaleCursorGap(t *testing.T) {
	sink := NewRemoteWorkspaceFrameProbeSink()
	sessionID := "rap-rw-adapter-session-adadadadadadadadadadadad"
	delivery := RemoteWorkspaceFrameBatchDelivery{
		ClusterID:         "cluster-1",
		ChannelID:         "channel-rw",
		ResourceID:        "workspace-stale-0",
		ServiceClass:      FabricServiceClassRemoteWorkspace,
		ChannelClass:      FabricServiceChannelInteractive,
		AdapterContractID: "rap.rdp_worker.remote_workspace_adapter_contract_probe.v1",
		AdapterSessionID:  sessionID,
		Frames: []RemoteWorkspaceFrameProbeRecord{{
			Channel:   "display",
			Direction: "adapter_to_client",
			Droppable: true,
		}},
	}
	if _, err := sink.AcceptRemoteWorkspaceFrameBatchProbe(context.Background(), delivery); err != nil {
		t.Fatalf("accept initial frame batch: %v", err)
	}
	server := httptest.NewServer(Server{RemoteWorkspaceFrameSink: sink}.Handler())
	defer server.Close()

	// Seed the consumer's ack cursor; only the request's side effect matters,
	// so the response body is closed without being read.
	resp, err := http.Get(server.URL + "/mesh/v1/remote-workspace/adapter-sessions/" + sessionID + "/mailbox?consumer_id=rdp-worker-probe&ack_sequence=1&limit=1")
	if err != nil {
		t.Fatalf("seed ack cursor: %v", err)
	}
	resp.Body.Close()
	// Push more batches than the mailbox capacity so earlier entries are dropped.
	for i := 1; i <= DefaultRemoteWorkspaceAdapterMailboxCapacity+2; i++ {
		delivery.ResourceID = fmt.Sprintf("workspace-stale-%d", i)
		if _, err := sink.AcceptRemoteWorkspaceFrameBatchProbe(context.Background(), delivery); err != nil {
			t.Fatalf("accept overflow frame batch %d: %v", i, err)
		}
	}

	// First preflight: decoded and asserted field by field.
	resp, err = http.Get(server.URL + "/mesh/v1/remote-workspace/adapter-sessions/" + sessionID + "/mailbox/preflight?consumer_id=rdp-worker-probe&resume_from=ack&limit=3")
	if err != nil {
		t.Fatalf("get stale preflight: %v", err)
	}
	defer resp.Body.Close()
	var preflight RemoteWorkspaceAdapterMailboxPreflightSnapshot
	if err := json.NewDecoder(resp.Body).Decode(&preflight); err != nil {
		t.Fatalf("decode stale preflight: %v", err)
	}
	// NOTE(review): the literal 19 for LastRetainedSequence (and 4 / 3 for the
	// first retained sequence / dropped count) presumably assumes one sequence
	// number per accepted batch and a mailbox capacity of 16 — confirm against
	// DefaultRemoteWorkspaceAdapterMailboxCapacity.
	if preflight.ResumeFrom != "ack" ||
		preflight.ResumeSequence != 1 ||
		preflight.MailboxDepth != DefaultRemoteWorkspaceAdapterMailboxCapacity ||
		preflight.MailboxDropped != 3 ||
		preflight.ExpectedAvailableCount != DefaultRemoteWorkspaceAdapterMailboxCapacity ||
		preflight.ExpectedReturnedCount != 3 ||
		preflight.ExpectedSkippedCount != 0 ||
		preflight.FirstExpectedSequence != 4 ||
		preflight.LastExpectedSequence != 6 ||
		preflight.FirstRetainedSequence != 4 ||
		preflight.LastRetainedSequence != 19 ||
		preflight.DiagnosticState != "stale_cursor_gap" ||
		!preflight.StaleCursor ||
		preflight.MissingDroppedCount != 2 ||
		preflight.RecommendedAction != "reset_consumer_and_resync" ||
		preflight.ActionReason != "consumer_cursor_before_first_retained_sequence" ||
		preflight.OperatorSummary != "stale cursor gap: reset consumer and resync before resume" ||
		preflight.OperatorStatus != "resync_required" ||
		preflight.OperatorSeverity != "warn" ||
		anyInt64(preflight.ActionContext["resume_sequence"]) != 1 ||
		anyInt64(preflight.ActionContext["first_retained_sequence"]) != 4 ||
		anyInt64(preflight.ActionContext["missing_dropped_count"]) != 2 ||
		preflight.OperatorSummaryFields["diagnostic_state"] != "stale_cursor_gap" ||
		preflight.OperatorSummaryFields["recommended_action"] != "reset_consumer_and_resync" ||
		preflight.OperatorSummaryFields["operator_status"] != "resync_required" ||
		preflight.OperatorSummaryFields["operator_severity"] != "warn" ||
		anyInt64(preflight.OperatorSummaryFields["missing_dropped_count"]) != 2 ||
		!stringSliceContains(preflight.ActionHints, "reset_consumer_cursor") ||
		!stringSliceContains(preflight.ActionHints, "request_full_adapter_resync") ||
		!stringSliceContains(preflight.ActionHints, "resume_from_checkpoint_after_resync") {
		t.Fatalf("stale preflight = %+v", preflight)
	}
	// Second, identical preflight: the body is discarded; the request exists so
	// the per-status / per-severity counts asserted below reach 2.
	resp, err = http.Get(server.URL + "/mesh/v1/remote-workspace/adapter-sessions/" + sessionID + "/mailbox/preflight?consumer_id=rdp-worker-probe&resume_from=ack&limit=3")
	if err != nil {
		t.Fatalf("get repeated stale preflight: %v", err)
	}
	resp.Body.Close()
	// Top-level report: flat last_mailbox_preflight_* and current_session_* fields.
	report := sink.Report(time.Now().UTC())
	if report["last_mailbox_preflight_diagnostic_state"] != "stale_cursor_gap" ||
		report["last_mailbox_preflight_stale_cursor"] != true ||
		report["last_mailbox_preflight_missing_dropped_count"] != 2 ||
		report["last_mailbox_preflight_recommended_action"] != "reset_consumer_and_resync" ||
		report["last_mailbox_preflight_action_reason"] != "consumer_cursor_before_first_retained_sequence" ||
		report["last_mailbox_preflight_operator_summary"] != "stale cursor gap: reset consumer and resync before resume" ||
		report["last_mailbox_preflight_operator_status"] != "resync_required" ||
		report["last_mailbox_preflight_operator_severity"] != "warn" ||
		report["current_session_last_mailbox_preflight_diagnostic_state"] != "stale_cursor_gap" ||
		report["current_session_last_mailbox_preflight_stale_cursor"] != true ||
		report["current_session_last_mailbox_preflight_missing_dropped_count"] != 2 ||
		report["current_session_last_mailbox_preflight_recommended_action"] != "reset_consumer_and_resync" ||
		report["current_session_last_mailbox_preflight_operator_summary"] != "stale cursor gap: reset consumer and resync before resume" ||
		report["current_session_last_mailbox_preflight_operator_status"] != "resync_required" ||
		report["current_session_last_mailbox_preflight_operator_severity"] != "warn" ||
		mapInt64Value(report["current_session_mailbox_preflight_operator_status_counts"], "resync_required") != 2 ||
		mapInt64Value(report["current_session_mailbox_preflight_operator_severity_counts"], "warn") != 2 {
		t.Fatalf("stale preflight report = %+v", report)
	}
	// Readiness section: flat last_preflight_* fields plus attention status.
	readiness, ok := report["adapter_runtime_readiness"].(map[string]any)
	if !ok {
		t.Fatalf("adapter runtime readiness missing from report = %+v", report)
	}
	if readiness["last_preflight_diagnostic_state"] != "stale_cursor_gap" ||
		readiness["last_preflight_stale_cursor"] != true ||
		readiness["last_preflight_missing_dropped_count"] != 2 ||
		readiness["last_preflight_recommended_action"] != "reset_consumer_and_resync" ||
		readiness["last_preflight_action_reason"] != "consumer_cursor_before_first_retained_sequence" ||
		readiness["last_preflight_operator_summary"] != "stale cursor gap: reset consumer and resync before resume" ||
		readiness["last_preflight_operator_status"] != "resync_required" ||
		readiness["last_preflight_operator_severity"] != "warn" ||
		mapInt64Value(readiness["mailbox_preflight_operator_status_counts"], "resync_required") != 2 ||
		mapInt64Value(readiness["mailbox_preflight_operator_severity_counts"], "warn") != 2 ||
		readiness["preflight_attention_status"] != "repeated_resync_required" ||
		readiness["preflight_attention_reason"] != "resync_required_preflight_repeated" {
		t.Fatalf("stale preflight readiness = %+v", readiness)
	}
	// Nested last_preflight rollup: diagnostics contract/features, remediation
	// checklist (three required, none satisfied) and retained-window numbers.
	lastPreflight, ok := readiness["last_preflight"].(map[string]any)
	if !ok {
		t.Fatalf("stale last preflight rollup missing from readiness = %+v", readiness)
	}
	if lastPreflight["diagnostic_state"] != "stale_cursor_gap" ||
		lastPreflight["diagnostics_schema_version"] != "rap.remote_workspace_adapter_mailbox_preflight_diagnostics.v1" ||
		!stringAnySliceContains(lastPreflight["diagnostics_contract"], "retained_window") ||
		!stringAnySliceContains(lastPreflight["diagnostics_contract"], "remediation_checklist") ||
		!stringAnySliceContains(lastPreflight["diagnostics_contract"], "attention") ||
		!stringAnySliceContains(lastPreflight["diagnostics_contract"], "operator_counts") ||
		!boolMapValue(lastPreflight["diagnostics_features"], "retained_window") ||
		!boolMapValue(lastPreflight["diagnostics_features"], "remediation_checklist") ||
		!boolMapValue(lastPreflight["diagnostics_features"], "attention") ||
		!boolMapValue(lastPreflight["diagnostics_features"], "operator_counts") ||
		lastPreflight["operator_status"] != "resync_required" ||
		lastPreflight["operator_severity"] != "warn" ||
		lastPreflight["recommended_action"] != "reset_consumer_and_resync" ||
		lastPreflight["action_reason"] != "consumer_cursor_before_first_retained_sequence" ||
		!preflightChecklistContains(lastPreflight["remediation_checklist"], "reset_consumer_cursor", true, false) ||
		!preflightChecklistContains(lastPreflight["remediation_checklist"], "request_full_adapter_resync", true, false) ||
		!preflightChecklistContains(lastPreflight["remediation_checklist"], "resume_from_checkpoint_after_resync", true, false) ||
		lastPreflight["remediation_checklist_status"] != "action_required" ||
		anyInt64(preflightChecklistCountsValue(lastPreflight["remediation_checklist_counts"], "required_count")) != 3 ||
		anyInt64(preflightChecklistCountsValue(lastPreflight["remediation_checklist_counts"], "satisfied_count")) != 0 ||
		anyInt64(preflightChecklistCountsValue(lastPreflight["remediation_checklist_counts"], "pending_count")) != 3 ||
		mapInt64Value(lastPreflight["operator_status_counts"], "resync_required") != 2 ||
		mapInt64Value(lastPreflight["operator_severity_counts"], "warn") != 2 ||
		lastPreflight["preflight_attention_status"] != "repeated_resync_required" ||
		lastPreflight["preflight_attention_reason"] != "resync_required_preflight_repeated" ||
		anyInt64(lastPreflight["missing_dropped_count"]) != 2 ||
		anyInt64(lastPreflight["first_retained_sequence"]) != 4 ||
		anyInt64(lastPreflight["last_retained_sequence"]) != 19 ||
		anyInt64(lastPreflight["mailbox_dropped_total"]) != 3 ||
		anyInt64(lastPreflight["resume_sequence"]) != 1 {
		t.Fatalf("invalid stale last preflight rollup = %+v", lastPreflight)
	}
}
|
||||
|
||||
// preflightChecklistCountsValue looks up key in value when value is a
// map[string]any and returns the stored entry (nil when the key is absent).
// Any other dynamic type, including a nil interface, yields nil.
func preflightChecklistCountsValue(value any, key string) any {
	if table, isMap := value.(map[string]any); isMap {
		return table[key]
	}
	return nil
}
|
||||
|
||||
func mapInt64Value(value any, key string) int64 {
|
||||
switch items := value.(type) {
|
||||
case map[string]int64:
|
||||
return items[key]
|
||||
case map[string]any:
|
||||
return anyInt64(items[key])
|
||||
default:
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
// boolMapValue reports the boolean stored under key. It accepts either a
// map[string]bool or a map[string]any whose entry is a bool; a missing key, a
// non-bool entry, or any other container type all report false.
func boolMapValue(value any, key string) bool {
	if flags, ok := value.(map[string]bool); ok {
		return flags[key]
	}
	if loose, ok := value.(map[string]any); ok {
		flag, isBool := loose[key].(bool)
		return isBool && flag
	}
	return false
}
|
||||
|
||||
func preflightDiagnosticsContractCompatible(rollup map[string]any) bool {
|
||||
for _, feature := range []string{"retained_window", "remediation_checklist", "attention", "operator_counts"} {
|
||||
if !stringAnySliceContains(rollup["diagnostics_contract"], feature) || !boolMapValue(rollup["diagnostics_features"], feature) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func terminalSessionSummaryContractCompatible(summary map[string]any) bool {
|
||||
for _, feature := range []string{"adapter_session_id", "session_state", "reason", "controlled_at"} {
|
||||
if !stringAnySliceContains(summary["summary_contract"], feature) || !boolMapValue(summary["summary_features"], feature) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func noSessionSummaryContractCompatible(summary map[string]any) bool {
|
||||
for _, feature := range []string{"status", "diagnostic_state", "active_session_count", "terminal_session_count"} {
|
||||
if !stringAnySliceContains(summary["summary_contract"], feature) || !boolMapValue(summary["summary_features"], feature) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// stringAnySliceContains reports whether want occurs in value, where value may
// be a []string or a []any. Elements of a []any match only when they hold a
// string equal to want. Any other input type reports false.
func stringAnySliceContains(value any, want string) bool {
	if typed, ok := value.([]string); ok {
		for i := range typed {
			if typed[i] == want {
				return true
			}
		}
		return false
	}
	loose, ok := value.([]any)
	if !ok {
		return false
	}
	for i := range loose {
		if loose[i] == want {
			return true
		}
	}
	return false
}
|
||||
|
||||
// preflightChecklistContains reports whether the checklist in value holds an
// entry whose "step", "required" and "satisfied" fields equal the arguments
// and whose "source_hint" field is true. value may be a []map[string]any or a
// []any of map[string]any; non-map elements of a []any are ignored and any
// other input type reports false.
func preflightChecklistContains(value any, step string, required bool, satisfied bool) bool {
	// matches is the single predicate shared by both container shapes.
	matches := func(entry map[string]any) bool {
		return entry["step"] == step &&
			entry["required"] == required &&
			entry["satisfied"] == satisfied &&
			entry["source_hint"] == true
	}
	switch list := value.(type) {
	case []map[string]any:
		for _, entry := range list {
			if matches(entry) {
				return true
			}
		}
	case []any:
		for _, raw := range list {
			if entry, isMap := raw.(map[string]any); isMap && matches(entry) {
				return true
			}
		}
	}
	return false
}
|
||||
|
||||
// stringSliceContains reports whether want is an element of items.
// A nil or empty slice reports false.
func stringSliceContains(items []string, want string) bool {
	for i := 0; i < len(items); i++ {
		if items[i] == want {
			return true
		}
	}
	return false
}
|
||||
|
||||
// anyInt64 converts a dynamically typed numeric value to int64.
//
// All built-in signed and unsigned integer kinds and both float kinds are
// accepted; floats are truncated toward zero by the int64 conversion. Values
// of any other type (including nil) yield 0.
//
// Supporting every numeric kind matters for the assertions in this file: if a
// report field were stored as, say, uint64 or int32, returning 0 for it would
// let a check such as `anyInt64(x) != 0` pass without looking at the value.
// A uint or uint64 above math.MaxInt64 wraps, as with any Go integer conversion.
func anyInt64(value any) int64 {
	switch v := value.(type) {
	case int:
		return int64(v)
	case int8:
		return int64(v)
	case int16:
		return int64(v)
	case int32:
		return int64(v)
	case int64:
		return v
	case uint:
		return int64(v)
	case uint8:
		return int64(v)
	case uint16:
		return int64(v)
	case uint32:
		return int64(v)
	case uint64:
		return int64(v)
	case float32:
		return int64(v)
	case float64:
		return int64(v)
	default:
		return 0
	}
}
|
||||
|
||||
func TestRemoteWorkspacePreflightDiagnosticsContractCompatibility(t *testing.T) {
|
||||
compatible := map[string]any{
|
||||
"diagnostics_contract": []string{"retained_window", "remediation_checklist", "attention", "operator_counts"},
|
||||
"diagnostics_features": map[string]bool{"retained_window": true, "remediation_checklist": true, "attention": true, "operator_counts": true},
|
||||
}
|
||||
if !preflightDiagnosticsContractCompatible(compatible) {
|
||||
t.Fatalf("expected contract/features to be compatible")
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
rollup map[string]any
|
||||
}{
|
||||
{
|
||||
name: "missing contract item",
|
||||
rollup: map[string]any{
|
||||
"diagnostics_contract": []string{"retained_window", "remediation_checklist", "attention"},
|
||||
"diagnostics_features": map[string]bool{"retained_window": true, "remediation_checklist": true, "attention": true, "operator_counts": true},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "missing feature flag",
|
||||
rollup: map[string]any{
|
||||
"diagnostics_contract": []string{"retained_window", "remediation_checklist", "attention", "operator_counts"},
|
||||
"diagnostics_features": map[string]bool{"retained_window": true, "remediation_checklist": true, "attention": true},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "false feature flag",
|
||||
rollup: map[string]any{
|
||||
"diagnostics_contract": []string{"retained_window", "remediation_checklist", "attention", "operator_counts"},
|
||||
"diagnostics_features": map[string]bool{"retained_window": true, "remediation_checklist": true, "attention": true, "operator_counts": false},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if preflightDiagnosticsContractCompatible(tt.rollup) {
|
||||
t.Fatalf("expected incompatible contract/features for %+v", tt.rollup)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestRemoteWorkspaceTerminalSessionSummaryContractCompatibility(t *testing.T) {
|
||||
compatible := map[string]any{
|
||||
"summary_contract": []string{"adapter_session_id", "session_state", "reason", "controlled_at"},
|
||||
"summary_features": map[string]bool{"adapter_session_id": true, "session_state": true, "reason": true, "controlled_at": true},
|
||||
}
|
||||
if !terminalSessionSummaryContractCompatible(compatible) {
|
||||
t.Fatalf("expected summary contract/features to be compatible")
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
summary map[string]any
|
||||
}{
|
||||
{
|
||||
name: "missing contract item",
|
||||
summary: map[string]any{
|
||||
"summary_contract": []string{"adapter_session_id", "session_state", "reason"},
|
||||
"summary_features": map[string]bool{"adapter_session_id": true, "session_state": true, "reason": true, "controlled_at": true},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "missing feature flag",
|
||||
summary: map[string]any{
|
||||
"summary_contract": []string{"adapter_session_id", "session_state", "reason", "controlled_at"},
|
||||
"summary_features": map[string]bool{"adapter_session_id": true, "session_state": true, "reason": true},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "false feature flag",
|
||||
summary: map[string]any{
|
||||
"summary_contract": []string{"adapter_session_id", "session_state", "reason", "controlled_at"},
|
||||
"summary_features": map[string]bool{"adapter_session_id": true, "session_state": true, "reason": true, "controlled_at": false},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if terminalSessionSummaryContractCompatible(tt.summary) {
|
||||
t.Fatalf("expected incompatible summary contract/features for %+v", tt.summary)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestRemoteWorkspaceNoSessionSummaryContractCompatibility(t *testing.T) {
|
||||
compatible := map[string]any{
|
||||
"summary_contract": []string{"status", "diagnostic_state", "active_session_count", "terminal_session_count"},
|
||||
"summary_features": map[string]bool{"status": true, "diagnostic_state": true, "active_session_count": true, "terminal_session_count": true},
|
||||
}
|
||||
if !noSessionSummaryContractCompatible(compatible) {
|
||||
t.Fatalf("expected no-session summary contract/features to be compatible")
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
summary map[string]any
|
||||
}{
|
||||
{
|
||||
name: "missing contract item",
|
||||
summary: map[string]any{
|
||||
"summary_contract": []string{"status", "diagnostic_state", "active_session_count"},
|
||||
"summary_features": map[string]bool{"status": true, "diagnostic_state": true, "active_session_count": true, "terminal_session_count": true},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "missing feature flag",
|
||||
summary: map[string]any{
|
||||
"summary_contract": []string{"status", "diagnostic_state", "active_session_count", "terminal_session_count"},
|
||||
"summary_features": map[string]bool{"status": true, "diagnostic_state": true, "active_session_count": true},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "false feature flag",
|
||||
summary: map[string]any{
|
||||
"summary_contract": []string{"status", "diagnostic_state", "active_session_count", "terminal_session_count"},
|
||||
"summary_features": map[string]bool{"status": true, "diagnostic_state": true, "active_session_count": true, "terminal_session_count": false},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if noSessionSummaryContractCompatible(tt.summary) {
|
||||
t.Fatalf("expected incompatible no-session summary contract/features for %+v", tt.summary)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestRemoteWorkspaceAdapterSessionMailboxPreflightRejectsInvalidRequests(t *testing.T) {
|
||||
@@ -3145,6 +3986,57 @@ func TestRemoteWorkspaceAdapterSessionMailboxPreflightRejectsInvalidRequests(t *
|
||||
}
|
||||
}
|
||||
|
||||
func TestRemoteWorkspacePreflightAttentionReasonSummaries(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
statusCounts map[string]int64
|
||||
severityCounts map[string]int64
|
||||
wantStatus string
|
||||
wantReason string
|
||||
}{
|
||||
{
|
||||
name: "clean ready",
|
||||
statusCounts: map[string]int64{"ready_to_resume": 1},
|
||||
severityCounts: map[string]int64{"ok": 1},
|
||||
wantStatus: "clean",
|
||||
wantReason: "no_resync_required_preflight_observed",
|
||||
},
|
||||
{
|
||||
name: "single resync",
|
||||
statusCounts: map[string]int64{"resync_required": 1},
|
||||
severityCounts: map[string]int64{"warn": 1},
|
||||
wantStatus: "needs_attention",
|
||||
wantReason: "resync_required_preflight_observed",
|
||||
},
|
||||
{
|
||||
name: "repeated resync",
|
||||
statusCounts: map[string]int64{"resync_required": 2},
|
||||
severityCounts: map[string]int64{"warn": 2},
|
||||
wantStatus: "repeated_resync_required",
|
||||
wantReason: "resync_required_preflight_repeated",
|
||||
},
|
||||
{
|
||||
name: "none observed",
|
||||
statusCounts: map[string]int64{},
|
||||
severityCounts: map[string]int64{},
|
||||
wantStatus: "unknown",
|
||||
wantReason: "no_preflight_observed",
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
status := remoteWorkspacePreflightAttentionStatus(tt.statusCounts, tt.severityCounts)
|
||||
if status != tt.wantStatus {
|
||||
t.Fatalf("status=%q want %q", status, tt.wantStatus)
|
||||
}
|
||||
reason := remoteWorkspacePreflightAttentionReason(status, tt.statusCounts, tt.severityCounts)
|
||||
if reason != tt.wantReason {
|
||||
t.Fatalf("reason=%q want %q", reason, tt.wantReason)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestFabricServiceChannelVPNPacketIngressHonorsDisabledBackendRelayPolicy(t *testing.T) {
|
||||
publicKey, privateKey, err := ed25519.GenerateKey(nil)
|
||||
if err != nil {
|
||||
|
||||
@@ -13,7 +13,15 @@ type Supervisor interface {
|
||||
}
|
||||
|
||||
type StubSupervisor struct {
|
||||
Version string
|
||||
Version string
|
||||
RemoteWorkspaceRealAdapter RemoteWorkspaceRealAdapterConfig
|
||||
}
|
||||
|
||||
// RemoteWorkspaceRealAdapterConfig holds the settings for the remote-workspace
// real adapter. The status-payload builders in this file never emit the raw
// values: they report only whether each field is set and, for ArgsJSON, a
// coarse shape label.
// NOTE(review): presumably populated from the RAP_REMOTE_WORKSPACE_REAL_ADAPTER_*
// environment variables listed under "config_env" — confirm at the call site.
type RemoteWorkspaceRealAdapterConfig struct {
	// EnabledRequested is echoed as "enabled_requested" in status payloads.
	EnabledRequested bool
	// Command is reported as present when non-blank after trimming whitespace.
	Command string
	// ArgsJSON is reported as present when non-blank, plus a shape label
	// derived from its first non-space character.
	ArgsJSON string
	// WorkDir is reported as present when non-blank after trimming whitespace.
	WorkDir string
}
|
||||
|
||||
func (s StubSupervisor) Apply(_ context.Context, desired []client.DesiredWorkload) ([]client.WorkloadStatusRequest, error) {
|
||||
@@ -85,6 +93,7 @@ func (s StubSupervisor) applyOne(workload client.DesiredWorkload) client.Workloa
|
||||
payload["backend_relay_steady_state"] = false
|
||||
payload["channels"] = remoteWorkspaceAdapterChannels()
|
||||
payload["frame_batch_contract"] = remoteWorkspaceFrameBatchContract()
|
||||
payload["real_adapter_supervision"] = remoteWorkspaceRealAdapterSupervisionContract(s.RemoteWorkspaceRealAdapter)
|
||||
payload["traffic"] = "none"
|
||||
return client.WorkloadStatusRequest{
|
||||
ReportedState: "running",
|
||||
@@ -93,6 +102,20 @@ func (s StubSupervisor) applyOne(workload client.DesiredWorkload) client.Workloa
|
||||
StatusPayload: payload,
|
||||
}
|
||||
}
|
||||
if serviceType == "rdp-worker" && runtimeMode == "native" && boolConfig(workload.Config, "real_adapter_supervision") {
|
||||
payload["reason"] = "remote_workspace_real_adapter_supervision_disabled"
|
||||
payload["execution_mode"] = "real_adapter_supervision_disabled"
|
||||
payload["service_class"] = "remote_workspace"
|
||||
payload["traffic"] = "blocked"
|
||||
payload["payload_traffic"] = "none"
|
||||
payload["real_adapter_supervision"] = remoteWorkspaceRealAdapterSupervisionContract(s.RemoteWorkspaceRealAdapter)
|
||||
return client.WorkloadStatusRequest{
|
||||
ReportedState: "degraded",
|
||||
RuntimeMode: runtimeMode,
|
||||
Version: version,
|
||||
StatusPayload: payload,
|
||||
}
|
||||
}
|
||||
payload["reason"] = "service_runtime_not_implemented"
|
||||
payload["traffic"] = "blocked"
|
||||
return client.WorkloadStatusRequest{
|
||||
@@ -152,6 +175,166 @@ func remoteWorkspaceFrameBatchContract() map[string]any {
|
||||
}
|
||||
}
|
||||
|
||||
func remoteWorkspaceRealAdapterSupervisionContract(configs ...RemoteWorkspaceRealAdapterConfig) map[string]any {
|
||||
var config RemoteWorkspaceRealAdapterConfig
|
||||
if len(configs) > 0 {
|
||||
config = configs[0]
|
||||
}
|
||||
return map[string]any{
|
||||
"schema_version": "rap.remote_workspace_real_adapter_supervision.v1",
|
||||
"enabled": false,
|
||||
"activation_state": "disabled_until_real_runtime_stage",
|
||||
"execution_mode": "real_adapter_supervision_disabled",
|
||||
"payload_traffic": "none",
|
||||
"process_model": "external_rdp_worker_process",
|
||||
"config_projection": remoteWorkspaceRealAdapterConfigProjection(config),
|
||||
"activation_decision": remoteWorkspaceRealAdapterActivationDecision(config),
|
||||
"process_supervisor_preconditions": remoteWorkspaceRealAdapterProcessSupervisorPreconditions(config),
|
||||
"process_health_probe": remoteWorkspaceRealAdapterProcessHealthProbe(),
|
||||
"features": map[string]any{
|
||||
"config_projection": true,
|
||||
"activation_decision": true,
|
||||
"missing_gates": true,
|
||||
"process_health_probe": true,
|
||||
"process_health_probe_disabled": true,
|
||||
"process_supervisor_preconditions": true,
|
||||
"process_supervisor_start_disabled": true,
|
||||
"raw_values_redacted": true,
|
||||
},
|
||||
"config_env": []string{
|
||||
"RAP_REMOTE_WORKSPACE_REAL_ADAPTER_ENABLED",
|
||||
"RAP_REMOTE_WORKSPACE_REAL_ADAPTER_COMMAND",
|
||||
"RAP_REMOTE_WORKSPACE_REAL_ADAPTER_ARGS_JSON",
|
||||
"RAP_REMOTE_WORKSPACE_REAL_ADAPTER_WORKDIR",
|
||||
},
|
||||
"status_contract": []string{
|
||||
"schema_version",
|
||||
"enabled",
|
||||
"activation_state",
|
||||
"execution_mode",
|
||||
"payload_traffic",
|
||||
"process_model",
|
||||
"config_projection",
|
||||
"activation_decision",
|
||||
"process_supervisor_preconditions",
|
||||
"process_health_probe",
|
||||
"features",
|
||||
"config_env",
|
||||
"status_contract",
|
||||
},
|
||||
"guardrails": []string{
|
||||
"contract_probe_remains_default",
|
||||
"no_payload_forwarding_until_real_runtime_stage",
|
||||
"backend_relay_not_steady_state",
|
||||
"fabric_service_channel_required",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// remoteWorkspaceRealAdapterProcessHealthProbe returns the static description
// of the real adapter's process health probe. The probe is reported as
// disabled, and every required signal is also listed as missing.
func remoteWorkspaceRealAdapterProcessHealthProbe() map[string]any {
	// newSignals returns a fresh slice on every call so "required_signals" and
	// "missing_signals" never share a backing array.
	newSignals := func() []string {
		return []string{
			"process_started",
			"process_exit_status",
			"adapter_control_channel_ready",
			"fabric_service_channel_bound",
			"payload_forwarding_contract_ready",
		}
	}

	probe := make(map[string]any, 7)
	probe["schema_version"] = "rap.remote_workspace_real_adapter_process_health_probe.v1"
	probe["health_probe_enabled"] = false
	probe["reason"] = "disabled_until_real_runtime_stage"
	probe["payload_traffic"] = "none"
	probe["probe_model"] = "external_process_health"
	probe["required_signals"] = newSignals()
	probe["missing_signals"] = newSignals()
	return probe
}
|
||||
|
||||
func remoteWorkspaceRealAdapterProcessSupervisorPreconditions(config RemoteWorkspaceRealAdapterConfig) map[string]any {
|
||||
return map[string]any{
|
||||
"schema_version": "rap.remote_workspace_real_adapter_process_supervisor_preconditions.v1",
|
||||
"process_start_allowed": false,
|
||||
"reason": "disabled_until_real_runtime_stage",
|
||||
"command_config_present": strings.TrimSpace(config.Command) != "",
|
||||
"workdir_config_present": strings.TrimSpace(config.WorkDir) != "",
|
||||
"args_config_present": strings.TrimSpace(config.ArgsJSON) != "",
|
||||
"required_checks": []string{
|
||||
"real_runtime_stage_enabled",
|
||||
"command_config_validated",
|
||||
"workdir_config_validated",
|
||||
"process_identity_policy_bound",
|
||||
"fabric_service_channel_runtime_ready",
|
||||
"payload_forwarding_contract_enabled",
|
||||
"health_probe_contract_enabled",
|
||||
},
|
||||
"missing_checks": []string{
|
||||
"real_runtime_stage_enabled",
|
||||
"command_config_validated",
|
||||
"workdir_config_validated",
|
||||
"process_identity_policy_bound",
|
||||
"fabric_service_channel_runtime_ready",
|
||||
"payload_forwarding_contract_enabled",
|
||||
"health_probe_contract_enabled",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func remoteWorkspaceRealAdapterActivationDecision(config RemoteWorkspaceRealAdapterConfig) map[string]any {
|
||||
return map[string]any{
|
||||
"schema_version": "rap.remote_workspace_real_adapter_activation_decision.v1",
|
||||
"decision": "blocked",
|
||||
"reason": "real_runtime_stage_not_enabled",
|
||||
"enabled_requested": config.EnabledRequested,
|
||||
"activation_allowed": false,
|
||||
"payload_traffic": "none",
|
||||
"required_gates": []string{
|
||||
"real_runtime_stage_enabled",
|
||||
"fabric_service_channel_runtime_ready",
|
||||
"adapter_process_supervisor_enabled",
|
||||
"payload_forwarding_contract_enabled",
|
||||
},
|
||||
"missing_gates": []string{
|
||||
"real_runtime_stage_enabled",
|
||||
"fabric_service_channel_runtime_ready",
|
||||
"adapter_process_supervisor_enabled",
|
||||
"payload_forwarding_contract_enabled",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func remoteWorkspaceRealAdapterConfigProjection(config RemoteWorkspaceRealAdapterConfig) map[string]any {
|
||||
return map[string]any{
|
||||
"schema_version": "rap.remote_workspace_real_adapter_config_projection.v1",
|
||||
"enabled_requested": config.EnabledRequested,
|
||||
"activation_allowed": false,
|
||||
"command_present": strings.TrimSpace(config.Command) != "",
|
||||
"args_json_present": strings.TrimSpace(config.ArgsJSON) != "",
|
||||
"args_json_shape": remoteWorkspaceArgsJSONShape(config.ArgsJSON),
|
||||
"workdir_present": strings.TrimSpace(config.WorkDir) != "",
|
||||
"raw_values_redacted": true,
|
||||
}
|
||||
}
|
||||
|
||||
// remoteWorkspaceArgsJSONShape classifies value by its first non-whitespace
// character without parsing it.
//
// It returns "absent" for a blank value, "json_array" when the trimmed value
// starts with '[', "json_object" when it starts with '{', and "opaque" for
// anything else. The value is not validated as JSON.
func remoteWorkspaceArgsJSONShape(value string) string {
	body := strings.TrimSpace(value)
	if len(body) == 0 {
		return "absent"
	}
	switch body[0] {
	case '[':
		return "json_array"
	case '{':
		return "json_object"
	}
	return "opaque"
}
|
||||
|
||||
func serviceTrafficMode(serviceType string) string {
|
||||
switch serviceType {
|
||||
case "core-mesh":
|
||||
|
||||
@@ -130,4 +130,469 @@ func TestStubSupervisorRunsRDPWorkerAdapterContractProbeOnly(t *testing.T) {
|
||||
frameBatch["service_class"] != "remote_workspace" {
|
||||
t.Fatalf("unexpected frame batch contract: %#v", frameBatch)
|
||||
}
|
||||
realAdapter, ok := statuses[0].StatusPayload["real_adapter_supervision"].(map[string]any)
|
||||
if !ok {
|
||||
t.Fatalf("real_adapter_supervision = %#v", statuses[0].StatusPayload["real_adapter_supervision"])
|
||||
}
|
||||
if realAdapter["schema_version"] != "rap.remote_workspace_real_adapter_supervision.v1" ||
|
||||
realAdapter["enabled"] != false ||
|
||||
realAdapter["activation_state"] != "disabled_until_real_runtime_stage" ||
|
||||
realAdapter["payload_traffic"] != "none" {
|
||||
t.Fatalf("unexpected real adapter supervision contract: %#v", realAdapter)
|
||||
}
|
||||
if !realAdapterSupervisionContractCompatible(realAdapter) {
|
||||
t.Fatalf("real adapter supervision contract is not compatible: %#v", realAdapter)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStubSupervisorKeepsContractProbePrecedenceWhenRealAdapterAlsoRequested(t *testing.T) {
|
||||
statuses, err := (StubSupervisor{
|
||||
Version: "test",
|
||||
RemoteWorkspaceRealAdapter: RemoteWorkspaceRealAdapterConfig{
|
||||
EnabledRequested: true,
|
||||
Command: "/opt/rap/bin/rdp-worker",
|
||||
ArgsJSON: `["--future-probe"]`,
|
||||
WorkDir: "/var/lib/rap-node-agent/rdp-worker",
|
||||
},
|
||||
}).Apply(context.Background(), []client.DesiredWorkload{
|
||||
{
|
||||
ServiceType: "rdp-worker",
|
||||
DesiredState: "enabled",
|
||||
RuntimeMode: "native",
|
||||
Config: map[string]any{
|
||||
"adapter_contract_probe": true,
|
||||
"real_adapter_supervision": true,
|
||||
},
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("apply desired workload: %v", err)
|
||||
}
|
||||
if statuses[0].ReportedState != "running" {
|
||||
t.Fatalf("ReportedState = %q", statuses[0].ReportedState)
|
||||
}
|
||||
payload := statuses[0].StatusPayload
|
||||
if payload["execution_mode"] != "contract_probe" ||
|
||||
payload["reason"] != "remote_workspace_adapter_contract_probe_ready" ||
|
||||
payload["traffic"] != "none" {
|
||||
t.Fatalf("contract probe did not retain precedence: %#v", payload)
|
||||
}
|
||||
realAdapter, ok := payload["real_adapter_supervision"].(map[string]any)
|
||||
if !ok || !realAdapterSupervisionContractCompatible(realAdapter) {
|
||||
t.Fatalf("real_adapter_supervision = %#v", payload["real_adapter_supervision"])
|
||||
}
|
||||
decision := realAdapter["activation_decision"].(map[string]any)
|
||||
if realAdapter["enabled"] != false ||
|
||||
decision["decision"] != "blocked" ||
|
||||
decision["reason"] != "real_runtime_stage_not_enabled" ||
|
||||
decision["payload_traffic"] != "none" {
|
||||
t.Fatalf("unexpected activation decision under contract-probe precedence: %#v", realAdapter)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStubSupervisorKeepsRealAdapterSupervisionDisabled(t *testing.T) {
|
||||
statuses, err := (StubSupervisor{
|
||||
Version: "test",
|
||||
RemoteWorkspaceRealAdapter: RemoteWorkspaceRealAdapterConfig{
|
||||
EnabledRequested: true,
|
||||
Command: "/opt/rap/bin/rdp-worker",
|
||||
ArgsJSON: `["--future-probe"]`,
|
||||
WorkDir: "/var/lib/rap-node-agent/rdp-worker",
|
||||
},
|
||||
}).Apply(context.Background(), []client.DesiredWorkload{
|
||||
{
|
||||
ServiceType: "rdp-worker",
|
||||
DesiredState: "enabled",
|
||||
RuntimeMode: "native",
|
||||
Config: map[string]any{
|
||||
"real_adapter_supervision": true,
|
||||
},
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("apply desired workload: %v", err)
|
||||
}
|
||||
if statuses[0].ReportedState != "degraded" {
|
||||
t.Fatalf("ReportedState = %q", statuses[0].ReportedState)
|
||||
}
|
||||
if statuses[0].StatusPayload["reason"] != "remote_workspace_real_adapter_supervision_disabled" ||
|
||||
statuses[0].StatusPayload["execution_mode"] != "real_adapter_supervision_disabled" ||
|
||||
statuses[0].StatusPayload["traffic"] != "blocked" ||
|
||||
statuses[0].StatusPayload["payload_traffic"] != "none" {
|
||||
t.Fatalf("unexpected real adapter disabled payload: %#v", statuses[0].StatusPayload)
|
||||
}
|
||||
realAdapter, ok := statuses[0].StatusPayload["real_adapter_supervision"].(map[string]any)
|
||||
if !ok || !realAdapterSupervisionContractCompatible(realAdapter) {
|
||||
t.Fatalf("real adapter supervision contract = %#v", statuses[0].StatusPayload["real_adapter_supervision"])
|
||||
}
|
||||
projection, ok := realAdapter["config_projection"].(map[string]any)
|
||||
if !ok {
|
||||
t.Fatalf("config_projection = %#v", realAdapter["config_projection"])
|
||||
}
|
||||
if realAdapter["enabled"] != false ||
|
||||
projection["enabled_requested"] != true ||
|
||||
projection["activation_allowed"] != false ||
|
||||
projection["command_present"] != true ||
|
||||
projection["args_json_present"] != true ||
|
||||
projection["args_json_shape"] != "json_array" ||
|
||||
projection["workdir_present"] != true ||
|
||||
projection["raw_values_redacted"] != true {
|
||||
t.Fatalf("unexpected config projection: %#v", projection)
|
||||
}
|
||||
decision, ok := realAdapter["activation_decision"].(map[string]any)
|
||||
if !ok {
|
||||
t.Fatalf("activation_decision = %#v", realAdapter["activation_decision"])
|
||||
}
|
||||
if decision["decision"] != "blocked" ||
|
||||
decision["reason"] != "real_runtime_stage_not_enabled" ||
|
||||
decision["enabled_requested"] != true ||
|
||||
decision["activation_allowed"] != false ||
|
||||
decision["payload_traffic"] != "none" {
|
||||
t.Fatalf("unexpected activation decision: %#v", decision)
|
||||
}
|
||||
features, ok := realAdapter["features"].(map[string]any)
|
||||
if !ok ||
|
||||
features["config_projection"] != true ||
|
||||
features["activation_decision"] != true ||
|
||||
features["process_supervisor_preconditions"] != true ||
|
||||
features["process_supervisor_start_disabled"] != true ||
|
||||
features["missing_gates"] != true ||
|
||||
features["raw_values_redacted"] != true {
|
||||
t.Fatalf("unexpected real adapter features: %#v", realAdapter["features"])
|
||||
}
|
||||
preconditions, ok := realAdapter["process_supervisor_preconditions"].(map[string]any)
|
||||
if !ok ||
|
||||
preconditions["schema_version"] != "rap.remote_workspace_real_adapter_process_supervisor_preconditions.v1" ||
|
||||
preconditions["process_start_allowed"] != false ||
|
||||
preconditions["command_config_present"] != true ||
|
||||
preconditions["args_config_present"] != true ||
|
||||
preconditions["workdir_config_present"] != true {
|
||||
t.Fatalf("unexpected process supervisor preconditions: %#v", realAdapter["process_supervisor_preconditions"])
|
||||
}
|
||||
healthProbe, ok := realAdapter["process_health_probe"].(map[string]any)
|
||||
if !ok ||
|
||||
healthProbe["schema_version"] != "rap.remote_workspace_real_adapter_process_health_probe.v1" ||
|
||||
healthProbe["health_probe_enabled"] != false ||
|
||||
healthProbe["payload_traffic"] != "none" {
|
||||
t.Fatalf("unexpected process health probe: %#v", realAdapter["process_health_probe"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestRealAdapterSupervisionContractCompatibility(t *testing.T) {
|
||||
compatible := remoteWorkspaceRealAdapterSupervisionContract()
|
||||
if !realAdapterSupervisionContractCompatible(compatible) {
|
||||
t.Fatalf("expected real adapter supervision contract to be compatible")
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
contract map[string]any
|
||||
}{
|
||||
{
|
||||
name: "enabled",
|
||||
contract: map[string]any{
|
||||
"schema_version": "rap.remote_workspace_real_adapter_supervision.v1",
|
||||
"enabled": true,
|
||||
"activation_state": "disabled_until_real_runtime_stage",
|
||||
"payload_traffic": "none",
|
||||
"config_projection": map[string]any{"schema_version": "rap.remote_workspace_real_adapter_config_projection.v1", "activation_allowed": false, "raw_values_redacted": true},
|
||||
"activation_decision": map[string]any{"schema_version": "rap.remote_workspace_real_adapter_activation_decision.v1", "decision": "blocked", "reason": "real_runtime_stage_not_enabled", "activation_allowed": false, "payload_traffic": "none", "required_gates": []string{"real_runtime_stage_enabled", "fabric_service_channel_runtime_ready", "adapter_process_supervisor_enabled", "payload_forwarding_contract_enabled"}, "missing_gates": []string{"real_runtime_stage_enabled", "fabric_service_channel_runtime_ready", "adapter_process_supervisor_enabled", "payload_forwarding_contract_enabled"}},
|
||||
"process_supervisor_preconditions": map[string]any{"schema_version": "rap.remote_workspace_real_adapter_process_supervisor_preconditions.v1", "process_start_allowed": false, "reason": "disabled_until_real_runtime_stage", "required_checks": []string{"real_runtime_stage_enabled", "command_config_validated", "workdir_config_validated", "process_identity_policy_bound", "fabric_service_channel_runtime_ready", "payload_forwarding_contract_enabled", "health_probe_contract_enabled"}, "missing_checks": []string{"real_runtime_stage_enabled", "command_config_validated", "workdir_config_validated", "process_identity_policy_bound", "fabric_service_channel_runtime_ready", "payload_forwarding_contract_enabled", "health_probe_contract_enabled"}},
|
||||
"process_health_probe": map[string]any{"schema_version": "rap.remote_workspace_real_adapter_process_health_probe.v1", "health_probe_enabled": false, "reason": "disabled_until_real_runtime_stage", "payload_traffic": "none", "probe_model": "external_process_health", "required_signals": []string{"process_started", "process_exit_status", "adapter_control_channel_ready", "fabric_service_channel_bound", "payload_forwarding_contract_ready"}, "missing_signals": []string{"process_started", "process_exit_status", "adapter_control_channel_ready", "fabric_service_channel_bound", "payload_forwarding_contract_ready"}},
|
||||
"features": map[string]any{"config_projection": true, "activation_decision": true, "missing_gates": true, "process_health_probe": true, "process_health_probe_disabled": true, "process_supervisor_preconditions": true, "process_supervisor_start_disabled": true, "raw_values_redacted": true},
|
||||
"config_env": []string{"RAP_REMOTE_WORKSPACE_REAL_ADAPTER_ENABLED", "RAP_REMOTE_WORKSPACE_REAL_ADAPTER_COMMAND", "RAP_REMOTE_WORKSPACE_REAL_ADAPTER_ARGS_JSON", "RAP_REMOTE_WORKSPACE_REAL_ADAPTER_WORKDIR"},
|
||||
"status_contract": []string{"schema_version", "enabled", "activation_state", "execution_mode", "payload_traffic", "process_model", "config_projection", "activation_decision", "process_supervisor_preconditions", "process_health_probe", "features", "config_env", "status_contract"},
|
||||
"guardrails": []string{"contract_probe_remains_default", "no_payload_forwarding_until_real_runtime_stage", "backend_relay_not_steady_state", "fabric_service_channel_required"},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "missing env",
|
||||
contract: map[string]any{
|
||||
"schema_version": "rap.remote_workspace_real_adapter_supervision.v1",
|
||||
"enabled": false,
|
||||
"activation_state": "disabled_until_real_runtime_stage",
|
||||
"payload_traffic": "none",
|
||||
"config_projection": map[string]any{"schema_version": "rap.remote_workspace_real_adapter_config_projection.v1", "activation_allowed": false, "raw_values_redacted": true},
|
||||
"activation_decision": map[string]any{"schema_version": "rap.remote_workspace_real_adapter_activation_decision.v1", "decision": "blocked", "reason": "real_runtime_stage_not_enabled", "activation_allowed": false, "payload_traffic": "none", "required_gates": []string{"real_runtime_stage_enabled", "fabric_service_channel_runtime_ready", "adapter_process_supervisor_enabled", "payload_forwarding_contract_enabled"}, "missing_gates": []string{"real_runtime_stage_enabled", "fabric_service_channel_runtime_ready", "adapter_process_supervisor_enabled", "payload_forwarding_contract_enabled"}},
|
||||
"process_supervisor_preconditions": map[string]any{"schema_version": "rap.remote_workspace_real_adapter_process_supervisor_preconditions.v1", "process_start_allowed": false, "reason": "disabled_until_real_runtime_stage", "required_checks": []string{"real_runtime_stage_enabled", "command_config_validated", "workdir_config_validated", "process_identity_policy_bound", "fabric_service_channel_runtime_ready", "payload_forwarding_contract_enabled", "health_probe_contract_enabled"}, "missing_checks": []string{"real_runtime_stage_enabled", "command_config_validated", "workdir_config_validated", "process_identity_policy_bound", "fabric_service_channel_runtime_ready", "payload_forwarding_contract_enabled", "health_probe_contract_enabled"}},
|
||||
"process_health_probe": map[string]any{"schema_version": "rap.remote_workspace_real_adapter_process_health_probe.v1", "health_probe_enabled": false, "reason": "disabled_until_real_runtime_stage", "payload_traffic": "none", "probe_model": "external_process_health", "required_signals": []string{"process_started", "process_exit_status", "adapter_control_channel_ready", "fabric_service_channel_bound", "payload_forwarding_contract_ready"}, "missing_signals": []string{"process_started", "process_exit_status", "adapter_control_channel_ready", "fabric_service_channel_bound", "payload_forwarding_contract_ready"}},
|
||||
"features": map[string]any{"config_projection": true, "activation_decision": true, "missing_gates": true, "process_health_probe": true, "process_health_probe_disabled": true, "process_supervisor_preconditions": true, "process_supervisor_start_disabled": true, "raw_values_redacted": true},
|
||||
"config_env": []string{"RAP_REMOTE_WORKSPACE_REAL_ADAPTER_ENABLED"},
|
||||
"status_contract": []string{"schema_version", "enabled", "activation_state", "execution_mode", "payload_traffic", "process_model", "config_projection", "activation_decision", "process_supervisor_preconditions", "process_health_probe", "features", "config_env", "status_contract"},
|
||||
"guardrails": []string{"contract_probe_remains_default", "no_payload_forwarding_until_real_runtime_stage", "backend_relay_not_steady_state", "fabric_service_channel_required"},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "missing guardrail",
|
||||
contract: map[string]any{
|
||||
"schema_version": "rap.remote_workspace_real_adapter_supervision.v1",
|
||||
"enabled": false,
|
||||
"activation_state": "disabled_until_real_runtime_stage",
|
||||
"payload_traffic": "none",
|
||||
"config_projection": map[string]any{"schema_version": "rap.remote_workspace_real_adapter_config_projection.v1", "activation_allowed": false, "raw_values_redacted": true},
|
||||
"activation_decision": map[string]any{"schema_version": "rap.remote_workspace_real_adapter_activation_decision.v1", "decision": "blocked", "reason": "real_runtime_stage_not_enabled", "activation_allowed": false, "payload_traffic": "none", "required_gates": []string{"real_runtime_stage_enabled", "fabric_service_channel_runtime_ready", "adapter_process_supervisor_enabled", "payload_forwarding_contract_enabled"}, "missing_gates": []string{"real_runtime_stage_enabled", "fabric_service_channel_runtime_ready", "adapter_process_supervisor_enabled", "payload_forwarding_contract_enabled"}},
|
||||
"process_supervisor_preconditions": map[string]any{"schema_version": "rap.remote_workspace_real_adapter_process_supervisor_preconditions.v1", "process_start_allowed": false, "reason": "disabled_until_real_runtime_stage", "required_checks": []string{"real_runtime_stage_enabled", "command_config_validated", "workdir_config_validated", "process_identity_policy_bound", "fabric_service_channel_runtime_ready", "payload_forwarding_contract_enabled", "health_probe_contract_enabled"}, "missing_checks": []string{"real_runtime_stage_enabled", "command_config_validated", "workdir_config_validated", "process_identity_policy_bound", "fabric_service_channel_runtime_ready", "payload_forwarding_contract_enabled", "health_probe_contract_enabled"}},
|
||||
"process_health_probe": map[string]any{"schema_version": "rap.remote_workspace_real_adapter_process_health_probe.v1", "health_probe_enabled": false, "reason": "disabled_until_real_runtime_stage", "payload_traffic": "none", "probe_model": "external_process_health", "required_signals": []string{"process_started", "process_exit_status", "adapter_control_channel_ready", "fabric_service_channel_bound", "payload_forwarding_contract_ready"}, "missing_signals": []string{"process_started", "process_exit_status", "adapter_control_channel_ready", "fabric_service_channel_bound", "payload_forwarding_contract_ready"}},
|
||||
"features": map[string]any{"config_projection": true, "activation_decision": true, "missing_gates": true, "process_health_probe": true, "process_health_probe_disabled": true, "process_supervisor_preconditions": true, "process_supervisor_start_disabled": true, "raw_values_redacted": true},
|
||||
"config_env": []string{"RAP_REMOTE_WORKSPACE_REAL_ADAPTER_ENABLED", "RAP_REMOTE_WORKSPACE_REAL_ADAPTER_COMMAND", "RAP_REMOTE_WORKSPACE_REAL_ADAPTER_ARGS_JSON", "RAP_REMOTE_WORKSPACE_REAL_ADAPTER_WORKDIR"},
|
||||
"status_contract": []string{"schema_version", "enabled", "activation_state", "execution_mode", "payload_traffic", "process_model", "config_projection", "activation_decision", "process_supervisor_preconditions", "process_health_probe", "features", "config_env", "status_contract"},
|
||||
"guardrails": []string{"contract_probe_remains_default"},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if realAdapterSupervisionContractCompatible(tt.contract) {
|
||||
t.Fatalf("expected incompatible contract for %+v", tt.contract)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestRealAdapterConfigProjectionCompatibility(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
config RemoteWorkspaceRealAdapterConfig
|
||||
enabledRequested bool
|
||||
commandPresent bool
|
||||
argsJSONPresent bool
|
||||
argsJSONShape string
|
||||
workdirPresent bool
|
||||
}{
|
||||
{
|
||||
name: "default empty",
|
||||
argsJSONShape: "absent",
|
||||
},
|
||||
{
|
||||
name: "requested array args",
|
||||
config: RemoteWorkspaceRealAdapterConfig{
|
||||
EnabledRequested: true,
|
||||
Command: "/opt/rap/bin/rdp-worker",
|
||||
ArgsJSON: `["--future-probe"]`,
|
||||
WorkDir: "/var/lib/rap-node-agent/rdp-worker",
|
||||
},
|
||||
enabledRequested: true,
|
||||
commandPresent: true,
|
||||
argsJSONPresent: true,
|
||||
argsJSONShape: "json_array",
|
||||
workdirPresent: true,
|
||||
},
|
||||
{
|
||||
name: "object args shape",
|
||||
config: RemoteWorkspaceRealAdapterConfig{
|
||||
ArgsJSON: `{"arg":"value"}`,
|
||||
},
|
||||
argsJSONPresent: true,
|
||||
argsJSONShape: "json_object",
|
||||
},
|
||||
{
|
||||
name: "opaque args shape",
|
||||
config: RemoteWorkspaceRealAdapterConfig{
|
||||
ArgsJSON: "--future-probe",
|
||||
},
|
||||
argsJSONPresent: true,
|
||||
argsJSONShape: "opaque",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
contract := remoteWorkspaceRealAdapterSupervisionContract(tt.config)
|
||||
if !realAdapterSupervisionContractCompatible(contract) {
|
||||
t.Fatalf("contract is not compatible: %#v", contract)
|
||||
}
|
||||
projection := contract["config_projection"].(map[string]any)
|
||||
if projection["enabled_requested"] != tt.enabledRequested ||
|
||||
projection["activation_allowed"] != false ||
|
||||
projection["command_present"] != tt.commandPresent ||
|
||||
projection["args_json_present"] != tt.argsJSONPresent ||
|
||||
projection["args_json_shape"] != tt.argsJSONShape ||
|
||||
projection["workdir_present"] != tt.workdirPresent ||
|
||||
projection["raw_values_redacted"] != true {
|
||||
t.Fatalf("unexpected config projection: %#v", projection)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestRealAdapterProjectionAndActivationDecisionStayAligned(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
config RemoteWorkspaceRealAdapterConfig
|
||||
enabledRequested bool
|
||||
}{
|
||||
{name: "default"},
|
||||
{
|
||||
name: "requested",
|
||||
config: RemoteWorkspaceRealAdapterConfig{
|
||||
EnabledRequested: true,
|
||||
Command: "/opt/rap/bin/rdp-worker",
|
||||
ArgsJSON: `["--future-probe"]`,
|
||||
WorkDir: "/var/lib/rap-node-agent/rdp-worker",
|
||||
},
|
||||
enabledRequested: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
contract := remoteWorkspaceRealAdapterSupervisionContract(tt.config)
|
||||
projection := contract["config_projection"].(map[string]any)
|
||||
decision := contract["activation_decision"].(map[string]any)
|
||||
if projection["enabled_requested"] != decision["enabled_requested"] ||
|
||||
projection["enabled_requested"] != tt.enabledRequested ||
|
||||
projection["activation_allowed"] != false ||
|
||||
decision["activation_allowed"] != false ||
|
||||
contract["enabled"] != false ||
|
||||
contract["payload_traffic"] != decision["payload_traffic"] {
|
||||
t.Fatalf("projection and activation decision are not aligned: contract=%#v", contract)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func realAdapterSupervisionContractCompatible(contract map[string]any) bool {
|
||||
if contract["schema_version"] != "rap.remote_workspace_real_adapter_supervision.v1" ||
|
||||
contract["enabled"] != false ||
|
||||
contract["activation_state"] != "disabled_until_real_runtime_stage" ||
|
||||
contract["payload_traffic"] != "none" {
|
||||
return false
|
||||
}
|
||||
projection, ok := contract["config_projection"].(map[string]any)
|
||||
if !ok ||
|
||||
projection["schema_version"] != "rap.remote_workspace_real_adapter_config_projection.v1" ||
|
||||
projection["activation_allowed"] != false ||
|
||||
projection["raw_values_redacted"] != true {
|
||||
return false
|
||||
}
|
||||
decision, ok := contract["activation_decision"].(map[string]any)
|
||||
if !ok ||
|
||||
decision["schema_version"] != "rap.remote_workspace_real_adapter_activation_decision.v1" ||
|
||||
decision["decision"] != "blocked" ||
|
||||
decision["reason"] != "real_runtime_stage_not_enabled" ||
|
||||
decision["activation_allowed"] != false ||
|
||||
decision["payload_traffic"] != "none" {
|
||||
return false
|
||||
}
|
||||
for _, item := range []string{
|
||||
"real_runtime_stage_enabled",
|
||||
"fabric_service_channel_runtime_ready",
|
||||
"adapter_process_supervisor_enabled",
|
||||
"payload_forwarding_contract_enabled",
|
||||
} {
|
||||
if !anyStringSliceContains(decision["required_gates"], item) || !anyStringSliceContains(decision["missing_gates"], item) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
preconditions, ok := contract["process_supervisor_preconditions"].(map[string]any)
|
||||
if !ok ||
|
||||
preconditions["schema_version"] != "rap.remote_workspace_real_adapter_process_supervisor_preconditions.v1" ||
|
||||
preconditions["process_start_allowed"] != false ||
|
||||
preconditions["reason"] != "disabled_until_real_runtime_stage" {
|
||||
return false
|
||||
}
|
||||
for _, item := range []string{
|
||||
"real_runtime_stage_enabled",
|
||||
"command_config_validated",
|
||||
"workdir_config_validated",
|
||||
"process_identity_policy_bound",
|
||||
"fabric_service_channel_runtime_ready",
|
||||
"payload_forwarding_contract_enabled",
|
||||
"health_probe_contract_enabled",
|
||||
} {
|
||||
if !anyStringSliceContains(preconditions["required_checks"], item) || !anyStringSliceContains(preconditions["missing_checks"], item) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
healthProbe, ok := contract["process_health_probe"].(map[string]any)
|
||||
if !ok ||
|
||||
healthProbe["schema_version"] != "rap.remote_workspace_real_adapter_process_health_probe.v1" ||
|
||||
healthProbe["health_probe_enabled"] != false ||
|
||||
healthProbe["reason"] != "disabled_until_real_runtime_stage" ||
|
||||
healthProbe["payload_traffic"] != "none" ||
|
||||
healthProbe["probe_model"] != "external_process_health" {
|
||||
return false
|
||||
}
|
||||
for _, item := range []string{
|
||||
"process_started",
|
||||
"process_exit_status",
|
||||
"adapter_control_channel_ready",
|
||||
"fabric_service_channel_bound",
|
||||
"payload_forwarding_contract_ready",
|
||||
} {
|
||||
if !anyStringSliceContains(healthProbe["required_signals"], item) || !anyStringSliceContains(healthProbe["missing_signals"], item) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
features, ok := contract["features"].(map[string]any)
|
||||
if !ok ||
|
||||
features["config_projection"] != true ||
|
||||
features["activation_decision"] != true ||
|
||||
features["missing_gates"] != true ||
|
||||
features["process_health_probe"] != true ||
|
||||
features["process_health_probe_disabled"] != true ||
|
||||
features["process_supervisor_preconditions"] != true ||
|
||||
features["process_supervisor_start_disabled"] != true ||
|
||||
features["raw_values_redacted"] != true {
|
||||
return false
|
||||
}
|
||||
for _, item := range []string{
|
||||
"RAP_REMOTE_WORKSPACE_REAL_ADAPTER_ENABLED",
|
||||
"RAP_REMOTE_WORKSPACE_REAL_ADAPTER_COMMAND",
|
||||
"RAP_REMOTE_WORKSPACE_REAL_ADAPTER_ARGS_JSON",
|
||||
"RAP_REMOTE_WORKSPACE_REAL_ADAPTER_WORKDIR",
|
||||
} {
|
||||
if !anyStringSliceContains(contract["config_env"], item) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
for _, item := range []string{
|
||||
"schema_version",
|
||||
"enabled",
|
||||
"activation_state",
|
||||
"execution_mode",
|
||||
"payload_traffic",
|
||||
"process_model",
|
||||
"config_projection",
|
||||
"activation_decision",
|
||||
"process_supervisor_preconditions",
|
||||
"process_health_probe",
|
||||
"features",
|
||||
"config_env",
|
||||
"status_contract",
|
||||
} {
|
||||
if !anyStringSliceContains(contract["status_contract"], item) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
for _, item := range []string{
|
||||
"contract_probe_remains_default",
|
||||
"no_payload_forwarding_until_real_runtime_stage",
|
||||
"backend_relay_not_steady_state",
|
||||
"fabric_service_channel_required",
|
||||
} {
|
||||
if !anyStringSliceContains(contract["guardrails"], item) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// anyStringSliceContains reports whether value is a []string or []any that
// holds want. Elements of a []any match only when they are plain strings equal
// to want. Any other type of value, including nil, yields false.
func anyStringSliceContains(value any, want string) bool {
	if typed, ok := value.([]string); ok {
		for i := range typed {
			if typed[i] == want {
				return true
			}
		}
		return false
	}

	loose, ok := value.([]any)
	if !ok {
		return false
	}
	for i := range loose {
		if text, isString := loose[i].(string); isString && text == want {
			return true
		}
	}
	return false
}
|
||||
|
||||
@@ -184,6 +184,9 @@ func (g *Gateway) Snapshot() map[string]any {
|
||||
if !lastRuntimeActivityAt.IsZero() {
|
||||
out["last_runtime_activity_at"] = lastRuntimeActivityAt.UTC().Format(time.RFC3339Nano)
|
||||
}
|
||||
if platform := gatewayPlatformSnapshot(g.InterfaceName, g.RouteCIDR); len(platform) > 0 {
|
||||
out["platform"] = platform
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
|
||||
@@ -19,6 +19,8 @@ const (
|
||||
iffNoPI = 0x1000
|
||||
tunSetIFF = 0x400454ca
|
||||
ifNameSize = 16
|
||||
gatewayTunMTU = "1000"
|
||||
gatewayTCPMSS = "900"
|
||||
)
|
||||
|
||||
type tunDevice struct {
|
||||
@@ -86,6 +88,9 @@ func configureGatewayInterface(name, addressCIDR, routeCIDR string) error {
|
||||
if err := runCommand("ip", "addr", "replace", addressCIDR, "dev", name); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := runCommand("ip", "link", "set", "dev", name, "mtu", gatewayTunMTU); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := runCommand("ip", "link", "set", name, "up"); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -118,11 +123,10 @@ func ensureMasqueradeRules(routeCIDR string) error {
|
||||
}
|
||||
|
||||
func ensureMSSClampRule(interfaceName string) error {
|
||||
err := ensureIPTablesRule("mangle", "FORWARD", "-i", interfaceName, "-p", "tcp", "--tcp-flags", "SYN,RST", "SYN", "-j", "TCPMSS", "--clamp-mss-to-pmtu")
|
||||
if err == nil {
|
||||
return nil
|
||||
if err := ensureIPTablesRule("mangle", "FORWARD", "-i", interfaceName, "-p", "tcp", "--tcp-flags", "SYN,RST", "SYN", "-j", "TCPMSS", "--set-mss", gatewayTCPMSS); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
return ensureIPTablesRule("mangle", "FORWARD", "-o", interfaceName, "-p", "tcp", "--tcp-flags", "SYN,RST", "SYN", "-j", "TCPMSS", "--set-mss", gatewayTCPMSS)
|
||||
}
|
||||
|
||||
func defaultIPv4Interface() (string, error) {
|
||||
@@ -204,3 +208,47 @@ func runCommand(name string, args ...string) error {
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func gatewayPlatformSnapshot(interfaceName, routeCIDR string) map[string]any {
|
||||
out := map[string]any{
|
||||
"os": "linux",
|
||||
"interface": interfaceName,
|
||||
"route_cidr": routeCIDR,
|
||||
}
|
||||
if value, err := readTrimmedFile("/proc/sys/net/ipv4/ip_forward"); err == nil {
|
||||
out["ipv4_forward"] = value
|
||||
}
|
||||
for _, key := range []string{"all", "default", interfaceName} {
|
||||
if strings.TrimSpace(key) == "" {
|
||||
continue
|
||||
}
|
||||
if value, err := readTrimmedFile(fmt.Sprintf("/proc/sys/net/ipv4/conf/%s/rp_filter", key)); err == nil {
|
||||
out["rp_filter_"+key] = value
|
||||
}
|
||||
}
|
||||
if interfaceName != "" {
|
||||
out["forward_in_rule"] = iptablesRulePresent("filter", "FORWARD", "-i", interfaceName, "-j", "ACCEPT")
|
||||
out["forward_out_established_rule"] = iptablesRulePresent("filter", "FORWARD", "-o", interfaceName, "-m", "conntrack", "--ctstate", "RELATED,ESTABLISHED", "-j", "ACCEPT")
|
||||
}
|
||||
if routeCIDR != "" {
|
||||
out["masquerade_rule"] = iptablesRulePresent("nat", "POSTROUTING", "-s", routeCIDR, "-j", "MASQUERADE")
|
||||
if egress, err := defaultIPv4Interface(); err == nil && egress != "" {
|
||||
out["default_egress"] = egress
|
||||
out["egress_masquerade_rule"] = iptablesRulePresent("nat", "POSTROUTING", "-s", routeCIDR, "-o", egress, "-j", "MASQUERADE")
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// readTrimmedFile reads the whole file at path and returns its contents with
// leading and trailing whitespace removed. On a read error it returns an empty
// string together with that error.
func readTrimmedFile(path string) (string, error) {
	raw, err := os.ReadFile(path)
	if err == nil {
		return strings.TrimSpace(string(raw)), nil
	}
	return "", err
}
|
||||
|
||||
// iptablesRulePresent runs `iptables -t <table> -C <chain> <rule...>` and
// reports whether the command completed without error. Any failure to run the
// command, or a non-zero exit, is reported as false.
func iptablesRulePresent(table, chain string, rule ...string) bool {
	argv := make([]string, 0, 4+len(rule))
	argv = append(argv, "-t", table, "-C", chain)
	argv = append(argv, rule...)

	if err := exec.Command("iptables", argv...).Run(); err != nil {
		return false
	}
	return true
}
|
||||
|
||||
@@ -21,3 +21,11 @@ func (d *tunDevice) Write(packet []byte) (int, error) {
|
||||
// Close performs no work and always returns a nil error.
func (d *tunDevice) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// gatewayPlatformSnapshot returns a minimal snapshot that marks the platform
// as "unsupported" and echoes the given interface name and route CIDR. It
// inspects no host state.
func gatewayPlatformSnapshot(interfaceName, routeCIDR string) map[string]any {
	snapshot := make(map[string]any, 3)
	snapshot["os"] = "unsupported"
	snapshot["interface"] = interfaceName
	snapshot["route_cidr"] = routeCIDR
	return snapshot
}
|
||||
|
||||
Reference in New Issue
Block a user