This commit is contained in:
2026-05-14 23:30:34 +03:00
parent 26cb65e936
commit 04c46042d9
239 changed files with 34102 additions and 438 deletions
@@ -2,6 +2,7 @@ package main
import (
"context"
"encoding/json"
"flag"
"fmt"
"log"
@@ -58,6 +59,14 @@ func main() {
if err := runUpdateLoop(ctx, os.Args[2:]); err != nil {
log.Fatalf("update-loop failed: %v", err)
}
case "monitor-loop":
if err := runMonitorLoop(ctx, os.Args[2:]); err != nil {
log.Fatalf("monitor-loop failed: %v", err)
}
case "monitor-once":
if err := runMonitorOnce(ctx, os.Args[2:]); err != nil {
log.Fatalf("monitor-once failed: %v", err)
}
case "install-updater":
if err := runInstallUpdater(ctx, os.Args[2:]); err != nil {
log.Fatalf("install-updater failed: %v", err)
@@ -288,6 +297,9 @@ func runInstall(ctx context.Context, args []string) error {
return err
}
fmt.Print(result.Unit)
if result.MonitorUnit != "" {
fmt.Print(result.MonitorUnit)
}
}
return nil
}
@@ -304,7 +316,7 @@ func runInstall(ctx context.Context, args []string) error {
if err != nil {
return err
}
fmt.Printf("updater_service=%s unit=%s binary=%s started=%t\n", serviceResult.UnitName, serviceResult.UnitPath, serviceResult.BinaryPath, serviceResult.Started)
fmt.Printf("updater_service=%s unit=%s binary=%s started=%t monitor_service=%s\n", serviceResult.UnitName, serviceResult.UnitPath, serviceResult.BinaryPath, serviceResult.Started, serviceResult.MonitorUnitName)
}
fmt.Println("next: approve the join request in the platform admin panel, then the node-agent will finish bootstrap and start heartbeats")
return nil
@@ -429,6 +441,75 @@ func runUpdateLoop(ctx context.Context, args []string) error {
return (hostagent.DockerManager{}).RunUpdateLoop(ctx, cfg)
}
// runMonitorLoop implements the "monitor-loop" subcommand. It parses the
// monitor flags from args and hands the resulting configuration to
// hostagent.RunMonitorLoop, returning either the flag-parsing error or
// whatever error the loop itself returns.
func runMonitorLoop(ctx context.Context, args []string) error {
	monitorCfg, parseErr := parseMonitor(args)
	if parseErr != nil {
		return parseErr
	}
	return hostagent.RunMonitorLoop(ctx, monitorCfg)
}
// runMonitorOnce implements the "monitor-once" subcommand. It parses the same
// flags as monitor-loop, forces MaxRuns to 1, performs a single
// hostagent.RunMonitorOnce pass and writes the result to stdout as one JSON
// document.
//
// Only flag-parsing and JSON-encoding failures are returned as errors; the
// outcome of the check itself is carried in the encoded result.
func runMonitorOnce(ctx context.Context, args []string) error {
	monitorCfg, parseErr := parseMonitor(args)
	if parseErr != nil {
		return parseErr
	}
	monitorCfg.MaxRuns = 1
	encoder := json.NewEncoder(os.Stdout)
	return encoder.Encode(hostagent.RunMonitorOnce(ctx, monitorCfg))
}
// parseMonitor builds a hostagent.MonitorConfig from command-line flags.
//
// Every flag default is taken from the matching RAP_* environment variable
// (via getenv/getenvInt/getenvBool) and falls back to the hostagent default
// when the variable is not set. Flags expressed in seconds or minutes are
// converted to time.Duration after parsing. The returned config logs through
// fmt.Printf, one line per message.
//
// On a flag-parsing error the zero MonitorConfig is returned together with the
// error (the flag set uses flag.ContinueOnError, so parsing never exits the
// process on its own).
func parseMonitor(args []string) (hostagent.MonitorConfig, error) {
	flags := flag.NewFlagSet("monitor-loop", flag.ContinueOnError)
	cfg := hostagent.MonitorConfig{}
	watched := repeatedFlag{}

	// Integer flags that are converted to durations once parsing succeeds.
	var (
		intervalSec     int
		initialDelaySec int
		runLimit        int
		cooldownSec     int
		staleSec        int
		tmpAgeMin       int
	)

	// Identity and reporting target.
	flags.StringVar(&cfg.BackendURL, "backend-url", getenv("RAP_BACKEND_URL", ""), "Control Plane API base URL used for monitor status reports.")
	flags.StringVar(&cfg.ClusterID, "cluster-id", getenv("RAP_CLUSTER_ID", ""), "Cluster ID.")
	flags.StringVar(&cfg.NodeID, "node-id", getenv("RAP_NODE_ID", ""), "Already enrolled node ID.")
	flags.StringVar(&cfg.StateDir, "state-dir", getenv("RAP_NODE_STATE_DIR", hostagent.DefaultStateDir), "Host path containing node-agent identity.json.")
	flags.StringVar(&cfg.Product, "product", getenv("RAP_MONITOR_PRODUCT", hostagent.DefaultMonitorProduct), "Status product name.")
	flags.StringVar(&cfg.CurrentVersion, "current-version", getenv("RAP_HOST_AGENT_VERSION", agent.Version), "Current rap-host-agent version.")

	// Local paths and tools.
	flags.StringVar(&cfg.DockerBinary, "docker-binary", getenv("RAP_DOCKER_BINARY", "docker"), "Docker CLI binary.")
	flags.StringVar(&cfg.DiskPath, "disk-path", getenv("RAP_MONITOR_DISK_PATH", "/"), "Filesystem path used for disk usage checks.")
	flags.StringVar(&cfg.TmpDir, "tmp-dir", getenv("RAP_MONITOR_TMP_DIR", "/tmp"), "Temporary directory cleaned under pressure.")
	flags.StringVar(&cfg.StatusFile, "status-file", getenv("RAP_MONITOR_STATUS_FILE", ""), "Optional JSON status file written after every run.")

	// Scheduling.
	flags.IntVar(&intervalSec, "interval-seconds", getenvInt("RAP_MONITOR_INTERVAL_SECONDS", hostagent.DefaultMonitorIntervalSeconds), "Seconds between monitor checks.")
	flags.IntVar(&initialDelaySec, "initial-delay-seconds", getenvInt("RAP_MONITOR_INITIAL_DELAY_SECONDS", 0), "Seconds to wait before first monitor check.")
	flags.IntVar(&runLimit, "max-runs", getenvInt("RAP_MONITOR_MAX_RUNS", 0), "Maximum monitor iterations. Use 0 to run until stopped.")

	// Disk thresholds.
	flags.IntVar(&cfg.DiskWarnPercent, "disk-warn-percent", getenvInt("RAP_MONITOR_DISK_WARN_PERCENT", hostagent.DefaultMonitorDiskWarnPercent), "Disk used percent that reports warning.")
	flags.IntVar(&cfg.DiskCleanupPercent, "disk-cleanup-percent", getenvInt("RAP_MONITOR_DISK_CLEANUP_PERCENT", hostagent.DefaultMonitorDiskCleanupPercent), "Disk used percent that triggers cleanup.")
	flags.IntVar(&cfg.DiskCriticalPercent, "disk-critical-percent", getenvInt("RAP_MONITOR_DISK_CRITICAL_PERCENT", hostagent.DefaultMonitorDiskCriticalPercent), "Disk used percent that reports failure after cleanup.")

	// Remediation and cleanup tuning.
	flags.IntVar(&cooldownSec, "restart-cooldown-seconds", getenvInt("RAP_MONITOR_RESTART_COOLDOWN_SECONDS", hostagent.DefaultMonitorRestartCooldownSec), "Minimum seconds between repeated restarts of the same target.")
	flags.IntVar(&staleSec, "stale-restarting-seconds", getenvInt("RAP_MONITOR_STALE_RESTARTING_SECONDS", hostagent.DefaultMonitorStaleRestartingSec), "Seconds after which docker restarting state is considered stuck.")
	flags.IntVar(&tmpAgeMin, "tmp-min-age-minutes", getenvInt("RAP_MONITOR_TMP_MIN_AGE_MINUTES", hostagent.DefaultMonitorTmpMinAgeMinutes), "Minimum age for /tmp rap-* and go-build* cleanup.")
	flags.BoolVar(&cfg.RestartContainers, "restart-containers", getenvBool("RAP_MONITOR_RESTART_CONTAINERS", true), "Start/restart watched containers when they are stopped, unhealthy, or stuck restarting.")
	flags.BoolVar(&cfg.CleanupDocker, "cleanup-docker", getenvBool("RAP_MONITOR_CLEANUP_DOCKER", true), "Run safe docker prune cleanup when disk is above cleanup threshold.")
	flags.Var(&watched, "watch-container", "Docker container to watch and heal; may be repeated.")

	if parseErr := flags.Parse(args); parseErr != nil {
		return hostagent.MonitorConfig{}, parseErr
	}

	seconds := func(count int) time.Duration {
		return time.Duration(count) * time.Second
	}
	cfg.WatchContainers = watched
	cfg.Interval = seconds(intervalSec)
	cfg.InitialDelay = seconds(initialDelaySec)
	cfg.MaxRuns = runLimit
	cfg.RestartCooldown = seconds(cooldownSec)
	cfg.StaleRestartingAfter = seconds(staleSec)
	cfg.TmpMinAge = time.Duration(tmpAgeMin) * time.Minute
	cfg.Logf = func(format string, values ...any) {
		fmt.Printf(format+"\n", values...)
	}
	return cfg, nil
}
func firstNonEmptyLocal(values ...string) string {
for _, value := range values {
if strings.TrimSpace(value) != "" {
@@ -444,6 +525,8 @@ func runInstallUpdater(ctx context.Context, args []string) error {
service := hostagent.UpdateServiceConfig{}
var dryRun bool
var selfUpdater bool
var monitorEnabled bool
monitorContainers := repeatedFlag{}
fs.StringVar(&runtimeCfg.BackendURL, "backend-url", getenv("RAP_BACKEND_URL", ""), "Control Plane API base URL.")
fs.StringVar(&runtimeCfg.ClusterID, "cluster-id", getenv("RAP_CLUSTER_ID", ""), "Cluster ID.")
fs.StringVar(&runtimeCfg.ContainerName, "container-name", getenv("RAP_NODE_AGENT_CONTAINER", hostagent.DefaultContainerName), "Docker container name to update.")
@@ -456,6 +539,14 @@ func runInstallUpdater(ctx context.Context, args []string) error {
fs.IntVar(&service.HealthTimeoutSec, "health-timeout-seconds", getenvInt("RAP_UPDATE_HEALTH_TIMEOUT_SECONDS", 30), "Updated container running-state timeout in seconds.")
fs.StringVar(&service.BinaryInstallPath, "binary-path", getenv("RAP_HOST_AGENT_BINARY_PATH", hostagent.DefaultHostAgentInstallPath), "Persistent host path for rap-host-agent binary used by the service.")
fs.BoolVar(&selfUpdater, "self-updater-enabled", getenvBool("RAP_HOST_AGENT_SELF_UPDATE_ENABLED", true), "Install and start one global host-agent binary self-updater service.")
fs.BoolVar(&monitorEnabled, "monitor-enabled", getenvBool("RAP_HOST_AGENT_MONITOR_ENABLED", true), "Install and start the local host monitor service.")
fs.IntVar(&service.MonitorIntervalSec, "monitor-interval-seconds", getenvInt("RAP_MONITOR_INTERVAL_SECONDS", hostagent.DefaultMonitorIntervalSeconds), "Seconds between monitor checks.")
fs.StringVar(&service.MonitorStatusFile, "monitor-status-file", getenv("RAP_MONITOR_STATUS_FILE", ""), "Optional JSON status file written by the monitor.")
fs.IntVar(&service.MonitorDiskWarn, "monitor-disk-warn-percent", getenvInt("RAP_MONITOR_DISK_WARN_PERCENT", hostagent.DefaultMonitorDiskWarnPercent), "Disk used percent that reports warning.")
fs.IntVar(&service.MonitorDiskCleanup, "monitor-disk-cleanup-percent", getenvInt("RAP_MONITOR_DISK_CLEANUP_PERCENT", hostagent.DefaultMonitorDiskCleanupPercent), "Disk used percent that triggers cleanup.")
fs.IntVar(&service.MonitorDiskCritical, "monitor-disk-critical-percent", getenvInt("RAP_MONITOR_DISK_CRITICAL_PERCENT", hostagent.DefaultMonitorDiskCriticalPercent), "Disk used percent that reports failure after cleanup.")
fs.BoolVar(&service.MonitorCleanupDocker, "monitor-cleanup-docker", getenvBool("RAP_MONITOR_CLEANUP_DOCKER", true), "Run safe docker prune cleanup when disk is above cleanup threshold.")
fs.Var(&monitorContainers, "monitor-container", "Extra Docker container watched by monitor; may be repeated.")
fs.BoolVar(&dryRun, "dry-run", false, "Print the systemd unit without installing it.")
if err := fs.Parse(args); err != nil {
return err
@@ -465,6 +556,8 @@ func runInstallUpdater(ctx context.Context, args []string) error {
service.DryRun = dryRun
service.InstallSelfUpdater = selfUpdater
service.SelfUpdateVersion = agent.Version
service.InstallMonitor = monitorEnabled
service.MonitorContainers = monitorContainers
result, err := (hostagent.DockerManager{}).InstallUpdateService(ctx, service)
if err != nil {
return err
@@ -474,9 +567,12 @@ func runInstallUpdater(ctx context.Context, args []string) error {
if result.SelfUnit != "" {
fmt.Print(result.SelfUnit)
}
if result.MonitorUnit != "" {
fmt.Print(result.MonitorUnit)
}
return nil
}
fmt.Printf("updater_service=%s unit=%s binary=%s started=%t self_updater=%s\n", result.UnitName, result.UnitPath, result.BinaryPath, result.Started, result.SelfUnitName)
fmt.Printf("updater_service=%s unit=%s binary=%s started=%t self_updater=%s monitor_service=%s\n", result.UnitName, result.UnitPath, result.BinaryPath, result.Started, result.SelfUnitName, result.MonitorUnitName)
return nil
}
@@ -572,6 +668,7 @@ func parseInstall(args []string) (installCommandConfig, error) {
var installToken string
var autoUpdateEnabled bool
autoUpdate := hostagent.UpdateServiceConfig{}
monitorContainers := repeatedFlag{}
fs.StringVar(&cfg.BackendURL, "backend-url", getenv("RAP_BACKEND_URL", ""), "Control Plane API base URL.")
fs.StringVar(&cfg.ClusterID, "cluster-id", getenv("RAP_CLUSTER_ID", ""), "Cluster ID.")
fs.StringVar(&cfg.JoinToken, "join-token", getenv("RAP_JOIN_TOKEN", ""), "One-time join token for first enrollment.")
@@ -591,6 +688,7 @@ func parseInstall(args []string) (installCommandConfig, error) {
fs.BoolVar(&dryRun, "dry-run", false, "Print the docker command with secrets redacted.")
fs.BoolVar(&autoUpdateEnabled, "auto-update-enabled", getenvBool("RAP_AUTO_UPDATE_ENABLED", true), "Install and start the local update-loop service.")
fs.BoolVar(&autoUpdate.InstallSelfUpdater, "host-agent-self-update-enabled", getenvBool("RAP_HOST_AGENT_SELF_UPDATE_ENABLED", true), "Install and start one global host-agent binary self-updater service.")
fs.BoolVar(&autoUpdate.InstallMonitor, "host-agent-monitor-enabled", getenvBool("RAP_HOST_AGENT_MONITOR_ENABLED", true), "Install and start the local host monitor service.")
fs.StringVar(&autoUpdate.CurrentVersion, "auto-update-current-version", getenv("RAP_NODE_AGENT_VERSION", agent.Version), "Initial node-agent version used by update-loop before the first successful update.")
fs.StringVar(&autoUpdate.SelfUpdateVersion, "host-agent-current-version", getenv("RAP_HOST_AGENT_VERSION", agent.Version), "Initial host-agent binary version used by the self-updater.")
fs.StringVar(&autoUpdate.Channel, "auto-update-channel", getenv("RAP_UPDATE_CHANNEL", ""), "Optional update channel override for update-loop.")
@@ -599,6 +697,12 @@ func parseInstall(args []string) (installCommandConfig, error) {
fs.Float64Var(&autoUpdate.Jitter, "auto-update-jitter", getenvFloat("RAP_UPDATE_JITTER", 0.15), "Update-loop interval jitter, 0..1.")
fs.IntVar(&autoUpdate.HealthTimeoutSec, "auto-update-health-timeout-seconds", getenvInt("RAP_UPDATE_HEALTH_TIMEOUT_SECONDS", 30), "Updated container running-state timeout in seconds.")
fs.StringVar(&autoUpdate.BinaryInstallPath, "auto-update-binary-path", getenv("RAP_HOST_AGENT_BINARY_PATH", hostagent.DefaultHostAgentInstallPath), "Persistent host path for rap-host-agent binary used by the service.")
fs.IntVar(&autoUpdate.MonitorIntervalSec, "monitor-interval-seconds", getenvInt("RAP_MONITOR_INTERVAL_SECONDS", hostagent.DefaultMonitorIntervalSeconds), "Seconds between monitor checks.")
fs.StringVar(&autoUpdate.MonitorStatusFile, "monitor-status-file", getenv("RAP_MONITOR_STATUS_FILE", ""), "Optional JSON status file written by the monitor.")
fs.IntVar(&autoUpdate.MonitorDiskWarn, "monitor-disk-warn-percent", getenvInt("RAP_MONITOR_DISK_WARN_PERCENT", hostagent.DefaultMonitorDiskWarnPercent), "Disk used percent that reports warning.")
fs.IntVar(&autoUpdate.MonitorDiskCleanup, "monitor-disk-cleanup-percent", getenvInt("RAP_MONITOR_DISK_CLEANUP_PERCENT", hostagent.DefaultMonitorDiskCleanupPercent), "Disk used percent that triggers cleanup.")
fs.IntVar(&autoUpdate.MonitorDiskCritical, "monitor-disk-critical-percent", getenvInt("RAP_MONITOR_DISK_CRITICAL_PERCENT", hostagent.DefaultMonitorDiskCriticalPercent), "Disk used percent that reports failure after cleanup.")
fs.BoolVar(&autoUpdate.MonitorCleanupDocker, "monitor-cleanup-docker", getenvBool("RAP_MONITOR_CLEANUP_DOCKER", true), "Run safe docker prune cleanup when disk is above cleanup threshold.")
fs.BoolVar(&cfg.WorkloadSupervisionEnabled, "workload-supervision-enabled", getenvBool("RAP_WORKLOAD_SUPERVISION_ENABLED", false), "Enable node-agent workload status reporting.")
fs.BoolVar(&cfg.MeshSyntheticRuntimeEnabled, "mesh-synthetic-runtime-enabled", getenvBool("RAP_MESH_SYNTHETIC_RUNTIME_ENABLED", false), "Enable synthetic mesh runtime.")
fs.BoolVar(&cfg.MeshProductionForwardingEnabled, "mesh-production-forwarding-enabled", getenvBool("RAP_MESH_PRODUCTION_FORWARDING_ENABLED", false), "Enable production forwarding gate; runtime still fail-closed if unavailable.")
@@ -622,12 +726,14 @@ func parseInstall(args []string) (installCommandConfig, error) {
fs.Var(&extraEnv, "env", "Extra KEY=VALUE env passed to node-agent container; may be repeated.")
fs.Var(&extraRunArg, "docker-run-arg", "Extra raw docker run argument; may be repeated.")
fs.Var(&imageArtifactURL, "image-artifact-url", "Docker image tar artifact URL to docker load before running; may be repeated.")
fs.Var(&monitorContainers, "monitor-container", "Extra Docker container watched by monitor; may be repeated.")
if err := fs.Parse(args); err != nil {
return installCommandConfig{}, err
}
cfg.ExtraEnv = extraEnv
cfg.AdditionalDockerRunArgs = extraRunArg
cfg.ImageArtifactURLs = append(cfg.ImageArtifactURLs, imageArtifactURL...)
autoUpdate.MonitorContainers = monitorContainers
if strings.TrimSpace(profileURL) != "" || strings.TrimSpace(installToken) != "" {
profile, err := hostagent.FetchDockerInstallProfile(context.Background(), hostagent.ProfileRequest{
URL: profileURL,
@@ -738,6 +844,8 @@ func usage() {
rap-host-agent install-updater -backend-url URL -cluster-id ID -state-dir DIR -container-name NAME
rap-host-agent update-host-agent -backend-url URL -cluster-id ID -state-dir DIR
rap-host-agent update-host-agent-loop -backend-url URL -cluster-id ID -state-dir DIR
rap-host-agent monitor-loop -backend-url URL -cluster-id ID -state-dir DIR --watch-container NAME
rap-host-agent monitor-once -backend-url URL -cluster-id ID -state-dir DIR --watch-container NAME
rap-host-agent update -backend-url URL -cluster-id ID -node-id ID [-container-name NAME]
rap-host-agent update-loop -backend-url URL -cluster-id ID -node-id ID [-container-name NAME]
rap-host-agent status [-container-name NAME]`)
@@ -222,6 +222,11 @@ type NodeVPNAssignmentLeaseRenewRequest struct {
TTLSeconds int `json:"ttl_seconds"`
}
// NodeVPNAssignmentLeaseAcquireRequest is the JSON request body passed to
// Client.AcquireNodeVPNAssignmentLease.
type NodeVPNAssignmentLeaseAcquireRequest struct {
// TTLSeconds is sent as "ttl_seconds"; presumably the requested lease
// lifetime in seconds — confirm against the backend contract.
TTLSeconds int `json:"ttl_seconds"`
// Metadata is an optional free-form object; it is omitted from the JSON
// body when empty.
Metadata map[string]any `json:"metadata,omitempty"`
}
type MeshLinkObservationRequest struct {
SourceNodeID string `json:"source_node_id"`
TargetNodeID string `json:"target_node_id"`
@@ -658,6 +663,17 @@ func (c *Client) ReportNodeVPNAssignmentStatus(ctx context.Context, clusterID, n
return c.postJSON(ctx, path, request, nil)
}
// AcquireNodeVPNAssignmentLease posts request to
// /clusters/{clusterID}/nodes/{nodeID}/vpn/assignments/{vpnConnectionID}/lease/acquire
// and returns the lease found under the "lease" key of the response.
//
// The identifiers are interpolated into the path as-is. A nil lease is
// returned whenever postJSON reports an error.
func (c *Client) AcquireNodeVPNAssignmentLease(ctx context.Context, clusterID, nodeID, vpnConnectionID string, request NodeVPNAssignmentLeaseAcquireRequest) (*NodeVPNAssignmentLease, error) {
	endpoint := fmt.Sprintf("/clusters/%s/nodes/%s/vpn/assignments/%s/lease/acquire", clusterID, nodeID, vpnConnectionID)

	// The backend wraps the lease in an envelope object.
	envelope := struct {
		Lease NodeVPNAssignmentLease `json:"lease"`
	}{}
	if postErr := c.postJSON(ctx, endpoint, request, &envelope); postErr != nil {
		return nil, postErr
	}
	return &envelope.Lease, nil
}
func (c *Client) RenewNodeVPNAssignmentLease(ctx context.Context, clusterID, nodeID, vpnConnectionID, leaseID string, request NodeVPNAssignmentLeaseRenewRequest) error {
path := fmt.Sprintf("/clusters/%s/nodes/%s/vpn/assignments/%s/lease/%s/renew", clusterID, nodeID, vpnConnectionID, leaseID)
return c.postJSON(ctx, path, request, nil)
@@ -40,6 +40,10 @@ type Config struct {
MeshSyntheticConfigPath string
MeshPeerEndpointsJSON string
MeshSyntheticRoutesJSON string
RemoteWorkspaceRealAdapterEnabled bool
RemoteWorkspaceRealAdapterCommand string
RemoteWorkspaceRealAdapterArgsJSON string
RemoteWorkspaceRealAdapterWorkDir string
}
func Load(args []string, env map[string]string) (Config, error) {
@@ -73,6 +77,10 @@ func Load(args []string, env map[string]string) (Config, error) {
fs.StringVar(&cfg.MeshSyntheticConfigPath, "mesh-synthetic-config", getEnv(env, "RAP_MESH_SYNTHETIC_CONFIG", ""), "Path to scoped synthetic mesh config snapshot. Preferred over debug JSON env.")
fs.StringVar(&cfg.MeshPeerEndpointsJSON, "mesh-peer-endpoints-json", getEnv(env, "RAP_MESH_PEER_ENDPOINTS_JSON", ""), "JSON object mapping peer node_id to synthetic mesh endpoint URL.")
fs.StringVar(&cfg.MeshSyntheticRoutesJSON, "mesh-synthetic-routes-json", getEnv(env, "RAP_MESH_SYNTHETIC_ROUTES_JSON", ""), "JSON array of synthetic mesh routes for test-only runtime.")
fs.BoolVar(&cfg.RemoteWorkspaceRealAdapterEnabled, "remote-workspace-real-adapter-enabled", getEnvBool(env, "RAP_REMOTE_WORKSPACE_REAL_ADAPTER_ENABLED", false), "Request future real remote workspace adapter supervision. Disabled until the real runtime stage is implemented.")
fs.StringVar(&cfg.RemoteWorkspaceRealAdapterCommand, "remote-workspace-real-adapter-command", getEnv(env, "RAP_REMOTE_WORKSPACE_REAL_ADAPTER_COMMAND", ""), "Future real remote workspace adapter command path. Redacted from status payloads.")
fs.StringVar(&cfg.RemoteWorkspaceRealAdapterArgsJSON, "remote-workspace-real-adapter-args-json", getEnv(env, "RAP_REMOTE_WORKSPACE_REAL_ADAPTER_ARGS_JSON", ""), "Future real remote workspace adapter args JSON. Redacted from status payloads.")
fs.StringVar(&cfg.RemoteWorkspaceRealAdapterWorkDir, "remote-workspace-real-adapter-workdir", getEnv(env, "RAP_REMOTE_WORKSPACE_REAL_ADAPTER_WORKDIR", ""), "Future real remote workspace adapter working directory. Redacted from status payloads.")
heartbeatSeconds := getEnvInt(env, "RAP_HEARTBEAT_INTERVAL_SECONDS", 15)
fs.DurationVar(&cfg.HeartbeatInterval, "heartbeat-interval", time.Duration(heartbeatSeconds)*time.Second, "Heartbeat interval.")
enrollmentPollIntervalSeconds := getEnvInt(env, "RAP_ENROLLMENT_POLL_INTERVAL_SECONDS", 5)
@@ -100,6 +108,9 @@ func Load(args []string, env map[string]string) (Config, error) {
cfg.MeshSyntheticConfigPath = strings.TrimSpace(cfg.MeshSyntheticConfigPath)
cfg.MeshPeerEndpointsJSON = strings.TrimSpace(cfg.MeshPeerEndpointsJSON)
cfg.MeshSyntheticRoutesJSON = strings.TrimSpace(cfg.MeshSyntheticRoutesJSON)
cfg.RemoteWorkspaceRealAdapterCommand = strings.TrimSpace(cfg.RemoteWorkspaceRealAdapterCommand)
cfg.RemoteWorkspaceRealAdapterArgsJSON = strings.TrimSpace(cfg.RemoteWorkspaceRealAdapterArgsJSON)
cfg.RemoteWorkspaceRealAdapterWorkDir = strings.TrimSpace(cfg.RemoteWorkspaceRealAdapterWorkDir)
if cfg.BackendURL == "" {
return Config{}, errors.New("backend URL is required")
}
@@ -34,6 +34,10 @@ func TestLoadConfigFromEnvAndArgs(t *testing.T) {
"RAP_MESH_SYNTHETIC_CONFIG": "/tmp/rap-node/mesh-synthetic.json",
"RAP_MESH_PEER_ENDPOINTS_JSON": `{"node-b":"http://127.0.0.1:19002"}`,
"RAP_MESH_SYNTHETIC_ROUTES_JSON": `[{"route_id":"route-1"}]`,
"RAP_REMOTE_WORKSPACE_REAL_ADAPTER_ENABLED": "true",
"RAP_REMOTE_WORKSPACE_REAL_ADAPTER_COMMAND": " /opt/rap/bin/rdp-worker ",
"RAP_REMOTE_WORKSPACE_REAL_ADAPTER_ARGS_JSON": ` ["--future-probe"] `,
"RAP_REMOTE_WORKSPACE_REAL_ADAPTER_WORKDIR": " /var/lib/rap-node-agent/rdp-worker ",
})
if err != nil {
t.Fatalf("load config: %v", err)
@@ -85,6 +89,12 @@ func TestLoadConfigFromEnvAndArgs(t *testing.T) {
if cfg.MeshPeerEndpointsJSON == "" || cfg.MeshSyntheticRoutesJSON == "" {
t.Fatalf("mesh live synthetic config was not loaded: %+v", cfg)
}
if !cfg.RemoteWorkspaceRealAdapterEnabled ||
cfg.RemoteWorkspaceRealAdapterCommand != "/opt/rap/bin/rdp-worker" ||
cfg.RemoteWorkspaceRealAdapterArgsJSON != `["--future-probe"]` ||
cfg.RemoteWorkspaceRealAdapterWorkDir != "/var/lib/rap-node-agent/rdp-worker" {
t.Fatalf("unexpected remote workspace real adapter config: %+v", cfg)
}
}
func TestLoadConfigDefaultsEnrollmentPollingToNoTimeout(t *testing.T) {
@@ -98,6 +108,12 @@ func TestLoadConfigDefaultsEnrollmentPollingToNoTimeout(t *testing.T) {
if cfg.EnrollmentPollTimeout != 0 {
t.Fatalf("EnrollmentPollTimeout = %s, want no timeout", cfg.EnrollmentPollTimeout)
}
if cfg.RemoteWorkspaceRealAdapterEnabled ||
cfg.RemoteWorkspaceRealAdapterCommand != "" ||
cfg.RemoteWorkspaceRealAdapterArgsJSON != "" ||
cfg.RemoteWorkspaceRealAdapterWorkDir != "" {
t.Fatalf("real adapter config should default disabled and empty: %+v", cfg)
}
}
func TestLoadConfigRejectsNegativeProductionObservationSinkCapacity(t *testing.T) {
@@ -0,0 +1,27 @@
//go:build !windows
package hostagent
import "syscall"
// diskUsage reports the space usage of the filesystem containing path, as
// returned by statfs.
//
// Free space is computed from Bavail (per statfs(2), the blocks available to
// unprivileged users), so UsedBytes is everything else in the filesystem.
// UsedPercent is rounded up to the next whole percent and AvailablePercent is
// its complement to 100. A filesystem reporting zero total blocks yields
// UsedPercent 0.
func diskUsage(path string) (DiskUsage, error) {
	var fsStat syscall.Statfs_t
	statErr := syscall.Statfs(path, &fsStat)
	if statErr != nil {
		return DiskUsage{}, statErr
	}

	blockSize := uint64(fsStat.Bsize)
	totalBytes := fsStat.Blocks * blockSize
	freeBytes := fsStat.Bavail * blockSize
	usedBytes := totalBytes - freeBytes

	usedPercent := 0
	if totalBytes > 0 {
		// Integer ceiling of usedBytes*100/totalBytes.
		usedPercent = int((usedBytes*100 + totalBytes - 1) / totalBytes)
	}

	usage := DiskUsage{
		Path:             path,
		TotalBytes:       totalBytes,
		FreeBytes:        freeBytes,
		UsedBytes:        usedBytes,
		UsedPercent:      usedPercent,
		AvailablePercent: 100 - usedPercent,
	}
	return usage, nil
}
@@ -0,0 +1,9 @@
//go:build windows
package hostagent
import "fmt"
// diskUsage is the Windows stub: disk usage measurement is not implemented
// there, so it always returns an error alongside a DiskUsage that carries
// only the requested path.
func diskUsage(path string) (DiskUsage, error) {
	unsupported := DiskUsage{Path: path}
	return unsupported, fmt.Errorf("disk usage monitor is not implemented on windows")
}
@@ -0,0 +1,494 @@
package hostagent
import (
"context"
"encoding/json"
"errors"
"fmt"
"os"
"path/filepath"
"strings"
"time"
"github.com/example/remote-access-platform/agents/rap-node-agent/internal/state"
)
// Defaults and fixed identifiers for the host monitor. normalizeMonitorConfig
// substitutes the Default* values for configuration fields left at their zero
// value.
const (
// Product name used when MonitorConfig.Product is empty.
DefaultMonitorProduct = "rap-host-agent"
DefaultMonitorPhase = "host_monitor"
// Seconds between monitor runs.
DefaultMonitorIntervalSeconds = 60
// Disk used-percent thresholds: warning, cleanup trigger, and failure.
DefaultMonitorDiskWarnPercent = 80
DefaultMonitorDiskCleanupPercent = 85
DefaultMonitorDiskCriticalPercent = 95
// Seconds that must pass after a successful start/restart of a container
// before the monitor remediates the same container again.
DefaultMonitorRestartCooldownSec = 300
// Minimum age, in minutes, of a temporary entry before cleanup removes it.
DefaultMonitorTmpMinAgeMinutes = 240
// Seconds after which a container in the restarting state counts as stuck.
DefaultMonitorStaleRestartingSec = 180
DefaultMonitorDockerBinary = "docker"
DefaultMonitorDiskPath = "/"
DefaultMonitorTmpDir = "/tmp"
// Value of MonitorResult.SchemaVersion.
DefaultMonitorStatusSchemaVersion = "rap.host_monitor_status.v1"
// Value written to MonitorContainerStatus.Status after a successful
// start/restart performed by the monitor.
DefaultMonitorRemediationSucceeded = "remediated"
)
// MonitorConfig configures RunMonitorLoop and RunMonitorOnce. Fields left at
// their zero value are filled in by normalizeMonitorConfig unless noted
// otherwise.
type MonitorConfig struct {
// BackendURL is the Control Plane API base URL used for status reports;
// surrounding whitespace and trailing slashes are stripped on normalization.
BackendURL string
ClusterID string
NodeID string
// StateDir is the host path containing the node-agent identity.json.
StateDir string
// Product is the status product name; defaults to DefaultMonitorProduct.
Product string
CurrentVersion string
// Interval is the pause between runs of the loop; non-positive values are
// replaced by the default interval.
Interval time.Duration
// InitialDelay, when positive, is waited out before the first run.
InitialDelay time.Duration
// MaxRuns, when positive, stops the loop after that many runs; otherwise
// the loop runs until its context ends.
MaxRuns int
DockerBinary string
// WatchContainers lists the Docker containers to inspect on every run.
WatchContainers []string
// RestartContainers enables start/restart remediation of watched containers.
RestartContainers bool
// RestartCooldown is the minimum time between remediations of the same
// container, measured from the last successful one.
RestartCooldown time.Duration
// StaleRestartingAfter is how long a container may stay in the restarting
// state before it is restarted by the monitor.
StaleRestartingAfter time.Duration
// DiskPath is the filesystem path whose usage is measured.
DiskPath string
// TmpDir is the directory scanned for stale rap-* and go-build* entries
// during cleanup.
TmpDir string
// Disk used-percent thresholds: warning status, cleanup trigger, and
// failure status (checked after cleanup).
DiskWarnPercent int
DiskCleanupPercent int
DiskCriticalPercent int
// TmpMinAge is the minimum modification age of a TmpDir entry before it is
// removed.
TmpMinAge time.Duration
// CleanupDocker enables the docker prune commands during cleanup.
CleanupDocker bool
// StatusFile is passed to writeMonitorStatusFile after every loop run.
StatusFile string
// Runner executes the docker commands; defaults to ExecRunner{}.
Runner CommandRunner
// Logf receives loop diagnostics; a nil Logf suppresses the status-file and
// report failure messages.
Logf func(format string, args ...any)
// restartHistory maps a container name to the time of its last successful
// remediation. RunMonitorLoop installs it; when nil no cooldown is applied.
restartHistory map[string]time.Time
}
// DiskUsage is the JSON-serializable disk usage snapshot produced by
// diskUsage for a single path.
type DiskUsage struct {
// Path is the path that was measured.
Path string `json:"path"`
TotalBytes uint64 `json:"total_bytes"`
FreeBytes uint64 `json:"free_bytes"`
UsedBytes uint64 `json:"used_bytes"`
// UsedPercent is compared against the warn/cleanup/critical thresholds.
UsedPercent int `json:"used_percent"`
AvailablePercent int `json:"available_percent"`
}
// MonitorContainerStatus describes one watched container as seen by a monitor
// run: the state copied from `docker inspect` plus the outcome of any
// remediation attempted during the same run.
type MonitorContainerStatus struct {
// Name is the container name (leading "/" from docker inspect removed).
Name string `json:"name"`
// Status is the docker state status, or DefaultMonitorRemediationSucceeded
// after a successful start/restart by the monitor.
Status string `json:"status,omitempty"`
Running bool `json:"running"`
Restarting bool `json:"restarting"`
ExitCode int `json:"exit_code,omitempty"`
// Health is the docker health status; empty when the inspect output has no
// Health section.
Health string `json:"health,omitempty"`
RestartCount int `json:"restart_count,omitempty"`
StartedAt string `json:"started_at,omitempty"`
FinishedAt string `json:"finished_at,omitempty"`
// LastAction is the docker verb ("start" or "restart") run by the monitor.
LastAction string `json:"last_action,omitempty"`
LastActionOK bool `json:"last_action_ok,omitempty"`
// LastActionError holds an inspect failure, the container's own State.Error,
// or the error of a failed remediation; it is cleared after a successful
// remediation.
LastActionError string `json:"last_action_error,omitempty"`
}
// MonitorAction records one remediation or cleanup step taken (or skipped)
// during a monitor run.
type MonitorAction struct {
// Kind identifies the step, e.g. "docker_start", "docker_restart_skipped",
// "docker_image_prune" or "tmp_cleanup".
Kind string `json:"kind"`
// Target is the container name, or the temp directory for tmp_cleanup.
Target string `json:"target,omitempty"`
// Reason is what triggered the step.
Reason string `json:"reason,omitempty"`
// Success is false when the step failed; any failed action marks the whole
// run as "failed".
Success bool `json:"success"`
Error string `json:"error,omitempty"`
}
// MonitorResult is the outcome of a single monitor run, as returned by
// RunMonitorOnce.
type MonitorResult struct {
// SchemaVersion is always DefaultMonitorStatusSchemaVersion.
SchemaVersion string `json:"schema_version"`
// Status is "ok", "warning" or "failed".
Status string `json:"status"`
// ObservedAt is the UTC time at which the run started.
ObservedAt time.Time `json:"observed_at"`
// Disk is nil when disk usage could not be measured.
Disk *DiskUsage `json:"disk,omitempty"`
Containers []MonitorContainerStatus `json:"containers,omitempty"`
Actions []MonitorAction `json:"actions,omitempty"`
// Errors collects human-readable failure messages from the run.
Errors []string `json:"errors,omitempty"`
}
// monitorDockerInspect is the subset of one `docker inspect` array element
// that inspectMonitorContainer decodes.
type monitorDockerInspect struct {
Name string `json:"Name"`
RestartCount int `json:"RestartCount"`
State struct {
Status string `json:"Status"`
Running bool `json:"Running"`
Restarting bool `json:"Restarting"`
ExitCode int `json:"ExitCode"`
Error string `json:"Error"`
StartedAt string `json:"StartedAt"`
FinishedAt string `json:"FinishedAt"`
// Health stays nil when the inspect output carries no Health object.
Health *struct {
Status string `json:"Status"`
} `json:"Health"`
} `json:"State"`
}
// RunMonitorLoop runs the host monitor repeatedly until cfg.MaxRuns runs have
// completed (when MaxRuns is positive) or a sleep is interrupted through ctx.
//
// After the optional initial delay, each iteration performs one
// RunMonitorOnce pass, logs the result, writes the status file and reports
// the status. Status-file and report failures are only logged (when cfg.Logf
// is set) and never stop the loop. A single restart history is shared by all
// iterations so that the restart cooldown applies across runs.
//
// It returns nil once the run limit is reached, or the error returned by
// sleepContext.
func RunMonitorLoop(ctx context.Context, cfg MonitorConfig) error {
	cfg = normalizeMonitorConfig(cfg)
	if cfg.InitialDelay > 0 {
		if delayErr := sleepContext(ctx, cfg.InitialDelay); delayErr != nil {
			return delayErr
		}
	}

	// One history map for the lifetime of the loop; every run sees the
	// remediation times recorded by the previous ones.
	cfg.restartHistory = map[string]time.Time{}

	for completed := 1; ; completed++ {
		outcome := RunMonitorOnce(ctx, cfg)
		logMonitorResult(cfg, outcome)

		if fileErr := writeMonitorStatusFile(cfg.StatusFile, outcome); fileErr != nil && cfg.Logf != nil {
			cfg.Logf("monitor status-file failed: %v", fileErr)
		}
		if reportErr := reportMonitorStatus(ctx, cfg, outcome); reportErr != nil && cfg.Logf != nil {
			cfg.Logf("monitor report failed: %v", reportErr)
		}

		if cfg.MaxRuns > 0 && completed >= cfg.MaxRuns {
			return nil
		}
		if waitErr := sleepContext(ctx, cfg.Interval); waitErr != nil {
			return waitErr
		}
	}
}
// RunMonitorOnce performs a single monitor pass and returns its result.
//
// Disk: usage of cfg.DiskPath is measured. Reaching DiskWarnPercent sets the
// status to "warning"; reaching DiskCleanupPercent triggers runCleanup and a
// re-measurement; if the (possibly refreshed) usage still reaches
// DiskCriticalPercent the status becomes "failed". A measurement error is
// recorded in Errors without changing the status.
//
// Containers: every distinct watched container is inspected and, when
// cfg.RestartContainers is set, remediated. A container that is not running,
// unhealthy, restarting, or carrying an error raises an "ok" status to
// "warning".
//
// Finally, any recorded action that did not succeed forces the status to
// "failed" and contributes its error text to Errors.
func RunMonitorOnce(ctx context.Context, cfg MonitorConfig) MonitorResult {
	cfg = normalizeMonitorConfig(cfg)
	report := MonitorResult{
		SchemaVersion: DefaultMonitorStatusSchemaVersion,
		Status:        "ok",
		ObservedAt:    time.Now().UTC(),
	}

	measured, diskErr := diskUsage(cfg.DiskPath)
	if diskErr != nil {
		report.Errors = append(report.Errors, fmt.Sprintf("disk usage %s: %v", cfg.DiskPath, diskErr))
	} else {
		report.Disk = &measured
		if measured.UsedPercent >= cfg.DiskWarnPercent {
			report.Status = "warning"
		}
		if measured.UsedPercent >= cfg.DiskCleanupPercent {
			runCleanup(ctx, cfg, &report, fmt.Sprintf("disk_used_%d_percent", measured.UsedPercent))
			// Prefer the post-cleanup numbers; keep the first measurement if
			// the second one fails.
			if afterCleanup, err := diskUsage(cfg.DiskPath); err == nil {
				report.Disk = &afterCleanup
			}
		}
		if report.Disk != nil && report.Disk.UsedPercent >= cfg.DiskCriticalPercent {
			report.Status = "failed"
			report.Errors = append(report.Errors, fmt.Sprintf("disk %s critical: %d%% used", cfg.DiskPath, report.Disk.UsedPercent))
		}
	}

	for _, containerName := range uniqueTrimmed(cfg.WatchContainers) {
		container := inspectMonitorContainer(ctx, cfg, containerName)
		if cfg.RestartContainers {
			remediateMonitorContainer(ctx, cfg, &container, &report)
		}
		degraded := !container.Running ||
			container.Health == "unhealthy" ||
			container.Restarting ||
			container.LastActionError != ""
		if degraded && report.Status == "ok" {
			report.Status = "warning"
		}
		report.Containers = append(report.Containers, container)
	}

	for _, taken := range report.Actions {
		if taken.Success {
			continue
		}
		report.Status = "failed"
		if taken.Error != "" {
			report.Errors = append(report.Errors, taken.Error)
		}
	}
	return report
}
// normalizeMonitorConfig returns a copy of cfg with identifiers trimmed and
// unset fields replaced by the package defaults.
//
// BackendURL additionally loses trailing slashes. Interval is defaulted when
// it is zero or negative; every other numeric or duration field is defaulted
// only when it is exactly zero, so negative values pass through unchanged.
// A nil Runner becomes ExecRunner{}.
func normalizeMonitorConfig(cfg MonitorConfig) MonitorConfig {
	stringOr := func(value, fallback string) string {
		if value == "" {
			return fallback
		}
		return value
	}
	intOr := func(value, fallback int) int {
		if value == 0 {
			return fallback
		}
		return value
	}
	durationOr := func(value, fallback time.Duration) time.Duration {
		if value == 0 {
			return fallback
		}
		return value
	}

	cfg.BackendURL = strings.TrimRight(strings.TrimSpace(cfg.BackendURL), "/")
	cfg.ClusterID = strings.TrimSpace(cfg.ClusterID)
	cfg.NodeID = strings.TrimSpace(cfg.NodeID)
	cfg.StateDir = strings.TrimSpace(cfg.StateDir)
	cfg.Product = firstNonEmpty(cfg.Product, DefaultMonitorProduct)

	// Interval is the only field where negative values are also replaced.
	if cfg.Interval <= 0 {
		cfg.Interval = time.Duration(DefaultMonitorIntervalSeconds) * time.Second
	}

	cfg.DockerBinary = stringOr(cfg.DockerBinary, DefaultMonitorDockerBinary)
	cfg.DiskPath = stringOr(cfg.DiskPath, DefaultMonitorDiskPath)
	cfg.TmpDir = stringOr(cfg.TmpDir, DefaultMonitorTmpDir)

	cfg.DiskWarnPercent = intOr(cfg.DiskWarnPercent, DefaultMonitorDiskWarnPercent)
	cfg.DiskCleanupPercent = intOr(cfg.DiskCleanupPercent, DefaultMonitorDiskCleanupPercent)
	cfg.DiskCriticalPercent = intOr(cfg.DiskCriticalPercent, DefaultMonitorDiskCriticalPercent)

	cfg.RestartCooldown = durationOr(cfg.RestartCooldown, time.Duration(DefaultMonitorRestartCooldownSec)*time.Second)
	cfg.StaleRestartingAfter = durationOr(cfg.StaleRestartingAfter, time.Duration(DefaultMonitorStaleRestartingSec)*time.Second)
	cfg.TmpMinAge = durationOr(cfg.TmpMinAge, time.Duration(DefaultMonitorTmpMinAgeMinutes)*time.Minute)

	if cfg.Runner == nil {
		cfg.Runner = ExecRunner{}
	}
	return cfg
}
// inspectMonitorContainer runs `<docker> inspect <name>` through cfg.Runner
// and converts the first element of the JSON output into a
// MonitorContainerStatus.
//
// Failures never surface as a Go error: a command failure, unparsable output
// or an empty result is reported through LastActionError on a status that
// carries only the requested name. On success LastActionError is set only
// when the container's own State.Error is non-empty.
//
// NOTE(review): the inspect call is not restricted with `--type container`,
// so a non-container object with the same name could presumably be returned
// instead — confirm whether that can happen for watched names.
func inspectMonitorContainer(ctx context.Context, cfg MonitorConfig, name string) MonitorContainerStatus {
	status := MonitorContainerStatus{Name: name}
	failWith := func(message string) MonitorContainerStatus {
		status.LastActionError = message
		return status
	}

	output, runErr := cfg.Runner.Run(ctx, cfg.DockerBinary, "inspect", name)
	if runErr != nil {
		return failWith(strings.TrimSpace(runErr.Error()))
	}

	var documents []monitorDockerInspect
	if decodeErr := json.Unmarshal([]byte(output), &documents); decodeErr != nil {
		return failWith(fmt.Sprintf("parse docker inspect: %v", decodeErr))
	}
	if len(documents) == 0 {
		return failWith("docker inspect returned no containers")
	}

	first := documents[0]
	state := first.State

	// Docker reports names with a leading slash; fall back to the requested
	// name when the document has none.
	status.Name = strings.TrimPrefix(firstNonEmpty(first.Name, name), "/")
	status.RestartCount = first.RestartCount
	status.Status = state.Status
	status.Running = state.Running
	status.Restarting = state.Restarting
	status.ExitCode = state.ExitCode
	status.StartedAt = state.StartedAt
	status.FinishedAt = state.FinishedAt
	if state.Health != nil {
		status.Health = strings.TrimSpace(state.Health.Status)
	}
	if state.Error != "" {
		status.LastActionError = state.Error
	}
	return status
}
// remediateMonitorContainer decides whether the inspected container needs a
// `docker start` or `docker restart`, runs it, and records the outcome both
// on status and as a MonitorAction appended to result.
//
// Decision order (first match wins):
//  1. inspect produced an error and no state -> start ("inspect_failed_or_missing")
//  2. health is "unhealthy"                   -> restart ("health_unhealthy")
//  3. restarting for longer than the stale threshold -> restart ("restarting_stale")
//  4. not running but a state is known        -> start ("not_running")
//
// When a restart history is present and the container was successfully
// remediated less than cfg.RestartCooldown ago, no command is run and a
// successful "docker_<verb>_skipped" action is recorded instead. Only
// successful remediations update the history. After a success the status is
// rewritten to running with Status set to DefaultMonitorRemediationSucceeded.
func remediateMonitorContainer(ctx context.Context, cfg MonitorConfig, status *MonitorContainerStatus, result *MonitorResult) {
	if status.Name == "" {
		return
	}

	var verb, reason string
	if status.LastActionError != "" && status.Status == "" {
		verb, reason = "start", "inspect_failed_or_missing"
	} else if status.Health == "unhealthy" {
		verb, reason = "restart", "health_unhealthy"
	} else if status.Restarting && restartingIsStale(status.StartedAt, status.FinishedAt, cfg.StaleRestartingAfter) {
		verb, reason = "restart", "restarting_stale"
	} else if !status.Running && status.Status != "" {
		verb, reason = "start", "not_running"
	} else {
		return
	}

	if cfg.restartHistory != nil {
		previous, seen := cfg.restartHistory[status.Name]
		if seen && time.Since(previous) < cfg.RestartCooldown {
			skipped := MonitorAction{
				Kind:    "docker_" + verb + "_skipped",
				Target:  status.Name,
				Reason:  "restart_cooldown",
				Success: true,
			}
			result.Actions = append(result.Actions, skipped)
			return
		}
	}

	_, runErr := cfg.Runner.Run(ctx, cfg.DockerBinary, verb, status.Name)
	succeeded := runErr == nil

	outcome := MonitorAction{
		Kind:    "docker_" + verb,
		Target:  status.Name,
		Reason:  reason,
		Success: succeeded,
	}
	status.LastAction = verb
	status.LastActionOK = succeeded

	if succeeded {
		if cfg.restartHistory != nil {
			cfg.restartHistory[status.Name] = time.Now()
		}
		status.LastActionError = ""
		status.Running = true
		status.Restarting = false
		status.Status = DefaultMonitorRemediationSucceeded
	} else {
		outcome.Error = strings.TrimSpace(runErr.Error())
		status.LastActionError = outcome.Error
	}
	result.Actions = append(result.Actions, outcome)
}
// restartingIsStale reports whether a container has been in the restarting
// state for at least threshold.
//
// finishedAt is consulted first and startedAt second; the first one that
// parses as RFC 3339 (after trimming whitespace) and is not the zero time
// decides the answer. If neither timestamp is usable the container is treated
// as stale.
func restartingIsStale(startedAt, finishedAt string, threshold time.Duration) bool {
	candidates := [2]string{finishedAt, startedAt}
	for i := range candidates {
		stamp, err := time.Parse(time.RFC3339Nano, strings.TrimSpace(candidates[i]))
		if err != nil || stamp.IsZero() {
			continue
		}
		return time.Since(stamp) >= threshold
	}
	return true
}
// runCleanup frees disk space and appends one MonitorAction per step to result.
//
// When cfg.CleanupDocker is set it runs, in order, `builder prune -af`,
// `image prune -f` and `container prune -f` through cfg.Runner; a failing
// command is recorded and the remaining ones still run. Afterwards it always
// sweeps cfg.TmpDir via cleanupTmpBuildDirs. reason is copied onto every
// recorded action.
func runCleanup(ctx context.Context, cfg MonitorConfig, result *MonitorResult, reason string) {
	if cfg.CleanupDocker {
		pruneCommands := [][]string{
			{"builder", "prune", "-af"},
			{"image", "prune", "-f"},
			{"container", "prune", "-f"},
		}
		for _, command := range pruneCommands {
			_, runErr := cfg.Runner.Run(ctx, cfg.DockerBinary, command...)
			// The action kind is built from the sub-command words only; the
			// trailing flag is left out.
			words := command[:len(command)-1]
			entry := MonitorAction{
				Kind:    "docker_" + strings.Join(words, "_"),
				Reason:  reason,
				Success: runErr == nil,
			}
			if runErr != nil {
				entry.Error = strings.TrimSpace(runErr.Error())
			}
			result.Actions = append(result.Actions, entry)
		}
	}

	removed, err := cleanupTmpBuildDirs(cfg.TmpDir, cfg.TmpMinAge)
	tmpAction := MonitorAction{
		Kind:    "tmp_cleanup",
		Target:  cfg.TmpDir,
		Reason:  reason,
		Success: err == nil,
	}
	if err == nil {
		// On success the target also carries the number of removed entries.
		tmpAction.Target = fmt.Sprintf("%s removed=%d", cfg.TmpDir, removed)
	} else {
		tmpAction.Error = err.Error()
	}
	result.Actions = append(result.Actions, tmpAction)
}
// cleanupTmpBuildDirs removes stale build leftovers directly inside tmpDir.
//
// Only entries whose name starts with "rap-" or "go-build" and whose
// modification time is at least minAge old are removed (recursively). Entries
// whose metadata cannot be read are skipped.
//
// It returns the number of entries removed. A removal failure does not stop
// the sweep: the remaining candidates are still processed, and the first
// removal error is returned alongside the count so the caller can report it.
// An empty, ".", or filesystem-root tmpDir is rejected as unsafe, and a failure
// to list tmpDir is returned as is.
func cleanupTmpBuildDirs(tmpDir string, minAge time.Duration) (int, error) {
	tmpDir = filepath.Clean(strings.TrimSpace(tmpDir))
	if tmpDir == "" || tmpDir == "." || tmpDir == string(filepath.Separator) {
		return 0, fmt.Errorf("unsafe tmp dir: %q", tmpDir)
	}
	entries, err := os.ReadDir(tmpDir)
	if err != nil {
		return 0, err
	}
	now := time.Now()
	removed := 0
	var firstErr error
	for _, entry := range entries {
		name := entry.Name()
		if !strings.HasPrefix(name, "rap-") && !strings.HasPrefix(name, "go-build") {
			continue
		}
		info, err := entry.Info()
		if err != nil || now.Sub(info.ModTime()) < minAge {
			continue
		}
		if err := os.RemoveAll(filepath.Join(tmpDir, name)); err != nil {
			// Keep going: this runs when disk space is short, so one entry
			// that cannot be deleted must not block reclaiming the others.
			if firstErr == nil {
				firstErr = err
			}
			continue
		}
		removed++
	}
	return removed, firstErr
}
// reportMonitorStatus sends one monitor result to the backend through
// ReportNodeUpdateStatus.
//
// The report is silently skipped (nil is returned) when the node identity is
// not ready yet or when the backend URL, cluster ID or node ID is empty. Any
// other identity error is returned. result.Errors, joined with "; ", becomes
// the request's error message when that joined text is non-empty.
func reportMonitorStatus(ctx context.Context, cfg MonitorConfig, result MonitorResult) error {
	cfg = normalizeMonitorConfig(cfg)

	nodeID, clusterID, err := resolveMonitorIdentity(cfg)
	if errors.Is(err, ErrNodeIdentityNotReady) {
		return nil
	}
	if err != nil {
		return err
	}
	if nodeID == "" || clusterID == "" || cfg.BackendURL == "" {
		return nil
	}

	report := NodeUpdateStatusRequest{
		Product:        cfg.Product,
		CurrentVersion: cfg.CurrentVersion,
		Phase:          DefaultMonitorPhase,
		Status:         result.Status,
		Payload: map[string]any{
			"schema_version": result.SchemaVersion,
			"monitor_status": result.Status,
			"disk":           result.Disk,
			"containers":     result.Containers,
			"actions":        result.Actions,
			"errors":         result.Errors,
		},
		ObservedAt: result.ObservedAt,
	}
	if joined := strings.Join(result.Errors, "; "); joined != "" {
		report.ErrorMessage = &joined
	}
	return ReportNodeUpdateStatus(ctx, cfg.BackendURL, clusterID, nodeID, report)
}
// resolveMonitorIdentity returns the node ID and cluster ID to report under,
// in that order.
//
// A non-empty cfg.NodeID is used directly, together with whatever cluster ID
// the config carries. Otherwise the identity file in cfg.StateDir is loaded:
// its node ID is used, and its cluster ID fills in only when the config has
// none. ErrNodeIdentityNotReady is returned when there is no state dir, no
// identity file, or the file holds no node ID; other load errors are returned
// unchanged.
func resolveMonitorIdentity(cfg MonitorConfig) (string, string, error) {
	clusterID := strings.TrimSpace(cfg.ClusterID)
	if configured := strings.TrimSpace(cfg.NodeID); configured != "" {
		return configured, clusterID, nil
	}
	if strings.TrimSpace(cfg.StateDir) == "" {
		return "", clusterID, ErrNodeIdentityNotReady
	}

	stored, err := state.Load(filepath.Join(cfg.StateDir, state.FileName))
	switch {
	case err == nil:
		// Identity file loaded; validate it below.
	case errors.Is(err, os.ErrNotExist):
		return "", clusterID, ErrNodeIdentityNotReady
	default:
		return "", clusterID, err
	}

	storedNodeID := strings.TrimSpace(stored.NodeID)
	if storedNodeID == "" {
		return "", clusterID, ErrNodeIdentityNotReady
	}
	if clusterID == "" {
		clusterID = strings.TrimSpace(stored.ClusterID)
	}
	return storedNodeID, clusterID, nil
}
// writeMonitorStatusFile stores result as indented JSON at path.
//
// A blank path disables the status file and returns nil. The parent directory
// is created when missing. The JSON is first written to "<path>.tmp" and then
// renamed over path, so readers never observe a half-written status file. If
// the write or the rename fails, the temporary file is removed (best effort)
// instead of being left behind, and the original error is returned.
func writeMonitorStatusFile(path string, result MonitorResult) error {
	path = strings.TrimSpace(path)
	if path == "" {
		return nil
	}
	if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
		return err
	}
	payload, err := json.MarshalIndent(result, "", " ")
	if err != nil {
		return err
	}
	tmp := path + ".tmp"
	if err := os.WriteFile(tmp, payload, 0o644); err != nil {
		// A failed write (for example on a full disk) can leave a partial
		// file; drop it. The cleanup error is deliberately ignored because the
		// write error is the one worth reporting.
		_ = os.Remove(tmp)
		return err
	}
	if err := os.Rename(tmp, path); err != nil {
		_ = os.Remove(tmp) // best effort, same reasoning as above
		return err
	}
	return nil
}
// logMonitorResult writes a one-line summary of result (status plus the number
// of containers, actions and errors) through cfg.Logf. It does nothing when no
// logger is configured.
func logMonitorResult(cfg MonitorConfig, result MonitorResult) {
	if cfg.Logf != nil {
		cfg.Logf(
			"monitor status=%s containers=%d actions=%d errors=%d",
			result.Status,
			len(result.Containers),
			len(result.Actions),
			len(result.Errors),
		)
	}
}
// uniqueTrimmed returns values with surrounding whitespace removed, blank
// entries dropped and duplicates collapsed, keeping first-seen order. The
// input slice is not modified and the result is never nil.
func uniqueTrimmed(values []string) []string {
	result := make([]string, 0, len(values))
	known := map[string]struct{}{}
	for i := range values {
		item := strings.TrimSpace(values[i])
		if _, dup := known[item]; dup || item == "" {
			continue
		}
		known[item] = struct{}{}
		result = append(result, item)
	}
	return result
}
@@ -0,0 +1,87 @@
package hostagent
import (
"context"
"fmt"
"strings"
"testing"
"time"
)
// monitorRunner is a test double for the command runner used by the monitor.
// inspect maps a container name to the canned `inspect` output; calls records
// every command line it was asked to run.
type monitorRunner struct {
	inspect map[string]string
	calls   []string
}

// Run records the command line and fakes the result: `inspect <name>` returns
// the canned output for that name, or a "not found" error when none is
// registered; every other command succeeds with empty output.
func (r *monitorRunner) Run(_ context.Context, name string, args ...string) (string, error) {
	r.calls = append(r.calls, strings.TrimSpace(name+" "+strings.Join(args, " ")))
	if len(args) < 2 || args[0] != "inspect" {
		return "", nil
	}
	if canned, found := r.inspect[args[1]]; found {
		return canned, nil
	}
	return "", fmt.Errorf("not found")
}
// TestRunMonitorOnceStartsExitedContainer checks that a watched container
// reported as exited produces exactly one successful docker_start action and
// that `docker start` was actually issued. Disk thresholds are set to 101 so
// no disk-driven action is expected to interfere.
func TestRunMonitorOnceStartsExitedContainer(t *testing.T) {
	const inspectJSON = `[{"Name":"/rap-node-agent","State":{"Status":"exited","Running":false,"ExitCode":137,"StartedAt":"2026-05-13T00:00:00Z","FinishedAt":"2026-05-13T00:01:00Z"}}]`
	runner := &monitorRunner{inspect: map[string]string{"rap-node-agent": inspectJSON}}
	cfg := MonitorConfig{
		WatchContainers:     []string{"rap-node-agent"},
		RestartContainers:   true,
		Runner:              runner,
		DiskPath:            t.TempDir(),
		DiskCleanupPercent:  101,
		DiskWarnPercent:     101,
		DiskCriticalPercent: 101,
	}

	result := RunMonitorOnce(context.Background(), cfg)

	if len(result.Actions) != 1 {
		t.Fatalf("unexpected actions: %+v", result.Actions)
	}
	if first := result.Actions[0]; first.Kind != "docker_start" || !first.Success {
		t.Fatalf("unexpected actions: %+v", result.Actions)
	}
	if !containsCall(runner.calls, "docker start rap-node-agent") {
		t.Fatalf("start call missing: %+v", runner.calls)
	}
}
// TestRunMonitorOnceRestartsUnhealthyContainer checks that a running container
// whose health status is "unhealthy" produces exactly one successful
// docker_restart action and that `docker restart` was actually issued.
func TestRunMonitorOnceRestartsUnhealthyContainer(t *testing.T) {
	const inspectJSON = `[{"Name":"/rap-backend","State":{"Status":"running","Running":true,"StartedAt":"2026-05-13T00:00:00Z","Health":{"Status":"unhealthy"}}}]`
	runner := &monitorRunner{inspect: map[string]string{"rap-backend": inspectJSON}}
	cfg := MonitorConfig{
		WatchContainers:     []string{"rap-backend"},
		RestartContainers:   true,
		Runner:              runner,
		DiskPath:            t.TempDir(),
		DiskCleanupPercent:  101,
		DiskWarnPercent:     101,
		DiskCriticalPercent: 101,
	}

	result := RunMonitorOnce(context.Background(), cfg)

	if len(result.Actions) != 1 {
		t.Fatalf("unexpected actions: %+v", result.Actions)
	}
	if first := result.Actions[0]; first.Kind != "docker_restart" || !first.Success {
		t.Fatalf("unexpected actions: %+v", result.Actions)
	}
	if !containsCall(runner.calls, "docker restart rap-backend") {
		t.Fatalf("restart call missing: %+v", runner.calls)
	}
}
// TestRestartingIsStale covers both sides of the threshold using only a
// startedAt timestamp: ten minutes old against a one-minute threshold is
// stale, while "now" against a one-hour threshold is not.
func TestRestartingIsStale(t *testing.T) {
	tenMinutesAgo := time.Now().Add(-10 * time.Minute).UTC().Format(time.RFC3339Nano)
	if stale := restartingIsStale(tenMinutesAgo, "", time.Minute); !stale {
		t.Fatalf("old restarting container should be stale")
	}

	justNow := time.Now().UTC().Format(time.RFC3339Nano)
	if stale := restartingIsStale(justNow, "", time.Hour); stale {
		t.Fatalf("fresh restarting container should not be stale")
	}
}
// containsCall reports whether want appears verbatim among the recorded
// command lines in calls.
func containsCall(calls []string, want string) bool {
	for i := 0; i < len(calls); i++ {
		if calls[i] == want {
			return true
		}
	}
	return false
}
@@ -16,33 +16,44 @@ const (
)
// UpdateServiceConfig is the input to DockerManager.InstallUpdateService. It
// describes the updater unit to install and, through the Monitor* fields, the
// optional host monitor unit built by buildHostAgentMonitorUnit.
//
// Defaults applied by InstallUpdateService in the visible code: a zero
// HealthTimeoutSec becomes 30, a zero MonitorIntervalSec becomes
// DefaultMonitorIntervalSeconds, an empty BinaryInstallPath becomes
// DefaultHostAgentInstallPath and an empty UnitDir becomes
// DefaultSystemdUnitDir.
type UpdateServiceConfig struct {
RuntimeConfig RuntimeConfig
Product string
CurrentVersion string
Channel string
IntervalSeconds int
InitialDelaySeconds int
Jitter float64
HealthTimeoutSec int
BinaryInstallPath string
SourceBinaryPath string
UnitDir string
ManageSystemd bool
DryRun bool
InstallSelfUpdater bool
SelfUpdateVersion string
RuntimeConfig RuntimeConfig
Product string
CurrentVersion string
Channel string
IntervalSeconds int
InitialDelaySeconds int
Jitter float64
HealthTimeoutSec int
BinaryInstallPath string
SourceBinaryPath string
UnitDir string
ManageSystemd bool
DryRun bool
InstallSelfUpdater bool
SelfUpdateVersion string
// InstallMonitor requests that the host monitor unit is built as well.
InstallMonitor bool
// MonitorIntervalSec is passed to the monitor as --interval-seconds.
MonitorIntervalSec int
// MonitorContainers are watched in addition to RuntimeConfig.ContainerName.
MonitorContainers []string
// MonitorStatusFile is passed as --status-file when it is not blank.
MonitorStatusFile string
// MonitorDiskWarn, MonitorDiskCleanup and MonitorDiskCritical are passed as
// the --disk-*-percent flags; a zero value falls back to the matching
// DefaultMonitorDisk*Percent constant.
MonitorDiskWarn int
MonitorDiskCleanup int
MonitorDiskCritical int
// MonitorCleanupDocker adds --cleanup-docker to the monitor command line.
MonitorCleanupDocker bool
}
// UpdateServiceResult reports what DockerManager.InstallUpdateService produced:
// names, paths and rendered text of the units it built, plus whether they were
// installed and started.
type UpdateServiceResult struct {
Installed bool
Started bool
UnitName string
UnitPath string
BinaryPath string
Unit string
SelfUnitName string
SelfUnitPath string
SelfUnit string
Installed bool
Started bool
UnitName string
UnitPath string
BinaryPath string
Unit string
SelfUnitName string
SelfUnitPath string
SelfUnit string
// MonitorUnitName, MonitorUnitPath and MonitorUnit carry the name, target
// path and rendered text of the host monitor unit. They are only filled in
// when the config asked for the monitor (InstallMonitor).
MonitorUnitName string
MonitorUnitPath string
MonitorUnit string
}
func (m DockerManager) InstallUpdateService(ctx context.Context, cfg UpdateServiceConfig) (UpdateServiceResult, error) {
@@ -59,6 +70,9 @@ func (m DockerManager) InstallUpdateService(ctx context.Context, cfg UpdateServi
if cfg.HealthTimeoutSec == 0 {
cfg.HealthTimeoutSec = 30
}
if cfg.MonitorIntervalSec == 0 {
cfg.MonitorIntervalSec = DefaultMonitorIntervalSeconds
}
cfg.BinaryInstallPath = firstNonEmpty(cfg.BinaryInstallPath, DefaultHostAgentInstallPath)
cfg.UnitDir = firstNonEmpty(cfg.UnitDir, DefaultSystemdUnitDir)
unitName := "rap-host-agent-updater-" + safeUnitSlug(cfg.RuntimeConfig.ContainerName) + ".service"
@@ -82,6 +96,15 @@ func (m DockerManager) InstallUpdateService(ctx context.Context, cfg UpdateServi
result.SelfUnitName = selfUnitName
result.SelfUnitPath = selfUnitPath
}
if cfg.InstallMonitor {
monitorUnit, monitorUnitName, monitorUnitPath, err := buildHostAgentMonitorUnit(cfg)
if err != nil {
return result, err
}
result.MonitorUnit = monitorUnit
result.MonitorUnitName = monitorUnitName
result.MonitorUnitPath = monitorUnitPath
}
return result, nil
}
if runtime.GOOS != "linux" && cfg.UnitDir == DefaultSystemdUnitDir {
@@ -108,6 +131,18 @@ func (m DockerManager) InstallUpdateService(ctx context.Context, cfg UpdateServi
result.SelfUnitName = selfUnitName
result.SelfUnitPath = selfUnitPath
}
if cfg.InstallMonitor {
monitorUnit, monitorUnitName, monitorUnitPath, err := buildHostAgentMonitorUnit(cfg)
if err != nil {
return result, err
}
if err := os.WriteFile(monitorUnitPath, []byte(monitorUnit), 0o644); err != nil {
return result, err
}
result.MonitorUnit = monitorUnit
result.MonitorUnitName = monitorUnitName
result.MonitorUnitPath = monitorUnitPath
}
result.Installed = true
if cfg.ManageSystemd {
runner := m.Runner
@@ -125,6 +160,11 @@ func (m DockerManager) InstallUpdateService(ctx context.Context, cfg UpdateServi
return result, err
}
}
if cfg.InstallMonitor && result.MonitorUnitName != "" {
if _, err := runner.Run(ctx, "systemctl", "enable", "--now", result.MonitorUnitName); err != nil {
return result, err
}
}
result.Started = true
}
return result, nil
@@ -223,6 +263,64 @@ WantedBy=multi-user.target
`, systemdJoin(args)), unitName, unitPath, nil
}
// buildHostAgentMonitorUnit renders the systemd unit that runs the host-agent
// `monitor-loop` command for the configured node.
//
// It returns the unit text, the unit file name and the full unit path, in that
// order. The runtime config must provide a backend URL, cluster ID and state
// dir. The watched containers are the runtime container plus
// cfg.MonitorContainers, trimmed and de-duplicated; at least one is required.
// Zero interval and disk thresholds fall back to their DefaultMonitor*
// constants.
func buildHostAgentMonitorUnit(cfg UpdateServiceConfig) (string, string, string, error) {
	rt := cfg.RuntimeConfig.Normalize()
	if rt.BackendURL == "" || rt.ClusterID == "" || rt.StateDir == "" {
		return "", "", "", fmt.Errorf("backend-url, cluster-id, and state-dir are required for host monitor")
	}

	watched := uniqueTrimmed(append([]string{rt.ContainerName}, cfg.MonitorContainers...))
	if len(watched) == 0 {
		return "", "", "", fmt.Errorf("at least one monitor container is required")
	}

	unitName := "rap-host-agent-monitor-" + safeUnitSlug(rt.ContainerName) + ".service"
	unitPath := filepath.Join(firstNonEmpty(cfg.UnitDir, DefaultSystemdUnitDir), unitName)

	// intFlag renders an integer flag value, substituting fallback for zero.
	intFlag := func(value, fallback int) string {
		return fmt.Sprintf("%d", firstNonZero(value, fallback))
	}

	command := []string{cfg.BinaryInstallPath, "monitor-loop"}
	command = append(command,
		"--backend-url", rt.BackendURL,
		"--cluster-id", rt.ClusterID,
		"--state-dir", rt.StateDir,
		"--current-version", firstNonEmpty(cfg.SelfUpdateVersion, cfg.CurrentVersion),
		"--interval-seconds", intFlag(cfg.MonitorIntervalSec, DefaultMonitorIntervalSeconds),
		"--disk-warn-percent", intFlag(cfg.MonitorDiskWarn, DefaultMonitorDiskWarnPercent),
		"--disk-cleanup-percent", intFlag(cfg.MonitorDiskCleanup, DefaultMonitorDiskCleanupPercent),
		"--disk-critical-percent", intFlag(cfg.MonitorDiskCritical, DefaultMonitorDiskCriticalPercent),
	)
	if cfg.MonitorCleanupDocker {
		command = append(command, "--cleanup-docker")
	}
	if statusFile := strings.TrimSpace(cfg.MonitorStatusFile); statusFile != "" {
		command = append(command, "--status-file", statusFile)
	}
	for i := range watched {
		command = append(command, "--watch-container", watched[i])
	}

	unitText := fmt.Sprintf(`[Unit]
Description=RAP host-agent monitor for %s
After=network-online.target docker.service
Wants=network-online.target
Requires=docker.service
[Service]
Type=simple
ExecStart=%s
Restart=always
RestartSec=30
[Install]
WantedBy=multi-user.target
`, rt.ContainerName, systemdJoin(command))
	return unitText, unitName, unitPath, nil
}
// firstNonZero returns the first argument that is not zero, or zero when every
// argument is zero or none is given.
func firstNonZero(values ...int) int {
	for i := range values {
		if candidate := values[i]; candidate != 0 {
			return candidate
		}
	}
	return 0
}
func installHostAgentBinary(sourcePath, targetPath string) error {
sourcePath = strings.TrimSpace(sourcePath)
targetPath = strings.TrimSpace(targetPath)
@@ -24,15 +24,18 @@ func TestInstallUpdateServiceWritesSystemdUnit(t *testing.T) {
ContainerName: "rap-node-agent-node-a",
StateDir: "/var/lib/rap/nodes/node-a",
},
CurrentVersion: "0.1.0-current",
IntervalSeconds: 60,
Jitter: 0.2,
SourceBinaryPath: source,
BinaryInstallPath: binaryPath,
UnitDir: unitDir,
ManageSystemd: false,
InstallSelfUpdater: true,
SelfUpdateVersion: "0.1.0-host",
CurrentVersion: "0.1.0-current",
IntervalSeconds: 60,
Jitter: 0.2,
SourceBinaryPath: source,
BinaryInstallPath: binaryPath,
UnitDir: unitDir,
ManageSystemd: false,
InstallSelfUpdater: true,
SelfUpdateVersion: "0.1.0-host",
InstallMonitor: true,
MonitorContainers: []string{"rap-test-backend"},
MonitorCleanupDocker: true,
})
if err != nil {
t.Fatalf("install update service: %v", err)
@@ -73,6 +76,25 @@ func TestInstallUpdateServiceWritesSystemdUnit(t *testing.T) {
if text := string(selfUnit); !strings.Contains(text, "update-host-agent-loop") || !strings.Contains(text, "--current-version 0.1.0-host") {
t.Fatalf("unexpected self unit:\n%s", text)
}
if result.MonitorUnitName == "" || result.MonitorUnitPath == "" {
t.Fatalf("monitor result = %+v", result)
}
monitorUnit, err := os.ReadFile(result.MonitorUnitPath)
if err != nil {
t.Fatalf("read monitor unit: %v", err)
}
monitorText := string(monitorUnit)
for _, want := range []string{
"monitor-loop",
"--watch-container rap-node-agent-node-a",
"--watch-container rap-test-backend",
"--cleanup-docker",
"Restart=always",
} {
if !strings.Contains(monitorText, want) {
t.Fatalf("monitor unit missing %q:\n%s", want, monitorText)
}
}
}
func TestWindowsHostAgentUpdateScriptTargetsWindowsService(t *testing.T) {
@@ -313,6 +313,9 @@ func (m DockerManager) ApplyUpdate(ctx context.Context, req UpdateRequest) (Upda
cfg.ClusterID = firstNonEmpty(cfg.ClusterID, req.ClusterID)
cfg.ContainerName = req.ContainerName
cfg.Image = artifactImage(*plan.Artifact, cfg.Image)
if artifactDockerVPNGatewayEnabled(*plan.Artifact) {
cfg.DockerVPNGatewayEnabled = true
}
cfg.ImageArtifactURLs = artifactURLsForBackend(*plan.Artifact, req.BackendURL)
cfg.ImageArtifactSHA256 = plan.Artifact.SHA256
cfg.ImageArtifactSizeBytes = plan.Artifact.SizeBytes
@@ -681,6 +684,20 @@ func artifactImage(artifact ReleaseArtifact, fallback string) string {
return firstNonEmpty(fallback, DefaultImage)
}
// artifactDockerVPNGatewayEnabled reports whether the artifact's JSON metadata
// switches the VPN gateway on, via either "docker_vpn_gateway_enabled" or the
// legacy "vpn_gateway_enabled" key. Missing or undecodable metadata counts as
// disabled.
func artifactDockerVPNGatewayEnabled(artifact ReleaseArtifact) bool {
	if len(artifact.Metadata) == 0 {
		return false
	}
	flags := struct {
		Current bool `json:"docker_vpn_gateway_enabled"`
		Legacy  bool `json:"vpn_gateway_enabled"`
	}{}
	if json.Unmarshal(artifact.Metadata, &flags) != nil {
		return false
	}
	return flags.Current || flags.Legacy
}
func artifactURLs(artifact ReleaseArtifact) []string {
out := make([]string, 0, 1+len(artifact.URLs))
for _, raw := range append([]string{artifact.URL}, artifact.URLs...) {
@@ -596,6 +596,18 @@ func TestArtifactImageDerivesDockerTagFromProductAndVersion(t *testing.T) {
}
}
func TestArtifactDockerVPNGatewayEnabledFromMetadata(t *testing.T) {
if !artifactDockerVPNGatewayEnabled(ReleaseArtifact{Metadata: json.RawMessage(`{"docker_vpn_gateway_enabled":true}`)}) {
t.Fatal("expected docker vpn gateway metadata to enable gateway runtime")
}
if !artifactDockerVPNGatewayEnabled(ReleaseArtifact{Metadata: json.RawMessage(`{"vpn_gateway_enabled":true}`)}) {
t.Fatal("expected legacy vpn gateway metadata to enable gateway runtime")
}
if artifactDockerVPNGatewayEnabled(ReleaseArtifact{Metadata: json.RawMessage(`{"docker_vpn_gateway_enabled":false}`)}) {
t.Fatal("expected disabled metadata to remain disabled")
}
}
func serverArtifactURL(r *http.Request) string {
scheme := "http"
if r.TLS != nil {
@@ -16,119 +16,176 @@ const DefaultRemoteWorkspaceAdapterMailboxConsumerCapacity = 32
// RemoteWorkspaceFrameProbeSinkRuntimeID is the runtime identifier the probe
// sink reports about itself; the mailbox preflight snapshot carries it as
// AdapterRuntimeID.
const RemoteWorkspaceFrameProbeSinkRuntimeID = "node_agent_rdp_worker_contract_probe"
// RemoteWorkspaceFrameProbeSink holds the probe sink's session tables together
// with sink-wide counters and "last observed" values for sessions, mailbox
// reads, mailbox preflights, mailbox consumers, frame totals and backpressure.
//
// NOTE(review): mu presumably guards every other field (the helpers that touch
// them carry a Locked suffix) — confirm against the methods.
type RemoteWorkspaceFrameProbeSink struct {
mu sync.Mutex
sequence int64
queueCapacity int
sessionTTL time.Duration
sessions map[string]*remoteWorkspaceAdapterProbeSession
terminalSessions map[string]remoteWorkspaceAdapterProbeTerminalSession
sessionCreatedTotal int64
sessionBoundTotal int64
sessionBackpressureTotal int64
sessionExpiredTotal int64
sessionClosedTotal int64
sessionResetTotal int64
sessionControlTotal int64
mailboxEventSequence int64
mailboxEnqueuedTotal int64
mailboxDrainedTotal int64
mailboxDroppedTotal int64
mailboxReadTotal int64
mailboxWaitTotal int64
mailboxWaitTimeoutTotal int64
mailboxEmptyReadTotal int64
mailboxResumeReadTotal int64
mailboxAfterSequenceReadTotal int64
mailboxReturnedTotal int64
mailboxSkippedTotal int64
mailboxConsumerReadTotal int64
mailboxConsumerAckTotal int64
mailboxConsumerResetTotal int64
mailboxConsumerEvictedTotal int64
lastMailboxReadAt string
lastMailboxAdapterSessionID string
lastMailboxWaitMs int
lastMailboxWaited bool
lastMailboxWaitTimeout bool
lastMailboxEmpty bool
lastMailboxResumeFrom string
lastMailboxResumeSequence int64
lastMailboxResumeConsumerID string
lastMailboxAfterSequence int64
lastMailboxSkippedCount int
lastMailboxReturnedCount int
lastMailboxConsumerID string
lastMailboxConsumerAdapterSessionID string
lastMailboxConsumerReadAt string
lastMailboxConsumerAckAt string
lastMailboxConsumerCheckpoint int64
lastMailboxConsumerAck int64
acceptedFramesTotal int64
droppedFramesTotal int64
ackedFramesTotal int64
backpressureCount int64
lastBackpressureAt string
lastBackpressureReason string
lastRejectedFrameCount int
lastRejectedAdapterSessionID string
lastRejectedChannelClass string
lastRejectedAdapterContractID string
lastRejectedQueueCapacity int
lastRejectedQueueDepth int
lastControl RemoteWorkspaceAdapterSessionControlResult
last RemoteWorkspaceFrameBatchDeliveryReceipt
mu sync.Mutex
sequence int64
queueCapacity int
sessionTTL time.Duration
sessions map[string]*remoteWorkspaceAdapterProbeSession
terminalSessions map[string]remoteWorkspaceAdapterProbeTerminalSession
sessionCreatedTotal int64
sessionBoundTotal int64
sessionBackpressureTotal int64
sessionExpiredTotal int64
sessionClosedTotal int64
sessionResetTotal int64
sessionControlTotal int64
mailboxEventSequence int64
mailboxEnqueuedTotal int64
mailboxDrainedTotal int64
mailboxDroppedTotal int64
mailboxReadTotal int64
mailboxWaitTotal int64
mailboxWaitTimeoutTotal int64
mailboxEmptyReadTotal int64
mailboxResumeReadTotal int64
mailboxAfterSequenceReadTotal int64
mailboxReturnedTotal int64
mailboxSkippedTotal int64
// mailboxPreflightTotal counts every recorded preflight; the Ack and
// Checkpoint totals count those whose ResumeFrom was "ack" or "checkpoint".
mailboxPreflightTotal int64
mailboxPreflightAckTotal int64
mailboxPreflightCheckpointTotal int64
mailboxConsumerReadTotal int64
mailboxConsumerAckTotal int64
mailboxConsumerResetTotal int64
mailboxConsumerEvictedTotal int64
lastMailboxReadAt string
lastMailboxAdapterSessionID string
lastMailboxWaitMs int
lastMailboxWaited bool
lastMailboxWaitTimeout bool
lastMailboxEmpty bool
lastMailboxResumeFrom string
lastMailboxResumeSequence int64
lastMailboxResumeConsumerID string
lastMailboxAfterSequence int64
lastMailboxSkippedCount int
lastMailboxReturnedCount int
// lastMailboxPreflight* mirror the most recently recorded preflight snapshot.
// lastMailboxPreflightAt is the record time formatted as RFC 3339 with
// nanoseconds.
lastMailboxPreflightAt string
lastMailboxPreflightAdapterSessionID string
lastMailboxPreflightConsumerID string
lastMailboxPreflightResumeFrom string
lastMailboxPreflightResumeSequence int64
lastMailboxPreflightAfterSequence int64
lastMailboxPreflightAvailableCount int
lastMailboxPreflightReturnedCount int
lastMailboxPreflightSkippedCount int
lastMailboxPreflightFirstSequence int64
lastMailboxPreflightLastSequence int64
lastMailboxPreflightFirstRetained int64
lastMailboxPreflightLastRetained int64
lastMailboxPreflightMailboxDropped int64
lastMailboxPreflightDiagnosticState string
lastMailboxPreflightStaleCursor bool
lastMailboxPreflightMissingDropped int
lastMailboxPreflightRecommendedAction string
lastMailboxPreflightActionHints []string
lastMailboxPreflightActionReason string
lastMailboxPreflightActionContext map[string]any
lastMailboxPreflightOperatorSummary string
lastMailboxPreflightOperatorStatus string
lastMailboxPreflightOperatorSeverity string
lastMailboxPreflightOperatorFields map[string]any
lastMailboxConsumerID string
lastMailboxConsumerAdapterSessionID string
lastMailboxConsumerReadAt string
lastMailboxConsumerAckAt string
lastMailboxConsumerCheckpoint int64
lastMailboxConsumerAck int64
acceptedFramesTotal int64
droppedFramesTotal int64
ackedFramesTotal int64
backpressureCount int64
lastBackpressureAt string
lastBackpressureReason string
lastRejectedFrameCount int
lastRejectedAdapterSessionID string
lastRejectedChannelClass string
lastRejectedAdapterContractID string
lastRejectedQueueCapacity int
lastRejectedQueueDepth int
lastControl RemoteWorkspaceAdapterSessionControlResult
last RemoteWorkspaceFrameBatchDeliveryReceipt
}
// remoteWorkspaceAdapterProbeSession is the per-adapter-session state kept by
// the probe sink: lifecycle timestamps, frame and mailbox counters, the
// retained mailbox events, per-consumer state and the "last observed" values
// for reads and preflights.
//
// ensureSessionLocked creates a session in state "created" with CreatedAt and
// LastActivityAt set to the current time and with its map fields initialised
// to empty maps.
type remoteWorkspaceAdapterProbeSession struct {
ID string
State string
CreatedAt time.Time
BoundAt time.Time
LastActivityAt time.Time
LastBackpressureAt time.Time
ClosedAt time.Time
DeliveryCount int64
BackpressureCount int64
AcceptedFrames int64
DroppedFrames int64
AckedFrames int64
Mailbox []RemoteWorkspaceAdapterMailboxEvent
MailboxEnqueued int64
MailboxDrained int64
MailboxDropped int64
MailboxRead int64
MailboxWait int64
MailboxWaitTimeout int64
MailboxEmptyRead int64
MailboxResumeRead int64
MailboxAfterSequenceRead int64
MailboxReturnedTotal int64
MailboxSkippedTotal int64
MailboxConsumers map[string]*remoteWorkspaceAdapterMailboxConsumerState
MailboxConsumerReadTotal int64
MailboxConsumerAckTotal int64
MailboxConsumerResetTotal int64
MailboxConsumerEvictedTotal int64
LastMailboxConsumerID string
LastMailboxConsumerReadAt time.Time
LastMailboxConsumerAckAt time.Time
LastMailboxConsumerCheckpoint int64
LastMailboxConsumerAck int64
LastMailboxReadAt time.Time
LastMailboxWaitMs int
LastMailboxWaited bool
LastMailboxTimeout bool
LastMailboxEmpty bool
LastMailboxResumeFrom string
LastMailboxResumeSequence int64
LastMailboxResumeConsumerID string
LastMailboxAfterSequence int64
LastMailboxSkippedCount int
LastMailboxReturnedCount int
LastChannelID string
LastResourceID string
LastRouteID string
LastReason string
ID string
State string
CreatedAt time.Time
BoundAt time.Time
LastActivityAt time.Time
LastBackpressureAt time.Time
ClosedAt time.Time
DeliveryCount int64
BackpressureCount int64
AcceptedFrames int64
DroppedFrames int64
AckedFrames int64
Mailbox []RemoteWorkspaceAdapterMailboxEvent
MailboxEnqueued int64
MailboxDrained int64
MailboxDropped int64
MailboxRead int64
MailboxWait int64
MailboxWaitTimeout int64
MailboxEmptyRead int64
MailboxResumeRead int64
MailboxAfterSequenceRead int64
MailboxReturnedTotal int64
MailboxSkippedTotal int64
MailboxPreflightTotal int64
MailboxPreflightAckTotal int64
MailboxPreflightCheckpointTotal int64
// Both count maps start out empty at session creation.
// NOTE(review): presumably keyed by the snapshot's operator status and
// operator severity strings — confirm where they are incremented.
MailboxPreflightOperatorStatusCounts map[string]int64
MailboxPreflightOperatorSeverityCounts map[string]int64
MailboxConsumers map[string]*remoteWorkspaceAdapterMailboxConsumerState
MailboxConsumerReadTotal int64
MailboxConsumerAckTotal int64
MailboxConsumerResetTotal int64
MailboxConsumerEvictedTotal int64
LastMailboxConsumerID string
LastMailboxConsumerReadAt time.Time
LastMailboxConsumerAckAt time.Time
LastMailboxConsumerCheckpoint int64
LastMailboxConsumerAck int64
LastMailboxReadAt time.Time
LastMailboxWaitMs int
LastMailboxWaited bool
LastMailboxTimeout bool
LastMailboxEmpty bool
LastMailboxResumeFrom string
LastMailboxResumeSequence int64
LastMailboxResumeConsumerID string
LastMailboxAfterSequence int64
LastMailboxSkippedCount int
LastMailboxReturnedCount int
// NOTE(review): the LastMailboxPreflight* fields appear to mirror the most
// recent preflight snapshot for this session — confirm in the recorder.
LastMailboxPreflightAt time.Time
LastMailboxPreflightConsumerID string
LastMailboxPreflightResumeFrom string
LastMailboxPreflightResumeSequence int64
LastMailboxPreflightAfterSequence int64
LastMailboxPreflightAvailableCount int
LastMailboxPreflightReturnedCount int
LastMailboxPreflightSkippedCount int
LastMailboxPreflightFirstSequence int64
LastMailboxPreflightLastSequence int64
LastMailboxPreflightFirstRetained int64
LastMailboxPreflightLastRetained int64
LastMailboxPreflightMailboxDropped int64
LastMailboxPreflightDiagnosticState string
LastMailboxPreflightStaleCursor bool
LastMailboxPreflightMissingDropped int
LastMailboxPreflightRecommendedAction string
LastMailboxPreflightActionHints []string
LastMailboxPreflightActionReason string
LastMailboxPreflightActionContext map[string]any
LastMailboxPreflightOperatorSummary string
LastMailboxPreflightOperatorStatus string
LastMailboxPreflightOperatorSeverity string
LastMailboxPreflightOperatorFields map[string]any
LastChannelID string
LastResourceID string
LastRouteID string
LastReason string
}
type remoteWorkspaceAdapterMailboxConsumerState struct {
@@ -251,29 +308,43 @@ type RemoteWorkspaceAdapterMailboxConsumer struct {
}
// RemoteWorkspaceAdapterMailboxPreflightSnapshot is the JSON result of a
// mailbox consumer preflight: where the consumer would resume, what a read
// from that cursor is expected to return, the range of sequences still
// retained in the mailbox, and diagnostic and operator guidance derived from
// those values.
type RemoteWorkspaceAdapterMailboxPreflightSnapshot struct {
SchemaVersion string `json:"schema_version"`
AdapterRuntimeID string `json:"adapter_runtime_id"`
AdapterSessionID string `json:"adapter_session_id"`
ObservedAt string `json:"observed_at"`
ReadOnly bool `json:"read_only"`
ConsumerID string `json:"consumer_id"`
ResumeFrom string `json:"resume_from"`
ResumeSequence int64 `json:"resume_sequence"`
AfterSequence int64 `json:"after_sequence"`
Limit int `json:"limit"`
MailboxDepth int `json:"mailbox_depth"`
MailboxEnqueued int64 `json:"mailbox_enqueued_total"`
MailboxReadTotal int64 `json:"mailbox_read_total"`
ConsumerReadTotal int64 `json:"consumer_read_total"`
ConsumerAckTotal int64 `json:"consumer_ack_total"`
ConsumerCheckpointSequence int64 `json:"consumer_checkpoint_sequence"`
ConsumerAckSequence int64 `json:"consumer_ack_sequence"`
ConsumerLagCount int `json:"consumer_lag_count"`
ExpectedAvailableCount int `json:"expected_available_count"`
ExpectedReturnedCount int `json:"expected_returned_count"`
ExpectedSkippedCount int `json:"expected_skipped_count"`
FirstExpectedSequence int64 `json:"first_expected_sequence,omitempty"`
LastExpectedSequence int64 `json:"last_expected_sequence,omitempty"`
SchemaVersion string `json:"schema_version"`
AdapterRuntimeID string `json:"adapter_runtime_id"`
AdapterSessionID string `json:"adapter_session_id"`
ObservedAt string `json:"observed_at"`
ReadOnly bool `json:"read_only"`
ConsumerID string `json:"consumer_id"`
ResumeFrom string `json:"resume_from"`
ResumeSequence int64 `json:"resume_sequence"`
AfterSequence int64 `json:"after_sequence"`
Limit int `json:"limit"`
MailboxDepth int `json:"mailbox_depth"`
MailboxEnqueued int64 `json:"mailbox_enqueued_total"`
MailboxDropped int64 `json:"mailbox_dropped_total"`
MailboxReadTotal int64 `json:"mailbox_read_total"`
ConsumerReadTotal int64 `json:"consumer_read_total"`
ConsumerAckTotal int64 `json:"consumer_ack_total"`
ConsumerCheckpointSequence int64 `json:"consumer_checkpoint_sequence"`
ConsumerAckSequence int64 `json:"consumer_ack_sequence"`
ConsumerLagCount int `json:"consumer_lag_count"`
ExpectedAvailableCount int `json:"expected_available_count"`
ExpectedReturnedCount int `json:"expected_returned_count"`
ExpectedSkippedCount int `json:"expected_skipped_count"`
FirstExpectedSequence int64 `json:"first_expected_sequence,omitempty"`
LastExpectedSequence int64 `json:"last_expected_sequence,omitempty"`
// FirstRetainedSequence and LastRetainedSequence are the sequences of the
// first and last events currently held in the session mailbox; both stay
// zero (and are omitted from JSON) when the mailbox is empty.
FirstRetainedSequence int64 `json:"first_retained_sequence,omitempty"`
LastRetainedSequence int64 `json:"last_retained_sequence,omitempty"`
// DiagnosticState is "ready", "stale_cursor_gap" or "caught_up".
DiagnosticState string `json:"diagnostic_state"`
// StaleCursor is set together with "stale_cursor_gap": the resume sequence
// lies more than one step before the first retained sequence.
StaleCursor bool `json:"stale_cursor"`
// MissingDroppedCount is the number of sequences between the resume cursor
// and the first retained event when the cursor is stale, otherwise zero.
MissingDroppedCount int `json:"missing_dropped_count"`
// RecommendedAction is "resume_from_cursor", "reset_consumer_and_resync" or
// "wait_for_new_mailbox_events", matching DiagnosticState.
RecommendedAction string `json:"recommended_action"`
ActionHints []string `json:"action_hints"`
ActionReason string `json:"action_reason"`
ActionContext map[string]any `json:"action_context"`
OperatorSummary string `json:"operator_summary"`
// OperatorStatus is "ready_to_resume", "resync_required" or "caught_up".
OperatorStatus string `json:"operator_status"`
// OperatorSeverity is "ok", "warn" or "info" respectively.
OperatorSeverity string `json:"operator_severity"`
OperatorSummaryFields map[string]any `json:"operator_summary_fields"`
}
type RemoteWorkspaceAdapterSessionSnapshot struct {
@@ -646,11 +717,13 @@ func (s *RemoteWorkspaceFrameProbeSink) ensureSessionLocked(delivery RemoteWorks
session := s.sessions[sessionID]
if session == nil {
session = &remoteWorkspaceAdapterProbeSession{
ID: sessionID,
State: "created",
CreatedAt: now,
LastActivityAt: now,
MailboxConsumers: map[string]*remoteWorkspaceAdapterMailboxConsumerState{},
ID: sessionID,
State: "created",
CreatedAt: now,
LastActivityAt: now,
MailboxConsumers: map[string]*remoteWorkspaceAdapterMailboxConsumerState{},
MailboxPreflightOperatorStatusCounts: map[string]int64{},
MailboxPreflightOperatorSeverityCounts: map[string]int64{},
}
s.sessions[sessionID] = session
s.sessionCreatedTotal++
@@ -1180,7 +1253,74 @@ func (s *RemoteWorkspaceFrameProbeSink) PreflightAdapterSessionMailboxConsumerRe
firstExpected = session.Mailbox[startIndex].Sequence
lastExpected = session.Mailbox[startIndex+returned-1].Sequence
}
return RemoteWorkspaceAdapterMailboxPreflightSnapshot{
var firstRetained int64
var lastRetained int64
if len(session.Mailbox) > 0 {
firstRetained = session.Mailbox[0].Sequence
lastRetained = session.Mailbox[len(session.Mailbox)-1].Sequence
}
diagnosticState := "ready"
staleCursor := false
missingDropped := 0
recommendedAction := "resume_from_cursor"
actionHints := []string{"resume_from_requested_cursor"}
actionReason := "cursor_window_available"
if firstRetained > 0 && resumeSequence < firstRetained-1 {
diagnosticState = "stale_cursor_gap"
staleCursor = true
missingDropped = int(firstRetained - resumeSequence - 1)
recommendedAction = "reset_consumer_and_resync"
actionHints = []string{"reset_consumer_cursor", "request_full_adapter_resync", "resume_from_checkpoint_after_resync"}
actionReason = "consumer_cursor_before_first_retained_sequence"
} else if returned == 0 {
diagnosticState = "caught_up"
recommendedAction = "wait_for_new_mailbox_events"
actionHints = []string{"keep_consumer_cursor", "long_poll_after_sequence"}
actionReason = "cursor_caught_up_to_retained_mailbox"
}
actionContext := map[string]any{
"consumer_id": consumerID,
"resume_from": resumeFrom,
"resume_sequence": resumeSequence,
"first_retained_sequence": firstRetained,
"last_retained_sequence": lastRetained,
"mailbox_depth": len(session.Mailbox),
"mailbox_dropped_total": session.MailboxDropped,
"missing_dropped_count": missingDropped,
"expected_available_count": available,
"expected_returned_count": returned,
"expected_skipped_count": startIndex,
"consumer_checkpoint_sequence": consumer.CheckpointSequence,
"consumer_ack_sequence": consumer.AckSequence,
}
operatorSummary := "consumer cursor can resume from requested window"
operatorStatus := "ready_to_resume"
operatorSeverity := "ok"
if diagnosticState == "stale_cursor_gap" {
operatorSummary = "stale cursor gap: reset consumer and resync before resume"
operatorStatus = "resync_required"
operatorSeverity = "warn"
} else if diagnosticState == "caught_up" {
operatorSummary = "consumer cursor is caught up; wait for new mailbox events"
operatorStatus = "caught_up"
operatorSeverity = "info"
}
operatorSummaryFields := map[string]any{
"diagnostic_state": diagnosticState,
"recommended_action": recommendedAction,
"action_reason": actionReason,
"operator_status": operatorStatus,
"operator_severity": operatorSeverity,
"resume_from": resumeFrom,
"resume_sequence": resumeSequence,
"first_retained_sequence": firstRetained,
"last_retained_sequence": lastRetained,
"missing_dropped_count": missingDropped,
"expected_available_count": available,
"expected_returned_count": returned,
"expected_skipped_count": startIndex,
}
snapshot := RemoteWorkspaceAdapterMailboxPreflightSnapshot{
SchemaVersion: "rap.remote_workspace_adapter_mailbox_preflight.v1",
AdapterRuntimeID: RemoteWorkspaceFrameProbeSinkRuntimeID,
AdapterSessionID: adapterSessionID,
@@ -1193,6 +1333,7 @@ func (s *RemoteWorkspaceFrameProbeSink) PreflightAdapterSessionMailboxConsumerRe
Limit: limit,
MailboxDepth: len(session.Mailbox),
MailboxEnqueued: session.MailboxEnqueued,
MailboxDropped: session.MailboxDropped,
MailboxReadTotal: session.MailboxRead,
ConsumerReadTotal: session.MailboxConsumerReadTotal,
ConsumerAckTotal: session.MailboxConsumerAckTotal,
@@ -1204,7 +1345,236 @@ func (s *RemoteWorkspaceFrameProbeSink) PreflightAdapterSessionMailboxConsumerRe
ExpectedSkippedCount: startIndex,
FirstExpectedSequence: firstExpected,
LastExpectedSequence: lastExpected,
}, nil
FirstRetainedSequence: firstRetained,
LastRetainedSequence: lastRetained,
DiagnosticState: diagnosticState,
StaleCursor: staleCursor,
MissingDroppedCount: missingDropped,
RecommendedAction: recommendedAction,
ActionHints: actionHints,
ActionReason: actionReason,
ActionContext: actionContext,
OperatorSummary: operatorSummary,
OperatorStatus: operatorStatus,
OperatorSeverity: operatorSeverity,
OperatorSummaryFields: operatorSummaryFields,
}
s.recordAdapterSessionMailboxPreflightLocked(session, snapshot, now)
return snapshot, nil
}
// recordAdapterSessionMailboxPreflightLocked stores the outcome of a mailbox
// preflight in the sink-wide "last preflight" fields and counters and, when
// session is non-nil, in the matching per-session fields and counters.
//
// Slices and maps taken from snapshot are copied before being stored, so the
// sink and the session each hold their own top-level copy rather than sharing
// one with the snapshot.
//
// NOTE(review): the "Locked" suffix suggests the caller already holds the
// sink's lock; nothing in this function acquires one — confirm at call sites.
func (s *RemoteWorkspaceFrameProbeSink) recordAdapterSessionMailboxPreflightLocked(session *remoteWorkspaceAdapterProbeSession, snapshot RemoteWorkspaceAdapterMailboxPreflightSnapshot, now time.Time) {
	// Sink-wide counters: every preflight counts once, plus once more under
	// the cursor kind it resumed from (if it is one of the two tracked kinds).
	s.mailboxPreflightTotal++
	switch snapshot.ResumeFrom {
	case "ack":
		s.mailboxPreflightAckTotal++
	case "checkpoint":
		s.mailboxPreflightCheckpointTotal++
	}

	// Sink-wide identity and cursor of the most recent preflight. The
	// timestamp is kept pre-formatted as a string at this level.
	s.lastMailboxPreflightAt = now.Format(time.RFC3339Nano)
	s.lastMailboxPreflightAdapterSessionID = snapshot.AdapterSessionID
	s.lastMailboxPreflightConsumerID = snapshot.ConsumerID
	s.lastMailboxPreflightResumeFrom = snapshot.ResumeFrom
	s.lastMailboxPreflightResumeSequence = snapshot.ResumeSequence
	s.lastMailboxPreflightAfterSequence = snapshot.AfterSequence

	// Sink-wide expected read window and retained window.
	s.lastMailboxPreflightAvailableCount = snapshot.ExpectedAvailableCount
	s.lastMailboxPreflightReturnedCount = snapshot.ExpectedReturnedCount
	s.lastMailboxPreflightSkippedCount = snapshot.ExpectedSkippedCount
	s.lastMailboxPreflightFirstSequence = snapshot.FirstExpectedSequence
	s.lastMailboxPreflightLastSequence = snapshot.LastExpectedSequence
	s.lastMailboxPreflightFirstRetained = snapshot.FirstRetainedSequence
	s.lastMailboxPreflightLastRetained = snapshot.LastRetainedSequence
	s.lastMailboxPreflightMailboxDropped = snapshot.MailboxDropped

	// Sink-wide diagnostics and recommended action.
	s.lastMailboxPreflightDiagnosticState = snapshot.DiagnosticState
	s.lastMailboxPreflightStaleCursor = snapshot.StaleCursor
	s.lastMailboxPreflightMissingDropped = snapshot.MissingDroppedCount
	s.lastMailboxPreflightRecommendedAction = snapshot.RecommendedAction
	s.lastMailboxPreflightActionHints = append([]string(nil), snapshot.ActionHints...)
	s.lastMailboxPreflightActionReason = snapshot.ActionReason
	s.lastMailboxPreflightActionContext = cloneStringAnyMap(snapshot.ActionContext)

	// Sink-wide operator-facing summary.
	s.lastMailboxPreflightOperatorSummary = snapshot.OperatorSummary
	s.lastMailboxPreflightOperatorStatus = snapshot.OperatorStatus
	s.lastMailboxPreflightOperatorSeverity = snapshot.OperatorSeverity
	s.lastMailboxPreflightOperatorFields = cloneStringAnyMap(snapshot.OperatorSummaryFields)

	// Without a session there is nothing per-session to update.
	if session == nil {
		return
	}

	// Per-session counters mirror the sink-wide ones and additionally keep a
	// histogram of operator status and severity values.
	session.MailboxPreflightTotal++
	switch snapshot.ResumeFrom {
	case "ack":
		session.MailboxPreflightAckTotal++
	case "checkpoint":
		session.MailboxPreflightCheckpointTotal++
	}
	incrementStringInt64Map(&session.MailboxPreflightOperatorStatusCounts, snapshot.OperatorStatus)
	incrementStringInt64Map(&session.MailboxPreflightOperatorSeverityCounts, snapshot.OperatorSeverity)

	// Per-session identity and cursor; the timestamp is stored unformatted.
	session.LastMailboxPreflightAt = now
	session.LastMailboxPreflightConsumerID = snapshot.ConsumerID
	session.LastMailboxPreflightResumeFrom = snapshot.ResumeFrom
	session.LastMailboxPreflightResumeSequence = snapshot.ResumeSequence
	session.LastMailboxPreflightAfterSequence = snapshot.AfterSequence

	// Per-session expected read window and retained window.
	session.LastMailboxPreflightAvailableCount = snapshot.ExpectedAvailableCount
	session.LastMailboxPreflightReturnedCount = snapshot.ExpectedReturnedCount
	session.LastMailboxPreflightSkippedCount = snapshot.ExpectedSkippedCount
	session.LastMailboxPreflightFirstSequence = snapshot.FirstExpectedSequence
	session.LastMailboxPreflightLastSequence = snapshot.LastExpectedSequence
	session.LastMailboxPreflightFirstRetained = snapshot.FirstRetainedSequence
	session.LastMailboxPreflightLastRetained = snapshot.LastRetainedSequence
	session.LastMailboxPreflightMailboxDropped = snapshot.MailboxDropped

	// Per-session diagnostics and recommended action.
	session.LastMailboxPreflightDiagnosticState = snapshot.DiagnosticState
	session.LastMailboxPreflightStaleCursor = snapshot.StaleCursor
	session.LastMailboxPreflightMissingDropped = snapshot.MissingDroppedCount
	session.LastMailboxPreflightRecommendedAction = snapshot.RecommendedAction
	session.LastMailboxPreflightActionHints = append([]string(nil), snapshot.ActionHints...)
	session.LastMailboxPreflightActionReason = snapshot.ActionReason
	session.LastMailboxPreflightActionContext = cloneStringAnyMap(snapshot.ActionContext)

	// Per-session operator-facing summary.
	session.LastMailboxPreflightOperatorSummary = snapshot.OperatorSummary
	session.LastMailboxPreflightOperatorStatus = snapshot.OperatorStatus
	session.LastMailboxPreflightOperatorSeverity = snapshot.OperatorSeverity
	session.LastMailboxPreflightOperatorFields = cloneStringAnyMap(snapshot.OperatorSummaryFields)
}
// cloneStringAnyMap returns a shallow copy of source: a new map holding the
// same key/value pairs. Values are copied by assignment, so any maps, slices
// or pointers stored as values remain shared with source. A nil source yields
// nil; an empty non-nil source yields an empty non-nil map.
func cloneStringAnyMap(source map[string]any) map[string]any {
	var copied map[string]any
	if source != nil {
		copied = make(map[string]any, len(source))
		for k, v := range source {
			copied[k] = v
		}
	}
	return copied
}
// cloneStringInt64Map returns an independent copy of source with the same
// keys and counts. A nil source yields nil; an empty non-nil source yields an
// empty non-nil map.
func cloneStringInt64Map(source map[string]int64) map[string]int64 {
	var copied map[string]int64
	if source != nil {
		copied = make(map[string]int64, len(source))
		for k, n := range source {
			copied[k] = n
		}
	}
	return copied
}
// incrementStringInt64Map adds one to the counter stored under key in the map
// that target points to. The key is trimmed of surrounding whitespace first.
// The call is a no-op when target is nil or the trimmed key is empty. If the
// pointed-to map is nil, a new map is allocated and stored through target
// before counting.
func incrementStringInt64Map(target *map[string]int64, key string) {
	if target == nil {
		return
	}
	trimmed := strings.TrimSpace(key)
	if trimmed == "" {
		return
	}
	counts := *target
	if counts == nil {
		counts = map[string]int64{}
		*target = counts
	}
	counts[trimmed]++
}
// remoteWorkspacePreflightAttentionStatus condenses the operator status and
// severity histograms into a single attention label:
//
//   - "repeated_resync_required" when "resync_required" or "warn" was counted
//     more than once,
//   - "needs_attention" when either was counted at least once,
//   - "clean" when any of the statuses "ready_to_resume"/"caught_up" or the
//     severities "ok"/"info" was counted,
//   - "unknown" otherwise (including nil or empty maps).
func remoteWorkspacePreflightAttentionStatus(statusCounts map[string]int64, severityCounts map[string]int64) string {
	resyncs, warnings := statusCounts["resync_required"], severityCounts["warn"]
	switch {
	case resyncs > 1 || warnings > 1:
		return "repeated_resync_required"
	case resyncs > 0 || warnings > 0:
		return "needs_attention"
	}
	for _, status := range [...]string{"ready_to_resume", "caught_up"} {
		if statusCounts[status] > 0 {
			return "clean"
		}
	}
	for _, severity := range [...]string{"ok", "info"} {
		if severityCounts[severity] > 0 {
			return "clean"
		}
	}
	return "unknown"
}
// remoteWorkspacePreflightAttentionReason returns the reason string that
// accompanies an attention status. For "needs_attention" the reason depends
// on which counter is positive: "resync_required" in statusCounts is checked
// first, then "warn" in severityCounts, with a generic reason as fallback.
// Any status other than the three recognised ones maps to
// "no_preflight_observed".
func remoteWorkspacePreflightAttentionReason(status string, statusCounts map[string]int64, severityCounts map[string]int64) string {
	if status == "repeated_resync_required" {
		return "resync_required_preflight_repeated"
	}
	if status == "clean" {
		return "no_resync_required_preflight_observed"
	}
	if status != "needs_attention" {
		return "no_preflight_observed"
	}
	// status == "needs_attention": pick the most specific observed cause.
	if statusCounts["resync_required"] > 0 {
		return "resync_required_preflight_observed"
	}
	if severityCounts["warn"] > 0 {
		return "warn_preflight_observed"
	}
	return "attention_preflight_observed"
}
// remoteWorkspacePreflightRemediationChecklist builds the remediation steps
// for an operator status. Every item is a map with the keys "step",
// "required", "satisfied" and "source_hint"; "source_hint" reports whether a
// corresponding entry was present in actionHints.
//
//   - "resync_required" yields three unsatisfied steps: reset the consumer
//     cursor, request a full adapter resync, then resume from checkpoint.
//   - "ready_to_resume" yields a single, already satisfied resume step.
//   - Any other status yields a single unsatisfied "wait_for_new_mailbox_events"
//     step whose hint flag is set by either "long_poll_after_sequence" or
//     "keep_consumer_cursor".
func remoteWorkspacePreflightRemediationChecklist(operatorStatus string, actionHints []string) []map[string]any {
	hinted := make(map[string]bool, len(actionHints))
	for i := range actionHints {
		hinted[actionHints[i]] = true
	}
	// All steps are required; only the satisfied and hint flags vary.
	newItem := func(step string, satisfied, sourceHint bool) map[string]any {
		return map[string]any{
			"step":        step,
			"required":    true,
			"satisfied":   satisfied,
			"source_hint": sourceHint,
		}
	}
	switch operatorStatus {
	case "resync_required":
		steps := [...]string{
			"reset_consumer_cursor",
			"request_full_adapter_resync",
			"resume_from_checkpoint_after_resync",
		}
		checklist := make([]map[string]any, 0, len(steps))
		for _, step := range steps {
			// Each resync step is hinted by an action hint of the same name.
			checklist = append(checklist, newItem(step, false, hinted[step]))
		}
		return checklist
	case "ready_to_resume":
		return []map[string]any{
			newItem("resume_from_requested_cursor", true, hinted["resume_from_requested_cursor"]),
		}
	default:
		waitHinted := hinted["long_poll_after_sequence"] || hinted["keep_consumer_cursor"]
		return []map[string]any{
			newItem("wait_for_new_mailbox_events", false, waitHinted),
		}
	}
}
// remoteWorkspacePreflightRemediationChecklistSummary tallies a remediation
// checklist. An item counts as required or satisfied only when the respective
// key holds the bool value true; satisfied items are counted only among the
// required ones.
//
// The returned map carries "total_count", "required_count",
// "satisfied_count", "pending_count" (required minus satisfied) and "status":
// "action_required" while anything is pending, "ready" when there are
// required items and none is pending, "not_required" otherwise.
func remoteWorkspacePreflightRemediationChecklistSummary(checklist []map[string]any) map[string]any {
	var requiredCount, satisfiedCount int
	for i := range checklist {
		if isRequired, _ := checklist[i]["required"].(bool); !isRequired {
			continue
		}
		requiredCount++
		if isSatisfied, _ := checklist[i]["satisfied"].(bool); isSatisfied {
			satisfiedCount++
		}
	}
	// satisfiedCount is only ever incremented alongside requiredCount, so the
	// difference cannot be negative.
	pendingCount := requiredCount - satisfiedCount

	var status string
	switch {
	case pendingCount > 0:
		status = "action_required"
	case requiredCount > 0:
		status = "ready"
	default:
		status = "not_required"
	}
	return map[string]any{
		"status":          status,
		"total_count":     len(checklist),
		"required_count":  requiredCount,
		"satisfied_count": satisfiedCount,
		"pending_count":   pendingCount,
	}
}
func (s *RemoteWorkspaceFrameProbeSink) evictOldestMailboxConsumerLocked(session *remoteWorkspaceAdapterProbeSession) bool {
@@ -1256,25 +1626,50 @@ func countMailboxConsumersLocked(sessions map[string]*remoteWorkspaceAdapterProb
func remoteWorkspaceAdapterRuntimeReadinessLocked(s *RemoteWorkspaceFrameProbeSink, session *remoteWorkspaceAdapterProbeSession, now time.Time) map[string]any {
readiness := map[string]any{
"schema_version": "rap.remote_workspace_adapter_runtime_readiness.v1",
"adapter_runtime_id": RemoteWorkspaceFrameProbeSinkRuntimeID,
"observed_at": now.UTC().Format(time.RFC3339Nano),
"probe_only": true,
"payload_traffic": "none",
"status": "idle",
"diagnostic_state": "waiting_for_session",
"ready": false,
"active_session_count": len(s.sessions),
"terminal_session_count": len(s.terminalSessions),
"mailbox_capacity": DefaultRemoteWorkspaceAdapterMailboxCapacity,
"consumer_capacity": DefaultRemoteWorkspaceAdapterMailboxConsumerCapacity,
"mailbox_read_total": s.mailboxReadTotal,
"mailbox_resume_total": s.mailboxResumeReadTotal,
"schema_version": "rap.remote_workspace_adapter_runtime_readiness.v1",
"adapter_runtime_id": RemoteWorkspaceFrameProbeSinkRuntimeID,
"observed_at": now.UTC().Format(time.RFC3339Nano),
"probe_only": true,
"payload_traffic": "none",
"status": "idle",
"diagnostic_state": "waiting_for_session",
"ready": false,
"active_session_count": len(s.sessions),
"terminal_session_count": len(s.terminalSessions),
"mailbox_capacity": DefaultRemoteWorkspaceAdapterMailboxCapacity,
"consumer_capacity": DefaultRemoteWorkspaceAdapterMailboxConsumerCapacity,
"mailbox_read_total": s.mailboxReadTotal,
"mailbox_resume_total": s.mailboxResumeReadTotal,
"mailbox_preflight_total": s.mailboxPreflightTotal,
}
if session == nil {
if s.sequence == 0 {
readiness["no_session_summary"] = map[string]any{
"schema_version": "rap.remote_workspace_adapter_no_session_summary.v1",
"summary_contract": []string{"status", "diagnostic_state", "active_session_count", "terminal_session_count"},
"summary_features": map[string]bool{"status": true, "diagnostic_state": true, "active_session_count": true, "terminal_session_count": true},
"status": "idle",
"diagnostic_state": "waiting_for_session",
"active_session_count": len(s.sessions),
"terminal_session_count": len(s.terminalSessions),
}
}
if s.sequence > 0 {
readiness["last_adapter_session_id"] = s.last.AdapterSessionID
readiness["last_session_state"] = s.last.SessionState
lastSessionState := s.last.SessionState
if terminal, ok := s.terminalSessions[s.last.AdapterSessionID]; ok {
lastSessionState = terminal.State
readiness["terminal_session_summary"] = map[string]any{
"schema_version": "rap.remote_workspace_adapter_terminal_session_summary.v1",
"summary_contract": []string{"adapter_session_id", "session_state", "reason", "controlled_at"},
"summary_features": map[string]bool{"adapter_session_id": true, "session_state": true, "reason": true, "controlled_at": true},
"adapter_session_id": s.last.AdapterSessionID,
"session_state": terminal.State,
"reason": terminal.Reason,
"controlled_at": terminal.ControlledAt.Format(time.RFC3339Nano),
}
}
readiness["last_session_state"] = lastSessionState
readiness["diagnostic_state"] = "last_session_terminal_or_expired"
}
return readiness
@@ -1299,6 +1694,13 @@ func remoteWorkspaceAdapterRuntimeReadinessLocked(s *RemoteWorkspaceFrameProbeSi
readiness["mailbox_enqueued_total"] = session.MailboxEnqueued
readiness["mailbox_read_total"] = session.MailboxRead
readiness["mailbox_resume_read_total"] = session.MailboxResumeRead
readiness["mailbox_preflight_total"] = session.MailboxPreflightTotal
readiness["mailbox_preflight_operator_status_counts"] = cloneStringInt64Map(session.MailboxPreflightOperatorStatusCounts)
readiness["mailbox_preflight_operator_severity_counts"] = cloneStringInt64Map(session.MailboxPreflightOperatorSeverityCounts)
preflightAttentionStatus := remoteWorkspacePreflightAttentionStatus(session.MailboxPreflightOperatorStatusCounts, session.MailboxPreflightOperatorSeverityCounts)
preflightAttentionReason := remoteWorkspacePreflightAttentionReason(preflightAttentionStatus, session.MailboxPreflightOperatorStatusCounts, session.MailboxPreflightOperatorSeverityCounts)
readiness["preflight_attention_status"] = preflightAttentionStatus
readiness["preflight_attention_reason"] = preflightAttentionReason
readiness["mailbox_after_sequence_read_total"] = session.MailboxAfterSequenceRead
readiness["mailbox_returned_total"] = session.MailboxReturnedTotal
readiness["mailbox_skipped_total"] = session.MailboxSkippedTotal
@@ -1315,6 +1717,66 @@ func remoteWorkspaceAdapterRuntimeReadinessLocked(s *RemoteWorkspaceFrameProbeSi
readiness["last_after_sequence"] = session.LastMailboxAfterSequence
readiness["last_returned_count"] = session.LastMailboxReturnedCount
readiness["last_skipped_count"] = session.LastMailboxSkippedCount
readiness["last_preflight_consumer_id"] = session.LastMailboxPreflightConsumerID
readiness["last_preflight_resume_from"] = session.LastMailboxPreflightResumeFrom
readiness["last_preflight_resume_sequence"] = session.LastMailboxPreflightResumeSequence
readiness["last_preflight_available_count"] = session.LastMailboxPreflightAvailableCount
readiness["last_preflight_returned_count"] = session.LastMailboxPreflightReturnedCount
readiness["last_preflight_skipped_count"] = session.LastMailboxPreflightSkippedCount
readiness["last_preflight_diagnostic_state"] = session.LastMailboxPreflightDiagnosticState
readiness["last_preflight_stale_cursor"] = session.LastMailboxPreflightStaleCursor
readiness["last_preflight_missing_dropped_count"] = session.LastMailboxPreflightMissingDropped
readiness["last_preflight_recommended_action"] = session.LastMailboxPreflightRecommendedAction
readiness["last_preflight_action_hints"] = append([]string(nil), session.LastMailboxPreflightActionHints...)
readiness["last_preflight_action_reason"] = session.LastMailboxPreflightActionReason
readiness["last_preflight_action_context"] = cloneStringAnyMap(session.LastMailboxPreflightActionContext)
readiness["last_preflight_operator_summary"] = session.LastMailboxPreflightOperatorSummary
readiness["last_preflight_operator_status"] = session.LastMailboxPreflightOperatorStatus
readiness["last_preflight_operator_severity"] = session.LastMailboxPreflightOperatorSeverity
readiness["last_preflight_operator_summary_fields"] = cloneStringAnyMap(session.LastMailboxPreflightOperatorFields)
if session.MailboxPreflightTotal > 0 {
remediationChecklist := remoteWorkspacePreflightRemediationChecklist(session.LastMailboxPreflightOperatorStatus, session.LastMailboxPreflightActionHints)
remediationChecklistSummary := remoteWorkspacePreflightRemediationChecklistSummary(remediationChecklist)
readiness["last_preflight"] = map[string]any{
"diagnostics_schema_version": "rap.remote_workspace_adapter_mailbox_preflight_diagnostics.v1",
"diagnostics_contract": []string{"retained_window", "remediation_checklist", "attention", "operator_counts"},
"diagnostics_features": map[string]bool{"retained_window": true, "remediation_checklist": true, "attention": true, "operator_counts": true},
"observed_at": session.LastMailboxPreflightAt.Format(time.RFC3339Nano),
"consumer_id": session.LastMailboxPreflightConsumerID,
"resume_from": session.LastMailboxPreflightResumeFrom,
"resume_sequence": session.LastMailboxPreflightResumeSequence,
"after_sequence": session.LastMailboxPreflightAfterSequence,
"available_count": session.LastMailboxPreflightAvailableCount,
"returned_count": session.LastMailboxPreflightReturnedCount,
"skipped_count": session.LastMailboxPreflightSkippedCount,
"first_sequence": session.LastMailboxPreflightFirstSequence,
"last_sequence": session.LastMailboxPreflightLastSequence,
"first_retained_sequence": session.LastMailboxPreflightFirstRetained,
"last_retained_sequence": session.LastMailboxPreflightLastRetained,
"mailbox_dropped_total": session.LastMailboxPreflightMailboxDropped,
"diagnostic_state": session.LastMailboxPreflightDiagnosticState,
"stale_cursor": session.LastMailboxPreflightStaleCursor,
"missing_dropped_count": session.LastMailboxPreflightMissingDropped,
"recommended_action": session.LastMailboxPreflightRecommendedAction,
"action_hints": append([]string(nil), session.LastMailboxPreflightActionHints...),
"action_reason": session.LastMailboxPreflightActionReason,
"action_context": cloneStringAnyMap(session.LastMailboxPreflightActionContext),
"remediation_checklist": remediationChecklist,
"remediation_checklist_status": remediationChecklistSummary["status"],
"remediation_checklist_counts": remediationChecklistSummary,
"operator_summary": session.LastMailboxPreflightOperatorSummary,
"operator_status": session.LastMailboxPreflightOperatorStatus,
"operator_severity": session.LastMailboxPreflightOperatorSeverity,
"operator_summary_fields": cloneStringAnyMap(session.LastMailboxPreflightOperatorFields),
"mailbox_preflight_total": session.MailboxPreflightTotal,
"mailbox_preflight_ack_total": session.MailboxPreflightAckTotal,
"mailbox_preflight_checkpoint_total": session.MailboxPreflightCheckpointTotal,
"preflight_attention_status": preflightAttentionStatus,
"preflight_attention_reason": preflightAttentionReason,
"operator_status_counts": cloneStringInt64Map(session.MailboxPreflightOperatorStatusCounts),
"operator_severity_counts": cloneStringInt64Map(session.MailboxPreflightOperatorSeverityCounts),
}
}
if !session.LastActivityAt.IsZero() {
readiness["last_activity_at"] = session.LastActivityAt.Format(time.RFC3339Nano)
}
@@ -1327,6 +1789,9 @@ func remoteWorkspaceAdapterRuntimeReadinessLocked(s *RemoteWorkspaceFrameProbeSi
if !session.LastMailboxConsumerAckAt.IsZero() {
readiness["last_consumer_ack_at"] = session.LastMailboxConsumerAckAt.Format(time.RFC3339Nano)
}
if !session.LastMailboxPreflightAt.IsZero() {
readiness["last_preflight_at"] = session.LastMailboxPreflightAt.Format(time.RFC3339Nano)
}
return readiness
}
@@ -1445,6 +1910,9 @@ func (s *RemoteWorkspaceFrameProbeSink) Report(now time.Time) map[string]any {
report["mailbox_after_sequence_read_total"] = s.mailboxAfterSequenceReadTotal
report["mailbox_returned_total"] = s.mailboxReturnedTotal
report["mailbox_skipped_total"] = s.mailboxSkippedTotal
report["mailbox_preflight_total"] = s.mailboxPreflightTotal
report["mailbox_preflight_ack_total"] = s.mailboxPreflightAckTotal
report["mailbox_preflight_checkpoint_total"] = s.mailboxPreflightCheckpointTotal
report["mailbox_consumer_capacity"] = DefaultRemoteWorkspaceAdapterMailboxConsumerCapacity
report["mailbox_consumer_count"] = countMailboxConsumersLocked(s.sessions)
report["mailbox_consumer_read_total"] = s.mailboxConsumerReadTotal
@@ -1467,6 +1935,30 @@ func (s *RemoteWorkspaceFrameProbeSink) Report(now time.Time) map[string]any {
report["last_mailbox_resume_sequence"] = s.lastMailboxResumeSequence
report["last_mailbox_resume_consumer_id"] = s.lastMailboxResumeConsumerID
}
if s.mailboxPreflightTotal > 0 {
report["last_mailbox_preflight_at"] = s.lastMailboxPreflightAt
report["last_mailbox_preflight_adapter_session_id"] = s.lastMailboxPreflightAdapterSessionID
report["last_mailbox_preflight_consumer_id"] = s.lastMailboxPreflightConsumerID
report["last_mailbox_preflight_resume_from"] = s.lastMailboxPreflightResumeFrom
report["last_mailbox_preflight_resume_sequence"] = s.lastMailboxPreflightResumeSequence
report["last_mailbox_preflight_after_sequence"] = s.lastMailboxPreflightAfterSequence
report["last_mailbox_preflight_available_count"] = s.lastMailboxPreflightAvailableCount
report["last_mailbox_preflight_returned_count"] = s.lastMailboxPreflightReturnedCount
report["last_mailbox_preflight_skipped_count"] = s.lastMailboxPreflightSkippedCount
report["last_mailbox_preflight_first_sequence"] = s.lastMailboxPreflightFirstSequence
report["last_mailbox_preflight_last_sequence"] = s.lastMailboxPreflightLastSequence
report["last_mailbox_preflight_diagnostic_state"] = s.lastMailboxPreflightDiagnosticState
report["last_mailbox_preflight_stale_cursor"] = s.lastMailboxPreflightStaleCursor
report["last_mailbox_preflight_missing_dropped_count"] = s.lastMailboxPreflightMissingDropped
report["last_mailbox_preflight_recommended_action"] = s.lastMailboxPreflightRecommendedAction
report["last_mailbox_preflight_action_hints"] = append([]string(nil), s.lastMailboxPreflightActionHints...)
report["last_mailbox_preflight_action_reason"] = s.lastMailboxPreflightActionReason
report["last_mailbox_preflight_action_context"] = cloneStringAnyMap(s.lastMailboxPreflightActionContext)
report["last_mailbox_preflight_operator_summary"] = s.lastMailboxPreflightOperatorSummary
report["last_mailbox_preflight_operator_status"] = s.lastMailboxPreflightOperatorStatus
report["last_mailbox_preflight_operator_severity"] = s.lastMailboxPreflightOperatorSeverity
report["last_mailbox_preflight_operator_summary_fields"] = cloneStringAnyMap(s.lastMailboxPreflightOperatorFields)
}
if s.mailboxConsumerReadTotal > 0 {
report["last_mailbox_consumer_id"] = s.lastMailboxConsumerID
report["last_mailbox_consumer_read_at"] = s.lastMailboxConsumerReadAt
@@ -1520,6 +2012,11 @@ func (s *RemoteWorkspaceFrameProbeSink) Report(now time.Time) map[string]any {
report["current_session_mailbox_after_sequence_read_total"] = session.MailboxAfterSequenceRead
report["current_session_mailbox_returned_total"] = session.MailboxReturnedTotal
report["current_session_mailbox_skipped_total"] = session.MailboxSkippedTotal
report["current_session_mailbox_preflight_total"] = session.MailboxPreflightTotal
report["current_session_mailbox_preflight_ack_total"] = session.MailboxPreflightAckTotal
report["current_session_mailbox_preflight_checkpoint_total"] = session.MailboxPreflightCheckpointTotal
report["current_session_mailbox_preflight_operator_status_counts"] = cloneStringInt64Map(session.MailboxPreflightOperatorStatusCounts)
report["current_session_mailbox_preflight_operator_severity_counts"] = cloneStringInt64Map(session.MailboxPreflightOperatorSeverityCounts)
report["current_session_mailbox_consumer_count"] = len(session.MailboxConsumers)
report["current_session_mailbox_consumer_read_total"] = session.MailboxConsumerReadTotal
report["current_session_mailbox_consumer_ack_total"] = session.MailboxConsumerAckTotal
@@ -1549,6 +2046,29 @@ func (s *RemoteWorkspaceFrameProbeSink) Report(now time.Time) map[string]any {
report["current_session_last_mailbox_resume_sequence"] = session.LastMailboxResumeSequence
report["current_session_last_mailbox_resume_consumer_id"] = session.LastMailboxResumeConsumerID
}
if session.MailboxPreflightTotal > 0 {
report["current_session_last_mailbox_preflight_at"] = session.LastMailboxPreflightAt.Format(time.RFC3339Nano)
report["current_session_last_mailbox_preflight_consumer_id"] = session.LastMailboxPreflightConsumerID
report["current_session_last_mailbox_preflight_resume_from"] = session.LastMailboxPreflightResumeFrom
report["current_session_last_mailbox_preflight_resume_sequence"] = session.LastMailboxPreflightResumeSequence
report["current_session_last_mailbox_preflight_after_sequence"] = session.LastMailboxPreflightAfterSequence
report["current_session_last_mailbox_preflight_available_count"] = session.LastMailboxPreflightAvailableCount
report["current_session_last_mailbox_preflight_returned_count"] = session.LastMailboxPreflightReturnedCount
report["current_session_last_mailbox_preflight_skipped_count"] = session.LastMailboxPreflightSkippedCount
report["current_session_last_mailbox_preflight_first_sequence"] = session.LastMailboxPreflightFirstSequence
report["current_session_last_mailbox_preflight_last_sequence"] = session.LastMailboxPreflightLastSequence
report["current_session_last_mailbox_preflight_diagnostic_state"] = session.LastMailboxPreflightDiagnosticState
report["current_session_last_mailbox_preflight_stale_cursor"] = session.LastMailboxPreflightStaleCursor
report["current_session_last_mailbox_preflight_missing_dropped_count"] = session.LastMailboxPreflightMissingDropped
report["current_session_last_mailbox_preflight_recommended_action"] = session.LastMailboxPreflightRecommendedAction
report["current_session_last_mailbox_preflight_action_hints"] = append([]string(nil), session.LastMailboxPreflightActionHints...)
report["current_session_last_mailbox_preflight_action_reason"] = session.LastMailboxPreflightActionReason
report["current_session_last_mailbox_preflight_action_context"] = cloneStringAnyMap(session.LastMailboxPreflightActionContext)
report["current_session_last_mailbox_preflight_operator_summary"] = session.LastMailboxPreflightOperatorSummary
report["current_session_last_mailbox_preflight_operator_status"] = session.LastMailboxPreflightOperatorStatus
report["current_session_last_mailbox_preflight_operator_severity"] = session.LastMailboxPreflightOperatorSeverity
report["current_session_last_mailbox_preflight_operator_summary_fields"] = cloneStringAnyMap(session.LastMailboxPreflightOperatorFields)
}
if !session.LastBackpressureAt.IsZero() {
report["current_session_last_backpressure_at"] = session.LastBackpressureAt.Format(time.RFC3339Nano)
report["current_session_last_backpressure_reason"] = session.LastReason
@@ -1643,6 +1643,44 @@ func TestRemoteWorkspaceAdapterSessionControlEndpointClosesSession(t *testing.T)
report["last_session_control_state"] != "closed" {
t.Fatalf("control report = %+v", report)
}
readiness, ok := report["adapter_runtime_readiness"].(map[string]any)
if !ok {
t.Fatalf("adapter runtime readiness missing from control report = %+v", report)
}
if readiness["schema_version"] != "rap.remote_workspace_adapter_runtime_readiness.v1" ||
readiness["status"] != "idle" ||
readiness["diagnostic_state"] != "last_session_terminal_or_expired" ||
readiness["ready"] != false ||
readiness["active_session_count"] != 0 ||
readiness["last_adapter_session_id"] != "rap-rw-adapter-session-aaaaaaaaaaaaaaaaaaaaaaaa" ||
readiness["last_session_state"] != "closed" {
t.Fatalf("invalid no-active-session readiness after close = %+v", readiness)
}
if _, ok := readiness["adapter_session_id"]; ok {
t.Fatalf("adapter_session_id should be absent without active session = %+v", readiness)
}
if _, ok := readiness["last_preflight"]; ok {
t.Fatalf("last_preflight should be absent without active session = %+v", readiness)
}
terminalSummary, ok := readiness["terminal_session_summary"].(map[string]any)
if !ok {
t.Fatalf("terminal session summary missing after close = %+v", readiness)
}
if terminalSummary["adapter_session_id"] != "rap-rw-adapter-session-aaaaaaaaaaaaaaaaaaaaaaaa" ||
terminalSummary["schema_version"] != "rap.remote_workspace_adapter_terminal_session_summary.v1" ||
!stringAnySliceContains(terminalSummary["summary_contract"], "adapter_session_id") ||
!stringAnySliceContains(terminalSummary["summary_contract"], "session_state") ||
!stringAnySliceContains(terminalSummary["summary_contract"], "reason") ||
!stringAnySliceContains(terminalSummary["summary_contract"], "controlled_at") ||
!boolMapValue(terminalSummary["summary_features"], "adapter_session_id") ||
!boolMapValue(terminalSummary["summary_features"], "session_state") ||
!boolMapValue(terminalSummary["summary_features"], "reason") ||
!boolMapValue(terminalSummary["summary_features"], "controlled_at") ||
terminalSummary["session_state"] != "closed" ||
terminalSummary["reason"] != "unit test close" ||
terminalSummary["controlled_at"] == "" {
t.Fatalf("invalid terminal session summary after close = %+v", terminalSummary)
}
resp, err = http.Post(controlURL, "application/json", bytes.NewReader([]byte(`{"action":"close","reason":"repeat close"}`)))
if err != nil {
@@ -1665,6 +1703,255 @@ func TestRemoteWorkspaceAdapterSessionControlEndpointClosesSession(t *testing.T)
}
}
// TestRemoteWorkspaceAdapterReadinessBeforeAnySessionHasNoTerminalSummary
// inspects the "adapter_runtime_readiness" section of a report taken from a
// freshly constructed sink. It expects an idle, not-ready readiness with zero
// active and terminal sessions, no last-session or terminal-summary keys, a
// well-formed "no_session_summary", and no "last_preflight" entry.
func TestRemoteWorkspaceAdapterReadinessBeforeAnySessionHasNoTerminalSummary(t *testing.T) {
	sink := NewRemoteWorkspaceFrameProbeSink()
	report := sink.Report(time.Now().UTC())
	readiness, found := report["adapter_runtime_readiness"].(map[string]any)
	if !found {
		t.Fatalf("adapter runtime readiness missing from report = %+v", report)
	}

	// Top-level readiness fields of an untouched sink.
	readinessValid := readiness["schema_version"] == "rap.remote_workspace_adapter_runtime_readiness.v1" &&
		readiness["status"] == "idle" &&
		readiness["diagnostic_state"] == "waiting_for_session" &&
		readiness["ready"] == false &&
		readiness["active_session_count"] == 0 &&
		readiness["terminal_session_count"] == 0
	if !readinessValid {
		t.Fatalf("invalid empty readiness = %+v", readiness)
	}

	// Keys that describe a previous session must not be present yet.
	for _, absent := range []struct{ key, failure string }{
		{"last_adapter_session_id", "last_adapter_session_id should be absent before any session = %+v"},
		{"last_session_state", "last_session_state should be absent before any session = %+v"},
		{"terminal_session_summary", "terminal_session_summary should be absent before terminal history = %+v"},
	} {
		if _, present := readiness[absent.key]; present {
			t.Fatalf(absent.failure, readiness)
		}
	}

	noSessionSummary, found := readiness["no_session_summary"].(map[string]any)
	if !found {
		t.Fatalf("no_session_summary should be present before any session = %+v", readiness)
	}

	// The summary must advertise every field both in its contract list and in
	// its feature map, and then carry the idle values for those fields.
	summaryKeys := []string{"status", "diagnostic_state", "active_session_count", "terminal_session_count"}
	summaryValid := noSessionSummary["schema_version"] == "rap.remote_workspace_adapter_no_session_summary.v1"
	for _, key := range summaryKeys {
		summaryValid = summaryValid && stringAnySliceContains(noSessionSummary["summary_contract"], key)
	}
	for _, key := range summaryKeys {
		summaryValid = summaryValid && boolMapValue(noSessionSummary["summary_features"], key)
	}
	summaryValid = summaryValid &&
		noSessionSummary["status"] == "idle" &&
		noSessionSummary["diagnostic_state"] == "waiting_for_session" &&
		noSessionSummary["active_session_count"] == 0 &&
		noSessionSummary["terminal_session_count"] == 0
	if !summaryValid {
		t.Fatalf("invalid no-session summary before any session = %+v", noSessionSummary)
	}

	if _, present := readiness["last_preflight"]; present {
		t.Fatalf("last_preflight should be absent before any session = %+v", readiness)
	}
}
// TestRemoteWorkspaceAdapterReadinessSummaryExclusivity drives a single probe
// sink through three stages (no session yet, one active session, session
// closed through the control endpoint) and checks which summary keys the
// adapter_runtime_readiness section of the report carries at each stage:
// no_session_summary only before any session, neither key while a session is
// active, and terminal_session_summary only after the close.
func TestRemoteWorkspaceAdapterReadinessSummaryExclusivity(t *testing.T) {
	const (
		readinessKey       = "adapter_runtime_readiness"
		noSessionKey       = "no_session_summary"
		terminalSessionKey = "terminal_session_summary"
		sessionID          = "rap-rw-adapter-session-d1d1d1d1d1d1d1d1d1d1d1d1"
	)
	sink := NewRemoteWorkspaceFrameProbeSink()

	// Stage 1: nothing has been delivered to the sink yet.
	freshReport := sink.Report(time.Now().UTC())
	freshReadiness, found := freshReport[readinessKey].(map[string]any)
	if !found {
		t.Fatalf("fresh readiness missing from report = %+v", freshReport)
	}
	_, hasNoSession := freshReadiness[noSessionKey]
	if !hasNoSession {
		t.Fatalf("fresh readiness should include no_session_summary = %+v", freshReadiness)
	}
	_, hasTerminal := freshReadiness[terminalSessionKey]
	if hasTerminal {
		t.Fatalf("fresh readiness should not include terminal_session_summary = %+v", freshReadiness)
	}

	// Stage 2: a single accepted frame batch makes the session active.
	batch := RemoteWorkspaceFrameBatchDelivery{
		ClusterID:         "cluster-1",
		ChannelID:         "channel-rw",
		ResourceID:        "workspace-exclusivity",
		ServiceClass:      FabricServiceClassRemoteWorkspace,
		ChannelClass:      FabricServiceChannelInteractive,
		AdapterContractID: "rap.rdp_worker.remote_workspace_adapter_contract_probe.v1",
		AdapterSessionID:  sessionID,
		Frames: []RemoteWorkspaceFrameProbeRecord{{
			Channel:   "display",
			Direction: "adapter_to_client",
			Droppable: true,
		}},
	}
	_, acceptErr := sink.AcceptRemoteWorkspaceFrameBatchProbe(context.Background(), batch)
	if acceptErr != nil {
		t.Fatalf("accept frame batch: %v", acceptErr)
	}
	activeReport := sink.Report(time.Now().UTC())
	activeReadiness, found := activeReport[readinessKey].(map[string]any)
	if !found {
		t.Fatalf("active readiness missing from report = %+v", activeReport)
	}
	if !(activeReadiness["adapter_session_id"] == sessionID &&
		activeReadiness["active_session_count"] == 1) {
		t.Fatalf("invalid active readiness = %+v", activeReadiness)
	}
	_, hasNoSession = activeReadiness[noSessionKey]
	if hasNoSession {
		t.Fatalf("active readiness should not include no_session_summary = %+v", activeReadiness)
	}
	_, hasTerminal = activeReadiness[terminalSessionKey]
	if hasTerminal {
		t.Fatalf("active readiness should not include terminal_session_summary = %+v", activeReadiness)
	}

	// Stage 3: close the session through the HTTP control endpoint.
	srv := httptest.NewServer(Server{RemoteWorkspaceFrameSink: sink}.Handler())
	defer srv.Close()
	controlURL := srv.URL + "/mesh/v1/remote-workspace/adapter-sessions/" + sessionID + "/control"
	payload := bytes.NewReader([]byte(`{"action":"close","reason":"unit summary exclusivity close"}`))
	resp, err := http.Post(controlURL, "application/json", payload)
	if err != nil {
		t.Fatalf("post control: %v", err)
	}
	defer resp.Body.Close()
	if status := resp.StatusCode; status != http.StatusOK {
		raw, _ := io.ReadAll(resp.Body)
		t.Fatalf("status = %d body=%s", status, string(raw))
	}
	terminalReport := sink.Report(time.Now().UTC())
	terminalReadiness, found := terminalReport[readinessKey].(map[string]any)
	if !found {
		t.Fatalf("terminal readiness missing from report = %+v", terminalReport)
	}
	_, hasTerminal = terminalReadiness[terminalSessionKey]
	if !hasTerminal {
		t.Fatalf("terminal readiness should include terminal_session_summary = %+v", terminalReadiness)
	}
	_, hasNoSession = terminalReadiness[noSessionKey]
	if hasNoSession {
		t.Fatalf("terminal readiness should not include no_session_summary = %+v", terminalReadiness)
	}
}
// TestRemoteWorkspaceAdapterSessionControlTerminalReadinessStates runs one
// subtest per control action ("expire" and "reset"). Each subtest builds its
// own sink, accepts a single frame batch for the session, posts the action to
// the session control endpoint over HTTP, and then checks the decoded control
// result, the report counters, the adapter_runtime_readiness section and the
// terminal_session_summary it contains.
func TestRemoteWorkspaceAdapterSessionControlTerminalReadinessStates(t *testing.T) {
// The want* counters are int64 so they compare equal only to int64 values
// stored in the report map; counters left unset in a case default to 0.
tests := []struct {
action string
sessionID string
wantState string
wantClosed int64
wantExpired int64
wantReset int64
wantPrevState string
}{
{
action: "expire",
sessionID: "rap-rw-adapter-session-b0b0b0b0b0b0b0b0b0b0b0b0",
wantState: "expired",
wantClosed: 1,
wantExpired: 1,
wantPrevState: "probe_bound",
},
{
action: "reset",
sessionID: "rap-rw-adapter-session-c0c0c0c0c0c0c0c0c0c0c0c0",
wantState: "reset",
wantClosed: 1,
wantReset: 1,
wantPrevState: "probe_bound",
},
}
for _, tt := range tests {
t.Run(tt.action, func(t *testing.T) {
// A fresh sink per subtest keeps the counters asserted below independent
// of the other action.
sink := NewRemoteWorkspaceFrameProbeSink()
delivery := RemoteWorkspaceFrameBatchDelivery{
ClusterID: "cluster-1",
ChannelID: "channel-rw",
ResourceID: "workspace-" + tt.action,
ServiceClass: FabricServiceClassRemoteWorkspace,
ChannelClass: FabricServiceChannelInteractive,
AdapterContractID: "rap.rdp_worker.remote_workspace_adapter_contract_probe.v1",
AdapterSessionID: tt.sessionID,
Frames: []RemoteWorkspaceFrameProbeRecord{{
Channel: "display",
Direction: "adapter_to_client",
Droppable: true,
}},
}
if _, err := sink.AcceptRemoteWorkspaceFrameBatchProbe(context.Background(), delivery); err != nil {
t.Fatalf("accept frame batch: %v", err)
}
server := httptest.NewServer(Server{RemoteWorkspaceFrameSink: sink}.Handler())
defer server.Close()
// %q quotes the action so the request body stays valid JSON.
body := bytes.NewReader([]byte(fmt.Sprintf(`{"action":%q,"reason":"unit terminal readiness"}`, tt.action)))
resp, err := http.Post(server.URL+"/mesh/v1/remote-workspace/adapter-sessions/"+tt.sessionID+"/control", "application/json", body)
if err != nil {
t.Fatalf("post control: %v", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
raw, _ := io.ReadAll(resp.Body)
t.Fatalf("status = %d body=%s", resp.StatusCode, string(raw))
}
var result RemoteWorkspaceAdapterSessionControlResult
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
t.Fatalf("decode control result: %v", err)
}
// The control response must echo the action and session and report the
// state transition with no active sessions left.
if !result.Accepted ||
result.Action != tt.action ||
result.AdapterSessionID != tt.sessionID ||
result.PreviousState != tt.wantPrevState ||
result.SessionState != tt.wantState ||
result.ActiveSessions != 0 {
t.Fatalf("control result = %+v", result)
}
// Top-level report counters and last-control fields.
report := sink.Report(time.Now().UTC())
if report["active_session_count"] != 0 ||
report["session_closed_total"] != tt.wantClosed ||
report["session_expired_total"] != tt.wantExpired ||
report["session_reset_total"] != tt.wantReset ||
report["last_controlled_adapter_session_id"] != tt.sessionID ||
report["last_session_control_action"] != tt.action ||
report["last_session_control_state"] != tt.wantState {
t.Fatalf("terminal control report = %+v", report)
}
readiness, ok := report["adapter_runtime_readiness"].(map[string]any)
if !ok {
t.Fatalf("adapter runtime readiness missing from report = %+v", report)
}
if readiness["status"] != "idle" ||
readiness["diagnostic_state"] != "last_session_terminal_or_expired" ||
readiness["ready"] != false ||
readiness["active_session_count"] != 0 ||
readiness["last_adapter_session_id"] != tt.sessionID ||
readiness["last_session_state"] != tt.wantState {
t.Fatalf("invalid terminal readiness = %+v", readiness)
}
// Keys that describe an active session or the empty state must be absent
// once the only session has been terminated.
if _, ok := readiness["adapter_session_id"]; ok {
t.Fatalf("adapter_session_id should be absent without active session = %+v", readiness)
}
if _, ok := readiness["last_preflight"]; ok {
t.Fatalf("last_preflight should be absent without active session = %+v", readiness)
}
if _, ok := readiness["no_session_summary"]; ok {
t.Fatalf("no_session_summary should be absent for terminal session history = %+v", readiness)
}
terminalSummary, ok := readiness["terminal_session_summary"].(map[string]any)
if !ok {
t.Fatalf("terminal session summary missing = %+v", readiness)
}
// NOTE(review): the controlled_at check only rejects an empty string; a
// missing key (nil) compares unequal to "" and therefore passes - confirm
// whether presence should be asserted as well.
if terminalSummary["adapter_session_id"] != tt.sessionID ||
terminalSummary["schema_version"] != "rap.remote_workspace_adapter_terminal_session_summary.v1" ||
!stringAnySliceContains(terminalSummary["summary_contract"], "adapter_session_id") ||
!stringAnySliceContains(terminalSummary["summary_contract"], "session_state") ||
!stringAnySliceContains(terminalSummary["summary_contract"], "reason") ||
!stringAnySliceContains(terminalSummary["summary_contract"], "controlled_at") ||
!boolMapValue(terminalSummary["summary_features"], "adapter_session_id") ||
!boolMapValue(terminalSummary["summary_features"], "session_state") ||
!boolMapValue(terminalSummary["summary_features"], "reason") ||
!boolMapValue(terminalSummary["summary_features"], "controlled_at") ||
terminalSummary["session_state"] != tt.wantState ||
terminalSummary["reason"] != "unit terminal readiness" ||
terminalSummary["controlled_at"] == "" {
t.Fatalf("invalid terminal session summary = %+v", terminalSummary)
}
})
}
}
func TestRemoteWorkspaceAdapterSessionControlRejectsInvalidRequests(t *testing.T) {
sink := NewRemoteWorkspaceFrameProbeSink()
server := httptest.NewServer(Server{RemoteWorkspaceFrameSink: sink}.Handler())
@@ -3064,6 +3351,19 @@ func TestRemoteWorkspaceAdapterSessionMailboxPreflightIsReadOnly(t *testing.T) {
}
if preflight.ResumeFrom != "checkpoint" ||
preflight.ResumeSequence != 2 ||
preflight.DiagnosticState != "ready" ||
preflight.RecommendedAction != "resume_from_cursor" ||
preflight.ActionReason != "cursor_window_available" ||
preflight.OperatorSummary != "consumer cursor can resume from requested window" ||
preflight.OperatorStatus != "ready_to_resume" ||
preflight.OperatorSeverity != "ok" ||
anyInt64(preflight.ActionContext["resume_sequence"]) != 2 ||
anyInt64(preflight.ActionContext["first_retained_sequence"]) != 1 ||
preflight.OperatorSummaryFields["diagnostic_state"] != "ready" ||
preflight.OperatorSummaryFields["recommended_action"] != "resume_from_cursor" ||
preflight.OperatorSummaryFields["operator_status"] != "ready_to_resume" ||
preflight.OperatorSummaryFields["operator_severity"] != "ok" ||
!stringSliceContains(preflight.ActionHints, "resume_from_requested_cursor") ||
preflight.ExpectedAvailableCount != 1 ||
preflight.ExpectedReturnedCount != 1 ||
preflight.ExpectedSkippedCount != 2 ||
@@ -3079,6 +3379,547 @@ func TestRemoteWorkspaceAdapterSessionMailboxPreflightIsReadOnly(t *testing.T) {
reportAfter["current_session_mailbox_consumer_ack_total"] != reportBefore["current_session_mailbox_consumer_ack_total"] {
t.Fatalf("preflight mutated report before=%+v after=%+v", reportBefore, reportAfter)
}
if reportAfter["mailbox_preflight_total"] != int64(2) ||
reportAfter["mailbox_preflight_ack_total"] != int64(1) ||
reportAfter["mailbox_preflight_checkpoint_total"] != int64(1) ||
reportAfter["last_mailbox_preflight_adapter_session_id"] != sessionID ||
reportAfter["last_mailbox_preflight_consumer_id"] != "rdp-worker-probe" ||
reportAfter["last_mailbox_preflight_resume_from"] != "checkpoint" ||
reportAfter["last_mailbox_preflight_resume_sequence"] != int64(2) ||
reportAfter["last_mailbox_preflight_available_count"] != 1 ||
reportAfter["last_mailbox_preflight_returned_count"] != 1 ||
reportAfter["last_mailbox_preflight_skipped_count"] != 2 ||
reportAfter["current_session_mailbox_preflight_total"] != int64(2) ||
reportAfter["current_session_mailbox_preflight_ack_total"] != int64(1) ||
reportAfter["current_session_mailbox_preflight_checkpoint_total"] != int64(1) ||
mapInt64Value(reportAfter["current_session_mailbox_preflight_operator_status_counts"], "ready_to_resume") != 2 ||
mapInt64Value(reportAfter["current_session_mailbox_preflight_operator_severity_counts"], "ok") != 2 ||
reportAfter["current_session_last_mailbox_preflight_resume_from"] != "checkpoint" ||
reportAfter["current_session_last_mailbox_preflight_resume_sequence"] != int64(2) ||
reportAfter["current_session_last_mailbox_preflight_returned_count"] != 1 ||
reportAfter["current_session_last_mailbox_preflight_recommended_action"] != "resume_from_cursor" ||
reportAfter["last_mailbox_preflight_operator_summary"] != "consumer cursor can resume from requested window" ||
reportAfter["last_mailbox_preflight_operator_status"] != "ready_to_resume" ||
reportAfter["last_mailbox_preflight_operator_severity"] != "ok" ||
reportAfter["current_session_last_mailbox_preflight_operator_summary"] != "consumer cursor can resume from requested window" ||
reportAfter["current_session_last_mailbox_preflight_operator_status"] != "ready_to_resume" ||
reportAfter["current_session_last_mailbox_preflight_operator_severity"] != "ok" {
t.Fatalf("invalid preflight telemetry report = %+v", reportAfter)
}
readiness, ok := reportAfter["adapter_runtime_readiness"].(map[string]any)
if !ok {
t.Fatalf("adapter runtime readiness missing from report = %+v", reportAfter)
}
if readiness["mailbox_preflight_total"] != int64(2) ||
readiness["last_preflight_consumer_id"] != "rdp-worker-probe" ||
readiness["last_preflight_resume_from"] != "checkpoint" ||
readiness["last_preflight_resume_sequence"] != int64(2) ||
readiness["last_preflight_returned_count"] != 1 ||
readiness["last_preflight_skipped_count"] != 2 ||
readiness["last_preflight_recommended_action"] != "resume_from_cursor" ||
readiness["last_preflight_action_reason"] != "cursor_window_available" ||
readiness["last_preflight_operator_summary"] != "consumer cursor can resume from requested window" ||
readiness["last_preflight_operator_status"] != "ready_to_resume" ||
readiness["last_preflight_operator_severity"] != "ok" ||
mapInt64Value(readiness["mailbox_preflight_operator_status_counts"], "ready_to_resume") != 2 ||
mapInt64Value(readiness["mailbox_preflight_operator_severity_counts"], "ok") != 2 ||
readiness["preflight_attention_status"] != "clean" ||
readiness["preflight_attention_reason"] != "no_resync_required_preflight_observed" {
t.Fatalf("invalid preflight readiness = %+v", readiness)
}
lastPreflight, ok := readiness["last_preflight"].(map[string]any)
if !ok {
t.Fatalf("last preflight rollup missing from readiness = %+v", readiness)
}
if lastPreflight["consumer_id"] != "rdp-worker-probe" ||
lastPreflight["diagnostics_schema_version"] != "rap.remote_workspace_adapter_mailbox_preflight_diagnostics.v1" ||
!stringAnySliceContains(lastPreflight["diagnostics_contract"], "retained_window") ||
!stringAnySliceContains(lastPreflight["diagnostics_contract"], "remediation_checklist") ||
!stringAnySliceContains(lastPreflight["diagnostics_contract"], "attention") ||
!stringAnySliceContains(lastPreflight["diagnostics_contract"], "operator_counts") ||
!boolMapValue(lastPreflight["diagnostics_features"], "retained_window") ||
!boolMapValue(lastPreflight["diagnostics_features"], "remediation_checklist") ||
!boolMapValue(lastPreflight["diagnostics_features"], "attention") ||
!boolMapValue(lastPreflight["diagnostics_features"], "operator_counts") ||
lastPreflight["resume_from"] != "checkpoint" ||
lastPreflight["operator_status"] != "ready_to_resume" ||
lastPreflight["operator_severity"] != "ok" ||
lastPreflight["recommended_action"] != "resume_from_cursor" ||
!preflightChecklistContains(lastPreflight["remediation_checklist"], "resume_from_requested_cursor", true, true) ||
lastPreflight["remediation_checklist_status"] != "ready" ||
anyInt64(preflightChecklistCountsValue(lastPreflight["remediation_checklist_counts"], "required_count")) != 1 ||
anyInt64(preflightChecklistCountsValue(lastPreflight["remediation_checklist_counts"], "satisfied_count")) != 1 ||
anyInt64(preflightChecklistCountsValue(lastPreflight["remediation_checklist_counts"], "pending_count")) != 0 ||
mapInt64Value(lastPreflight["operator_status_counts"], "ready_to_resume") != 2 ||
mapInt64Value(lastPreflight["operator_severity_counts"], "ok") != 2 ||
lastPreflight["preflight_attention_status"] != "clean" ||
lastPreflight["preflight_attention_reason"] != "no_resync_required_preflight_observed" ||
anyInt64(lastPreflight["resume_sequence"]) != 2 ||
anyInt64(lastPreflight["first_retained_sequence"]) != 1 ||
anyInt64(lastPreflight["last_retained_sequence"]) != 3 ||
anyInt64(lastPreflight["mailbox_dropped_total"]) != 0 ||
anyInt64(lastPreflight["mailbox_preflight_total"]) != 2 {
t.Fatalf("invalid last preflight rollup = %+v", lastPreflight)
}
}
// TestRemoteWorkspaceAdapterSessionReadinessBeforePreflight accepts a single
// frame batch and then checks the report before any mailbox preflight has been
// requested: the preflight totals must be int64 zero, the readiness attention
// fields must read "unknown" / "no_preflight_observed", the nested
// last_preflight rollup must be absent, and the flat last_preflight_* fields
// must be empty.
func TestRemoteWorkspaceAdapterSessionReadinessBeforePreflight(t *testing.T) {
	sink := NewRemoteWorkspaceFrameProbeSink()
	sessionID := "rap-rw-adapter-session-a0a0a0a0a0a0a0a0a0a0a0a0"
	delivery := RemoteWorkspaceFrameBatchDelivery{
		ClusterID:         "cluster-1",
		ChannelID:         "channel-rw",
		ResourceID:        "workspace-before-preflight",
		ServiceClass:      FabricServiceClassRemoteWorkspace,
		ChannelClass:      FabricServiceChannelInteractive,
		AdapterContractID: "rap.rdp_worker.remote_workspace_adapter_contract_probe.v1",
		AdapterSessionID:  sessionID,
		Frames: []RemoteWorkspaceFrameProbeRecord{{
			Channel:   "display",
			Direction: "adapter_to_client",
			Droppable: true,
		}},
	}
	if _, err := sink.AcceptRemoteWorkspaceFrameBatchProbe(context.Background(), delivery); err != nil {
		t.Fatalf("accept frame batch: %v", err)
	}
	report := sink.Report(time.Now().UTC())
	if report["mailbox_preflight_total"] != int64(0) ||
		report["current_session_mailbox_preflight_total"] != int64(0) {
		t.Fatalf("unexpected preflight totals before preflight = %+v", report)
	}
	readiness, ok := report["adapter_runtime_readiness"].(map[string]any)
	if !ok {
		t.Fatalf("adapter runtime readiness missing from report = %+v", report)
	}
	if readiness["adapter_session_id"] != sessionID ||
		readiness["mailbox_preflight_total"] != int64(0) ||
		readiness["preflight_attention_status"] != "unknown" ||
		readiness["preflight_attention_reason"] != "no_preflight_observed" {
		t.Fatalf("invalid no-preflight readiness = %+v", readiness)
	}
	if _, ok := readiness["last_preflight"]; ok {
		t.Fatalf("last preflight rollup should be absent before preflight = %+v", readiness["last_preflight"])
	}
	// Use a checked assertion: a missing or differently typed hints field must
	// surface as a readable test failure, not as a panic inside the condition.
	actionHints, ok := readiness["last_preflight_action_hints"].([]string)
	if !ok {
		t.Fatalf("last_preflight_action_hints should be a []string before preflight = %+v", readiness)
	}
	if readiness["last_preflight_diagnostic_state"] != "" ||
		readiness["last_preflight_recommended_action"] != "" ||
		len(actionHints) != 0 {
		t.Fatalf("last preflight flat fields should be empty before preflight = %+v", readiness)
	}
}
// TestRemoteWorkspaceAdapterSessionMailboxPreflightReportsStaleCursorGap seeds
// one frame batch, records an ack cursor at sequence 1 through the mailbox
// endpoint, pushes DefaultRemoteWorkspaceAdapterMailboxCapacity+2 further
// batches, and then requests a preflight with resume_from=ack. It expects the
// snapshot to diagnose a stale cursor gap and recommend a consumer reset and
// resync. The preflight is requested a second time so that the report and
// readiness counters are expected to read 2 and the attention status is
// expected to be "repeated_resync_required".
func TestRemoteWorkspaceAdapterSessionMailboxPreflightReportsStaleCursorGap(t *testing.T) {
sink := NewRemoteWorkspaceFrameProbeSink()
sessionID := "rap-rw-adapter-session-adadadadadadadadadadadad"
delivery := RemoteWorkspaceFrameBatchDelivery{
ClusterID: "cluster-1",
ChannelID: "channel-rw",
ResourceID: "workspace-stale-0",
ServiceClass: FabricServiceClassRemoteWorkspace,
ChannelClass: FabricServiceChannelInteractive,
AdapterContractID: "rap.rdp_worker.remote_workspace_adapter_contract_probe.v1",
AdapterSessionID: sessionID,
Frames: []RemoteWorkspaceFrameProbeRecord{{
Channel: "display",
Direction: "adapter_to_client",
Droppable: true,
}},
}
if _, err := sink.AcceptRemoteWorkspaceFrameBatchProbe(context.Background(), delivery); err != nil {
t.Fatalf("accept initial frame batch: %v", err)
}
server := httptest.NewServer(Server{RemoteWorkspaceFrameSink: sink}.Handler())
defer server.Close()
// Seed the consumer's ack cursor at sequence 1; only the side effect matters,
// so the response body is closed without being read.
resp, err := http.Get(server.URL + "/mesh/v1/remote-workspace/adapter-sessions/" + sessionID + "/mailbox?consumer_id=rdp-worker-probe&ack_sequence=1&limit=1")
if err != nil {
t.Fatalf("seed ack cursor: %v", err)
}
resp.Body.Close()
// Overflow the mailbox. The same delivery value is reused with a new
// ResourceID per batch.
// NOTE(review): the expected values below (dropped 3, retained 4..19)
// presumably correspond to a mailbox capacity of 16 - confirm against
// DefaultRemoteWorkspaceAdapterMailboxCapacity.
for i := 1; i <= DefaultRemoteWorkspaceAdapterMailboxCapacity+2; i++ {
delivery.ResourceID = fmt.Sprintf("workspace-stale-%d", i)
if _, err := sink.AcceptRemoteWorkspaceFrameBatchProbe(context.Background(), delivery); err != nil {
t.Fatalf("accept overflow frame batch %d: %v", i, err)
}
}
resp, err = http.Get(server.URL + "/mesh/v1/remote-workspace/adapter-sessions/" + sessionID + "/mailbox/preflight?consumer_id=rdp-worker-probe&resume_from=ack&limit=3")
if err != nil {
t.Fatalf("get stale preflight: %v", err)
}
// The deferred Close is bound to this response's body when the defer
// statement executes; the later reassignment of resp does not change it.
defer resp.Body.Close()
var preflight RemoteWorkspaceAdapterMailboxPreflightSnapshot
if err := json.NewDecoder(resp.Body).Decode(&preflight); err != nil {
t.Fatalf("decode stale preflight: %v", err)
}
// Decoded snapshot: cursor position, retained window, stale-gap diagnosis
// and operator guidance.
if preflight.ResumeFrom != "ack" ||
preflight.ResumeSequence != 1 ||
preflight.MailboxDepth != DefaultRemoteWorkspaceAdapterMailboxCapacity ||
preflight.MailboxDropped != 3 ||
preflight.ExpectedAvailableCount != DefaultRemoteWorkspaceAdapterMailboxCapacity ||
preflight.ExpectedReturnedCount != 3 ||
preflight.ExpectedSkippedCount != 0 ||
preflight.FirstExpectedSequence != 4 ||
preflight.LastExpectedSequence != 6 ||
preflight.FirstRetainedSequence != 4 ||
preflight.LastRetainedSequence != 19 ||
preflight.DiagnosticState != "stale_cursor_gap" ||
!preflight.StaleCursor ||
preflight.MissingDroppedCount != 2 ||
preflight.RecommendedAction != "reset_consumer_and_resync" ||
preflight.ActionReason != "consumer_cursor_before_first_retained_sequence" ||
preflight.OperatorSummary != "stale cursor gap: reset consumer and resync before resume" ||
preflight.OperatorStatus != "resync_required" ||
preflight.OperatorSeverity != "warn" ||
anyInt64(preflight.ActionContext["resume_sequence"]) != 1 ||
anyInt64(preflight.ActionContext["first_retained_sequence"]) != 4 ||
anyInt64(preflight.ActionContext["missing_dropped_count"]) != 2 ||
preflight.OperatorSummaryFields["diagnostic_state"] != "stale_cursor_gap" ||
preflight.OperatorSummaryFields["recommended_action"] != "reset_consumer_and_resync" ||
preflight.OperatorSummaryFields["operator_status"] != "resync_required" ||
preflight.OperatorSummaryFields["operator_severity"] != "warn" ||
anyInt64(preflight.OperatorSummaryFields["missing_dropped_count"]) != 2 ||
!stringSliceContains(preflight.ActionHints, "reset_consumer_cursor") ||
!stringSliceContains(preflight.ActionHints, "request_full_adapter_resync") ||
!stringSliceContains(preflight.ActionHints, "resume_from_checkpoint_after_resync") {
t.Fatalf("stale preflight = %+v", preflight)
}
// Issue the identical preflight a second time; the assertions below expect
// the per-status and per-severity counts to be 2.
resp, err = http.Get(server.URL + "/mesh/v1/remote-workspace/adapter-sessions/" + sessionID + "/mailbox/preflight?consumer_id=rdp-worker-probe&resume_from=ack&limit=3")
if err != nil {
t.Fatalf("get repeated stale preflight: %v", err)
}
resp.Body.Close()
// Flat last_* and current_session_* telemetry in the top-level report.
report := sink.Report(time.Now().UTC())
if report["last_mailbox_preflight_diagnostic_state"] != "stale_cursor_gap" ||
report["last_mailbox_preflight_stale_cursor"] != true ||
report["last_mailbox_preflight_missing_dropped_count"] != 2 ||
report["last_mailbox_preflight_recommended_action"] != "reset_consumer_and_resync" ||
report["last_mailbox_preflight_action_reason"] != "consumer_cursor_before_first_retained_sequence" ||
report["last_mailbox_preflight_operator_summary"] != "stale cursor gap: reset consumer and resync before resume" ||
report["last_mailbox_preflight_operator_status"] != "resync_required" ||
report["last_mailbox_preflight_operator_severity"] != "warn" ||
report["current_session_last_mailbox_preflight_diagnostic_state"] != "stale_cursor_gap" ||
report["current_session_last_mailbox_preflight_stale_cursor"] != true ||
report["current_session_last_mailbox_preflight_missing_dropped_count"] != 2 ||
report["current_session_last_mailbox_preflight_recommended_action"] != "reset_consumer_and_resync" ||
report["current_session_last_mailbox_preflight_operator_summary"] != "stale cursor gap: reset consumer and resync before resume" ||
report["current_session_last_mailbox_preflight_operator_status"] != "resync_required" ||
report["current_session_last_mailbox_preflight_operator_severity"] != "warn" ||
mapInt64Value(report["current_session_mailbox_preflight_operator_status_counts"], "resync_required") != 2 ||
mapInt64Value(report["current_session_mailbox_preflight_operator_severity_counts"], "warn") != 2 {
t.Fatalf("stale preflight report = %+v", report)
}
readiness, ok := report["adapter_runtime_readiness"].(map[string]any)
if !ok {
t.Fatalf("adapter runtime readiness missing from report = %+v", report)
}
// Flat preflight fields mirrored into the readiness section, plus the
// attention status derived from the repeated resync-required preflights.
if readiness["last_preflight_diagnostic_state"] != "stale_cursor_gap" ||
readiness["last_preflight_stale_cursor"] != true ||
readiness["last_preflight_missing_dropped_count"] != 2 ||
readiness["last_preflight_recommended_action"] != "reset_consumer_and_resync" ||
readiness["last_preflight_action_reason"] != "consumer_cursor_before_first_retained_sequence" ||
readiness["last_preflight_operator_summary"] != "stale cursor gap: reset consumer and resync before resume" ||
readiness["last_preflight_operator_status"] != "resync_required" ||
readiness["last_preflight_operator_severity"] != "warn" ||
mapInt64Value(readiness["mailbox_preflight_operator_status_counts"], "resync_required") != 2 ||
mapInt64Value(readiness["mailbox_preflight_operator_severity_counts"], "warn") != 2 ||
readiness["preflight_attention_status"] != "repeated_resync_required" ||
readiness["preflight_attention_reason"] != "resync_required_preflight_repeated" {
t.Fatalf("stale preflight readiness = %+v", readiness)
}
lastPreflight, ok := readiness["last_preflight"].(map[string]any)
if !ok {
t.Fatalf("stale last preflight rollup missing from readiness = %+v", readiness)
}
// Nested rollup: diagnostics contract/features, the three remediation steps
// (all required, none satisfied), their counts, and the retained window.
if lastPreflight["diagnostic_state"] != "stale_cursor_gap" ||
lastPreflight["diagnostics_schema_version"] != "rap.remote_workspace_adapter_mailbox_preflight_diagnostics.v1" ||
!stringAnySliceContains(lastPreflight["diagnostics_contract"], "retained_window") ||
!stringAnySliceContains(lastPreflight["diagnostics_contract"], "remediation_checklist") ||
!stringAnySliceContains(lastPreflight["diagnostics_contract"], "attention") ||
!stringAnySliceContains(lastPreflight["diagnostics_contract"], "operator_counts") ||
!boolMapValue(lastPreflight["diagnostics_features"], "retained_window") ||
!boolMapValue(lastPreflight["diagnostics_features"], "remediation_checklist") ||
!boolMapValue(lastPreflight["diagnostics_features"], "attention") ||
!boolMapValue(lastPreflight["diagnostics_features"], "operator_counts") ||
lastPreflight["operator_status"] != "resync_required" ||
lastPreflight["operator_severity"] != "warn" ||
lastPreflight["recommended_action"] != "reset_consumer_and_resync" ||
lastPreflight["action_reason"] != "consumer_cursor_before_first_retained_sequence" ||
!preflightChecklistContains(lastPreflight["remediation_checklist"], "reset_consumer_cursor", true, false) ||
!preflightChecklistContains(lastPreflight["remediation_checklist"], "request_full_adapter_resync", true, false) ||
!preflightChecklistContains(lastPreflight["remediation_checklist"], "resume_from_checkpoint_after_resync", true, false) ||
lastPreflight["remediation_checklist_status"] != "action_required" ||
anyInt64(preflightChecklistCountsValue(lastPreflight["remediation_checklist_counts"], "required_count")) != 3 ||
anyInt64(preflightChecklistCountsValue(lastPreflight["remediation_checklist_counts"], "satisfied_count")) != 0 ||
anyInt64(preflightChecklistCountsValue(lastPreflight["remediation_checklist_counts"], "pending_count")) != 3 ||
mapInt64Value(lastPreflight["operator_status_counts"], "resync_required") != 2 ||
mapInt64Value(lastPreflight["operator_severity_counts"], "warn") != 2 ||
lastPreflight["preflight_attention_status"] != "repeated_resync_required" ||
lastPreflight["preflight_attention_reason"] != "resync_required_preflight_repeated" ||
anyInt64(lastPreflight["missing_dropped_count"]) != 2 ||
anyInt64(lastPreflight["first_retained_sequence"]) != 4 ||
anyInt64(lastPreflight["last_retained_sequence"]) != 19 ||
anyInt64(lastPreflight["mailbox_dropped_total"]) != 3 ||
anyInt64(lastPreflight["resume_sequence"]) != 1 {
t.Fatalf("invalid stale last preflight rollup = %+v", lastPreflight)
}
}
// preflightChecklistCountsValue looks up key in a checklist counts value.
// It returns the stored entry when value is a map[string]any (nil if the key
// is missing) and nil for every other dynamic type.
func preflightChecklistCountsValue(value any, key string) any {
	if counts, isMap := value.(map[string]any); isMap {
		return counts[key]
	}
	return nil
}
// mapInt64Value reads the counter stored under key. A map[string]int64 is
// indexed directly; a map[string]any entry is converted through anyInt64.
// Any other dynamic type, and a missing key, yields 0.
func mapInt64Value(value any, key string) int64 {
	if typed, ok := value.(map[string]int64); ok {
		return typed[key]
	}
	if loose, ok := value.(map[string]any); ok {
		return anyInt64(loose[key])
	}
	return 0
}
// boolMapValue reports whether the flag stored under key is true. It accepts
// a map[string]bool or a map[string]any; in the latter an entry that is not a
// bool counts as false, as does a missing key or any other value type.
func boolMapValue(value any, key string) bool {
	if flags, ok := value.(map[string]bool); ok {
		return flags[key]
	}
	loose, ok := value.(map[string]any)
	if !ok {
		return false
	}
	enabled, isBool := loose[key].(bool)
	return isBool && enabled
}
// preflightDiagnosticsContractCompatible reports whether rollup lists every
// required diagnostics feature in "diagnostics_contract" and also flags it as
// true in "diagnostics_features".
func preflightDiagnosticsContractCompatible(rollup map[string]any) bool {
	contract := rollup["diagnostics_contract"]
	features := rollup["diagnostics_features"]
	required := [...]string{"retained_window", "remediation_checklist", "attention", "operator_counts"}
	for i := range required {
		if !stringAnySliceContains(contract, required[i]) {
			return false
		}
		if !boolMapValue(features, required[i]) {
			return false
		}
	}
	return true
}
// terminalSessionSummaryContractCompatible reports whether summary lists every
// required terminal-session field in "summary_contract" and also flags it as
// true in "summary_features".
func terminalSessionSummaryContractCompatible(summary map[string]any) bool {
	contract := summary["summary_contract"]
	features := summary["summary_features"]
	required := [...]string{"adapter_session_id", "session_state", "reason", "controlled_at"}
	for i := range required {
		if !stringAnySliceContains(contract, required[i]) {
			return false
		}
		if !boolMapValue(features, required[i]) {
			return false
		}
	}
	return true
}
// noSessionSummaryContractCompatible reports whether summary lists every
// required no-session field in "summary_contract" and also flags it as true
// in "summary_features".
func noSessionSummaryContractCompatible(summary map[string]any) bool {
	contract := summary["summary_contract"]
	features := summary["summary_features"]
	required := [...]string{"status", "diagnostic_state", "active_session_count", "terminal_session_count"}
	for i := range required {
		if !stringAnySliceContains(contract, required[i]) {
			return false
		}
		if !boolMapValue(features, required[i]) {
			return false
		}
	}
	return true
}
// stringAnySliceContains reports whether want occurs in value, which may be a
// []string or a []any. Elements of a []any match only when they hold a string
// equal to want. Any other value type yields false.
func stringAnySliceContains(value any, want string) bool {
	if typed, ok := value.([]string); ok {
		for i := range typed {
			if typed[i] == want {
				return true
			}
		}
		return false
	}
	if loose, ok := value.([]any); ok {
		for i := range loose {
			if loose[i] == want {
				return true
			}
		}
	}
	return false
}
// preflightChecklistContains reports whether the checklist in value has an
// entry whose "step", "required" and "satisfied" fields equal the arguments
// and whose "source_hint" field is true. value may be a []map[string]any or a
// []any; elements of a []any that are not map[string]any are ignored.
func preflightChecklistContains(value any, step string, required bool, satisfied bool) bool {
	matches := func(entry map[string]any) bool {
		return entry["step"] == step &&
			entry["required"] == required &&
			entry["satisfied"] == satisfied &&
			entry["source_hint"] == true
	}
	if typed, ok := value.([]map[string]any); ok {
		for i := range typed {
			if matches(typed[i]) {
				return true
			}
		}
		return false
	}
	if loose, ok := value.([]any); ok {
		for i := range loose {
			if entry, isMap := loose[i].(map[string]any); isMap && matches(entry) {
				return true
			}
		}
	}
	return false
}
// stringSliceContains reports whether want is an element of items.
// A nil or empty slice contains nothing.
func stringSliceContains(items []string, want string) bool {
	for idx := 0; idx < len(items); idx++ {
		if items[idx] != want {
			continue
		}
		return true
	}
	return false
}
// anyInt64 converts a loosely typed numeric value to int64.
//
// Every built-in signed and unsigned integer width is accepted, as well as
// float32 and float64 (truncated toward zero). Previously only int, int64 and
// float64 were recognised, so a counter stored with any other numeric type
// silently became 0 and comparisons against 0 passed vacuously.
//
// Values that are not numeric, including nil, yield 0. Unsigned values above
// math.MaxInt64 wrap, as with any Go integer conversion.
func anyInt64(value any) int64 {
	switch v := value.(type) {
	case int:
		return int64(v)
	case int8:
		return int64(v)
	case int16:
		return int64(v)
	case int32:
		return int64(v)
	case int64:
		return v
	case uint:
		return int64(v)
	case uint8:
		return int64(v)
	case uint16:
		return int64(v)
	case uint32:
		return int64(v)
	case uint64:
		return int64(v)
	case float32:
		return int64(v)
	case float64:
		return int64(v)
	default:
		return 0
	}
}
// TestRemoteWorkspacePreflightDiagnosticsContractCompatibility checks that
// preflightDiagnosticsContractCompatible accepts a rollup advertising all four
// diagnostics features and rejects rollups with a missing contract entry, a
// missing feature flag, or a feature flag set to false.
func TestRemoteWorkspacePreflightDiagnosticsContractCompatibility(t *testing.T) {
	baseline := map[string]any{
		"diagnostics_contract": []string{"retained_window", "remediation_checklist", "attention", "operator_counts"},
		"diagnostics_features": map[string]bool{"retained_window": true, "remediation_checklist": true, "attention": true, "operator_counts": true},
	}
	if accepted := preflightDiagnosticsContractCompatible(baseline); !accepted {
		t.Fatalf("expected contract/features to be compatible")
	}

	type scenario struct {
		label    string
		contract []string
		features map[string]bool
	}
	scenarios := []scenario{
		{
			label:    "missing contract item",
			contract: []string{"retained_window", "remediation_checklist", "attention"},
			features: map[string]bool{"retained_window": true, "remediation_checklist": true, "attention": true, "operator_counts": true},
		},
		{
			label:    "missing feature flag",
			contract: []string{"retained_window", "remediation_checklist", "attention", "operator_counts"},
			features: map[string]bool{"retained_window": true, "remediation_checklist": true, "attention": true},
		},
		{
			label:    "false feature flag",
			contract: []string{"retained_window", "remediation_checklist", "attention", "operator_counts"},
			features: map[string]bool{"retained_window": true, "remediation_checklist": true, "attention": true, "operator_counts": false},
		},
	}
	for _, sc := range scenarios {
		rollup := map[string]any{
			"diagnostics_contract": sc.contract,
			"diagnostics_features": sc.features,
		}
		t.Run(sc.label, func(t *testing.T) {
			if preflightDiagnosticsContractCompatible(rollup) {
				t.Fatalf("expected incompatible contract/features for %+v", rollup)
			}
		})
	}
}
// TestRemoteWorkspaceTerminalSessionSummaryContractCompatibility checks that
// terminalSessionSummaryContractCompatible accepts a summary advertising all
// four terminal-session fields and rejects summaries with a missing contract
// entry, a missing feature flag, or a feature flag set to false.
func TestRemoteWorkspaceTerminalSessionSummaryContractCompatibility(t *testing.T) {
	baseline := map[string]any{
		"summary_contract": []string{"adapter_session_id", "session_state", "reason", "controlled_at"},
		"summary_features": map[string]bool{"adapter_session_id": true, "session_state": true, "reason": true, "controlled_at": true},
	}
	if accepted := terminalSessionSummaryContractCompatible(baseline); !accepted {
		t.Fatalf("expected summary contract/features to be compatible")
	}

	type scenario struct {
		label    string
		contract []string
		features map[string]bool
	}
	scenarios := []scenario{
		{
			label:    "missing contract item",
			contract: []string{"adapter_session_id", "session_state", "reason"},
			features: map[string]bool{"adapter_session_id": true, "session_state": true, "reason": true, "controlled_at": true},
		},
		{
			label:    "missing feature flag",
			contract: []string{"adapter_session_id", "session_state", "reason", "controlled_at"},
			features: map[string]bool{"adapter_session_id": true, "session_state": true, "reason": true},
		},
		{
			label:    "false feature flag",
			contract: []string{"adapter_session_id", "session_state", "reason", "controlled_at"},
			features: map[string]bool{"adapter_session_id": true, "session_state": true, "reason": true, "controlled_at": false},
		},
	}
	for _, sc := range scenarios {
		summary := map[string]any{
			"summary_contract": sc.contract,
			"summary_features": sc.features,
		}
		t.Run(sc.label, func(t *testing.T) {
			if terminalSessionSummaryContractCompatible(summary) {
				t.Fatalf("expected incompatible summary contract/features for %+v", summary)
			}
		})
	}
}
// TestRemoteWorkspaceNoSessionSummaryContractCompatibility checks that
// noSessionSummaryContractCompatible accepts a no-session summary that lists
// all four contract items with their feature flags set, and rejects summaries
// that drop a contract item, drop a feature flag, or turn a flag off.
func TestRemoteWorkspaceNoSessionSummaryContractCompatibility(t *testing.T) {
	type summaryCase struct {
		name    string
		summary map[string]any
	}

	accepted := map[string]any{
		"summary_contract": []string{"status", "diagnostic_state", "active_session_count", "terminal_session_count"},
		"summary_features": map[string]bool{"status": true, "diagnostic_state": true, "active_session_count": true, "terminal_session_count": true},
	}
	if ok := noSessionSummaryContractCompatible(accepted); !ok {
		t.Fatalf("expected no-session summary contract/features to be compatible")
	}

	// Each rejected case deviates from the accepted summary in exactly one place.
	rejected := []summaryCase{
		{
			name: "missing contract item",
			summary: map[string]any{
				"summary_contract": []string{"status", "diagnostic_state", "active_session_count"},
				"summary_features": map[string]bool{"status": true, "diagnostic_state": true, "active_session_count": true, "terminal_session_count": true},
			},
		},
		{
			name: "missing feature flag",
			summary: map[string]any{
				"summary_contract": []string{"status", "diagnostic_state", "active_session_count", "terminal_session_count"},
				"summary_features": map[string]bool{"status": true, "diagnostic_state": true, "active_session_count": true},
			},
		},
		{
			name: "false feature flag",
			summary: map[string]any{
				"summary_contract": []string{"status", "diagnostic_state", "active_session_count", "terminal_session_count"},
				"summary_features": map[string]bool{"status": true, "diagnostic_state": true, "active_session_count": true, "terminal_session_count": false},
			},
		},
	}
	for i := range rejected {
		tc := rejected[i]
		t.Run(tc.name, func(t *testing.T) {
			if !noSessionSummaryContractCompatible(tc.summary) {
				return
			}
			t.Fatalf("expected incompatible no-session summary contract/features for %+v", tc.summary)
		})
	}
}
func TestRemoteWorkspaceAdapterSessionMailboxPreflightRejectsInvalidRequests(t *testing.T) {
@@ -3145,6 +3986,57 @@ func TestRemoteWorkspaceAdapterSessionMailboxPreflightRejectsInvalidRequests(t *
}
}
// TestRemoteWorkspacePreflightAttentionReasonSummaries feeds status and
// severity counters through remoteWorkspacePreflightAttentionStatus and then
// remoteWorkspacePreflightAttentionReason, asserting the status/reason pair
// produced for clean, single-resync, repeated-resync and empty observations.
func TestRemoteWorkspacePreflightAttentionReasonSummaries(t *testing.T) {
	type attentionCase struct {
		name           string
		statusCounts   map[string]int64
		severityCounts map[string]int64
		wantStatus     string
		wantReason     string
	}

	cases := []attentionCase{
		{
			name:           "clean ready",
			statusCounts:   map[string]int64{"ready_to_resume": 1},
			severityCounts: map[string]int64{"ok": 1},
			wantStatus:     "clean",
			wantReason:     "no_resync_required_preflight_observed",
		},
		{
			name:           "single resync",
			statusCounts:   map[string]int64{"resync_required": 1},
			severityCounts: map[string]int64{"warn": 1},
			wantStatus:     "needs_attention",
			wantReason:     "resync_required_preflight_observed",
		},
		{
			name:           "repeated resync",
			statusCounts:   map[string]int64{"resync_required": 2},
			severityCounts: map[string]int64{"warn": 2},
			wantStatus:     "repeated_resync_required",
			wantReason:     "resync_required_preflight_repeated",
		},
		{
			name:           "none observed",
			statusCounts:   map[string]int64{},
			severityCounts: map[string]int64{},
			wantStatus:     "unknown",
			wantReason:     "no_preflight_observed",
		},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			// The reason is derived from the computed status, so the status is
			// verified first and then passed on unchanged.
			gotStatus := remoteWorkspacePreflightAttentionStatus(tc.statusCounts, tc.severityCounts)
			if gotStatus != tc.wantStatus {
				t.Fatalf("status=%q want %q", gotStatus, tc.wantStatus)
			}
			if gotReason := remoteWorkspacePreflightAttentionReason(gotStatus, tc.statusCounts, tc.severityCounts); gotReason != tc.wantReason {
				t.Fatalf("reason=%q want %q", gotReason, tc.wantReason)
			}
		})
	}
}
func TestFabricServiceChannelVPNPacketIngressHonorsDisabledBackendRelayPolicy(t *testing.T) {
publicKey, privateKey, err := ed25519.GenerateKey(nil)
if err != nil {
@@ -13,7 +13,15 @@ type Supervisor interface {
}
type StubSupervisor struct {
Version string
Version string
RemoteWorkspaceRealAdapter RemoteWorkspaceRealAdapterConfig
}
// RemoteWorkspaceRealAdapterConfig holds the requested settings for the remote
// workspace real adapter. The supervision contract built from it only reports
// whether each value is present (and the coarse shape of ArgsJSON); the raw
// values are not copied into the contract.
// NOTE(review): presumably populated from the RAP_REMOTE_WORKSPACE_REAL_ADAPTER_*
// variables listed under "config_env" — confirm against the config loader.
type RemoteWorkspaceRealAdapterConfig struct {
// EnabledRequested is echoed as "enabled_requested" in the config projection
// and the activation decision.
EnabledRequested bool
// Command is reported only as a presence flag (non-blank after trimming).
Command string
// ArgsJSON is reported as a presence flag plus a shape label derived from
// its first non-space character.
ArgsJSON string
// WorkDir is reported only as a presence flag (non-blank after trimming).
WorkDir string
}
func (s StubSupervisor) Apply(_ context.Context, desired []client.DesiredWorkload) ([]client.WorkloadStatusRequest, error) {
@@ -85,6 +93,7 @@ func (s StubSupervisor) applyOne(workload client.DesiredWorkload) client.Workloa
payload["backend_relay_steady_state"] = false
payload["channels"] = remoteWorkspaceAdapterChannels()
payload["frame_batch_contract"] = remoteWorkspaceFrameBatchContract()
payload["real_adapter_supervision"] = remoteWorkspaceRealAdapterSupervisionContract(s.RemoteWorkspaceRealAdapter)
payload["traffic"] = "none"
return client.WorkloadStatusRequest{
ReportedState: "running",
@@ -93,6 +102,20 @@ func (s StubSupervisor) applyOne(workload client.DesiredWorkload) client.Workloa
StatusPayload: payload,
}
}
if serviceType == "rdp-worker" && runtimeMode == "native" && boolConfig(workload.Config, "real_adapter_supervision") {
payload["reason"] = "remote_workspace_real_adapter_supervision_disabled"
payload["execution_mode"] = "real_adapter_supervision_disabled"
payload["service_class"] = "remote_workspace"
payload["traffic"] = "blocked"
payload["payload_traffic"] = "none"
payload["real_adapter_supervision"] = remoteWorkspaceRealAdapterSupervisionContract(s.RemoteWorkspaceRealAdapter)
return client.WorkloadStatusRequest{
ReportedState: "degraded",
RuntimeMode: runtimeMode,
Version: version,
StatusPayload: payload,
}
}
payload["reason"] = "service_runtime_not_implemented"
payload["traffic"] = "blocked"
return client.WorkloadStatusRequest{
@@ -152,6 +175,166 @@ func remoteWorkspaceFrameBatchContract() map[string]any {
}
}
// remoteWorkspaceRealAdapterSupervisionContract builds the status payload
// section describing real-adapter supervision. The contract always reports
// supervision as disabled with no payload traffic; the optional config only
// influences the nested projection, activation decision and precondition
// sections. When several configs are passed, only the first one is used; with
// none, the zero-value config applies.
func remoteWorkspaceRealAdapterSupervisionContract(configs ...RemoteWorkspaceRealAdapterConfig) map[string]any {
	var cfg RemoteWorkspaceRealAdapterConfig
	if len(configs) != 0 {
		cfg = configs[0]
	}

	// Static portion of the contract: identical for every config.
	contract := map[string]any{
		"schema_version":   "rap.remote_workspace_real_adapter_supervision.v1",
		"enabled":          false,
		"activation_state": "disabled_until_real_runtime_stage",
		"execution_mode":   "real_adapter_supervision_disabled",
		"payload_traffic":  "none",
		"process_model":    "external_rdp_worker_process",
		"features": map[string]any{
			"config_projection":                 true,
			"activation_decision":               true,
			"missing_gates":                     true,
			"process_health_probe":              true,
			"process_health_probe_disabled":     true,
			"process_supervisor_preconditions":  true,
			"process_supervisor_start_disabled": true,
			"raw_values_redacted":               true,
		},
		"config_env": []string{
			"RAP_REMOTE_WORKSPACE_REAL_ADAPTER_ENABLED",
			"RAP_REMOTE_WORKSPACE_REAL_ADAPTER_COMMAND",
			"RAP_REMOTE_WORKSPACE_REAL_ADAPTER_ARGS_JSON",
			"RAP_REMOTE_WORKSPACE_REAL_ADAPTER_WORKDIR",
		},
		"status_contract": []string{
			"schema_version",
			"enabled",
			"activation_state",
			"execution_mode",
			"payload_traffic",
			"process_model",
			"config_projection",
			"activation_decision",
			"process_supervisor_preconditions",
			"process_health_probe",
			"features",
			"config_env",
			"status_contract",
		},
		"guardrails": []string{
			"contract_probe_remains_default",
			"no_payload_forwarding_until_real_runtime_stage",
			"backend_relay_not_steady_state",
			"fabric_service_channel_required",
		},
	}

	// Nested sections, three of which are derived from the selected config.
	contract["config_projection"] = remoteWorkspaceRealAdapterConfigProjection(cfg)
	contract["activation_decision"] = remoteWorkspaceRealAdapterActivationDecision(cfg)
	contract["process_supervisor_preconditions"] = remoteWorkspaceRealAdapterProcessSupervisorPreconditions(cfg)
	contract["process_health_probe"] = remoteWorkspaceRealAdapterProcessHealthProbe()
	return contract
}
// remoteWorkspaceRealAdapterProcessHealthProbe returns the health-probe
// section of the real-adapter supervision contract. The probe is reported as
// disabled, and every required signal is also listed as missing.
func remoteWorkspaceRealAdapterProcessHealthProbe() map[string]any {
	// signals returns a fresh slice on every call so that the required and
	// missing lists never share a backing array.
	signals := func() []string {
		return []string{
			"process_started",
			"process_exit_status",
			"adapter_control_channel_ready",
			"fabric_service_channel_bound",
			"payload_forwarding_contract_ready",
		}
	}

	probe := make(map[string]any, 7)
	probe["schema_version"] = "rap.remote_workspace_real_adapter_process_health_probe.v1"
	probe["health_probe_enabled"] = false
	probe["reason"] = "disabled_until_real_runtime_stage"
	probe["payload_traffic"] = "none"
	probe["probe_model"] = "external_process_health"
	probe["required_signals"] = signals()
	probe["missing_signals"] = signals()
	return probe
}
// remoteWorkspaceRealAdapterProcessSupervisorPreconditions returns the
// process-supervisor section of the real-adapter supervision contract.
// Process start is always reported as not allowed; the config contributes
// only presence flags for the command, working directory and arguments, and
// every required check is also listed as missing.
func remoteWorkspaceRealAdapterProcessSupervisorPreconditions(config RemoteWorkspaceRealAdapterConfig) map[string]any {
	// provided reports whether a config value is non-blank after trimming.
	provided := func(raw string) bool {
		return strings.TrimSpace(raw) != ""
	}
	// checks returns a fresh slice per call so the two lists stay independent.
	checks := func() []string {
		return []string{
			"real_runtime_stage_enabled",
			"command_config_validated",
			"workdir_config_validated",
			"process_identity_policy_bound",
			"fabric_service_channel_runtime_ready",
			"payload_forwarding_contract_enabled",
			"health_probe_contract_enabled",
		}
	}

	preconditions := map[string]any{
		"schema_version":        "rap.remote_workspace_real_adapter_process_supervisor_preconditions.v1",
		"process_start_allowed": false,
		"reason":                "disabled_until_real_runtime_stage",
	}
	preconditions["command_config_present"] = provided(config.Command)
	preconditions["workdir_config_present"] = provided(config.WorkDir)
	preconditions["args_config_present"] = provided(config.ArgsJSON)
	preconditions["required_checks"] = checks()
	preconditions["missing_checks"] = checks()
	return preconditions
}
// remoteWorkspaceRealAdapterActivationDecision returns the activation-decision
// section of the real-adapter supervision contract. The decision is always
// "blocked" with activation disallowed; the only config-dependent field is
// "enabled_requested", and every required gate is also listed as missing.
func remoteWorkspaceRealAdapterActivationDecision(config RemoteWorkspaceRealAdapterConfig) map[string]any {
	// gates returns a fresh slice per call so the two lists stay independent.
	gates := func() []string {
		return []string{
			"real_runtime_stage_enabled",
			"fabric_service_channel_runtime_ready",
			"adapter_process_supervisor_enabled",
			"payload_forwarding_contract_enabled",
		}
	}

	decision := make(map[string]any, 8)
	decision["schema_version"] = "rap.remote_workspace_real_adapter_activation_decision.v1"
	decision["decision"] = "blocked"
	decision["reason"] = "real_runtime_stage_not_enabled"
	decision["enabled_requested"] = config.EnabledRequested
	decision["activation_allowed"] = false
	decision["payload_traffic"] = "none"
	decision["required_gates"] = gates()
	decision["missing_gates"] = gates()
	return decision
}
// remoteWorkspaceRealAdapterConfigProjection summarises a real-adapter config
// without copying its raw values: each string setting becomes a presence flag,
// the arguments additionally get a shape label from
// remoteWorkspaceArgsJSONShape, and activation is always reported as
// disallowed.
func remoteWorkspaceRealAdapterConfigProjection(config RemoteWorkspaceRealAdapterConfig) map[string]any {
	// nonBlank reports whether a setting has any non-whitespace content.
	nonBlank := func(raw string) bool {
		return strings.TrimSpace(raw) != ""
	}

	projection := make(map[string]any, 8)
	projection["schema_version"] = "rap.remote_workspace_real_adapter_config_projection.v1"
	projection["enabled_requested"] = config.EnabledRequested
	projection["activation_allowed"] = false
	projection["command_present"] = nonBlank(config.Command)
	projection["args_json_present"] = nonBlank(config.ArgsJSON)
	projection["args_json_shape"] = remoteWorkspaceArgsJSONShape(config.ArgsJSON)
	projection["workdir_present"] = nonBlank(config.WorkDir)
	projection["raw_values_redacted"] = true
	return projection
}
// remoteWorkspaceArgsJSONShape labels an arguments string by its first
// non-whitespace character: "absent" when nothing remains after trimming,
// "json_array" for a leading '[', "json_object" for a leading '{', and
// "opaque" otherwise. The value is not parsed, so the label is only a hint
// about the intended shape.
func remoteWorkspaceArgsJSONShape(value string) string {
	candidate := strings.TrimSpace(value)
	if len(candidate) == 0 {
		return "absent"
	}
	// Both markers are single-byte ASCII, so inspecting the first byte is
	// equivalent to a prefix comparison.
	switch candidate[0] {
	case '[':
		return "json_array"
	case '{':
		return "json_object"
	}
	return "opaque"
}
func serviceTrafficMode(serviceType string) string {
switch serviceType {
case "core-mesh":
@@ -130,4 +130,469 @@ func TestStubSupervisorRunsRDPWorkerAdapterContractProbeOnly(t *testing.T) {
frameBatch["service_class"] != "remote_workspace" {
t.Fatalf("unexpected frame batch contract: %#v", frameBatch)
}
realAdapter, ok := statuses[0].StatusPayload["real_adapter_supervision"].(map[string]any)
if !ok {
t.Fatalf("real_adapter_supervision = %#v", statuses[0].StatusPayload["real_adapter_supervision"])
}
if realAdapter["schema_version"] != "rap.remote_workspace_real_adapter_supervision.v1" ||
realAdapter["enabled"] != false ||
realAdapter["activation_state"] != "disabled_until_real_runtime_stage" ||
realAdapter["payload_traffic"] != "none" {
t.Fatalf("unexpected real adapter supervision contract: %#v", realAdapter)
}
if !realAdapterSupervisionContractCompatible(realAdapter) {
t.Fatalf("real adapter supervision contract is not compatible: %#v", realAdapter)
}
}
// TestStubSupervisorKeepsContractProbePrecedenceWhenRealAdapterAlsoRequested
// applies an rdp-worker workload that requests both the adapter contract probe
// and real-adapter supervision, and asserts that the contract-probe result is
// reported while the real-adapter activation decision stays blocked.
func TestStubSupervisorKeepsContractProbePrecedenceWhenRealAdapterAlsoRequested(t *testing.T) {
	supervisor := StubSupervisor{
		Version: "test",
		RemoteWorkspaceRealAdapter: RemoteWorkspaceRealAdapterConfig{
			EnabledRequested: true,
			Command:          "/opt/rap/bin/rdp-worker",
			ArgsJSON:         `["--future-probe"]`,
			WorkDir:          "/var/lib/rap-node-agent/rdp-worker",
		},
	}
	statuses, err := supervisor.Apply(context.Background(), []client.DesiredWorkload{
		{
			ServiceType:  "rdp-worker",
			DesiredState: "enabled",
			RuntimeMode:  "native",
			Config: map[string]any{
				"adapter_contract_probe":   true,
				"real_adapter_supervision": true,
			},
		},
	})
	if err != nil {
		t.Fatalf("apply desired workload: %v", err)
	}
	// Guard the index below so a regression fails with a message instead of
	// an index-out-of-range panic.
	if len(statuses) == 0 {
		t.Fatalf("apply returned no workload statuses")
	}
	if statuses[0].ReportedState != "running" {
		t.Fatalf("ReportedState = %q", statuses[0].ReportedState)
	}
	payload := statuses[0].StatusPayload
	if payload["execution_mode"] != "contract_probe" ||
		payload["reason"] != "remote_workspace_adapter_contract_probe_ready" ||
		payload["traffic"] != "none" {
		t.Fatalf("contract probe did not retain precedence: %#v", payload)
	}
	realAdapter, ok := payload["real_adapter_supervision"].(map[string]any)
	if !ok || !realAdapterSupervisionContractCompatible(realAdapter) {
		t.Fatalf("real_adapter_supervision = %#v", payload["real_adapter_supervision"])
	}
	// Checked assertion: report an unexpected type rather than panicking.
	decision, ok := realAdapter["activation_decision"].(map[string]any)
	if !ok {
		t.Fatalf("activation_decision = %#v", realAdapter["activation_decision"])
	}
	if realAdapter["enabled"] != false ||
		decision["decision"] != "blocked" ||
		decision["reason"] != "real_runtime_stage_not_enabled" ||
		decision["payload_traffic"] != "none" {
		t.Fatalf("unexpected activation decision under contract-probe precedence: %#v", realAdapter)
	}
}
// TestStubSupervisorKeepsRealAdapterSupervisionDisabled applies an rdp-worker
// workload that requests real-adapter supervision only, and asserts that the
// workload is reported as degraded with blocked traffic while every section
// of the supervision contract (projection, decision, features, preconditions,
// health probe) still describes a disabled adapter.
func TestStubSupervisorKeepsRealAdapterSupervisionDisabled(t *testing.T) {
	supervisor := StubSupervisor{
		Version: "test",
		RemoteWorkspaceRealAdapter: RemoteWorkspaceRealAdapterConfig{
			EnabledRequested: true,
			Command:          "/opt/rap/bin/rdp-worker",
			ArgsJSON:         `["--future-probe"]`,
			WorkDir:          "/var/lib/rap-node-agent/rdp-worker",
		},
	}
	statuses, err := supervisor.Apply(context.Background(), []client.DesiredWorkload{
		{
			ServiceType:  "rdp-worker",
			DesiredState: "enabled",
			RuntimeMode:  "native",
			Config: map[string]any{
				"real_adapter_supervision": true,
			},
		},
	})
	if err != nil {
		t.Fatalf("apply desired workload: %v", err)
	}
	// Guard the index below so a regression fails with a message instead of
	// an index-out-of-range panic.
	if len(statuses) == 0 {
		t.Fatalf("apply returned no workload statuses")
	}
	if statuses[0].ReportedState != "degraded" {
		t.Fatalf("ReportedState = %q", statuses[0].ReportedState)
	}
	payload := statuses[0].StatusPayload
	if payload["reason"] != "remote_workspace_real_adapter_supervision_disabled" ||
		payload["execution_mode"] != "real_adapter_supervision_disabled" ||
		payload["traffic"] != "blocked" ||
		payload["payload_traffic"] != "none" {
		t.Fatalf("unexpected real adapter disabled payload: %#v", payload)
	}
	realAdapter, ok := payload["real_adapter_supervision"].(map[string]any)
	if !ok || !realAdapterSupervisionContractCompatible(realAdapter) {
		t.Fatalf("real adapter supervision contract = %#v", payload["real_adapter_supervision"])
	}

	// Config projection: presence flags reflect the supplied config, raw
	// values stay redacted.
	projection, ok := realAdapter["config_projection"].(map[string]any)
	if !ok {
		t.Fatalf("config_projection = %#v", realAdapter["config_projection"])
	}
	if realAdapter["enabled"] != false ||
		projection["enabled_requested"] != true ||
		projection["activation_allowed"] != false ||
		projection["command_present"] != true ||
		projection["args_json_present"] != true ||
		projection["args_json_shape"] != "json_array" ||
		projection["workdir_present"] != true ||
		projection["raw_values_redacted"] != true {
		t.Fatalf("unexpected config projection: %#v", projection)
	}

	// Activation decision: the request is recorded but remains blocked.
	decision, ok := realAdapter["activation_decision"].(map[string]any)
	if !ok {
		t.Fatalf("activation_decision = %#v", realAdapter["activation_decision"])
	}
	if decision["decision"] != "blocked" ||
		decision["reason"] != "real_runtime_stage_not_enabled" ||
		decision["enabled_requested"] != true ||
		decision["activation_allowed"] != false ||
		decision["payload_traffic"] != "none" {
		t.Fatalf("unexpected activation decision: %#v", decision)
	}

	features, ok := realAdapter["features"].(map[string]any)
	if !ok ||
		features["config_projection"] != true ||
		features["activation_decision"] != true ||
		features["process_supervisor_preconditions"] != true ||
		features["process_supervisor_start_disabled"] != true ||
		features["missing_gates"] != true ||
		features["raw_values_redacted"] != true {
		t.Fatalf("unexpected real adapter features: %#v", realAdapter["features"])
	}

	preconditions, ok := realAdapter["process_supervisor_preconditions"].(map[string]any)
	if !ok ||
		preconditions["schema_version"] != "rap.remote_workspace_real_adapter_process_supervisor_preconditions.v1" ||
		preconditions["process_start_allowed"] != false ||
		preconditions["command_config_present"] != true ||
		preconditions["args_config_present"] != true ||
		preconditions["workdir_config_present"] != true {
		t.Fatalf("unexpected process supervisor preconditions: %#v", realAdapter["process_supervisor_preconditions"])
	}

	healthProbe, ok := realAdapter["process_health_probe"].(map[string]any)
	if !ok ||
		healthProbe["schema_version"] != "rap.remote_workspace_real_adapter_process_health_probe.v1" ||
		healthProbe["health_probe_enabled"] != false ||
		healthProbe["payload_traffic"] != "none" {
		t.Fatalf("unexpected process health probe: %#v", realAdapter["process_health_probe"])
	}
}
// TestRealAdapterSupervisionContractCompatibility asserts that the contract
// produced by remoteWorkspaceRealAdapterSupervisionContract passes
// realAdapterSupervisionContractCompatible, and that the checker rejects a
// contract once it is enabled, loses config_env entries, or loses guardrails.
//
// Every rejected case starts from a freshly built canonical contract and
// changes exactly one field, so a failure can only be attributed to that
// field and the fixtures cannot drift from the producer.
func TestRealAdapterSupervisionContractCompatibility(t *testing.T) {
	compatible := remoteWorkspaceRealAdapterSupervisionContract()
	if !realAdapterSupervisionContractCompatible(compatible) {
		t.Fatalf("expected real adapter supervision contract to be compatible")
	}
	tests := []struct {
		name string
		// mutate applies the single deviation under test to a canonical contract.
		mutate func(contract map[string]any)
	}{
		{
			name: "enabled",
			mutate: func(contract map[string]any) {
				contract["enabled"] = true
			},
		},
		{
			name: "missing env",
			mutate: func(contract map[string]any) {
				contract["config_env"] = []string{"RAP_REMOTE_WORKSPACE_REAL_ADAPTER_ENABLED"}
			},
		},
		{
			name: "missing guardrail",
			mutate: func(contract map[string]any) {
				contract["guardrails"] = []string{"contract_probe_remains_default"}
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// The producer returns a new map on every call, so mutating it
			// here cannot leak into other subtests.
			contract := remoteWorkspaceRealAdapterSupervisionContract()
			tt.mutate(contract)
			if realAdapterSupervisionContractCompatible(contract) {
				t.Fatalf("expected incompatible contract for %+v", contract)
			}
		})
	}
}
// TestRealAdapterConfigProjectionCompatibility builds the supervision contract
// for several configs and asserts that each stays compatible and that its
// config projection reports the expected presence flags and argument shape.
func TestRealAdapterConfigProjectionCompatibility(t *testing.T) {
	type projectionCase struct {
		name             string
		config           RemoteWorkspaceRealAdapterConfig
		enabledRequested bool
		commandPresent   bool
		argsJSONPresent  bool
		argsJSONShape    string
		workdirPresent   bool
	}

	cases := []projectionCase{
		{
			name:          "default empty",
			argsJSONShape: "absent",
		},
		{
			name: "requested array args",
			config: RemoteWorkspaceRealAdapterConfig{
				EnabledRequested: true,
				Command:          "/opt/rap/bin/rdp-worker",
				ArgsJSON:         `["--future-probe"]`,
				WorkDir:          "/var/lib/rap-node-agent/rdp-worker",
			},
			enabledRequested: true,
			commandPresent:   true,
			argsJSONPresent:  true,
			argsJSONShape:    "json_array",
			workdirPresent:   true,
		},
		{
			name: "object args shape",
			config: RemoteWorkspaceRealAdapterConfig{
				ArgsJSON: `{"arg":"value"}`,
			},
			argsJSONPresent: true,
			argsJSONShape:   "json_object",
		},
		{
			name: "opaque args shape",
			config: RemoteWorkspaceRealAdapterConfig{
				ArgsJSON: "--future-probe",
			},
			argsJSONPresent: true,
			argsJSONShape:   "opaque",
		},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			contract := remoteWorkspaceRealAdapterSupervisionContract(tc.config)
			if !realAdapterSupervisionContractCompatible(contract) {
				t.Fatalf("contract is not compatible: %#v", contract)
			}
			projection := contract["config_projection"].(map[string]any)
			// Expected projection fields; any mismatch fails with the full
			// projection for context.
			expected := map[string]any{
				"enabled_requested":   tc.enabledRequested,
				"activation_allowed":  false,
				"command_present":     tc.commandPresent,
				"args_json_present":   tc.argsJSONPresent,
				"args_json_shape":     tc.argsJSONShape,
				"workdir_present":     tc.workdirPresent,
				"raw_values_redacted": true,
			}
			for key, want := range expected {
				if projection[key] != want {
					t.Fatalf("unexpected config projection: %#v", projection)
				}
			}
		})
	}
}
// TestRealAdapterProjectionAndActivationDecisionStayAligned asserts that the
// config projection and the activation decision of one contract agree on
// "enabled_requested", both disallow activation, and that the contract-level
// payload traffic matches the decision's.
func TestRealAdapterProjectionAndActivationDecisionStayAligned(t *testing.T) {
	type alignmentCase struct {
		name             string
		config           RemoteWorkspaceRealAdapterConfig
		enabledRequested bool
	}

	cases := []alignmentCase{
		{name: "default"},
		{
			name: "requested",
			config: RemoteWorkspaceRealAdapterConfig{
				EnabledRequested: true,
				Command:          "/opt/rap/bin/rdp-worker",
				ArgsJSON:         `["--future-probe"]`,
				WorkDir:          "/var/lib/rap-node-agent/rdp-worker",
			},
			enabledRequested: true,
		},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			contract := remoteWorkspaceRealAdapterSupervisionContract(tc.config)
			projection := contract["config_projection"].(map[string]any)
			decision := contract["activation_decision"].(map[string]any)

			requestAgrees := projection["enabled_requested"] == decision["enabled_requested"] &&
				projection["enabled_requested"] == tc.enabledRequested
			activationBlocked := projection["activation_allowed"] == false &&
				decision["activation_allowed"] == false &&
				contract["enabled"] == false
			trafficAgrees := contract["payload_traffic"] == decision["payload_traffic"]

			if !(requestAgrees && activationBlocked && trafficAgrees) {
				t.Fatalf("projection and activation decision are not aligned: contract=%#v", contract)
			}
		})
	}
}
// realAdapterSupervisionContractCompatible reports whether contract matches
// the v1 real-adapter supervision contract in its disabled form: the expected
// schema versions and fixed scalar values in every section, all feature flags
// set to true, and every expected gate, check, signal, environment variable,
// status field and guardrail present in its list.
func realAdapterSupervisionContractCompatible(contract map[string]any) bool {
	const disabledReason = "disabled_until_real_runtime_stage"

	// matches reports whether section holds exactly the expected value for
	// every expected key.
	matches := func(section, expected map[string]any) bool {
		for key, want := range expected {
			if section[key] != want {
				return false
			}
		}
		return true
	}
	// listed reports whether every item occurs in each of the given lists.
	listed := func(items []string, lists ...any) bool {
		for _, item := range items {
			for _, list := range lists {
				if !anyStringSliceContains(list, item) {
					return false
				}
			}
		}
		return true
	}

	if !matches(contract, map[string]any{
		"schema_version":   "rap.remote_workspace_real_adapter_supervision.v1",
		"enabled":          false,
		"activation_state": disabledReason,
		"payload_traffic":  "none",
	}) {
		return false
	}

	projection, isMap := contract["config_projection"].(map[string]any)
	if !isMap || !matches(projection, map[string]any{
		"schema_version":      "rap.remote_workspace_real_adapter_config_projection.v1",
		"activation_allowed":  false,
		"raw_values_redacted": true,
	}) {
		return false
	}

	decision, isMap := contract["activation_decision"].(map[string]any)
	if !isMap || !matches(decision, map[string]any{
		"schema_version":     "rap.remote_workspace_real_adapter_activation_decision.v1",
		"decision":           "blocked",
		"reason":             "real_runtime_stage_not_enabled",
		"activation_allowed": false,
		"payload_traffic":    "none",
	}) {
		return false
	}
	if !listed([]string{
		"real_runtime_stage_enabled",
		"fabric_service_channel_runtime_ready",
		"adapter_process_supervisor_enabled",
		"payload_forwarding_contract_enabled",
	}, decision["required_gates"], decision["missing_gates"]) {
		return false
	}

	preconditions, isMap := contract["process_supervisor_preconditions"].(map[string]any)
	if !isMap || !matches(preconditions, map[string]any{
		"schema_version":        "rap.remote_workspace_real_adapter_process_supervisor_preconditions.v1",
		"process_start_allowed": false,
		"reason":                disabledReason,
	}) {
		return false
	}
	if !listed([]string{
		"real_runtime_stage_enabled",
		"command_config_validated",
		"workdir_config_validated",
		"process_identity_policy_bound",
		"fabric_service_channel_runtime_ready",
		"payload_forwarding_contract_enabled",
		"health_probe_contract_enabled",
	}, preconditions["required_checks"], preconditions["missing_checks"]) {
		return false
	}

	healthProbe, isMap := contract["process_health_probe"].(map[string]any)
	if !isMap || !matches(healthProbe, map[string]any{
		"schema_version":       "rap.remote_workspace_real_adapter_process_health_probe.v1",
		"health_probe_enabled": false,
		"reason":               disabledReason,
		"payload_traffic":      "none",
		"probe_model":          "external_process_health",
	}) {
		return false
	}
	if !listed([]string{
		"process_started",
		"process_exit_status",
		"adapter_control_channel_ready",
		"fabric_service_channel_bound",
		"payload_forwarding_contract_ready",
	}, healthProbe["required_signals"], healthProbe["missing_signals"]) {
		return false
	}

	features, isMap := contract["features"].(map[string]any)
	if !isMap || !matches(features, map[string]any{
		"config_projection":                 true,
		"activation_decision":               true,
		"missing_gates":                     true,
		"process_health_probe":              true,
		"process_health_probe_disabled":     true,
		"process_supervisor_preconditions":  true,
		"process_supervisor_start_disabled": true,
		"raw_values_redacted":               true,
	}) {
		return false
	}

	if !listed([]string{
		"RAP_REMOTE_WORKSPACE_REAL_ADAPTER_ENABLED",
		"RAP_REMOTE_WORKSPACE_REAL_ADAPTER_COMMAND",
		"RAP_REMOTE_WORKSPACE_REAL_ADAPTER_ARGS_JSON",
		"RAP_REMOTE_WORKSPACE_REAL_ADAPTER_WORKDIR",
	}, contract["config_env"]) {
		return false
	}
	if !listed([]string{
		"schema_version",
		"enabled",
		"activation_state",
		"execution_mode",
		"payload_traffic",
		"process_model",
		"config_projection",
		"activation_decision",
		"process_supervisor_preconditions",
		"process_health_probe",
		"features",
		"config_env",
		"status_contract",
	}, contract["status_contract"]) {
		return false
	}
	return listed([]string{
		"contract_probe_remains_default",
		"no_payload_forwarding_until_real_runtime_stage",
		"backend_relay_not_steady_state",
		"fabric_service_channel_required",
	}, contract["guardrails"])
}
// anyStringSliceContains reports whether value holds an element equal to the
// string want. Only two dynamic types are inspected: []string and []any. For
// []any, an element matches only when its dynamic type is exactly string and
// its contents equal want. Every other input, including nil, yields false.
func anyStringSliceContains(value any, want string) bool {
	if strs, ok := value.([]string); ok {
		for i := range strs {
			if strs[i] == want {
				return true
			}
		}
		return false
	}

	elems, ok := value.([]any)
	if !ok {
		return false
	}
	for _, elem := range elems {
		// Non-string elements can never equal want, so skip them.
		if text, isString := elem.(string); isString && text == want {
			return true
		}
	}
	return false
}
@@ -184,6 +184,9 @@ func (g *Gateway) Snapshot() map[string]any {
if !lastRuntimeActivityAt.IsZero() {
out["last_runtime_activity_at"] = lastRuntimeActivityAt.UTC().Format(time.RFC3339Nano)
}
if platform := gatewayPlatformSnapshot(g.InterfaceName, g.RouteCIDR); len(platform) > 0 {
out["platform"] = platform
}
return out
}
@@ -19,6 +19,8 @@ const (
iffNoPI = 0x1000
tunSetIFF = 0x400454ca
ifNameSize = 16
gatewayTunMTU = "1000"
gatewayTCPMSS = "900"
)
type tunDevice struct {
@@ -86,6 +88,9 @@ func configureGatewayInterface(name, addressCIDR, routeCIDR string) error {
if err := runCommand("ip", "addr", "replace", addressCIDR, "dev", name); err != nil {
return err
}
if err := runCommand("ip", "link", "set", "dev", name, "mtu", gatewayTunMTU); err != nil {
return err
}
if err := runCommand("ip", "link", "set", name, "up"); err != nil {
return err
}
@@ -118,11 +123,10 @@ func ensureMasqueradeRules(routeCIDR string) error {
}
func ensureMSSClampRule(interfaceName string) error {
err := ensureIPTablesRule("mangle", "FORWARD", "-i", interfaceName, "-p", "tcp", "--tcp-flags", "SYN,RST", "SYN", "-j", "TCPMSS", "--clamp-mss-to-pmtu")
if err == nil {
return nil
if err := ensureIPTablesRule("mangle", "FORWARD", "-i", interfaceName, "-p", "tcp", "--tcp-flags", "SYN,RST", "SYN", "-j", "TCPMSS", "--set-mss", gatewayTCPMSS); err != nil {
return err
}
return nil
return ensureIPTablesRule("mangle", "FORWARD", "-o", interfaceName, "-p", "tcp", "--tcp-flags", "SYN,RST", "SYN", "-j", "TCPMSS", "--set-mss", gatewayTCPMSS)
}
func defaultIPv4Interface() (string, error) {
@@ -204,3 +208,47 @@ func runCommand(name string, args ...string) error {
}
return nil
}
// gatewayPlatformSnapshot gathers a best-effort description of the Linux
// networking state around the gateway interface and returns it as a map.
//
// The map always carries "os", "interface" and "route_cidr". The remaining
// keys are optional:
//   - "ipv4_forward" and "rp_filter_<scope>" (scope is "all", "default" and
//     the interface name) are added only when the matching /proc/sys file can
//     be read.
//   - "forward_in_rule" and "forward_out_established_rule" are added when
//     interfaceName is non-empty and report whether the FORWARD rules exist.
//   - "masquerade_rule" is added when routeCIDR is non-empty; "default_egress"
//     and "egress_masquerade_rule" follow when the default IPv4 interface can
//     be determined.
func gatewayPlatformSnapshot(interfaceName, routeCIDR string) map[string]any {
	snapshot := map[string]any{
		"os":         "linux",
		"interface":  interfaceName,
		"route_cidr": routeCIDR,
	}

	if forward, err := readTrimmedFile("/proc/sys/net/ipv4/ip_forward"); err == nil {
		snapshot["ipv4_forward"] = forward
	}

	for _, scope := range []string{"all", "default", interfaceName} {
		// A blank interface name would produce a meaningless sysctl path.
		if strings.TrimSpace(scope) == "" {
			continue
		}
		rpFilter, err := readTrimmedFile(fmt.Sprintf("/proc/sys/net/ipv4/conf/%s/rp_filter", scope))
		if err != nil {
			continue
		}
		snapshot["rp_filter_"+scope] = rpFilter
	}

	if interfaceName != "" {
		snapshot["forward_in_rule"] = iptablesRulePresent("filter", "FORWARD", "-i", interfaceName, "-j", "ACCEPT")
		snapshot["forward_out_established_rule"] = iptablesRulePresent("filter", "FORWARD", "-o", interfaceName, "-m", "conntrack", "--ctstate", "RELATED,ESTABLISHED", "-j", "ACCEPT")
	}

	if routeCIDR == "" {
		return snapshot
	}
	snapshot["masquerade_rule"] = iptablesRulePresent("nat", "POSTROUTING", "-s", routeCIDR, "-j", "MASQUERADE")

	egress, err := defaultIPv4Interface()
	if err != nil || egress == "" {
		return snapshot
	}
	snapshot["default_egress"] = egress
	snapshot["egress_masquerade_rule"] = iptablesRulePresent("nat", "POSTROUTING", "-s", routeCIDR, "-o", egress, "-j", "MASQUERADE")
	return snapshot
}
// readTrimmedFile reads the whole file at path and returns its contents with
// leading and trailing whitespace removed. If the read fails, it returns the
// empty string together with the error from os.ReadFile, unchanged.
func readTrimmedFile(path string) (string, error) {
	raw, readErr := os.ReadFile(path)
	if readErr != nil {
		return "", readErr
	}
	trimmed := strings.TrimSpace(string(raw))
	return trimmed, nil
}
// iptablesRulePresent runs `iptables -t <table> -C <chain> <rule...>` and
// reports whether the command exited successfully. Any failure counts as
// "not present": a missing rule, a missing iptables binary, and any other
// command error all return false.
func iptablesRulePresent(table, chain string, rule ...string) bool {
	args := make([]string, 0, 4+len(rule))
	args = append(args, "-t", table, "-C", chain)
	args = append(args, rule...)
	runErr := exec.Command("iptables", args...).Run()
	return runErr == nil
}
@@ -21,3 +21,11 @@ func (d *tunDevice) Write(packet []byte) (int, error) {
// Close is a no-op in this build of tunDevice: it releases nothing and always
// returns nil.
// NOTE(review): this appears to be the unsupported-platform stub — confirm
// against the file's build constraints.
func (d *tunDevice) Close() error {
return nil
}
// gatewayPlatformSnapshot returns the placeholder platform description for
// this build: the OS is reported as "unsupported" and the interface name and
// route CIDR are echoed back as given. The host is not inspected.
func gatewayPlatformSnapshot(interfaceName, routeCIDR string) map[string]any {
	snapshot := make(map[string]any, 3)
	snapshot["os"] = "unsupported"
	snapshot["interface"] = interfaceName
	snapshot["route_cidr"] = routeCIDR
	return snapshot
}