Improve AI structure binary input diagnostics
CI / python (push) Has been cancelled
CI / rust (push) Has been cancelled

This commit is contained in:
2026-05-22 01:00:26 +03:00
parent 51d52ccf04
commit d93b7cb07e
3 changed files with 107 additions and 9 deletions
@@ -85,6 +85,7 @@ async def html5_ai_structure_run(
effective_project_id=effective_project_id, effective_project_id=effective_project_id,
input_path=input_path, input_path=input_path,
detected_binary_relative_path=binary_match.get("relative_path"), detected_binary_relative_path=binary_match.get("relative_path"),
detected_binary_relative_paths=binary_match.get("binary_relative_paths"),
) )
except HTTPException as error: except HTTPException as error:
return render_html5_ai_structure_error(str(error.detail)) return render_html5_ai_structure_error(str(error.detail))
@@ -262,7 +263,11 @@ def _detect_binary_tree(input_path: Path) -> dict[str, str] | None:
if parseable_files or not binary_files: if parseable_files or not binary_files:
return None return None
first = binary_files[0] first = binary_files[0]
return {"suffix": first.suffix.casefold(), "relative_path": first.relative_to(input_path).as_posix()} return {
"suffix": first.suffix.casefold(),
"relative_path": first.relative_to(input_path).as_posix(),
"binary_relative_paths": [path.relative_to(input_path).as_posix() for path in binary_files],
}
def _normalize_binary_match(value: str | dict[str, str] | None) -> dict[str, str] | None: def _normalize_binary_match(value: str | dict[str, str] | None) -> dict[str, str] | None:
@@ -270,7 +275,7 @@ def _normalize_binary_match(value: str | dict[str, str] | None) -> dict[str, str
return None return None
if isinstance(value, dict): if isinstance(value, dict):
return value return value
return {"suffix": value, "relative_path": ""} return {"suffix": value, "relative_path": "", "binary_relative_paths": []}
def _enum_text(value: object) -> str: def _enum_text(value: object) -> str:
+47 -7
View File
@@ -588,9 +588,14 @@ async def _start_ai_structure_agent_job(
effective_project_id: str, effective_project_id: str,
input_path: str, input_path: str,
detected_binary_relative_path: str | None = None, detected_binary_relative_path: str | None = None,
detected_binary_relative_paths: list[str] | None = None,
) -> AgentImportJob: ) -> AgentImportJob:
settings = _project_settings_or_404(project_id) settings = _project_settings_or_404(project_id)
binary_files = _ai_structure_binary_files(input_path, detected_binary_relative_path=detected_binary_relative_path) binary_files = _ai_structure_binary_files(
input_path,
detected_binary_relative_path=detected_binary_relative_path,
detected_binary_relative_paths=detected_binary_relative_paths,
)
if not binary_files: if not binary_files:
raise HTTPException(status_code=400, detail="Во входном пути не найдены файлы .cf или .cfe.") raise HTTPException(status_code=400, detail="Во входном пути не найдены файлы .cf или .cfe.")
@@ -599,15 +604,27 @@ async def _start_ai_structure_agent_job(
if cf_files and cfe_files: if cf_files and cfe_files:
raise HTTPException( raise HTTPException(
status_code=400, status_code=400,
detail="Во входной папке одновременно лежат .cf и .cfe. Укажите конкретный файл, который нужно подготовить для ИИ.", detail=(
"Во входной папке одновременно лежат .cf и .cfe. "
f"Найдены: {_format_binary_file_list(binary_files)}. "
"Укажите конкретный файл, который нужно подготовить для ИИ."
),
) )
source = ImportSourceKind.CF_FILE if cf_files else ImportSourceKind.CFE_FILE source = ImportSourceKind.CF_FILE if cf_files else ImportSourceKind.CFE_FILE
agent_id = _agent_id_for_source(settings, ImportSourceKind.CF_FILE) agent_id = _agent_id_for_source(settings, ImportSourceKind.CF_FILE)
if not agent_id: if not agent_id:
raise HTTPException(status_code=400, detail="В настройках проекта не выбран Windows Agent для CF/CFE.") raise HTTPException(
status_code=400,
detail="В настройках проекта не выбран Windows Agent для CF/CFE. Откройте настройки проекта и укажите `cf_agent_id`.",
)
agent_status = _agent_status_with_liveness(_agent_statuses.get(agent_id, AgentStatus(agent_id=agent_id))) agent_status = _agent_status_with_liveness(_agent_statuses.get(agent_id, AgentStatus(agent_id=agent_id)))
if agent_status.status != "online": if agent_status.status != "online":
raise HTTPException(status_code=409, detail=f"Windows Agent {agent_id} сейчас офлайн. Запустите агент и повторите.") last_seen = str(agent_status.last_seen_at or "").strip()
detail = f"Windows Agent {agent_id} сейчас офлайн."
if last_seen:
detail += f" Последний heartbeat: {last_seen}."
detail += " Запустите агент и повторите."
raise HTTPException(status_code=409, detail=detail)
agent = settings.agent if isinstance(settings.agent, dict) else {} agent = settings.agent if isinstance(settings.agent, dict) else {}
metadata: dict[str, Any] = { metadata: dict[str, Any] = {
@@ -620,7 +637,10 @@ async def _start_ai_structure_agent_job(
if len(cf_files) != 1: if len(cf_files) != 1:
raise HTTPException( raise HTTPException(
status_code=400, status_code=400,
detail="Для прямого разбора .cf укажите один конкретный файл .cf, а не папку с несколькими конфигурациями.", detail=(
"Для прямого разбора .cf укажите один конкретный файл .cf, "
f"а не папку с несколькими конфигурациями. Найдены: {_format_binary_file_list(cf_files)}."
),
) )
local_path = str(cf_files[0]) local_path = str(cf_files[0])
metadata["input_mode"] = "cf_file" metadata["input_mode"] = "cf_file"
@@ -628,7 +648,10 @@ async def _start_ai_structure_agent_job(
if len(cfe_files) != 1: if len(cfe_files) != 1:
raise HTTPException( raise HTTPException(
status_code=400, status_code=400,
detail="Для прямого разбора расширения укажите один конкретный файл .cfe, а не папку с несколькими расширениями.", detail=(
"Для прямого разбора расширения укажите один конкретный файл .cfe, "
f"а не папку с несколькими расширениями. Найдены: {_format_binary_file_list(cfe_files)}."
),
) )
cfe_file = cfe_files[0] cfe_file = cfe_files[0]
local_path = str(cfe_file) local_path = str(cfe_file)
@@ -648,10 +671,19 @@ async def _start_ai_structure_agent_job(
) )
def _ai_structure_binary_files(raw_input_path: str, detected_binary_relative_path: str | None = None) -> list[Path]: def _ai_structure_binary_files(
raw_input_path: str,
detected_binary_relative_path: str | None = None,
detected_binary_relative_paths: list[str] | None = None,
) -> list[Path]:
lowered = raw_input_path.strip().casefold() lowered = raw_input_path.strip().casefold()
if lowered.endswith(".cf") or lowered.endswith(".cfe"): if lowered.endswith(".cf") or lowered.endswith(".cfe"):
return [Path(raw_input_path)] return [Path(raw_input_path)]
if detected_binary_relative_paths:
return [
Path(ntpath.join(raw_input_path, relative_path.replace("/", "\\")))
for relative_path in detected_binary_relative_paths
]
if detected_binary_relative_path: if detected_binary_relative_path:
windows_path = ntpath.join(raw_input_path, detected_binary_relative_path.replace("/", "\\")) windows_path = ntpath.join(raw_input_path, detected_binary_relative_path.replace("/", "\\"))
return [Path(windows_path)] return [Path(windows_path)]
@@ -663,6 +695,14 @@ def _ai_structure_binary_files(raw_input_path: str, detected_binary_relative_pat
return sorted(path for path in input_path.rglob("*") if path.is_file() and path.suffix.casefold() in {".cf", ".cfe"}) return sorted(path for path in input_path.rglob("*") if path.is_file() and path.suffix.casefold() in {".cf", ".cfe"})
def _format_binary_file_list(paths: list[Path]) -> str:
visible = [path.name for path in paths[:5]]
result = ", ".join(visible)
if len(paths) > 5:
result += f" и еще {len(paths) - 5}"
return result
def _cancel_stale_extension_install_jobs(project_id: str, selected_agent_id: str) -> None: def _cancel_stale_extension_install_jobs(project_id: str, selected_agent_id: str) -> None:
now = _current_timestamp() now = _current_timestamp()
for job in list(_agent_import_jobs.values()): for job in list(_agent_import_jobs.values()):
+53
View File
@@ -2015,9 +2015,62 @@ def test_html5_ai_structure_routes_unc_directory_with_cf_through_windows_agent(m
assert copied_targets assert copied_targets
assert started["input_path"] == r"\\192.168.220.200\mst\1c\MARKA\CODEX\CF" assert started["input_path"] == r"\\192.168.220.200\mst\1c\MARKA\CODEX\CF"
assert started["detected_binary_relative_path"] == "base.cf" assert started["detected_binary_relative_path"] == "base.cf"
assert started["detected_binary_relative_paths"] == ["base.cf"]
assert "agent-import-test" in saved_runs assert "agent-import-test" in saved_runs
def test_html5_ai_structure_reports_multiple_binary_files_in_directory(tmp_path: Path):
    """A directory holding two .cf files yields an error page naming both."""
    # Arrange: two binary configuration files side by side in the input dir.
    for file_name, payload in (("first.cf", b"cf-1"), ("second.cf", b"cf-2")):
        (tmp_path / file_name).write_bytes(payload)

    client = TestClient(app)
    project_id = f"ai-many-{uuid4()}"
    agent_id = f"win-agent-{uuid4()}"

    # Register project settings that point at the agent, then send one heartbeat.
    settings_payload = {
        "name": "AI Many",
        "structure_source": "CF_FILE",
        "agent": {"cf_agent_id": agent_id},
    }
    settings_response = client.post(f"/projects/{project_id}/settings", json=settings_payload)
    assert settings_response.status_code == 200

    heartbeat_payload = {"agent_id": agent_id, "host": "test-host"}
    heartbeat_response = client.post("/agent/heartbeat", json=heartbeat_payload)
    assert heartbeat_response.status_code == 200

    # Act: run the AI-structure endpoint against the directory itself.
    form = {
        "project_id": project_id,
        "input_path": str(tmp_path),
        "output_path": str(tmp_path / "out"),
    }
    response = client.post(f"/html5/projects/{project_id}/ai-structure/run", data=form)

    # Assert: the page is served with status 200 and its text carries the
    # diagnostic together with both file names.
    assert response.status_code == 200
    body = response.text
    for fragment in ("один конкретный файл .cf", "first.cf", "second.cf"):
        assert fragment in body
def test_html5_ai_structure_reports_offline_agent_with_last_seen(tmp_path: Path):
    """The offline-agent error page mentions the agent's last heartbeat."""
    # Arrange: a single binary configuration file used as the input path.
    cf_path = tmp_path / "demo.cf"
    cf_path.write_bytes(b"binary-cf")

    client = TestClient(app)
    project_id = f"ai-offline-{uuid4()}"
    agent_id = f"win-agent-{uuid4()}"

    settings_payload = {
        "name": "AI Offline",
        "structure_source": "CF_FILE",
        "agent": {"cf_agent_id": agent_id},
    }
    settings_response = client.post(f"/projects/{project_id}/settings", json=settings_payload)
    assert settings_response.status_code == 200

    heartbeat_payload = {"agent_id": agent_id, "host": "test-host"}
    heartbeat_response = client.post("/agent/heartbeat", json=heartbeat_payload)
    assert heartbeat_response.status_code == 200

    # Back-date the recorded heartbeat; the assertions below expect the agent
    # to be reported as offline afterwards.
    main._agent_statuses[agent_id].last_seen_at = "2020-01-01T00:00:00+00:00"

    # Act: request AI-structure processing for the single .cf file.
    form = {
        "project_id": project_id,
        "input_path": str(cf_path),
        "output_path": str(tmp_path / "out"),
    }
    response = client.post(f"/html5/projects/{project_id}/ai-structure/run", data=form)

    # Assert: the page is served with status 200 and its text carries both
    # the offline notice and the heartbeat hint.
    assert response.status_code == 200
    body = response.text
    for fragment in ("сейчас офлайн", "Последний heartbeat"):
        assert fragment in body
def test_import_full_replace_replaces_current_normalized_project(tmp_path: Path): def test_import_full_replace_replaces_current_normalized_project(tmp_path: Path):
first = tmp_path / "first" first = tmp_path / "first"
second = tmp_path / "second" second = tmp_path / "second"