From cbcfcc17418f60d18903399cca8437b981761fd8 Mon Sep 17 00:00:00 2001 From: Mikhail Date: Thu, 21 May 2026 20:51:34 +0300 Subject: [PATCH] Optimize AI structure output for Codex --- .../src/api_server/ai_structure_service.py | 167 +++++++++++++++++- .../src/api_server/html5_ai_structure.py | 3 +- services/api-server/src/api_server/main.py | 2 + services/api-server/tests/test_api.py | 12 +- 4 files changed, 180 insertions(+), 4 deletions(-) diff --git a/services/api-server/src/api_server/ai_structure_service.py b/services/api-server/src/api_server/ai_structure_service.py index 2b0a5bc..cc72c13 100644 --- a/services/api-server/src/api_server/ai_structure_service.py +++ b/services/api-server/src/api_server/ai_structure_service.py @@ -1,6 +1,7 @@ from __future__ import annotations import json +import re from pathlib import Path from typing import Any @@ -44,7 +45,8 @@ def prepare_ai_structure( else: diagnostics.append("No 1C metadata/XML/BSL files or .cf/.cfe binaries were found.") - manifest = _manifest(project_id, input_path, output_path, files, snapshot, normalized, diagnostics, binaries) + codex_root = output_path / _codex_folder_name(project_id) + manifest = _manifest(project_id, input_path, output_path, codex_root, files, snapshot, normalized, diagnostics, binaries) _write_json(output_path / "manifest.json", manifest) _write_json(output_path / "source_inventory.json", {"files": files}) if snapshot is not None: @@ -56,6 +58,7 @@ def prepare_ai_structure( _write_json(output_path / "normalized_project.json", normalized.model_dump(mode="json")) _write_text(output_path / "ai_context.md", _ai_context_markdown(manifest, snapshot, normalized)) _write_text(output_path / "export_plan.md", _export_plan_markdown(project_id, input_path, output_path, binaries, parseable)) + _write_codex_package(codex_root, manifest, files, snapshot, normalized, binaries, parseable) return manifest @@ -75,6 +78,7 @@ def _manifest( project_id: str, input_path: Path, output_path: Path, + codex_root: Path, files: list[dict[str, Any]], snapshot: SirSnapshot | None, normalized: NormalizedProject | None, @@ -86,6 +90,8 @@ def _manifest( "project_id": project_id, "input_path": str(input_path), "output_path": str(output_path), + "codex_package_path": str(codex_root), + "codex_package_folder": codex_root.name, "status": "ready" if snapshot is not None or normalized is not None else "export_required", "files_count": len(files), "binary_1c_files": binaries, @@ -114,7 +120,7 @@ def _manifest( def _artifacts(snapshot: SirSnapshot | None, normalized: NormalizedProject | None) -> list[str]: - artifacts = ["manifest.json", "source_inventory.json", "ai_context.md", "export_plan.md"] + artifacts = ["manifest.json", "source_inventory.json", "ai_context.md", "export_plan.md", "codex_package"] if snapshot is not None: artifacts.extend(["sir_snapshot.json", "ai_objects.json", "ai_modules.json", "ai_edges.json"]) if normalized is not None: @@ -122,6 +128,163 @@ def _artifacts(snapshot: SirSnapshot | None, normalized: NormalizedProject | Non return artifacts +def _codex_folder_name(project_id: str) -> str: + safe = re.sub(r"[^A-Za-z0-9_.-]+", "-", project_id).strip("-._") or "project" + return f"codex-1c-context-{safe}" + + +def _write_codex_package( + root: Path, + manifest: dict[str, Any], + files: list[dict[str, Any]], + snapshot: SirSnapshot | None, + normalized: NormalizedProject | None, + binaries: list[dict[str, Any]], + parseable: bool, +) -> None: + (root / "context").mkdir(parents=True, exist_ok=True) + (root / "indexes").mkdir(parents=True, exist_ok=True) + (root / "objects").mkdir(parents=True, exist_ok=True) + (root / "modules").mkdir(parents=True, exist_ok=True) + (root / "raw").mkdir(parents=True, exist_ok=True) + _write_text(root / "AGENTS.md", _codex_agents_markdown(manifest)) + _write_text(root / "README.md", _codex_readme_markdown(manifest)) + _write_text(root / "context" / "project-overview.md", _ai_context_markdown(manifest, snapshot, normalized)) + _write_text(root / "context" / "export-plan.md", _export_plan_markdown(manifest["project_id"], Path(manifest["input_path"]), root, binaries, parseable)) + _write_json(root / "indexes" / "manifest.json", manifest) + _write_json(root / "indexes" / "source-inventory.json", {"files": files}) + if snapshot is not None: + (root / "raw" / "sir_snapshot.json").write_bytes(snapshot_to_json(snapshot)) + objects = _ai_objects(snapshot) + modules = _ai_modules(snapshot) + _write_json(root / "indexes" / "objects.json", objects) + _write_json(root / "indexes" / "modules.json", modules) + _write_json(root / "indexes" / "edges.json", [edge.model_dump(mode="json") for edge in snapshot.edges]) + _write_object_markdown_files(root / "objects", objects) + _write_module_markdown_files(root / "modules", modules) + if normalized is not None: + _write_json(root / "raw" / "normalized_project.json", normalized.model_dump(mode="json")) + _write_text(root / "context" / "metadata-tree.md", _normalized_tree_markdown(normalized)) + _write_json(root / "indexes" / "access-model.json", normalized.access.model_dump(mode="json")) + + +def _codex_agents_markdown(manifest: dict[str, Any]) -> str: + return f"""# AGENTS.md for 1C context package + +This folder is generated by SFERA for Codex. + +## How to use this folder + +- Treat this package as read-only context for project `{manifest['project_id']}`. +- Start with `README.md` and `context/project-overview.md`. +- Use `indexes/objects.json`, `indexes/modules.json`, and `indexes/edges.json` for precise navigation. +- Use `raw/normalized_project.json` as the authoritative 1C metadata model when present. +- 1C modules, forms, commands, реквизиты, табличные части and rights are parts of owner 1C objects. Do not treat a form module as an independent detached source file. +- When writing BSL, preserve the owner object context from `qualified_name`, `lineage_id`, and `source`. +- If `status` is `export_required`, first export `.cf/.cfe` through 1C Designer/Windows Agent and regenerate this package from the exported files. + +## Important files + +- `context/project-overview.md` - compact human context. +- `context/metadata-tree.md` - metadata tree extracted from NormalizedProject. +- `indexes/*.json` - machine-readable indexes for Codex search and reasoning. +- `objects/*.md` - object-level summaries. +- `modules/*.md` - module-level summaries. +- `raw/*.json` - full raw SFERA model. +""" + + +def _codex_readme_markdown(manifest: dict[str, Any]) -> str: + snapshot = manifest.get("snapshot") or {} + normalized = manifest.get("normalized") or {} + lines = [ + f"# Codex 1C Context: {manifest['project_id']}", + "", + f"- Status: `{manifest['status']}`", + f"- Source: `{manifest['input_path']}`", + f"- Files scanned: {manifest['files_count']}", + f"- SIR nodes: {snapshot.get('nodes', 0)}", + f"- SIR edges: {snapshot.get('edges', 0)}", + f"- Normalized objects: {normalized.get('objects', 0)}", + "", + "Copy this whole folder into the Codex project when you want Codex to write code for this 1C configuration.", + ] + if manifest.get("diagnostics"): + lines.extend(["", "## Diagnostics"]) + lines.extend(f"- {item}" for item in manifest["diagnostics"]) + return "\n".join(lines) + "\n" + + +def _write_object_markdown_files(root: Path, objects: list[dict[str, Any]]) -> None: + for item in objects[:1000]: + filename = _safe_context_filename(str(item.get("qualified_name") or item.get("name") or "object")) + ".md" + _write_text(root / filename, _object_markdown(item)) + + +def _write_module_markdown_files(root: Path, modules: list[dict[str, Any]]) -> None: + for item in modules[:1000]: + filename = _safe_context_filename(str(item.get("qualified_name") or item.get("name") or "module")) + ".md" + _write_text(root / filename, _module_markdown(item)) + + +def _safe_context_filename(value: str) -> str: + safe = re.sub(r"[^A-Za-zА-Яа-яЁё0-9_.-]+", "_", value).strip("._") + return (safe or "item")[:140] + + +def _object_markdown(item: dict[str, Any]) -> str: + return "\n".join( + [ + f"# {item.get('qualified_name') or item.get('name')}", + "", + f"- Kind: `{item.get('kind')}`", + f"- Name: `{item.get('name')}`", + f"- Lineage: `{item.get('lineage_id')}`", + f"- Semantic: `{item.get('semantic_id')}`", + f"- Source: `{item.get('source')}`", + "", + "## Attributes", + "```json", + json.dumps(item.get("attributes") or {}, ensure_ascii=False, indent=2, default=str), + "```", + ] + ) + "\n" + + +def _module_markdown(item: dict[str, Any]) -> str: + return "\n".join( + [ + f"# {item.get('qualified_name') or item.get('name')}", + "", + f"- Name: `{item.get('name')}`", + f"- Lineage: `{item.get('lineage_id')}`", + f"- Source: `{item.get('source')}`", + "", + "## Module Attributes", + "```json", + json.dumps(item.get("attributes") or {}, ensure_ascii=False, indent=2, default=str), + "```", + ] + ) + "\n" + + +def _normalized_tree_markdown(normalized: NormalizedProject) -> str: + lines = [f"# Metadata Tree: {normalized.project_id or 'project'}", ""] + for group in normalized.configuration.groups: + lines.append(f"## {group.name}") + if not group.objects: + lines.append("- нет объектов") + continue + for item in group.objects[:500]: + lines.append(f"- `{item.qualified_name}` ({item.object_kind})") + for form in item.forms[:20]: + lines.append(f" - form: `{form.name}`") + for command in item.commands[:20]: + lines.append(f" - command: `{command.name}`") + lines.append("") + return "\n".join(lines) + + def _ai_objects(snapshot: SirSnapshot) -> list[dict[str, Any]]: return [ { diff --git a/services/api-server/src/api_server/html5_ai_structure.py b/services/api-server/src/api_server/html5_ai_structure.py index 4f2f892..2bbb3c2 100644 --- a/services/api-server/src/api_server/html5_ai_structure.py +++ b/services/api-server/src/api_server/html5_ai_structure.py @@ -69,8 +69,9 @@ def render_html5_ai_structure_result(result: dict | None) -> str:
{escape(str(result.get("status", "")))} - {escape(str(result.get("output_path", "")))} + {escape(str(result.get("codex_package_folder") or result.get("output_path", "")))}
+

Папка для переноса в Codex: {escape(str(result.get("codex_package_path", "")))}

Файлы
{escape(str(result.get("files_count", 0)))}
Nodes
{escape(str(snapshot.get("nodes", 0)))}
diff --git a/services/api-server/src/api_server/main.py b/services/api-server/src/api_server/main.py index 0da741d..795576c 100644 --- a/services/api-server/src/api_server/main.py +++ b/services/api-server/src/api_server/main.py @@ -857,6 +857,8 @@ class AiStructurePrepareResponse(BaseModel): project_id: str input_path: str output_path: str + codex_package_path: str + codex_package_folder: str status: str files_count: int = 0 binary_1c_files: list[dict] = Field(default_factory=list) diff --git a/services/api-server/tests/test_api.py b/services/api-server/tests/test_api.py index 83cd6ff..64ff81e 100644 --- a/services/api-server/tests/test_api.py +++ b/services/api-server/tests/test_api.py @@ -1702,9 +1702,17 @@ def test_ai_structure_prepare_writes_ai_ready_package(tmp_path: Path): payload = response.json() assert payload["status"] == "ready" assert payload["snapshot"]["nodes"] >= 2 + codex_package = output / payload["codex_package_folder"] assert (output / "manifest.json").exists() assert (output / "normalized_project.json").exists() assert (output / "sir_snapshot.json").exists() + assert (codex_package / "AGENTS.md").exists() + assert (codex_package / "README.md").exists() + assert (codex_package / "context" / "project-overview.md").exists() + assert (codex_package / "indexes" / "objects.json").exists() + assert (codex_package / "raw" / "normalized_project.json").exists() + assert "generated by SFERA for Codex" in (codex_package / "AGENTS.md").read_text(encoding="utf-8") + assert "Copy this whole folder into the Codex project" in (codex_package / "README.md").read_text(encoding="utf-8") assert "Treat modules/forms/commands as parts" in (output / "ai_context.md").read_text(encoding="utf-8") page = client.get("/html5/projects/ai-demo/ai-structure") @@ -1714,7 +1722,7 @@ def test_ai_structure_prepare_writes_ai_ready_package(tmp_path: Path): "/html5/projects/ai-demo/ai-structure/run", data={"project_id": "ai-demo-html5", "input_path": str(source), "output_path": str(tmp_path / "html5-out")}, ) - assert_html5_response_contract(html5_run, "ready", "sir_snapshot.json", "normalized_project.json") + assert_html5_response_contract(html5_run, "ready", "codex-1c-context-ai-demo-html5", "sir_snapshot.json", "normalized_project.json") def test_ai_structure_prepare_reports_cf_cfe_export_required(tmp_path: Path): @@ -1735,6 +1743,8 @@ def test_ai_structure_prepare_reports_cf_cfe_export_required(tmp_path: Path): assert payload["status"] == "export_required" assert len(payload["binary_1c_files"]) == 2 assert "DumpConfigToFiles" in (output / "export_plan.md").read_text(encoding="utf-8") + assert (output / payload["codex_package_folder"] / "AGENTS.md").exists() + assert "export_required" in (output / payload["codex_package_folder"] / "README.md").read_text(encoding="utf-8") def test_import_full_replace_replaces_current_normalized_project(tmp_path: Path):