Optimize AI structure output for Codex

2026-05-21 20:51:34 +03:00
parent e86f6be385
commit cbcfcc1741
4 changed files with 180 additions and 4 deletions
@@ -1,6 +1,7 @@
 from __future__ import annotations
 import json
 import re
 from pathlib import Path
 from typing import Any
@@ -44,7 +45,8 @@ def prepare_ai_structure(
    else:
        diagnostics.append("No 1C metadata/XML/BSL files or .cf/.cfe binaries were found.")
-    manifest = _manifest(project_id, input_path, output_path, files, snapshot, normalized, diagnostics, binaries)
+    codex_root = output_path / _codex_folder_name(project_id)
    manifest = _manifest(project_id, input_path, output_path, codex_root, files, snapshot, normalized, diagnostics, binaries)
    _write_json(output_path / "manifest.json", manifest)
    _write_json(output_path / "source_inventory.json", {"files": files})
    if snapshot is not None:
@@ -56,6 +58,7 @@ def prepare_ai_structure(
        _write_json(output_path / "normalized_project.json", normalized.model_dump(mode="json"))
    _write_text(output_path / "ai_context.md", _ai_context_markdown(manifest, snapshot, normalized))
    _write_text(output_path / "export_plan.md", _export_plan_markdown(project_id, input_path, output_path, binaries, parseable))
    _write_codex_package(codex_root, manifest, files, snapshot, normalized, binaries, parseable)
    return manifest
@@ -75,6 +78,7 @@ def _manifest(
    project_id: str,
    input_path: Path,
    output_path: Path,
    codex_root: Path,
    files: list[dict[str, Any]],
    snapshot: SirSnapshot | None,
    normalized: NormalizedProject | None,
@@ -86,6 +90,8 @@ def _manifest(
        "project_id": project_id,
        "input_path": str(input_path),
        "output_path": str(output_path),
        "codex_package_path": str(codex_root),
        "codex_package_folder": codex_root.name,
        "status": "ready" if snapshot is not None or normalized is not None else "export_required",
        "files_count": len(files),
        "binary_1c_files": binaries,
@@ -114,7 +120,7 @@ def _manifest(
 def _artifacts(snapshot: SirSnapshot | None, normalized: NormalizedProject | None) -> list[str]:
-    artifacts = ["manifest.json", "source_inventory.json", "ai_context.md", "export_plan.md"]
+    artifacts = ["manifest.json", "source_inventory.json", "ai_context.md", "export_plan.md", "codex_package"]
    if snapshot is not None:
        artifacts.extend(["sir_snapshot.json", "ai_objects.json", "ai_modules.json", "ai_edges.json"])
    if normalized is not None:
@@ -122,6 +128,163 @@ def _artifacts(snapshot: SirSnapshot | None, normalized: NormalizedProject | Non
    return artifacts
 def _codex_folder_name(project_id: str) -> str:
    safe = re.sub(r"[^A-Za-z0-9_.-]+", "-", project_id).strip("-._") or "project"
    return f"codex-1c-context-{safe}"
 def _write_codex_package(
    root: Path,
    manifest: dict[str, Any],
    files: list[dict[str, Any]],
    snapshot: SirSnapshot | None,
    normalized: NormalizedProject | None,
    binaries: list[dict[str, Any]],
    parseable: bool,
 ) -> None:
    """Write the Codex context package under ``root``.

    Always creates the ``context``, ``indexes``, ``objects``, ``modules`` and
    ``raw`` subfolders and writes ``AGENTS.md``, ``README.md``, the two
    ``context`` markdown files, and the manifest / source-inventory indexes.

    When ``snapshot`` is given, the raw snapshot, the object / module / edge
    indexes, and the per-object and per-module markdown files are written too.
    When ``normalized`` is given, the raw normalized project, the metadata
    tree, and the access-model index are written too.
    """
    for folder in ("context", "indexes", "objects", "modules", "raw"):
        (root / folder).mkdir(parents=True, exist_ok=True)

    context_dir = root / "context"
    indexes_dir = root / "indexes"
    raw_dir = root / "raw"

    # Entry-point documents that are present regardless of what was parsed.
    _write_text(root / "AGENTS.md", _codex_agents_markdown(manifest))
    _write_text(root / "README.md", _codex_readme_markdown(manifest))
    _write_text(
        context_dir / "project-overview.md",
        _ai_context_markdown(manifest, snapshot, normalized),
    )
    _write_text(
        context_dir / "export-plan.md",
        _export_plan_markdown(
            manifest["project_id"],
            Path(manifest["input_path"]),
            root,
            binaries,
            parseable,
        ),
    )
    _write_json(indexes_dir / "manifest.json", manifest)
    _write_json(indexes_dir / "source-inventory.json", {"files": files})

    if snapshot is not None:
        (raw_dir / "sir_snapshot.json").write_bytes(snapshot_to_json(snapshot))
        object_index = _ai_objects(snapshot)
        module_index = _ai_modules(snapshot)
        _write_json(indexes_dir / "objects.json", object_index)
        _write_json(indexes_dir / "modules.json", module_index)
        _write_json(
            indexes_dir / "edges.json",
            [edge.model_dump(mode="json") for edge in snapshot.edges],
        )
        _write_object_markdown_files(root / "objects", object_index)
        _write_module_markdown_files(root / "modules", module_index)

    if normalized is not None:
        _write_json(raw_dir / "normalized_project.json", normalized.model_dump(mode="json"))
        _write_text(context_dir / "metadata-tree.md", _normalized_tree_markdown(normalized))
        _write_json(indexes_dir / "access-model.json", normalized.access.model_dump(mode="json"))
 def _codex_agents_markdown(manifest: dict[str, Any]) -> str:
    """Return the text written to the package's ``AGENTS.md``.

    The text is a fixed template; the only value interpolated from
    ``manifest`` is ``manifest['project_id']``.
    """
    return f"""# AGENTS.md for 1C context package
 This folder is generated by SFERA for Codex.
 ## How to use this folder
 - Treat this package as read-only context for project `{manifest['project_id']}`.
 - Start with `README.md` and `context/project-overview.md`.
 - Use `indexes/objects.json`, `indexes/modules.json`, and `indexes/edges.json` for precise navigation.
 - Use `raw/normalized_project.json` as the authoritative 1C metadata model when present.
 - 1C modules, forms, commands, реквизиты, табличные части and rights are parts of owner 1C objects. Do not treat a form module as an independent detached source file.
 - When writing BSL, preserve the owner object context from `qualified_name`, `lineage_id`, and `source`.
 - If `status` is `export_required`, first export `.cf/.cfe` through 1C Designer/Windows Agent and regenerate this package from the exported files.
 ## Important files
 - `context/project-overview.md` - compact human context.
 - `context/metadata-tree.md` - metadata tree extracted from NormalizedProject.
 - `indexes/*.json` - machine-readable indexes for Codex search and reasoning.
 - `objects/*.md` - object-level summaries.
 - `modules/*.md` - module-level summaries.
 - `raw/*.json` - full raw SFERA model.
 """
 def _codex_readme_markdown(manifest: dict[str, Any]) -> str:
    snapshot = manifest.get("snapshot") or {}
    normalized = manifest.get("normalized") or {}
    lines = [
        f"# Codex 1C Context: {manifest['project_id']}",
        "",
        f"- Status: `{manifest['status']}`",
        f"- Source: `{manifest['input_path']}`",
        f"- Files scanned: {manifest['files_count']}",
        f"- SIR nodes: {snapshot.get('nodes', 0)}",
        f"- SIR edges: {snapshot.get('edges', 0)}",
        f"- Normalized objects: {normalized.get('objects', 0)}",
        "",
        "Copy this whole folder into the Codex project when you want Codex to write code for this 1C configuration.",
    ]
    if manifest.get("diagnostics"):
        lines.extend(["", "## Diagnostics"])
        lines.extend(f"- {item}" for item in manifest["diagnostics"])
    return "\n".join(lines) + "\n"
 def _write_object_markdown_files(root: Path, objects: list[dict[str, Any]]) -> None:
    """Write one markdown summary per object into ``root``.

    Only the first 1000 entries of ``objects`` are written. The file name is
    derived from ``qualified_name``, falling back to ``name`` and then to the
    literal ``object``.

    Different names can sanitize (or truncate) to the same file name, which
    would make a later object silently overwrite an earlier one. To avoid
    that, a numeric suffix (``-2``, ``-3``, ...) is added to later
    duplicates. Names are compared case-insensitively so the files also stay
    distinct on case-insensitive file systems.
    """
    taken: set[str] = set()
    for item in objects[:1000]:
        stem = _safe_context_filename(str(item.get("qualified_name") or item.get("name") or "object"))
        candidate = stem
        counter = 2
        while candidate.casefold() in taken:
            candidate = f"{stem}-{counter}"
            counter += 1
        taken.add(candidate.casefold())
        _write_text(root / (candidate + ".md"), _object_markdown(item))
 def _write_module_markdown_files(root: Path, modules: list[dict[str, Any]]) -> None:
    """Write one markdown summary per module into ``root``.

    Only the first 1000 entries of ``modules`` are written. The file name is
    derived from ``qualified_name``, falling back to ``name`` and then to the
    literal ``module``.

    Different names can sanitize (or truncate) to the same file name, which
    would make a later module silently overwrite an earlier one. To avoid
    that, a numeric suffix (``-2``, ``-3``, ...) is added to later
    duplicates. Names are compared case-insensitively so the files also stay
    distinct on case-insensitive file systems.
    """
    taken: set[str] = set()
    for item in modules[:1000]:
        stem = _safe_context_filename(str(item.get("qualified_name") or item.get("name") or "module"))
        candidate = stem
        counter = 2
        while candidate.casefold() in taken:
            candidate = f"{stem}-{counter}"
            counter += 1
        taken.add(candidate.casefold())
        _write_text(root / (candidate + ".md"), _module_markdown(item))
 def _safe_context_filename(value: str) -> str:
    safe = re.sub(r"[^A-Za-zА-Яа-яЁё0-9_.-]+", "_", value).strip("._")
    return (safe or "item")[:140]
 def _object_markdown(item: dict[str, Any]) -> str:
    return "\n".join(
        [
            f"# {item.get('qualified_name') or item.get('name')}",
            "",
            f"- Kind: `{item.get('kind')}`",
            f"- Name: `{item.get('name')}`",
            f"- Lineage: `{item.get('lineage_id')}`",
            f"- Semantic: `{item.get('semantic_id')}`",
            f"- Source: `{item.get('source')}`",
            "",
            "## Attributes",
            "```json",
            json.dumps(item.get("attributes") or {}, ensure_ascii=False, indent=2, default=str),
            "```",
        ]
    ) + "\n"
 def _module_markdown(item: dict[str, Any]) -> str:
    return "\n".join(
        [
            f"# {item.get('qualified_name') or item.get('name')}",
            "",
            f"- Name: `{item.get('name')}`",
            f"- Lineage: `{item.get('lineage_id')}`",
            f"- Source: `{item.get('source')}`",
            "",
            "## Module Attributes",
            "```json",
            json.dumps(item.get("attributes") or {}, ensure_ascii=False, indent=2, default=str),
            "```",
        ]
    ) + "\n"
 def _normalized_tree_markdown(normalized: NormalizedProject) -> str:
    """Render the metadata tree of ``normalized`` as markdown.

    Emits a ``##`` section per configuration group. Each section lists at
    most 500 objects, and each object at most 20 forms and 20 commands. A
    group without objects gets the single placeholder bullet
    ``- нет объектов`` ("no objects").

    Every section, including an empty one, is followed by a blank line, so
    the next heading is never glued to the preceding list and the document
    always ends with a newline.
    """
    lines = [f"# Metadata Tree: {normalized.project_id or 'project'}", ""]
    for group in normalized.configuration.groups:
        lines.append(f"## {group.name}")
        if group.objects:
            for item in group.objects[:500]:
                lines.append(f"- `{item.qualified_name}` ({item.object_kind})")
                lines.extend(f"  - form: `{form.name}`" for form in item.forms[:20])
                lines.extend(f"  - command: `{command.name}`" for command in item.commands[:20])
        else:
            lines.append("- нет объектов")
        # Emit the separator for empty groups too; skipping it used to leave
        # the next heading directly after the placeholder bullet.
        lines.append("")
    return "\n".join(lines)
 def _ai_objects(snapshot: SirSnapshot) -> list[dict[str, Any]]:
    return [
        {
@@ -69,8 +69,9 @@ def render_html5_ai_structure_result(result: dict | None) -> str:
    <section class="ai-structure-result" data-html5-ai-structure-status="{escape(str(result.get('status', '')))}">
      <div class="access-plan-head">
        <span class="status-pill">{escape(str(result.get("status", "")))}</span>
-        <strong>{escape(str(result.get("output_path", "")))}</strong>
+        <strong>{escape(str(result.get("codex_package_folder") or result.get("output_path", "")))}</strong>
      </div>
      <p class="object-summary">Папка для переноса в Codex: {escape(str(result.get("codex_package_path", "")))}</p>
      <dl class="setup-metrics">
        <div><dt>Файлы</dt><dd>{escape(str(result.get("files_count", 0)))}</dd></div>
        <div><dt>Nodes</dt><dd>{escape(str(snapshot.get("nodes", 0)))}</dd></div>
@@ -857,6 +857,8 @@ class AiStructurePrepareResponse(BaseModel):
    project_id: str
    input_path: str
    output_path: str
    codex_package_path: str
    codex_package_folder: str
    status: str
    files_count: int = 0
    binary_1c_files: list[dict] = Field(default_factory=list)
@@ -1702,9 +1702,17 @@ def test_ai_structure_prepare_writes_ai_ready_package(tmp_path: Path):
    payload = response.json()
    assert payload["status"] == "ready"
    assert payload["snapshot"]["nodes"] >= 2
    codex_package = output / payload["codex_package_folder"]
    assert (output / "manifest.json").exists()
    assert (output / "normalized_project.json").exists()
    assert (output / "sir_snapshot.json").exists()
    assert (codex_package / "AGENTS.md").exists()
    assert (codex_package / "README.md").exists()
    assert (codex_package / "context" / "project-overview.md").exists()
    assert (codex_package / "indexes" / "objects.json").exists()
    assert (codex_package / "raw" / "normalized_project.json").exists()
    assert "generated by SFERA for Codex" in (codex_package / "AGENTS.md").read_text(encoding="utf-8")
    assert "Copy this whole folder into the Codex project" in (codex_package / "README.md").read_text(encoding="utf-8")
    assert "Treat modules/forms/commands as parts" in (output / "ai_context.md").read_text(encoding="utf-8")
    page = client.get("/html5/projects/ai-demo/ai-structure")
@@ -1714,7 +1722,7 @@ def test_ai_structure_prepare_writes_ai_ready_package(tmp_path: Path):
        "/html5/projects/ai-demo/ai-structure/run",
        data={"project_id": "ai-demo-html5", "input_path": str(source), "output_path": str(tmp_path / "html5-out")},
    )
-    assert_html5_response_contract(html5_run, "ready", "sir_snapshot.json", "normalized_project.json")
+    assert_html5_response_contract(html5_run, "ready", "codex-1c-context-ai-demo-html5", "sir_snapshot.json", "normalized_project.json")
 def test_ai_structure_prepare_reports_cf_cfe_export_required(tmp_path: Path):
@@ -1735,6 +1743,8 @@ def test_ai_structure_prepare_reports_cf_cfe_export_required(tmp_path: Path):
    assert payload["status"] == "export_required"
    assert len(payload["binary_1c_files"]) == 2
    assert "DumpConfigToFiles" in (output / "export_plan.md").read_text(encoding="utf-8")
    assert (output / payload["codex_package_folder"] / "AGENTS.md").exists()
    assert "export_required" in (output / payload["codex_package_folder"] / "README.md").read_text(encoding="utf-8")
 def test_import_full_replace_replaces_current_normalized_project(tmp_path: Path):