Optimize AI structure output for Codex
CI / python (push) Has been cancelled
CI / rust (push) Has been cancelled

This commit is contained in:
2026-05-21 20:51:34 +03:00
parent e86f6be385
commit cbcfcc1741
4 changed files with 180 additions and 4 deletions
@@ -1,6 +1,7 @@
from __future__ import annotations from __future__ import annotations
import json import json
import re
from pathlib import Path from pathlib import Path
from typing import Any from typing import Any
@@ -44,7 +45,8 @@ def prepare_ai_structure(
else: else:
diagnostics.append("No 1C metadata/XML/BSL files or .cf/.cfe binaries were found.") diagnostics.append("No 1C metadata/XML/BSL files or .cf/.cfe binaries were found.")
manifest = _manifest(project_id, input_path, output_path, files, snapshot, normalized, diagnostics, binaries) codex_root = output_path / _codex_folder_name(project_id)
manifest = _manifest(project_id, input_path, output_path, codex_root, files, snapshot, normalized, diagnostics, binaries)
_write_json(output_path / "manifest.json", manifest) _write_json(output_path / "manifest.json", manifest)
_write_json(output_path / "source_inventory.json", {"files": files}) _write_json(output_path / "source_inventory.json", {"files": files})
if snapshot is not None: if snapshot is not None:
@@ -56,6 +58,7 @@ def prepare_ai_structure(
_write_json(output_path / "normalized_project.json", normalized.model_dump(mode="json")) _write_json(output_path / "normalized_project.json", normalized.model_dump(mode="json"))
_write_text(output_path / "ai_context.md", _ai_context_markdown(manifest, snapshot, normalized)) _write_text(output_path / "ai_context.md", _ai_context_markdown(manifest, snapshot, normalized))
_write_text(output_path / "export_plan.md", _export_plan_markdown(project_id, input_path, output_path, binaries, parseable)) _write_text(output_path / "export_plan.md", _export_plan_markdown(project_id, input_path, output_path, binaries, parseable))
_write_codex_package(codex_root, manifest, files, snapshot, normalized, binaries, parseable)
return manifest return manifest
@@ -75,6 +78,7 @@ def _manifest(
project_id: str, project_id: str,
input_path: Path, input_path: Path,
output_path: Path, output_path: Path,
codex_root: Path,
files: list[dict[str, Any]], files: list[dict[str, Any]],
snapshot: SirSnapshot | None, snapshot: SirSnapshot | None,
normalized: NormalizedProject | None, normalized: NormalizedProject | None,
@@ -86,6 +90,8 @@ def _manifest(
"project_id": project_id, "project_id": project_id,
"input_path": str(input_path), "input_path": str(input_path),
"output_path": str(output_path), "output_path": str(output_path),
"codex_package_path": str(codex_root),
"codex_package_folder": codex_root.name,
"status": "ready" if snapshot is not None or normalized is not None else "export_required", "status": "ready" if snapshot is not None or normalized is not None else "export_required",
"files_count": len(files), "files_count": len(files),
"binary_1c_files": binaries, "binary_1c_files": binaries,
@@ -114,7 +120,7 @@ def _manifest(
def _artifacts(snapshot: SirSnapshot | None, normalized: NormalizedProject | None) -> list[str]: def _artifacts(snapshot: SirSnapshot | None, normalized: NormalizedProject | None) -> list[str]:
artifacts = ["manifest.json", "source_inventory.json", "ai_context.md", "export_plan.md"] artifacts = ["manifest.json", "source_inventory.json", "ai_context.md", "export_plan.md", "codex_package"]
if snapshot is not None: if snapshot is not None:
artifacts.extend(["sir_snapshot.json", "ai_objects.json", "ai_modules.json", "ai_edges.json"]) artifacts.extend(["sir_snapshot.json", "ai_objects.json", "ai_modules.json", "ai_edges.json"])
if normalized is not None: if normalized is not None:
@@ -122,6 +128,163 @@ def _artifacts(snapshot: SirSnapshot | None, normalized: NormalizedProject | Non
return artifacts return artifacts
def _codex_folder_name(project_id: str) -> str:
    """Build the Codex package folder name for ``project_id``.

    Every run of characters outside ``A-Z a-z 0-9 _ . -`` is replaced by a
    single ``-``; leading and trailing ``-``, ``.`` and ``_`` are trimmed.
    When nothing is left, the slug falls back to ``project``.
    """
    slug = re.sub(r"[^A-Za-z0-9_.-]+", "-", project_id)
    slug = slug.strip("-._")
    if not slug:
        slug = "project"
    return "codex-1c-context-" + slug
def _write_codex_package(
    root: Path,
    manifest: dict[str, Any],
    files: list[dict[str, Any]],
    snapshot: SirSnapshot | None,
    normalized: NormalizedProject | None,
    binaries: list[dict[str, Any]],
    parseable: bool,
) -> None:
    """Write the Codex context package under ``root``.

    Creates the ``context``, ``indexes``, ``objects``, ``modules`` and ``raw``
    sub-folders, then writes ``AGENTS.md``, ``README.md``, the project
    overview, the export plan, the manifest and the source inventory.
    Snapshot-derived files (raw snapshot, object/module/edge indexes and the
    per-item Markdown summaries) are written only when ``snapshot`` is given;
    the normalized model, metadata tree and access model only when
    ``normalized`` is given.
    """
    for folder in ("context", "indexes", "objects", "modules", "raw"):
        (root / folder).mkdir(parents=True, exist_ok=True)

    context_dir = root / "context"
    indexes_dir = root / "indexes"
    raw_dir = root / "raw"

    # Files that are always present, whatever could be parsed.
    _write_text(root / "AGENTS.md", _codex_agents_markdown(manifest))
    _write_text(root / "README.md", _codex_readme_markdown(manifest))
    overview = _ai_context_markdown(manifest, snapshot, normalized)
    _write_text(context_dir / "project-overview.md", overview)
    export_plan = _export_plan_markdown(
        manifest["project_id"],
        Path(manifest["input_path"]),
        root,
        binaries,
        parseable,
    )
    _write_text(context_dir / "export-plan.md", export_plan)
    _write_json(indexes_dir / "manifest.json", manifest)
    _write_json(indexes_dir / "source-inventory.json", {"files": files})

    if snapshot is not None:
        (raw_dir / "sir_snapshot.json").write_bytes(snapshot_to_json(snapshot))
        object_index = _ai_objects(snapshot)
        module_index = _ai_modules(snapshot)
        _write_json(indexes_dir / "objects.json", object_index)
        _write_json(indexes_dir / "modules.json", module_index)
        edge_index = [edge.model_dump(mode="json") for edge in snapshot.edges]
        _write_json(indexes_dir / "edges.json", edge_index)
        _write_object_markdown_files(root / "objects", object_index)
        _write_module_markdown_files(root / "modules", module_index)

    if normalized is not None:
        _write_json(raw_dir / "normalized_project.json", normalized.model_dump(mode="json"))
        _write_text(context_dir / "metadata-tree.md", _normalized_tree_markdown(normalized))
        _write_json(indexes_dir / "access-model.json", normalized.access.model_dump(mode="json"))
def _codex_agents_markdown(manifest: dict[str, Any]) -> str:
    """Render the ``AGENTS.md`` text for the Codex package.

    Only ``manifest["project_id"]`` is interpolated; everything else is fixed
    guidance text. The result ends with a single trailing newline.
    """
    project_id = manifest["project_id"]
    body = (
        "# AGENTS.md for 1C context package",
        "This folder is generated by SFERA for Codex.",
        "## How to use this folder",
        f"- Treat this package as read-only context for project `{project_id}`.",
        "- Start with `README.md` and `context/project-overview.md`.",
        "- Use `indexes/objects.json`, `indexes/modules.json`, and `indexes/edges.json` for precise navigation.",
        "- Use `raw/normalized_project.json` as the authoritative 1C metadata model when present.",
        "- 1C modules, forms, commands, реквизиты, табличные части and rights are parts of owner 1C objects. Do not treat a form module as an independent detached source file.",
        "- When writing BSL, preserve the owner object context from `qualified_name`, `lineage_id`, and `source`.",
        "- If `status` is `export_required`, first export `.cf/.cfe` through 1C Designer/Windows Agent and regenerate this package from the exported files.",
        "## Important files",
        "- `context/project-overview.md` - compact human context.",
        "- `context/metadata-tree.md` - metadata tree extracted from NormalizedProject.",
        "- `indexes/*.json` - machine-readable indexes for Codex search and reasoning.",
        "- `objects/*.md` - object-level summaries.",
        "- `modules/*.md` - module-level summaries.",
        "- `raw/*.json` - full raw SFERA model.",
    )
    return "\n".join(body) + "\n"
def _codex_readme_markdown(manifest: dict[str, Any]) -> str:
    """Render the package ``README.md`` from the manifest.

    Reads ``project_id``, ``status``, ``input_path`` and ``files_count``
    directly (a missing key raises ``KeyError``); node, edge and object
    counts come from the optional ``snapshot`` / ``normalized`` sub-dicts and
    default to ``0``. A ``## Diagnostics`` section is appended only when the
    manifest carries a non-empty ``diagnostics`` list.
    """
    sir_counts = manifest.get("snapshot") or {}
    model_counts = manifest.get("normalized") or {}
    summary = (
        f"# Codex 1C Context: {manifest['project_id']}",
        "",
        f"- Status: `{manifest['status']}`",
        f"- Source: `{manifest['input_path']}`",
        f"- Files scanned: {manifest['files_count']}",
        f"- SIR nodes: {sir_counts.get('nodes', 0)}",
        f"- SIR edges: {sir_counts.get('edges', 0)}",
        f"- Normalized objects: {model_counts.get('objects', 0)}",
        "",
        "Copy this whole folder into the Codex project when you want Codex to write code for this 1C configuration.",
    )
    diagnostics = manifest.get("diagnostics")
    if diagnostics:
        extra = ["", "## Diagnostics", *(f"- {entry}" for entry in diagnostics)]
    else:
        extra = []
    return "\n".join([*summary, *extra]) + "\n"
def _write_context_markdown_files(
    root: Path,
    items: list[dict[str, Any]],
    fallback_name: str,
    render: Any,
) -> None:
    """Write one Markdown summary per item into ``root`` without overwrites.

    The file stem is the sanitised ``qualified_name`` (else ``name``, else
    ``fallback_name``). Sanitising and truncation can map distinct items to
    the same stem, so a repeated filename gets a ``-2``, ``-3``, ... suffix
    instead of replacing the earlier file. Only the first 1000 items are
    written.

    Args:
        root: Destination folder (expected to exist already).
        items: Index entries to summarise.
        fallback_name: Stem used when an item has neither name field.
        render: Callable turning one item dict into Markdown text.
    """
    taken: set[str] = set()
    for item in items[:1000]:
        label = item.get("qualified_name") or item.get("name") or fallback_name
        stem = _safe_context_filename(str(label))
        filename = f"{stem}.md"
        counter = 2
        # Compare case-insensitively: on case-insensitive filesystems names
        # that differ only by letter case refer to the same file.
        while filename.casefold() in taken:
            filename = f"{stem}-{counter}.md"
            counter += 1
        taken.add(filename.casefold())
        _write_text(root / filename, render(item))


def _write_object_markdown_files(root: Path, objects: list[dict[str, Any]]) -> None:
    """Write per-object Markdown summaries (at most 1000) into ``root``."""
    _write_context_markdown_files(root, objects, "object", _object_markdown)


def _write_module_markdown_files(root: Path, modules: list[dict[str, Any]]) -> None:
    """Write per-module Markdown summaries (at most 1000) into ``root``."""
    _write_context_markdown_files(root, modules, "module", _module_markdown)
def _safe_context_filename(value: str) -> str:
    """Turn ``value`` into a filename stem of at most 140 characters.

    Runs of characters other than Latin/Cyrillic letters, digits, ``_``,
    ``.`` and ``-`` collapse into a single ``_``; leading and trailing ``.``
    and ``_`` are removed. An empty result becomes ``item``.
    """
    cleaned = re.sub(r"[^A-Za-zА-Яа-яЁё0-9_.-]+", "_", value)
    cleaned = cleaned.strip("._")
    if not cleaned:
        return "item"
    return cleaned[:140]
def _object_markdown(item: dict[str, Any]) -> str:
    """Render one object index entry as a Markdown summary.

    The heading is ``qualified_name`` (or ``name`` when that is empty),
    followed by the kind, name, lineage, semantic id and source, and finally
    the ``attributes`` dict as an indented JSON code block. Values that JSON
    cannot encode natively are stringified via ``default=str``.
    """
    heading = item.get("qualified_name") or item.get("name")
    parts = [f"# {heading}", ""]
    fields = (
        ("Kind", "kind"),
        ("Name", "name"),
        ("Lineage", "lineage_id"),
        ("Semantic", "semantic_id"),
        ("Source", "source"),
    )
    for label, key in fields:
        parts.append(f"- {label}: `{item.get(key)}`")
    attributes_json = json.dumps(item.get("attributes") or {}, ensure_ascii=False, indent=2, default=str)
    parts += ["", "## Attributes", "```json", attributes_json, "```"]
    return "\n".join(parts) + "\n"
def _module_markdown(item: dict[str, Any]) -> str:
    """Render one module index entry as a Markdown summary.

    The heading is ``qualified_name`` (or ``name`` when that is empty),
    followed by the name, lineage and source, and finally the ``attributes``
    dict as an indented JSON code block. Values that JSON cannot encode
    natively are stringified via ``default=str``.
    """
    heading = item.get("qualified_name") or item.get("name")
    parts = [f"# {heading}", ""]
    for label, key in (("Name", "name"), ("Lineage", "lineage_id"), ("Source", "source")):
        parts.append(f"- {label}: `{item.get(key)}`")
    attributes_json = json.dumps(item.get("attributes") or {}, ensure_ascii=False, indent=2, default=str)
    parts += ["", "## Module Attributes", "```json", attributes_json, "```"]
    return "\n".join(parts) + "\n"
def _normalized_tree_markdown(
    normalized: NormalizedProject,
    *,
    max_objects: int = 500,
    max_children: int = 20,
) -> str:
    """Render the normalized configuration as a nested Markdown tree.

    Each group becomes a ``##`` section, each object a top-level bullet, and
    its forms and commands two-space-indented child bullets so that they nest
    under the object when rendered. Every group, including an empty one, is
    followed by a blank line. Entries beyond the limits are summarised in a
    "... N more" bullet rather than dropped without a trace.

    Args:
        normalized: Project model; read via ``project_id`` and
            ``configuration.groups`` (each with ``name`` and ``objects``).
        max_objects: Maximum objects listed per group.
        max_children: Maximum forms, and maximum commands, listed per object.

    Returns:
        The Markdown text.
    """
    lines = [f"# Metadata Tree: {normalized.project_id or 'project'}", ""]
    for group in normalized.configuration.groups:
        lines.append(f"## {group.name}")
        objects = group.objects
        if not objects:
            lines.append("- нет объектов")
            lines.append("")
            continue
        for item in objects[:max_objects]:
            lines.append(f"- `{item.qualified_name}` ({item.object_kind})")
            for label, children in (("form", item.forms), ("command", item.commands)):
                # Two spaces align the child bullet with the parent's text,
                # which is what makes it a nested list item.
                lines.extend(f"  - {label}: `{child.name}`" for child in children[:max_children])
                hidden = len(children) - max_children
                if hidden > 0:
                    lines.append(f"  - ... {hidden} more {label}(s) not shown")
        hidden_objects = len(objects) - max_objects
        if hidden_objects > 0:
            lines.append(f"- ... {hidden_objects} more object(s) not shown")
        lines.append("")
    return "\n".join(lines)
def _ai_objects(snapshot: SirSnapshot) -> list[dict[str, Any]]: def _ai_objects(snapshot: SirSnapshot) -> list[dict[str, Any]]:
return [ return [
{ {
@@ -69,8 +69,9 @@ def render_html5_ai_structure_result(result: dict | None) -> str:
<section class="ai-structure-result" data-html5-ai-structure-status="{escape(str(result.get('status', '')))}"> <section class="ai-structure-result" data-html5-ai-structure-status="{escape(str(result.get('status', '')))}">
<div class="access-plan-head"> <div class="access-plan-head">
<span class="status-pill">{escape(str(result.get("status", "")))}</span> <span class="status-pill">{escape(str(result.get("status", "")))}</span>
<strong>{escape(str(result.get("output_path", "")))}</strong> <strong>{escape(str(result.get("codex_package_folder") or result.get("output_path", "")))}</strong>
</div> </div>
<p class="object-summary">Папка для переноса в Codex: {escape(str(result.get("codex_package_path", "")))}</p>
<dl class="setup-metrics"> <dl class="setup-metrics">
<div><dt>Файлы</dt><dd>{escape(str(result.get("files_count", 0)))}</dd></div> <div><dt>Файлы</dt><dd>{escape(str(result.get("files_count", 0)))}</dd></div>
<div><dt>Nodes</dt><dd>{escape(str(snapshot.get("nodes", 0)))}</dd></div> <div><dt>Nodes</dt><dd>{escape(str(snapshot.get("nodes", 0)))}</dd></div>
@@ -857,6 +857,8 @@ class AiStructurePrepareResponse(BaseModel):
project_id: str project_id: str
input_path: str input_path: str
output_path: str output_path: str
codex_package_path: str
codex_package_folder: str
status: str status: str
files_count: int = 0 files_count: int = 0
binary_1c_files: list[dict] = Field(default_factory=list) binary_1c_files: list[dict] = Field(default_factory=list)
+11 -1
View File
@@ -1702,9 +1702,17 @@ def test_ai_structure_prepare_writes_ai_ready_package(tmp_path: Path):
payload = response.json() payload = response.json()
assert payload["status"] == "ready" assert payload["status"] == "ready"
assert payload["snapshot"]["nodes"] >= 2 assert payload["snapshot"]["nodes"] >= 2
codex_package = output / payload["codex_package_folder"]
assert (output / "manifest.json").exists() assert (output / "manifest.json").exists()
assert (output / "normalized_project.json").exists() assert (output / "normalized_project.json").exists()
assert (output / "sir_snapshot.json").exists() assert (output / "sir_snapshot.json").exists()
assert (codex_package / "AGENTS.md").exists()
assert (codex_package / "README.md").exists()
assert (codex_package / "context" / "project-overview.md").exists()
assert (codex_package / "indexes" / "objects.json").exists()
assert (codex_package / "raw" / "normalized_project.json").exists()
assert "generated by SFERA for Codex" in (codex_package / "AGENTS.md").read_text(encoding="utf-8")
assert "Copy this whole folder into the Codex project" in (codex_package / "README.md").read_text(encoding="utf-8")
assert "Treat modules/forms/commands as parts" in (output / "ai_context.md").read_text(encoding="utf-8") assert "Treat modules/forms/commands as parts" in (output / "ai_context.md").read_text(encoding="utf-8")
page = client.get("/html5/projects/ai-demo/ai-structure") page = client.get("/html5/projects/ai-demo/ai-structure")
@@ -1714,7 +1722,7 @@ def test_ai_structure_prepare_writes_ai_ready_package(tmp_path: Path):
"/html5/projects/ai-demo/ai-structure/run", "/html5/projects/ai-demo/ai-structure/run",
data={"project_id": "ai-demo-html5", "input_path": str(source), "output_path": str(tmp_path / "html5-out")}, data={"project_id": "ai-demo-html5", "input_path": str(source), "output_path": str(tmp_path / "html5-out")},
) )
assert_html5_response_contract(html5_run, "ready", "sir_snapshot.json", "normalized_project.json") assert_html5_response_contract(html5_run, "ready", "codex-1c-context-ai-demo-html5", "sir_snapshot.json", "normalized_project.json")
def test_ai_structure_prepare_reports_cf_cfe_export_required(tmp_path: Path): def test_ai_structure_prepare_reports_cf_cfe_export_required(tmp_path: Path):
@@ -1735,6 +1743,8 @@ def test_ai_structure_prepare_reports_cf_cfe_export_required(tmp_path: Path):
assert payload["status"] == "export_required" assert payload["status"] == "export_required"
assert len(payload["binary_1c_files"]) == 2 assert len(payload["binary_1c_files"]) == 2
assert "DumpConfigToFiles" in (output / "export_plan.md").read_text(encoding="utf-8") assert "DumpConfigToFiles" in (output / "export_plan.md").read_text(encoding="utf-8")
assert (output / payload["codex_package_folder"] / "AGENTS.md").exists()
assert "export_required" in (output / payload["codex_package_folder"] / "README.md").read_text(encoding="utf-8")
def test_import_full_replace_replaces_current_normalized_project(tmp_path: Path): def test_import_full_replace_replaces_current_normalized_project(tmp_path: Path):