Make Codex 1C context packages self-contained
CI / python (push) Has been cancelled
CI / rust (push) Has been cancelled

This commit is contained in:
2026-05-21 20:58:35 +03:00
parent cbcfcc1741
commit fea29e665c
2 changed files with 117 additions and 1 deletions
@@ -13,6 +13,8 @@ from sir import SirSnapshot, snapshot_to_json
AI_STRUCTURE_VERSION = "1.0"
_PARSEABLE_SUFFIXES = {".xml", ".mdo", ".bsl"}
_BINARY_1C_SUFFIXES = {".cf", ".cfe"}
_CODEX_SOURCE_SUFFIXES = {".xml", ".mdo", ".bsl", ".json", ".txt"}
_MAX_CODEX_SOURCE_FILE_BYTES = 2_000_000
def prepare_ai_structure(
@@ -58,7 +60,7 @@ def prepare_ai_structure(
_write_json(output_path / "normalized_project.json", normalized.model_dump(mode="json"))
_write_text(output_path / "ai_context.md", _ai_context_markdown(manifest, snapshot, normalized))
_write_text(output_path / "export_plan.md", _export_plan_markdown(project_id, input_path, output_path, binaries, parseable))
_write_codex_package(codex_root, manifest, files, snapshot, normalized, binaries, parseable)
_write_codex_package(codex_root, input_path, manifest, files, snapshot, normalized, binaries, parseable)
return manifest
@@ -135,6 +137,7 @@ def _codex_folder_name(project_id: str) -> str:
def _write_codex_package(
root: Path,
input_path: Path,
manifest: dict[str, Any],
files: list[dict[str, Any]],
snapshot: SirSnapshot | None,
@@ -147,12 +150,16 @@ def _write_codex_package(
(root / "objects").mkdir(parents=True, exist_ok=True)
(root / "modules").mkdir(parents=True, exist_ok=True)
(root / "raw").mkdir(parents=True, exist_ok=True)
source_map = _copy_codex_sources(input_path, root / "source")
_write_text(root / "AGENTS.md", _codex_agents_markdown(manifest))
_write_text(root / "README.md", _codex_readme_markdown(manifest))
_write_text(root / "context" / "CODEX_START_HERE.md", _codex_start_here_markdown(manifest))
_write_text(root / "context" / "project-overview.md", _ai_context_markdown(manifest, snapshot, normalized))
_write_text(root / "context" / "export-plan.md", _export_plan_markdown(manifest["project_id"], Path(manifest["input_path"]), root, binaries, parseable))
_write_json(root / "indexes" / "manifest.json", manifest)
_write_json(root / "indexes" / "codex-navigation.json", _codex_navigation(manifest, source_map))
_write_json(root / "indexes" / "source-inventory.json", {"files": files})
_write_json(root / "indexes" / "source-map.json", {"files": source_map})
if snapshot is not None:
(root / "raw" / "sir_snapshot.json").write_bytes(snapshot_to_json(snapshot))
objects = _ai_objects(snapshot)
@@ -168,6 +175,46 @@ def _write_codex_package(
_write_json(root / "indexes" / "access-model.json", normalized.access.model_dump(mode="json"))
def _copy_codex_sources(input_path: Path, target: Path) -> list[dict[str, Any]]:
    """Copy text sources from ``input_path`` into ``target`` as UTF-8 files.

    Only files whose suffix is listed in ``_CODEX_SOURCE_SUFFIXES`` are
    considered. Files larger than ``_MAX_CODEX_SOURCE_FILE_BYTES`` are not
    copied but are still reported, so the returned map explains why they are
    missing from the package.

    Args:
        input_path: A single source file, or a directory scanned recursively.
        target: Directory that receives the copies; created when missing.

    Returns:
        One entry per considered file. Every entry carries ``original_path``,
        ``relative_path``, ``copied`` and ``size``; copied entries add
        ``codex_path`` and skipped entries add ``reason``.
    """
    target.mkdir(parents=True, exist_ok=True)
    # Decide once whether the input is a single file instead of re-checking
    # the filesystem on every loop iteration.
    single_file = input_path.is_file()
    own_output_parts: tuple[str, ...] = ()
    if single_file:
        source_files = [input_path]
    else:
        source_files = sorted(path for path in input_path.rglob("*") if path.is_file())
        # When the package is generated inside the scanned tree, a re-run would
        # otherwise pick up the previous run's copies and nest them again under
        # ``target``. Remember where ``target`` sits relative to the input so
        # those files can be skipped.
        try:
            own_output_parts = target.resolve().relative_to(input_path.resolve()).parts
        except ValueError:
            # ``target`` is outside ``input_path``: nothing to exclude.
            own_output_parts = ()

    copied: list[dict[str, Any]] = []
    for path in source_files:
        if path.suffix.casefold() not in _CODEX_SOURCE_SUFFIXES:
            continue
        if single_file:
            relative = path.name
        else:
            relative_path = path.relative_to(input_path)
            if own_output_parts and relative_path.parts[: len(own_output_parts)] == own_output_parts:
                # Never copy the package's own ``source`` output into itself.
                continue
            relative = relative_path.as_posix()
        size = path.stat().st_size
        if size > _MAX_CODEX_SOURCE_FILE_BYTES:
            copied.append(
                {
                    "original_path": str(path),
                    "relative_path": relative,
                    "copied": False,
                    "reason": f"file is larger than {_MAX_CODEX_SOURCE_FILE_BYTES} bytes",
                    "size": size,
                }
            )
            continue
        destination = target / relative
        destination.parent.mkdir(parents=True, exist_ok=True)
        try:
            # ``utf-8-sig`` also accepts plain UTF-8 and drops a leading BOM.
            text = path.read_text(encoding="utf-8-sig")
        except UnicodeDecodeError:
            # Fall back to cp1251; undecodable bytes are replaced rather than
            # aborting the whole package build.
            text = path.read_text(encoding="cp1251", errors="replace")
        destination.write_text(text, encoding="utf-8")
        copied.append(
            {
                "original_path": str(path),
                "relative_path": relative,
                "codex_path": f"source/{relative}",
                "copied": True,
                # Size of the original file, not of the re-encoded copy.
                "size": size,
            }
        )
    return copied
def _codex_agents_markdown(manifest: dict[str, Any]) -> str:
return f"""# AGENTS.md for 1C context package
@@ -178,6 +225,8 @@ This folder is generated by SFERA for Codex.
- Treat this package as read-only context for project `{manifest['project_id']}`.
- Start with `README.md` and `context/project-overview.md`.
- Use `indexes/objects.json`, `indexes/modules.json`, and `indexes/edges.json` for precise navigation.
- Use `source/` for local copied BSL/XML/MDO text. Do not rely on the absolute source path from the machine that generated the package.
- Use `indexes/source-map.json` to map original source paths to local `source/...` paths.
- Use `raw/normalized_project.json` as the authoritative 1C metadata model when present.
- 1C modules, forms, commands, реквизиты, табличные части and rights are parts of owner 1C objects. Do not treat a form module as an independent detached source file.
- When writing BSL, preserve the owner object context from `qualified_name`, `lineage_id`, and `source`.
@@ -188,6 +237,7 @@ This folder is generated by SFERA for Codex.
- `context/project-overview.md` - compact human context.
- `context/metadata-tree.md` - metadata tree extracted from NormalizedProject.
- `indexes/*.json` - machine-readable indexes for Codex search and reasoning.
- `source/` - local UTF-8 copies of BSL/XML/MDO source files.
- `objects/*.md` - object-level summaries.
- `modules/*.md` - module-level summaries.
- `raw/*.json` - full raw SFERA model.
@@ -208,6 +258,7 @@ def _codex_readme_markdown(manifest: dict[str, Any]) -> str:
f"- Normalized objects: {normalized.get('objects', 0)}",
"",
"Copy this whole folder into the Codex project when you want Codex to write code for this 1C configuration.",
"The package includes a local `source/` folder, so Codex can inspect BSL/XML/MDO files after the folder is moved.",
]
if manifest.get("diagnostics"):
lines.extend(["", "## Diagnostics"])
@@ -215,6 +266,53 @@ def _codex_readme_markdown(manifest: dict[str, Any]) -> str:
return "\n".join(lines) + "\n"
def _codex_start_here_markdown(manifest: dict[str, Any]) -> str:
return f"""# Start Here For Codex
Project: `{manifest['project_id']}`
Status: `{manifest['status']}`
Read in this order:
1. `AGENTS.md`
2. `README.md`
3. `context/project-overview.md`
4. `context/metadata-tree.md`
5. `indexes/objects.json`
6. `indexes/modules.json`
7. `indexes/edges.json`
8. `source/`
When generating code:
- Locate the owner 1C object first.
- Then inspect its module/form/command context.
- Prefer local copied files under `source/` for exact source text.
- Use `raw/normalized_project.json` when object structure matters more than raw XML layout.
- Use `indexes/source-map.json` if you need to map SFERA source references to local package paths.
"""
def _codex_navigation(manifest: dict[str, Any], source_map: list[dict[str, Any]]) -> dict[str, Any]:
copied_sources = [item for item in source_map if item.get("copied")]
return {
"project_id": manifest["project_id"],
"status": manifest["status"],
"start_here": "context/CODEX_START_HERE.md",
"instructions": "AGENTS.md",
"overview": "context/project-overview.md",
"metadata_tree": "context/metadata-tree.md",
"objects_index": "indexes/objects.json",
"modules_index": "indexes/modules.json",
"edges_index": "indexes/edges.json",
"source_map": "indexes/source-map.json",
"raw_normalized_project": "raw/normalized_project.json",
"raw_sir_snapshot": "raw/sir_snapshot.json",
"local_source_count": len(copied_sources),
"first_sources": [item.get("codex_path") for item in copied_sources[:25]],
}
def _write_object_markdown_files(root: Path, objects: list[dict[str, Any]]) -> None:
for item in objects[:1000]:
filename = _safe_context_filename(str(item.get("qualified_name") or item.get("name") or "object")) + ".md"
@@ -233,6 +331,7 @@ def _safe_context_filename(value: str) -> str:
def _object_markdown(item: dict[str, Any]) -> str:
local_source = _local_source_path(item)
return "\n".join(
[
f"# {item.get('qualified_name') or item.get('name')}",
@@ -242,6 +341,7 @@ def _object_markdown(item: dict[str, Any]) -> str:
f"- Lineage: `{item.get('lineage_id')}`",
f"- Semantic: `{item.get('semantic_id')}`",
f"- Source: `{item.get('source')}`",
f"- Local source: `{local_source}`",
"",
"## Attributes",
"```json",
@@ -252,6 +352,7 @@ def _object_markdown(item: dict[str, Any]) -> str:
def _module_markdown(item: dict[str, Any]) -> str:
local_source = _local_source_path(item)
return "\n".join(
[
f"# {item.get('qualified_name') or item.get('name')}",
@@ -259,6 +360,7 @@ def _module_markdown(item: dict[str, Any]) -> str:
f"- Name: `{item.get('name')}`",
f"- Lineage: `{item.get('lineage_id')}`",
f"- Source: `{item.get('source')}`",
f"- Local source: `{local_source}`",
"",
"## Module Attributes",
"```json",
@@ -268,6 +370,14 @@ def _module_markdown(item: dict[str, Any]) -> str:
) + "\n"
def _local_source_path(item: dict[str, Any]) -> str:
source = item.get("source") or {}
if not isinstance(source, dict):
return ""
source_path = str(source.get("source_path") or "")
return f"source/{source_path}" if source_path else ""
def _normalized_tree_markdown(normalized: NormalizedProject) -> str:
lines = [f"# Metadata Tree: {normalized.project_id or 'project'}", ""]
for group in normalized.configuration.groups:
+6
View File
@@ -1708,10 +1708,16 @@ def test_ai_structure_prepare_writes_ai_ready_package(tmp_path: Path):
assert (output / "sir_snapshot.json").exists()
assert (codex_package / "AGENTS.md").exists()
assert (codex_package / "README.md").exists()
assert (codex_package / "context" / "CODEX_START_HERE.md").exists()
assert (codex_package / "context" / "project-overview.md").exists()
assert (codex_package / "indexes" / "codex-navigation.json").exists()
assert (codex_package / "indexes" / "objects.json").exists()
assert (codex_package / "indexes" / "source-map.json").exists()
assert (codex_package / "raw" / "normalized_project.json").exists()
assert (codex_package / "source" / "metadata.xml").exists()
assert (codex_package / "source" / "Интеграция.bsl").read_text(encoding="utf-8").startswith("Процедура")
assert "generated by SFERA for Codex" in (codex_package / "AGENTS.md").read_text(encoding="utf-8")
assert "Use `source/`" in (codex_package / "AGENTS.md").read_text(encoding="utf-8")
assert "Copy this whole folder into the Codex project" in (codex_package / "README.md").read_text(encoding="utf-8")
assert "Treat modules/forms/commands as parts" in (output / "ai_context.md").read_text(encoding="utf-8")