Make Codex 1C context packages self-contained
CI / python (push) Has been cancelled
CI / rust (push) Has been cancelled

This commit is contained in:
2026-05-21 20:58:35 +03:00
parent cbcfcc1741
commit fea29e665c
2 changed files with 117 additions and 1 deletions
@@ -13,6 +13,8 @@ from sir import SirSnapshot, snapshot_to_json
AI_STRUCTURE_VERSION = "1.0" AI_STRUCTURE_VERSION = "1.0"
_PARSEABLE_SUFFIXES = {".xml", ".mdo", ".bsl"} _PARSEABLE_SUFFIXES = {".xml", ".mdo", ".bsl"}
_BINARY_1C_SUFFIXES = {".cf", ".cfe"} _BINARY_1C_SUFFIXES = {".cf", ".cfe"}
_CODEX_SOURCE_SUFFIXES = {".xml", ".mdo", ".bsl", ".json", ".txt"}
_MAX_CODEX_SOURCE_FILE_BYTES = 2_000_000
def prepare_ai_structure( def prepare_ai_structure(
@@ -58,7 +60,7 @@ def prepare_ai_structure(
_write_json(output_path / "normalized_project.json", normalized.model_dump(mode="json")) _write_json(output_path / "normalized_project.json", normalized.model_dump(mode="json"))
_write_text(output_path / "ai_context.md", _ai_context_markdown(manifest, snapshot, normalized)) _write_text(output_path / "ai_context.md", _ai_context_markdown(manifest, snapshot, normalized))
_write_text(output_path / "export_plan.md", _export_plan_markdown(project_id, input_path, output_path, binaries, parseable)) _write_text(output_path / "export_plan.md", _export_plan_markdown(project_id, input_path, output_path, binaries, parseable))
_write_codex_package(codex_root, manifest, files, snapshot, normalized, binaries, parseable) _write_codex_package(codex_root, input_path, manifest, files, snapshot, normalized, binaries, parseable)
return manifest return manifest
@@ -135,6 +137,7 @@ def _codex_folder_name(project_id: str) -> str:
def _write_codex_package( def _write_codex_package(
root: Path, root: Path,
input_path: Path,
manifest: dict[str, Any], manifest: dict[str, Any],
files: list[dict[str, Any]], files: list[dict[str, Any]],
snapshot: SirSnapshot | None, snapshot: SirSnapshot | None,
@@ -147,12 +150,16 @@ def _write_codex_package(
(root / "objects").mkdir(parents=True, exist_ok=True) (root / "objects").mkdir(parents=True, exist_ok=True)
(root / "modules").mkdir(parents=True, exist_ok=True) (root / "modules").mkdir(parents=True, exist_ok=True)
(root / "raw").mkdir(parents=True, exist_ok=True) (root / "raw").mkdir(parents=True, exist_ok=True)
source_map = _copy_codex_sources(input_path, root / "source")
_write_text(root / "AGENTS.md", _codex_agents_markdown(manifest)) _write_text(root / "AGENTS.md", _codex_agents_markdown(manifest))
_write_text(root / "README.md", _codex_readme_markdown(manifest)) _write_text(root / "README.md", _codex_readme_markdown(manifest))
_write_text(root / "context" / "CODEX_START_HERE.md", _codex_start_here_markdown(manifest))
_write_text(root / "context" / "project-overview.md", _ai_context_markdown(manifest, snapshot, normalized)) _write_text(root / "context" / "project-overview.md", _ai_context_markdown(manifest, snapshot, normalized))
_write_text(root / "context" / "export-plan.md", _export_plan_markdown(manifest["project_id"], Path(manifest["input_path"]), root, binaries, parseable)) _write_text(root / "context" / "export-plan.md", _export_plan_markdown(manifest["project_id"], Path(manifest["input_path"]), root, binaries, parseable))
_write_json(root / "indexes" / "manifest.json", manifest) _write_json(root / "indexes" / "manifest.json", manifest)
_write_json(root / "indexes" / "codex-navigation.json", _codex_navigation(manifest, source_map))
_write_json(root / "indexes" / "source-inventory.json", {"files": files}) _write_json(root / "indexes" / "source-inventory.json", {"files": files})
_write_json(root / "indexes" / "source-map.json", {"files": source_map})
if snapshot is not None: if snapshot is not None:
(root / "raw" / "sir_snapshot.json").write_bytes(snapshot_to_json(snapshot)) (root / "raw" / "sir_snapshot.json").write_bytes(snapshot_to_json(snapshot))
objects = _ai_objects(snapshot) objects = _ai_objects(snapshot)
@@ -168,6 +175,46 @@ def _write_codex_package(
_write_json(root / "indexes" / "access-model.json", normalized.access.model_dump(mode="json")) _write_json(root / "indexes" / "access-model.json", normalized.access.model_dump(mode="json"))
def _copy_codex_sources(input_path: Path, target: Path) -> list[dict[str, Any]]:
    """Copy text sources from ``input_path`` into ``target`` as UTF-8 files.

    ``input_path`` may be a single file or a directory that is walked
    recursively. Only files whose suffix (case-insensitively) is listed in
    ``_CODEX_SOURCE_SUFFIXES`` are considered. Files larger than
    ``_MAX_CODEX_SOURCE_FILE_BYTES`` are not copied but are still reported.

    Args:
        input_path: Source file or source directory.
        target: Directory that receives the copies; created if missing.

    Returns:
        One dict per considered file, in sorted path order. Copied files carry
        ``original_path``, ``relative_path``, ``codex_path``, ``copied=True``
        and ``size``; oversized files carry ``original_path``,
        ``relative_path``, ``copied=False``, ``reason`` and ``size``.
    """
    target.mkdir(parents=True, exist_ok=True)
    # The kind of input cannot change during the walk, so decide it once.
    single_file = input_path.is_file()
    if single_file:
        source_files = [input_path]
    else:
        # When the package is generated inside the input tree, files written by
        # an earlier run live under ``target``. Re-copying them would nest
        # ``source/.../source/...`` deeper on every run, so leave them out.
        target_resolved = target.resolve()
        source_files = sorted(
            path
            for path in input_path.rglob("*")
            if path.is_file() and target_resolved not in path.resolve().parents
        )
    copied: list[dict[str, Any]] = []
    for path in source_files:
        if path.suffix.casefold() not in _CODEX_SOURCE_SUFFIXES:
            continue
        relative = path.name if single_file else path.relative_to(input_path).as_posix()
        size = path.stat().st_size
        if size > _MAX_CODEX_SOURCE_FILE_BYTES:
            copied.append(
                {
                    "original_path": str(path),
                    "relative_path": relative,
                    "copied": False,
                    "reason": f"file is larger than {_MAX_CODEX_SOURCE_FILE_BYTES} bytes",
                    "size": size,
                }
            )
            continue
        destination = target / relative
        destination.parent.mkdir(parents=True, exist_ok=True)
        try:
            # "utf-8-sig" also accepts plain UTF-8 and drops a leading BOM.
            text = path.read_text(encoding="utf-8-sig")
        except UnicodeDecodeError:
            # Fall back to cp1251; undecodable bytes are replaced, not fatal.
            text = path.read_text(encoding="cp1251", errors="replace")
        destination.write_text(text, encoding="utf-8")
        copied.append(
            {
                "original_path": str(path),
                "relative_path": relative,
                "codex_path": f"source/{relative}",
                "copied": True,
                "size": size,
            }
        )
    return copied
def _codex_agents_markdown(manifest: dict[str, Any]) -> str: def _codex_agents_markdown(manifest: dict[str, Any]) -> str:
return f"""# AGENTS.md for 1C context package return f"""# AGENTS.md for 1C context package
@@ -178,6 +225,8 @@ This folder is generated by SFERA for Codex.
- Treat this package as read-only context for project `{manifest['project_id']}`. - Treat this package as read-only context for project `{manifest['project_id']}`.
- Start with `README.md` and `context/project-overview.md`. - Start with `README.md` and `context/project-overview.md`.
- Use `indexes/objects.json`, `indexes/modules.json`, and `indexes/edges.json` for precise navigation. - Use `indexes/objects.json`, `indexes/modules.json`, and `indexes/edges.json` for precise navigation.
- Use `source/` for local copied BSL/XML/MDO text. Do not rely on the absolute source path from the machine that generated the package.
- Use `indexes/source-map.json` to map original source paths to local `source/...` paths.
- Use `raw/normalized_project.json` as the authoritative 1C metadata model when present. - Use `raw/normalized_project.json` as the authoritative 1C metadata model when present.
- 1C modules, forms, commands, реквизиты, табличные части and rights are parts of owner 1C objects. Do not treat a form module as an independent detached source file. - 1C modules, forms, commands, реквизиты, табличные части and rights are parts of owner 1C objects. Do not treat a form module as an independent detached source file.
- When writing BSL, preserve the owner object context from `qualified_name`, `lineage_id`, and `source`. - When writing BSL, preserve the owner object context from `qualified_name`, `lineage_id`, and `source`.
@@ -188,6 +237,7 @@ This folder is generated by SFERA for Codex.
- `context/project-overview.md` - compact human context. - `context/project-overview.md` - compact human context.
- `context/metadata-tree.md` - metadata tree extracted from NormalizedProject. - `context/metadata-tree.md` - metadata tree extracted from NormalizedProject.
- `indexes/*.json` - machine-readable indexes for Codex search and reasoning. - `indexes/*.json` - machine-readable indexes for Codex search and reasoning.
- `source/` - local UTF-8 copies of BSL/XML/MDO source files.
- `objects/*.md` - object-level summaries. - `objects/*.md` - object-level summaries.
- `modules/*.md` - module-level summaries. - `modules/*.md` - module-level summaries.
- `raw/*.json` - full raw SFERA model. - `raw/*.json` - full raw SFERA model.
@@ -208,6 +258,7 @@ def _codex_readme_markdown(manifest: dict[str, Any]) -> str:
f"- Normalized objects: {normalized.get('objects', 0)}", f"- Normalized objects: {normalized.get('objects', 0)}",
"", "",
"Copy this whole folder into the Codex project when you want Codex to write code for this 1C configuration.", "Copy this whole folder into the Codex project when you want Codex to write code for this 1C configuration.",
"The package includes a local `source/` folder, so Codex can inspect BSL/XML/MDO files after the folder is moved.",
] ]
if manifest.get("diagnostics"): if manifest.get("diagnostics"):
lines.extend(["", "## Diagnostics"]) lines.extend(["", "## Diagnostics"])
@@ -215,6 +266,53 @@ def _codex_readme_markdown(manifest: dict[str, Any]) -> str:
return "\n".join(lines) + "\n" return "\n".join(lines) + "\n"
def _codex_start_here_markdown(manifest: dict[str, Any]) -> str:
return f"""# Start Here For Codex
Project: `{manifest['project_id']}`
Status: `{manifest['status']}`
Read in this order:
1. `AGENTS.md`
2. `README.md`
3. `context/project-overview.md`
4. `context/metadata-tree.md`
5. `indexes/objects.json`
6. `indexes/modules.json`
7. `indexes/edges.json`
8. `source/`
When generating code:
- Locate the owner 1C object first.
- Then inspect its module/form/command context.
- Prefer local copied files under `source/` for exact source text.
- Use `raw/normalized_project.json` when object structure matters more than raw XML layout.
- Use `indexes/source-map.json` if you need to map SFERA source references to local package paths.
"""
def _codex_navigation(manifest: dict[str, Any], source_map: list[dict[str, Any]]) -> dict[str, Any]:
copied_sources = [item for item in source_map if item.get("copied")]
return {
"project_id": manifest["project_id"],
"status": manifest["status"],
"start_here": "context/CODEX_START_HERE.md",
"instructions": "AGENTS.md",
"overview": "context/project-overview.md",
"metadata_tree": "context/metadata-tree.md",
"objects_index": "indexes/objects.json",
"modules_index": "indexes/modules.json",
"edges_index": "indexes/edges.json",
"source_map": "indexes/source-map.json",
"raw_normalized_project": "raw/normalized_project.json",
"raw_sir_snapshot": "raw/sir_snapshot.json",
"local_source_count": len(copied_sources),
"first_sources": [item.get("codex_path") for item in copied_sources[:25]],
}
def _write_object_markdown_files(root: Path, objects: list[dict[str, Any]]) -> None: def _write_object_markdown_files(root: Path, objects: list[dict[str, Any]]) -> None:
for item in objects[:1000]: for item in objects[:1000]:
filename = _safe_context_filename(str(item.get("qualified_name") or item.get("name") or "object")) + ".md" filename = _safe_context_filename(str(item.get("qualified_name") or item.get("name") or "object")) + ".md"
@@ -233,6 +331,7 @@ def _safe_context_filename(value: str) -> str:
def _object_markdown(item: dict[str, Any]) -> str: def _object_markdown(item: dict[str, Any]) -> str:
local_source = _local_source_path(item)
return "\n".join( return "\n".join(
[ [
f"# {item.get('qualified_name') or item.get('name')}", f"# {item.get('qualified_name') or item.get('name')}",
@@ -242,6 +341,7 @@ def _object_markdown(item: dict[str, Any]) -> str:
f"- Lineage: `{item.get('lineage_id')}`", f"- Lineage: `{item.get('lineage_id')}`",
f"- Semantic: `{item.get('semantic_id')}`", f"- Semantic: `{item.get('semantic_id')}`",
f"- Source: `{item.get('source')}`", f"- Source: `{item.get('source')}`",
f"- Local source: `{local_source}`",
"", "",
"## Attributes", "## Attributes",
"```json", "```json",
@@ -252,6 +352,7 @@ def _object_markdown(item: dict[str, Any]) -> str:
def _module_markdown(item: dict[str, Any]) -> str: def _module_markdown(item: dict[str, Any]) -> str:
local_source = _local_source_path(item)
return "\n".join( return "\n".join(
[ [
f"# {item.get('qualified_name') or item.get('name')}", f"# {item.get('qualified_name') or item.get('name')}",
@@ -259,6 +360,7 @@ def _module_markdown(item: dict[str, Any]) -> str:
f"- Name: `{item.get('name')}`", f"- Name: `{item.get('name')}`",
f"- Lineage: `{item.get('lineage_id')}`", f"- Lineage: `{item.get('lineage_id')}`",
f"- Source: `{item.get('source')}`", f"- Source: `{item.get('source')}`",
f"- Local source: `{local_source}`",
"", "",
"## Module Attributes", "## Module Attributes",
"```json", "```json",
@@ -268,6 +370,14 @@ def _module_markdown(item: dict[str, Any]) -> str:
) + "\n" ) + "\n"
def _local_source_path(item: dict[str, Any]) -> str:
source = item.get("source") or {}
if not isinstance(source, dict):
return ""
source_path = str(source.get("source_path") or "")
return f"source/{source_path}" if source_path else ""
def _normalized_tree_markdown(normalized: NormalizedProject) -> str: def _normalized_tree_markdown(normalized: NormalizedProject) -> str:
lines = [f"# Metadata Tree: {normalized.project_id or 'project'}", ""] lines = [f"# Metadata Tree: {normalized.project_id or 'project'}", ""]
for group in normalized.configuration.groups: for group in normalized.configuration.groups:
+6
View File
@@ -1708,10 +1708,16 @@ def test_ai_structure_prepare_writes_ai_ready_package(tmp_path: Path):
assert (output / "sir_snapshot.json").exists() assert (output / "sir_snapshot.json").exists()
assert (codex_package / "AGENTS.md").exists() assert (codex_package / "AGENTS.md").exists()
assert (codex_package / "README.md").exists() assert (codex_package / "README.md").exists()
assert (codex_package / "context" / "CODEX_START_HERE.md").exists()
assert (codex_package / "context" / "project-overview.md").exists() assert (codex_package / "context" / "project-overview.md").exists()
assert (codex_package / "indexes" / "codex-navigation.json").exists()
assert (codex_package / "indexes" / "objects.json").exists() assert (codex_package / "indexes" / "objects.json").exists()
assert (codex_package / "indexes" / "source-map.json").exists()
assert (codex_package / "raw" / "normalized_project.json").exists() assert (codex_package / "raw" / "normalized_project.json").exists()
assert (codex_package / "source" / "metadata.xml").exists()
assert (codex_package / "source" / "Интеграция.bsl").read_text(encoding="utf-8").startswith("Процедура")
assert "generated by SFERA for Codex" in (codex_package / "AGENTS.md").read_text(encoding="utf-8") assert "generated by SFERA for Codex" in (codex_package / "AGENTS.md").read_text(encoding="utf-8")
assert "Use `source/`" in (codex_package / "AGENTS.md").read_text(encoding="utf-8")
assert "Copy this whole folder into the Codex project" in (codex_package / "README.md").read_text(encoding="utf-8") assert "Copy this whole folder into the Codex project" in (codex_package / "README.md").read_text(encoding="utf-8")
assert "Treat modules/forms/commands as parts" in (output / "ai_context.md").read_text(encoding="utf-8") assert "Treat modules/forms/commands as parts" in (output / "ai_context.md").read_text(encoding="utf-8")