Make Codex 1C context packages self-contained
This commit is contained in:
@@ -13,6 +13,8 @@ from sir import SirSnapshot, snapshot_to_json
|
||||
AI_STRUCTURE_VERSION = "1.0"
|
||||
_PARSEABLE_SUFFIXES = {".xml", ".mdo", ".bsl"}
|
||||
_BINARY_1C_SUFFIXES = {".cf", ".cfe"}
|
||||
_CODEX_SOURCE_SUFFIXES = {".xml", ".mdo", ".bsl", ".json", ".txt"}
|
||||
_MAX_CODEX_SOURCE_FILE_BYTES = 2_000_000
|
||||
|
||||
|
||||
def prepare_ai_structure(
|
||||
@@ -58,7 +60,7 @@ def prepare_ai_structure(
|
||||
_write_json(output_path / "normalized_project.json", normalized.model_dump(mode="json"))
|
||||
_write_text(output_path / "ai_context.md", _ai_context_markdown(manifest, snapshot, normalized))
|
||||
_write_text(output_path / "export_plan.md", _export_plan_markdown(project_id, input_path, output_path, binaries, parseable))
|
||||
_write_codex_package(codex_root, manifest, files, snapshot, normalized, binaries, parseable)
|
||||
_write_codex_package(codex_root, input_path, manifest, files, snapshot, normalized, binaries, parseable)
|
||||
return manifest
|
||||
|
||||
|
||||
@@ -135,6 +137,7 @@ def _codex_folder_name(project_id: str) -> str:
|
||||
|
||||
def _write_codex_package(
|
||||
root: Path,
|
||||
input_path: Path,
|
||||
manifest: dict[str, Any],
|
||||
files: list[dict[str, Any]],
|
||||
snapshot: SirSnapshot | None,
|
||||
@@ -147,12 +150,16 @@ def _write_codex_package(
|
||||
(root / "objects").mkdir(parents=True, exist_ok=True)
|
||||
(root / "modules").mkdir(parents=True, exist_ok=True)
|
||||
(root / "raw").mkdir(parents=True, exist_ok=True)
|
||||
source_map = _copy_codex_sources(input_path, root / "source")
|
||||
_write_text(root / "AGENTS.md", _codex_agents_markdown(manifest))
|
||||
_write_text(root / "README.md", _codex_readme_markdown(manifest))
|
||||
_write_text(root / "context" / "CODEX_START_HERE.md", _codex_start_here_markdown(manifest))
|
||||
_write_text(root / "context" / "project-overview.md", _ai_context_markdown(manifest, snapshot, normalized))
|
||||
_write_text(root / "context" / "export-plan.md", _export_plan_markdown(manifest["project_id"], Path(manifest["input_path"]), root, binaries, parseable))
|
||||
_write_json(root / "indexes" / "manifest.json", manifest)
|
||||
_write_json(root / "indexes" / "codex-navigation.json", _codex_navigation(manifest, source_map))
|
||||
_write_json(root / "indexes" / "source-inventory.json", {"files": files})
|
||||
_write_json(root / "indexes" / "source-map.json", {"files": source_map})
|
||||
if snapshot is not None:
|
||||
(root / "raw" / "sir_snapshot.json").write_bytes(snapshot_to_json(snapshot))
|
||||
objects = _ai_objects(snapshot)
|
||||
@@ -168,6 +175,46 @@ def _write_codex_package(
|
||||
_write_json(root / "indexes" / "access-model.json", normalized.access.model_dump(mode="json"))
|
||||
|
||||
|
||||
def _copy_codex_sources(input_path: Path, target: Path) -> list[dict[str, Any]]:
    """Copy textual source files from *input_path* into *target* as UTF-8.

    Only files whose suffix is listed in ``_CODEX_SOURCE_SUFFIXES`` are
    considered. Files larger than ``_MAX_CODEX_SOURCE_FILE_BYTES`` are not
    copied but are still reported, so the resulting map explains every gap.

    Args:
        input_path: A single source file, or a directory that is walked
            recursively.
        target: Directory that receives the copies; created if missing.

    Returns:
        One entry per considered file, in sorted path order. Every entry has
        ``original_path``, ``relative_path``, ``copied`` and ``size``; copied
        files add ``codex_path`` and skipped files add ``reason``.
    """
    target.mkdir(parents=True, exist_ok=True)
    # Loop-invariant: decide once whether we handle one file or a tree.
    single_file = input_path.is_file()
    if single_file:
        source_files = [input_path]
    else:
        source_files = sorted(path for path in input_path.rglob("*") if path.is_file())
    target_root = target.resolve()

    copied: list[dict[str, Any]] = []
    for path in source_files:
        if path.suffix.casefold() not in _CODEX_SOURCE_SUFFIXES:
            continue
        # When the package is generated inside the input tree, a re-run would
        # otherwise pick up the previous copies and nest them as
        # source/source/...; never treat our own output as input.
        if target_root in path.resolve().parents:
            continue

        relative = path.name if single_file else path.relative_to(input_path).as_posix()
        size = path.stat().st_size
        if size > _MAX_CODEX_SOURCE_FILE_BYTES:
            copied.append(
                {
                    "original_path": str(path),
                    "relative_path": relative,
                    "copied": False,
                    "reason": f"file is larger than {_MAX_CODEX_SOURCE_FILE_BYTES} bytes",
                    "size": size,
                }
            )
            continue

        destination = target / relative
        destination.parent.mkdir(parents=True, exist_ok=True)
        try:
            # utf-8-sig transparently drops a leading BOM if one is present.
            text = path.read_text(encoding="utf-8-sig")
        except UnicodeDecodeError:
            # Fall back to cp1251; undecodable bytes are replaced rather than
            # aborting the whole package build.
            text = path.read_text(encoding="cp1251", errors="replace")
        destination.write_text(text, encoding="utf-8")
        copied.append(
            {
                "original_path": str(path),
                "relative_path": relative,
                "codex_path": f"source/{relative}",
                "copied": True,
                "size": size,
            }
        )
    return copied
|
||||
|
||||
|
||||
def _codex_agents_markdown(manifest: dict[str, Any]) -> str:
|
||||
return f"""# AGENTS.md for 1C context package
|
||||
|
||||
@@ -178,6 +225,8 @@ This folder is generated by SFERA for Codex.
|
||||
- Treat this package as read-only context for project `{manifest['project_id']}`.
|
||||
- Start with `README.md` and `context/project-overview.md`.
|
||||
- Use `indexes/objects.json`, `indexes/modules.json`, and `indexes/edges.json` for precise navigation.
|
||||
- Use `source/` for local copied BSL/XML/MDO text. Do not rely on the absolute source path from the machine that generated the package.
|
||||
- Use `indexes/source-map.json` to map original source paths to local `source/...` paths.
|
||||
- Use `raw/normalized_project.json` as the authoritative 1C metadata model when present.
|
||||
- 1C modules, forms, commands, реквизиты, табличные части and rights are parts of owner 1C objects. Do not treat a form module as an independent detached source file.
|
||||
- When writing BSL, preserve the owner object context from `qualified_name`, `lineage_id`, and `source`.
|
||||
@@ -188,6 +237,7 @@ This folder is generated by SFERA for Codex.
|
||||
- `context/project-overview.md` - compact human context.
|
||||
- `context/metadata-tree.md` - metadata tree extracted from NormalizedProject.
|
||||
- `indexes/*.json` - machine-readable indexes for Codex search and reasoning.
|
||||
- `source/` - local UTF-8 copies of BSL/XML/MDO source files.
|
||||
- `objects/*.md` - object-level summaries.
|
||||
- `modules/*.md` - module-level summaries.
|
||||
- `raw/*.json` - full raw SFERA model.
|
||||
@@ -208,6 +258,7 @@ def _codex_readme_markdown(manifest: dict[str, Any]) -> str:
|
||||
f"- Normalized objects: {normalized.get('objects', 0)}",
|
||||
"",
|
||||
"Copy this whole folder into the Codex project when you want Codex to write code for this 1C configuration.",
|
||||
"The package includes a local `source/` folder, so Codex can inspect BSL/XML/MDO files after the folder is moved.",
|
||||
]
|
||||
if manifest.get("diagnostics"):
|
||||
lines.extend(["", "## Diagnostics"])
|
||||
@@ -215,6 +266,53 @@ def _codex_readme_markdown(manifest: dict[str, Any]) -> str:
|
||||
return "\n".join(lines) + "\n"
|
||||
|
||||
|
||||
def _codex_start_here_markdown(manifest: dict[str, Any]) -> str:
|
||||
return f"""# Start Here For Codex
|
||||
|
||||
Project: `{manifest['project_id']}`
|
||||
Status: `{manifest['status']}`
|
||||
|
||||
Read in this order:
|
||||
|
||||
1. `AGENTS.md`
|
||||
2. `README.md`
|
||||
3. `context/project-overview.md`
|
||||
4. `context/metadata-tree.md`
|
||||
5. `indexes/objects.json`
|
||||
6. `indexes/modules.json`
|
||||
7. `indexes/edges.json`
|
||||
8. `source/`
|
||||
|
||||
When generating code:
|
||||
|
||||
- Locate the owner 1C object first.
|
||||
- Then inspect its module/form/command context.
|
||||
- Prefer local copied files under `source/` for exact source text.
|
||||
- Use `raw/normalized_project.json` when object structure matters more than raw XML layout.
|
||||
- Use `indexes/source-map.json` if you need to map SFERA source references to local package paths.
|
||||
"""
|
||||
|
||||
|
||||
def _codex_navigation(manifest: dict[str, Any], source_map: list[dict[str, Any]]) -> dict[str, Any]:
|
||||
copied_sources = [item for item in source_map if item.get("copied")]
|
||||
return {
|
||||
"project_id": manifest["project_id"],
|
||||
"status": manifest["status"],
|
||||
"start_here": "context/CODEX_START_HERE.md",
|
||||
"instructions": "AGENTS.md",
|
||||
"overview": "context/project-overview.md",
|
||||
"metadata_tree": "context/metadata-tree.md",
|
||||
"objects_index": "indexes/objects.json",
|
||||
"modules_index": "indexes/modules.json",
|
||||
"edges_index": "indexes/edges.json",
|
||||
"source_map": "indexes/source-map.json",
|
||||
"raw_normalized_project": "raw/normalized_project.json",
|
||||
"raw_sir_snapshot": "raw/sir_snapshot.json",
|
||||
"local_source_count": len(copied_sources),
|
||||
"first_sources": [item.get("codex_path") for item in copied_sources[:25]],
|
||||
}
|
||||
|
||||
|
||||
def _write_object_markdown_files(root: Path, objects: list[dict[str, Any]]) -> None:
|
||||
for item in objects[:1000]:
|
||||
filename = _safe_context_filename(str(item.get("qualified_name") or item.get("name") or "object")) + ".md"
|
||||
@@ -233,6 +331,7 @@ def _safe_context_filename(value: str) -> str:
|
||||
|
||||
|
||||
def _object_markdown(item: dict[str, Any]) -> str:
|
||||
local_source = _local_source_path(item)
|
||||
return "\n".join(
|
||||
[
|
||||
f"# {item.get('qualified_name') or item.get('name')}",
|
||||
@@ -242,6 +341,7 @@ def _object_markdown(item: dict[str, Any]) -> str:
|
||||
f"- Lineage: `{item.get('lineage_id')}`",
|
||||
f"- Semantic: `{item.get('semantic_id')}`",
|
||||
f"- Source: `{item.get('source')}`",
|
||||
f"- Local source: `{local_source}`",
|
||||
"",
|
||||
"## Attributes",
|
||||
"```json",
|
||||
@@ -252,6 +352,7 @@ def _object_markdown(item: dict[str, Any]) -> str:
|
||||
|
||||
|
||||
def _module_markdown(item: dict[str, Any]) -> str:
|
||||
local_source = _local_source_path(item)
|
||||
return "\n".join(
|
||||
[
|
||||
f"# {item.get('qualified_name') or item.get('name')}",
|
||||
@@ -259,6 +360,7 @@ def _module_markdown(item: dict[str, Any]) -> str:
|
||||
f"- Name: `{item.get('name')}`",
|
||||
f"- Lineage: `{item.get('lineage_id')}`",
|
||||
f"- Source: `{item.get('source')}`",
|
||||
f"- Local source: `{local_source}`",
|
||||
"",
|
||||
"## Module Attributes",
|
||||
"```json",
|
||||
@@ -268,6 +370,14 @@ def _module_markdown(item: dict[str, Any]) -> str:
|
||||
) + "\n"
|
||||
|
||||
|
||||
def _local_source_path(item: dict[str, Any]) -> str:
|
||||
source = item.get("source") or {}
|
||||
if not isinstance(source, dict):
|
||||
return ""
|
||||
source_path = str(source.get("source_path") or "")
|
||||
return f"source/{source_path}" if source_path else ""
|
||||
|
||||
|
||||
def _normalized_tree_markdown(normalized: NormalizedProject) -> str:
|
||||
lines = [f"# Metadata Tree: {normalized.project_id or 'project'}", ""]
|
||||
for group in normalized.configuration.groups:
|
||||
|
||||
@@ -1708,10 +1708,16 @@ def test_ai_structure_prepare_writes_ai_ready_package(tmp_path: Path):
|
||||
assert (output / "sir_snapshot.json").exists()
|
||||
assert (codex_package / "AGENTS.md").exists()
|
||||
assert (codex_package / "README.md").exists()
|
||||
assert (codex_package / "context" / "CODEX_START_HERE.md").exists()
|
||||
assert (codex_package / "context" / "project-overview.md").exists()
|
||||
assert (codex_package / "indexes" / "codex-navigation.json").exists()
|
||||
assert (codex_package / "indexes" / "objects.json").exists()
|
||||
assert (codex_package / "indexes" / "source-map.json").exists()
|
||||
assert (codex_package / "raw" / "normalized_project.json").exists()
|
||||
assert (codex_package / "source" / "metadata.xml").exists()
|
||||
assert (codex_package / "source" / "Интеграция.bsl").read_text(encoding="utf-8").startswith("Процедура")
|
||||
assert "generated by SFERA for Codex" in (codex_package / "AGENTS.md").read_text(encoding="utf-8")
|
||||
assert "Use `source/`" in (codex_package / "AGENTS.md").read_text(encoding="utf-8")
|
||||
assert "Copy this whole folder into the Codex project" in (codex_package / "README.md").read_text(encoding="utf-8")
|
||||
assert "Treat modules/forms/commands as parts" in (output / "ai_context.md").read_text(encoding="utf-8")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user