Make Codex 1C context packages self-contained
This commit is contained in:
@@ -13,6 +13,8 @@ from sir import SirSnapshot, snapshot_to_json
|
|||||||
AI_STRUCTURE_VERSION = "1.0"
|
AI_STRUCTURE_VERSION = "1.0"
|
||||||
_PARSEABLE_SUFFIXES = {".xml", ".mdo", ".bsl"}
|
_PARSEABLE_SUFFIXES = {".xml", ".mdo", ".bsl"}
|
||||||
_BINARY_1C_SUFFIXES = {".cf", ".cfe"}
|
_BINARY_1C_SUFFIXES = {".cf", ".cfe"}
|
||||||
|
_CODEX_SOURCE_SUFFIXES = {".xml", ".mdo", ".bsl", ".json", ".txt"}
|
||||||
|
_MAX_CODEX_SOURCE_FILE_BYTES = 2_000_000
|
||||||
|
|
||||||
|
|
||||||
def prepare_ai_structure(
|
def prepare_ai_structure(
|
||||||
@@ -58,7 +60,7 @@ def prepare_ai_structure(
|
|||||||
_write_json(output_path / "normalized_project.json", normalized.model_dump(mode="json"))
|
_write_json(output_path / "normalized_project.json", normalized.model_dump(mode="json"))
|
||||||
_write_text(output_path / "ai_context.md", _ai_context_markdown(manifest, snapshot, normalized))
|
_write_text(output_path / "ai_context.md", _ai_context_markdown(manifest, snapshot, normalized))
|
||||||
_write_text(output_path / "export_plan.md", _export_plan_markdown(project_id, input_path, output_path, binaries, parseable))
|
_write_text(output_path / "export_plan.md", _export_plan_markdown(project_id, input_path, output_path, binaries, parseable))
|
||||||
_write_codex_package(codex_root, manifest, files, snapshot, normalized, binaries, parseable)
|
_write_codex_package(codex_root, input_path, manifest, files, snapshot, normalized, binaries, parseable)
|
||||||
return manifest
|
return manifest
|
||||||
|
|
||||||
|
|
||||||
@@ -135,6 +137,7 @@ def _codex_folder_name(project_id: str) -> str:
|
|||||||
|
|
||||||
def _write_codex_package(
|
def _write_codex_package(
|
||||||
root: Path,
|
root: Path,
|
||||||
|
input_path: Path,
|
||||||
manifest: dict[str, Any],
|
manifest: dict[str, Any],
|
||||||
files: list[dict[str, Any]],
|
files: list[dict[str, Any]],
|
||||||
snapshot: SirSnapshot | None,
|
snapshot: SirSnapshot | None,
|
||||||
@@ -147,12 +150,16 @@ def _write_codex_package(
|
|||||||
(root / "objects").mkdir(parents=True, exist_ok=True)
|
(root / "objects").mkdir(parents=True, exist_ok=True)
|
||||||
(root / "modules").mkdir(parents=True, exist_ok=True)
|
(root / "modules").mkdir(parents=True, exist_ok=True)
|
||||||
(root / "raw").mkdir(parents=True, exist_ok=True)
|
(root / "raw").mkdir(parents=True, exist_ok=True)
|
||||||
|
source_map = _copy_codex_sources(input_path, root / "source")
|
||||||
_write_text(root / "AGENTS.md", _codex_agents_markdown(manifest))
|
_write_text(root / "AGENTS.md", _codex_agents_markdown(manifest))
|
||||||
_write_text(root / "README.md", _codex_readme_markdown(manifest))
|
_write_text(root / "README.md", _codex_readme_markdown(manifest))
|
||||||
|
_write_text(root / "context" / "CODEX_START_HERE.md", _codex_start_here_markdown(manifest))
|
||||||
_write_text(root / "context" / "project-overview.md", _ai_context_markdown(manifest, snapshot, normalized))
|
_write_text(root / "context" / "project-overview.md", _ai_context_markdown(manifest, snapshot, normalized))
|
||||||
_write_text(root / "context" / "export-plan.md", _export_plan_markdown(manifest["project_id"], Path(manifest["input_path"]), root, binaries, parseable))
|
_write_text(root / "context" / "export-plan.md", _export_plan_markdown(manifest["project_id"], Path(manifest["input_path"]), root, binaries, parseable))
|
||||||
_write_json(root / "indexes" / "manifest.json", manifest)
|
_write_json(root / "indexes" / "manifest.json", manifest)
|
||||||
|
_write_json(root / "indexes" / "codex-navigation.json", _codex_navigation(manifest, source_map))
|
||||||
_write_json(root / "indexes" / "source-inventory.json", {"files": files})
|
_write_json(root / "indexes" / "source-inventory.json", {"files": files})
|
||||||
|
_write_json(root / "indexes" / "source-map.json", {"files": source_map})
|
||||||
if snapshot is not None:
|
if snapshot is not None:
|
||||||
(root / "raw" / "sir_snapshot.json").write_bytes(snapshot_to_json(snapshot))
|
(root / "raw" / "sir_snapshot.json").write_bytes(snapshot_to_json(snapshot))
|
||||||
objects = _ai_objects(snapshot)
|
objects = _ai_objects(snapshot)
|
||||||
@@ -168,6 +175,46 @@ def _write_codex_package(
|
|||||||
_write_json(root / "indexes" / "access-model.json", normalized.access.model_dump(mode="json"))
|
_write_json(root / "indexes" / "access-model.json", normalized.access.model_dump(mode="json"))
|
||||||
|
|
||||||
|
|
||||||
|
def _copy_codex_sources(input_path: Path, target: Path) -> list[dict[str, Any]]:
    """Copy text-like source files from ``input_path`` into ``target`` as UTF-8.

    ``input_path`` may be a single file or a directory that is scanned
    recursively. Only files whose suffix (case-insensitively) is listed in
    ``_CODEX_SOURCE_SUFFIXES`` are considered. Files larger than
    ``_MAX_CODEX_SOURCE_FILE_BYTES`` are not copied but are still reported
    with ``"copied": False`` and a ``"reason"``.

    Each copied file is decoded as ``utf-8-sig`` (falling back to ``cp1251``
    with replacement characters) and re-written as UTF-8 under ``target``,
    preserving its path relative to ``input_path``.

    Returns:
        One dict per considered file with ``original_path``,
        ``relative_path``, ``copied`` and ``size`` (size of the original file
        in bytes); copied entries also carry ``codex_path``
        (``source/<relative_path>``), skipped ones carry ``reason``.
    """
    target.mkdir(parents=True, exist_ok=True)

    # Decide once whether we are copying a single file or walking a tree.
    single_file = input_path.is_file()
    if single_file:
        source_files = [input_path]
    else:
        source_files = sorted(path for path in input_path.rglob("*") if path.is_file())

    # When the package is generated inside the scanned tree, a previous run's
    # copies live under ``target``; re-copying them would nest the package's
    # own ``source/`` folder deeper on every run.
    target_resolved = target.resolve()

    copied: list[dict[str, Any]] = []
    for path in source_files:
        if path.suffix.casefold() not in _CODEX_SOURCE_SUFFIXES:
            continue
        if path.resolve().is_relative_to(target_resolved):
            continue

        relative = path.name if single_file else path.relative_to(input_path).as_posix()
        size = path.stat().st_size
        if size > _MAX_CODEX_SOURCE_FILE_BYTES:
            copied.append(
                {
                    "original_path": str(path),
                    "relative_path": relative,
                    "copied": False,
                    "reason": f"file is larger than {_MAX_CODEX_SOURCE_FILE_BYTES} bytes",
                    "size": size,
                }
            )
            continue

        destination = target / relative
        destination.parent.mkdir(parents=True, exist_ok=True)
        try:
            text = path.read_text(encoding="utf-8-sig")
        except UnicodeDecodeError:
            # Not valid UTF-8: retry as cp1251, replacing undecodable bytes
            # instead of failing the whole package build.
            text = path.read_text(encoding="cp1251", errors="replace")
        destination.write_text(text, encoding="utf-8")
        copied.append(
            {
                "original_path": str(path),
                "relative_path": relative,
                "codex_path": f"source/{relative}",
                "copied": True,
                "size": size,
            }
        )
    return copied
|
||||||
|
|
||||||
|
|
||||||
def _codex_agents_markdown(manifest: dict[str, Any]) -> str:
|
def _codex_agents_markdown(manifest: dict[str, Any]) -> str:
|
||||||
return f"""# AGENTS.md for 1C context package
|
return f"""# AGENTS.md for 1C context package
|
||||||
|
|
||||||
@@ -178,6 +225,8 @@ This folder is generated by SFERA for Codex.
|
|||||||
- Treat this package as read-only context for project `{manifest['project_id']}`.
|
- Treat this package as read-only context for project `{manifest['project_id']}`.
|
||||||
- Start with `README.md` and `context/project-overview.md`.
|
- Start with `README.md` and `context/project-overview.md`.
|
||||||
- Use `indexes/objects.json`, `indexes/modules.json`, and `indexes/edges.json` for precise navigation.
|
- Use `indexes/objects.json`, `indexes/modules.json`, and `indexes/edges.json` for precise navigation.
|
||||||
|
- Use `source/` for local copied BSL/XML/MDO text. Do not rely on the absolute source path from the machine that generated the package.
|
||||||
|
- Use `indexes/source-map.json` to map original source paths to local `source/...` paths.
|
||||||
- Use `raw/normalized_project.json` as the authoritative 1C metadata model when present.
|
- Use `raw/normalized_project.json` as the authoritative 1C metadata model when present.
|
||||||
- 1C modules, forms, commands, реквизиты, табличные части and rights are parts of owner 1C objects. Do not treat a form module as an independent detached source file.
|
- 1C modules, forms, commands, реквизиты, табличные части and rights are parts of owner 1C objects. Do not treat a form module as an independent detached source file.
|
||||||
- When writing BSL, preserve the owner object context from `qualified_name`, `lineage_id`, and `source`.
|
- When writing BSL, preserve the owner object context from `qualified_name`, `lineage_id`, and `source`.
|
||||||
@@ -188,6 +237,7 @@ This folder is generated by SFERA for Codex.
|
|||||||
- `context/project-overview.md` - compact human context.
|
- `context/project-overview.md` - compact human context.
|
||||||
- `context/metadata-tree.md` - metadata tree extracted from NormalizedProject.
|
- `context/metadata-tree.md` - metadata tree extracted from NormalizedProject.
|
||||||
- `indexes/*.json` - machine-readable indexes for Codex search and reasoning.
|
- `indexes/*.json` - machine-readable indexes for Codex search and reasoning.
|
||||||
|
- `source/` - local UTF-8 copies of BSL/XML/MDO source files.
|
||||||
- `objects/*.md` - object-level summaries.
|
- `objects/*.md` - object-level summaries.
|
||||||
- `modules/*.md` - module-level summaries.
|
- `modules/*.md` - module-level summaries.
|
||||||
- `raw/*.json` - full raw SFERA model.
|
- `raw/*.json` - full raw SFERA model.
|
||||||
@@ -208,6 +258,7 @@ def _codex_readme_markdown(manifest: dict[str, Any]) -> str:
|
|||||||
f"- Normalized objects: {normalized.get('objects', 0)}",
|
f"- Normalized objects: {normalized.get('objects', 0)}",
|
||||||
"",
|
"",
|
||||||
"Copy this whole folder into the Codex project when you want Codex to write code for this 1C configuration.",
|
"Copy this whole folder into the Codex project when you want Codex to write code for this 1C configuration.",
|
||||||
|
"The package includes a local `source/` folder, so Codex can inspect BSL/XML/MDO files after the folder is moved.",
|
||||||
]
|
]
|
||||||
if manifest.get("diagnostics"):
|
if manifest.get("diagnostics"):
|
||||||
lines.extend(["", "## Diagnostics"])
|
lines.extend(["", "## Diagnostics"])
|
||||||
@@ -215,6 +266,53 @@ def _codex_readme_markdown(manifest: dict[str, Any]) -> str:
|
|||||||
return "\n".join(lines) + "\n"
|
return "\n".join(lines) + "\n"
|
||||||
|
|
||||||
|
|
||||||
|
def _codex_start_here_markdown(manifest: dict[str, Any]) -> str:
|
||||||
|
return f"""# Start Here For Codex
|
||||||
|
|
||||||
|
Project: `{manifest['project_id']}`
|
||||||
|
Status: `{manifest['status']}`
|
||||||
|
|
||||||
|
Read in this order:
|
||||||
|
|
||||||
|
1. `AGENTS.md`
|
||||||
|
2. `README.md`
|
||||||
|
3. `context/project-overview.md`
|
||||||
|
4. `context/metadata-tree.md`
|
||||||
|
5. `indexes/objects.json`
|
||||||
|
6. `indexes/modules.json`
|
||||||
|
7. `indexes/edges.json`
|
||||||
|
8. `source/`
|
||||||
|
|
||||||
|
When generating code:
|
||||||
|
|
||||||
|
- Locate the owner 1C object first.
|
||||||
|
- Then inspect its module/form/command context.
|
||||||
|
- Prefer local copied files under `source/` for exact source text.
|
||||||
|
- Use `raw/normalized_project.json` when object structure matters more than raw XML layout.
|
||||||
|
- Use `indexes/source-map.json` if you need to map SFERA source references to local package paths.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def _codex_navigation(manifest: dict[str, Any], source_map: list[dict[str, Any]]) -> dict[str, Any]:
|
||||||
|
copied_sources = [item for item in source_map if item.get("copied")]
|
||||||
|
return {
|
||||||
|
"project_id": manifest["project_id"],
|
||||||
|
"status": manifest["status"],
|
||||||
|
"start_here": "context/CODEX_START_HERE.md",
|
||||||
|
"instructions": "AGENTS.md",
|
||||||
|
"overview": "context/project-overview.md",
|
||||||
|
"metadata_tree": "context/metadata-tree.md",
|
||||||
|
"objects_index": "indexes/objects.json",
|
||||||
|
"modules_index": "indexes/modules.json",
|
||||||
|
"edges_index": "indexes/edges.json",
|
||||||
|
"source_map": "indexes/source-map.json",
|
||||||
|
"raw_normalized_project": "raw/normalized_project.json",
|
||||||
|
"raw_sir_snapshot": "raw/sir_snapshot.json",
|
||||||
|
"local_source_count": len(copied_sources),
|
||||||
|
"first_sources": [item.get("codex_path") for item in copied_sources[:25]],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def _write_object_markdown_files(root: Path, objects: list[dict[str, Any]]) -> None:
|
def _write_object_markdown_files(root: Path, objects: list[dict[str, Any]]) -> None:
|
||||||
for item in objects[:1000]:
|
for item in objects[:1000]:
|
||||||
filename = _safe_context_filename(str(item.get("qualified_name") or item.get("name") or "object")) + ".md"
|
filename = _safe_context_filename(str(item.get("qualified_name") or item.get("name") or "object")) + ".md"
|
||||||
@@ -233,6 +331,7 @@ def _safe_context_filename(value: str) -> str:
|
|||||||
|
|
||||||
|
|
||||||
def _object_markdown(item: dict[str, Any]) -> str:
|
def _object_markdown(item: dict[str, Any]) -> str:
|
||||||
|
local_source = _local_source_path(item)
|
||||||
return "\n".join(
|
return "\n".join(
|
||||||
[
|
[
|
||||||
f"# {item.get('qualified_name') or item.get('name')}",
|
f"# {item.get('qualified_name') or item.get('name')}",
|
||||||
@@ -242,6 +341,7 @@ def _object_markdown(item: dict[str, Any]) -> str:
|
|||||||
f"- Lineage: `{item.get('lineage_id')}`",
|
f"- Lineage: `{item.get('lineage_id')}`",
|
||||||
f"- Semantic: `{item.get('semantic_id')}`",
|
f"- Semantic: `{item.get('semantic_id')}`",
|
||||||
f"- Source: `{item.get('source')}`",
|
f"- Source: `{item.get('source')}`",
|
||||||
|
f"- Local source: `{local_source}`",
|
||||||
"",
|
"",
|
||||||
"## Attributes",
|
"## Attributes",
|
||||||
"```json",
|
"```json",
|
||||||
@@ -252,6 +352,7 @@ def _object_markdown(item: dict[str, Any]) -> str:
|
|||||||
|
|
||||||
|
|
||||||
def _module_markdown(item: dict[str, Any]) -> str:
|
def _module_markdown(item: dict[str, Any]) -> str:
|
||||||
|
local_source = _local_source_path(item)
|
||||||
return "\n".join(
|
return "\n".join(
|
||||||
[
|
[
|
||||||
f"# {item.get('qualified_name') or item.get('name')}",
|
f"# {item.get('qualified_name') or item.get('name')}",
|
||||||
@@ -259,6 +360,7 @@ def _module_markdown(item: dict[str, Any]) -> str:
|
|||||||
f"- Name: `{item.get('name')}`",
|
f"- Name: `{item.get('name')}`",
|
||||||
f"- Lineage: `{item.get('lineage_id')}`",
|
f"- Lineage: `{item.get('lineage_id')}`",
|
||||||
f"- Source: `{item.get('source')}`",
|
f"- Source: `{item.get('source')}`",
|
||||||
|
f"- Local source: `{local_source}`",
|
||||||
"",
|
"",
|
||||||
"## Module Attributes",
|
"## Module Attributes",
|
||||||
"```json",
|
"```json",
|
||||||
@@ -268,6 +370,14 @@ def _module_markdown(item: dict[str, Any]) -> str:
|
|||||||
) + "\n"
|
) + "\n"
|
||||||
|
|
||||||
|
|
||||||
|
def _local_source_path(item: dict[str, Any]) -> str:
|
||||||
|
source = item.get("source") or {}
|
||||||
|
if not isinstance(source, dict):
|
||||||
|
return ""
|
||||||
|
source_path = str(source.get("source_path") or "")
|
||||||
|
return f"source/{source_path}" if source_path else ""
|
||||||
|
|
||||||
|
|
||||||
def _normalized_tree_markdown(normalized: NormalizedProject) -> str:
|
def _normalized_tree_markdown(normalized: NormalizedProject) -> str:
|
||||||
lines = [f"# Metadata Tree: {normalized.project_id or 'project'}", ""]
|
lines = [f"# Metadata Tree: {normalized.project_id or 'project'}", ""]
|
||||||
for group in normalized.configuration.groups:
|
for group in normalized.configuration.groups:
|
||||||
|
|||||||
@@ -1708,10 +1708,16 @@ def test_ai_structure_prepare_writes_ai_ready_package(tmp_path: Path):
|
|||||||
assert (output / "sir_snapshot.json").exists()
|
assert (output / "sir_snapshot.json").exists()
|
||||||
assert (codex_package / "AGENTS.md").exists()
|
assert (codex_package / "AGENTS.md").exists()
|
||||||
assert (codex_package / "README.md").exists()
|
assert (codex_package / "README.md").exists()
|
||||||
|
assert (codex_package / "context" / "CODEX_START_HERE.md").exists()
|
||||||
assert (codex_package / "context" / "project-overview.md").exists()
|
assert (codex_package / "context" / "project-overview.md").exists()
|
||||||
|
assert (codex_package / "indexes" / "codex-navigation.json").exists()
|
||||||
assert (codex_package / "indexes" / "objects.json").exists()
|
assert (codex_package / "indexes" / "objects.json").exists()
|
||||||
|
assert (codex_package / "indexes" / "source-map.json").exists()
|
||||||
assert (codex_package / "raw" / "normalized_project.json").exists()
|
assert (codex_package / "raw" / "normalized_project.json").exists()
|
||||||
|
assert (codex_package / "source" / "metadata.xml").exists()
|
||||||
|
assert (codex_package / "source" / "Интеграция.bsl").read_text(encoding="utf-8").startswith("Процедура")
|
||||||
assert "generated by SFERA for Codex" in (codex_package / "AGENTS.md").read_text(encoding="utf-8")
|
assert "generated by SFERA for Codex" in (codex_package / "AGENTS.md").read_text(encoding="utf-8")
|
||||||
|
assert "Use `source/`" in (codex_package / "AGENTS.md").read_text(encoding="utf-8")
|
||||||
assert "Copy this whole folder into the Codex project" in (codex_package / "README.md").read_text(encoding="utf-8")
|
assert "Copy this whole folder into the Codex project" in (codex_package / "README.md").read_text(encoding="utf-8")
|
||||||
assert "Treat modules/forms/commands as parts" in (output / "ai_context.md").read_text(encoding="utf-8")
|
assert "Treat modules/forms/commands as parts" in (output / "ai_context.md").read_text(encoding="utf-8")
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user