diff --git a/services/api-server/src/api_server/ai_structure_service.py b/services/api-server/src/api_server/ai_structure_service.py index cc72c13..6270a46 100644 --- a/services/api-server/src/api_server/ai_structure_service.py +++ b/services/api-server/src/api_server/ai_structure_service.py @@ -13,6 +13,8 @@ from sir import SirSnapshot, snapshot_to_json AI_STRUCTURE_VERSION = "1.0" _PARSEABLE_SUFFIXES = {".xml", ".mdo", ".bsl"} _BINARY_1C_SUFFIXES = {".cf", ".cfe"} +_CODEX_SOURCE_SUFFIXES = {".xml", ".mdo", ".bsl", ".json", ".txt"} +_MAX_CODEX_SOURCE_FILE_BYTES = 2_000_000 def prepare_ai_structure( @@ -58,7 +60,7 @@ def prepare_ai_structure( _write_json(output_path / "normalized_project.json", normalized.model_dump(mode="json")) _write_text(output_path / "ai_context.md", _ai_context_markdown(manifest, snapshot, normalized)) _write_text(output_path / "export_plan.md", _export_plan_markdown(project_id, input_path, output_path, binaries, parseable)) - _write_codex_package(codex_root, manifest, files, snapshot, normalized, binaries, parseable) + _write_codex_package(codex_root, input_path, manifest, files, snapshot, normalized, binaries, parseable) return manifest @@ -135,6 +137,7 @@ def _codex_folder_name(project_id: str) -> str: def _write_codex_package( root: Path, + input_path: Path, manifest: dict[str, Any], files: list[dict[str, Any]], snapshot: SirSnapshot | None, @@ -147,12 +150,16 @@ def _write_codex_package( (root / "objects").mkdir(parents=True, exist_ok=True) (root / "modules").mkdir(parents=True, exist_ok=True) (root / "raw").mkdir(parents=True, exist_ok=True) + source_map = _copy_codex_sources(input_path, root / "source") _write_text(root / "AGENTS.md", _codex_agents_markdown(manifest)) _write_text(root / "README.md", _codex_readme_markdown(manifest)) + _write_text(root / "context" / "CODEX_START_HERE.md", _codex_start_here_markdown(manifest)) _write_text(root / "context" / "project-overview.md", _ai_context_markdown(manifest, snapshot, normalized)) _write_text(root / "context" / "export-plan.md", _export_plan_markdown(manifest["project_id"], Path(manifest["input_path"]), root, binaries, parseable)) _write_json(root / "indexes" / "manifest.json", manifest) + _write_json(root / "indexes" / "codex-navigation.json", _codex_navigation(manifest, source_map)) _write_json(root / "indexes" / "source-inventory.json", {"files": files}) + _write_json(root / "indexes" / "source-map.json", {"files": source_map}) if snapshot is not None: (root / "raw" / "sir_snapshot.json").write_bytes(snapshot_to_json(snapshot)) objects = _ai_objects(snapshot) @@ -168,6 +175,46 @@ def _write_codex_package( _write_json(root / "indexes" / "access-model.json", normalized.access.model_dump(mode="json")) +def _copy_codex_sources(input_path: Path, target: Path) -> list[dict[str, Any]]: + target.mkdir(parents=True, exist_ok=True) + source_files = [input_path] if input_path.is_file() else sorted(path for path in input_path.rglob("*") if path.is_file()) + copied: list[dict[str, Any]] = [] + for path in source_files: + suffix = path.suffix.casefold() + relative = path.name if input_path.is_file() else path.relative_to(input_path).as_posix() + if suffix not in _CODEX_SOURCE_SUFFIXES: + continue + size = path.stat().st_size + if size > _MAX_CODEX_SOURCE_FILE_BYTES: + copied.append( + { + "original_path": str(path), + "relative_path": relative, + "copied": False, + "reason": f"file is larger than {_MAX_CODEX_SOURCE_FILE_BYTES} bytes", + "size": size, + } + ) + continue + destination = target / relative + destination.parent.mkdir(parents=True, exist_ok=True) + try: + text = path.read_text(encoding="utf-8-sig") + except UnicodeDecodeError: + text = path.read_text(encoding="cp1251", errors="replace") + destination.write_text(text, encoding="utf-8") + copied.append( + { + "original_path": str(path), + "relative_path": relative, + "codex_path": f"source/{relative}", + "copied": True, + "size": size, + } + ) + return copied + + def _codex_agents_markdown(manifest: dict[str, Any]) -> str: return f"""# AGENTS.md for 1C context package @@ -178,6 +225,8 @@ This folder is generated by SFERA for Codex. - Treat this package as read-only context for project `{manifest['project_id']}`. - Start with `README.md` and `context/project-overview.md`. - Use `indexes/objects.json`, `indexes/modules.json`, and `indexes/edges.json` for precise navigation. +- Use `source/` for local copied BSL/XML/MDO text. Do not rely on the absolute source path from the machine that generated the package. +- Use `indexes/source-map.json` to map original source paths to local `source/...` paths. - Use `raw/normalized_project.json` as the authoritative 1C metadata model when present. - 1C modules, forms, commands, реквизиты, табличные части and rights are parts of owner 1C objects. Do not treat a form module as an independent detached source file. - When writing BSL, preserve the owner object context from `qualified_name`, `lineage_id`, and `source`. @@ -188,6 +237,7 @@ This folder is generated by SFERA for Codex. - `context/project-overview.md` - compact human context. - `context/metadata-tree.md` - metadata tree extracted from NormalizedProject. - `indexes/*.json` - machine-readable indexes for Codex search and reasoning. +- `source/` - local UTF-8 copies of BSL/XML/MDO source files. - `objects/*.md` - object-level summaries. - `modules/*.md` - module-level summaries. - `raw/*.json` - full raw SFERA model. @@ -208,6 +258,7 @@ def _codex_readme_markdown(manifest: dict[str, Any]) -> str: f"- Normalized objects: {normalized.get('objects', 0)}", "", "Copy this whole folder into the Codex project when you want Codex to write code for this 1C configuration.", + "The package includes a local `source/` folder, so Codex can inspect BSL/XML/MDO files after the folder is moved.", ] if manifest.get("diagnostics"): lines.extend(["", "## Diagnostics"]) @@ -215,6 +266,53 @@ def _codex_readme_markdown(manifest: dict[str, Any]) -> str: return "\n".join(lines) + "\n" +def _codex_start_here_markdown(manifest: dict[str, Any]) -> str: + return f"""# Start Here For Codex + +Project: `{manifest['project_id']}` +Status: `{manifest['status']}` + +Read in this order: + +1. `AGENTS.md` +2. `README.md` +3. `context/project-overview.md` +4. `context/metadata-tree.md` +5. `indexes/objects.json` +6. `indexes/modules.json` +7. `indexes/edges.json` +8. `source/` + +When generating code: + +- Locate the owner 1C object first. +- Then inspect its module/form/command context. +- Prefer local copied files under `source/` for exact source text. +- Use `raw/normalized_project.json` when object structure matters more than raw XML layout. +- Use `indexes/source-map.json` if you need to map SFERA source references to local package paths. +""" + + +def _codex_navigation(manifest: dict[str, Any], source_map: list[dict[str, Any]]) -> dict[str, Any]: + copied_sources = [item for item in source_map if item.get("copied")] + return { + "project_id": manifest["project_id"], + "status": manifest["status"], + "start_here": "context/CODEX_START_HERE.md", + "instructions": "AGENTS.md", + "overview": "context/project-overview.md", + "metadata_tree": "context/metadata-tree.md", + "objects_index": "indexes/objects.json", + "modules_index": "indexes/modules.json", + "edges_index": "indexes/edges.json", + "source_map": "indexes/source-map.json", + "raw_normalized_project": "raw/normalized_project.json", + "raw_sir_snapshot": "raw/sir_snapshot.json", + "local_source_count": len(copied_sources), + "first_sources": [item.get("codex_path") for item in copied_sources[:25]], + } + + def _write_object_markdown_files(root: Path, objects: list[dict[str, Any]]) -> None: for item in objects[:1000]: filename = _safe_context_filename(str(item.get("qualified_name") or item.get("name") or "object")) + ".md" @@ -233,6 +331,7 @@ def _safe_context_filename(value: str) -> str: def _object_markdown(item: dict[str, Any]) -> str: + local_source = _local_source_path(item) return "\n".join( [ f"# {item.get('qualified_name') or item.get('name')}", @@ -242,6 +341,7 @@ def _object_markdown(item: dict[str, Any]) -> str: f"- Lineage: `{item.get('lineage_id')}`", f"- Semantic: `{item.get('semantic_id')}`", f"- Source: `{item.get('source')}`", + f"- Local source: `{local_source}`", "", "## Attributes", "```json", @@ -252,6 +352,7 @@ def _object_markdown(item: dict[str, Any]) -> str: def _module_markdown(item: dict[str, Any]) -> str: + local_source = _local_source_path(item) return "\n".join( [ f"# {item.get('qualified_name') or item.get('name')}", @@ -259,6 +360,7 @@ def _module_markdown(item: dict[str, Any]) -> str: f"- Name: `{item.get('name')}`", f"- Lineage: `{item.get('lineage_id')}`", f"- Source: `{item.get('source')}`", + f"- Local source: `{local_source}`", "", "## Module Attributes", "```json", @@ -268,6 +370,14 @@ def _module_markdown(item: dict[str, Any]) -> str: ) + "\n" +def _local_source_path(item: dict[str, Any]) -> str: + source = item.get("source") or {} + if not isinstance(source, dict): + return "" + source_path = str(source.get("source_path") or "") + return f"source/{source_path}" if source_path else "" + + def _normalized_tree_markdown(normalized: NormalizedProject) -> str: lines = [f"# Metadata Tree: {normalized.project_id or 'project'}", ""] for group in normalized.configuration.groups: diff --git a/services/api-server/tests/test_api.py b/services/api-server/tests/test_api.py index 64ff81e..0d19021 100644 --- a/services/api-server/tests/test_api.py +++ b/services/api-server/tests/test_api.py @@ -1708,10 +1708,16 @@ def test_ai_structure_prepare_writes_ai_ready_package(tmp_path: Path): assert (output / "sir_snapshot.json").exists() assert (codex_package / "AGENTS.md").exists() assert (codex_package / "README.md").exists() + assert (codex_package / "context" / "CODEX_START_HERE.md").exists() assert (codex_package / "context" / "project-overview.md").exists() + assert (codex_package / "indexes" / "codex-navigation.json").exists() assert (codex_package / "indexes" / "objects.json").exists() + assert (codex_package / "indexes" / "source-map.json").exists() assert (codex_package / "raw" / "normalized_project.json").exists() + assert (codex_package / "source" / "metadata.xml").exists() + assert (codex_package / "source" / "Интеграция.bsl").read_text(encoding="utf-8").startswith("Процедура") assert "generated by SFERA for Codex" in (codex_package / "AGENTS.md").read_text(encoding="utf-8") + assert "Use `source/`" in (codex_package / "AGENTS.md").read_text(encoding="utf-8") assert "Copy this whole folder into the Codex project" in (codex_package / "README.md").read_text(encoding="utf-8") assert "Treat modules/forms/commands as parts" in (output / "ai_context.md").read_text(encoding="utf-8")