sfera/packages/semantic-kernel/src/semantic_kernel/__init__.py

from __future__ import annotations

import hashlib
import json
import os
import re
import shutil
import subprocess
from dataclasses import dataclass, field
from pathlib import Path
import xml.etree.ElementTree as ET

from one_c_normalizer import parse_one_c_xml_file
from sir import (
    Diagnostic,
    DiagnosticSeverity,
    EdgeKind,
    NodeKind,
    SemanticEdge,
    SemanticNode,
    SirSnapshot,
    SnapshotMetadata,
    SourceRef,
    ReferenceKind,
    UnresolvedReference,
    compute_snapshot_hash,
    make_lineage_id,
    make_semantic_id,
    validate_snapshot,
)


# Opening line of a routine declaration (Russian or English keyword);
# captures the keyword as ``kind`` and the routine identifier as ``name``.
_ROUTINE_START_RE = re.compile(
    r"^\s*(?P<kind>Процедура|Procedure|Функция|Function)\s+"
    r"(?P<name>[A-Za-zА-Яа-я_][\wА-Яа-я]*)",
    re.IGNORECASE,
)
# Line that begins with one of the routine terminator keywords.
_ROUTINE_END_RE = re.compile(r"^\s*(КонецПроцедуры|EndProcedure|КонецФункции|EndFunction)\b", re.IGNORECASE)
# Statement that starts with ``Name(...);`` — the argument list may not contain ")".
_CALL_RE = re.compile(r"^\s*(?P<name>[A-Za-zА-Яа-я_][\wА-Яа-я]*)\s*\([^)]*\)\s*;")
# Call on the right-hand side of an assignment: ``= Name(...);``.
_ASSIGNMENT_CALL_RE = re.compile(
    r"=\s*(?P<name>[A-Za-zА-Яа-я_][\wА-Яа-я]*)\s*\([^)]*\)\s*;",
    re.IGNORECASE,
)
# Call inside an ``Если/If ... Тогда/Then`` line whose name is not preceded by a dot.
_CONDITION_CALL_RE = re.compile(
    r"^\s*(Если|If)\b.*?(?<!\.)(?P<name>[A-Za-zА-Яа-я_][\wА-Яа-я]*)\s*\([^)]*\).*?(Тогда|Then)\b",
    re.IGNORECASE,
)
# ``Var = <Manager>.<Name>.<CreateMethod>(`` — captures the assigned variable (``var``),
# the manager collection (``factory``), the object name (``name``) and the create method.
_OBJECT_CREATE_RE = re.compile(
    r"^\s*(?P<var>[A-Za-zА-Яа-я_][\wА-Яа-я]*)\s*=\s*"
    r"(?P<factory>Справочники|Catalogs|Документы|Documents|"
    r"РегистрыСведений|InformationRegisters|РегистрыНакопления|AccumulationRegisters|"
    r"РегистрыБухгалтерии|AccountingRegisters|РегистрыРасчета|CalculationRegisters)\."
    r"(?P<name>[A-Za-zА-Яа-я_][\wА-Яа-я]*)\."
    r"(?P<method>СоздатьЭлемент|CreateItem|СоздатьДокумент|CreateDocument|"
    r"СоздатьНаборЗаписей|CreateRecordSet)\s*\(",
    re.IGNORECASE,
)
# Any ``.Текст`` / ``.Text`` member access; the parsers treat it as the start of a query text assignment.
_QUERY_TEXT_RE = re.compile(r"\.(Текст|Text)\b", re.IGNORECASE)
# ``Новый Запрос("...")`` / ``New Query("...")`` with the quoted text captured as ``query``.
_INLINE_NEW_QUERY_RE = re.compile(r"(Новый\s+Запрос|New\s+Query)\s*\(\s*\"(?P<query>.*?)\"\s*\)", re.IGNORECASE)
# Query line that consists solely of the ``ИЗ`` / ``FROM`` keyword.
_FROM_RE = re.compile(r"^\s*(ИЗ|FROM)\s*$", re.IGNORECASE)
# http(s) URL, terminated by a quote, whitespace or semicolon.
_URL_RE = re.compile(r"https?://[^\"'\s;]+", re.IGNORECASE)
# Name of the environment variable read by resolve_rust_bsl_parser() as an explicit parser path.
_RUST_PARSER_ENV = "SFERA_BSL_PARSER"
# Node kinds whose XML-derived nodes index_project() collects into ``metadata_nodes``,
# the list it later passes to _link_metadata_to_modules().
_METADATA_OWNER_KINDS = {
    NodeKind.CATALOG,
    NodeKind.DOCUMENT,
    NodeKind.REGISTER,
    NodeKind.COMMON_MODULE,
    NodeKind.CONSTANT,
    NodeKind.DOCUMENT_JOURNAL,
    NodeKind.ENUM,
    NodeKind.REPORT,
    NodeKind.DATA_PROCESSOR,
    NodeKind.CHART_OF_CHARACTERISTIC_TYPES,
    NodeKind.CHART_OF_ACCOUNTS,
    NodeKind.CHART_OF_CALCULATION_TYPES,
    NodeKind.EXCHANGE_PLAN,
    NodeKind.EXTERNAL_DATA_SOURCE,
    NodeKind.SCHEDULED_JOB,
    NodeKind.BUSINESS_PROCESS,
    NodeKind.TASK,
}
# Maps a lower-cased plural collection name (English and Russian spellings) to a pair of
# (singular Russian type prefix, NodeKind). All four register collections share NodeKind.REGISTER.
# NOTE(review): the consumer is not visible in this part of the file; presumably the keys are
# matched against directory names of an exported configuration — confirm against the caller.
_PATH_METADATA_ALIASES = {
    "catalogs": ("Справочник", NodeKind.CATALOG),
    "справочники": ("Справочник", NodeKind.CATALOG),
    "documents": ("Документ", NodeKind.DOCUMENT),
    "документы": ("Документ", NodeKind.DOCUMENT),
    "constants": ("Константа", NodeKind.CONSTANT),
    "константы": ("Константа", NodeKind.CONSTANT),
    "documentjournals": ("ЖурналДокументов", NodeKind.DOCUMENT_JOURNAL),
    "журналыдокументов": ("ЖурналДокументов", NodeKind.DOCUMENT_JOURNAL),
    "enums": ("Перечисление", NodeKind.ENUM),
    "перечисления": ("Перечисление", NodeKind.ENUM),
    "reports": ("Отчет", NodeKind.REPORT),
    "отчеты": ("Отчет", NodeKind.REPORT),
    "dataprocessors": ("Обработка", NodeKind.DATA_PROCESSOR),
    "обработки": ("Обработка", NodeKind.DATA_PROCESSOR),
    "chartsofcharacteristictypes": ("ПланВидовХарактеристик", NodeKind.CHART_OF_CHARACTERISTIC_TYPES),
    "планывидовхарактеристик": ("ПланВидовХарактеристик", NodeKind.CHART_OF_CHARACTERISTIC_TYPES),
    "chartsofaccounts": ("ПланСчетов", NodeKind.CHART_OF_ACCOUNTS),
    "планысчетов": ("ПланСчетов", NodeKind.CHART_OF_ACCOUNTS),
    "chartsofcalculationtypes": ("ПланВидовРасчета", NodeKind.CHART_OF_CALCULATION_TYPES),
    "планывидоврасчета": ("ПланВидовРасчета", NodeKind.CHART_OF_CALCULATION_TYPES),
    "accumulationregisters": ("РегистрНакопления", NodeKind.REGISTER),
    "регистрынакопления": ("РегистрНакопления", NodeKind.REGISTER),
    "informationregisters": ("РегистрСведений", NodeKind.REGISTER),
    "регистрысведений": ("РегистрСведений", NodeKind.REGISTER),
    "accountingregisters": ("РегистрБухгалтерии", NodeKind.REGISTER),
    "регистрыбухгалтерии": ("РегистрБухгалтерии", NodeKind.REGISTER),
    "calculationregisters": ("РегистрРасчета", NodeKind.REGISTER),
    "регистрырасчета": ("РегистрРасчета", NodeKind.REGISTER),
    "commonmodules": ("ОбщийМодуль", NodeKind.COMMON_MODULE),
    "общиемодули": ("ОбщийМодуль", NodeKind.COMMON_MODULE),
    "exchangeplans": ("ПланОбмена", NodeKind.EXCHANGE_PLAN),
    "планыобмена": ("ПланОбмена", NodeKind.EXCHANGE_PLAN),
    "externaldatasources": ("ВнешнийИсточникДанных", NodeKind.EXTERNAL_DATA_SOURCE),
    "внешниеисточникиданных": ("ВнешнийИсточникДанных", NodeKind.EXTERNAL_DATA_SOURCE),
    "scheduledjobs": ("РегламентноеЗадание", NodeKind.SCHEDULED_JOB),
    "регламентныезадания": ("РегламентноеЗадание", NodeKind.SCHEDULED_JOB),
    "businessprocesses": ("БизнесПроцесс", NodeKind.BUSINESS_PROCESS),
    "бизнеспроцессы": ("БизнесПроцесс", NodeKind.BUSINESS_PROCESS),
    "tasks": ("Задача", NodeKind.TASK),
    "задачи": ("Задача", NodeKind.TASK),
}


@dataclass(frozen=True)
class ParsedRoutine:
    """Immutable description of one BSL procedure or function found in a module."""

    name: str  # routine identifier as written in the source
    is_function: bool  # True for Функция/Function, False for Процедура/Procedure
    export: bool  # whether the routine is marked as exported
    line_start: int  # line of the declaration (1-based when produced by parse_bsl_module)
    line_end: int  # last line seen for the routine
    calls: tuple[tuple[str, int], ...] = ()  # (callee name, line number) pairs
    queries: tuple["ParsedQuery", ...] = ()  # query texts found in the routine body
    writes: tuple["ParsedWrite", ...] = ()  # write operations found in the routine body


@dataclass(frozen=True)
class ParsedQuery:
    """Immutable record of one query text found inside a routine."""

    text: str  # cleaned query text, lines joined with "\n"
    tables: tuple[str, ...]  # table names referenced by the query
    line_start: int  # first source line of the query
    line_end: int  # last source line of the query


@dataclass(frozen=True)
class ParsedWrite:
    """Immutable record of one write operation found inside a routine."""

    target: str  # written object or register name; "unknown" when it could not be determined
    write_type: str  # "REGISTER_WRITE" or "OBJECT_WRITE"
    line: int  # source line of the write statement


@dataclass
class _RoutineBuilder:
    name: str
    is_function: bool
    export: bool
    line_start: int
    line_end: int
    calls: list[tuple[str, int]] = field(default_factory=list)
    queries: list[ParsedQuery] = field(default_factory=list)
    writes: list[ParsedWrite] = field(default_factory=list)

    def freeze(self) -> ParsedRoutine:
        return ParsedRoutine(
            name=self.name,
            is_function=self.is_function,
            export=self.export,
            line_start=self.line_start,
            line_end=self.line_end,
            calls=tuple(self.calls),
            queries=tuple(self.queries),
            writes=tuple(self.writes),
        )


def index_project(path: str | Path, *, project_id: str | None = None, structure_only: bool = False) -> SirSnapshot:
    """Index a 1C project directory (or a single file) into a ``SirSnapshot``.

    The work is done in three passes:

    1. Every ``*.bsl`` module becomes a MODULE node with its routines, the
       queries owned by those routines, the tables the queries read and the
       targets the routines write.
    2. Every metadata file (``.xml``, ``.mdo``, ``.form``) is parsed with
       ``parse_one_c_xml_file``; the resulting objects become nodes linked to
       their parent by qualified-name prefix. Files that fail to parse produce
       an ``XML_PARSE_ERROR`` diagnostic and are skipped.
    3. Calls recorded for each routine are resolved (same module first, then
       project-wide by name) into CALLS edges, or into ``UnresolvedReference``
       entries when no routine with that name is known.

    Args:
        path: Project root directory, or a single ``.bsl`` / metadata file.
        project_id: Explicit project identifier; defaults to ``root.stem`` and
            falls back to ``"sfera"`` when that is empty.
        structure_only: When true, BSL sources are not indexed and metadata
            files are searched under ``<root>/src`` if that directory exists.

    Returns:
        The snapshot with de-duplicated nodes and edges, collected diagnostics
        and unresolved references, after its hash has been computed and
        ``validate_snapshot`` has been called on it.

    Raises:
        RuntimeError: Propagated from the Rust BSL parser when it exits with a
            non-zero status.
    """
    root = Path(path)
    source_files = [] if structure_only else (
        [root] if root.is_file() and root.suffix.lower() == ".bsl" else sorted(root.rglob("*.bsl"))
    )
    xml_suffixes = {".xml", ".mdo", ".form"}
    xml_root = root / "src" if structure_only and (root / "src").exists() else root
    xml_files = [root] if root.is_file() and root.suffix.lower() in xml_suffixes else sorted(
        file for file in xml_root.rglob("*") if file.is_file() and file.suffix.lower() in xml_suffixes
    )
    project = project_id or root.stem or "sfera"

    nodes: list[SemanticNode] = []
    edges: list[SemanticEdge] = []
    unresolved_references: list[UnresolvedReference] = []
    routine_by_name: dict[str, SemanticNode] = {}
    routine_by_source_and_name: dict[tuple[str, str], SemanticNode] = {}
    module_nodes: dict[str, SemanticNode] = {}
    routines_by_source: dict[str, list[ParsedRoutine]] = {}
    diagnostics: list[Diagnostic] = []
    metadata_nodes: list[SemanticNode] = []
    scheduled_job_nodes: list[SemanticNode] = []
    command_nodes: list[SemanticNode] = []
    form_nodes: list[SemanticNode] = []
    role_rights: list[dict] = []

    # Lineage ids of every node appended so far. Kept in step with ``nodes`` so
    # the table / write-target de-duplication below is an O(1) lookup instead of
    # rebuilding a set over all nodes for every reference.
    known_lineage_ids = set()

    def add_node(new_node: SemanticNode) -> None:
        nodes.append(new_node)
        known_lineage_ids.add(new_node.lineage_id)

    # Pass 1: BSL modules, their routines, queries and writes.
    for source_file in source_files:
        text = _read_text_file(source_file)
        source_path = source_file.as_posix()
        source_hash = _source_hash(text)
        routines = parse_bsl_module_file(source_file, text)
        diagnostics.extend(_bsl_parse_diagnostics(source_path, text, source_hash))
        routines_by_source[source_path] = routines
        module_name = source_file.stem
        module_key = f"{project}:{source_path}"
        module = _node(
            NodeKind.MODULE,
            module_name,
            module_name,
            module_key,
            SourceRef(source_path=source_path, source_hash=source_hash),
            {"source_text": text},
        )
        module_nodes[source_path] = module
        add_node(module)
        integration_nodes, integration_edges = _module_integration_graph(module, text, source_path, source_hash)
        for integration_node in integration_nodes:
            add_node(integration_node)
        edges.extend(integration_edges)

        for routine in routines:
            routine_kind = NodeKind.FUNCTION if routine.is_function else NodeKind.PROCEDURE
            qualified_name = f"{module_name}.{routine.name}"
            routine_node = _node(
                routine_kind,
                routine.name,
                qualified_name,
                f"{module_key}:{routine.name}",
                SourceRef(
                    source_path=source_path,
                    line_start=routine.line_start,
                    line_end=routine.line_end,
                    column_start=1,
                    source_hash=source_hash,
                ),
                {"export": routine.export},
            )
            add_node(routine_node)
            routine_key = routine.name.casefold()
            # The first routine with a given name wins for project-wide lookup;
            # the per-module map keeps the most recent one.
            routine_by_name.setdefault(routine_key, routine_node)
            routine_by_source_and_name[(source_path, routine_key)] = routine_node
            edges.append(_edge(EdgeKind.DECLARES, module, routine_node, source_path, routine.line_start))

            for index, query in enumerate(routine.queries, start=1):
                query_node = _node(
                    NodeKind.QUERY,
                    f"{routine.name}.query{index}",
                    f"{qualified_name}.query{index}",
                    f"{module_key}:{routine.name}:query:{index}",
                    SourceRef(
                        source_path=source_path,
                        line_start=query.line_start,
                        line_end=query.line_end,
                        source_hash=source_hash,
                    ),
                    {"query_text": query.text},
                )
                add_node(query_node)
                edges.append(_edge(EdgeKind.OWNS_QUERY, routine_node, query_node, source_path, query.line_start))
                for table in query.tables:
                    table_node = _table_node(table, source_path, source_hash)
                    if table_node.lineage_id not in known_lineage_ids:
                        add_node(table_node)
                    edges.append(_edge(EdgeKind.READS_TABLE, query_node, table_node, source_path, query.line_start))

            for write in routine.writes:
                target_node = _write_target_node(write, source_path, source_hash)
                if target_node.lineage_id not in known_lineage_ids:
                    add_node(target_node)
                edges.append(
                    _edge(
                        EdgeKind.WRITES,
                        routine_node,
                        target_node,
                        source_path,
                        write.line,
                        {"write_type": write.write_type},
                    )
                )

    # Kinds that may act as the parent of later objects in the same XML file.
    xml_parent_kinds = _METADATA_OWNER_KINDS | {
        NodeKind.ROLE,
        NodeKind.FORM,
        NodeKind.TABULAR_SECTION,
    }

    # Pass 2: metadata XML objects.
    for source_file in xml_files:
        source_path = source_file.as_posix()
        text = _read_text_file(source_file)
        source_hash = _source_hash(text)
        parent_by_prefix: dict[str, SemanticNode] = {}
        try:
            xml_objects = parse_one_c_xml_file(source_file)
        except (OSError, UnicodeDecodeError, ET.ParseError) as error:
            diagnostics.append(
                _diagnostic(
                    "XML_PARSE_ERROR",
                    DiagnosticSeverity.ERROR,
                    f"Cannot parse 1C metadata XML: {error}",
                    source_path,
                    1,
                    source_hash,
                )
            )
            continue
        for xml_object in xml_objects:
            # Role rights are not nodes; they are turned into edges afterwards.
            if xml_object.object_kind == "RIGHT":
                role_rights.append(xml_object.attributes)
                continue
            kind = _xml_node_kind(xml_object.object_kind)
            if kind is None:
                continue
            node = _node(
                kind,
                xml_object.name,
                xml_object.qualified_name,
                f"{project}:{source_path}:{xml_object.object_kind}:{xml_object.qualified_name}",
                SourceRef(source_path=source_path, source_hash=source_hash),
                xml_object.attributes,
            )
            add_node(node)
            if kind in _METADATA_OWNER_KINDS:
                metadata_nodes.append(node)
            if kind == NodeKind.SCHEDULED_JOB:
                scheduled_job_nodes.append(node)
            if kind == NodeKind.COMMAND:
                command_nodes.append(node)
            if kind == NodeKind.FORM:
                form_nodes.append(node)
            parent = _find_xml_parent(parent_by_prefix, node.qualified_name)
            if parent is not None:
                edges.append(_edge(_xml_edge_kind(kind), parent, node, source_path, 1))

            if kind in xml_parent_kinds:
                parent_by_prefix[node.qualified_name] = node

    edges.extend(_link_metadata_to_modules(root, module_nodes, metadata_nodes, form_nodes))
    edges.extend(_link_role_rights(nodes, role_rights))
    edges.extend(_link_scheduled_jobs_to_routines(scheduled_job_nodes, routine_by_name))
    edges.extend(_link_commands_to_handlers(command_nodes, routine_by_name))
    edges.extend(_link_forms_to_handlers(form_nodes, routine_by_name))

    # Pass 3: resolve calls now that every routine in the project is known.
    for source_file in source_files:
        source_path = source_file.as_posix()
        module = module_nodes[source_path]
        for routine in routines_by_source[source_path]:
            caller = _find_declared_routine(nodes, module.name, routine.name)
            if caller is None:
                continue
            for callee_name, line in routine.calls:
                callee_key = callee_name.casefold()
                # Prefer a routine declared in the same module over a same-named one elsewhere.
                callee = routine_by_source_and_name.get((source_path, callee_key)) or routine_by_name.get(callee_key)
                if callee is not None:
                    edges.append(_edge(EdgeKind.CALLS, caller, callee, source_path, line))
                else:
                    unresolved_references.append(
                        UnresolvedReference(
                            reference_id=f"ref.{_stable_hash(f'{caller.lineage_id}:{callee_name}:{line}')}",
                            kind=ReferenceKind.CALL,
                            source_lineage=caller.lineage_id,
                            target_name=callee_name,
                            source_ref=SourceRef(
                                source_path=source_path,
                                line_start=line,
                                line_end=line,
                            ),
                        )
                    )

    deduped_nodes, lineage_aliases = _dedupe_nodes_with_aliases(nodes)
    deduped_edges, edge_diagnostics = _remap_and_dedupe_edges(edges, deduped_nodes, lineage_aliases)
    diagnostics.extend(edge_diagnostics)

    snapshot = SirSnapshot(
        snapshot_id=f"snapshot.{_stable_hash(project + ':' + str(root))}",
        project_id=project,
        metadata=SnapshotMetadata(source_root=root.as_posix()),
        nodes=deduped_nodes,
        edges=deduped_edges,
        diagnostics=diagnostics,
        unresolved_references=unresolved_references,
    )
    snapshot.snapshot_hash = compute_snapshot_hash(snapshot)
    validate_snapshot(snapshot)
    return snapshot


def parse_bsl_module_file(source_file: str | Path, source: str | None = None) -> list[ParsedRoutine]:
    """Parse one BSL module, using the Rust parser when one can be found.

    Args:
        source_file: Path of the module; handed to the Rust parser and read
            from disk when ``source`` is not supplied.
        source: Module text, if the caller has already read it.

    Returns:
        The routines found. With a Rust parser available, its result is merged
        with the regex parser's result via ``_merge_inline_query_fallback``;
        otherwise the regex parser's result is returned as is.
    """
    parser_path = resolve_rust_bsl_parser(source_file)
    source_text = _read_text_file(Path(source_file)) if source is None else source
    if not parser_path:
        return parse_bsl_module(source_text)

    rust_output = _run_rust_bsl_parser(parser_path, Path(source_file))
    rust_routines = parse_bsl_module_from_rust_json(rust_output)
    regex_routines = parse_bsl_module(source_text)
    return _merge_inline_query_fallback(rust_routines, regex_routines)


def _merge_inline_query_fallback(
    primary: list[ParsedRoutine],
    fallback: list[ParsedRoutine],
) -> list[ParsedRoutine]:
    fallback_by_name = {routine.name.casefold(): routine for routine in fallback}
    result: list[ParsedRoutine] = []
    seen: set[str] = set()
    for routine in primary:
        key = routine.name.casefold()
        seen.add(key)
        fallback_routine = fallback_by_name.get(key)
        queries = routine.queries
        if fallback_routine is not None and len(fallback_routine.queries) > len(queries):
            queries = fallback_routine.queries
        result.append(
            ParsedRoutine(
                name=routine.name,
                is_function=routine.is_function,
                export=routine.export,
                line_start=routine.line_start,
                line_end=routine.line_end,
                calls=routine.calls,
                queries=queries,
                writes=routine.writes,
            )
        )
    result.extend(routine for routine in fallback if routine.name.casefold() not in seen)
    return result


def resolve_rust_bsl_parser(source_file: str | Path | None = None) -> str | None:
    """Return the path of the Rust BSL parser executable, or ``None``.

    A non-empty value of the environment variable named by ``_RUST_PARSER_ENV``
    takes precedence; otherwise the parser is auto-discovered, using
    ``source_file`` (when given) as a starting point.
    """
    override = os.getenv(_RUST_PARSER_ENV)
    if not override:
        start = None if source_file is None else Path(source_file)
        return _auto_discovered_rust_bsl_parser(start)
    return override


def _auto_discovered_rust_bsl_parser(source_file: Path | None = None) -> str | None:
    binary_name = "bsl-parser.exe" if os.name == "nt" else "bsl-parser"
    candidates: list[Path] = []

    if source_file is not None:
        for parent in [source_file.resolve().parent, *source_file.resolve().parents]:
            candidates.append(parent / "rust" / "target" / "debug" / binary_name)

    package_file = Path(__file__).resolve()
    for parent in [package_file.parent, *package_file.parents]:
        candidates.append(parent / "rust" / "target" / "debug" / binary_name)

    for candidate in candidates:
        if candidate.is_file():
            return str(candidate)

    return shutil.which("bsl-parser")


def parse_bsl_module_from_rust_json(payload: str | dict) -> list[ParsedRoutine]:
    """Convert the Rust parser's JSON output into ``ParsedRoutine`` objects.

    Args:
        payload: JSON text or the already decoded mapping. The keys read are
            ``procedures``, ``calls``, ``queries`` and ``writes``; each is
            optional.

    Returns:
        One routine per entry in ``procedures`` (keyed by lower-cased name, so
        a later duplicate replaces an earlier one). Calls, queries and writes
        whose owning procedure is not listed are dropped.
    """
    data = payload if not isinstance(payload, str) else json.loads(payload)
    builders: dict[str, _RoutineBuilder] = {}

    def first_line(span: dict) -> int:
        return int(span.get("line_start", 0))

    def last_line(span: dict) -> int:
        # A missing end line falls back to the start line.
        return int(span.get("line_end", span.get("line_start", 0)))

    def owner_of(record: dict, key: str) -> _RoutineBuilder | None:
        return builders.get(str(record.get(key, "")).lower())

    for procedure in data.get("procedures", []):
        span = procedure.get("source_range", {})
        routine_name = str(procedure["name"])
        builders[routine_name.lower()] = _RoutineBuilder(
            name=routine_name,
            is_function=bool(procedure.get("is_function")),
            export=bool(procedure.get("export")),
            line_start=first_line(span),
            line_end=last_line(span),
        )

    for call in data.get("calls", []):
        builder = owner_of(call, "caller")
        if builder is not None:
            span = call.get("source_range", {})
            builder.calls.append((str(call["callee"]), first_line(span)))

    for query in data.get("queries", []):
        builder = owner_of(query, "owner_procedure")
        if builder is not None:
            span = query.get("source_range", {})
            builder.queries.append(
                ParsedQuery(
                    text=str(query.get("query_text", "")),
                    tables=tuple(str(table) for table in query.get("tables", [])),
                    line_start=first_line(span),
                    line_end=last_line(span),
                )
            )

    for write in data.get("writes", []):
        builder = owner_of(write, "owner_procedure")
        if builder is not None:
            span = write.get("source_range", {})
            builder.writes.append(
                ParsedWrite(
                    target=str(write.get("target", "unknown")),
                    write_type=str(write.get("write_type", "OBJECT_WRITE")),
                    line=first_line(span),
                )
            )

    return [builder.freeze() for builder in builders.values()]


def _run_rust_bsl_parser(parser_path: str, source_file: Path) -> str:
    command = [parser_path, str(source_file)]
    completed = subprocess.run(command, check=False, capture_output=True, text=True, encoding="utf-8")
    if completed.returncode != 0:
        message = completed.stderr.strip() or completed.stdout.strip() or f"exit code {completed.returncode}"
        raise RuntimeError(f"Rust BSL parser failed for {source_file}: {message}")
    return completed.stdout


def parse_bsl_module(source: str) -> list[ParsedRoutine]:
    """Parse BSL module text line by line with regular expressions.

    Only routines that have both a start line and a matching end line are
    returned; a routine still open at end of input is discarded. Lines outside
    any routine are ignored.

    Args:
        source: Full text of the module.

    Returns:
        The closed routines in source order, each with the calls, queries and
        writes detected in its body. Line numbers are 1-based.
    """
    routines: list[ParsedRoutine] = []
    current: _RoutineBuilder | None = None
    collecting_query = False  # True while inside a multi-line query text assignment
    query_start = 0
    query_lines: list[str] = []
    # Lower-cased variable name -> metadata target, for objects created in the current routine.
    object_targets: dict[str, str] = {}

    for line_no, line in enumerate(source.splitlines(), start=1):
        if match := _ROUTINE_START_RE.match(line):
            # A start line always opens a fresh builder, replacing any routine still open.
            # NOTE(review): collecting_query / query_lines are not reset here, so an
            # unterminated query text would carry over into the new routine — confirm intent.
            current = _RoutineBuilder(
                name=match.group("name"),
                is_function=match.group("kind").lower() in {"функция", "function"},
                export=_routine_has_export(line),
                line_start=line_no,
                line_end=line_no,
            )
            object_targets = {}
            continue

        if current is None:
            continue

        current.line_end = line_no
        stripped = line.strip()
        if _ROUTINE_END_RE.match(line):
            # Flush a query that was still being collected when the routine ended.
            if collecting_query:
                _append_query(current, query_lines, query_start, line_no)
                collecting_query = False
                query_lines = []
            routines.append(current.freeze())
            current = None
            continue

        if collecting_query:
            # Continuation line of a query text; a trailing ";" ends the query.
            query_lines.append(_clean_query_line(stripped))
            if stripped.endswith(";"):
                _append_query(current, query_lines, query_start, line_no)
                collecting_query = False
                query_lines = []
            continue

        # Remember which metadata object a variable was created from, so a later
        # ``Var.Записать()`` / ``Var.Write()`` can be attributed to it.
        if object_target := _extract_object_create_target(stripped):
            variable, target = object_target
            object_targets[variable.lower()] = target

        # Query passed directly to the constructor on a single line.
        if inline_query := _inline_new_query_text(stripped):
            _append_query(current, [_clean_query_line(inline_query)], line_no, line_no)
            continue

        # ``.Текст`` / ``.Text`` access starts query collection; the query may also
        # begin (and end) on this same line.
        if _QUERY_TEXT_RE.search(line):
            collecting_query = True
            query_start = line_no
            inline_query = _query_text_after_assignment(stripped)
            if inline_query:
                query_lines.append(_clean_query_line(inline_query))
                if stripped.endswith(";"):
                    _append_query(current, query_lines, query_start, line_no)
                    collecting_query = False
                    query_lines = []
            continue

        # At most one call is recorded per line; lines containing a dot are skipped
        # for the statement and assignment forms.
        if call_match := _CALL_RE.match(line):
            callee = call_match.group("name")
            if "." not in stripped and callee.lower() not in {"if", "если"}:
                current.calls.append((callee, line_no))
        elif call_match := _ASSIGNMENT_CALL_RE.search(line):
            callee = call_match.group("name")
            if "." not in stripped:
                current.calls.append((callee, line_no))
        elif call_match := _CONDITION_CALL_RE.search(line):
            current.calls.append((call_match.group("name"), line_no))

        # Prefer a write attributed to a known created object over the generic detection.
        if write := _extract_object_write(stripped, object_targets):
            current.writes.append(ParsedWrite(write[0], write[1], line_no))
        elif write := _extract_write(stripped):
            current.writes.append(ParsedWrite(write[0], write[1], line_no))

    return routines


def _bsl_parse_diagnostics(source_path: str, source: str, source_hash: str) -> list[Diagnostic]:
    """Report structural problems in BSL module text.

    Emits, all with ERROR severity:

    * ``BSL_NESTED_ROUTINE`` when a routine starts while another is open;
    * ``BSL_UNCLOSED_QUERY`` when a query text assignment is still open at a
      routine end or at end of file;
    * ``BSL_UNCLOSED_ROUTINE`` for every routine still open at end of file.
    """
    found: list[Diagnostic] = []
    open_routines: list[tuple[str, int]] = []
    in_query = False
    query_line = 0

    def report(code: str, message: str, line: int, attributes: dict | None = None) -> None:
        found.append(
            _diagnostic(code, DiagnosticSeverity.ERROR, message, source_path, line, source_hash, attributes)
        )

    for number, raw in enumerate(source.splitlines(), start=1):
        start_match = _ROUTINE_START_RE.match(raw)
        if start_match is not None:
            if open_routines:
                outer_name, outer_line = open_routines[-1]
                report(
                    "BSL_NESTED_ROUTINE",
                    f"Routine starts before previous routine is closed: {outer_name}",
                    number,
                    {"open_routine_line": outer_line},
                )
            open_routines.append((start_match.group("name"), number))
            continue

        ends_statement = raw.strip().endswith(";")
        # A trailing ";" closes the query even on the routine's end line.
        if in_query and ends_statement:
            in_query = False

        if open_routines and _ROUTINE_END_RE.match(raw):
            if in_query:
                report(
                    "BSL_UNCLOSED_QUERY",
                    "Query text assignment is not closed before routine end",
                    query_line,
                )
                in_query = False
            open_routines.pop()
            continue

        if open_routines and not in_query and _QUERY_TEXT_RE.search(raw):
            query_line = number
            # The assignment stays open only when it does not end on this line.
            in_query = not ends_statement

    if in_query:
        report(
            "BSL_UNCLOSED_QUERY",
            "Query text assignment is not closed before end of file",
            query_line,
        )

    for pending_name, pending_line in open_routines:
        report("BSL_UNCLOSED_ROUTINE", f"Routine is not closed: {pending_name}", pending_line)

    return found


def _diagnostic(
    code: str,
    severity: DiagnosticSeverity,
    message: str,
    source_path: str,
    line: int,
    source_hash: str,
    attributes: dict | None = None,
) -> Diagnostic:
    """Build a ``Diagnostic`` anchored at column 1 of a single source line.

    The diagnostic id is derived from code, path, line and message through
    ``_stable_hash``. ``attributes`` defaults to an empty dict.
    """
    fingerprint = f"{code}:{source_path}:{line}:{message}"
    identifier = f"diag.{_stable_hash(fingerprint)}"
    location = SourceRef(
        source_path=source_path,
        line_start=line,
        line_end=line,
        column_start=1,
        source_hash=source_hash,
    )
    return Diagnostic(
        diagnostic_id=identifier,
        code=code,
        severity=severity,
        message=message,
        source_ref=location,
        attributes=attributes if attributes else {},
    )


def _append_query(current: _RoutineBuilder, lines: list[str], start: int, end: int) -> None:
    """Join the non-empty ``lines`` into one query and record it on ``current``."""
    query_text = "\n".join(part for part in lines if part)
    referenced_tables = tuple(_extract_tables(query_text))
    parsed = ParsedQuery(text=query_text, tables=referenced_tables, line_start=start, line_end=end)
    current.queries.append(parsed)


def _clean_query_line(line: str) -> str:
    value = line.strip().rstrip(";").strip().strip('"').strip()
    if value.startswith("|"):
        value = value[1:].strip()
    return value.strip('"').strip()


def _query_text_after_assignment(line: str) -> str:
    if "=" not in line:
        return ""
    return line.split("=", 1)[1].strip()


def _inline_new_query_text(line: str) -> str:
    """Return the quoted text passed to an inline query constructor, or ``""`` if the line has none."""
    found = _INLINE_NEW_QUERY_RE.search(line)
    if found is None:
        return ""
    return found.group("query")


def _extract_tables(query_text: str) -> list[str]:
    """Collect the table names referenced by ``FROM`` / ``JOIN`` clauses.

    Each line is inspected independently. A table may follow ``ИЗ``/``FROM``
    or ``СОЕДИНЕНИЕ``/``JOIN`` on the same line, or sit at the start of the
    line after a bare ``ИЗ``/``FROM`` keyword.

    Args:
        query_text: Query text, one clause fragment per line.

    Returns:
        Table names in order of appearance; duplicates are kept.
    """
    lines = query_text.splitlines()
    tables: list[str] = []
    for index, line in enumerate(lines):
        inline_table = _table_after_from(line)
        if inline_table:
            tables.append(inline_table)
        join_table = _table_after_join(line)
        if join_table:
            tables.append(join_table)
        elif _FROM_RE.match(line) and index + 1 < len(lines):
            # The following line may be blank; indexing the first word
            # unconditionally would raise IndexError in that case.
            words = lines[index + 1].split()
            table = words[0].rstrip(",") if words else ""
            if table:
                tables.append(table)
    return tables


def _table_after_from(line: str) -> str:
    match = re.search(r"\b(ИЗ|FROM)\s+(?P<table>[^\s,;]+)", line, re.IGNORECASE)
    return match.group("table").rstrip(",") if match else ""


def _table_after_join(line: str) -> str:
    match = re.search(r"\b(СОЕДИНЕНИЕ|JOIN)\s+(?P<table>[^\s,;]+)", line, re.IGNORECASE)
    return match.group("table").rstrip(",") if match else ""


def _extract_write(line: str) -> tuple[str, str] | None:
    lowered = line.lower()
    if "движения." in lowered and ".записать" in lowered:
        match = re.search(r"Движения\.([A-Za-zА-Яа-я0-9_]+)\.", line, re.IGNORECASE)
        return ((match.group(1) if match else "unknown"), "REGISTER_WRITE")
    if "movements." in lowered and ".write" in lowered:
        match = re.search(r"Movements\.([A-Za-zА-Яа-я0-9_]+)\.", line, re.IGNORECASE)
        return ((match.group(1) if match else "unknown"), "REGISTER_WRITE")
    if ".записать()" in lowered or ".write()" in lowered:
        return ("unknown", "OBJECT_WRITE")
    return None


def _extract_object_create_target(line: str) -> tuple[str, str] | None:
    """Recognise ``Var = <Manager>.<Name>.<Create...>(`` and describe what was created.

    Returns:
        ``(variable name, "<TypePrefix>.<Name>")`` where the type prefix is the
        singular Russian form of the manager collection, or ``None`` when the
        line is not an object creation.
    """
    found = _OBJECT_CREATE_RE.match(line)
    if found is None:
        return None

    # Singular type prefix -> lower-cased manager collection names that map to it.
    managers_by_prefix = (
        ("Справочник", ("справочники", "catalogs")),
        ("Документ", ("документы", "documents")),
        ("РегистрСведений", ("регистрысведений", "informationregisters")),
        ("РегистрНакопления", ("регистрынакопления", "accumulationregisters")),
        ("РегистрБухгалтерии", ("регистрыбухгалтерии", "accountingregisters")),
        ("РегистрРасчета", ("регистрырасчета", "calculationregisters")),
    )
    prefix_by_manager = {
        manager: type_prefix
        for type_prefix, managers in managers_by_prefix
        for manager in managers
    }
    type_prefix = prefix_by_manager[found.group("factory").lower()]
    object_name = found.group("name")
    return found.group("var"), f"{type_prefix}.{object_name}"


def _extract_object_write(line: str, object_targets: dict[str, str]) -> tuple[str, str] | None:
    match = re.match(
        r"^\s*(?P<var>[A-Za-zА-Яа-я_][\wА-Яа-я]*)\.(Записать|Write)\s*\(",
        line,
        re.IGNORECASE,
    )
    if not match:
        return None
    target = object_targets.get(match.group("var").lower())
    if not target:
        return None
    write_type = "REGISTER_WRITE" if target.lower().startswith("регистр") else "OBJECT_WRITE"
    return target, write_type


def _routine_has_export(line: str) -> bool:
    """Tell whether the line contains ``Экспорт`` / ``Export`` as a whole word (any case)."""
    export_keyword = re.search(r"\b(Экспорт|Export)\b", line, flags=re.IGNORECASE)
    return export_keyword is not None


def _node(
    kind: NodeKind,
    name: str,
    qualified_name: str,
    stable_key: str,
    source_ref: SourceRef,
    attributes: dict | None = None,
) -> SemanticNode:
    """Build a ``SemanticNode`` with ids derived from its kind.

    The semantic id is derived from ``(kind, qualified_name)`` and the lineage
    id from ``(kind, stable_key)``. A missing or empty ``attributes`` argument
    is replaced by a fresh empty dict.
    """
    kind_value = kind.value
    semantic_id = make_semantic_id(kind_value, qualified_name)
    lineage_id = make_lineage_id(kind_value, stable_key)
    node_attributes = attributes if attributes else {}
    return SemanticNode(
        semantic_id=semantic_id,
        lineage_id=lineage_id,
        kind=kind,
        name=name,
        qualified_name=qualified_name,
        source_ref=source_ref,
        attributes=node_attributes,
    )


def _table_node(table: str, source_path: str, source_hash: str) -> SemanticNode:
    """Create a node for a queried table.

    Names starting with ``регистр`` (case-insensitive) become ``REGISTER``
    nodes, everything else ``TABLE``. The short name is the part after the
    last dot and the lower-cased table name serves as the stable key.
    """
    if table.lower().startswith("регистр"):
        node_kind = NodeKind.REGISTER
    else:
        node_kind = NodeKind.TABLE
    short_name = table.rpartition(".")[2]
    origin = SourceRef(source_path=source_path, source_hash=source_hash)
    return _node(node_kind, short_name, table, table.lower(), origin)


def _register_node(register: str, source_path: str, source_hash: str) -> SemanticNode:
    """Create a ``REGISTER`` node for a register name.

    Names that do not already start with ``регистр`` (case-insensitive) are
    qualified with the ``РегистрНакопления.`` prefix.
    """
    if register.lower().startswith("регистр"):
        qualified_name = register
    else:
        qualified_name = f"РегистрНакопления.{register}"
    short_name = register.rpartition(".")[2]
    origin = SourceRef(source_path=source_path, source_hash=source_hash)
    return _node(NodeKind.REGISTER, short_name, qualified_name, qualified_name.lower(), origin)


def _write_target_node(write: ParsedWrite, source_path: str, source_hash: str) -> SemanticNode:
    """Pick the node type for a write target.

    Object writes to ``Справочник.``/``Catalog.`` or ``Документ.``/``Document.``
    targets become catalog/document reference nodes; every other write is
    represented as a register node.
    """
    if write.write_type == "OBJECT_WRITE":
        target_key = write.target.lower()
        prefix_kinds = (
            (("справочник.", "catalog."), NodeKind.CATALOG),
            (("документ.", "document."), NodeKind.DOCUMENT),
        )
        for prefixes, node_kind in prefix_kinds:
            if target_key.startswith(prefixes):
                return _metadata_reference_node(node_kind, write.target, source_path, source_hash)
    return _register_node(write.target, source_path, source_hash)


def _metadata_reference_node(
    kind: NodeKind,
    qualified_name: str,
    source_path: str,
    source_hash: str,
) -> SemanticNode:
    """Create a node of ``kind`` for a metadata object referenced by qualified name.

    The short name is the part after the last dot; the lower-cased qualified
    name is used as the stable key.
    """
    short_name = qualified_name.rpartition(".")[2]
    origin = SourceRef(source_path=source_path, source_hash=source_hash)
    return _node(kind, short_name, qualified_name, qualified_name.lower(), origin)


def _module_integration_graph(
    module: SemanticNode,
    text: str,
    source_path: str,
    source_hash: str,
) -> tuple[list[SemanticNode], list[SemanticEdge]]:
    """Derive integration endpoint nodes and ``USES_INTEGRATION`` edges for a module.

    Endpoints come from every ``_URL_RE`` match in ``text`` plus one generic
    endpoint per connector type whose marker substring occurs in the text.
    Endpoints are de-duplicated by ``(name, kind)``; every edge is anchored at
    line 1 of ``source_path``.
    """
    found: list[tuple[str, str, dict]] = [
        (url, "HTTP_SERVICE", {"url": url, "direction": "OUTBOUND"})
        for url in _URL_RE.findall(text)
    ]
    # (marker substrings, endpoint name, integration kind)
    connector_markers = (
        (("HTTPСоединение", "HTTPConnection"), "HTTPConnection", "HTTP_SERVICE"),
        (("WSПрокси", "WSProxy", "WSСсылка"), "WSProxy", "WEB_SERVICE"),
        (("FTPСоединение", "FTPConnection"), "FTPConnection", "FILE_EXCHANGE"),
        (("COMОбъект", "COMObject"), "COMObject", "COM_CONNECTOR"),
    )
    for markers, endpoint_name, integration_kind in connector_markers:
        if any(marker in text for marker in markers):
            found.append((endpoint_name, integration_kind, {"direction": "OUTBOUND"}))

    endpoint_nodes: list[SemanticNode] = []
    usage_edges: list[SemanticEdge] = []
    emitted: set[tuple[str, str]] = set()
    for name, kind, attributes in found:
        if (name, kind) in emitted:
            continue
        emitted.add((name, kind))
        # Node and edge each get their own attribute dict.
        node_attributes = {"integration_kind": kind, **attributes}
        edge_attributes = {"integration_kind": kind, **attributes}
        endpoint = _node(
            NodeKind.INTEGRATION_ENDPOINT,
            name,
            f"{module.qualified_name}.{kind}.{name}",
            f"{module.lineage_id}:integration:{kind}:{name}",
            SourceRef(source_path=source_path, source_hash=source_hash),
            node_attributes,
        )
        endpoint_nodes.append(endpoint)
        usage_edges.append(
            _edge(EdgeKind.USES_INTEGRATION, module, endpoint, source_path, 1, edge_attributes)
        )
    return endpoint_nodes, usage_edges


def _xml_node_kind(object_kind: str) -> NodeKind | None:
    """Map a normalised XML object kind to a ``NodeKind``.

    Returns ``None`` for kinds that are not in the explicit whitelist below.
    """
    # Kinds whose NodeKind member carries exactly the same name.
    same_named = (
        "CATALOG",
        "DOCUMENT",
        "CONSTANT",
        "DOCUMENT_JOURNAL",
        "ENUM",
        "REPORT",
        "DATA_PROCESSOR",
        "CHART_OF_CHARACTERISTIC_TYPES",
        "CHART_OF_ACCOUNTS",
        "CHART_OF_CALCULATION_TYPES",
        "REGISTER",
        "COMMON_MODULE",
        "EXCHANGE_PLAN",
        "EXTERNAL_DATA_SOURCE",
        "SCHEDULED_JOB",
        "BUSINESS_PROCESS",
        "TASK",
        "SUBSYSTEM",
        "HTTP_SERVICE",
        "XDTO_PACKAGE",
        "EXTENSION",
        "LAYOUT",
        "MOVEMENT",
        "ROLE",
        "FORM",
        "COMMAND",
        "ATTRIBUTE",
        "TABULAR_SECTION",
    )
    kind_by_name = {name: getattr(NodeKind, name) for name in same_named}
    # All specific register flavours collapse into the single REGISTER kind.
    for register_kind in (
        "INFORMATION_REGISTER",
        "ACCUMULATION_REGISTER",
        "ACCOUNTING_REGISTER",
        "CALCULATION_REGISTER",
    ):
        kind_by_name[register_kind] = NodeKind.REGISTER
    kind_by_name["ELEMENT"] = NodeKind.FORM_ELEMENT
    return kind_by_name.get(object_kind)


def _xml_edge_kind(kind: NodeKind) -> EdgeKind:
    """Choose the parent-to-child edge kind for an XML-derived node.

    Forms, commands, attributes, tabular sections, roles and form elements get
    their dedicated ``HAS_*`` edge; any other kind is linked with ``CONTAINS``.
    """
    dedicated_edges = (
        (NodeKind.FORM, EdgeKind.HAS_FORM),
        (NodeKind.COMMAND, EdgeKind.HAS_COMMAND),
        (NodeKind.ATTRIBUTE, EdgeKind.HAS_ATTRIBUTE),
        (NodeKind.TABULAR_SECTION, EdgeKind.HAS_TABULAR_SECTION),
        (NodeKind.ROLE, EdgeKind.HAS_ROLE),
        (NodeKind.FORM_ELEMENT, EdgeKind.HAS_ELEMENT),
    )
    for node_kind, edge_kind in dedicated_edges:
        if kind == node_kind:
            return edge_kind
    return EdgeKind.CONTAINS


def _find_xml_parent(parents: dict[str, SemanticNode], qualified_name: str) -> SemanticNode | None:
    """Find the most specific registered parent of ``qualified_name``.

    A parent qualifies when its key equals the name or is a dot-delimited
    prefix of it. Among those, the node with the longest ``qualified_name``
    wins (the first one on ties); ``None`` is returned when nothing qualifies.
    """
    matching = (
        parent
        for prefix, parent in parents.items()
        if qualified_name == prefix or qualified_name.startswith(prefix + ".")
    )
    return max(matching, key=lambda parent: len(parent.qualified_name), default=None)


def _edge(
    kind: EdgeKind,
    source: SemanticNode,
    target: SemanticNode,
    source_path: str,
    line: int,
    attributes: dict | None = None,
) -> SemanticEdge:
    """Build a ``SemanticEdge`` between two nodes.

    The edge id is ``edge.`` plus a stable hash of the edge kind, both lineage
    ids, the source path and the line, so identical inputs always yield the
    same id. A missing or empty ``attributes`` argument becomes a new empty dict.
    """
    identity = "{}:{}:{}:{}:{}".format(
        kind.value,
        source.lineage_id,
        target.lineage_id,
        source_path,
        line,
    )
    location = SourceRef(source_path=source_path, line_start=line, line_end=line)
    return SemanticEdge(
        edge_id="edge." + _stable_hash(identity),
        kind=kind,
        source_lineage=source.lineage_id,
        target_lineage=target.lineage_id,
        source_ref=location,
        attributes=attributes if attributes else {},
    )


def _find_declared_routine(nodes: list[SemanticNode], module_name: str, routine_name: str) -> SemanticNode | None:
    """Return the first node whose qualified name is ``<module_name>.<routine_name>``, or ``None``."""
    wanted = f"{module_name}.{routine_name}"
    for candidate in nodes:
        if candidate.qualified_name == wanted:
            return candidate
    return None


def _link_metadata_to_modules(
    root: Path,
    module_nodes: dict[str, SemanticNode],
    metadata_nodes: list[SemanticNode],
    form_nodes: list[SemanticNode],
) -> list[SemanticEdge]:
    """Connect source modules to the metadata objects (and forms) that own them.

    For every module whose owner can be resolved from its path, the module's
    attributes are enriched in place with owner/role information and a
    ``CONTAINS`` edge owner -> module is produced. Form modules whose form node
    is found additionally get form attributes and a ``CONTAINS`` edge
    form -> module. Modules without a resolvable owner are left untouched.
    """
    if not metadata_nodes:
        return []

    # Lookup tables; on duplicate keys the later node wins.
    qualified_index: dict[str, SemanticNode] = {}
    kind_name_index: dict[tuple[NodeKind, str], SemanticNode] = {}
    for meta in metadata_nodes:
        qualified_index[_normalize_lookup_key(meta.qualified_name)] = meta
        kind_name_index[(meta.kind, _normalize_lookup_key(meta.name))] = meta
    form_index = {_normalize_lookup_key(form.qualified_name): form for form in form_nodes}

    links: list[SemanticEdge] = []
    for module_path, module in module_nodes.items():
        module_file = Path(module_path)
        owner = _find_metadata_module_owner(root, module_file, qualified_index, kind_name_index)
        if owner is None:
            continue

        anchor_line = module.source_ref.line_start or 1
        role = _module_role(module_file)
        form_name = _form_name_for_module(root, module_file)
        part = _module_object_part(role, form_name)

        module_attributes = module.attributes
        module_attributes["owner_lineage_id"] = owner.lineage_id
        module_attributes["owner_qualified_name"] = owner.qualified_name
        module_attributes["owner_kind"] = owner.kind.value
        module_attributes["object_part"] = part
        module_attributes["module_role"] = role
        if form_name:
            module_attributes["form_name"] = form_name

        ownership = {
            "link_type": "METADATA_MODULE",
            "module_role": role,
            "object_part": part,
            "form_name": form_name,
        }
        links.append(_edge(EdgeKind.CONTAINS, owner, module, module_path, anchor_line, ownership))

        if role != "FORM_MODULE" or not form_name:
            continue
        form_node = _find_form_node_for_module(owner, form_name, form_index)
        if form_node is None:
            continue
        module_attributes["form_lineage_id"] = form_node.lineage_id
        module_attributes["form_qualified_name"] = form_node.qualified_name
        # Same payload as the ownership edge, only the link type differs.
        form_link = dict(ownership, link_type="FORM_MODULE")
        links.append(_edge(EdgeKind.CONTAINS, form_node, module, module_path, anchor_line, form_link))
    return links


def _find_form_node_for_module(
    owner: SemanticNode,
    form_name: str,
    forms_by_qualified: dict[str, SemanticNode],
) -> SemanticNode | None:
    """Locate the form node that belongs to ``owner`` and is named ``form_name``.

    ``forms_by_qualified`` is keyed by ``_normalize_lookup_key(qualified_name)``.
    Exact candidates ``<owner>.<form>`` and ``<owner>.Форма.<form>`` are tried
    first. Otherwise the first form whose key lies under ``<owner>.`` and ends
    with ``.<form>`` is returned. ``None`` means no form was found.
    """
    exact_candidates = (
        f"{owner.qualified_name}.{form_name}",
        f"{owner.qualified_name}.Форма.{form_name}",
    )
    for candidate in exact_candidates:
        form = forms_by_qualified.get(_normalize_lookup_key(candidate))
        if form is not None:
            return form

    # Require the "." boundary after the owner name: a bare prefix test would
    # let "документ.заказ" match forms of "документ.заказпокупателя".
    owner_prefix = _normalize_lookup_key(f"{owner.qualified_name}.")
    # Normalise the suffix exactly like the dictionary keys were normalised.
    suffix = _normalize_lookup_key(f".{form_name}")
    for key, form in forms_by_qualified.items():
        if key.startswith(owner_prefix) and key.endswith(suffix):
            return form
    return None


def _module_object_part(module_role: str, form_name: str = "") -> str:
    """Translate a module role into its dotted "object part" label.

    Form modules yield ``form.<form_name>.module`` (or ``form.module`` when no
    form name is given); unknown roles fall back to ``"module"``.
    """
    if module_role == "FORM_MODULE":
        if form_name:
            return f"form.{form_name}.module"
        return "form.module"
    fixed_parts = {
        "OBJECT_MODULE": "object.module",
        "MANAGER_MODULE": "object.manager",
        "RECORD_SET_MODULE": "object.record_set",
        "MODULE": "module",
    }
    return fixed_parts.get(module_role, "module")


def _link_role_rights(nodes: list[SemanticNode], role_rights: list[dict]) -> list[SemanticEdge]:
    """Create ``GRANTS_ACCESS`` edges from roles to the objects they have rights on.

    Each entry of ``role_rights`` names a ``role`` and a ``target``. The role is
    looked up by qualified name first and by role node name second; the target
    by qualified name only. Entries with an unresolved side are skipped. All
    remaining keys of the entry, except the identifying ones, become edge
    attributes.
    """
    if not role_rights:
        return []

    nodes_by_qualified: dict[str, SemanticNode] = {}
    roles_by_name: dict[str, SemanticNode] = {}
    for node in nodes:
        nodes_by_qualified[_normalize_lookup_key(node.qualified_name)] = node
        if node.kind == NodeKind.ROLE:
            roles_by_name[_normalize_lookup_key(node.name)] = node

    identifying_keys = {"role", "target", "object", "Object", "metadata", "Metadata"}
    grants: list[SemanticEdge] = []
    for right in role_rights:
        role_key = _normalize_lookup_key(str(right.get("role", "")))
        target_key = _normalize_lookup_key(str(right.get("target", "")))
        role = nodes_by_qualified.get(role_key) or roles_by_name.get(role_key)
        target = nodes_by_qualified.get(target_key)
        if role is None or target is None:
            continue
        details = {key: value for key, value in right.items() if key not in identifying_keys}
        role_ref = role.source_ref
        grants.append(
            _edge(
                EdgeKind.GRANTS_ACCESS,
                role,
                target,
                role_ref.source_path,
                role_ref.line_start or 1,
                details,
            )
        )
    return grants


def _link_scheduled_jobs_to_routines(
    scheduled_jobs: list[SemanticNode],
    routine_by_name: dict[str, SemanticNode],
) -> list[SemanticEdge]:
    """Create ``RUNS`` edges from scheduled jobs to the routines they invoke.

    ``routine_by_name`` is looked up with the case-folded routine name. Jobs
    without a routine name, or whose routine is unknown, produce no edge.
    """
    links: list[SemanticEdge] = []
    for job in scheduled_jobs:
        routine_name = _scheduled_job_routine_name(job.attributes)
        routine = routine_by_name.get(routine_name.casefold()) if routine_name else None
        if routine is None:
            continue
        job_ref = job.source_ref
        links.append(
            _edge(
                EdgeKind.RUNS,
                job,
                routine,
                job_ref.source_path,
                job_ref.line_start or 1,
                {"routine_name": routine_name},
            )
        )
    return links


def _scheduled_job_routine_name(attributes: dict) -> str:
    """Extract the routine name a scheduled job points at.

    The known attribute keys are checked in priority order; the first truthy
    value wins and only the part after its last dot is returned. An empty
    string means no such attribute is set.
    """
    priority_keys = (
        "method",
        "Method",
        "methodName",
        "MethodName",
        "routine",
        "Routine",
        "routineName",
        "RoutineName",
        "handler",
        "Handler",
        "ИмяМетода",
        "Метод",
        "Процедура",
    )
    values = (attributes.get(key) for key in priority_keys)
    chosen = next((value for value in values if value), None)
    if chosen is None:
        return ""
    return str(chosen).rpartition(".")[2]


def _link_commands_to_handlers(
    commands: list[SemanticNode],
    routine_by_name: dict[str, SemanticNode],
) -> list[SemanticEdge]:
    """Create ``HANDLES`` edges from commands to their handler routines.

    ``routine_by_name`` is looked up with the case-folded handler name.
    Commands without a handler name, or with an unknown handler, produce no edge.
    """
    links: list[SemanticEdge] = []
    for command in commands:
        handler_name = _command_handler_name(command.attributes)
        handler = routine_by_name.get(handler_name.casefold()) if handler_name else None
        if handler is None:
            continue
        command_ref = command.source_ref
        links.append(
            _edge(
                EdgeKind.HANDLES,
                command,
                handler,
                command_ref.source_path,
                command_ref.line_start or 1,
                {"handler_name": handler_name},
            )
        )
    return links


def _command_handler_name(attributes: dict) -> str:
    """Extract the handler routine name configured for a command.

    The known attribute keys are checked in priority order; the first truthy
    value wins and only the part after its last dot is returned. An empty
    string means no such attribute is set.
    """
    priority_keys = (
        "action",
        "Action",
        "handler",
        "Handler",
        "method",
        "Method",
        "methodName",
        "MethodName",
        "Действие",
        "Обработчик",
        "Метод",
        "ИмяМетода",
    )
    values = (attributes.get(key) for key in priority_keys)
    chosen = next((value for value in values if value), None)
    if chosen is None:
        return ""
    return str(chosen).rpartition(".")[2]


def _link_forms_to_handlers(
    forms: list[SemanticNode],
    routine_by_name: dict[str, SemanticNode],
) -> list[SemanticEdge]:
    """Create ``HANDLES`` edges from forms to the routines handling their events.

    Every handler attribute reported by ``_form_handler_names`` whose
    case-folded routine name is present in ``routine_by_name`` yields one edge
    tagged with ``link_type`` ``FORM_EVENT``.
    """
    links: list[SemanticEdge] = []
    for form in forms:
        form_ref = form.source_ref
        for attribute_key, handler_name in _form_handler_names(form.attributes):
            handler = routine_by_name.get(handler_name.casefold())
            if handler is not None:
                details = {
                    "handler_name": handler_name,
                    "handler_source": attribute_key,
                    "link_type": "FORM_EVENT",
                }
                links.append(
                    _edge(
                        EdgeKind.HANDLES,
                        form,
                        handler,
                        form_ref.source_path,
                        form_ref.line_start or 1,
                        details,
                    )
                )
    return links


def _form_handler_names(attributes: dict) -> list[tuple[str, str]]:
    """Collect the event-handler routines referenced by a form's attributes.

    An attribute counts as a handler reference when its value is truthy and
    its case-folded key is one of the known event/handler keys. Returns
    ``(original key, routine name)`` pairs in attribute order, where the
    routine name is the part of the value after its last dot.
    """
    handler_keys = {
        "oncreate",
        "onopen",
        "onclose",
        "beforeclose",
        "beforewrite",
        "afterwrite",
        "onread",
        "onchange",
        "event",
        "handler",
        "method",
        "methodname",
        # Correct case-folded spelling of "ПриСозданииНаСервере".
        "присозданиинасервере",
        # Misspelled variant (missing "а"), kept for backward compatibility.
        "присозданиинсервере",
        "присозданиинаклиенте",
        "приоткрытии",
        "передзакрытием",
        "призакрытии",
        "передзаписью",
        "призаписи",
        "причтении",
        "приизменении",
        "событие",
        "обработчик",
        "метод",
        "имяметода",
    }
    return [
        (str(key), str(value).split(".")[-1])
        for key, value in attributes.items()
        if value and str(key).casefold() in handler_keys
    ]


def _find_metadata_module_owner(
    root: Path,
    source_file: Path,
    by_qualified: dict[str, SemanticNode],
    by_kind_and_name: dict[tuple[NodeKind, str], SemanticNode],
) -> SemanticNode | None:
    """Resolve the metadata node that owns the module stored in ``source_file``.

    Path-derived candidates are tried in order. For each one the qualified-name
    index is consulted first, then the ``(kind, name)`` index. The first hit is
    returned; ``None`` means no candidate matched.
    """
    for qualified_name, kind, name in _metadata_owner_candidates(root, source_file):
        match = by_qualified.get(_normalize_lookup_key(qualified_name))
        if match is None:
            match = by_kind_and_name.get((kind, _normalize_lookup_key(name)))
        if match is not None:
            return match
    return None


def _metadata_owner_candidates(root: Path, source_file: Path) -> list[tuple[str, NodeKind, str]]:
    """List possible owners of a module as ``(qualified name, kind, name)`` tuples.

    Candidates are derived from the module path relative to ``root``:

    * every directory known to ``_PATH_METADATA_ALIASES`` contributes the path
      component that follows it, unless that component is a technical
      container folder (ext/forms/templates and their Russian names);
    * for paths with at least two components, a common-module candidate named
      after the file stem is added once per common-modules marker present in
      the path, followed by a candidate named after the parent directory.
    """
    parts = list(_relative_path(source_file, root).parts)
    normalized = [_normalize_path_part(part) for part in parts]
    container_folders = {"ext", "forms", "формы", "templates", "макеты"}
    found: list[tuple[str, NodeKind, str]] = []

    # Pair each component except the last with the component right after it.
    for folder_key, following in zip(normalized, parts[1:]):
        alias = _PATH_METADATA_ALIASES.get(folder_key)
        if alias is None:
            continue
        prefix, kind = alias
        if _normalize_path_part(following) not in container_folders:
            found.append((f"{prefix}.{following}", kind, following))

    if len(parts) < 2:
        return found

    stem = source_file.stem
    parent_folder = parts[-2]
    for marker in ("commonmodules", "общиемодули"):
        if marker in normalized:
            found.append((f"ОбщийМодуль.{stem}", NodeKind.COMMON_MODULE, stem))
    found.append((parent_folder, NodeKind.COMMON_MODULE, parent_folder))
    return found


def _relative_path(source_file: Path, root: Path) -> Path:
    """Express ``source_file`` relative to the project root.

    When ``root`` is not a directory its parent is used as the base. If the
    file does not lie under the base, or resolving fails with ``OSError``,
    ``source_file`` is returned unchanged.
    """
    try:
        anchor = root
        if not anchor.is_dir():
            anchor = anchor.parent
        resolved_file = source_file.resolve()
        return resolved_file.relative_to(anchor.resolve())
    except (OSError, ValueError):
        return source_file


def _module_role(source_file: Path) -> str:
    """Classify a module file by its location and file name.

    Any path component that normalises to ``forms``/``формы`` makes the file a
    ``FORM_MODULE``. Otherwise the lower-cased file stem selects the role, with
    ``MODULE`` as the fallback for unknown stems.
    """
    stem = source_file.stem.lower()
    folder_keys = (_normalize_path_part(part) for part in source_file.parts)
    if any(key in {"forms", "формы"} for key in folder_keys):
        return "FORM_MODULE"

    # (English stem, Russian stem, role)
    known_stems = (
        ("objectmodule", "модульобъекта", "OBJECT_MODULE"),
        ("managermodule", "модульменеджера", "MANAGER_MODULE"),
        ("recordsetmodule", "модульнабора", "RECORD_SET_MODULE"),
        ("module", "модуль", "MODULE"),
    )
    for english, russian, role in known_stems:
        if stem == english or stem == russian:
            return role
    return "MODULE"


def _form_name_for_module(root: Path, source_file: Path) -> str:
    """Return the form name encoded in a form module's path, or ``""``.

    The name is the path component right after the first ``forms`` folder, or,
    if that yields nothing, after the first ``формы`` folder. Matching is done
    on normalised path parts of the path relative to ``root``.
    """
    parts = list(_relative_path(source_file, root).parts)
    normalized = [_normalize_path_part(part) for part in parts]
    for marker in ("forms", "формы"):
        try:
            position = normalized.index(marker)
        except ValueError:
            continue
        # A marker in the last position has no following component.
        following = parts[position + 1 : position + 2]
        if following:
            return following[0]
    return ""


def _normalize_path_part(value: str) -> str:
    """Lower-case a path component after dropping whitespace, ``_``, ``.`` and ``-``."""
    fragments = re.split(r"[\s_.-]+", value)
    return "".join(fragments).lower()


def _normalize_lookup_key(value: str) -> str:
    """Build a case-insensitive lookup key: lower-case with ``\\`` turned into ``/``."""
    lowered = value.lower()
    return lowered.replace("\\", "/")


def _dedupe_nodes(nodes: list[SemanticNode]) -> list[SemanticNode]:
    """Return ``nodes`` without duplicates, discarding the lineage alias map."""
    return _dedupe_nodes_with_aliases(nodes)[0]


def _dedupe_nodes_with_aliases(nodes: list[SemanticNode]) -> tuple[list[SemanticNode], dict[str, str]]:
    """Drop duplicate nodes and record which lineage ids were merged.

    A node is kept only when both its lineage id and its semantic id are new.
    A node with a new lineage id but an already seen semantic id is dropped,
    and its lineage id is mapped to the lineage id of the kept node.
    Returns ``(kept nodes in input order, alias lineage -> canonical lineage)``.
    """
    visited_lineages: set[str] = set()
    canonical_by_semantic: dict[str, str] = {}
    kept: list[SemanticNode] = []
    aliases: dict[str, str] = {}
    for node in nodes:
        lineage = node.lineage_id
        if lineage in visited_lineages:
            continue
        visited_lineages.add(lineage)
        canonical = canonical_by_semantic.get(node.semantic_id)
        if canonical is None:
            canonical_by_semantic[node.semantic_id] = lineage
            kept.append(node)
        else:
            aliases[lineage] = canonical
    return kept, aliases


def _remap_and_dedupe_edges(
    edges: list[SemanticEdge],
    nodes: list[SemanticNode],
    lineage_aliases: dict[str, str],
) -> tuple[list[SemanticEdge], list[Diagnostic]]:
    """Re-point edges at canonical nodes and drop the ones left dangling.

    Edge endpoints are translated through ``lineage_aliases``. An edge whose
    translated source or target is not among ``nodes`` is dropped and reported
    as a diagnostic. Surviving edges are copied only when an endpoint changed,
    then de-duplicated by edge id.
    """
    valid_lineages = {node.lineage_id for node in nodes}
    kept: list[SemanticEdge] = []
    dropped: list[Diagnostic] = []
    for edge in edges:
        source = lineage_aliases.get(edge.source_lineage, edge.source_lineage)
        target = lineage_aliases.get(edge.target_lineage, edge.target_lineage)
        if source in valid_lineages and target in valid_lineages:
            if (source, target) != (edge.source_lineage, edge.target_lineage):
                edge = edge.model_copy(update={"source_lineage": source, "target_lineage": target})
            kept.append(edge)
        else:
            dropped.append(_dangling_edge_diagnostic(edge, source, target))
    return _dedupe_edges(kept), dropped


def _dangling_edge_diagnostic(edge: SemanticEdge, source_lineage: str, target_lineage: str) -> Diagnostic:
    """Build the warning emitted when an edge is dropped for lacking an endpoint.

    The message shows the alias-resolved lineages passed in, while the
    diagnostic attributes keep the edge's original lineage ids. Location
    fields default to an empty path/hash and line 1 when the edge carries no
    source reference.
    """
    ref = edge.source_ref
    if ref:
        path = ref.source_path
        line_number = ref.line_start or 1
        content_hash = ref.source_hash or ""
    else:
        path, line_number, content_hash = "", 1, ""
    details = {
        "edge_id": edge.edge_id,
        "source_lineage": edge.source_lineage,
        "target_lineage": edge.target_lineage,
    }
    return _diagnostic(
        "SIR_DANGLING_EDGE_DROPPED",
        DiagnosticSeverity.WARNING,
        f"Dropped dangling edge {edge.kind.value}: {source_lineage} -> {target_lineage}",
        path,
        line_number,
        content_hash,
        details,
    )


def _dedupe_edges(edges: list[SemanticEdge]) -> list[SemanticEdge]:
    """Keep the first edge for every ``edge_id``, preserving first-seen order."""
    first_by_id: dict[str, SemanticEdge] = {}
    for edge in edges:
        if edge.edge_id not in first_by_id:
            first_by_id[edge.edge_id] = edge
    return list(first_by_id.values())


def _source_hash(source: str) -> str:
    """Return the hex SHA-256 digest of ``source`` encoded as UTF-8."""
    encoded = source.encode("utf-8")
    digest = hashlib.sha256(encoded)
    return digest.hexdigest()


def _read_text_file(path: Path) -> str:
    """Read a text file whose encoding is not known in advance.

    Decoding order: UTF-16 (only when the data starts with a UTF-16 byte order
    mark), UTF-8 with optional BOM, then Windows-1251. If none of them decodes
    cleanly, the data is decoded as UTF-8 with undecodable bytes replaced.

    Raises:
        OSError: if the file cannot be read.
    """
    data = path.read_bytes()
    encodings = ["utf-8-sig", "cp1251"]
    # Without a BOM the "utf-16" codec accepts most even-length byte strings,
    # so trying it unconditionally turned cp1251 files into garbage before
    # cp1251 was ever reached. Attempt UTF-16 only for BOM-marked data.
    if data.startswith((b"\xff\xfe", b"\xfe\xff")):
        encodings.insert(0, "utf-16")
    for encoding in encodings:
        try:
            return data.decode(encoding)
        except UnicodeDecodeError:
            continue
    return data.decode("utf-8", errors="replace")


def _stable_hash(value: str) -> str:
    """Return the first 16 hex characters of the SHA-1 digest of ``value`` (UTF-8)."""
    digest = hashlib.sha1(value.encode("utf-8"))
    return digest.hexdigest()[:16]


# Names exported by ``from semantic_kernel import *``; the underscore-prefixed
# helpers defined above are intentionally left out.
__all__ = [
    "ParsedQuery",
    "ParsedRoutine",
    "ParsedWrite",
    "index_project",
    "parse_bsl_module",
    "parse_bsl_module_file",
    "parse_bsl_module_from_rust_json",
    "resolve_rust_bsl_parser",
]