Initial SFERA platform baseline
This commit is contained in:
@@ -0,0 +1,9 @@
|
||||
# sfera-semantic-search
|
||||
|
||||
Deterministic semantic search over SIR snapshots.
|
||||
|
||||
Search covers:
|
||||
|
||||
- node name, qualified name, kind, and source path;
|
||||
- metadata attributes;
|
||||
- indexed module source text where present.
|
||||
@@ -0,0 +1,11 @@
|
||||
[project]
|
||||
name = "sfera-semantic-search"
|
||||
version = "0.1.0"
|
||||
requires-python = ">=3.12"
|
||||
dependencies = [
|
||||
"pydantic>=2.0",
|
||||
"sfera-sir",
|
||||
]
|
||||
|
||||
[tool.uv]
|
||||
package = true
|
||||
@@ -0,0 +1,85 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Iterable
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from sir import SemanticNode, SirSnapshot
|
||||
|
||||
|
||||
class SearchResult(BaseModel):
    # One search hit: a snapshot node plus the data explaining why it matched.

    # The node that matched the query.
    node: SemanticNode

    # Relevance score of the node; search results are ordered by it.
    score: float

    # Names of the fields that matched, e.g. "name" or "attributes.<key>".
    matched_fields: list[str]
|
||||
|
||||
|
||||
def search_snapshot(
    snapshot: SirSnapshot,
    query: str,
    *,
    kinds: set[str] | None = None,
    limit: int = 20,
) -> list[SearchResult]:
    """Return the best-matching nodes of *snapshot* for *query*.

    The query is case-folded and stripped before matching; a blank query
    yields no results. Each node (optionally restricted to ``kinds``) is
    scored with ``_score_node`` and kept only when its score is positive.
    Results are ordered by descending score, with ties broken by the node's
    qualified name.

    Args:
        snapshot: Snapshot whose ``nodes`` are searched.
        query: Free-text search string.
        kinds: When given, only nodes whose ``kind.value`` is in this set
            are considered.
        limit: Maximum number of results to return. Zero or a negative
            value returns an empty list.

    Returns:
        At most ``limit`` results, best match first.
    """
    normalized_query = query.casefold().strip()
    # A negative limit would be interpreted by the final slice as an offset
    # from the end and silently drop the lowest-ranked results, so treat any
    # non-positive limit as "no results".
    if not normalized_query or limit <= 0:
        return []

    results: list[SearchResult] = []
    for node in snapshot.nodes:
        if kinds is not None and node.kind.value not in kinds:
            continue
        score, fields = _score_node(node, normalized_query)
        if score > 0:
            results.append(SearchResult(node=node, score=score, matched_fields=fields))

    # Negated score gives descending order; qualified name keeps ties stable
    # and reproducible across runs.
    results.sort(key=lambda result: (-result.score, result.node.qualified_name))
    return results[:limit]
|
||||
|
||||
|
||||
def _score_node(node: SemanticNode, query: str) -> tuple[float, list[str]]:
    """Score one node against *query* and report which fields matched.

    The node's name, qualified name, kind and source path contribute their
    full text score. Each matching attribute contributes its text score
    reduced by one, but never less than one.

    Returns:
        A ``(total_score, matched_field_names)`` pair; the score is ``0.0``
        and the list is empty when nothing matched.
    """
    core_fields = (
        ("name", node.name),
        ("qualified_name", node.qualified_name),
        ("kind", node.kind.value),
        ("source_path", node.source_ref.source_path),
    )

    total = 0.0
    hits: list[str] = []

    for label, text in core_fields:
        points = _score_text(text, query)
        if not points:
            continue
        total += points
        hits.append(label)

    for label, text in _attribute_search_fields(node.attributes):
        points = _score_text(text, query)
        if not points:
            continue
        # Attribute matches weigh slightly less than core-field matches,
        # with a floor of 1.0 so a hit always counts.
        discounted = points - 1.0
        total += discounted if discounted > 1.0 else 1.0
        hits.append(label)

    return total, hits
|
||||
|
||||
|
||||
def _score_text(value: object, query: str) -> float:
|
||||
normalized = str(value).casefold()
|
||||
if normalized == query:
|
||||
return 10.0
|
||||
if normalized.startswith(query):
|
||||
return 5.0
|
||||
if query in normalized:
|
||||
return 2.0
|
||||
return 0.0
|
||||
|
||||
|
||||
def _attribute_search_fields(attributes: dict) -> Iterable[tuple[str, object]]:
|
||||
for key, value in sorted(attributes.items()):
|
||||
field = f"attributes.{key}"
|
||||
if isinstance(value, dict):
|
||||
for nested_key, nested_value in _attribute_search_fields(value):
|
||||
yield f"{field}.{nested_key.removeprefix('attributes.')}", nested_value
|
||||
elif isinstance(value, list):
|
||||
for index, item in enumerate(value):
|
||||
yield f"{field}[{index}]", item
|
||||
else:
|
||||
yield field, value
|
||||
|
||||
|
||||
# Public API of this module; the underscore-prefixed helpers are not exported.
__all__ = ["SearchResult", "search_snapshot"]
|
||||
@@ -0,0 +1,59 @@
|
||||
from pathlib import Path
|
||||
|
||||
from semantic_kernel import index_project
|
||||
from semantic_search import search_snapshot
|
||||
|
||||
|
||||
def test_search_snapshot_finds_routine_by_partial_name(tmp_path: Path):
    """A prefix of a procedure name returns that procedure as the top hit."""
    bsl_source = """
Процедура Проведение()
КонецПроцедуры
"""
    (tmp_path / "demo_module.bsl").write_text(bsl_source, encoding="utf-8")
    indexed = index_project(tmp_path, project_id="demo")

    hits = search_snapshot(indexed, "Пров", kinds={"PROCEDURE"})

    assert hits
    top_hit = hits[0]
    assert top_hit.node.name == "Проведение"
|
||||
|
||||
|
||||
def test_search_snapshot_finds_node_by_source_text(tmp_path: Path):
    """Text that only occurs in a module body matches via ``source_text``."""
    bsl_source = """
Процедура Отправить()
Адрес = "https://api.example.local/orders";
КонецПроцедуры
"""
    (tmp_path / "integration.bsl").write_text(bsl_source, encoding="utf-8")
    indexed = index_project(tmp_path, project_id="search-source-text")

    hits = search_snapshot(indexed, "api.example.local")

    assert hits
    # The hit for the module node itself must report the source-text field.
    module_hit = next(hit for hit in hits if hit.node.name == "integration")
    assert "attributes.source_text" in module_hit.matched_fields
|
||||
|
||||
|
||||
def test_search_snapshot_finds_node_by_metadata_attribute(tmp_path: Path):
    """A word from a metadata synonym finds the owning document first."""
    metadata_xml = """
<Configuration>
<Document name="ЗаказПокупателя" qualifiedName="Документ.ЗаказПокупателя" synonym="Customer Order" />
</Configuration>
"""
    (tmp_path / "metadata.xml").write_text(metadata_xml, encoding="utf-8")
    indexed = index_project(tmp_path, project_id="search-attributes")

    hits = search_snapshot(indexed, "Customer")

    assert hits
    top_hit = hits[0]
    assert top_hit.node.qualified_name == "Документ.ЗаказПокупателя"
    assert "attributes.synonym" in top_hit.matched_fields
|
||||
Reference in New Issue
Block a user