Initial SFERA platform baseline
This commit is contained in:
@@ -0,0 +1,11 @@
|
||||
[project]
|
||||
name = "sfera-pattern-mining"
|
||||
version = "0.1.0"
|
||||
requires-python = ">=3.12"
|
||||
dependencies = [
|
||||
"pydantic>=2.0",
|
||||
"sfera-sir",
|
||||
]
|
||||
|
||||
[tool.uv]
|
||||
package = true
|
||||
@@ -0,0 +1,132 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import defaultdict
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from sir import EdgeKind, NodeKind, SemanticNode, SirSnapshot
|
||||
|
||||
|
||||
class SemanticPattern(BaseModel):
    """One recurring structure discovered in a SIR snapshot.

    Instances are produced by ``mine_patterns``; the miners in this module
    always set ``support`` to the number of ``participants``.
    """

    # Identifier string, e.g. "pattern.repeated_read.<table lineage id>".
    pattern_id: str
    # Pattern category, e.g. "REPEATED_TABLE_READ" or "REPEATED_ROUTINE_IO".
    kind: str
    # Human-readable headline for the pattern.
    title: str
    # Nodes that exhibit the pattern (readers, writers, routines).
    participants: list[SemanticNode] = Field(default_factory=list)
    # Nodes the participants act upon (the tables involved).
    targets: list[SemanticNode] = Field(default_factory=list)
    # Number of occurrences backing the pattern.
    support: int
    # Free-form extra data; the routine I/O miner stores read/write counts here.
    attributes: dict = Field(default_factory=dict)
|
||||
|
||||
|
||||
def mine_patterns(snapshot: SirSnapshot, *, min_support: int = 2) -> list[SemanticPattern]:
    """Run every pattern miner over ``snapshot`` and return the combined result.

    Args:
        snapshot: Snapshot whose ``nodes`` and ``edges`` are analysed.
        min_support: Minimum number of participants a pattern needs in order
            to be reported.

    Returns:
        All discovered patterns ordered by kind, then by descending support,
        then by title.
    """
    # Index nodes by lineage id once so the individual miners can share it.
    by_lineage = {entry.lineage_id: entry for entry in snapshot.nodes}

    found: list[SemanticPattern] = [
        *_table_usage_patterns(snapshot, by_lineage, min_support=min_support),
        *_routine_read_write_patterns(snapshot, by_lineage, min_support=min_support),
    ]
    found.sort(key=lambda pattern: (pattern.kind, -pattern.support, pattern.title))
    return found
|
||||
|
||||
|
||||
def _table_usage_patterns(
    snapshot: SirSnapshot,
    nodes: dict[str, SemanticNode],
    *,
    min_support: int,
) -> list[SemanticPattern]:
    """Find tables that several distinct nodes read from or write to.

    Reads are attributed to the node that owns the reading query (resolved
    through ``OWNS_QUERY`` edges); writes are attributed to the source node of
    the ``WRITES`` edge. Edges whose table or acting node cannot be resolved
    in ``nodes`` are ignored.

    Args:
        snapshot: Snapshot whose ``edges`` are scanned.
        nodes: Nodes keyed by lineage id.
        min_support: Minimum number of distinct readers (or writers) a table
            needs before a pattern is emitted for it.

    Returns:
        ``REPEATED_TABLE_READ`` patterns followed by ``REPEATED_TABLE_WRITE``
        patterns, each group in the order the tables were first encountered.
    """
    # Query lineage -> owning node. Built in a separate pass because an
    # OWNS_QUERY edge may appear after the READS_TABLE edge that needs it.
    query_owner: dict[str, SemanticNode] = {
        edge.target_lineage: nodes[edge.source_lineage]
        for edge in snapshot.edges
        if edge.kind == EdgeKind.OWNS_QUERY and edge.source_lineage in nodes
    }

    # table lineage -> {actor lineage -> actor}; the inner dict de-duplicates
    # actors that touch the same table more than once.
    readers_by_table: dict[str, dict[str, SemanticNode]] = defaultdict(dict)
    writers_by_table: dict[str, dict[str, SemanticNode]] = defaultdict(dict)
    for edge in snapshot.edges:
        if edge.kind == EdgeKind.READS_TABLE:
            bucket, actor = readers_by_table, query_owner.get(edge.source_lineage)
        elif edge.kind == EdgeKind.WRITES:
            bucket, actor = writers_by_table, nodes.get(edge.source_lineage)
        else:
            continue
        table = nodes.get(edge.target_lineage)
        if table is not None and actor is not None:
            bucket[table.lineage_id][actor.lineage_id] = actor

    return [
        *_repeated_access_patterns(
            readers_by_table,
            nodes,
            min_support=min_support,
            id_prefix="pattern.repeated_read",
            kind="REPEATED_TABLE_READ",
            title_prefix="Repeated reads of",
        ),
        *_repeated_access_patterns(
            writers_by_table,
            nodes,
            min_support=min_support,
            id_prefix="pattern.repeated_write",
            kind="REPEATED_TABLE_WRITE",
            title_prefix="Repeated writes to",
        ),
    ]


def _repeated_access_patterns(
    actors_by_table: dict[str, dict[str, SemanticNode]],
    nodes: dict[str, SemanticNode],
    *,
    min_support: int,
    id_prefix: str,
    kind: str,
    title_prefix: str,
) -> list[SemanticPattern]:
    """Emit one pattern per table accessed by at least ``min_support`` actors."""
    patterns: list[SemanticPattern] = []
    for table_lineage, actors in actors_by_table.items():
        table = nodes[table_lineage]
        if len(actors) < min_support:
            continue
        patterns.append(
            SemanticPattern(
                pattern_id=f"{id_prefix}.{table.lineage_id}",
                kind=kind,
                title=f"{title_prefix} {table.qualified_name}",
                participants=sorted(actors.values(), key=lambda node: node.qualified_name),
                targets=[table],
                support=len(actors),
            )
        )
    return patterns
|
||||
|
||||
|
||||
def _routine_read_write_patterns(
    snapshot: SirSnapshot,
    nodes: dict[str, SemanticNode],
    *,
    min_support: int,
) -> list[SemanticPattern]:
    """Find procedures/functions that share the same set of read and written tables.

    Each routine gets a "shape": the sorted lineages it reads (through the
    queries it owns) and the sorted lineages it writes. Routines with an
    identical shape are grouped, and every group with at least ``min_support``
    members becomes a ``REPEATED_ROUTINE_IO`` pattern.

    Args:
        snapshot: Snapshot whose ``edges`` are scanned.
        nodes: Nodes keyed by lineage id.
        min_support: Minimum number of routines that must share a shape.

    Returns:
        One pattern per qualifying shape, in sorted shape order. ``targets``
        holds each involved node once, even when it is both read and written.
    """
    # Query lineage -> lineage of the node that owns the query.
    query_owner: dict[str, str] = {
        edge.target_lineage: edge.source_lineage
        for edge in snapshot.edges
        if edge.kind == EdgeKind.OWNS_QUERY
    }

    reads_by_routine: dict[str, set[str]] = defaultdict(set)
    writes_by_routine: dict[str, set[str]] = defaultdict(set)
    for edge in snapshot.edges:
        if edge.kind == EdgeKind.READS_TABLE:
            # Reads with no known owning node cannot be attributed to a routine.
            if edge.source_lineage in query_owner:
                reads_by_routine[query_owner[edge.source_lineage]].add(edge.target_lineage)
        elif edge.kind == EdgeKind.WRITES:
            writes_by_routine[edge.source_lineage].add(edge.target_lineage)

    routine_kinds = {NodeKind.PROCEDURE, NodeKind.FUNCTION}
    grouped: dict[tuple[tuple[str, ...], tuple[str, ...]], list[SemanticNode]] = defaultdict(list)
    for routine_lineage in sorted(reads_by_routine.keys() | writes_by_routine.keys()):
        routine = nodes.get(routine_lineage)
        if routine is None or routine.kind not in routine_kinds:
            continue
        # .get() keeps the lookup from inserting empty sets into the
        # defaultdicts. Every lineage iterated here has at least one non-empty
        # set, so the shape is never empty and needs no extra guard.
        reads = tuple(sorted(reads_by_routine.get(routine_lineage, ())))
        writes = tuple(sorted(writes_by_routine.get(routine_lineage, ())))
        grouped[(reads, writes)].append(routine)

    patterns: list[SemanticPattern] = []
    # NOTE: the index counts every shape, including those dropped by the
    # support filter below, so emitted pattern ids may have gaps.
    for index, ((reads, writes), routines) in enumerate(sorted(grouped.items()), start=1):
        if len(routines) < min_support:
            continue
        # dict.fromkeys drops the duplicate when a table is both read and
        # written, so it is listed once in targets.
        target_lineages = dict.fromkeys([*reads, *writes])
        targets = [nodes[lineage] for lineage in target_lineages if lineage in nodes]
        patterns.append(
            SemanticPattern(
                pattern_id=f"pattern.routine_io.{index}",
                kind="REPEATED_ROUTINE_IO",
                title="Repeated routine read/write shape",
                participants=sorted(routines, key=lambda node: node.qualified_name),
                targets=sorted(targets, key=lambda node: node.qualified_name),
                support=len(routines),
                attributes={"read_count": len(reads), "write_count": len(writes)},
            )
        )
    return patterns
|
||||
|
||||
|
||||
__all__ = ["SemanticPattern", "mine_patterns"]
|
||||
@@ -0,0 +1,59 @@
|
||||
from pathlib import Path
|
||||
|
||||
from pattern_mining import mine_patterns
|
||||
from semantic_kernel import index_project
|
||||
|
||||
|
||||
def test_mine_patterns_finds_repeated_table_writes(tmp_path: Path):
    """Two procedures calling ``Движения.ОстаткиТоваров.Записать()`` must yield
    a REPEATED_TABLE_WRITE pattern with support 2 targeting ``ОстаткиТоваров``.
    """
    module_text = """
Процедура ПровестиЗаказ()
Движения.ОстаткиТоваров.Записать();
КонецПроцедуры

Процедура ОтменитьЗаказ()
Движения.ОстаткиТоваров.Записать();
КонецПроцедуры
"""
    module_file = tmp_path / "module.bsl"
    module_file.write_text(module_text, encoding="utf-8")

    mined = mine_patterns(index_project(tmp_path, project_id="patterns"))

    repeated_writes = [item for item in mined if item.kind == "REPEATED_TABLE_WRITE"]
    assert repeated_writes
    first = repeated_writes[0]
    assert first.support == 2
    assert first.targets[0].name == "ОстаткиТоваров"
|
||||
|
||||
|
||||
def test_mine_patterns_finds_repeated_query_reads(tmp_path: Path):
    """Two procedures whose queries select from
    ``РегистрНакопления.ОстаткиТоваров`` must yield a REPEATED_TABLE_READ
    pattern with support 2.
    """
    module_text = """
Процедура ПроверитьОстатки()
Запрос = Новый Запрос;
Запрос.Текст =
"ВЫБРАТЬ
Остатки.Номенклатура
ИЗ
РегистрНакопления.ОстаткиТоваров КАК Остатки";
КонецПроцедуры

Процедура РассчитатьОстатки()
Запрос = Новый Запрос;
Запрос.Текст =
"ВЫБРАТЬ
Остатки.Номенклатура
ИЗ
РегистрНакопления.ОстаткиТоваров КАК Остатки";
КонецПроцедуры
"""
    module_file = tmp_path / "module.bsl"
    module_file.write_text(module_text, encoding="utf-8")

    mined = mine_patterns(index_project(tmp_path, project_id="patterns"))

    repeated_reads = [item for item in mined if item.kind == "REPEATED_TABLE_READ"]
    assert repeated_reads
    assert repeated_reads[0].support == 2
|
||||
Reference in New Issue
Block a user