Speed up stored snapshot inventory
CI / python (push) Has been cancelled
CI / rust (push) Has been cancelled

This commit is contained in:
2026-05-16 19:54:14 +03:00
parent c27998693e
commit f37d45066d
3 changed files with 46 additions and 9 deletions
@@ -45,12 +45,35 @@ class FileStorage:
self.initialize()
result: list[StoredSnapshotInfo] = []
for path in sorted(self.snapshots_dir.glob("*.json")):
snapshot = snapshot_from_json(path.read_bytes())
try:
payload = orjson.loads(path.read_bytes())
except (FileNotFoundError, orjson.JSONDecodeError):
continue
project_id = payload.get("project_id")
snapshot_id = payload.get("snapshot_id")
if not isinstance(project_id, str) or not isinstance(snapshot_id, str):
continue
snapshot_hash = payload.get("snapshot_hash")
result.append(
StoredSnapshotInfo(
project_id=snapshot.project_id,
snapshot_id=snapshot.snapshot_id,
snapshot_hash=snapshot.snapshot_hash,
project_id=project_id,
snapshot_id=snapshot_id,
snapshot_hash=snapshot_hash if isinstance(snapshot_hash, str) else None,
path=path.as_posix(),
)
)
return result
def list_snapshot_refs(self) -> list[StoredSnapshotInfo]:
self.initialize()
result: list[StoredSnapshotInfo] = []
for path in sorted(self.snapshots_dir.glob("*.json")):
project_id = path.stem
result.append(
StoredSnapshotInfo(
project_id=project_id,
snapshot_id=f"stored.{project_id}",
snapshot_hash=None,
path=path.as_posix(),
)
)
@@ -59,6 +82,11 @@ class FileStorage:
def has_snapshot(self, project_id: str) -> bool:
return self._snapshot_path(project_id).exists()
def count_snapshots(self) -> int:
if not self.snapshots_dir.exists():
return 0
return sum(1 for _ in self.snapshots_dir.glob("*.json"))
def write_document(self, collection: str, document_id: str, payload: dict[str, Any]) -> Path:
directory = self._collection_dir(collection)
directory.mkdir(parents=True, exist_ok=True)
@@ -17,6 +17,10 @@ def test_file_storage_saves_and_loads_snapshot(tmp_path: Path):
assert info.project_id == "demo"
assert restored.snapshot_hash == snapshot.snapshot_hash
assert storage.list_snapshots()[0].snapshot_id == snapshot.snapshot_id
snapshot_ref = storage.list_snapshot_refs()[0]
assert snapshot_ref.project_id == "demo"
assert snapshot_ref.snapshot_id == "stored.demo"
assert snapshot_ref.path.endswith("demo.json")
def test_file_storage_generic_documents(tmp_path: Path):
+10 -5
View File
@@ -2937,7 +2937,7 @@ async def incremental_file(project_id: str, request: IncrementalFileRequest) ->
@app.get("/storage/snapshots", response_model=list[StoredSnapshotInfo])
async def list_stored_snapshots() -> list[StoredSnapshotInfo]:
return _storage.list_snapshots()
return _storage.list_snapshot_refs()
def _project_snapshot_or_404(project_id: str) -> SirSnapshot:
@@ -6612,7 +6612,7 @@ async def license_state() -> LicenseState:
async def admin_summary() -> dict:
return {
"indexed_projects": len(_snapshots),
"stored_snapshots": len(_storage.list_snapshots()),
"stored_snapshots": _storage.count_snapshots(),
"knowledge_records": len(_knowledge.list_records()),
"knowledge_packs": len(_knowledge.list_packs()),
"users": len(_collaboration.users),
@@ -7476,12 +7476,12 @@ def _project_setup_response(project_id: str) -> ProjectSetupResponse:
def _project_has_stored_snapshot(project_id: str) -> bool:
return any(snapshot.project_id == project_id for snapshot in _storage.list_snapshots())
return _storage.has_snapshot(project_id)
def _project_summaries() -> list[ProjectSummaryResponse]:
project_ids = set(_project_setup.keys())
stored_snapshots = _storage.list_snapshots()
stored_snapshots = _storage.list_snapshot_refs()
project_ids.update(snapshot.project_id for snapshot in stored_snapshots)
snapshot_project_ids = {snapshot.project_id for snapshot in stored_snapshots}
result: list[ProjectSummaryResponse] = []
@@ -7514,6 +7514,11 @@ def _normalize_project_id(project_id: str) -> str:
def _delete_project_data(project_id: str, request: ProjectDeleteRequest) -> list[str]:
deleted: list[str] = []
had_indexed_data = (
project_id in _snapshots
or project_id in _normalized_projects
or _storage.has_snapshot(project_id)
)
_project_setup.pop(project_id, None)
_snapshots.pop(project_id, None)
_graphs.pop(project_id, None)
@@ -7534,7 +7539,7 @@ def _delete_project_data(project_id: str, request: ProjectDeleteRequest) -> list
if folder.exists():
shutil.rmtree(folder)
deleted.append(folder.name)
if request.delete_versions:
if request.delete_versions and had_indexed_data:
count = _storage.delete_documents_matching("object_versions", lambda payload: payload.get("project_id") == project_id)
if count:
deleted.append(f"object_versions:{count}")