Files
AIRegulation-DocAnalysis/backend/app/infrastructure/storage/json_document_repository.py

110 lines
4.3 KiB
Python

"""Implement infrastructure support for json document repository."""
from __future__ import annotations
import json
from datetime import UTC, datetime
from pathlib import Path
from app.domain.documents import Document, DocumentRepository, DocumentStatus
# Keep adapter behavior explicit so integration details remain easy to audit.
class JsonDocumentRepository(DocumentRepository):
    """Store documents in a single JSON file keyed by document id.

    The file holds one JSON object that maps ``doc_id`` to the serialized
    document. Every read parses the whole file and every mutation rewrites
    it; nothing is cached in memory between calls.
    """

    def __init__(self, file_path: str) -> None:
        """Remember the storage path and make sure the file exists.

        Args:
            file_path: Location of the JSON file. Missing parent directories
                are created, and a missing file is initialized to ``{}``.
        """
        self.file_path = Path(file_path)
        self.file_path.parent.mkdir(parents=True, exist_ok=True)
        if not self.file_path.exists():
            self.file_path.write_text("{}", encoding="utf-8")

    def _load(self) -> dict[str, dict]:
        """Read and parse the whole store.

        Returns:
            The parsed JSON content. An empty or whitespace-only file is
            treated as an empty store instead of failing to parse.

        Raises:
            json.JSONDecodeError: If the file holds non-blank invalid JSON.
        """
        raw = self.file_path.read_text(encoding="utf-8")
        if not raw.strip():
            return {}
        return json.loads(raw)

    def _save(self, payload: dict[str, dict]) -> None:
        """Write the whole store back to disk.

        The content is written to a sibling temporary file and then moved
        over the real file, so an interrupted write cannot leave a
        half-written (unparseable) store behind.

        Args:
            payload: Mapping of ``doc_id`` to serialized document.
        """
        serialized = json.dumps(payload, ensure_ascii=False, indent=2)
        # Same directory as the target so the final rename stays on one
        # filesystem.
        tmp_path = self.file_path.with_name(f"{self.file_path.name}.tmp")
        try:
            tmp_path.write_text(serialized, encoding="utf-8")
            tmp_path.replace(self.file_path)
        finally:
            # No-op after a successful replace; removes the partial file
            # when the write or the rename failed.
            tmp_path.unlink(missing_ok=True)

    def _serialize(self, document: Document) -> dict:
        """Convert a document into a JSON-friendly dict.

        Copies the instance attributes and replaces ``status`` with its enum
        value and both timestamps with ISO-8601 strings.
        NOTE(review): assumes every other attribute is already
        JSON-serializable -- confirm against the Document definition.
        """
        payload = document.__dict__.copy()
        payload["status"] = document.status.value
        payload["created_at"] = document.created_at.isoformat()
        payload["updated_at"] = document.updated_at.isoformat()
        return payload

    def _deserialize(self, payload: dict) -> Document:
        """Rebuild a document from its stored dict.

        Reverses ``_serialize``: ``status`` becomes a ``DocumentStatus`` and
        both timestamps are parsed from ISO-8601; every other key is passed
        to the ``Document`` constructor unchanged.
        """
        fields = {
            **payload,
            "status": DocumentStatus(payload["status"]),
            "created_at": datetime.fromisoformat(payload["created_at"]),
            "updated_at": datetime.fromisoformat(payload["updated_at"]),
        }
        return Document(**fields)

    def create(self, document: Document) -> Document:
        """Store ``document`` under its ``doc_id`` and return it.

        An existing entry with the same id is overwritten.
        """
        payload = self._load()
        payload[document.doc_id] = self._serialize(document)
        self._save(payload)
        return document

    def update(self, document: Document) -> Document:
        """Persist ``document`` after refreshing its ``updated_at``.

        The passed-in object is mutated: ``updated_at`` is set to the
        current UTC time before saving. The entry is written whether or not
        it already existed.
        """
        document.updated_at = datetime.now(UTC)
        payload = self._load()
        payload[document.doc_id] = self._serialize(document)
        self._save(payload)
        return document

    def get(self, doc_id: str) -> Document | None:
        """Return the document stored under ``doc_id``, or ``None``.

        A missing or empty record is reported as ``None``.
        """
        item = self._load().get(doc_id)
        if not item:
            return None
        return self._deserialize(item)

    def list(self, limit: int | None = None) -> list[Document]:
        """Return stored documents, most recently updated first.

        Args:
            limit: Maximum number of documents to return; ``None`` returns
                all of them.
        """
        payload = self._load()
        documents = [self._deserialize(item) for item in payload.values()]
        documents.sort(key=lambda item: item.updated_at, reverse=True)
        return documents[:limit] if limit is not None else documents

    def update_status(
        self,
        doc_id: str,
        status: DocumentStatus,
        *,
        error_message: str = "",
        chunk_count: int | None = None,
        summary: str | None = None,
        summary_latency_ms: int | None = None,
        parser_name: str | None = None,
        index_name: str | None = None,
        metadata: dict | None = None,
    ) -> Document | None:
        """Set a document's status and any supplied processing details.

        Args:
            doc_id: Id of the document to change.
            status: New status.
            error_message: Always written, so leaving it at the default
                clears a previously stored message.
            chunk_count: Written only when not ``None``.
            summary: Written only when not ``None``.
            summary_latency_ms: Written only when not ``None``.
            parser_name: Written only when not ``None``.
            index_name: Written only when not ``None``.
            metadata: When non-empty, merged into the existing metadata
                (existing keys are overwritten, others are kept).

        Returns:
            The saved document, or ``None`` when ``doc_id`` is not stored.
        """
        document = self.get(doc_id)
        if document is None:
            return None
        document.status = status
        document.error_message = error_message
        if chunk_count is not None:
            document.chunk_count = chunk_count
        if summary is not None:
            document.summary = summary
        if summary_latency_ms is not None:
            document.summary_latency_ms = summary_latency_ms
        if parser_name is not None:
            document.parser_name = parser_name
        if index_name is not None:
            document.index_name = index_name
        if metadata:
            document.metadata.update(metadata)
        return self.update(document)