"""Implement infrastructure support for json document repository.""" from __future__ import annotations import json from datetime import UTC, datetime from pathlib import Path from app.domain.documents import Document, DocumentRepository, DocumentStatus # Keep adapter behavior explicit so integration details remain easy to audit. class JsonDocumentRepository(DocumentRepository): """Provide the Json Document Repository repository implementation.""" def __init__(self, file_path: str) -> None: """Initialize the Json Document Repository instance.""" self.file_path = Path(file_path) self.file_path.parent.mkdir(parents=True, exist_ok=True) if not self.file_path.exists(): self.file_path.write_text("{}", encoding="utf-8") def _load(self) -> dict[str, dict]: """Handle load for this module for the Json Document Repository instance.""" return json.loads(self.file_path.read_text(encoding="utf-8") or "{}") def _save(self, payload: dict[str, dict]) -> None: """Handle save for this module for the Json Document Repository instance.""" self.file_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8") def _serialize(self, document: Document) -> dict: """Handle serialize for this module for the Json Document Repository instance.""" payload = document.__dict__.copy() payload["status"] = document.status.value payload["created_at"] = document.created_at.isoformat() payload["updated_at"] = document.updated_at.isoformat() return payload def _deserialize(self, payload: dict) -> Document: """Handle deserialize for this module for the Json Document Repository instance.""" return Document( **{ **payload, "status": DocumentStatus(payload["status"]), "created_at": datetime.fromisoformat(payload["created_at"]), "updated_at": datetime.fromisoformat(payload["updated_at"]), } ) def create(self, document: Document) -> Document: """Handle create for the Json Document Repository instance.""" payload = self._load() payload[document.doc_id] = self._serialize(document) self._save(payload) return document def update(self, document: Document) -> Document: """Handle update for the Json Document Repository instance.""" document.updated_at = datetime.now(UTC) payload = self._load() payload[document.doc_id] = self._serialize(document) self._save(payload) return document def get(self, doc_id: str) -> Document | None: """Handle get for the Json Document Repository instance.""" payload = self._load() item = payload.get(doc_id) return self._deserialize(item) if item else None def list(self, limit: int | None = None) -> list[Document]: """Handle list for the Json Document Repository instance.""" payload = self._load() documents = [self._deserialize(item) for item in payload.values()] documents.sort(key=lambda item: item.updated_at, reverse=True) return documents[:limit] if limit is not None else documents def update_status( self, doc_id: str, status: DocumentStatus, *, error_message: str = "", chunk_count: int | None = None, summary: str | None = None, summary_latency_ms: int | None = None, parser_name: str | None = None, index_name: str | None = None, metadata: dict | None = None, ) -> Document | None: """Update status for the Json Document Repository instance.""" document = self.get(doc_id) if not document: return None document.status = status document.error_message = error_message if chunk_count is not None: document.chunk_count = chunk_count if summary is not None: document.summary = summary if summary_latency_ms is not None: document.summary_latency_ms = summary_latency_ms if parser_name is not None: document.parser_name = parser_name if index_name is not None: document.index_name = index_name if metadata: document.metadata.update(metadata) return self.update(document)