110 lines
4.3 KiB
Python
110 lines
4.3 KiB
Python
|
|
"""Implement infrastructure support for json document repository."""
|
||
|
|
|
||
|
|
from __future__ import annotations
|
||
|
|
|
||
|
|
import json
|
||
|
|
from datetime import UTC, datetime
|
||
|
|
from pathlib import Path
|
||
|
|
|
||
|
|
from app.domain.documents import Document, DocumentRepository, DocumentStatus
|
||
|
|
# Keep adapter behavior explicit so integration details remain easy to audit.
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
class JsonDocumentRepository(DocumentRepository):
    """Persist documents as one JSON object stored in a single file.

    The file maps each ``doc_id`` to that document's serialized fields.
    Every read re-parses the whole file and every mutation rewrites it;
    no locking is performed by this class.
    """

    def __init__(self, file_path: str) -> None:
        """Prepare the backing file, creating it (and its parents) if absent.

        Args:
            file_path: Location of the JSON store. Missing parent directories
                are created, and a missing file is seeded with ``{}``.
        """
        self.file_path = Path(file_path)
        self.file_path.parent.mkdir(parents=True, exist_ok=True)
        if not self.file_path.exists():
            self.file_path.write_text("{}", encoding="utf-8")

    def _load(self) -> dict[str, dict]:
        """Read the store and return its parsed content.

        Returns:
            The parsed JSON content of the file, or an empty dict when the
            file is empty or contains only whitespace.

        Raises:
            json.JSONDecodeError: If the file holds non-blank invalid JSON.
        """
        raw = self.file_path.read_text(encoding="utf-8")
        # A blank file (zero bytes or just whitespace/newline) is an empty
        # store rather than a parse error.
        if not raw.strip():
            return {}
        return json.loads(raw)

    def _save(self, payload: dict[str, dict]) -> None:
        """Write ``payload`` to the store, replacing the previous content.

        The JSON text is written to a sibling ``<name>.tmp`` file first and
        then moved over the real file, so an interrupted write cannot leave
        a truncated store behind.

        Args:
            payload: Mapping of ``doc_id`` to serialized document fields.
        """
        # Serialize before touching the disk: a non-serializable value fails
        # here, leaving the existing store untouched.
        text = json.dumps(payload, ensure_ascii=False, indent=2)
        tmp_path = self.file_path.with_name(f"{self.file_path.name}.tmp")
        try:
            tmp_path.write_text(text, encoding="utf-8")
            tmp_path.replace(self.file_path)
        except OSError:
            # Do not leave a stale temp file next to the store on failure.
            tmp_path.unlink(missing_ok=True)
            raise

    def _serialize(self, document: Document) -> dict:
        """Convert ``document`` into a JSON-compatible dict.

        Starts from a shallow copy of the instance ``__dict__`` and replaces
        ``status`` with its ``.value`` and both timestamps with ISO-8601
        strings. All other attributes are passed through unchanged.
        """
        payload = document.__dict__.copy()
        payload["status"] = document.status.value
        payload["created_at"] = document.created_at.isoformat()
        payload["updated_at"] = document.updated_at.isoformat()
        return payload

    def _deserialize(self, payload: dict) -> Document:
        """Rebuild a ``Document`` from a dict produced by ``_serialize``.

        ``status``, ``created_at`` and ``updated_at`` are converted back from
        their stored representations; every other key is forwarded to the
        ``Document`` constructor as a keyword argument.

        Raises:
            KeyError: If one of the three converted keys is missing.
        """
        fields = dict(payload)
        fields["status"] = DocumentStatus(payload["status"])
        fields["created_at"] = datetime.fromisoformat(payload["created_at"])
        fields["updated_at"] = datetime.fromisoformat(payload["updated_at"])
        return Document(**fields)

    def create(self, document: Document) -> Document:
        """Store ``document`` under its ``doc_id`` and return it.

        An existing entry with the same ``doc_id`` is overwritten.
        """
        payload = self._load()
        payload[document.doc_id] = self._serialize(document)
        self._save(payload)
        return document

    def update(self, document: Document) -> Document:
        """Stamp ``document`` with the current UTC time and store it.

        ``document.updated_at`` is mutated in place before the entry for its
        ``doc_id`` is written. The same instance is returned.
        """
        document.updated_at = datetime.now(UTC)
        payload = self._load()
        payload[document.doc_id] = self._serialize(document)
        self._save(payload)
        return document

    def get(self, doc_id: str) -> Document | None:
        """Return the document stored under ``doc_id``, or ``None``.

        ``None`` is returned when the key is absent or its stored record is
        falsy (for example an empty dict).
        """
        record = self._load().get(doc_id)
        if not record:
            return None
        return self._deserialize(record)

    def list(self, limit: int | None = None) -> list[Document]:
        """Return stored documents ordered by ``updated_at``, newest first.

        Args:
            limit: When not ``None``, the result is sliced with
                ``documents[:limit]``.
        """
        documents = [self._deserialize(item) for item in self._load().values()]
        documents.sort(key=lambda item: item.updated_at, reverse=True)
        if limit is None:
            return documents
        return documents[:limit]

    def update_status(
        self,
        doc_id: str,
        status: DocumentStatus,
        *,
        error_message: str = "",
        chunk_count: int | None = None,
        summary: str | None = None,
        summary_latency_ms: int | None = None,
        parser_name: str | None = None,
        index_name: str | None = None,
        metadata: dict | None = None,
    ) -> Document | None:
        """Set a document's status and any supplied optional fields.

        Args:
            doc_id: Key of the document to modify.
            status: New status value.
            error_message: Always written to the document; the default ``""``
                therefore overwrites any previously stored message.
            chunk_count: Stored only when not ``None``.
            summary: Stored only when not ``None``.
            summary_latency_ms: Stored only when not ``None``.
            parser_name: Stored only when not ``None``.
            index_name: Stored only when not ``None``.
            metadata: When non-empty, merged into the document's existing
                ``metadata`` via ``dict.update``.

        Returns:
            The document as returned by ``update``, or ``None`` when ``get``
            finds nothing for ``doc_id``.
        """
        document = self.get(doc_id)
        if not document:
            return None

        document.status = status
        document.error_message = error_message

        if chunk_count is not None:
            document.chunk_count = chunk_count
        if summary is not None:
            document.summary = summary
        if summary_latency_ms is not None:
            document.summary_latency_ms = summary_latency_ms
        if parser_name is not None:
            document.parser_name = parser_name
        if index_name is not None:
            document.index_name = index_name
        if metadata:
            document.metadata.update(metadata)

        return self.update(document)
|