diff --git a/backend/app/application/documents/services.py b/backend/app/application/documents/services.py index 2a05870..afc9e41 100644 --- a/backend/app/application/documents/services.py +++ b/backend/app/application/documents/services.py @@ -7,16 +7,22 @@ import tempfile import uuid import json from dataclasses import dataclass +from datetime import UTC, datetime from loguru import logger +from app.config.settings import settings from app.domain.documents import ( ChunkBuilder, Document, + DocumentArtifact, DocumentBinaryStore, DocumentParser, + DocumentProcessingRun, + DocumentProcessingStore, DocumentRepository, DocumentStatus, + DocumentStatusEvent, ParseArtifactStore, ParsedDocument, ) @@ -39,6 +45,7 @@ class DocumentProcessResult: class DocumentCommandService: """Provide the Document Command Service service.""" + def __init__( self, *, @@ -49,6 +56,7 @@ class DocumentCommandService: embedding_provider: EmbeddingProvider, vector_index: VectorIndex, parse_artifact_store: ParseArtifactStore | None = None, + document_processing_store: DocumentProcessingStore | None = None, ) -> None: """Initialize the Document Command Service instance.""" self.document_repository = document_repository @@ -58,6 +66,11 @@ class DocumentCommandService: self.embedding_provider = embedding_provider self.vector_index = vector_index self.parse_artifact_store = parse_artifact_store + self.document_processing_store = document_processing_store + + def _utcnow(self) -> datetime: + """Return the current UTC timestamp for persisted processing metadata.""" + return datetime.now(UTC) def _save_parse_artifacts(self, *, doc_id: str, parsed_document: ParsedDocument) -> dict[str, str]: """Persist parse artifacts so troubleshooting does not depend on provider retention windows.""" @@ -80,6 +93,143 @@ class DocumentCommandService: artifact_keys[name] = object_name return artifact_keys + def _safe_create_processing_run(self, *, doc_id: str, trigger_type: str, generate_summary: bool) -> str | None: + 
"""Create a processing run record when the optional store is available.""" + if not self.document_processing_store: + return None + run = DocumentProcessingRun( + run_id=str(uuid.uuid4()), + doc_id=doc_id, + trigger_type=trigger_type, + run_status="running", + parser_backend=settings.parser_backend, + chunk_backend=settings.chunk_backend, + embedding_model=settings.embedding_model, + metadata={"generate_summary": generate_summary}, + ) + try: + created = self.document_processing_store.create_run(run) + return created.run_id + except Exception: + logger.warning("DocumentProcessingStore.create_run failed for doc_id={}", doc_id) + return None + + def _safe_append_status_event( + self, + *, + doc_id: str, + run_id: str | None, + from_status: str, + to_status: str, + stage: str, + message: str = "", + metadata: dict | None = None, + ) -> None: + """Append a status event without allowing auxiliary persistence failures to abort processing.""" + if not self.document_processing_store or not run_id: + return + event = DocumentStatusEvent( + event_id=str(uuid.uuid4()), + doc_id=doc_id, + run_id=run_id, + from_status=from_status, + to_status=to_status, + stage=stage, + message=message, + metadata=metadata or {}, + ) + try: + self.document_processing_store.append_status_event(event) + except Exception: + logger.warning( + "DocumentProcessingStore.append_status_event failed for doc_id={}, run_id={}", + doc_id, + run_id, + ) + + def _safe_mark_run_stored(self, *, doc_id: str, run_id: str | None) -> None: + """Mark the processing run as stored without affecting the main workflow.""" + if not self.document_processing_store or not run_id: + return + try: + self.document_processing_store.mark_run_stored(run_id, stored_at=self._utcnow()) + except Exception: + logger.warning("DocumentProcessingStore.mark_run_stored failed for doc_id={}, run_id={}", doc_id, run_id) + + def _safe_mark_run_parsed(self, *, doc_id: str, run_id: str | None, parsed_document: ParsedDocument) -> None: + 
"""Persist parse completion details without failing the document pipeline.""" + if not self.document_processing_store or not run_id: + return + try: + self.document_processing_store.mark_run_parsed( + run_id, + parser_backend=parsed_document.parser_name, + layout_count=int(parsed_document.metadata.get("layout_count", len(parsed_document.raw_layouts)) or 0), + structure_node_count=len(parsed_document.structure_nodes), + semantic_block_count=len(parsed_document.semantic_blocks), + vector_chunk_count=len(parsed_document.vector_chunks), + parsed_at=self._utcnow(), + metadata={"parse_task_id": parsed_document.metadata.get("task_id", "")}, + ) + except Exception: + logger.warning("DocumentProcessingStore.mark_run_parsed failed for doc_id={}, run_id={}", doc_id, run_id) + + def _safe_replace_processing_artifacts(self, *, doc_id: str, run_id: str | None, artifact_keys: dict[str, str]) -> None: + """Store artifact references without turning persistence drift into a user-visible failure.""" + if not self.document_processing_store or not run_id: + return + artifacts = [ + DocumentArtifact( + artifact_id=str(uuid.uuid4()), + doc_id=doc_id, + run_id=run_id, + artifact_type=artifact_type, + object_name=object_name, + content_type="application/json", + byte_size=0, + checksum="", + ) + for artifact_type, object_name in artifact_keys.items() + ] + try: + self.document_processing_store.replace_artifacts_for_run(run_id, artifacts) + except Exception: + logger.warning( + "DocumentProcessingStore.replace_artifacts_for_run failed for doc_id={}, run_id={}", + doc_id, + run_id, + ) + + def _safe_mark_run_indexed(self, *, doc_id: str, run_id: str | None, chunk_count: int, index_name: str) -> None: + """Mark the processing run as indexed without affecting the success path.""" + if not self.document_processing_store or not run_id: + return + now = self._utcnow() + try: + self.document_processing_store.mark_run_indexed( + run_id, + chunk_count=chunk_count, + index_name=index_name, + 
indexed_at=now, + finished_at=now, + ) + except Exception: + logger.warning("DocumentProcessingStore.mark_run_indexed failed for doc_id={}, run_id={}", doc_id, run_id) + + def _safe_mark_run_failed(self, *, doc_id: str, run_id: str | None, failure_stage: str, error_message: str) -> None: + """Mark the processing run as failed without masking the original error handling path.""" + if not self.document_processing_store or not run_id: + return + try: + self.document_processing_store.mark_run_failed( + run_id, + failure_stage=failure_stage, + error_message=error_message, + finished_at=self._utcnow(), + ) + except Exception: + logger.warning("DocumentProcessingStore.mark_run_failed failed for doc_id={}, run_id={}", doc_id, run_id) + def upload_and_process( self, *, @@ -91,11 +241,15 @@ class DocumentCommandService: regulation_type: str, version: str, generate_summary: bool, + trigger_type: str = "upload", ) -> DocumentProcessResult: """Handle upload and process for the Document Command Service instance.""" doc_id = doc_id or str(uuid.uuid4())[:8] final_doc_name = doc_name or file_name object_name = f"{doc_id}/{file_name}" + run_id: str | None = None + current_status = DocumentStatus.PENDING + current_stage = "store" document = Document( doc_id=doc_id, @@ -109,6 +263,19 @@ class DocumentCommandService: metadata={"generate_summary": generate_summary}, ) self.document_repository.create(document) + run_id = self._safe_create_processing_run( + doc_id=doc_id, + trigger_type=trigger_type, + generate_summary=generate_summary, + ) + self._safe_append_status_event( + doc_id=doc_id, + run_id=run_id, + from_status="", + to_status=DocumentStatus.PENDING.value, + stage="document_created", + message="Document record created", + ) temp_path = "" try: @@ -119,6 +286,17 @@ class DocumentCommandService: metadata={"doc_id": doc_id}, ) self.document_repository.update_status(doc_id, DocumentStatus.STORED) + current_status = DocumentStatus.STORED + current_stage = "parse" + 
self._safe_mark_run_stored(doc_id=doc_id, run_id=run_id) + self._safe_append_status_event( + doc_id=doc_id, + run_id=run_id, + from_status=DocumentStatus.PENDING.value, + to_status=DocumentStatus.STORED.value, + stage="store", + message="Source file stored", + ) suffix = os.path.splitext(file_name)[1] with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file: @@ -130,7 +308,13 @@ class DocumentCommandService: doc_id=doc_id, doc_name=final_doc_name, ) - artifact_keys = self._save_parse_artifacts(doc_id=doc_id, parsed_document=parsed_document) + self._safe_mark_run_parsed(doc_id=doc_id, run_id=run_id, parsed_document=parsed_document) + + artifact_keys: dict[str, str] = {} + try: + artifact_keys = self._save_parse_artifacts(doc_id=doc_id, parsed_document=parsed_document) + except Exception: + logger.warning("Parse artifact binary persistence failed for doc_id={}", doc_id) self.document_repository.update_status( doc_id, DocumentStatus.PARSED, @@ -146,6 +330,18 @@ class DocumentCommandService: "processing_stage": "parsed", }, ) + current_status = DocumentStatus.PARSED + current_stage = "embed" + self._safe_replace_processing_artifacts(doc_id=doc_id, run_id=run_id, artifact_keys=artifact_keys) + self._safe_append_status_event( + doc_id=doc_id, + run_id=run_id, + from_status=DocumentStatus.STORED.value, + to_status=DocumentStatus.PARSED.value, + stage="parse", + message="Document parsed", + metadata={"artifact_count": len(artifact_keys)}, + ) if self.parse_artifact_store: try: self.parse_artifact_store.save( @@ -165,6 +361,7 @@ class DocumentCommandService: raise ValueError("解析完成但没有生成可入库的 chunks") vectors = self.embedding_provider.embed_texts([chunk.embedding_text for chunk in chunks]) + current_stage = "index" inserted = self.vector_index.upsert(chunks, vectors) if inserted != len(chunks): logger.warning("Milvus upsert count mismatched: inserted={}, chunks={}", inserted, len(chunks)) @@ -182,6 +379,23 @@ class DocumentCommandService: "processing_stage": 
"indexed", }, ) + current_status = DocumentStatus.INDEXED + index_name = health.get("collection_name", "") + self._safe_mark_run_indexed( + doc_id=doc_id, + run_id=run_id, + chunk_count=len(chunks), + index_name=index_name, + ) + self._safe_append_status_event( + doc_id=doc_id, + run_id=run_id, + from_status=DocumentStatus.PARSED.value, + to_status=DocumentStatus.INDEXED.value, + stage="index", + message="Document indexed", + metadata={"chunk_count": len(chunks), "index_name": index_name}, + ) stored = self.document_repository.get(doc_id) return DocumentProcessResult( doc_id=doc_id, @@ -194,6 +408,7 @@ class DocumentCommandService: ) except Exception as exc: logger.exception("文档处理失败: doc_id={}", doc_id) + failure_stage = current_stage self.document_repository.update_status( doc_id, DocumentStatus.FAILED, @@ -201,8 +416,23 @@ class DocumentCommandService: metadata={ "failure_reason": str(exc), "processing_stage": "failed", + "failure_stage": failure_stage, }, ) + self._safe_mark_run_failed( + doc_id=doc_id, + run_id=run_id, + failure_stage=failure_stage, + error_message=str(exc), + ) + self._safe_append_status_event( + doc_id=doc_id, + run_id=run_id, + from_status=current_status.value, + to_status=DocumentStatus.FAILED.value, + stage=failure_stage, + message=str(exc), + ) return DocumentProcessResult( doc_id=doc_id, doc_name=final_doc_name, @@ -235,6 +465,11 @@ class DocumentCommandService: self.parse_artifact_store.delete(doc_id) except Exception: logger.warning("ParseArtifactStore delete failed for doc_id={}", doc_id) + if self.document_processing_store: + try: + self.document_processing_store.delete_by_document(doc_id) + except Exception: + logger.warning("DocumentProcessingStore delete failed for doc_id={}", doc_id) self.document_repository.delete(doc_id) return True @@ -253,6 +488,7 @@ class DocumentCommandService: regulation_type=document.regulation_type, version=document.version, generate_summary=bool(document.metadata.get("generate_summary", False)), + 
trigger_type="retry", ) diff --git a/backend/app/config/settings.py b/backend/app/config/settings.py index b8b9a3c..6d4daf9 100644 --- a/backend/app/config/settings.py +++ b/backend/app/config/settings.py @@ -78,6 +78,7 @@ class Settings(BaseSettings): chunk_overlap: int = Field(default=50, description="分块重叠大小") max_file_size_mb: int = Field(default=100, description="最大文件大小(MB)") document_metadata_path: str = Field(default="backend/data/documents.json", description="文档元数据存储路径") + document_processing_metadata_path: str = Field(default="backend/data/document_processing.json", description="文档处理历史存储路径") parser_backend: str = Field(default="aliyun", description="解析后端(local/aliyun)") chunk_backend: str = Field(default="aliyun", description="分块后端(local/aliyun)") document_repository_backend: str = Field(default="json", description="文档元数据存储后端 (json/postgres)") diff --git a/backend/app/domain/documents/__init__.py b/backend/app/domain/documents/__init__.py index 2fcaabc..b2c1ade 100644 --- a/backend/app/domain/documents/__init__.py +++ b/backend/app/domain/documents/__init__.py @@ -1,18 +1,29 @@ """Initialize the app.domain.documents package.""" -from .models import Chunk, Document, DocumentStatus, ParsedDocument -from .ports import ChunkBuilder, DocumentBinaryStore, DocumentParser, DocumentRepository, ParseArtifactStore +from .models import Chunk, Document, DocumentArtifact, DocumentProcessingRun, DocumentStatus, DocumentStatusEvent, ParsedDocument +from .ports import ( + ChunkBuilder, + DocumentBinaryStore, + DocumentParser, + DocumentProcessingStore, + DocumentRepository, + ParseArtifactStore, +) # Keep package boundaries explicit so backend imports stay predictable. 
__all__ = [ "Chunk", "Document", + "DocumentArtifact", + "DocumentProcessingRun", "DocumentStatus", + "DocumentStatusEvent", "ParsedDocument", "ChunkBuilder", "DocumentBinaryStore", "DocumentParser", + "DocumentProcessingStore", "DocumentRepository", "ParseArtifactStore", ] diff --git a/backend/app/domain/documents/models.py b/backend/app/domain/documents/models.py index dc4ee67..0e1beb2 100644 --- a/backend/app/domain/documents/models.py +++ b/backend/app/domain/documents/models.py @@ -76,3 +76,61 @@ class Chunk: semantic_id: str = "" block_type: str = "" metadata: dict[str, Any] = field(default_factory=dict) + + +@dataclass +class DocumentProcessingRun: + """Represent one processing attempt for a document.""" + + run_id: str + doc_id: str + trigger_type: str + run_status: str + parser_backend: str = "" + chunk_backend: str = "" + embedding_model: str = "" + index_name: str = "" + started_at: datetime = field(default_factory=utcnow) + stored_at: datetime | None = None + parsed_at: datetime | None = None + indexed_at: datetime | None = None + finished_at: datetime | None = None + layout_count: int = 0 + structure_node_count: int = 0 + semantic_block_count: int = 0 + vector_chunk_count: int = 0 + chunk_count: int = 0 + failure_stage: str = "" + error_message: str = "" + metadata: dict[str, Any] = field(default_factory=dict) + + +@dataclass +class DocumentStatusEvent: + """Represent a document lifecycle event emitted during processing.""" + + event_id: str + doc_id: str + run_id: str + from_status: str + to_status: str + stage: str + message: str = "" + metadata: dict[str, Any] = field(default_factory=dict) + occurred_at: datetime = field(default_factory=utcnow) + + +@dataclass +class DocumentArtifact: + """Represent a persisted artifact reference for one processing run.""" + + artifact_id: str + doc_id: str + run_id: str + artifact_type: str + object_name: str + content_type: str + byte_size: int = 0 + checksum: str = "" + metadata: dict[str, Any] = 
field(default_factory=dict) + created_at: datetime = field(default_factory=utcnow) diff --git a/backend/app/domain/documents/ports.py b/backend/app/domain/documents/ports.py index 6807f86..177b102 100644 --- a/backend/app/domain/documents/ports.py +++ b/backend/app/domain/documents/ports.py @@ -4,7 +4,7 @@ from __future__ import annotations from abc import ABC, abstractmethod -from .models import Chunk, Document, DocumentStatus, ParsedDocument +from .models import Chunk, Document, DocumentArtifact, DocumentProcessingRun, DocumentStatus, DocumentStatusEvent, ParsedDocument # Keep domain contracts explicit so adapters can swap implementations cleanly. @@ -128,3 +128,111 @@ class ParseArtifactStore(ABC): def get_structure_nodes(self, doc_id: str) -> list[dict]: """Return all structure nodes for a document.""" pass + + +class DocumentProcessingStore(ABC): + """Persist document processing runs, events, and artifact references.""" + + @abstractmethod + def create_run(self, run: DocumentProcessingRun) -> DocumentProcessingRun: + """Create a new processing run record.""" + pass + + @abstractmethod + def mark_run_stored( + self, + run_id: str, + *, + stored_at: object | None = None, + metadata: dict | None = None, + ) -> DocumentProcessingRun | None: + """Mark a run as having persisted the source file.""" + pass + + @abstractmethod + def mark_run_parsed( + self, + run_id: str, + *, + parser_backend: str, + layout_count: int, + structure_node_count: int, + semantic_block_count: int, + vector_chunk_count: int, + parsed_at: object | None = None, + metadata: dict | None = None, + ) -> DocumentProcessingRun | None: + """Record parse completion details for a run.""" + pass + + @abstractmethod + def mark_run_indexed( + self, + run_id: str, + *, + chunk_count: int, + index_name: str, + indexed_at: object | None = None, + finished_at: object | None = None, + metadata: dict | None = None, + ) -> DocumentProcessingRun | None: + """Mark a run as successfully indexed.""" + pass + + 
@abstractmethod + def mark_run_failed( + self, + run_id: str, + *, + failure_stage: str, + error_message: str, + finished_at: object | None = None, + metadata: dict | None = None, + ) -> DocumentProcessingRun | None: + """Mark a run as failed.""" + pass + + @abstractmethod + def append_status_event(self, event: DocumentStatusEvent) -> DocumentStatusEvent: + """Append a document status event.""" + pass + + @abstractmethod + def replace_artifacts_for_run(self, run_id: str, artifacts: list[DocumentArtifact]) -> list[DocumentArtifact]: + """Replace all artifacts for a run with the provided list.""" + pass + + @abstractmethod + def delete_by_document(self, doc_id: str) -> None: + """Delete all processing data for a document.""" + pass + + @abstractmethod + def list_runs_by_document(self, doc_id: str) -> list[DocumentProcessingRun]: + """List all processing runs for a document.""" + pass + + @abstractmethod + def get_run(self, run_id: str) -> DocumentProcessingRun | None: + """Return one processing run by identifier.""" + pass + + @abstractmethod + def list_status_events_by_document(self, doc_id: str) -> list[DocumentStatusEvent]: + """List status events for a document.""" + pass + + @abstractmethod + def list_status_events_by_run(self, run_id: str) -> list[DocumentStatusEvent]: + """List status events for a run.""" + pass + + @abstractmethod + def list_artifacts_by_document(self, doc_id: str) -> list[DocumentArtifact]: + """List artifact references for a document.""" + pass + + @abstractmethod + def list_artifacts_by_run(self, run_id: str) -> list[DocumentArtifact]: + """List artifact references for a run.""" + pass diff --git a/backend/app/infrastructure/storage/json_document_processing_store.py b/backend/app/infrastructure/storage/json_document_processing_store.py new file mode 100644 index 0000000..eca737d --- /dev/null +++ b/backend/app/infrastructure/storage/json_document_processing_store.py @@ -0,0 +1,373 @@ +"""Implement infrastructure support for json document 
processing history.""" + +from __future__ import annotations + +import json +from datetime import UTC, datetime +from pathlib import Path +from typing import Any + +from app.domain.documents import DocumentArtifact, DocumentProcessingRun, DocumentProcessingStore, DocumentStatusEvent +# Keep JSON persistence behavior aligned with the lightweight document repository adapter. + + +class JsonDocumentProcessingStore(DocumentProcessingStore): + """Persist processing history in a standalone JSON file.""" + + def __init__(self, file_path: str) -> None: + """Initialize the JSON processing history store.""" + self.file_path = Path(file_path) + self.file_path.parent.mkdir(parents=True, exist_ok=True) + if not self.file_path.exists(): + self._save(self._empty_payload()) + + def _empty_payload(self) -> dict[str, dict[str, dict[str, Any]]]: + """Return the canonical empty JSON structure for processing history.""" + return {"runs": {}, "status_events": {}, "artifacts": {}} + + def _load(self) -> dict[str, dict[str, dict[str, Any]]]: + """Load the full JSON payload and normalize missing sections.""" + if not self.file_path.exists(): + return self._empty_payload() + payload = json.loads(self.file_path.read_text(encoding="utf-8") or "{}") + normalized = self._empty_payload() + for key in normalized: + section = payload.get(key, {}) + normalized[key] = section if isinstance(section, dict) else {} + return normalized + + def _save(self, payload: dict[str, dict[str, dict[str, Any]]]) -> None: + """Persist the full JSON payload with stable formatting.""" + self.file_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8") + + def _serialize_datetime(self, value: datetime | None) -> str | None: + """Serialize optional datetimes into ISO8601 strings.""" + return value.isoformat() if value is not None else None + + def _deserialize_datetime(self, value: str | None) -> datetime | None: + """Deserialize optional ISO8601 strings into datetimes.""" + return 
datetime.fromisoformat(value) if value else None + + def _serialize_run(self, run: DocumentProcessingRun) -> dict[str, Any]: + """Serialize one processing run to a JSON-compatible payload.""" + return { + "run_id": run.run_id, + "doc_id": run.doc_id, + "trigger_type": run.trigger_type, + "run_status": run.run_status, + "parser_backend": run.parser_backend, + "chunk_backend": run.chunk_backend, + "embedding_model": run.embedding_model, + "index_name": run.index_name, + "started_at": self._serialize_datetime(run.started_at), + "stored_at": self._serialize_datetime(run.stored_at), + "parsed_at": self._serialize_datetime(run.parsed_at), + "indexed_at": self._serialize_datetime(run.indexed_at), + "finished_at": self._serialize_datetime(run.finished_at), + "layout_count": run.layout_count, + "structure_node_count": run.structure_node_count, + "semantic_block_count": run.semantic_block_count, + "vector_chunk_count": run.vector_chunk_count, + "chunk_count": run.chunk_count, + "failure_stage": run.failure_stage, + "error_message": run.error_message, + "metadata": run.metadata, + } + + def _deserialize_run(self, payload: dict[str, Any]) -> DocumentProcessingRun: + """Deserialize one JSON payload into a processing run dataclass.""" + return DocumentProcessingRun( + run_id=payload["run_id"], + doc_id=payload["doc_id"], + trigger_type=payload["trigger_type"], + run_status=payload["run_status"], + parser_backend=payload.get("parser_backend", ""), + chunk_backend=payload.get("chunk_backend", ""), + embedding_model=payload.get("embedding_model", ""), + index_name=payload.get("index_name", ""), + started_at=self._deserialize_datetime(payload.get("started_at")) or datetime.now(UTC), + stored_at=self._deserialize_datetime(payload.get("stored_at")), + parsed_at=self._deserialize_datetime(payload.get("parsed_at")), + indexed_at=self._deserialize_datetime(payload.get("indexed_at")), + finished_at=self._deserialize_datetime(payload.get("finished_at")), + 
layout_count=int(payload.get("layout_count", 0) or 0), + structure_node_count=int(payload.get("structure_node_count", 0) or 0), + semantic_block_count=int(payload.get("semantic_block_count", 0) or 0), + vector_chunk_count=int(payload.get("vector_chunk_count", 0) or 0), + chunk_count=int(payload.get("chunk_count", 0) or 0), + failure_stage=payload.get("failure_stage", ""), + error_message=payload.get("error_message", ""), + metadata=payload.get("metadata", {}), + ) + + def _serialize_event(self, event: DocumentStatusEvent) -> dict[str, Any]: + """Serialize one status event to a JSON-compatible payload.""" + return { + "event_id": event.event_id, + "doc_id": event.doc_id, + "run_id": event.run_id, + "from_status": event.from_status, + "to_status": event.to_status, + "stage": event.stage, + "message": event.message, + "metadata": event.metadata, + "occurred_at": self._serialize_datetime(event.occurred_at), + } + + def _deserialize_event(self, payload: dict[str, Any]) -> DocumentStatusEvent: + """Deserialize one JSON payload into a status event dataclass.""" + return DocumentStatusEvent( + event_id=payload["event_id"], + doc_id=payload["doc_id"], + run_id=payload["run_id"], + from_status=payload.get("from_status", ""), + to_status=payload["to_status"], + stage=payload.get("stage", ""), + message=payload.get("message", ""), + metadata=payload.get("metadata", {}), + occurred_at=self._deserialize_datetime(payload.get("occurred_at")) or datetime.now(UTC), + ) + + def _serialize_artifact(self, artifact: DocumentArtifact) -> dict[str, Any]: + """Serialize one artifact reference to a JSON-compatible payload.""" + return { + "artifact_id": artifact.artifact_id, + "doc_id": artifact.doc_id, + "run_id": artifact.run_id, + "artifact_type": artifact.artifact_type, + "object_name": artifact.object_name, + "content_type": artifact.content_type, + "byte_size": artifact.byte_size, + "checksum": artifact.checksum, + "metadata": artifact.metadata, + "created_at": 
self._serialize_datetime(artifact.created_at), + } + + def _deserialize_artifact(self, payload: dict[str, Any]) -> DocumentArtifact: + """Deserialize one JSON payload into an artifact dataclass.""" + return DocumentArtifact( + artifact_id=payload["artifact_id"], + doc_id=payload["doc_id"], + run_id=payload["run_id"], + artifact_type=payload["artifact_type"], + object_name=payload["object_name"], + content_type=payload.get("content_type", ""), + byte_size=int(payload.get("byte_size", 0) or 0), + checksum=payload.get("checksum", ""), + metadata=payload.get("metadata", {}), + created_at=self._deserialize_datetime(payload.get("created_at")) or datetime.now(UTC), + ) + + def _merge_metadata(self, original: dict[str, Any], update: dict | None) -> dict[str, Any]: + """Merge metadata updates onto an existing payload.""" + merged = dict(original) + if update: + merged.update(update) + return merged + + def create_run(self, run: DocumentProcessingRun) -> DocumentProcessingRun: + """Create a new processing run record.""" + payload = self._load() + payload["runs"][run.run_id] = self._serialize_run(run) + self._save(payload) + return run + + def mark_run_stored( + self, + run_id: str, + *, + stored_at: datetime | None = None, + metadata: dict | None = None, + ) -> DocumentProcessingRun | None: + """Mark a run as having persisted the source file.""" + payload = self._load() + run_payload = payload["runs"].get(run_id) + if not run_payload: + return None + run = self._deserialize_run(run_payload) + run.stored_at = stored_at or datetime.now(UTC) + run.metadata = self._merge_metadata(run.metadata, metadata) + payload["runs"][run_id] = self._serialize_run(run) + self._save(payload) + return run + + def mark_run_parsed( + self, + run_id: str, + *, + parser_backend: str, + layout_count: int, + structure_node_count: int, + semantic_block_count: int, + vector_chunk_count: int, + parsed_at: datetime | None = None, + metadata: dict | None = None, + ) -> DocumentProcessingRun | None: + 
"""Record parse completion details for a run.""" + payload = self._load() + run_payload = payload["runs"].get(run_id) + if not run_payload: + return None + run = self._deserialize_run(run_payload) + run.parser_backend = parser_backend + run.layout_count = layout_count + run.structure_node_count = structure_node_count + run.semantic_block_count = semantic_block_count + run.vector_chunk_count = vector_chunk_count + run.parsed_at = parsed_at or datetime.now(UTC) + run.metadata = self._merge_metadata(run.metadata, metadata) + payload["runs"][run_id] = self._serialize_run(run) + self._save(payload) + return run + + def mark_run_indexed( + self, + run_id: str, + *, + chunk_count: int, + index_name: str, + indexed_at: datetime | None = None, + finished_at: datetime | None = None, + metadata: dict | None = None, + ) -> DocumentProcessingRun | None: + """Mark a run as successfully indexed.""" + payload = self._load() + run_payload = payload["runs"].get(run_id) + if not run_payload: + return None + run = self._deserialize_run(run_payload) + now = datetime.now(UTC) + run.run_status = "succeeded" + run.chunk_count = chunk_count + run.index_name = index_name + run.indexed_at = indexed_at or now + run.finished_at = finished_at or now + run.metadata = self._merge_metadata(run.metadata, metadata) + payload["runs"][run_id] = self._serialize_run(run) + self._save(payload) + return run + + def mark_run_failed( + self, + run_id: str, + *, + failure_stage: str, + error_message: str, + finished_at: datetime | None = None, + metadata: dict | None = None, + ) -> DocumentProcessingRun | None: + """Mark a run as failed.""" + payload = self._load() + run_payload = payload["runs"].get(run_id) + if not run_payload: + return None + run = self._deserialize_run(run_payload) + run.run_status = "failed" + run.failure_stage = failure_stage + run.error_message = error_message + run.finished_at = finished_at or datetime.now(UTC) + run.metadata = self._merge_metadata(run.metadata, metadata) + 
payload["runs"][run_id] = self._serialize_run(run) + self._save(payload) + return run + + def append_status_event(self, event: DocumentStatusEvent) -> DocumentStatusEvent: + """Append a document status event.""" + payload = self._load() + payload["status_events"][event.event_id] = self._serialize_event(event) + self._save(payload) + return event + + def replace_artifacts_for_run(self, run_id: str, artifacts: list[DocumentArtifact]) -> list[DocumentArtifact]: + """Replace all artifacts for a run with the provided list.""" + payload = self._load() + payload["artifacts"] = { + artifact_id: artifact_payload + for artifact_id, artifact_payload in payload["artifacts"].items() + if artifact_payload.get("run_id") != run_id + } + for artifact in artifacts: + payload["artifacts"][artifact.artifact_id] = self._serialize_artifact(artifact) + self._save(payload) + return artifacts + + def delete_by_document(self, doc_id: str) -> None: + """Delete all processing data for a document.""" + payload = self._load() + payload["runs"] = { + run_id: run_payload + for run_id, run_payload in payload["runs"].items() + if run_payload.get("doc_id") != doc_id + } + payload["status_events"] = { + event_id: event_payload + for event_id, event_payload in payload["status_events"].items() + if event_payload.get("doc_id") != doc_id + } + payload["artifacts"] = { + artifact_id: artifact_payload + for artifact_id, artifact_payload in payload["artifacts"].items() + if artifact_payload.get("doc_id") != doc_id + } + self._save(payload) + + def list_runs_by_document(self, doc_id: str) -> list[DocumentProcessingRun]: + """List all processing runs for a document.""" + payload = self._load() + runs = [ + self._deserialize_run(run_payload) + for run_payload in payload["runs"].values() + if run_payload.get("doc_id") == doc_id + ] + runs.sort(key=lambda run: run.started_at) + return runs + + def get_run(self, run_id: str) -> DocumentProcessingRun | None: + """Return one processing run by identifier.""" + 
payload = self._load() + run_payload = payload["runs"].get(run_id) + return self._deserialize_run(run_payload) if run_payload else None + + def list_status_events_by_document(self, doc_id: str) -> list[DocumentStatusEvent]: + """List status events for a document.""" + payload = self._load() + events = [ + self._deserialize_event(event_payload) + for event_payload in payload["status_events"].values() + if event_payload.get("doc_id") == doc_id + ] + events.sort(key=lambda event: event.occurred_at) + return events + + def list_status_events_by_run(self, run_id: str) -> list[DocumentStatusEvent]: + """List status events for a run.""" + payload = self._load() + events = [ + self._deserialize_event(event_payload) + for event_payload in payload["status_events"].values() + if event_payload.get("run_id") == run_id + ] + events.sort(key=lambda event: event.occurred_at) + return events + + def list_artifacts_by_document(self, doc_id: str) -> list[DocumentArtifact]: + """List artifact references for a document.""" + payload = self._load() + artifacts = [ + self._deserialize_artifact(artifact_payload) + for artifact_payload in payload["artifacts"].values() + if artifact_payload.get("doc_id") == doc_id + ] + artifacts.sort(key=lambda artifact: artifact.created_at) + return artifacts + + def list_artifacts_by_run(self, run_id: str) -> list[DocumentArtifact]: + """List artifact references for a run.""" + payload = self._load() + artifacts = [ + self._deserialize_artifact(artifact_payload) + for artifact_payload in payload["artifacts"].values() + if artifact_payload.get("run_id") == run_id + ] + artifacts.sort(key=lambda artifact: artifact.created_at) + return artifacts diff --git a/backend/app/infrastructure/storage/postgres_document_processing_store.py b/backend/app/infrastructure/storage/postgres_document_processing_store.py new file mode 100644 index 0000000..d8a5cb3 --- /dev/null +++ b/backend/app/infrastructure/storage/postgres_document_processing_store.py @@ -0,0 +1,466 @@ 
"""Implement infrastructure support for postgres document processing history."""

from __future__ import annotations

import json
from collections.abc import Iterator
from contextlib import contextmanager
from datetime import UTC, datetime
from typing import Any

import psycopg2
import psycopg2.extras
from psycopg2.pool import ThreadedConnectionPool

from app.config.settings import settings
from app.domain.documents import DocumentArtifact, DocumentProcessingRun, DocumentProcessingStore, DocumentStatusEvent

# Keep SQL mapping local to this adapter so the domain stays storage-agnostic.

_CREATE_RUNS_TABLE = """
CREATE TABLE IF NOT EXISTS document_processing_runs (
    run_id VARCHAR(128) PRIMARY KEY,
    doc_id VARCHAR(128) NOT NULL,
    trigger_type VARCHAR(32) NOT NULL,
    run_status VARCHAR(32) NOT NULL DEFAULT 'running',
    parser_backend VARCHAR(128) NOT NULL DEFAULT '',
    chunk_backend VARCHAR(128) NOT NULL DEFAULT '',
    embedding_model VARCHAR(256) NOT NULL DEFAULT '',
    index_name VARCHAR(128) NOT NULL DEFAULT '',
    started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    stored_at TIMESTAMPTZ,
    parsed_at TIMESTAMPTZ,
    indexed_at TIMESTAMPTZ,
    finished_at TIMESTAMPTZ,
    layout_count INTEGER NOT NULL DEFAULT 0,
    structure_node_count INTEGER NOT NULL DEFAULT 0,
    semantic_block_count INTEGER NOT NULL DEFAULT 0,
    vector_chunk_count INTEGER NOT NULL DEFAULT 0,
    chunk_count INTEGER NOT NULL DEFAULT 0,
    failure_stage VARCHAR(64) NOT NULL DEFAULT '',
    error_message TEXT NOT NULL DEFAULT '',
    metadata JSONB NOT NULL DEFAULT '{}',
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    CONSTRAINT fk_dpr_doc FOREIGN KEY (doc_id) REFERENCES documents(doc_id) ON DELETE CASCADE
);
CREATE INDEX IF NOT EXISTS idx_document_processing_runs_doc_id ON document_processing_runs(doc_id, started_at DESC);
"""

_CREATE_EVENTS_TABLE = """
CREATE TABLE IF NOT EXISTS document_status_history (
    event_id VARCHAR(128) PRIMARY KEY,
    doc_id VARCHAR(128) NOT NULL,
    run_id VARCHAR(128) NOT NULL,
    from_status VARCHAR(32) NOT NULL DEFAULT '',
    to_status VARCHAR(32) NOT NULL,
    stage VARCHAR(64) NOT NULL DEFAULT '',
    message TEXT NOT NULL DEFAULT '',
    metadata JSONB NOT NULL DEFAULT '{}',
    occurred_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    CONSTRAINT fk_dsh_doc FOREIGN KEY (doc_id) REFERENCES documents(doc_id) ON DELETE CASCADE,
    CONSTRAINT fk_dsh_run FOREIGN KEY (run_id) REFERENCES document_processing_runs(run_id) ON DELETE CASCADE
);
CREATE INDEX IF NOT EXISTS idx_document_status_history_doc_id ON document_status_history(doc_id, occurred_at ASC);
CREATE INDEX IF NOT EXISTS idx_document_status_history_run_id ON document_status_history(run_id, occurred_at ASC);
"""

_CREATE_ARTIFACTS_TABLE = """
CREATE TABLE IF NOT EXISTS document_artifacts (
    artifact_id VARCHAR(128) PRIMARY KEY,
    doc_id VARCHAR(128) NOT NULL,
    run_id VARCHAR(128) NOT NULL,
    artifact_type VARCHAR(64) NOT NULL,
    object_name VARCHAR(1024) NOT NULL,
    content_type VARCHAR(128) NOT NULL DEFAULT '',
    byte_size BIGINT NOT NULL DEFAULT 0,
    checksum VARCHAR(256) NOT NULL DEFAULT '',
    metadata JSONB NOT NULL DEFAULT '{}',
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    CONSTRAINT fk_da_doc FOREIGN KEY (doc_id) REFERENCES documents(doc_id) ON DELETE CASCADE,
    CONSTRAINT fk_da_run FOREIGN KEY (run_id) REFERENCES document_processing_runs(run_id) ON DELETE CASCADE
);
CREATE INDEX IF NOT EXISTS idx_document_artifacts_doc_id ON document_artifacts(doc_id, created_at ASC);
CREATE INDEX IF NOT EXISTS idx_document_artifacts_run_id ON document_artifacts(run_id, created_at ASC);
"""

# Columns written on insert and read back into the domain dataclasses. The
# table-managed created_at/updated_at columns of the runs table are left out
# on purpose: the dataclass mapping below never reads them.
_RUN_COLUMNS = (
    "run_id",
    "doc_id",
    "trigger_type",
    "run_status",
    "parser_backend",
    "chunk_backend",
    "embedding_model",
    "index_name",
    "started_at",
    "stored_at",
    "parsed_at",
    "indexed_at",
    "finished_at",
    "layout_count",
    "structure_node_count",
    "semantic_block_count",
    "vector_chunk_count",
    "chunk_count",
    "failure_stage",
    "error_message",
    "metadata",
)
_EVENT_COLUMNS = (
    "event_id",
    "doc_id",
    "run_id",
    "from_status",
    "to_status",
    "stage",
    "message",
    "metadata",
    "occurred_at",
)
_ARTIFACT_COLUMNS = (
    "artifact_id",
    "doc_id",
    "run_id",
    "artifact_type",
    "object_name",
    "content_type",
    "byte_size",
    "checksum",
    "metadata",
    "created_at",
)

# Columns `_update_run` may assign. Column names cannot be bound as query
# parameters, so they are checked against this fixed set before being placed
# into SQL text. The key columns, `updated_at`, and `metadata` are excluded
# because `_update_run` manages them through reserved parameters.
_UPDATABLE_RUN_COLUMNS = frozenset(_RUN_COLUMNS) - {"run_id", "doc_id", "trigger_type", "metadata"}

_INSERT_RUN = (
    f"INSERT INTO document_processing_runs ({', '.join(_RUN_COLUMNS)}) "
    f"VALUES ({', '.join(f'%({column})s' for column in _RUN_COLUMNS)})"
)
_INSERT_EVENT = (
    f"INSERT INTO document_status_history ({', '.join(_EVENT_COLUMNS)}) "
    f"VALUES ({', '.join(f'%({column})s' for column in _EVENT_COLUMNS)})"
)
_INSERT_ARTIFACTS = f"INSERT INTO document_artifacts ({', '.join(_ARTIFACT_COLUMNS)}) VALUES %s"


def _insert_params(record: Any, columns: tuple[str, ...]) -> dict[str, Any]:
    """Read ``columns`` off a domain dataclass, JSON-encoding its metadata."""
    params = {column: getattr(record, column) for column in columns}
    params["metadata"] = json.dumps(record.metadata, ensure_ascii=False)
    return params


class PostgresDocumentProcessingStore(DocumentProcessingStore):
    """Persist processing history in PostgreSQL using handwritten SQL.

    Runs, status events, and artifact references live in three tables that
    are created on construction when missing. Every public method borrows
    one pooled connection for the duration of the call.
    """

    def __init__(self) -> None:
        """Initialize the store and ensure the required tables exist."""
        self._pool = ThreadedConnectionPool(
            minconn=1,
            maxconn=5,
            host=settings.postgres_host,
            port=settings.postgres_port,
            user=settings.postgres_user,
            password=settings.postgres_password,
            dbname=settings.postgres_db,
        )
        try:
            self._ensure_schema()
        except Exception:
            # Do not leave open connections behind a half-built store.
            self._pool.closeall()
            raise

    def close(self) -> None:
        """Close every pooled connection; the store must not be used afterwards."""
        if not self._pool.closed:
            self._pool.closeall()

    def _ensure_schema(self) -> None:
        """Create processing history tables and indexes if they are missing."""
        with self._conn() as conn:
            with conn.cursor() as cur:
                for ddl in (_CREATE_RUNS_TABLE, _CREATE_EVENTS_TABLE, _CREATE_ARTIFACTS_TABLE):
                    cur.execute(ddl)
            conn.commit()

    @contextmanager
    def _conn(self) -> Iterator[Any]:
        """Borrow one connection from the pool and return it afterwards.

        The connection is handed back even when the body raises. Committing
        is the caller's responsibility.
        """
        conn = self._pool.getconn()
        try:
            yield conn
        finally:
            self._pool.putconn(conn)

    def _fetch_all(self, sql: str, params: tuple[Any, ...]) -> list[dict[str, Any]]:
        """Run a read query and return every row as a plain dict."""
        with self._conn() as conn:
            with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
                cur.execute(sql, params)
                return [dict(row) for row in cur.fetchall()]

    def _fetch_one(self, sql: str, params: tuple[Any, ...]) -> dict[str, Any] | None:
        """Run a read query and return its first row as a dict, or ``None``."""
        with self._conn() as conn:
            with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
                cur.execute(sql, params)
                row = cur.fetchone()
        return dict(row) if row else None

    def _execute_and_commit(self, sql: str, params: dict[str, Any]) -> None:
        """Run one write statement and commit it."""
        with self._conn() as conn:
            with conn.cursor() as cur:
                cur.execute(sql, params)
            conn.commit()

    def _normalize_metadata(self, value: Any) -> dict[str, Any]:
        """Return a JSON-object payload regardless of the row representation.

        Accepts an already-decoded dict, JSON text, or an empty value. Any
        value that does not decode to a JSON object yields an empty dict so
        the result always matches the annotated type.
        """
        if isinstance(value, dict):
            return value
        if not value:
            return {}
        if isinstance(value, (str, bytes, bytearray)):
            value = json.loads(value)
        return value if isinstance(value, dict) else {}

    def _row_to_run(self, row: dict[str, Any]) -> DocumentProcessingRun:
        """Map one run row into the domain dataclass."""
        fields = {column: row[column] for column in _RUN_COLUMNS}
        fields["metadata"] = self._normalize_metadata(row["metadata"])
        return DocumentProcessingRun(**fields)

    def _row_to_event(self, row: dict[str, Any]) -> DocumentStatusEvent:
        """Map one event row into the domain dataclass."""
        fields = {column: row[column] for column in _EVENT_COLUMNS}
        fields["metadata"] = self._normalize_metadata(row["metadata"])
        return DocumentStatusEvent(**fields)

    def _row_to_artifact(self, row: dict[str, Any]) -> DocumentArtifact:
        """Map one artifact row into the domain dataclass."""
        fields = {column: row[column] for column in _ARTIFACT_COLUMNS}
        fields["metadata"] = self._normalize_metadata(row["metadata"])
        return DocumentArtifact(**fields)

    def _update_run(
        self,
        run_id: str,
        *,
        assignments: dict[str, Any],
        metadata: dict | None = None,
    ) -> DocumentProcessingRun | None:
        """Update one run row and return the latest stored state.

        Args:
            run_id: Identifier of the run to update.
            assignments: Column name to new value; keys must be updatable
                run columns.
            metadata: Optional object merged into the stored metadata with
                the JSONB ``||`` operator.

        Returns:
            The updated run, or ``None`` when no row has ``run_id``.

        Raises:
            ValueError: If ``assignments`` names a column that may not be
                updated through this helper.
        """
        unknown = set(assignments) - _UPDATABLE_RUN_COLUMNS
        if unknown:
            raise ValueError(f"Unsupported run columns for update: {sorted(unknown)}")

        params: dict[str, Any] = {"run_id": run_id, "updated_at": datetime.now(UTC), **assignments}
        set_clauses = [f"{column} = %({column})s" for column in assignments]
        set_clauses.append("updated_at = %(updated_at)s")
        if metadata is not None:
            set_clauses.append("metadata = COALESCE(metadata, '{}'::jsonb) || %(metadata)s::jsonb")
            params["metadata"] = json.dumps(metadata, ensure_ascii=False)
        sql = (
            "UPDATE document_processing_runs "
            f"SET {', '.join(set_clauses)} "
            "WHERE run_id = %(run_id)s "
            "RETURNING *"
        )
        with self._conn() as conn:
            with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
                cur.execute(sql, params)
                row = cur.fetchone()
            conn.commit()
        return self._row_to_run(dict(row)) if row else None

    def create_run(self, run: DocumentProcessingRun) -> DocumentProcessingRun:
        """Create a new processing run record and return ``run`` unchanged."""
        self._execute_and_commit(_INSERT_RUN, _insert_params(run, _RUN_COLUMNS))
        return run

    def mark_run_stored(
        self,
        run_id: str,
        *,
        stored_at: datetime | None = None,
        metadata: dict | None = None,
    ) -> DocumentProcessingRun | None:
        """Mark a run as having persisted its source file.

        ``stored_at`` defaults to the current UTC time.
        """
        return self._update_run(
            run_id,
            assignments={"stored_at": stored_at or datetime.now(UTC)},
            metadata=metadata,
        )

    def mark_run_parsed(
        self,
        run_id: str,
        *,
        parser_backend: str,
        layout_count: int,
        structure_node_count: int,
        semantic_block_count: int,
        vector_chunk_count: int,
        parsed_at: datetime | None = None,
        metadata: dict | None = None,
    ) -> DocumentProcessingRun | None:
        """Record parse completion metrics for a run.

        ``parsed_at`` defaults to the current UTC time.
        """
        return self._update_run(
            run_id,
            assignments={
                "parser_backend": parser_backend,
                "parsed_at": parsed_at or datetime.now(UTC),
                "layout_count": layout_count,
                "structure_node_count": structure_node_count,
                "semantic_block_count": semantic_block_count,
                "vector_chunk_count": vector_chunk_count,
            },
            metadata=metadata,
        )

    def mark_run_indexed(
        self,
        run_id: str,
        *,
        chunk_count: int,
        index_name: str,
        indexed_at: datetime | None = None,
        finished_at: datetime | None = None,
        metadata: dict | None = None,
    ) -> DocumentProcessingRun | None:
        """Mark a run as successfully indexed.

        Sets ``run_status`` to ``"succeeded"``. ``indexed_at`` and
        ``finished_at`` default to the same current UTC instant.
        """
        now = datetime.now(UTC)
        return self._update_run(
            run_id,
            assignments={
                "run_status": "succeeded",
                "chunk_count": chunk_count,
                "index_name": index_name,
                "indexed_at": indexed_at or now,
                "finished_at": finished_at or now,
            },
            metadata=metadata,
        )

    def mark_run_failed(
        self,
        run_id: str,
        *,
        failure_stage: str,
        error_message: str,
        finished_at: datetime | None = None,
        metadata: dict | None = None,
    ) -> DocumentProcessingRun | None:
        """Mark a run as failed and persist the terminal error details.

        Sets ``run_status`` to ``"failed"``. ``finished_at`` defaults to the
        current UTC time.
        """
        return self._update_run(
            run_id,
            assignments={
                "run_status": "failed",
                "failure_stage": failure_stage,
                "error_message": error_message,
                "finished_at": finished_at or datetime.now(UTC),
            },
            metadata=metadata,
        )

    def append_status_event(self, event: DocumentStatusEvent) -> DocumentStatusEvent:
        """Append a document status event and return ``event`` unchanged."""
        self._execute_and_commit(_INSERT_EVENT, _insert_params(event, _EVENT_COLUMNS))
        return event

    def replace_artifacts_for_run(self, run_id: str, artifacts: list[DocumentArtifact]) -> list[DocumentArtifact]:
        """Replace all artifact references for one run using a delete-then-insert strategy.

        The delete and the bulk insert are committed together, so a failed
        insert leaves the previous rows in place.
        """
        rows = []
        for artifact in artifacts:
            params = _insert_params(artifact, _ARTIFACT_COLUMNS)
            rows.append(tuple(params[column] for column in _ARTIFACT_COLUMNS))
        with self._conn() as conn:
            with conn.cursor() as cur:
                cur.execute("DELETE FROM document_artifacts WHERE run_id = %s", (run_id,))
                if rows:
                    psycopg2.extras.execute_values(cur, _INSERT_ARTIFACTS, rows)
            conn.commit()
        return artifacts

    def delete_by_document(self, doc_id: str) -> None:
        """Delete all processing rows for a document explicitly."""
        with self._conn() as conn:
            with conn.cursor() as cur:
                # Child tables first, then the runs they reference.
                for table in ("document_status_history", "document_artifacts", "document_processing_runs"):
                    cur.execute(f"DELETE FROM {table} WHERE doc_id = %s", (doc_id,))
            conn.commit()

    def list_runs_by_document(self, doc_id: str) -> list[DocumentProcessingRun]:
        """List processing runs for a document in chronological order."""
        sql = "SELECT * FROM document_processing_runs WHERE doc_id = %s ORDER BY started_at ASC"
        return [self._row_to_run(row) for row in self._fetch_all(sql, (doc_id,))]

    def get_run(self, run_id: str) -> DocumentProcessingRun | None:
        """Return one processing run by identifier, or ``None`` when absent."""
        row = self._fetch_one("SELECT * FROM document_processing_runs WHERE run_id = %s", (run_id,))
        return self._row_to_run(row) if row else None

    def list_status_events_by_document(self, doc_id: str) -> list[DocumentStatusEvent]:
        """List all status events for a document, oldest first."""
        sql = "SELECT * FROM document_status_history WHERE doc_id = %s ORDER BY occurred_at ASC"
        return [self._row_to_event(row) for row in self._fetch_all(sql, (doc_id,))]

    def list_status_events_by_run(self, run_id: str) -> list[DocumentStatusEvent]:
        """List all status events for a run, oldest first."""
        sql = "SELECT * FROM document_status_history WHERE run_id = %s ORDER BY occurred_at ASC"
        return [self._row_to_event(row) for row in self._fetch_all(sql, (run_id,))]

    def list_artifacts_by_document(self, doc_id: str) -> list[DocumentArtifact]:
        """List all artifact references for a document, oldest first."""
        sql = "SELECT * FROM document_artifacts WHERE doc_id = %s ORDER BY created_at ASC"
        return [self._row_to_artifact(row) for row in self._fetch_all(sql, (doc_id,))]

    def list_artifacts_by_run(self, run_id: str) -> list[DocumentArtifact]:
        """List all artifact references for a run, oldest first."""
        sql = "SELECT * FROM document_artifacts WHERE run_id = %s ORDER BY created_at ASC"
        return [self._row_to_artifact(row) for row in self._fetch_all(sql, (run_id,))]
binary store.""" @@ -226,6 +236,7 @@ def get_document_command_service() -> DocumentCommandService: embedding_provider=get_embedding_provider(), vector_index=get_vector_index(), parse_artifact_store=get_parse_artifact_store(), + document_processing_store=get_document_processing_store(), ) diff --git a/frontend/src/components/layout/AppShell.tsx b/frontend/src/components/layout/AppShell.tsx index 5f62643..2603c39 100644 --- a/frontend/src/components/layout/AppShell.tsx +++ b/frontend/src/components/layout/AppShell.tsx @@ -1,4 +1,4 @@ -import { Outlet, useLocation } from 'react-router-dom'; +import { useLocation } from 'react-router-dom'; import { FooterLayout } from './FooterLayout'; import { HeaderLayout } from './HeaderLayout'; @@ -15,7 +15,6 @@ export function AppShell() { - diff --git a/frontend/src/components/layout/ContentLayout.tsx b/frontend/src/components/layout/ContentLayout.tsx index a63802b..5530af9 100644 --- a/frontend/src/components/layout/ContentLayout.tsx +++ b/frontend/src/components/layout/ContentLayout.tsx @@ -22,12 +22,12 @@ export function ContentLayout({ children, tab }: ContentLayoutProps) {
"""Test PostgreSQL-backed document processing history storage."""

from __future__ import annotations

import uuid
from datetime import UTC, datetime

import psycopg2
import pytest

from app.domain.documents import Document, DocumentArtifact, DocumentProcessingRun, DocumentStatus, DocumentStatusEvent
from app.infrastructure.storage.postgres_document_processing_store import PostgresDocumentProcessingStore
from app.infrastructure.storage.postgres_document_repository import PostgresDocumentRepository

# Keep these tests focused on relational storage behavior only.


def _unique_id(prefix: str) -> str:
    """Return ``prefix`` joined to ten random hex characters."""
    return f"{prefix}-{uuid.uuid4().hex[:10]}"


def _json_artifact(doc_id: str, run_id: str, artifact_type: str, file_stem: str, **extra) -> DocumentArtifact:
    """Build a JSON artifact reference stored under ``artifacts/<doc_id>/``."""
    return DocumentArtifact(
        artifact_id=_unique_id("art"),
        doc_id=doc_id,
        run_id=run_id,
        artifact_type=artifact_type,
        object_name=f"artifacts/{doc_id}/{file_stem}.json",
        content_type="application/json",
        **extra,
    )


def _build_document(doc_id: str) -> Document:
    """Create a minimal document row required by the foreign keys."""
    file_name = "processing-test.pdf"
    return Document(
        doc_id=doc_id,
        doc_name="Processing Test",
        file_name=file_name,
        object_name=f"{doc_id}/{file_name}",
        content_type="application/pdf",
        size_bytes=128,
        status=DocumentStatus.PENDING,
    )


def _connectivity_ready() -> bool:
    """Return whether the configured PostgreSQL instance is reachable for integration tests."""
    try:
        # Both connecting and issuing a trivial query must succeed.
        PostgresDocumentRepository().list(limit=1)
    except psycopg2.Error:
        return False
    return True


pytestmark = pytest.mark.skipif(not _connectivity_ready(), reason="PostgreSQL test backend is not reachable")


def test_postgres_document_processing_store_supports_full_run_lifecycle():
    """Persist run, event, and artifact history and read it back as dataclasses."""
    repository = PostgresDocumentRepository()
    store = PostgresDocumentProcessingStore()
    doc_id = _unique_id("proc")
    run_id = _unique_id("run")
    base_time = datetime.now(UTC)

    repository.create(_build_document(doc_id))
    try:
        new_run = DocumentProcessingRun(
            run_id=run_id,
            doc_id=doc_id,
            trigger_type="upload",
            run_status="running",
            parser_backend="aliyun",
            chunk_backend="aliyun",
            embedding_model="text-embedding-v3",
            started_at=base_time,
            metadata={"origin": "test"},
        )
        created = store.create_run(new_run)

        stored = store.mark_run_stored(run_id, stored_at=base_time, metadata={"stored": True})
        parsed = store.mark_run_parsed(
            run_id,
            parser_backend="fake_parser",
            layout_count=2,
            structure_node_count=3,
            semantic_block_count=4,
            vector_chunk_count=5,
            parsed_at=base_time,
            metadata={"parse_task_id": "task-1"},
        )
        indexed = store.mark_run_indexed(
            run_id,
            chunk_count=6,
            index_name="regulations_dense_1024_v1",
            indexed_at=base_time,
            finished_at=base_time,
            metadata={"collection": "regulations_dense_1024_v1"},
        )
        indexed_event = DocumentStatusEvent(
            event_id=_unique_id("evt"),
            doc_id=doc_id,
            run_id=run_id,
            from_status="parsed",
            to_status="indexed",
            stage="index",
            message="Indexed successfully",
            metadata={"chunk_count": 6},
            occurred_at=base_time,
        )
        event = store.append_status_event(indexed_event)
        artifacts = store.replace_artifacts_for_run(
            run_id,
            [
                _json_artifact(doc_id, run_id, "layouts", "layouts", created_at=base_time),
                _json_artifact(doc_id, run_id, "vector_chunks", "vector_chunks", created_at=base_time),
            ],
        )

        fetched = store.get_run(run_id)
        run_rows = store.list_runs_by_document(doc_id)
        event_rows = store.list_status_events_by_document(doc_id)
        artifact_rows = store.list_artifacts_by_run(run_id)

        assert created.run_id == run_id
        assert stored is not None
        assert stored.stored_at is not None
        assert parsed is not None
        assert parsed.parser_backend == "fake_parser"
        assert indexed is not None
        assert indexed.run_status == "succeeded"
        assert fetched is not None
        assert fetched.chunk_count == 6
        assert isinstance(run_rows[0], DocumentProcessingRun)
        assert isinstance(event_rows[0], DocumentStatusEvent)
        assert isinstance(artifact_rows[0], DocumentArtifact)
        assert event_rows[0].event_id == event.event_id
        written_types = {artifact.artifact_type for artifact in artifacts}
        read_types = {artifact.artifact_type for artifact in artifact_rows}
        assert written_types == read_types
    finally:
        store.delete_by_document(doc_id)
        repository.delete(doc_id)


def test_postgres_document_processing_store_replaces_artifacts_and_deletes_document_data():
    """Replace artifact rows idempotently and remove all history rows for one document."""
    repository = PostgresDocumentRepository()
    store = PostgresDocumentProcessingStore()
    doc_id = _unique_id("proc")
    run_id = _unique_id("run")

    repository.create(_build_document(doc_id))
    try:
        store.create_run(
            DocumentProcessingRun(run_id=run_id, doc_id=doc_id, trigger_type="retry", run_status="running")
        )

        first = store.replace_artifacts_for_run(run_id, [_json_artifact(doc_id, run_id, "layouts", "layouts-v1")])
        second = store.replace_artifacts_for_run(run_id, [_json_artifact(doc_id, run_id, "layouts", "layouts-v2")])
        store.append_status_event(
            DocumentStatusEvent(
                event_id=_unique_id("evt"),
                doc_id=doc_id,
                run_id=run_id,
                from_status="pending",
                to_status="failed",
                stage="parse",
                message="failed",
            )
        )
        failed = store.mark_run_failed(run_id, failure_stage="parse", error_message="boom")

        artifact_rows = store.list_artifacts_by_run(run_id)
        assert len(first) == 1
        assert len(second) == 1
        # Only the second batch may survive the replacement.
        assert len(artifact_rows) == 1
        assert artifact_rows[0].object_name.endswith("layouts-v2.json")
        assert failed is not None
        assert failed.run_status == "failed"

        store.delete_by_document(doc_id)

        assert store.list_runs_by_document(doc_id) == []
        assert store.list_status_events_by_document(doc_id) == []
        assert store.list_artifacts_by_document(doc_id) == []
    finally:
        repository.delete(doc_id)
from dataclasses import field


@dataclass
class FakeProcessingStore:
    """Record processing history method calls for orchestration assertions.

    Every collection starts empty and is private to the instance. Lists
    passed to the constructor are kept, so a test can seed the fake.
    """

    # Runs handed to create_run, in call order.
    runs: list[DocumentProcessingRun] = field(default_factory=list)
    # Events handed to append_status_event, in call order.
    status_events: list[DocumentStatusEvent] = field(default_factory=list)
    # One entry per replace_artifacts_for_run call.
    artifact_batches: list[list[DocumentArtifact]] = field(default_factory=list)
    # doc_id of every delete_by_document call.
    deleted_doc_ids: list[str] = field(default_factory=list)
    # run_id of every mark_run_stored call.
    stored_run_ids: list[str] = field(default_factory=list)
    # Keyword payloads of the mark_run_parsed / indexed / failed calls.
    parsed_calls: list[dict] = field(default_factory=list)
    indexed_calls: list[dict] = field(default_factory=list)
    failed_calls: list[dict] = field(default_factory=list)

    def _find_run(self, run_id: str) -> DocumentProcessingRun | None:
        """Return the first recorded run with ``run_id``, or ``None``."""
        return next((run for run in self.runs if run.run_id == run_id), None)

    def create_run(self, run: DocumentProcessingRun) -> DocumentProcessingRun:
        """Store the created run and return it unchanged."""
        self.runs.append(run)
        return run

    def mark_run_stored(self, run_id: str, *, stored_at=None, metadata: dict | None = None) -> DocumentProcessingRun | None:
        """Record that one run reached the stored stage."""
        self.stored_run_ids.append(run_id)
        return self._find_run(run_id)

    def mark_run_parsed(
        self,
        run_id: str,
        *,
        parser_backend: str,
        layout_count: int,
        structure_node_count: int,
        semantic_block_count: int,
        vector_chunk_count: int,
        parsed_at=None,
        metadata: dict | None = None,
    ) -> DocumentProcessingRun | None:
        """Record parse metrics for one run."""
        self.parsed_calls.append(
            {
                "run_id": run_id,
                "parser_backend": parser_backend,
                "layout_count": layout_count,
                "structure_node_count": structure_node_count,
                "semantic_block_count": semantic_block_count,
                "vector_chunk_count": vector_chunk_count,
                "metadata": metadata or {},
            }
        )
        return self._find_run(run_id)

    def mark_run_indexed(
        self,
        run_id: str,
        *,
        chunk_count: int,
        index_name: str,
        indexed_at=None,
        finished_at=None,
        metadata: dict | None = None,
    ) -> DocumentProcessingRun | None:
        """Record index completion for one run."""
        self.indexed_calls.append(
            {
                "run_id": run_id,
                "chunk_count": chunk_count,
                "index_name": index_name,
                "metadata": metadata or {},
            }
        )
        return self._find_run(run_id)

    def mark_run_failed(
        self,
        run_id: str,
        *,
        failure_stage: str,
        error_message: str,
        finished_at=None,
        metadata: dict | None = None,
    ) -> DocumentProcessingRun | None:
        """Record terminal failure details for one run."""
        self.failed_calls.append(
            {
                "run_id": run_id,
                "failure_stage": failure_stage,
                "error_message": error_message,
                "metadata": metadata or {},
            }
        )
        return self._find_run(run_id)

    def append_status_event(self, event: DocumentStatusEvent) -> DocumentStatusEvent:
        """Store one status event."""
        self.status_events.append(event)
        return event

    def replace_artifacts_for_run(self, run_id: str, artifacts: list[DocumentArtifact]) -> list[DocumentArtifact]:
        """Store one artifact replacement batch.

        Earlier batches are kept so tests can inspect every call.
        """
        self.artifact_batches.append(artifacts)
        return artifacts

    def delete_by_document(self, doc_id: str) -> None:
        """Record an explicit document-history delete request."""
        self.deleted_doc_ids.append(doc_id)

    def list_runs_by_document(self, doc_id: str) -> list[DocumentProcessingRun]:
        """Return runs for completeness of the fake port."""
        return [run for run in self.runs if run.doc_id == doc_id]

    def get_run(self, run_id: str) -> DocumentProcessingRun | None:
        """Return one run for completeness of the fake port."""
        return self._find_run(run_id)

    def list_status_events_by_document(self, doc_id: str) -> list[DocumentStatusEvent]:
        """Return status events for completeness of the fake port."""
        return [event for event in self.status_events if event.doc_id == doc_id]

    def list_status_events_by_run(self, run_id: str) -> list[DocumentStatusEvent]:
        """Return status events for completeness of the fake port."""
        return [event for event in self.status_events if event.run_id == run_id]

    def list_artifacts_by_document(self, doc_id: str) -> list[DocumentArtifact]:
        """Return artifact references for completeness of the fake port."""
        return [artifact for batch in self.artifact_batches for artifact in batch if artifact.doc_id == doc_id]

    def list_artifacts_by_run(self, run_id: str) -> list[DocumentArtifact]:
        """Return artifact references for completeness of the fake port."""
        return [artifact for batch in self.artifact_batches for artifact in batch if artifact.run_id == run_id]


class FailingParser:
    """Raise a deterministic parser failure for failure-stage assertions."""

    def parse(self, *, file_path: str, doc_id: str, doc_name: str) -> ParsedDocument:
        """Always raise ``RuntimeError("parser exploded")``."""
        raise RuntimeError("parser exploded")
= FakeBinaryStore() + embedding_provider = FakeEmbeddingProvider() + vector_index = FakeVectorIndex() + processing_store = FakeProcessingStore() + + repository.create( + Document( + doc_id="doc-retry", + doc_name="Retry Doc", + file_name="retry.pdf", + object_name="doc-retry/retry.pdf", + content_type="application/pdf", + size_bytes=4, + regulation_type="车辆安全", + version="2026", + metadata={"generate_summary": False}, + ) + ) + binary_store.save( + object_name="doc-retry/retry.pdf", + data=b"data", + content_type="application/pdf", + metadata={"doc_id": "doc-retry"}, + ) + + service = DocumentCommandService( + document_repository=repository, + binary_store=binary_store, + parser=FakeParser(), + chunk_builder=FakeChunkBuilder(), + embedding_provider=embedding_provider, + vector_index=vector_index, + document_processing_store=processing_store, + ) + + result = service.retry("doc-retry") + + assert result.status == "indexed" + assert processing_store.runs[0].trigger_type == "retry" + + +def test_document_command_service_records_failed_processing_stage(): + repository = FakeRepository() + binary_store = FakeBinaryStore() + embedding_provider = FakeEmbeddingProvider() + vector_index = FakeVectorIndex() + processing_store = FakeProcessingStore() + service = DocumentCommandService( + document_repository=repository, + binary_store=binary_store, + parser=FailingParser(), + chunk_builder=FakeChunkBuilder(), + embedding_provider=embedding_provider, + vector_index=vector_index, + document_processing_store=processing_store, + ) + + result = service.upload_and_process( + doc_id="doc-fail", + file_name="test.pdf", + content=b"dummy pdf bytes", + content_type="application/pdf", + doc_name="测试法规", + regulation_type="车辆安全", + version="2026", + generate_summary=False, + ) + + assert result.status == "failed" + assert processing_store.failed_calls[0]["failure_stage"] == "parse" + assert processing_store.status_events[-1].to_status == "failed" + assert 
repository.get("doc-fail").metadata["failure_stage"] == "parse" + + +def test_document_command_service_delete_cleans_processing_history_when_present(): + repository = FakeRepository() + binary_store = FakeBinaryStore() + vector_index = FakeVectorIndex() + processing_store = FakeProcessingStore() + repository.create( + Document( + doc_id="doc-delete", + doc_name="Delete Doc", + file_name="delete.pdf", + object_name="doc-delete/delete.pdf", + content_type="application/pdf", + size_bytes=4, + ) + ) + service = DocumentCommandService( + document_repository=repository, + binary_store=binary_store, + parser=FakeParser(), + chunk_builder=FakeChunkBuilder(), + embedding_provider=FakeEmbeddingProvider(), + vector_index=vector_index, + document_processing_store=processing_store, + ) + + deleted = service.delete("doc-delete") + + assert deleted is True + assert processing_store.deleted_doc_ids == ["doc-delete"] + + +def test_document_command_service_persists_processing_history_with_json_store(tmp_path: Path): + repository = JsonDocumentRepository(str(tmp_path / "documents.json")) + processing_store = JsonDocumentProcessingStore(str(tmp_path / "document_processing.json")) + binary_store = FakeBinaryStore() + embedding_provider = FakeEmbeddingProvider() + vector_index = FakeVectorIndex() + service = DocumentCommandService( + document_repository=repository, + binary_store=binary_store, + parser=FakeParser(), + chunk_builder=FakeChunkBuilder(), + embedding_provider=embedding_provider, + vector_index=vector_index, + document_processing_store=processing_store, + ) + + result = service.upload_and_process( + doc_id="doc-json-flow", + file_name="test.pdf", + content=b"dummy pdf bytes", + content_type="application/pdf", + doc_name="测试法规", + regulation_type="车辆安全", + version="2026", + generate_summary=False, + ) + + stored = repository.get("doc-json-flow") + runs = processing_store.list_runs_by_document("doc-json-flow") + events = 
processing_store.list_status_events_by_document("doc-json-flow") + artifacts = processing_store.list_artifacts_by_document("doc-json-flow") + + assert result.status == "indexed" + assert stored is not None and stored.status == DocumentStatus.INDEXED + assert len(runs) == 1 + assert runs[0].trigger_type == "upload" + assert runs[0].run_status == "succeeded" + assert [event.to_status for event in events] == ["pending", "stored", "parsed", "indexed"] + assert {artifact.artifact_type for artifact in artifacts} == { + "layouts", + "structure_nodes", + "semantic_blocks", + "vector_chunks", + } + + +def test_document_command_service_retry_creates_second_json_processing_run(tmp_path: Path): + repository = JsonDocumentRepository(str(tmp_path / "documents.json")) + processing_store = JsonDocumentProcessingStore(str(tmp_path / "document_processing.json")) + binary_store = FakeBinaryStore() + repository.create( + Document( + doc_id="doc-json-retry", + doc_name="Retry Doc", + file_name="retry.pdf", + object_name="doc-json-retry/retry.pdf", + content_type="application/pdf", + size_bytes=4, + regulation_type="车辆安全", + version="2026", + metadata={"generate_summary": False}, + ) + ) + binary_store.save( + object_name="doc-json-retry/retry.pdf", + data=b"data", + content_type="application/pdf", + metadata={"doc_id": "doc-json-retry"}, + ) + service = DocumentCommandService( + document_repository=repository, + binary_store=binary_store, + parser=FakeParser(), + chunk_builder=FakeChunkBuilder(), + embedding_provider=FakeEmbeddingProvider(), + vector_index=FakeVectorIndex(), + document_processing_store=processing_store, + ) + + first = service.retry("doc-json-retry") + second = service.retry("doc-json-retry") + runs = processing_store.list_runs_by_document("doc-json-retry") + + assert first.status == "indexed" + assert second.status == "indexed" + assert len(runs) == 2 + assert {run.trigger_type for run in runs} == {"retry"} + + +def 
test_document_command_service_delete_removes_json_processing_history(tmp_path: Path): + repository = JsonDocumentRepository(str(tmp_path / "documents.json")) + processing_store = JsonDocumentProcessingStore(str(tmp_path / "document_processing.json")) + binary_store = FakeBinaryStore() + service = DocumentCommandService( + document_repository=repository, + binary_store=binary_store, + parser=FakeParser(), + chunk_builder=FakeChunkBuilder(), + embedding_provider=FakeEmbeddingProvider(), + vector_index=FakeVectorIndex(), + document_processing_store=processing_store, + ) + + service.upload_and_process( + doc_id="doc-json-delete", + file_name="delete.pdf", + content=b"delete me", + content_type="application/pdf", + doc_name="Delete Doc", + regulation_type="车辆安全", + version="2026", + generate_summary=False, + ) + + deleted = service.delete("doc-json-delete") + + assert deleted is True + assert processing_store.list_runs_by_document("doc-json-delete") == [] + assert processing_store.list_status_events_by_document("doc-json-delete") == [] + assert processing_store.list_artifacts_by_document("doc-json-delete") == [] + + +def test_bootstrap_returns_json_processing_store_for_json_backend(tmp_path: Path): + original_backend = bootstrap.settings.document_repository_backend + original_path = bootstrap.settings.document_processing_metadata_path + bootstrap.get_document_processing_store.cache_clear() + try: + bootstrap.settings.document_repository_backend = "json" + bootstrap.settings.document_processing_metadata_path = str(tmp_path / "document_processing.json") + + store = bootstrap.get_document_processing_store() + + assert store.__class__.__name__ == "JsonDocumentProcessingStore" + finally: + bootstrap.settings.document_repository_backend = original_backend + bootstrap.settings.document_processing_metadata_path = original_path + bootstrap.get_document_processing_store.cache_clear() def test_bootstrap_defaults_to_aliyun_parser_and_chunk_builder(): 
bootstrap.get_parser.cache_clear() diff --git a/tests/test_json_document_processing_store.py b/tests/test_json_document_processing_store.py new file mode 100644 index 0000000..25bb5f0 --- /dev/null +++ b/tests/test_json_document_processing_store.py @@ -0,0 +1,184 @@ +"""Test JSON-backed document processing history storage.""" + +from __future__ import annotations + +import json +from datetime import UTC, datetime +from pathlib import Path + +from app.domain.documents import DocumentArtifact, DocumentProcessingRun, DocumentStatusEvent +from app.infrastructure.storage.json_document_processing_store import JsonDocumentProcessingStore +# Keep JSON processing-store tests focused on local file persistence behavior. + + +def test_json_document_processing_store_initializes_missing_file(tmp_path: Path): + """Create the backing file with the canonical empty payload on first use.""" + file_path = tmp_path / "document_processing.json" + + store = JsonDocumentProcessingStore(str(file_path)) + + payload = json.loads(file_path.read_text(encoding="utf-8")) + assert payload == {"runs": {}, "status_events": {}, "artifacts": {}} + assert store.list_runs_by_document("missing") == [] + + +def test_json_document_processing_store_supports_full_run_lifecycle(tmp_path: Path): + """Persist runs, events, and artifacts and read them back as dataclasses.""" + file_path = tmp_path / "document_processing.json" + store = JsonDocumentProcessingStore(str(file_path)) + doc_id = "doc-json" + run_id = "run-json" + event_id = "evt-json" + base_time = datetime.now(UTC) + + created = store.create_run( + DocumentProcessingRun( + run_id=run_id, + doc_id=doc_id, + trigger_type="upload", + run_status="running", + parser_backend="aliyun", + chunk_backend="aliyun", + embedding_model="text-embedding-v3", + started_at=base_time, + metadata={"origin": "json-test"}, + ) + ) + stored = store.mark_run_stored(run_id, stored_at=base_time, metadata={"stored": True}) + parsed = store.mark_run_parsed( + run_id, + 
parser_backend="fake_parser", + layout_count=1, + structure_node_count=2, + semantic_block_count=3, + vector_chunk_count=4, + parsed_at=base_time, + metadata={"parse_task_id": "task-json"}, + ) + indexed = store.mark_run_indexed( + run_id, + chunk_count=5, + index_name="regulations_dense_1024_v1", + indexed_at=base_time, + finished_at=base_time, + metadata={"collection": "regulations_dense_1024_v1"}, + ) + event = store.append_status_event( + DocumentStatusEvent( + event_id=event_id, + doc_id=doc_id, + run_id=run_id, + from_status="parsed", + to_status="indexed", + stage="index", + message="Indexed", + metadata={"chunk_count": 5}, + occurred_at=base_time, + ) + ) + artifacts = store.replace_artifacts_for_run( + run_id, + [ + DocumentArtifact( + artifact_id="art-layouts", + doc_id=doc_id, + run_id=run_id, + artifact_type="layouts", + object_name="artifacts/doc-json/layouts.json", + content_type="application/json", + created_at=base_time, + ), + DocumentArtifact( + artifact_id="art-vectors", + doc_id=doc_id, + run_id=run_id, + artifact_type="vector_chunks", + object_name="artifacts/doc-json/vector_chunks.json", + content_type="application/json", + created_at=base_time, + ), + ], + ) + + fetched = store.get_run(run_id) + run_rows = store.list_runs_by_document(doc_id) + event_rows = store.list_status_events_by_run(run_id) + artifact_rows = store.list_artifacts_by_document(doc_id) + + assert created.run_id == run_id + assert stored is not None and stored.metadata["stored"] is True + assert parsed is not None and parsed.structure_node_count == 2 + assert indexed is not None and indexed.run_status == "succeeded" + assert fetched is not None and fetched.chunk_count == 5 + assert run_rows[0].started_at == base_time + assert event_rows[0].event_id == event.event_id + assert artifact_rows[0].doc_id == doc_id + assert {artifact.artifact_type for artifact in artifacts} == {artifact.artifact_type for artifact in artifact_rows} + + +def 
test_json_document_processing_store_replaces_artifacts_and_deletes_by_document(tmp_path: Path): + """Replace one run's artifacts idempotently and remove all history for a document.""" + file_path = tmp_path / "document_processing.json" + store = JsonDocumentProcessingStore(str(file_path)) + doc_id = "doc-delete" + run_id = "run-delete" + + store.create_run( + DocumentProcessingRun( + run_id=run_id, + doc_id=doc_id, + trigger_type="retry", + run_status="running", + ) + ) + store.append_status_event( + DocumentStatusEvent( + event_id="evt-delete", + doc_id=doc_id, + run_id=run_id, + from_status="pending", + to_status="stored", + stage="store", + occurred_at=datetime.now(UTC), + ) + ) + first = store.replace_artifacts_for_run( + run_id, + [ + DocumentArtifact( + artifact_id="art-first", + doc_id=doc_id, + run_id=run_id, + artifact_type="layouts", + object_name="artifacts/doc-delete/layouts-v1.json", + content_type="application/json", + ) + ], + ) + second = store.replace_artifacts_for_run( + run_id, + [ + DocumentArtifact( + artifact_id="art-second", + doc_id=doc_id, + run_id=run_id, + artifact_type="layouts", + object_name="artifacts/doc-delete/layouts-v2.json", + content_type="application/json", + ) + ], + ) + failed = store.mark_run_failed(run_id, failure_stage="parse", error_message="boom") + + artifact_rows = store.list_artifacts_by_run(run_id) + assert len(first) == 1 + assert len(second) == 1 + assert len(artifact_rows) == 1 + assert artifact_rows[0].object_name.endswith("layouts-v2.json") + assert failed is not None and failed.run_status == "failed" + + store.delete_by_document(doc_id) + + assert store.list_runs_by_document(doc_id) == [] + assert store.list_status_events_by_document(doc_id) == [] + assert store.list_artifacts_by_document(doc_id) == [] diff --git a/uv.lock b/uv.lock index 99508b1..d3f3e3f 100644 --- a/uv.lock +++ b/uv.lock @@ -35,7 +35,6 @@ dependencies = [ { name = "python-dotenv" }, { name = "python-multipart" }, { name = "redis" }, - { name 
= "sqlalchemy" }, { name = "tenacity" }, { name = "uvicorn", extra = ["standard"] }, ] @@ -68,7 +67,6 @@ requires-dist = [ { name = "python-dotenv", specifier = ">=1.0.0" }, { name = "python-multipart", specifier = ">=0.0.6" }, { name = "redis", specifier = ">=4.5.0" }, - { name = "sqlalchemy", specifier = ">=2.0.0" }, { name = "tenacity", specifier = ">=8.2.0" }, { name = "uvicorn", extras = ["standard"], specifier = ">=0.23.0" }, ] @@ -815,7 +813,7 @@ name = "exceptiongroup" version = "1.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/50/79/66800aadf48771f6b62f7eb014e352e5d06856655206165d775e675a02c9/exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219", size = 30371, upload-time = "2025-11-21T23:01:54.787Z" } wheels = [ @@ -959,76 +957,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9a/9a/e35b4a917281c0b8419d4207f4334c8e8c5dbf4f3f5f9ada73958d937dcc/frozenlist-1.8.0-py3-none-any.whl", hash = "sha256:0c18a16eab41e82c295618a77502e17b195883241c563b00f0aa5106fc4eaa0d", size = 13409, upload-time = "2025-10-06T05:38:16.721Z" }, ] -[[package]] -name = "greenlet" -version = "3.5.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/6d/6e/802acd792aebb2256fbbee8cacf2727faaeb6f240ac11008f09eae4414bc/greenlet-3.5.1.tar.gz", hash = "sha256:5a56aeb7d5d9cc4b3a735efb5095bd4b4f6f0e4f93e5ca876d0e2315137b7829", size = 197356, upload-time = "2026-05-20T15:05:03.917Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/1d/21/117c8710abb7f146d804a124c07eb5964a60b90d02b72452885aecc18efa/greenlet-3.5.1-cp310-cp310-macosx_11_0_universal2.whl", hash = 
"sha256:7eacb17a9d41538a2bc4912eba5ef13823c83cb69e4d141d0813debe7163187f", size = 283510, upload-time = "2026-05-20T13:12:26.475Z" }, - { url = "https://files.pythonhosted.org/packages/b9/f7/6762a56fa5f6c2295c449c6524e10ce481e381c994cc44d9d03aef0700fb/greenlet-3.5.1-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e5cc9606aa5f4e0bde0d3bd502b44f743864c3ffa5cfa1011b1e30f5aa02366f", size = 599696, upload-time = "2026-05-20T14:00:02.906Z" }, - { url = "https://files.pythonhosted.org/packages/0f/05/85a511e68ee109aff0aa00b4b497806091dd2d82ce209e49c6e801bd5d92/greenlet-3.5.1-cp310-cp310-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c3d35f87c7253b715d13d679e0783d845910144f282cb939fe1ba4ac8616269c", size = 612618, upload-time = "2026-05-20T14:05:39.202Z" }, - { url = "https://files.pythonhosted.org/packages/89/b8/8b83d18ae07c46c019617f35afd7b47aab7f9b4fbb12fc637d681e10bdd8/greenlet-3.5.1-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:540dae7b956209af4d70a3be35927b4055f617763771e5e84a5255bea934d2f5", size = 612947, upload-time = "2026-05-20T13:14:23.469Z" }, - { url = "https://files.pythonhosted.org/packages/5d/14/ad1f9fc9b82384c010212464a3702bd911f95dab2f1180bc6fbcfb1f958c/greenlet-3.5.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ed8cdb691169715a9a492844a83246f090182247d1a5031dc78a403f68ba1e97", size = 1571425, upload-time = "2026-05-20T14:02:22.671Z" }, - { url = "https://files.pythonhosted.org/packages/46/1c/43b8203cf10f4292c9e3d270e9e5f5ade79115a0a0ca5ea6f1be5f8915a7/greenlet-3.5.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:9d59e840387076a51016777a9328b3f2c427c6f9208a6e958bad251be50a648d", size = 1638688, upload-time = "2026-05-20T13:14:30.026Z" }, - { url = "https://files.pythonhosted.org/packages/ac/6e/0344b1e99f58f71715456e46492101fd2daa408957b8186ade0a4b515da7/greenlet-3.5.1-cp310-cp310-win_amd64.whl", hash = 
"sha256:b9152fca4a6466e114aaec745ae61cba739903a109754a9d4e1262f01e9259b1", size = 237763, upload-time = "2026-05-20T13:11:35.659Z" }, - { url = "https://files.pythonhosted.org/packages/42/3c/ff890b466eaba2b0f5e6bdfff025f8c75f41b8ffdc3dbc3d24ad261e764a/greenlet-3.5.1-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:73f78f9b9f0a5c06e5c946ba1e8e36f5114923b6be109ee618c54f079c3ea14f", size = 284764, upload-time = "2026-05-20T13:09:10.204Z" }, - { url = "https://files.pythonhosted.org/packages/81/0e/5e5457be3d256918f6a4756f073548a3f0190836e2cc94aa6d0d617a940b/greenlet-3.5.1-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a0cbed8bb44e23c5b199f888f4e4ce096b45ad9f25ff74a7ad0213875e936bb2", size = 603479, upload-time = "2026-05-20T14:00:04.757Z" }, - { url = "https://files.pythonhosted.org/packages/6d/e1/f89a21d58d308298e6f275f13a1b472ed96c680b601a371b08be6a725989/greenlet-3.5.1-cp311-cp311-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a203a8bd0acb0701653d3bbb26e404854a68674139ed5cbb778830f42b09bb33", size = 615495, upload-time = "2026-05-20T14:05:40.87Z" }, - { url = "https://files.pythonhosted.org/packages/75/de/af6cef182862d2ccd6975440d21c9058a77c3f9b469abf94e322dfd2e0e3/greenlet-3.5.1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8a271fcd66c74615cda6a964fda3f304267a12e50a084472218a39bb0376f563", size = 614754, upload-time = "2026-05-20T13:14:24.947Z" }, - { url = "https://files.pythonhosted.org/packages/1a/c6/50e520283a9f19388a7326b05f9e8637e566003475eacaadad04f558c68d/greenlet-3.5.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ded7b068c7c31c1a8657d4fd42d886b3e051ae29f88b80c5ff9d502257b0f071", size = 1574097, upload-time = "2026-05-20T14:02:24.003Z" }, - { url = "https://files.pythonhosted.org/packages/21/1c/13abd1f4860d987fa5e1170a01930d6e6cd40d328de487a3c9fdaff0ffd0/greenlet-3.5.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = 
"sha256:d0932b81d72f552ded9d810d00021b64d89f2195a91ce115b893f943b7a4ab3c", size = 1641058, upload-time = "2026-05-20T13:14:31.83Z" }, - { url = "https://files.pythonhosted.org/packages/f5/56/5f332b7705545eac2dc01b4e9254d24a793f2656d55d5cc6b94ee59d22ae/greenlet-3.5.1-cp311-cp311-win_amd64.whl", hash = "sha256:88e300d136eac057b2397aa1cfd7328b4c87c7eb66a09c7bc6a1292234db474e", size = 238089, upload-time = "2026-05-20T13:14:03.229Z" }, - { url = "https://files.pythonhosted.org/packages/d9/a9/a3c2fa886c5b94863fb0e61b3bc14610b7aa94cf4f17f8741b11708305fc/greenlet-3.5.1-cp311-cp311-win_arm64.whl", hash = "sha256:cc6ab7e555c8a112ad3a76e368e86e12a2754bcae1652a5602e133ec7b635523", size = 234989, upload-time = "2026-05-20T13:08:27.715Z" }, - { url = "https://files.pythonhosted.org/packages/c4/37/4549f149c9797c21b32c2683c33522af22522099de128b2406672526d005/greenlet-3.5.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:fa4f98af3a528f0c3fd592a26df7f376f93329c8f4d987f6bb979057af8bf5e2", size = 286220, upload-time = "2026-05-20T13:07:28.463Z" }, - { url = "https://files.pythonhosted.org/packages/38/ff/a4f436709716965eaab9f36ea7b906c8a927fbe32fb1372a2071d964f6b1/greenlet-3.5.1-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ffea73584b216150eab159b6d12348fb253e68757974de1e2c40d8a318ac89ed", size = 601585, upload-time = "2026-05-20T14:00:06.141Z" }, - { url = "https://files.pythonhosted.org/packages/65/ad/54bc3fcee3ad368a61b19b67d88117f7a8c29727bf71fffdeda81fbd946e/greenlet-3.5.1-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:1072b4f9edcc1e192d9283a66a3e68d6b84c561de33a83d7858beb9ba1effe10", size = 614215, upload-time = "2026-05-20T14:05:42.675Z" }, - { url = "https://files.pythonhosted.org/packages/40/69/b91cda0647df839483201545913514c2827ebea5e5ccdf931842763bc127/greenlet-3.5.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:add5217d68b31130f0beca584d7fef4878327d2e31642b66618a14eef312b63b", size = 611358, upload-time = "2026-05-20T13:14:26.37Z" }, - { url = "https://files.pythonhosted.org/packages/59/90/3cf77e080350cd02fa307bb2abf05df48f4482c240275bbd2c203ba8bb1c/greenlet-3.5.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a5ea42a752d47a145eae922b605cd1634665ac3d5ec1e72402d5048e8d60d207", size = 1570475, upload-time = "2026-05-20T14:02:25.29Z" }, - { url = "https://files.pythonhosted.org/packages/65/2c/18cece62045e74598c3c393f70dce4a63f56222015ba29a5d4eeb04f764c/greenlet-3.5.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c5551170cf4f5ff5623e9af81323751979fee2c731e2287b61f73cd27257b823", size = 1635625, upload-time = "2026-05-20T13:14:34.027Z" }, - { url = "https://files.pythonhosted.org/packages/30/f5/310d104ddf41eb5a70f4c268d22508dfb0c3c8e86fec152be34d0d2ed819/greenlet-3.5.1-cp312-cp312-win_amd64.whl", hash = "sha256:3c8bb982ad117d29478ef8f5533e97df21f1e2befd17a299257b0c96d1371c0b", size = 238791, upload-time = "2026-05-20T13:10:39.018Z" }, - { url = "https://files.pythonhosted.org/packages/62/90/ceca11f504cd23a8047a3dea31919adc48df9b626dd0c13f0d858734fdfd/greenlet-3.5.1-cp312-cp312-win_arm64.whl", hash = "sha256:80eb4b04dadc4e67df3fae179a32c4706a3f495bc7f22fc8a81115d5f5512188", size = 235580, upload-time = "2026-05-20T13:08:45.056Z" }, - { url = "https://files.pythonhosted.org/packages/27/69/7f7e5372d998b81001899b1c0823c957aa413ba0f2662e65821611cc31e4/greenlet-3.5.1-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:51518ff74664078fc51bffcc6fc529b0df5ae58da192691cee765d45ce944a2b", size = 285060, upload-time = "2026-05-20T13:08:51.899Z" }, - { url = "https://files.pythonhosted.org/packages/b1/bf/387f9b6b865fd2ae0d0be09e0004827295a01b71be76ed350dd1e28a91a4/greenlet-3.5.1-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1ffdb3c0bb002c99cd8f298957e046c3dbf6006b5b7cdf11a4e19194624a0a0a", size = 604370, upload-time = 
"2026-05-20T14:00:07.492Z" }, - { url = "https://files.pythonhosted.org/packages/32/f5/169ce3d4e4c67291bd18f8cbe0299c9f3e45102c7f1fb3c14780c93e4532/greenlet-3.5.1-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7715a5a2c3378ba602c3a440558261e13a820bb53a82693aacd7b7f6d964e283", size = 616987, upload-time = "2026-05-20T14:05:44.237Z" }, - { url = "https://files.pythonhosted.org/packages/ee/e5/7f2e41d5273be07e77560d61ea4e56485b4d6c316d2a84518c62d1364061/greenlet-3.5.1-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dc71ff466927a201b08305acac451ebe1aedfcea002f62f1f2f2ac2ac1e6a135", size = 613911, upload-time = "2026-05-20T13:14:27.539Z" }, - { url = "https://files.pythonhosted.org/packages/c5/a4/fbdc67579b73615a1f91615e814303cc71e06128f7baaba87be79b8fb90c/greenlet-3.5.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cd443683db272ebaaca03af98c0b063ab30db70ea8a31a1559f35e3f7b744ccd", size = 1570689, upload-time = "2026-05-20T14:02:27.225Z" }, - { url = "https://files.pythonhosted.org/packages/e6/b4/77abbe35078be39718a46cd49caf16bceb35662f97a34101dca28aa98e47/greenlet-3.5.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:089fff7a6ce8d9316d1f65ebc00273a56be258c1725b32b94de90a3a979557e1", size = 1635602, upload-time = "2026-05-20T13:14:36.344Z" }, - { url = "https://files.pythonhosted.org/packages/37/f7/129f27ca700845b8ee8ca88ce7f43435a1239c2eddb7677fc938822762cf/greenlet-3.5.1-cp313-cp313-win_amd64.whl", hash = "sha256:110a1ca7b49b014b097f6078272c3f4ed31af45b254de5228b79adba879f6af9", size = 238683, upload-time = "2026-05-20T13:11:50.57Z" }, - { url = "https://files.pythonhosted.org/packages/6d/5c/a485a36e87df8d8fd0632ee01511244f5156a20ed3746cc6599340326395/greenlet-3.5.1-cp313-cp313-win_arm64.whl", hash = "sha256:f16ba1efc0715b680a18b8123d90dad887c6112ae3555b4b5c32c149540c6b4e", size = 235499, upload-time = "2026-05-20T13:12:42.028Z" }, - { url = 
"https://files.pythonhosted.org/packages/8a/cb/c62454606daf5640369c94d8a9dd540599b1bfc090e2d2180cb77f4038d2/greenlet-3.5.1-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:d8ab31c9de8651a2facdd5c5bb0011f2380dd1a7af78ce2adf4b56095294fc07", size = 285579, upload-time = "2026-05-20T13:08:56.396Z" }, - { url = "https://files.pythonhosted.org/packages/ec/71/c4270398c2eba968a6071af1dfbdcaeee6ec1c24bc8b435b8cc452700da6/greenlet-3.5.1-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5e300185139abc337ade480c327183adf42a875ac7181bfe66d7d4efea31fbea", size = 651106, upload-time = "2026-05-20T14:00:09.448Z" }, - { url = "https://files.pythonhosted.org/packages/1a/ab/71e34b78a44ec271fb5f550c17bc46d301ddc5953890d935f270b0dcdb5a/greenlet-3.5.1-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7ffdb990dcaa0234cf9845aead5df2e3c3a8b6507d409274dd87e0d5ab05ffc2", size = 663478, upload-time = "2026-05-20T14:05:45.88Z" }, - { url = "https://files.pythonhosted.org/packages/77/96/4efd6fa5c62c85426a0c19077a586258ebc3a2a146ff2493e4312a697a22/greenlet-3.5.1-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2f82b3597e9d83b63408affed0b48fd0f54935edac4302237b9a837be0dae33c", size = 660800, upload-time = "2026-05-20T13:14:29.129Z" }, - { url = "https://files.pythonhosted.org/packages/7a/e0/6c71401a25cac7000261304e866a2f2cc04dc74810d40e2f118aa4799495/greenlet-3.5.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c0141e37414c10164e702b8fb1473304221ad98f71600850c6ef7ff4880feba0", size = 1617518, upload-time = "2026-05-20T14:02:28.662Z" }, - { url = "https://files.pythonhosted.org/packages/41/26/c5c06643e8c0af9e7bf18e16cb51d0ab7625155f0392e1c9015d66d556cd/greenlet-3.5.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:50ae25a67bea74ea41fb14b960bc532df73eb713417b2d61892dced82fe8d3bc", size = 1681593, upload-time = "2026-05-20T13:14:39.417Z" }, - { url = 
"https://files.pythonhosted.org/packages/8a/bd/e11a108317485075e68af9d23039619b86b28130c3b50d227d42edece64b/greenlet-3.5.1-cp314-cp314-win_amd64.whl", hash = "sha256:8a17c42330e261299766b75ac1ea32caa437a9453c8f65d16a13140db378ecd3", size = 239800, upload-time = "2026-05-20T13:09:30.128Z" }, - { url = "https://files.pythonhosted.org/packages/47/f8/8e8e8417b7bf28639a5a56356ef934d0375e1d0c70a57e04d7701e870ffe/greenlet-3.5.1-cp314-cp314-win_arm64.whl", hash = "sha256:7b5f5fae05b8ac6d176a61b60c394a8cbdc2b5b91b81793066e68745cf165e54", size = 236862, upload-time = "2026-05-20T13:09:10.498Z" }, - { url = "https://files.pythonhosted.org/packages/90/12/41bf27fde4d3605d3773ae57751eda182b8be2f5398011c041173b1d9534/greenlet-3.5.1-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:ea8da1e900d758d078810d4255d8c6aa572181896a31ec79d779eb79c3adc9ad", size = 293637, upload-time = "2026-05-20T13:12:35.529Z" }, - { url = "https://files.pythonhosted.org/packages/44/44/ba14b23e9757707050c2f397d305bbcae62e5d7cad122f8b6baec5ae4a1f/greenlet-3.5.1-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a19570c52a21420dcbc94e661994bc325c0b5b11304540fed514586da5dc8f2e", size = 650840, upload-time = "2026-05-20T14:00:11.079Z" }, - { url = "https://files.pythonhosted.org/packages/a8/37/5ddc2b686a6844f91abecef43411842426da2e1573f60b49ecf2547f4ae1/greenlet-3.5.1-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3d955c89b75eeca4723d7cc14135f393cd47c32e2a6cb4a8e4c6e760a26b0986", size = 656416, upload-time = "2026-05-20T14:05:47.118Z" }, - { url = "https://files.pythonhosted.org/packages/e1/f0/d17510297c35a2992712f0bf84de3779749999f7d3d63aa1f09db7c62dbe/greenlet-3.5.1-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:de2daaaebd1a5aa88c49045b6baf9310b3263796bd88db713edf37cf53e7bb4e", size = 654397, upload-time = "2026-05-20T13:14:30.696Z" }, - { url = 
"https://files.pythonhosted.org/packages/37/eb/147387705bb89092645b012586e7273cb5ed3c90ef7eaf3a69173eaf0209/greenlet-3.5.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:3bfbd69cc349e43bf3a8ae1c85548ff0718efc887615c2db16c3833d7b0b072d", size = 1614469, upload-time = "2026-05-20T14:02:30.192Z" }, - { url = "https://files.pythonhosted.org/packages/a6/4e/37ee0da7732b7aa9896f17e15579a9df34b9fcb9dd494f0adfa749af6623/greenlet-3.5.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:4378720dd888136c27215a0214d32a4d37c3852765d45bc37aad0623423cfd78", size = 1675115, upload-time = "2026-05-20T13:14:40.972Z" }, - { url = "https://files.pythonhosted.org/packages/57/f3/97dfcf4a6eb5077f8a672234216fb5923eb89f2cab7081cb10b2cf75b605/greenlet-3.5.1-cp314-cp314t-win_amd64.whl", hash = "sha256:45718441607f9325d948db98cbc691276059316d0358c188c246da4e1d4d23d2", size = 245246, upload-time = "2026-05-20T13:12:22.646Z" }, - { url = "https://files.pythonhosted.org/packages/5d/73/d7f72e34b582f694f4a9b248162db7b09cc458a259ba8f0c0bfa1a34ea7d/greenlet-3.5.1-cp315-cp315-macosx_11_0_universal2.whl", hash = "sha256:2baee5ca02031757ffe8cc3d69f0cc0aec7065ce362622da74f32d3bcab1c541", size = 285575, upload-time = "2026-05-20T13:12:07.043Z" }, - { url = "https://files.pythonhosted.org/packages/df/59/fa9c6e87dc8ad27a95dabe2f29f372b733d05a8a67470f6c901ed9975655/greenlet-3.5.1-cp315-cp315-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9b1ec3274918a81d3ea778b9e75b56b72b33f300edb6cf7f3a7fe1dae56683de", size = 656428, upload-time = "2026-05-20T14:00:12.556Z" }, - { url = "https://files.pythonhosted.org/packages/f6/f9/e753408871eaa61dfe35e619cfc67512b036fde99893685d50eea9e07146/greenlet-3.5.1-cp315-cp315-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:111e2390ffffc47d5840b01711dd7fac07d4c09283d0283e7f3264b14e284c64", size = 667064, upload-time = "2026-05-20T14:05:48.662Z" }, - { url = 
"https://files.pythonhosted.org/packages/96/27/5565b5b40389f1c7753003a07e21892fda8660926787036d5bc0308b8113/greenlet-3.5.1-cp315-cp315-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e630136e905fe5ff43e86945ae41220b6d1470956a39220e708110ac48d01ea5", size = 665697, upload-time = "2026-05-20T13:14:32.943Z" }, - { url = "https://files.pythonhosted.org/packages/cf/82/e7de4178c0c2d1c9a5a3be3cc0b33e46a85b3ee4a77c071bf7ad8600e079/greenlet-3.5.1-cp315-cp315-musllinux_1_2_aarch64.whl", hash = "sha256:975eac34b44a7077ca4d421348455b94f0f518246a7f14bc6d2fdcfe5b584368", size = 1621256, upload-time = "2026-05-20T14:02:31.91Z" }, - { url = "https://files.pythonhosted.org/packages/00/10/f2dddcf7dacac17dfc68691809589adad06135eb28930429cf58a6467a2f/greenlet-3.5.1-cp315-cp315-musllinux_1_2_x86_64.whl", hash = "sha256:9ab3c3a0b2ae6198e67c898dad5215a49f9ae0d0081b3c3ec59f333e39eeca26", size = 1685956, upload-time = "2026-05-20T13:14:42.55Z" }, - { url = "https://files.pythonhosted.org/packages/22/17/4a232b32133230ada52f70e9d7f5b65b0caef8772f01849bd8d149e7e4ca/greenlet-3.5.1-cp315-cp315-win_amd64.whl", hash = "sha256:cbfc69be86e10dcfef5b1e6269d1d6926552aa89ee39e1de3353360c1b6989ab", size = 239802, upload-time = "2026-05-20T13:13:15.481Z" }, - { url = "https://files.pythonhosted.org/packages/c2/ae/4e623a7e6d4d2a5f4cb8e4c82de4169fc637942caae68d6e676b8a128ac5/greenlet-3.5.1-cp315-cp315-win_arm64.whl", hash = "sha256:92fd6d44ac5e5a887c8a5dc4a8ba0ba908527c31c12f78c6bc7dcfe8aab279f6", size = 236853, upload-time = "2026-05-20T13:15:37.301Z" }, - { url = "https://files.pythonhosted.org/packages/7a/57/816d9cff29119da3505b3d6a5e14a8af89006ac36f47f891ff293ee05af1/greenlet-3.5.1-cp315-cp315t-macosx_11_0_universal2.whl", hash = "sha256:a6fdf2433a5441ef9a95464f7c3e674775da1c8c1177fff311cee1acad4626ed", size = 293877, upload-time = "2026-05-20T13:10:19.078Z" }, - { url = 
"https://files.pythonhosted.org/packages/23/a1/59b0a7c7d140ff1a75626680b9a9899b79a9176cab298b394968fb023295/greenlet-3.5.1-cp315-cp315t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7546556f0d649f99f6a361098a55f761181bb2ea12ff150bb16d26092ad88244", size = 655333, upload-time = "2026-05-20T14:00:14.758Z" }, - { url = "https://files.pythonhosted.org/packages/72/1b/5efe127597625042218939d01855109f352779050768b670b52edcc16a6c/greenlet-3.5.1-cp315-cp315t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d5ee3ea898009fa898f85f9982255d35278c477bebe185beca249cab42d4526c", size = 659443, upload-time = "2026-05-20T14:05:50.159Z" }, - { url = "https://files.pythonhosted.org/packages/6c/6d/c404246ea4d22d097a7426d0efb5b781bd7eb67715f09e79001bd552ab18/greenlet-3.5.1-cp315-cp315t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a5c81f74d204d3edd136ebfd50dce53acbb776995d721a0fe801626cfc93b8cd", size = 658356, upload-time = "2026-05-20T13:14:35.091Z" }, - { url = "https://files.pythonhosted.org/packages/51/02/f8ee37fb6d2219329f350af241c27fcf12df57e723d11f6fc6d3bacdadaa/greenlet-3.5.1-cp315-cp315t-musllinux_1_2_aarch64.whl", hash = "sha256:2c18ef16bf6d4dd410e4dd52996888ea1497be26892fe5bbc73580aba4287b8e", size = 1619216, upload-time = "2026-05-20T14:02:33.403Z" }, - { url = "https://files.pythonhosted.org/packages/93/c5/3dc9475ace2c7a3680da12372cddd7f1ac874eb410a1ac48d3e9dab83782/greenlet-3.5.1-cp315-cp315t-musllinux_1_2_x86_64.whl", hash = "sha256:17d86354f0ae6b61bf9be5148d0dd34e06c3cb7c602c671f79f29ac3b150e659", size = 1678427, upload-time = "2026-05-20T13:14:43.71Z" }, - { url = "https://files.pythonhosted.org/packages/df/4e/750c15c317a41ffb36f0bf40b933e3d744a7dede61889f74443ea69690cf/greenlet-3.5.1-cp315-cp315t-win_amd64.whl", hash = "sha256:e7516cf6ae6b8a582c2770a0caed47b8a48373ed732c33d69a72913ae6ac923e", size = 245225, upload-time = "2026-05-20T13:13:59.366Z" }, - { url = 
"https://files.pythonhosted.org/packages/4f/fd/d3baea2eeb7b617efd47e87ca06e2ec2c6118d303aa9e918e0ce16eadc10/greenlet-3.5.1-cp315-cp315t-win_arm64.whl", hash = "sha256:5028648bf2253ec4745add746129d3904121fa7fe871a76bed23c5720573ce0a", size = 239590, upload-time = "2026-05-20T13:13:37.382Z" }, -] - [[package]] name = "grpcio" version = "1.80.0" @@ -2756,61 +2684,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, ] -[[package]] -name = "sqlalchemy" -version = "2.0.50" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "greenlet", marker = "platform_machine == 'AMD64' or platform_machine == 'WIN32' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'ppc64le' or platform_machine == 'win32' or platform_machine == 'x86_64'" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/57/da/6fbf010c8ebb347679d0d100b22fe9ba5e13fd04046c5df7280d2f0bf706/sqlalchemy-2.0.50.tar.gz", hash = "sha256:af5607d11ef90fd6a5c0549fe0045dce1663d427426bcfb506dcb5346a85a3b9", size = 9907424, upload-time = "2026-05-24T19:20:04.018Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/70/a9/812a775bd8c1af0966d660238d005baf25e9bced1f038c8e71f00aa637a7/sqlalchemy-2.0.50-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7af6eeb84985bf840ba779018ff9424d61ff69b52e66b8789d3c8da7bf5341b2", size = 2161617, upload-time = "2026-05-24T20:00:00.761Z" }, - { url = "https://files.pythonhosted.org/packages/d5/74/5a6bc5496e9be8f740fbf80f9e6bd4ab965c8a80870eb07ab015e360957a/sqlalchemy-2.0.50-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:0fe7822866f3a9fc5f3db21a290ce8961a53050115f05edf9402b6a5feb92a9f", size = 3244104, upload-time = "2026-05-24T20:07:38.158Z" }, - { url = "https://files.pythonhosted.org/packages/81/55/b260d8df2adc9bb0bf294f67b5f802ff0d84d99442b536b9efd0ea72d447/sqlalchemy-2.0.50-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e8e1b0f6a4dcd9b4839e2320afb5df37a6981cbc20ff9c423ae11c5537bdbd21", size = 3243039, upload-time = "2026-05-24T20:14:23.765Z" }, - { url = "https://files.pythonhosted.org/packages/e5/6d/58714005cbf370f16c3f30d30324a43be10069efcfe764f7236a2e851947/sqlalchemy-2.0.50-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:e195687f1af431c9515416288373b323b6eb599f774409814e89e9d603a56e39", size = 3195017, upload-time = "2026-05-24T20:07:40.086Z" }, - { url = "https://files.pythonhosted.org/packages/30/e8/67527fee039bd3e1a6ce3f03d2b62fd87ab9099c17052810d79496727b66/sqlalchemy-2.0.50-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ea1a8a2db4b2217d456c8d7a873bfc605f06fe3584d315264ea18c2a17585d0b", size = 3215308, upload-time = "2026-05-24T20:14:26.034Z" }, - { url = "https://files.pythonhosted.org/packages/94/b2/dd3155a6a6706cb89adecf5ee6e0512f7b0ee5cf3e6f4cde67d3c20ebfda/sqlalchemy-2.0.50-cp310-cp310-win32.whl", hash = "sha256:68b154b08088b4ec32bb4d2958bfbb50e57549f91a4cd3e7f928e3553ed69031", size = 2121637, upload-time = "2026-05-24T20:08:06.401Z" }, - { url = "https://files.pythonhosted.org/packages/93/a1/a09c463ee3e7764b5ce5bd19a7f0b6eefbde62e637439ab58498cdbd6b47/sqlalchemy-2.0.50-cp310-cp310-win_amd64.whl", hash = "sha256:66e374271ecb7101273f57af1a62446a953d327eec4f8089147de57c591bbacc", size = 2144673, upload-time = "2026-05-24T20:08:07.936Z" }, - { url = "https://files.pythonhosted.org/packages/b6/5d/3172686af1770e4de2805f919a51441085f589ddadf3dd76ec582f84f497/sqlalchemy-2.0.50-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1aa6e403663a9c43c8fef7ce4bdb4cf48bcd8d352e91deda2a99f963270bd508", size = 
2161366, upload-time = "2026-05-24T20:00:02.061Z" }, - { url = "https://files.pythonhosted.org/packages/0f/90/e98dedea3c3e663a17afcd003a34ba45efdac2cea3b6f2e4585e2b1e2537/sqlalchemy-2.0.50-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:51b637a84f9fa35ae1f9017e786cb142974a25305085e1b378b3647a67f65ad3", size = 3318926, upload-time = "2026-05-24T20:07:42.369Z" }, - { url = "https://files.pythonhosted.org/packages/3b/4f/501308c2babb62c11753ecb4ee88ba9eef019419a4d6cbf7cb13e2bad353/sqlalchemy-2.0.50-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2dab927761d9108550f0cf8e66ff21af56f907a0ce0a689793db615e2b55f62c", size = 3319199, upload-time = "2026-05-24T20:14:28.551Z" }, - { url = "https://files.pythonhosted.org/packages/ac/39/d88996c5e03ed6248c3a788d20f0b8d8b376b9f8a495e4bab9df7c72d2f8/sqlalchemy-2.0.50-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:545eae198d37bcf837a10ede3684e2af32458d6f35c597c35c2de7502dc38fc4", size = 3270301, upload-time = "2026-05-24T20:07:44.917Z" }, - { url = "https://files.pythonhosted.org/packages/42/1b/1ae0e65161b51cc43e5ca75430ef79d80e23b5042d645586c2c342c3b92e/sqlalchemy-2.0.50-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0fec460e18cdbb4c7773531122ce9a27e96c6ca17af3933941d94da475ad2c86", size = 3293465, upload-time = "2026-05-24T20:14:30.501Z" }, - { url = "https://files.pythonhosted.org/packages/83/29/17c0003f2c0dfa6d1b97672475707e3ec5980db09defd7fa20beb6833bbd/sqlalchemy-2.0.50-cp311-cp311-win32.whl", hash = "sha256:e6e814658818fd165e749e3d8490ef16cc7f379a118c37ada8b0589ffbaaac22", size = 2120694, upload-time = "2026-05-24T20:08:09.237Z" }, - { url = "https://files.pythonhosted.org/packages/c9/18/280d00654cc19d1fccf236fa5070f6dd04b84dde6f1b2e637bde0ff340a7/sqlalchemy-2.0.50-cp311-cp311-win_amd64.whl", hash = "sha256:1c5f858fe79c9f5d8fda065c06186356acb7f8df3cd52dbd5ee3f200e4b144f5", size = 2145315, upload-time = 
"2026-05-24T20:08:10.952Z" }, - { url = "https://files.pythonhosted.org/packages/be/b0/a9d19b43f38f878b1278bca5b00b909f7540d41494396dd2561f9ad0956d/sqlalchemy-2.0.50-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:23ae23d8b9d344d30d0a92f06d45825024a5790f1c1dd4cf452636a50d3e58cb", size = 2159807, upload-time = "2026-05-24T19:27:53.086Z" }, - { url = "https://files.pythonhosted.org/packages/f5/2c/191dd58a248fd2cfd4780fa82c375c505e4ad98c8b522fa69ec492130d77/sqlalchemy-2.0.50-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:47b71b933e7b4ebad407c8fdfd70d2c4f08b78b3238bb30eebdd6eb32ca51b89", size = 3343358, upload-time = "2026-05-24T20:09:29.279Z" }, - { url = "https://files.pythonhosted.org/packages/8a/2b/514fce8a7df81cf5bad7ff7865de7ac0c5776a38cc043475c4703eb7fe8b/sqlalchemy-2.0.50-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:110fdac56ace278949f00de805edacbd6141e382d992f9ba28238b3a0827a600", size = 3357994, upload-time = "2026-05-24T20:17:13.495Z" }, - { url = "https://files.pythonhosted.org/packages/35/a6/a0e283f5494f92b0d77e319ff77e437b1ffe4a051ba67c81d53234825475/sqlalchemy-2.0.50-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0f5e4ac70e9e757f6b3e87c0491ff034442ecd8dfd36d041a50564c322dafc0e", size = 3289399, upload-time = "2026-05-24T20:09:32.239Z" }, - { url = "https://files.pythonhosted.org/packages/b7/96/1b07325ba71752d6a028b77d07bed1483ad545f794e8b1dc89b3ba3b3c68/sqlalchemy-2.0.50-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:724f3dcbe53dd0151e3cb5e7ec4ba4c620bede579caacd16275dc35ce06e8615", size = 3321216, upload-time = "2026-05-24T20:17:15.581Z" }, - { url = "https://files.pythonhosted.org/packages/ed/8e/bad6ed253e8a99edfc99af02f7173ec48a1d3ed1b9b35a1b8bc1700900cc/sqlalchemy-2.0.50-cp312-cp312-win32.whl", hash = "sha256:1208050441471d003b7c8cb4054fb084f185cf35ac3f0ea270803865bca9939a", size = 2119194, upload-time = 
"2026-05-24T19:50:04.943Z" }, - { url = "https://files.pythonhosted.org/packages/b6/2d/314a6690dda4b9cfc571eab1a63cf6fe6e1470aa3759ccda6aa016ee0f5a/sqlalchemy-2.0.50-cp312-cp312-win_amd64.whl", hash = "sha256:9d1af51558029a156a70986b7df88f042b3d158d7c8d8fb5072912d4b32d89c7", size = 2146186, upload-time = "2026-05-24T19:50:06.74Z" }, - { url = "https://files.pythonhosted.org/packages/0b/c4/c42356b527296e9862f67990efce31ef78b4cf69cd3f80873a528a060320/sqlalchemy-2.0.50-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:06a9210bdc5f4298cff0781087e2ff45683922252dacc452846373a58761f093", size = 2156697, upload-time = "2026-05-24T19:27:54.764Z" }, - { url = "https://files.pythonhosted.org/packages/60/a1/b1a70e3c4365ac7fe9e347f3710f19b562c866fb96d45e3c891588789a7b/sqlalchemy-2.0.50-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b53784972ade4f8174b9aa661f31a06f8a936d2cfdd602913ff3c6dd40ae873", size = 3284260, upload-time = "2026-05-24T20:09:34.195Z" }, - { url = "https://files.pythonhosted.org/packages/3f/4a/f3ac3caa19f263d57b0a47f8c91bbf56583dc2d3fc63acfbf644abb24fe0/sqlalchemy-2.0.50-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:31648fa14460537e768a7303b078e4344d208e0d23e06867c1f376a227ed82db", size = 3302280, upload-time = "2026-05-24T20:17:17.825Z" }, - { url = "https://files.pythonhosted.org/packages/66/55/ccada3e3d62254587819749a0bc69f41173eb48a6e385d10e66d32a9c88e/sqlalchemy-2.0.50-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:03f4323c980ad0e918cc9e5369b015f759f4e534db5bbaf4dc36832c10d05064", size = 3231580, upload-time = "2026-05-24T20:09:36.406Z" }, - { url = "https://files.pythonhosted.org/packages/05/f6/6809349130a2de0e109e7f00fd7d431da9565b9b2868b32ee684754f672b/sqlalchemy-2.0.50-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:2b9dcc43afef8ac157cd92fce96985d6b8b0cfbd3df4d666f66b4d55a75d202f", size = 3269375, upload-time = 
"2026-05-24T20:17:20.34Z" }, - { url = "https://files.pythonhosted.org/packages/48/84/278a811ef4e07be9c89dc5cdd7be833268509a66a68c4897cf585e67428f/sqlalchemy-2.0.50-cp313-cp313-win32.whl", hash = "sha256:60922d6599065ddca2c6f376b9aa2f41a6b85a271725e0909490bbc50b1998a5", size = 2117229, upload-time = "2026-05-24T19:50:08.215Z" }, - { url = "https://files.pythonhosted.org/packages/f6/1c/067cc6187ed32d2ec222fe6d2643acc1659a6d0659f8a7cbc5ad3ae83280/sqlalchemy-2.0.50-cp313-cp313-win_amd64.whl", hash = "sha256:287086e67275a212c4582d166a6fb03a65ccc5551d80866270ce0dd9f34eccd3", size = 2143126, upload-time = "2026-05-24T19:50:09.691Z" }, - { url = "https://files.pythonhosted.org/packages/df/32/10ac51b4be7cdecd7e93d069251c86dfbf70b7adbd7c67b48ccea6c49e1c/sqlalchemy-2.0.50-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c966932507a4d7d0a37314927dbfcd89720e3f37d2a1e3352e7ae7939fa8e8a0", size = 2158519, upload-time = "2026-05-24T19:27:56.472Z" }, - { url = "https://files.pythonhosted.org/packages/5a/76/e703d2f7681d7d66c4c891af3f07c7ccf4c76ad7f18351de035b5eda007a/sqlalchemy-2.0.50-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:faffef4bcc20a1892e65e155293d99d60855bbbc79250ab712819cfd56a8e6bb", size = 3282063, upload-time = "2026-05-24T20:09:38.57Z" }, - { url = "https://files.pythonhosted.org/packages/31/26/ef168b184a25701f9995e8fb7e503fafd7a99c1c77cda1bc1a26ea2ed486/sqlalchemy-2.0.50-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6c206aec519a2e7bd08abbfb33436e325fd22c632d9c21a9047e376ce241646e", size = 3287069, upload-time = "2026-05-24T20:17:21.942Z" }, - { url = "https://files.pythonhosted.org/packages/c2/15/765acc2bc693bccc43ca4a95d5b69750da8aaf6db1b5c616536e087f8920/sqlalchemy-2.0.50-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:bef4ac756363227ef6402a75fee025a4bc690f92328e825868939b3b3a446a6d", size = 3230453, upload-time = "2026-05-24T20:09:40.398Z" }, - { 
url = "https://files.pythonhosted.org/packages/63/61/08e03c3adbf5db0087a0b6816746fec8f3032fb2f7fc899a9bb9b2a48ce4/sqlalchemy-2.0.50-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:96fbee6b19c19cd1556c8bf9419447cf2ec149ffcab7ab64348c23e54ef8547f", size = 3252413, upload-time = "2026-05-24T20:17:24.067Z" }, - { url = "https://files.pythonhosted.org/packages/03/0c/370a1f2db38436c615e10134c8a37de3688e74084792380695f3f5083860/sqlalchemy-2.0.50-cp314-cp314-win32.whl", hash = "sha256:8f00e3eb43ba30eb1b238ee03a8a62309486d1321eda3328bb611e0340033ad8", size = 2120063, upload-time = "2026-05-24T19:50:11.08Z" }, - { url = "https://files.pythonhosted.org/packages/7f/a0/fe92bb9817863bc13ba093bda931979a26cc2ca69f8e8f26d07add3d7c6f/sqlalchemy-2.0.50-cp314-cp314-win_amd64.whl", hash = "sha256:15708c613cd5005b7dffe1f66ee6a63ee8f5e46799f71c70ebad74178c676a39", size = 2145830, upload-time = "2026-05-24T19:50:12.452Z" }, - { url = "https://files.pythonhosted.org/packages/cc/ff/e5640a98a0b2f491eb8fde10fb6c773621a2e44340de231fafcc9370f4a9/sqlalchemy-2.0.50-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:3699dac4be410e97049a1658e9480da9cde956594aa0f3aebc60b88f21c5ba70", size = 2178435, upload-time = "2026-05-24T19:42:58.889Z" }, - { url = "https://files.pythonhosted.org/packages/b7/85/337116e186f1236375b5fb70c21cfac98e8e8ab0d3a47be838dc47a59e08/sqlalchemy-2.0.50-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f96233858e3df43932ac11589e22520da6e8aeb624b03fedfeebb0e8ea213086", size = 3566059, upload-time = "2026-05-24T20:01:20.848Z" }, - { url = "https://files.pythonhosted.org/packages/96/34/bb0e190e161c3c2c24314a65add57218be14a4a9486886b7f5047c1ff7c8/sqlalchemy-2.0.50-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c4e70c46fad30c3bcc6a4708bc0130a3173e11a5b25f0ea4a9d8911b450f1f52", size = 3535366, upload-time = "2026-05-24T20:03:56.768Z" }, - { url = 
"https://files.pythonhosted.org/packages/df/5a/a7f759f97e4fd499c5d4e4488c760d5a7fbecf3028b465a04274fcd52384/sqlalchemy-2.0.50-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1918a3cf564d16d95bca7301005f41ab2ad50b07cd3b9da50d3ed986db148d6a", size = 3474879, upload-time = "2026-05-24T20:01:23.058Z" }, - { url = "https://files.pythonhosted.org/packages/9d/d9/2907ea38eb60687d297bf9c39e5ee58053c87b57fe8a9cae97090cecbf10/sqlalchemy-2.0.50-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:b00098cdbdbd38c7be3d568b0c9c3122b8c0ec62b911b57cd5e6e0254d60a76d", size = 3486117, upload-time = "2026-05-24T20:03:59.052Z" }, - { url = "https://files.pythonhosted.org/packages/f2/e3/5aa06f167559f8c0bdae487e297d23ba548150ab016a3418265d617a4985/sqlalchemy-2.0.50-cp314-cp314t-win32.whl", hash = "sha256:1fbd55a969d7ac44a98e3dec75016074f809fa08f871585ace58dde110d1bf3e", size = 2150823, upload-time = "2026-05-24T20:08:58.644Z" }, - { url = "https://files.pythonhosted.org/packages/65/9b/112fb8f977582d7489d036e409e3723948bcf5320b3ac465f3c481bbe8f9/sqlalchemy-2.0.50-cp314-cp314t-win_amd64.whl", hash = "sha256:c5c3cdb753a9004183e1ccb634b41611654c989e61bc68617ce878e46d6f1e51", size = 2185794, upload-time = "2026-05-24T20:09:00.319Z" }, - { url = "https://files.pythonhosted.org/packages/d0/10/f7220e9b784d295d241c86ed99aeb537f92afcd469a64861f2717e9bb077/sqlalchemy-2.0.50-py3-none-any.whl", hash = "sha256:92064363517a3ff8212b5a93b8c62876579d8dfd1ca5b561335f30152d884fa9", size = 1943861, upload-time = "2026-05-24T19:59:01.119Z" }, -] - [[package]] name = "starlette" version = "1.1.0"