Fix centered content layout widths

This commit is contained in:
ash66
2026-05-26 12:34:12 +08:00
parent 34d72d7ce9
commit fec22a3a2c
16 changed files with 2102 additions and 140 deletions

View File

@@ -7,16 +7,22 @@ import tempfile
import uuid import uuid
import json import json
from dataclasses import dataclass from dataclasses import dataclass
from datetime import UTC, datetime
from loguru import logger from loguru import logger
from app.config.settings import settings
from app.domain.documents import ( from app.domain.documents import (
ChunkBuilder, ChunkBuilder,
Document, Document,
DocumentArtifact,
DocumentBinaryStore, DocumentBinaryStore,
DocumentParser, DocumentParser,
DocumentProcessingRun,
DocumentProcessingStore,
DocumentRepository, DocumentRepository,
DocumentStatus, DocumentStatus,
DocumentStatusEvent,
ParseArtifactStore, ParseArtifactStore,
ParsedDocument, ParsedDocument,
) )
@@ -39,6 +45,7 @@ class DocumentProcessResult:
class DocumentCommandService: class DocumentCommandService:
"""Provide the Document Command Service service.""" """Provide the Document Command Service service."""
def __init__( def __init__(
self, self,
*, *,
@@ -49,6 +56,7 @@ class DocumentCommandService:
embedding_provider: EmbeddingProvider, embedding_provider: EmbeddingProvider,
vector_index: VectorIndex, vector_index: VectorIndex,
parse_artifact_store: ParseArtifactStore | None = None, parse_artifact_store: ParseArtifactStore | None = None,
document_processing_store: DocumentProcessingStore | None = None,
) -> None: ) -> None:
"""Initialize the Document Command Service instance.""" """Initialize the Document Command Service instance."""
self.document_repository = document_repository self.document_repository = document_repository
@@ -58,6 +66,11 @@ class DocumentCommandService:
self.embedding_provider = embedding_provider self.embedding_provider = embedding_provider
self.vector_index = vector_index self.vector_index = vector_index
self.parse_artifact_store = parse_artifact_store self.parse_artifact_store = parse_artifact_store
self.document_processing_store = document_processing_store
def _utcnow(self) -> datetime:
"""Return the current UTC timestamp for persisted processing metadata."""
return datetime.now(UTC)
def _save_parse_artifacts(self, *, doc_id: str, parsed_document: ParsedDocument) -> dict[str, str]: def _save_parse_artifacts(self, *, doc_id: str, parsed_document: ParsedDocument) -> dict[str, str]:
"""Persist parse artifacts so troubleshooting does not depend on provider retention windows.""" """Persist parse artifacts so troubleshooting does not depend on provider retention windows."""
@@ -80,6 +93,143 @@ class DocumentCommandService:
artifact_keys[name] = object_name artifact_keys[name] = object_name
return artifact_keys return artifact_keys
def _safe_create_processing_run(self, *, doc_id: str, trigger_type: str, generate_summary: bool) -> str | None:
"""Create a processing run record when the optional store is available."""
if not self.document_processing_store:
return None
run = DocumentProcessingRun(
run_id=str(uuid.uuid4()),
doc_id=doc_id,
trigger_type=trigger_type,
run_status="running",
parser_backend=settings.parser_backend,
chunk_backend=settings.chunk_backend,
embedding_model=settings.embedding_model,
metadata={"generate_summary": generate_summary},
)
try:
created = self.document_processing_store.create_run(run)
return created.run_id
except Exception:
logger.warning("DocumentProcessingStore.create_run failed for doc_id={}", doc_id)
return None
def _safe_append_status_event(
self,
*,
doc_id: str,
run_id: str | None,
from_status: str,
to_status: str,
stage: str,
message: str = "",
metadata: dict | None = None,
) -> None:
"""Append a status event without allowing auxiliary persistence failures to abort processing."""
if not self.document_processing_store or not run_id:
return
event = DocumentStatusEvent(
event_id=str(uuid.uuid4()),
doc_id=doc_id,
run_id=run_id,
from_status=from_status,
to_status=to_status,
stage=stage,
message=message,
metadata=metadata or {},
)
try:
self.document_processing_store.append_status_event(event)
except Exception:
logger.warning(
"DocumentProcessingStore.append_status_event failed for doc_id={}, run_id={}",
doc_id,
run_id,
)
def _safe_mark_run_stored(self, *, doc_id: str, run_id: str | None) -> None:
"""Mark the processing run as stored without affecting the main workflow."""
if not self.document_processing_store or not run_id:
return
try:
self.document_processing_store.mark_run_stored(run_id, stored_at=self._utcnow())
except Exception:
logger.warning("DocumentProcessingStore.mark_run_stored failed for doc_id={}, run_id={}", doc_id, run_id)
def _safe_mark_run_parsed(self, *, doc_id: str, run_id: str | None, parsed_document: ParsedDocument) -> None:
"""Persist parse completion details without failing the document pipeline."""
if not self.document_processing_store or not run_id:
return
try:
self.document_processing_store.mark_run_parsed(
run_id,
parser_backend=parsed_document.parser_name,
layout_count=int(parsed_document.metadata.get("layout_count", len(parsed_document.raw_layouts)) or 0),
structure_node_count=len(parsed_document.structure_nodes),
semantic_block_count=len(parsed_document.semantic_blocks),
vector_chunk_count=len(parsed_document.vector_chunks),
parsed_at=self._utcnow(),
metadata={"parse_task_id": parsed_document.metadata.get("task_id", "")},
)
except Exception:
logger.warning("DocumentProcessingStore.mark_run_parsed failed for doc_id={}, run_id={}", doc_id, run_id)
def _safe_replace_processing_artifacts(self, *, doc_id: str, run_id: str | None, artifact_keys: dict[str, str]) -> None:
"""Store artifact references without turning persistence drift into a user-visible failure."""
if not self.document_processing_store or not run_id:
return
artifacts = [
DocumentArtifact(
artifact_id=str(uuid.uuid4()),
doc_id=doc_id,
run_id=run_id,
artifact_type=artifact_type,
object_name=object_name,
content_type="application/json",
byte_size=0,
checksum="",
)
for artifact_type, object_name in artifact_keys.items()
]
try:
self.document_processing_store.replace_artifacts_for_run(run_id, artifacts)
except Exception:
logger.warning(
"DocumentProcessingStore.replace_artifacts_for_run failed for doc_id={}, run_id={}",
doc_id,
run_id,
)
def _safe_mark_run_indexed(self, *, doc_id: str, run_id: str | None, chunk_count: int, index_name: str) -> None:
"""Mark the processing run as indexed without affecting the success path."""
if not self.document_processing_store or not run_id:
return
now = self._utcnow()
try:
self.document_processing_store.mark_run_indexed(
run_id,
chunk_count=chunk_count,
index_name=index_name,
indexed_at=now,
finished_at=now,
)
except Exception:
logger.warning("DocumentProcessingStore.mark_run_indexed failed for doc_id={}, run_id={}", doc_id, run_id)
def _safe_mark_run_failed(self, *, doc_id: str, run_id: str | None, failure_stage: str, error_message: str) -> None:
"""Mark the processing run as failed without masking the original error handling path."""
if not self.document_processing_store or not run_id:
return
try:
self.document_processing_store.mark_run_failed(
run_id,
failure_stage=failure_stage,
error_message=error_message,
finished_at=self._utcnow(),
)
except Exception:
logger.warning("DocumentProcessingStore.mark_run_failed failed for doc_id={}, run_id={}", doc_id, run_id)
def upload_and_process( def upload_and_process(
self, self,
*, *,
@@ -91,11 +241,15 @@ class DocumentCommandService:
regulation_type: str, regulation_type: str,
version: str, version: str,
generate_summary: bool, generate_summary: bool,
trigger_type: str = "upload",
) -> DocumentProcessResult: ) -> DocumentProcessResult:
"""Handle upload and process for the Document Command Service instance.""" """Handle upload and process for the Document Command Service instance."""
doc_id = doc_id or str(uuid.uuid4())[:8] doc_id = doc_id or str(uuid.uuid4())[:8]
final_doc_name = doc_name or file_name final_doc_name = doc_name or file_name
object_name = f"{doc_id}/{file_name}" object_name = f"{doc_id}/{file_name}"
run_id: str | None = None
current_status = DocumentStatus.PENDING
current_stage = "store"
document = Document( document = Document(
doc_id=doc_id, doc_id=doc_id,
@@ -109,6 +263,19 @@ class DocumentCommandService:
metadata={"generate_summary": generate_summary}, metadata={"generate_summary": generate_summary},
) )
self.document_repository.create(document) self.document_repository.create(document)
run_id = self._safe_create_processing_run(
doc_id=doc_id,
trigger_type=trigger_type,
generate_summary=generate_summary,
)
self._safe_append_status_event(
doc_id=doc_id,
run_id=run_id,
from_status="",
to_status=DocumentStatus.PENDING.value,
stage="document_created",
message="Document record created",
)
temp_path = "" temp_path = ""
try: try:
@@ -119,6 +286,17 @@ class DocumentCommandService:
metadata={"doc_id": doc_id}, metadata={"doc_id": doc_id},
) )
self.document_repository.update_status(doc_id, DocumentStatus.STORED) self.document_repository.update_status(doc_id, DocumentStatus.STORED)
current_status = DocumentStatus.STORED
current_stage = "parse"
self._safe_mark_run_stored(doc_id=doc_id, run_id=run_id)
self._safe_append_status_event(
doc_id=doc_id,
run_id=run_id,
from_status=DocumentStatus.PENDING.value,
to_status=DocumentStatus.STORED.value,
stage="store",
message="Source file stored",
)
suffix = os.path.splitext(file_name)[1] suffix = os.path.splitext(file_name)[1]
with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file: with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
@@ -130,7 +308,13 @@ class DocumentCommandService:
doc_id=doc_id, doc_id=doc_id,
doc_name=final_doc_name, doc_name=final_doc_name,
) )
self._safe_mark_run_parsed(doc_id=doc_id, run_id=run_id, parsed_document=parsed_document)
artifact_keys: dict[str, str] = {}
try:
artifact_keys = self._save_parse_artifacts(doc_id=doc_id, parsed_document=parsed_document) artifact_keys = self._save_parse_artifacts(doc_id=doc_id, parsed_document=parsed_document)
except Exception:
logger.warning("Parse artifact binary persistence failed for doc_id={}", doc_id)
self.document_repository.update_status( self.document_repository.update_status(
doc_id, doc_id,
DocumentStatus.PARSED, DocumentStatus.PARSED,
@@ -146,6 +330,18 @@ class DocumentCommandService:
"processing_stage": "parsed", "processing_stage": "parsed",
}, },
) )
current_status = DocumentStatus.PARSED
current_stage = "embed"
self._safe_replace_processing_artifacts(doc_id=doc_id, run_id=run_id, artifact_keys=artifact_keys)
self._safe_append_status_event(
doc_id=doc_id,
run_id=run_id,
from_status=DocumentStatus.STORED.value,
to_status=DocumentStatus.PARSED.value,
stage="parse",
message="Document parsed",
metadata={"artifact_count": len(artifact_keys)},
)
if self.parse_artifact_store: if self.parse_artifact_store:
try: try:
self.parse_artifact_store.save( self.parse_artifact_store.save(
@@ -165,6 +361,7 @@ class DocumentCommandService:
raise ValueError("解析完成但没有生成可入库的 chunks") raise ValueError("解析完成但没有生成可入库的 chunks")
vectors = self.embedding_provider.embed_texts([chunk.embedding_text for chunk in chunks]) vectors = self.embedding_provider.embed_texts([chunk.embedding_text for chunk in chunks])
current_stage = "index"
inserted = self.vector_index.upsert(chunks, vectors) inserted = self.vector_index.upsert(chunks, vectors)
if inserted != len(chunks): if inserted != len(chunks):
logger.warning("Milvus upsert count mismatched: inserted={}, chunks={}", inserted, len(chunks)) logger.warning("Milvus upsert count mismatched: inserted={}, chunks={}", inserted, len(chunks))
@@ -182,6 +379,23 @@ class DocumentCommandService:
"processing_stage": "indexed", "processing_stage": "indexed",
}, },
) )
current_status = DocumentStatus.INDEXED
index_name = health.get("collection_name", "")
self._safe_mark_run_indexed(
doc_id=doc_id,
run_id=run_id,
chunk_count=len(chunks),
index_name=index_name,
)
self._safe_append_status_event(
doc_id=doc_id,
run_id=run_id,
from_status=DocumentStatus.PARSED.value,
to_status=DocumentStatus.INDEXED.value,
stage="index",
message="Document indexed",
metadata={"chunk_count": len(chunks), "index_name": index_name},
)
stored = self.document_repository.get(doc_id) stored = self.document_repository.get(doc_id)
return DocumentProcessResult( return DocumentProcessResult(
doc_id=doc_id, doc_id=doc_id,
@@ -194,6 +408,7 @@ class DocumentCommandService:
) )
except Exception as exc: except Exception as exc:
logger.exception("文档处理失败: doc_id={}", doc_id) logger.exception("文档处理失败: doc_id={}", doc_id)
failure_stage = current_stage
self.document_repository.update_status( self.document_repository.update_status(
doc_id, doc_id,
DocumentStatus.FAILED, DocumentStatus.FAILED,
@@ -201,8 +416,23 @@ class DocumentCommandService:
metadata={ metadata={
"failure_reason": str(exc), "failure_reason": str(exc),
"processing_stage": "failed", "processing_stage": "failed",
"failure_stage": failure_stage,
}, },
) )
self._safe_mark_run_failed(
doc_id=doc_id,
run_id=run_id,
failure_stage=failure_stage,
error_message=str(exc),
)
self._safe_append_status_event(
doc_id=doc_id,
run_id=run_id,
from_status=current_status.value,
to_status=DocumentStatus.FAILED.value,
stage=failure_stage,
message=str(exc),
)
return DocumentProcessResult( return DocumentProcessResult(
doc_id=doc_id, doc_id=doc_id,
doc_name=final_doc_name, doc_name=final_doc_name,
@@ -235,6 +465,11 @@ class DocumentCommandService:
self.parse_artifact_store.delete(doc_id) self.parse_artifact_store.delete(doc_id)
except Exception: except Exception:
logger.warning("ParseArtifactStore delete failed for doc_id={}", doc_id) logger.warning("ParseArtifactStore delete failed for doc_id={}", doc_id)
if self.document_processing_store:
try:
self.document_processing_store.delete_by_document(doc_id)
except Exception:
logger.warning("DocumentProcessingStore delete failed for doc_id={}", doc_id)
self.document_repository.delete(doc_id) self.document_repository.delete(doc_id)
return True return True
@@ -253,6 +488,7 @@ class DocumentCommandService:
regulation_type=document.regulation_type, regulation_type=document.regulation_type,
version=document.version, version=document.version,
generate_summary=bool(document.metadata.get("generate_summary", False)), generate_summary=bool(document.metadata.get("generate_summary", False)),
trigger_type="retry",
) )

View File

@@ -78,6 +78,7 @@ class Settings(BaseSettings):
chunk_overlap: int = Field(default=50, description="分块重叠大小") chunk_overlap: int = Field(default=50, description="分块重叠大小")
max_file_size_mb: int = Field(default=100, description="最大文件大小(MB)") max_file_size_mb: int = Field(default=100, description="最大文件大小(MB)")
document_metadata_path: str = Field(default="backend/data/documents.json", description="文档元数据存储路径") document_metadata_path: str = Field(default="backend/data/documents.json", description="文档元数据存储路径")
document_processing_metadata_path: str = Field(default="backend/data/document_processing.json", description="文档处理历史存储路径")
parser_backend: str = Field(default="aliyun", description="解析后端(local/aliyun)") parser_backend: str = Field(default="aliyun", description="解析后端(local/aliyun)")
chunk_backend: str = Field(default="aliyun", description="分块后端(local/aliyun)") chunk_backend: str = Field(default="aliyun", description="分块后端(local/aliyun)")
document_repository_backend: str = Field(default="json", description="文档元数据存储后端 (json/postgres)") document_repository_backend: str = Field(default="json", description="文档元数据存储后端 (json/postgres)")

View File

@@ -1,18 +1,29 @@
"""Initialize the app.domain.documents package.""" """Initialize the app.domain.documents package."""
from .models import Chunk, Document, DocumentStatus, ParsedDocument from .models import Chunk, Document, DocumentArtifact, DocumentProcessingRun, DocumentStatus, DocumentStatusEvent, ParsedDocument
from .ports import ChunkBuilder, DocumentBinaryStore, DocumentParser, DocumentRepository, ParseArtifactStore from .ports import (
ChunkBuilder,
DocumentBinaryStore,
DocumentParser,
DocumentProcessingStore,
DocumentRepository,
ParseArtifactStore,
)
# Keep package boundaries explicit so backend imports stay predictable. # Keep package boundaries explicit so backend imports stay predictable.
__all__ = [ __all__ = [
"Chunk", "Chunk",
"Document", "Document",
"DocumentArtifact",
"DocumentProcessingRun",
"DocumentStatus", "DocumentStatus",
"DocumentStatusEvent",
"ParsedDocument", "ParsedDocument",
"ChunkBuilder", "ChunkBuilder",
"DocumentBinaryStore", "DocumentBinaryStore",
"DocumentParser", "DocumentParser",
"DocumentProcessingStore",
"DocumentRepository", "DocumentRepository",
"ParseArtifactStore", "ParseArtifactStore",
] ]

View File

@@ -76,3 +76,61 @@ class Chunk:
semantic_id: str = "" semantic_id: str = ""
block_type: str = "" block_type: str = ""
metadata: dict[str, Any] = field(default_factory=dict) metadata: dict[str, Any] = field(default_factory=dict)
@dataclass
class DocumentProcessingRun:
    """Represent one processing attempt for a document.

    The four leading fields are required; every other field has a default so
    a run can be created at start and filled in as stages complete.
    """

    # --- identity -----------------------------------------------------------
    run_id: str
    doc_id: str
    trigger_type: str  # what started the run (e.g. "upload", "retry" — confirm full set with callers)
    run_status: str  # free-form status string (e.g. "running"; other values set by the store adapter)

    # --- configuration captured for the run ---------------------------------
    parser_backend: str = ""
    chunk_backend: str = ""
    embedding_model: str = ""
    index_name: str = ""

    # --- stage timestamps; None until the corresponding stage is recorded ----
    started_at: datetime = field(default_factory=utcnow)  # defaults to utcnow() at construction
    stored_at: datetime | None = None
    parsed_at: datetime | None = None
    indexed_at: datetime | None = None
    finished_at: datetime | None = None

    # --- counters reported by the parse and index stages --------------------
    layout_count: int = 0
    structure_node_count: int = 0
    semantic_block_count: int = 0
    vector_chunk_count: int = 0
    chunk_count: int = 0

    # --- failure details; empty strings when nothing has been recorded ------
    failure_stage: str = ""
    error_message: str = ""

    # Free-form extra data; a fresh dict per instance.
    metadata: dict[str, Any] = field(default_factory=dict)
@dataclass
class DocumentStatusEvent:
    """Represent a document lifecycle event emitted during processing.

    Each event records one status transition (``from_status`` -> ``to_status``)
    for a document within a specific processing run.
    """

    event_id: str
    doc_id: str
    run_id: str
    from_status: str  # status before the transition
    to_status: str  # status after the transition
    stage: str  # pipeline stage label associated with the transition
    message: str = ""  # optional human-readable description
    metadata: dict[str, Any] = field(default_factory=dict)  # fresh dict per instance
    occurred_at: datetime = field(default_factory=utcnow)  # defaults to utcnow() at construction
@dataclass
class DocumentArtifact:
    """Represent a persisted artifact reference for one processing run.

    This is a reference record only: it names the stored object via
    ``object_name`` rather than holding the artifact content.
    """

    artifact_id: str
    doc_id: str
    run_id: str
    artifact_type: str  # kind of artifact this reference points to
    object_name: str  # name/key of the stored object
    content_type: str  # media type of the stored object
    byte_size: int = 0  # 0 when the size was not recorded
    checksum: str = ""  # empty when no checksum was recorded
    metadata: dict[str, Any] = field(default_factory=dict)  # fresh dict per instance
    created_at: datetime = field(default_factory=utcnow)  # defaults to utcnow() at construction

View File

@@ -4,7 +4,7 @@ from __future__ import annotations
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from .models import Chunk, Document, DocumentStatus, ParsedDocument from .models import Chunk, Document, DocumentArtifact, DocumentProcessingRun, DocumentStatus, DocumentStatusEvent, ParsedDocument
# Keep domain contracts explicit so adapters can swap implementations cleanly. # Keep domain contracts explicit so adapters can swap implementations cleanly.
@@ -128,3 +128,111 @@ class ParseArtifactStore(ABC):
def get_structure_nodes(self, doc_id: str) -> list[dict]: def get_structure_nodes(self, doc_id: str) -> list[dict]:
"""Return all structure nodes for a document.""" """Return all structure nodes for a document."""
pass pass
class DocumentProcessingStore(ABC):
    """Persist document processing runs, events, and artifact references.

    This is an abstract port: every method must be provided by a concrete
    adapter. The ``mark_run_*`` methods return ``DocumentProcessingRun | None``;
    presumably ``None`` means the run id is unknown — confirm per adapter.
    Timestamp parameters are typed ``object | None`` here; adapters are
    expected to accept datetimes (TODO confirm whether other types are valid).
    """

    @abstractmethod
    def create_run(self, run: DocumentProcessingRun) -> DocumentProcessingRun:
        """Create a new processing run record and return the stored run."""
        pass

    @abstractmethod
    def mark_run_stored(
        self,
        run_id: str,
        *,
        stored_at: object | None = None,
        metadata: dict | None = None,
    ) -> DocumentProcessingRun | None:
        """Mark a run as having persisted the source file.

        Args:
            run_id: Identifier of the run to update.
            stored_at: Optional timestamp of the store step.
            metadata: Optional extra metadata for the run.
        """
        pass

    @abstractmethod
    def mark_run_parsed(
        self,
        run_id: str,
        *,
        parser_backend: str,
        layout_count: int,
        structure_node_count: int,
        semantic_block_count: int,
        vector_chunk_count: int,
        parsed_at: object | None = None,
        metadata: dict | None = None,
    ) -> DocumentProcessingRun | None:
        """Record parse completion details for a run.

        Args:
            run_id: Identifier of the run to update.
            parser_backend: Name of the parser that produced the result.
            layout_count: Number of layouts in the parse result.
            structure_node_count: Number of structure nodes produced.
            semantic_block_count: Number of semantic blocks produced.
            vector_chunk_count: Number of vector chunks produced.
            parsed_at: Optional timestamp of parse completion.
            metadata: Optional extra metadata for the run.
        """
        pass

    @abstractmethod
    def mark_run_indexed(
        self,
        run_id: str,
        *,
        chunk_count: int,
        index_name: str,
        indexed_at: object | None = None,
        finished_at: object | None = None,
        metadata: dict | None = None,
    ) -> DocumentProcessingRun | None:
        """Mark a run as successfully indexed.

        Args:
            run_id: Identifier of the run to update.
            chunk_count: Number of chunks written to the index.
            index_name: Name of the index that received the chunks.
            indexed_at: Optional timestamp of index completion.
            finished_at: Optional timestamp at which the run ended.
            metadata: Optional extra metadata for the run.
        """
        pass

    @abstractmethod
    def mark_run_failed(
        self,
        run_id: str,
        *,
        failure_stage: str,
        error_message: str,
        finished_at: object | None = None,
        metadata: dict | None = None,
    ) -> DocumentProcessingRun | None:
        """Mark a run as failed.

        Args:
            run_id: Identifier of the run to update.
            failure_stage: Pipeline stage in which the failure occurred.
            error_message: Description of the failure.
            finished_at: Optional timestamp at which the run ended.
            metadata: Optional extra metadata for the run.
        """
        pass

    @abstractmethod
    def append_status_event(self, event: DocumentStatusEvent) -> DocumentStatusEvent:
        """Append a document status event and return the stored event."""
        pass

    @abstractmethod
    def replace_artifacts_for_run(self, run_id: str, artifacts: list[DocumentArtifact]) -> list[DocumentArtifact]:
        """Replace all artifacts for a run with the provided list."""
        pass

    @abstractmethod
    def delete_by_document(self, doc_id: str) -> None:
        """Delete all processing data for a document."""
        pass

    @abstractmethod
    def list_runs_by_document(self, doc_id: str) -> list[DocumentProcessingRun]:
        """List all processing runs for a document."""
        pass

    @abstractmethod
    def get_run(self, run_id: str) -> DocumentProcessingRun | None:
        """Return one processing run by identifier, or None when there is none."""
        pass

    @abstractmethod
    def list_status_events_by_document(self, doc_id: str) -> list[DocumentStatusEvent]:
        """List status events for a document."""
        pass

    @abstractmethod
    def list_status_events_by_run(self, run_id: str) -> list[DocumentStatusEvent]:
        """List status events for a run."""
        pass

    @abstractmethod
    def list_artifacts_by_document(self, doc_id: str) -> list[DocumentArtifact]:
        """List artifact references for a document."""
        pass

    @abstractmethod
    def list_artifacts_by_run(self, run_id: str) -> list[DocumentArtifact]:
        """List artifact references for a run."""
        pass

View File

@@ -0,0 +1,373 @@
"""Implement infrastructure support for json document processing history."""
from __future__ import annotations
import json
from datetime import UTC, datetime
from pathlib import Path
from typing import Any
from app.domain.documents import DocumentArtifact, DocumentProcessingRun, DocumentProcessingStore, DocumentStatusEvent
# Keep JSON persistence behavior aligned with the lightweight document repository adapter.
class JsonDocumentProcessingStore(DocumentProcessingStore):
"""Persist processing history in a standalone JSON file."""
def __init__(self, file_path: str) -> None:
"""Initialize the JSON processing history store."""
self.file_path = Path(file_path)
self.file_path.parent.mkdir(parents=True, exist_ok=True)
if not self.file_path.exists():
self._save(self._empty_payload())
def _empty_payload(self) -> dict[str, dict[str, dict[str, Any]]]:
"""Return the canonical empty JSON structure for processing history."""
return {"runs": {}, "status_events": {}, "artifacts": {}}
def _load(self) -> dict[str, dict[str, dict[str, Any]]]:
"""Load the full JSON payload and normalize missing sections."""
if not self.file_path.exists():
return self._empty_payload()
payload = json.loads(self.file_path.read_text(encoding="utf-8") or "{}")
normalized = self._empty_payload()
for key in normalized:
section = payload.get(key, {})
normalized[key] = section if isinstance(section, dict) else {}
return normalized
def _save(self, payload: dict[str, dict[str, dict[str, Any]]]) -> None:
"""Persist the full JSON payload with stable formatting."""
self.file_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
def _serialize_datetime(self, value: datetime | None) -> str | None:
"""Serialize optional datetimes into ISO8601 strings."""
return value.isoformat() if value is not None else None
def _deserialize_datetime(self, value: str | None) -> datetime | None:
"""Deserialize optional ISO8601 strings into datetimes."""
return datetime.fromisoformat(value) if value else None
def _serialize_run(self, run: DocumentProcessingRun) -> dict[str, Any]:
"""Serialize one processing run to a JSON-compatible payload."""
return {
"run_id": run.run_id,
"doc_id": run.doc_id,
"trigger_type": run.trigger_type,
"run_status": run.run_status,
"parser_backend": run.parser_backend,
"chunk_backend": run.chunk_backend,
"embedding_model": run.embedding_model,
"index_name": run.index_name,
"started_at": self._serialize_datetime(run.started_at),
"stored_at": self._serialize_datetime(run.stored_at),
"parsed_at": self._serialize_datetime(run.parsed_at),
"indexed_at": self._serialize_datetime(run.indexed_at),
"finished_at": self._serialize_datetime(run.finished_at),
"layout_count": run.layout_count,
"structure_node_count": run.structure_node_count,
"semantic_block_count": run.semantic_block_count,
"vector_chunk_count": run.vector_chunk_count,
"chunk_count": run.chunk_count,
"failure_stage": run.failure_stage,
"error_message": run.error_message,
"metadata": run.metadata,
}
def _deserialize_run(self, payload: dict[str, Any]) -> DocumentProcessingRun:
    """Deserialize one JSON payload into a processing run dataclass.

    Args:
        payload: Stored run record. ``run_id``, ``doc_id``, ``trigger_type``
            and ``run_status`` are required; everything else is optional.

    Returns:
        The reconstructed run. Missing or null counters become 0, missing or
        null metadata becomes an empty dict, and a missing ``started_at``
        falls back to the current UTC time.

    Raises:
        KeyError: If one of the required keys is absent.
    """
    return DocumentProcessingRun(
        run_id=payload["run_id"],
        doc_id=payload["doc_id"],
        trigger_type=payload["trigger_type"],
        run_status=payload["run_status"],
        parser_backend=payload.get("parser_backend", ""),
        chunk_backend=payload.get("chunk_backend", ""),
        embedding_model=payload.get("embedding_model", ""),
        index_name=payload.get("index_name", ""),
        started_at=self._deserialize_datetime(payload.get("started_at")) or datetime.now(UTC),
        stored_at=self._deserialize_datetime(payload.get("stored_at")),
        parsed_at=self._deserialize_datetime(payload.get("parsed_at")),
        indexed_at=self._deserialize_datetime(payload.get("indexed_at")),
        finished_at=self._deserialize_datetime(payload.get("finished_at")),
        layout_count=int(payload.get("layout_count", 0) or 0),
        structure_node_count=int(payload.get("structure_node_count", 0) or 0),
        semantic_block_count=int(payload.get("semantic_block_count", 0) or 0),
        vector_chunk_count=int(payload.get("vector_chunk_count", 0) or 0),
        chunk_count=int(payload.get("chunk_count", 0) or 0),
        failure_stage=payload.get("failure_stage", ""),
        error_message=payload.get("error_message", ""),
        # A stored JSON null must not surface as None: later metadata merges
        # expect a dict, mirroring the "or 0" guards on the counters above.
        metadata=payload.get("metadata") or {},
    )
def _serialize_event(self, event: DocumentStatusEvent) -> dict[str, Any]:
"""Serialize one status event to a JSON-compatible payload."""
return {
"event_id": event.event_id,
"doc_id": event.doc_id,
"run_id": event.run_id,
"from_status": event.from_status,
"to_status": event.to_status,
"stage": event.stage,
"message": event.message,
"metadata": event.metadata,
"occurred_at": self._serialize_datetime(event.occurred_at),
}
def _deserialize_event(self, payload: dict[str, Any]) -> DocumentStatusEvent:
    """Deserialize one JSON payload into a status event dataclass.

    Args:
        payload: Stored event record. ``event_id``, ``doc_id``, ``run_id``
            and ``to_status`` are required; everything else is optional.

    Returns:
        The reconstructed event. Missing or null metadata becomes an empty
        dict and a missing ``occurred_at`` falls back to the current UTC time.

    Raises:
        KeyError: If one of the required keys is absent.
    """
    return DocumentStatusEvent(
        event_id=payload["event_id"],
        doc_id=payload["doc_id"],
        run_id=payload["run_id"],
        from_status=payload.get("from_status", ""),
        to_status=payload["to_status"],
        stage=payload.get("stage", ""),
        message=payload.get("message", ""),
        # A stored JSON null must not surface as None; callers expect a dict.
        metadata=payload.get("metadata") or {},
        occurred_at=self._deserialize_datetime(payload.get("occurred_at")) or datetime.now(UTC),
    )
def _serialize_artifact(self, artifact: DocumentArtifact) -> dict[str, Any]:
"""Serialize one artifact reference to a JSON-compatible payload."""
return {
"artifact_id": artifact.artifact_id,
"doc_id": artifact.doc_id,
"run_id": artifact.run_id,
"artifact_type": artifact.artifact_type,
"object_name": artifact.object_name,
"content_type": artifact.content_type,
"byte_size": artifact.byte_size,
"checksum": artifact.checksum,
"metadata": artifact.metadata,
"created_at": self._serialize_datetime(artifact.created_at),
}
def _deserialize_artifact(self, payload: dict[str, Any]) -> DocumentArtifact:
    """Deserialize one JSON payload into an artifact dataclass.

    Args:
        payload: Stored artifact record. ``artifact_id``, ``doc_id``,
            ``run_id``, ``artifact_type`` and ``object_name`` are required;
            everything else is optional.

    Returns:
        The reconstructed artifact reference. A missing or null size becomes
        0, missing or null metadata becomes an empty dict, and a missing
        ``created_at`` falls back to the current UTC time.

    Raises:
        KeyError: If one of the required keys is absent.
    """
    return DocumentArtifact(
        artifact_id=payload["artifact_id"],
        doc_id=payload["doc_id"],
        run_id=payload["run_id"],
        artifact_type=payload["artifact_type"],
        object_name=payload["object_name"],
        content_type=payload.get("content_type", ""),
        byte_size=int(payload.get("byte_size", 0) or 0),
        checksum=payload.get("checksum", ""),
        # A stored JSON null must not surface as None, mirroring the "or 0"
        # guard on byte_size above.
        metadata=payload.get("metadata") or {},
        created_at=self._deserialize_datetime(payload.get("created_at")) or datetime.now(UTC),
    )
def _merge_metadata(self, original: dict[str, Any], update: dict | None) -> dict[str, Any]:
"""Merge metadata updates onto an existing payload."""
merged = dict(original)
if update:
merged.update(update)
return merged
def create_run(self, run: DocumentProcessingRun) -> DocumentProcessingRun:
    """Store ``run`` under its ``run_id`` and hand the same object back.

    An existing record with the same id is overwritten.
    """
    snapshot = self._load()
    serialized = self._serialize_run(run)
    snapshot["runs"].update({run.run_id: serialized})
    self._save(snapshot)
    return run
def mark_run_stored(
self,
run_id: str,
*,
stored_at: datetime | None = None,
metadata: dict | None = None,
) -> DocumentProcessingRun | None:
"""Mark a run as having persisted the source file."""
payload = self._load()
run_payload = payload["runs"].get(run_id)
if not run_payload:
return None
run = self._deserialize_run(run_payload)
run.stored_at = stored_at or datetime.now(UTC)
run.metadata = self._merge_metadata(run.metadata, metadata)
payload["runs"][run_id] = self._serialize_run(run)
self._save(payload)
return run
def mark_run_parsed(
self,
run_id: str,
*,
parser_backend: str,
layout_count: int,
structure_node_count: int,
semantic_block_count: int,
vector_chunk_count: int,
parsed_at: datetime | None = None,
metadata: dict | None = None,
) -> DocumentProcessingRun | None:
"""Record parse completion details for a run."""
payload = self._load()
run_payload = payload["runs"].get(run_id)
if not run_payload:
return None
run = self._deserialize_run(run_payload)
run.parser_backend = parser_backend
run.layout_count = layout_count
run.structure_node_count = structure_node_count
run.semantic_block_count = semantic_block_count
run.vector_chunk_count = vector_chunk_count
run.parsed_at = parsed_at or datetime.now(UTC)
run.metadata = self._merge_metadata(run.metadata, metadata)
payload["runs"][run_id] = self._serialize_run(run)
self._save(payload)
return run
def mark_run_indexed(
self,
run_id: str,
*,
chunk_count: int,
index_name: str,
indexed_at: datetime | None = None,
finished_at: datetime | None = None,
metadata: dict | None = None,
) -> DocumentProcessingRun | None:
"""Mark a run as successfully indexed."""
payload = self._load()
run_payload = payload["runs"].get(run_id)
if not run_payload:
return None
run = self._deserialize_run(run_payload)
now = datetime.now(UTC)
run.run_status = "succeeded"
run.chunk_count = chunk_count
run.index_name = index_name
run.indexed_at = indexed_at or now
run.finished_at = finished_at or now
run.metadata = self._merge_metadata(run.metadata, metadata)
payload["runs"][run_id] = self._serialize_run(run)
self._save(payload)
return run
def mark_run_failed(
self,
run_id: str,
*,
failure_stage: str,
error_message: str,
finished_at: datetime | None = None,
metadata: dict | None = None,
) -> DocumentProcessingRun | None:
"""Mark a run as failed."""
payload = self._load()
run_payload = payload["runs"].get(run_id)
if not run_payload:
return None
run = self._deserialize_run(run_payload)
run.run_status = "failed"
run.failure_stage = failure_stage
run.error_message = error_message
run.finished_at = finished_at or datetime.now(UTC)
run.metadata = self._merge_metadata(run.metadata, metadata)
payload["runs"][run_id] = self._serialize_run(run)
self._save(payload)
return run
def append_status_event(self, event: DocumentStatusEvent) -> DocumentStatusEvent:
    """Store ``event`` under its ``event_id`` and hand the same object back."""
    snapshot = self._load()
    serialized = self._serialize_event(event)
    snapshot["status_events"].update({event.event_id: serialized})
    self._save(snapshot)
    return event
def replace_artifacts_for_run(self, run_id: str, artifacts: list[DocumentArtifact]) -> list[DocumentArtifact]:
    """Swap the stored artifact references of one run for ``artifacts``.

    Records belonging to other runs are kept in their existing order; the new
    references are then added keyed by ``artifact_id``. Returns the list that
    was passed in.
    """
    snapshot = self._load()
    retained: dict = {}
    for stored_id, stored_artifact in snapshot["artifacts"].items():
        if stored_artifact.get("run_id") == run_id:
            continue
        retained[stored_id] = stored_artifact
    snapshot["artifacts"] = retained
    for artifact in artifacts:
        retained[artifact.artifact_id] = self._serialize_artifact(artifact)
    self._save(snapshot)
    return artifacts
def delete_by_document(self, doc_id: str) -> None:
    """Drop every run, status event, and artifact recorded for ``doc_id``."""
    store = self._load()
    # The three sections share the same shape: id -> payload carrying "doc_id".
    for section in ("runs", "status_events", "artifacts"):
        store[section] = {
            entry_id: entry
            for entry_id, entry in store[section].items()
            if entry.get("doc_id") != doc_id
        }
    self._save(store)
def list_runs_by_document(self, doc_id: str) -> list[DocumentProcessingRun]:
    """Return the runs recorded for ``doc_id``, ordered by ``started_at`` ascending."""
    stored_runs = self._load()["runs"]
    matching = (
        self._deserialize_run(raw)
        for raw in stored_runs.values()
        if raw.get("doc_id") == doc_id
    )
    return sorted(matching, key=lambda item: item.started_at)
def get_run(self, run_id: str) -> DocumentProcessingRun | None:
    """Look up a single run by id; ``None`` when nothing is stored for it."""
    raw_run = self._load()["runs"].get(run_id)
    if not raw_run:
        return None
    return self._deserialize_run(raw_run)
def list_status_events_by_document(self, doc_id: str) -> list[DocumentStatusEvent]:
    """Return the status events of ``doc_id``, ordered by ``occurred_at`` ascending."""
    stored_events = self._load()["status_events"]
    matching = (
        self._deserialize_event(raw)
        for raw in stored_events.values()
        if raw.get("doc_id") == doc_id
    )
    return sorted(matching, key=lambda item: item.occurred_at)
def list_status_events_by_run(self, run_id: str) -> list[DocumentStatusEvent]:
    """Return the status events of ``run_id``, ordered by ``occurred_at`` ascending."""
    stored_events = self._load()["status_events"]
    matching = (
        self._deserialize_event(raw)
        for raw in stored_events.values()
        if raw.get("run_id") == run_id
    )
    return sorted(matching, key=lambda item: item.occurred_at)
def list_artifacts_by_document(self, doc_id: str) -> list[DocumentArtifact]:
    """Return the artifact references of ``doc_id``, ordered by ``created_at`` ascending."""
    stored_artifacts = self._load()["artifacts"]
    matching = (
        self._deserialize_artifact(raw)
        for raw in stored_artifacts.values()
        if raw.get("doc_id") == doc_id
    )
    return sorted(matching, key=lambda item: item.created_at)
def list_artifacts_by_run(self, run_id: str) -> list[DocumentArtifact]:
    """Return the artifact references of ``run_id``, ordered by ``created_at`` ascending."""
    stored_artifacts = self._load()["artifacts"]
    matching = (
        self._deserialize_artifact(raw)
        for raw in stored_artifacts.values()
        if raw.get("run_id") == run_id
    )
    return sorted(matching, key=lambda item: item.created_at)

View File

@@ -0,0 +1,466 @@
"""Implement infrastructure support for postgres document processing history."""
from __future__ import annotations
import json
from contextlib import contextmanager
from datetime import UTC, datetime
from typing import Any
import psycopg2
import psycopg2.extras
from psycopg2.pool import ThreadedConnectionPool
from app.config.settings import settings
from app.domain.documents import DocumentArtifact, DocumentProcessingRun, DocumentProcessingStore, DocumentStatusEvent
# Keep SQL mapping local to this adapter so the domain stays storage-agnostic.
# DDL for the run table: one row per processing run, keyed by run_id.
# Rows reference documents(doc_id) with ON DELETE CASCADE, so they disappear
# together with their parent document. The trailing index covers lookups by
# doc_id combined with started_at.
_CREATE_RUNS_TABLE = """
CREATE TABLE IF NOT EXISTS document_processing_runs (
run_id VARCHAR(128) PRIMARY KEY,
doc_id VARCHAR(128) NOT NULL,
trigger_type VARCHAR(32) NOT NULL,
run_status VARCHAR(32) NOT NULL DEFAULT 'running',
parser_backend VARCHAR(128) NOT NULL DEFAULT '',
chunk_backend VARCHAR(128) NOT NULL DEFAULT '',
embedding_model VARCHAR(256) NOT NULL DEFAULT '',
index_name VARCHAR(128) NOT NULL DEFAULT '',
started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
stored_at TIMESTAMPTZ,
parsed_at TIMESTAMPTZ,
indexed_at TIMESTAMPTZ,
finished_at TIMESTAMPTZ,
layout_count INTEGER NOT NULL DEFAULT 0,
structure_node_count INTEGER NOT NULL DEFAULT 0,
semantic_block_count INTEGER NOT NULL DEFAULT 0,
vector_chunk_count INTEGER NOT NULL DEFAULT 0,
chunk_count INTEGER NOT NULL DEFAULT 0,
failure_stage VARCHAR(64) NOT NULL DEFAULT '',
error_message TEXT NOT NULL DEFAULT '',
metadata JSONB NOT NULL DEFAULT '{}',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
CONSTRAINT fk_dpr_doc FOREIGN KEY (doc_id) REFERENCES documents(doc_id) ON DELETE CASCADE
);
CREATE INDEX IF NOT EXISTS idx_document_processing_runs_doc_id ON document_processing_runs(doc_id, started_at DESC);
"""
# DDL for the status-history table: one row per status transition event.
# Each event references both its document and its run with ON DELETE CASCADE,
# so this table must be created after document_processing_runs. Two indexes
# cover the per-document and per-run listings ordered by occurred_at.
_CREATE_EVENTS_TABLE = """
CREATE TABLE IF NOT EXISTS document_status_history (
event_id VARCHAR(128) PRIMARY KEY,
doc_id VARCHAR(128) NOT NULL,
run_id VARCHAR(128) NOT NULL,
from_status VARCHAR(32) NOT NULL DEFAULT '',
to_status VARCHAR(32) NOT NULL,
stage VARCHAR(64) NOT NULL DEFAULT '',
message TEXT NOT NULL DEFAULT '',
metadata JSONB NOT NULL DEFAULT '{}',
occurred_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
CONSTRAINT fk_dsh_doc FOREIGN KEY (doc_id) REFERENCES documents(doc_id) ON DELETE CASCADE,
CONSTRAINT fk_dsh_run FOREIGN KEY (run_id) REFERENCES document_processing_runs(run_id) ON DELETE CASCADE
);
CREATE INDEX IF NOT EXISTS idx_document_status_history_doc_id ON document_status_history(doc_id, occurred_at ASC);
CREATE INDEX IF NOT EXISTS idx_document_status_history_run_id ON document_status_history(run_id, occurred_at ASC);
"""
# DDL for the artifact table: one row per artifact reference (object name,
# content type, size, checksum) produced by a run. Rows reference both the
# document and the run with ON DELETE CASCADE, so this table must be created
# after document_processing_runs. Two indexes cover the per-document and
# per-run listings ordered by created_at.
_CREATE_ARTIFACTS_TABLE = """
CREATE TABLE IF NOT EXISTS document_artifacts (
artifact_id VARCHAR(128) PRIMARY KEY,
doc_id VARCHAR(128) NOT NULL,
run_id VARCHAR(128) NOT NULL,
artifact_type VARCHAR(64) NOT NULL,
object_name VARCHAR(1024) NOT NULL,
content_type VARCHAR(128) NOT NULL DEFAULT '',
byte_size BIGINT NOT NULL DEFAULT 0,
checksum VARCHAR(256) NOT NULL DEFAULT '',
metadata JSONB NOT NULL DEFAULT '{}',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
CONSTRAINT fk_da_doc FOREIGN KEY (doc_id) REFERENCES documents(doc_id) ON DELETE CASCADE,
CONSTRAINT fk_da_run FOREIGN KEY (run_id) REFERENCES document_processing_runs(run_id) ON DELETE CASCADE
);
CREATE INDEX IF NOT EXISTS idx_document_artifacts_doc_id ON document_artifacts(doc_id, created_at ASC);
CREATE INDEX IF NOT EXISTS idx_document_artifacts_run_id ON document_artifacts(run_id, created_at ASC);
"""
class PostgresDocumentProcessingStore(DocumentProcessingStore):
    """Persist processing history in PostgreSQL using handwritten SQL.

    Runs, status events, and artifact references are kept in the three tables
    declared by the module-level DDL constants. Every operation borrows one
    pooled connection for its statements; write operations commit before
    returning.
    """

    def __init__(self) -> None:
        """Open the connection pool and ensure the required tables exist.

        If the schema bootstrap fails, the freshly opened pool is closed
        before the error is re-raised so no connections are left behind.
        """
        self._pool = ThreadedConnectionPool(
            minconn=1,
            maxconn=5,
            host=settings.postgres_host,
            port=settings.postgres_port,
            user=settings.postgres_user,
            password=settings.postgres_password,
            dbname=settings.postgres_db,
        )
        try:
            self._ensure_schema()
        except Exception:
            # The instance is unusable at this point; release its connections.
            self._pool.closeall()
            raise

    def _ensure_schema(self) -> None:
        """Create processing history tables and indexes if they are missing."""
        with self._conn() as conn:
            with conn.cursor() as cur:
                # Runs first: the other two tables hold foreign keys to it.
                cur.execute(_CREATE_RUNS_TABLE)
                cur.execute(_CREATE_EVENTS_TABLE)
                cur.execute(_CREATE_ARTIFACTS_TABLE)
            conn.commit()

    @contextmanager
    def _conn(self):
        """Borrow one connection from the pool and always hand it back."""
        conn = self._pool.getconn()
        try:
            yield conn
        finally:
            self._pool.putconn(conn)

    def _normalize_metadata(self, value: Any) -> dict[str, Any]:
        """Return metadata as a dict regardless of how the driver delivered it.

        Dicts pass through untouched, falsy values become ``{}``, and anything
        else is decoded with ``json.loads``.
        """
        if isinstance(value, dict):
            return value
        if not value:
            return {}
        return json.loads(value)

    def _row_to_run(self, row: dict[str, Any]) -> DocumentProcessingRun:
        """Map one run row into the domain dataclass."""
        return DocumentProcessingRun(
            run_id=row["run_id"],
            doc_id=row["doc_id"],
            trigger_type=row["trigger_type"],
            run_status=row["run_status"],
            parser_backend=row["parser_backend"],
            chunk_backend=row["chunk_backend"],
            embedding_model=row["embedding_model"],
            index_name=row["index_name"],
            started_at=row["started_at"],
            stored_at=row["stored_at"],
            parsed_at=row["parsed_at"],
            indexed_at=row["indexed_at"],
            finished_at=row["finished_at"],
            layout_count=row["layout_count"],
            structure_node_count=row["structure_node_count"],
            semantic_block_count=row["semantic_block_count"],
            vector_chunk_count=row["vector_chunk_count"],
            chunk_count=row["chunk_count"],
            failure_stage=row["failure_stage"],
            error_message=row["error_message"],
            metadata=self._normalize_metadata(row["metadata"]),
        )

    def _row_to_event(self, row: dict[str, Any]) -> DocumentStatusEvent:
        """Map one event row into the domain dataclass."""
        return DocumentStatusEvent(
            event_id=row["event_id"],
            doc_id=row["doc_id"],
            run_id=row["run_id"],
            from_status=row["from_status"],
            to_status=row["to_status"],
            stage=row["stage"],
            message=row["message"],
            metadata=self._normalize_metadata(row["metadata"]),
            occurred_at=row["occurred_at"],
        )

    def _row_to_artifact(self, row: dict[str, Any]) -> DocumentArtifact:
        """Map one artifact row into the domain dataclass."""
        return DocumentArtifact(
            artifact_id=row["artifact_id"],
            doc_id=row["doc_id"],
            run_id=row["run_id"],
            artifact_type=row["artifact_type"],
            object_name=row["object_name"],
            content_type=row["content_type"],
            byte_size=row["byte_size"],
            checksum=row["checksum"],
            metadata=self._normalize_metadata(row["metadata"]),
            created_at=row["created_at"],
        )

    def _fetch_rows(self, sql: str, params: tuple[Any, ...]) -> list[dict[str, Any]]:
        """Run one read query and return every result row as a plain dict."""
        with self._conn() as conn:
            with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
                cur.execute(sql, params)
                return [dict(row) for row in cur.fetchall()]

    def _update_run(
        self,
        run_id: str,
        *,
        assignments: dict[str, Any],
        metadata: dict | None = None,
    ) -> DocumentProcessingRun | None:
        """Update one run row and return the latest stored state.

        Args:
            run_id: Identifier of the run row to update.
            assignments: Column name to new value. The keys are interpolated
                into the SQL text as column names, so they must only ever be
                trusted, hard-coded identifiers; the values are bound as
                query parameters.
            metadata: Optional object merged into the stored ``metadata``
                column with the JSONB ``||`` operator.

        Returns:
            The updated run, or ``None`` when no row matches ``run_id``.
        """
        set_clauses = []
        params: dict[str, Any] = {"run_id": run_id, "updated_at": datetime.now(UTC)}
        for key, value in assignments.items():
            set_clauses.append(f"{key} = %({key})s")
            params[key] = value
        set_clauses.append("updated_at = %(updated_at)s")
        if metadata is not None:
            set_clauses.append("metadata = COALESCE(metadata, '{}'::jsonb) || %(metadata)s::jsonb")
            params["metadata"] = json.dumps(metadata, ensure_ascii=False)
        sql = f"""
        UPDATE document_processing_runs
        SET {", ".join(set_clauses)}
        WHERE run_id = %(run_id)s
        RETURNING *
        """
        with self._conn() as conn:
            with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
                cur.execute(sql, params)
                row = cur.fetchone()
            conn.commit()
        return self._row_to_run(dict(row)) if row else None

    def create_run(self, run: DocumentProcessingRun) -> DocumentProcessingRun:
        """Insert a new processing run row and return the run unchanged."""
        sql = """
        INSERT INTO document_processing_runs
        (run_id, doc_id, trigger_type, run_status, parser_backend, chunk_backend,
        embedding_model, index_name, started_at, stored_at, parsed_at, indexed_at,
        finished_at, layout_count, structure_node_count, semantic_block_count,
        vector_chunk_count, chunk_count, failure_stage, error_message, metadata)
        VALUES
        (%(run_id)s, %(doc_id)s, %(trigger_type)s, %(run_status)s, %(parser_backend)s,
        %(chunk_backend)s, %(embedding_model)s, %(index_name)s, %(started_at)s,
        %(stored_at)s, %(parsed_at)s, %(indexed_at)s, %(finished_at)s, %(layout_count)s,
        %(structure_node_count)s, %(semantic_block_count)s, %(vector_chunk_count)s,
        %(chunk_count)s, %(failure_stage)s, %(error_message)s, %(metadata)s)
        """
        with self._conn() as conn:
            with conn.cursor() as cur:
                cur.execute(
                    sql,
                    {
                        "run_id": run.run_id,
                        "doc_id": run.doc_id,
                        "trigger_type": run.trigger_type,
                        "run_status": run.run_status,
                        "parser_backend": run.parser_backend,
                        "chunk_backend": run.chunk_backend,
                        "embedding_model": run.embedding_model,
                        "index_name": run.index_name,
                        "started_at": run.started_at,
                        "stored_at": run.stored_at,
                        "parsed_at": run.parsed_at,
                        "indexed_at": run.indexed_at,
                        "finished_at": run.finished_at,
                        "layout_count": run.layout_count,
                        "structure_node_count": run.structure_node_count,
                        "semantic_block_count": run.semantic_block_count,
                        "vector_chunk_count": run.vector_chunk_count,
                        "chunk_count": run.chunk_count,
                        "failure_stage": run.failure_stage,
                        "error_message": run.error_message,
                        # JSONB column: send the dict as serialized JSON text.
                        "metadata": json.dumps(run.metadata, ensure_ascii=False),
                    },
                )
            conn.commit()
        return run

    def mark_run_stored(
        self,
        run_id: str,
        *,
        stored_at: datetime | None = None,
        metadata: dict | None = None,
    ) -> DocumentProcessingRun | None:
        """Mark a run as having persisted its source file.

        ``stored_at`` defaults to the current UTC time. Returns the updated
        run, or ``None`` when the run does not exist.
        """
        return self._update_run(
            run_id,
            assignments={"stored_at": stored_at or datetime.now(UTC)},
            metadata=metadata,
        )

    def mark_run_parsed(
        self,
        run_id: str,
        *,
        parser_backend: str,
        layout_count: int,
        structure_node_count: int,
        semantic_block_count: int,
        vector_chunk_count: int,
        parsed_at: datetime | None = None,
        metadata: dict | None = None,
    ) -> DocumentProcessingRun | None:
        """Record parse completion metrics for a run.

        ``parsed_at`` defaults to the current UTC time. Returns the updated
        run, or ``None`` when the run does not exist.
        """
        return self._update_run(
            run_id,
            assignments={
                "parser_backend": parser_backend,
                "parsed_at": parsed_at or datetime.now(UTC),
                "layout_count": layout_count,
                "structure_node_count": structure_node_count,
                "semantic_block_count": semantic_block_count,
                "vector_chunk_count": vector_chunk_count,
            },
            metadata=metadata,
        )

    def mark_run_indexed(
        self,
        run_id: str,
        *,
        chunk_count: int,
        index_name: str,
        indexed_at: datetime | None = None,
        finished_at: datetime | None = None,
        metadata: dict | None = None,
    ) -> DocumentProcessingRun | None:
        """Mark a run as successfully indexed.

        Sets ``run_status`` to ``"succeeded"``. ``indexed_at`` and
        ``finished_at`` default to the same current UTC timestamp. Returns
        the updated run, or ``None`` when the run does not exist.
        """
        now = datetime.now(UTC)
        return self._update_run(
            run_id,
            assignments={
                "run_status": "succeeded",
                "chunk_count": chunk_count,
                "index_name": index_name,
                "indexed_at": indexed_at or now,
                "finished_at": finished_at or now,
            },
            metadata=metadata,
        )

    def mark_run_failed(
        self,
        run_id: str,
        *,
        failure_stage: str,
        error_message: str,
        finished_at: datetime | None = None,
        metadata: dict | None = None,
    ) -> DocumentProcessingRun | None:
        """Mark a run as failed and persist the terminal error details.

        ``finished_at`` defaults to the current UTC time. Returns the updated
        run, or ``None`` when the run does not exist.
        """
        return self._update_run(
            run_id,
            assignments={
                "run_status": "failed",
                "failure_stage": failure_stage,
                "error_message": error_message,
                "finished_at": finished_at or datetime.now(UTC),
            },
            metadata=metadata,
        )

    def append_status_event(self, event: DocumentStatusEvent) -> DocumentStatusEvent:
        """Insert one document status event row and return the event unchanged."""
        sql = """
        INSERT INTO document_status_history
        (event_id, doc_id, run_id, from_status, to_status, stage, message, metadata, occurred_at)
        VALUES
        (%(event_id)s, %(doc_id)s, %(run_id)s, %(from_status)s, %(to_status)s,
        %(stage)s, %(message)s, %(metadata)s, %(occurred_at)s)
        """
        with self._conn() as conn:
            with conn.cursor() as cur:
                cur.execute(
                    sql,
                    {
                        "event_id": event.event_id,
                        "doc_id": event.doc_id,
                        "run_id": event.run_id,
                        "from_status": event.from_status,
                        "to_status": event.to_status,
                        "stage": event.stage,
                        "message": event.message,
                        "metadata": json.dumps(event.metadata, ensure_ascii=False),
                        "occurred_at": event.occurred_at,
                    },
                )
            conn.commit()
        return event

    def replace_artifacts_for_run(self, run_id: str, artifacts: list[DocumentArtifact]) -> list[DocumentArtifact]:
        """Replace all artifact references for one run using delete-then-insert.

        The delete and the bulk insert are committed together, so readers
        never observe the run with a partially replaced artifact set.
        """
        with self._conn() as conn:
            with conn.cursor() as cur:
                cur.execute("DELETE FROM document_artifacts WHERE run_id = %s", (run_id,))
                if artifacts:
                    psycopg2.extras.execute_values(
                        cur,
                        """
                        INSERT INTO document_artifacts
                        (artifact_id, doc_id, run_id, artifact_type, object_name,
                        content_type, byte_size, checksum, metadata, created_at)
                        VALUES %s
                        """,
                        [
                            (
                                artifact.artifact_id,
                                artifact.doc_id,
                                artifact.run_id,
                                artifact.artifact_type,
                                artifact.object_name,
                                artifact.content_type,
                                artifact.byte_size,
                                artifact.checksum,
                                json.dumps(artifact.metadata, ensure_ascii=False),
                                artifact.created_at,
                            )
                            for artifact in artifacts
                        ],
                    )
            conn.commit()
        return artifacts

    def delete_by_document(self, doc_id: str) -> None:
        """Delete all processing rows for a document explicitly."""
        with self._conn() as conn:
            with conn.cursor() as cur:
                # Child tables first, then the runs they reference.
                cur.execute("DELETE FROM document_status_history WHERE doc_id = %s", (doc_id,))
                cur.execute("DELETE FROM document_artifacts WHERE doc_id = %s", (doc_id,))
                cur.execute("DELETE FROM document_processing_runs WHERE doc_id = %s", (doc_id,))
            conn.commit()

    def list_runs_by_document(self, doc_id: str) -> list[DocumentProcessingRun]:
        """List processing runs for a document in chronological order."""
        sql = "SELECT * FROM document_processing_runs WHERE doc_id = %s ORDER BY started_at ASC"
        return [self._row_to_run(row) for row in self._fetch_rows(sql, (doc_id,))]

    def get_run(self, run_id: str) -> DocumentProcessingRun | None:
        """Return one processing run by identifier, or ``None`` if absent."""
        sql = "SELECT * FROM document_processing_runs WHERE run_id = %s"
        # run_id is the primary key, so at most one row can match.
        rows = self._fetch_rows(sql, (run_id,))
        return self._row_to_run(rows[0]) if rows else None

    def list_status_events_by_document(self, doc_id: str) -> list[DocumentStatusEvent]:
        """List all status events for a document, oldest first."""
        sql = "SELECT * FROM document_status_history WHERE doc_id = %s ORDER BY occurred_at ASC"
        return [self._row_to_event(row) for row in self._fetch_rows(sql, (doc_id,))]

    def list_status_events_by_run(self, run_id: str) -> list[DocumentStatusEvent]:
        """List all status events for a run, oldest first."""
        sql = "SELECT * FROM document_status_history WHERE run_id = %s ORDER BY occurred_at ASC"
        return [self._row_to_event(row) for row in self._fetch_rows(sql, (run_id,))]

    def list_artifacts_by_document(self, doc_id: str) -> list[DocumentArtifact]:
        """List all artifact references for a document, oldest first."""
        sql = "SELECT * FROM document_artifacts WHERE doc_id = %s ORDER BY created_at ASC"
        return [self._row_to_artifact(row) for row in self._fetch_rows(sql, (doc_id,))]

    def list_artifacts_by_run(self, run_id: str) -> list[DocumentArtifact]:
        """List all artifact references for a run, oldest first."""
        sql = "SELECT * FROM document_artifacts WHERE run_id = %s ORDER BY created_at ASC"
        return [self._row_to_artifact(row) for row in self._fetch_rows(sql, (run_id,))]

View File

@@ -20,8 +20,10 @@ from app.infrastructure.parser.local_document_parser import LocalDocumentParser
from app.infrastructure.parser.vector_chunk_builder import AliyunVectorChunkBuilder from app.infrastructure.parser.vector_chunk_builder import AliyunVectorChunkBuilder
from app.infrastructure.perception.mock_event_store import MockEventStore from app.infrastructure.perception.mock_event_store import MockEventStore
from app.infrastructure.session.in_memory_conversation_store import InMemoryConversationStore from app.infrastructure.session.in_memory_conversation_store import InMemoryConversationStore
from app.infrastructure.storage.json_document_processing_store import JsonDocumentProcessingStore
from app.infrastructure.storage.json_document_repository import JsonDocumentRepository from app.infrastructure.storage.json_document_repository import JsonDocumentRepository
from app.infrastructure.storage.minio_binary_store import MinioDocumentBinaryStore from app.infrastructure.storage.minio_binary_store import MinioDocumentBinaryStore
from app.infrastructure.storage.postgres_document_processing_store import PostgresDocumentProcessingStore
from app.infrastructure.storage.postgres_document_repository import PostgresDocumentRepository from app.infrastructure.storage.postgres_document_repository import PostgresDocumentRepository
from app.infrastructure.storage.postgres_parse_artifact_store import PostgresParseArtifactStore from app.infrastructure.storage.postgres_parse_artifact_store import PostgresParseArtifactStore
from app.infrastructure.vectorstore.bm25_retriever import BM25Retriever from app.infrastructure.vectorstore.bm25_retriever import BM25Retriever
@@ -148,6 +150,14 @@ def get_parse_artifact_store():
return None return None
@lru_cache
def get_document_processing_store():
    """Build the processing store that matches the configured repository backend.

    The PostgreSQL store is used when ``settings.document_repository_backend``
    is ``"postgres"``; every other value selects the JSON file store. The
    result is cached, so repeated calls share one instance.
    """
    use_postgres = settings.document_repository_backend == "postgres"
    if not use_postgres:
        return JsonDocumentProcessingStore(settings.document_processing_metadata_path)
    return PostgresDocumentProcessingStore()
@lru_cache @lru_cache
def get_binary_store() -> DocumentBinaryStore: def get_binary_store() -> DocumentBinaryStore:
"""Return binary store.""" """Return binary store."""
@@ -226,6 +236,7 @@ def get_document_command_service() -> DocumentCommandService:
embedding_provider=get_embedding_provider(), embedding_provider=get_embedding_provider(),
vector_index=get_vector_index(), vector_index=get_vector_index(),
parse_artifact_store=get_parse_artifact_store(), parse_artifact_store=get_parse_artifact_store(),
document_processing_store=get_document_processing_store(),
) )

View File

@@ -1,4 +1,4 @@
import { Outlet, useLocation } from 'react-router-dom'; import { useLocation } from 'react-router-dom';
import { FooterLayout } from './FooterLayout'; import { FooterLayout } from './FooterLayout';
import { HeaderLayout } from './HeaderLayout'; import { HeaderLayout } from './HeaderLayout';
@@ -15,7 +15,6 @@ export function AppShell() {
<HeaderLayout activeTab={activeTab} /> <HeaderLayout activeTab={activeTab} />
<ContentLayout tab={activeTab}> <ContentLayout tab={activeTab}>
<KeepAliveViewport activeTab={activeTab} /> <KeepAliveViewport activeTab={activeTab} />
<Outlet />
</ContentLayout> </ContentLayout>
<FooterLayout /> <FooterLayout />
</div> </div>

View File

@@ -22,12 +22,12 @@ export function ContentLayout({ children, tab }: ContentLayoutProps) {
<div <div
className={[ className={[
shellFrameClassName, shellFrameClassName,
'relative flex min-h-0 flex-1 py-8', 'relative flex min-h-0 flex-1 justify-center py-8',
].join(' ')} ].join(' ')}
> >
<div <div
className={[ className={[
'relative flex w-full min-h-0 flex-1', 'relative flex min-h-0 w-full',
widthClass, widthClass,
tab.fillHeight ? 'overflow-hidden' : '', tab.fillHeight ? 'overflow-hidden' : '',
].join(' ')} ].join(' ')}

View File

@@ -44,7 +44,7 @@ export const appTabs: AppTabConfig[] = [
label: '合规分析', label: '合规分析',
component: CompliancePage, component: CompliancePage,
keepAlive: true, keepAlive: true,
contentWidth: 'full', contentWidth: 'wide',
fillHeight: true, fillHeight: true,
}, },
{ {
@@ -61,7 +61,7 @@ export const appTabs: AppTabConfig[] = [
label: '法规对话', label: '法规对话',
component: RagChatPage, component: RagChatPage,
keepAlive: true, keepAlive: true,
contentWidth: 'full', contentWidth: 'wide',
fillHeight: true, fillHeight: true,
}, },
]; ];

View File

@@ -31,7 +31,6 @@ dependencies = [
"redis>=4.5.0", "redis>=4.5.0",
"minio>=7.1.0", "minio>=7.1.0",
"psycopg2-binary>=2.9.0", "psycopg2-binary>=2.9.0",
"sqlalchemy>=2.0.0",
] ]
[dependency-groups] [dependency-groups]

View File

@@ -0,0 +1,216 @@
"""Test PostgreSQL-backed document processing history storage."""
from __future__ import annotations
import uuid
from datetime import UTC, datetime
import psycopg2
import pytest
from app.domain.documents import Document, DocumentArtifact, DocumentProcessingRun, DocumentStatus, DocumentStatusEvent
from app.infrastructure.storage.postgres_document_processing_store import PostgresDocumentProcessingStore
from app.infrastructure.storage.postgres_document_repository import PostgresDocumentRepository
# Keep these tests focused on relational storage behavior only.
def _build_document(doc_id: str) -> Document:
    """Build the smallest pending document that satisfies the history tables' foreign keys."""
    file_name = "processing-test.pdf"
    document = Document(
        doc_id=doc_id,
        doc_name="Processing Test",
        file_name=file_name,
        object_name=f"{doc_id}/processing-test.pdf",
        content_type="application/pdf",
        size_bytes=128,
        status=DocumentStatus.PENDING,
    )
    return document
def _connectivity_ready() -> bool:
    """Report whether the configured PostgreSQL instance answers a trivial query.

    Any ``psycopg2.Error`` raised while constructing the repository or while
    listing one document counts as "not reachable".
    """
    try:
        probe = PostgresDocumentRepository()
        probe.list(limit=1)
    except psycopg2.Error:
        return False
    return True
# Module-wide marker: skip every test in this file when the connectivity probe fails.
# The probe runs once, when this module is imported.
pytestmark = pytest.mark.skipif(not _connectivity_ready(), reason="PostgreSQL test backend is not reachable")
def test_postgres_document_processing_store_supports_full_run_lifecycle():
    """Drive one run through every lifecycle stage and read the history back as dataclasses."""
    repository = PostgresDocumentRepository()
    store = PostgresDocumentProcessingStore()
    doc_id = f"proc-{uuid.uuid4().hex[:10]}"
    run_id = f"run-{uuid.uuid4().hex[:10]}"
    base_time = datetime.now(UTC)

    # The parent document must exist first: every history table references it.
    repository.create(_build_document(doc_id))
    try:
        initial_run = DocumentProcessingRun(
            run_id=run_id,
            doc_id=doc_id,
            trigger_type="upload",
            run_status="running",
            parser_backend="aliyun",
            chunk_backend="aliyun",
            embedding_model="text-embedding-v3",
            started_at=base_time,
            metadata={"origin": "test"},
        )
        created = store.create_run(initial_run)

        stored = store.mark_run_stored(run_id, stored_at=base_time, metadata={"stored": True})

        parse_metrics = {
            "parser_backend": "fake_parser",
            "layout_count": 2,
            "structure_node_count": 3,
            "semantic_block_count": 4,
            "vector_chunk_count": 5,
            "parsed_at": base_time,
            "metadata": {"parse_task_id": "task-1"},
        }
        parsed = store.mark_run_parsed(run_id, **parse_metrics)

        indexed = store.mark_run_indexed(
            run_id,
            chunk_count=6,
            index_name="regulations_dense_1024_v1",
            indexed_at=base_time,
            finished_at=base_time,
            metadata={"collection": "regulations_dense_1024_v1"},
        )

        index_event = DocumentStatusEvent(
            event_id=f"evt-{uuid.uuid4().hex[:10]}",
            doc_id=doc_id,
            run_id=run_id,
            from_status="parsed",
            to_status="indexed",
            stage="index",
            message="Indexed successfully",
            metadata={"chunk_count": 6},
            occurred_at=base_time,
        )
        event = store.append_status_event(index_event)

        layouts_artifact = DocumentArtifact(
            artifact_id=f"art-{uuid.uuid4().hex[:10]}",
            doc_id=doc_id,
            run_id=run_id,
            artifact_type="layouts",
            object_name=f"artifacts/{doc_id}/layouts.json",
            content_type="application/json",
            created_at=base_time,
        )
        chunks_artifact = DocumentArtifact(
            artifact_id=f"art-{uuid.uuid4().hex[:10]}",
            doc_id=doc_id,
            run_id=run_id,
            artifact_type="vector_chunks",
            object_name=f"artifacts/{doc_id}/vector_chunks.json",
            content_type="application/json",
            created_at=base_time,
        )
        artifacts = store.replace_artifacts_for_run(run_id, [layouts_artifact, chunks_artifact])

        # Read everything back through the query side of the store.
        fetched = store.get_run(run_id)
        run_rows = store.list_runs_by_document(doc_id)
        event_rows = store.list_status_events_by_document(doc_id)
        artifact_rows = store.list_artifacts_by_run(run_id)

        assert created.run_id == run_id
        assert stored is not None
        assert stored.stored_at is not None
        assert parsed is not None
        assert parsed.parser_backend == "fake_parser"
        assert indexed is not None
        assert indexed.run_status == "succeeded"
        assert fetched is not None
        assert fetched.chunk_count == 6
        assert isinstance(run_rows[0], DocumentProcessingRun)
        assert isinstance(event_rows[0], DocumentStatusEvent)
        assert isinstance(artifact_rows[0], DocumentArtifact)
        assert event_rows[0].event_id == event.event_id
        written_types = {artifact.artifact_type for artifact in artifacts}
        stored_types = {artifact.artifact_type for artifact in artifact_rows}
        assert written_types == stored_types
    finally:
        store.delete_by_document(doc_id)
        repository.delete(doc_id)
def test_postgres_document_processing_store_replaces_artifacts_and_deletes_document_data():
    """Check that artifact replacement leaves only the latest batch and that deletion clears all history."""
    repository = PostgresDocumentRepository()
    store = PostgresDocumentProcessingStore()
    doc_id = f"proc-{uuid.uuid4().hex[:10]}"
    run_id = f"run-{uuid.uuid4().hex[:10]}"

    repository.create(_build_document(doc_id))
    try:
        retry_run = DocumentProcessingRun(
            run_id=run_id,
            doc_id=doc_id,
            trigger_type="retry",
            run_status="running",
        )
        store.create_run(retry_run)

        layouts_v1 = DocumentArtifact(
            artifact_id=f"art-{uuid.uuid4().hex[:10]}",
            doc_id=doc_id,
            run_id=run_id,
            artifact_type="layouts",
            object_name=f"artifacts/{doc_id}/layouts-v1.json",
            content_type="application/json",
        )
        first = store.replace_artifacts_for_run(run_id, [layouts_v1])

        # The second batch must fully supersede the first one.
        layouts_v2 = DocumentArtifact(
            artifact_id=f"art-{uuid.uuid4().hex[:10]}",
            doc_id=doc_id,
            run_id=run_id,
            artifact_type="layouts",
            object_name=f"artifacts/{doc_id}/layouts-v2.json",
            content_type="application/json",
        )
        second = store.replace_artifacts_for_run(run_id, [layouts_v2])

        failure_event = DocumentStatusEvent(
            event_id=f"evt-{uuid.uuid4().hex[:10]}",
            doc_id=doc_id,
            run_id=run_id,
            from_status="pending",
            to_status="failed",
            stage="parse",
            message="failed",
        )
        store.append_status_event(failure_event)
        failed = store.mark_run_failed(run_id, failure_stage="parse", error_message="boom")
        artifact_rows = store.list_artifacts_by_run(run_id)

        assert len(first) == 1
        assert len(second) == 1
        assert len(artifact_rows) == 1
        assert artifact_rows[0].object_name.endswith("layouts-v2.json")
        assert failed is not None
        assert failed.run_status == "failed"

        store.delete_by_document(doc_id)
        assert store.list_runs_by_document(doc_id) == []
        assert store.list_status_events_by_document(doc_id) == []
        assert store.list_artifacts_by_document(doc_id) == []
    finally:
        repository.delete(doc_id)

View File

@@ -3,13 +3,18 @@
from __future__ import annotations from __future__ import annotations
from dataclasses import dataclass from dataclasses import dataclass
from pathlib import Path
from app.application.documents.services import DocumentCommandService from app.application.documents.services import DocumentCommandService
from app.domain.documents import Chunk, Document, DocumentStatus, ParsedDocument from app.domain.documents import Chunk, Document, DocumentArtifact, DocumentProcessingRun, DocumentStatus, DocumentStatusEvent, ParsedDocument
from app.infrastructure.storage.json_document_processing_store import JsonDocumentProcessingStore
from app.infrastructure.storage.json_document_repository import JsonDocumentRepository
from app.shared import bootstrap from app.shared import bootstrap
class FakeRepository: class FakeRepository:
"""Store document rows in memory for application service tests."""
def __init__(self) -> None: def __init__(self) -> None:
self.documents: dict[str, Document] = {} self.documents: dict[str, Document] = {}
@@ -25,9 +30,14 @@ class FakeRepository:
return self.documents.get(doc_id) return self.documents.get(doc_id)
def list(self, limit: int | None = None) -> list[Document]: def list(self, limit: int | None = None) -> list[Document]:
"""Return stored documents in insertion order."""
values = list(self.documents.values()) values = list(self.documents.values())
return values[:limit] if limit is not None else values return values[:limit] if limit is not None else values
def delete(self, doc_id: str) -> bool:
    """Remove one document from the in-memory map; ``True`` when an entry was removed."""
    removed = self.documents.pop(doc_id, None)
    return removed is not None
def update_status( def update_status(
self, self,
doc_id: str, doc_id: str,
@@ -62,6 +72,8 @@ class FakeRepository:
class FakeBinaryStore: class FakeBinaryStore:
"""Store binary payloads in memory for upload and retry tests."""
def __init__(self) -> None: def __init__(self) -> None:
self.saved: dict[str, bytes] = {} self.saved: dict[str, bytes] = {}
@@ -76,6 +88,8 @@ class FakeBinaryStore:
class FakeParser: class FakeParser:
"""Return a stable parsed document for deterministic service tests."""
def parse(self, *, file_path: str, doc_id: str, doc_name: str) -> ParsedDocument: def parse(self, *, file_path: str, doc_id: str, doc_name: str) -> ParsedDocument:
return ParsedDocument( return ParsedDocument(
doc_id=doc_id, doc_id=doc_id,
@@ -101,6 +115,8 @@ class FakeParser:
class FakeChunkBuilder: class FakeChunkBuilder:
"""Build one deterministic chunk from the fake parsed document."""
def build(self, *, parsed_document: ParsedDocument, regulation_type: str, version: str) -> list[Chunk]: def build(self, *, parsed_document: ParsedDocument, regulation_type: str, version: str) -> list[Chunk]:
return [ return [
Chunk( Chunk(
@@ -122,6 +138,8 @@ class FakeChunkBuilder:
class FakeEmbeddingProvider: class FakeEmbeddingProvider:
"""Capture embedding calls and return fixed-length vectors."""
def __init__(self) -> None: def __init__(self) -> None:
self.calls: list[list[str]] = [] self.calls: list[list[str]] = []
@@ -134,6 +152,8 @@ class FakeEmbeddingProvider:
class FakeVectorIndex: class FakeVectorIndex:
"""Capture vector upserts for service assertions."""
def __init__(self) -> None: def __init__(self) -> None:
self.upserts: list[tuple[list[Chunk], list[list[float]]]] = [] self.upserts: list[tuple[list[Chunk], list[list[float]]]] = []
@@ -151,11 +171,159 @@ class FakeVectorIndex:
return {"collection_name": "regulations_dense_1024_v1"} return {"collection_name": "regulations_dense_1024_v1"}
@dataclass
class FakeProcessingStore:
"""Record processing history method calls for orchestration assertions."""
runs: list[DocumentProcessingRun] = None
status_events: list[DocumentStatusEvent] = None
artifact_batches: list[list[DocumentArtifact]] = None
deleted_doc_ids: list[str] = None
stored_run_ids: list[str] = None
parsed_calls: list[dict] = None
indexed_calls: list[dict] = None
failed_calls: list[dict] = None
def __post_init__(self) -> None:
"""Initialize mutable call collections for each fake instance."""
self.runs = []
self.status_events = []
self.artifact_batches = []
self.deleted_doc_ids = []
self.stored_run_ids = []
self.parsed_calls = []
self.indexed_calls = []
self.failed_calls = []
def create_run(self, run: DocumentProcessingRun) -> DocumentProcessingRun:
"""Store the created run and return it unchanged."""
self.runs.append(run)
return run
def mark_run_stored(self, run_id: str, *, stored_at=None, metadata: dict | None = None) -> DocumentProcessingRun | None:
"""Record that one run reached the stored stage."""
self.stored_run_ids.append(run_id)
return next((run for run in self.runs if run.run_id == run_id), None)
def mark_run_parsed(
self,
run_id: str,
*,
parser_backend: str,
layout_count: int,
structure_node_count: int,
semantic_block_count: int,
vector_chunk_count: int,
parsed_at=None,
metadata: dict | None = None,
) -> DocumentProcessingRun | None:
"""Record parse metrics for one run."""
self.parsed_calls.append(
{
"run_id": run_id,
"parser_backend": parser_backend,
"layout_count": layout_count,
"structure_node_count": structure_node_count,
"semantic_block_count": semantic_block_count,
"vector_chunk_count": vector_chunk_count,
"metadata": metadata or {},
}
)
return next((run for run in self.runs if run.run_id == run_id), None)
def mark_run_indexed(
self,
run_id: str,
*,
chunk_count: int,
index_name: str,
indexed_at=None,
finished_at=None,
metadata: dict | None = None,
) -> DocumentProcessingRun | None:
"""Record index completion for one run."""
self.indexed_calls.append(
{
"run_id": run_id,
"chunk_count": chunk_count,
"index_name": index_name,
"metadata": metadata or {},
}
)
return next((run for run in self.runs if run.run_id == run_id), None)
def mark_run_failed(
self,
run_id: str,
*,
failure_stage: str,
error_message: str,
finished_at=None,
metadata: dict | None = None,
) -> DocumentProcessingRun | None:
"""Record terminal failure details for one run."""
self.failed_calls.append(
{
"run_id": run_id,
"failure_stage": failure_stage,
"error_message": error_message,
"metadata": metadata or {},
}
)
return next((run for run in self.runs if run.run_id == run_id), None)
def append_status_event(self, event: DocumentStatusEvent) -> DocumentStatusEvent:
"""Store one status event."""
self.status_events.append(event)
return event
def replace_artifacts_for_run(self, run_id: str, artifacts: list[DocumentArtifact]) -> list[DocumentArtifact]:
"""Store one artifact replacement batch."""
self.artifact_batches.append(artifacts)
return artifacts
def delete_by_document(self, doc_id: str) -> None:
"""Record an explicit document-history delete request."""
self.deleted_doc_ids.append(doc_id)
def list_runs_by_document(self, doc_id: str) -> list[DocumentProcessingRun]:
"""Return runs for completeness of the fake port."""
return [run for run in self.runs if run.doc_id == doc_id]
def get_run(self, run_id: str) -> DocumentProcessingRun | None:
"""Return one run for completeness of the fake port."""
return next((run for run in self.runs if run.run_id == run_id), None)
def list_status_events_by_document(self, doc_id: str) -> list[DocumentStatusEvent]:
"""Return status events for completeness of the fake port."""
return [event for event in self.status_events if event.doc_id == doc_id]
def list_status_events_by_run(self, run_id: str) -> list[DocumentStatusEvent]:
"""Return status events for completeness of the fake port."""
return [event for event in self.status_events if event.run_id == run_id]
def list_artifacts_by_document(self, doc_id: str) -> list[DocumentArtifact]:
"""Return artifact references for completeness of the fake port."""
return [artifact for batch in self.artifact_batches for artifact in batch if artifact.doc_id == doc_id]
def list_artifacts_by_run(self, run_id: str) -> list[DocumentArtifact]:
"""Return artifact references for completeness of the fake port."""
return [artifact for batch in self.artifact_batches for artifact in batch if artifact.run_id == run_id]
class FailingParser:
    """Parser stand-in whose ``parse`` always fails with the same error."""

    def parse(self, *, file_path: str, doc_id: str, doc_name: str) -> ParsedDocument:
        """Ignore every argument and raise ``RuntimeError("parser exploded")``."""
        failure = RuntimeError("parser exploded")
        raise failure
def test_document_command_service_uses_1024_dense_embedding_and_updates_status(): def test_document_command_service_uses_1024_dense_embedding_and_updates_status():
repository = FakeRepository() repository = FakeRepository()
binary_store = FakeBinaryStore() binary_store = FakeBinaryStore()
embedding_provider = FakeEmbeddingProvider() embedding_provider = FakeEmbeddingProvider()
vector_index = FakeVectorIndex() vector_index = FakeVectorIndex()
processing_store = FakeProcessingStore()
service = DocumentCommandService( service = DocumentCommandService(
document_repository=repository, document_repository=repository,
binary_store=binary_store, binary_store=binary_store,
@@ -163,6 +331,7 @@ def test_document_command_service_uses_1024_dense_embedding_and_updates_status()
chunk_builder=FakeChunkBuilder(), chunk_builder=FakeChunkBuilder(),
embedding_provider=embedding_provider, embedding_provider=embedding_provider,
vector_index=vector_index, vector_index=vector_index,
document_processing_store=processing_store,
) )
result = service.upload_and_process( result = service.upload_and_process(
@@ -188,6 +357,264 @@ def test_document_command_service_uses_1024_dense_embedding_and_updates_status()
assert stored.index_name == "regulations_dense_1024_v1" assert stored.index_name == "regulations_dense_1024_v1"
assert stored.metadata["parse_task_id"] == "task-123" assert stored.metadata["parse_task_id"] == "task-123"
assert stored.metadata["artifact_keys"]["vector_chunks"].endswith("/vector_chunks.json") assert stored.metadata["artifact_keys"]["vector_chunks"].endswith("/vector_chunks.json")
assert len(processing_store.runs) == 1
assert processing_store.runs[0].trigger_type == "upload"
assert processing_store.stored_run_ids == [processing_store.runs[0].run_id]
assert processing_store.parsed_calls[0]["vector_chunk_count"] == 1
assert processing_store.indexed_calls[0]["index_name"] == "regulations_dense_1024_v1"
assert [event.to_status for event in processing_store.status_events] == ["pending", "stored", "parsed", "indexed"]
assert {artifact.artifact_type for artifact in processing_store.artifact_batches[0]} == {
"layouts",
"structure_nodes",
"semantic_blocks",
"vector_chunks",
}
def test_document_command_service_retry_marks_processing_run_as_retry():
    """Retrying a stored document records a processing run triggered by ``retry``."""
    documents = FakeRepository()
    blobs = FakeBinaryStore()
    embeddings = FakeEmbeddingProvider()
    index = FakeVectorIndex()
    history = FakeProcessingStore()

    # Seed the repository and the binary store so retry has something to reprocess.
    object_name = "doc-retry/retry.pdf"
    seeded_document = Document(
        doc_id="doc-retry",
        doc_name="Retry Doc",
        file_name="retry.pdf",
        object_name=object_name,
        content_type="application/pdf",
        size_bytes=4,
        regulation_type="车辆安全",
        version="2026",
        metadata={"generate_summary": False},
    )
    documents.create(seeded_document)
    blobs.save(
        object_name=object_name,
        data=b"data",
        content_type="application/pdf",
        metadata={"doc_id": "doc-retry"},
    )

    service = DocumentCommandService(
        document_repository=documents,
        binary_store=blobs,
        parser=FakeParser(),
        chunk_builder=FakeChunkBuilder(),
        embedding_provider=embeddings,
        vector_index=index,
        document_processing_store=history,
    )

    outcome = service.retry("doc-retry")

    assert outcome.status == "indexed"
    assert history.runs[0].trigger_type == "retry"
def test_document_command_service_records_failed_processing_stage():
    """A parser failure is reported as a ``parse``-stage failure in history and metadata."""
    documents = FakeRepository()
    blobs = FakeBinaryStore()
    embeddings = FakeEmbeddingProvider()
    index = FakeVectorIndex()
    history = FakeProcessingStore()
    service = DocumentCommandService(
        document_repository=documents,
        binary_store=blobs,
        parser=FailingParser(),
        chunk_builder=FakeChunkBuilder(),
        embedding_provider=embeddings,
        vector_index=index,
        document_processing_store=history,
    )
    upload_request = {
        "doc_id": "doc-fail",
        "file_name": "test.pdf",
        "content": b"dummy pdf bytes",
        "content_type": "application/pdf",
        "doc_name": "测试法规",
        "regulation_type": "车辆安全",
        "version": "2026",
        "generate_summary": False,
    }

    outcome = service.upload_and_process(**upload_request)

    assert outcome.status == "failed"
    first_failure = history.failed_calls[0]
    assert first_failure["failure_stage"] == "parse"
    last_event = history.status_events[-1]
    assert last_event.to_status == "failed"
    failed_document = documents.get("doc-fail")
    assert failed_document.metadata["failure_stage"] == "parse"
def test_document_command_service_delete_cleans_processing_history_when_present():
    """Deleting a document also asks the processing store to drop its history."""
    documents = FakeRepository()
    blobs = FakeBinaryStore()
    index = FakeVectorIndex()
    history = FakeProcessingStore()
    doomed_document = Document(
        doc_id="doc-delete",
        doc_name="Delete Doc",
        file_name="delete.pdf",
        object_name="doc-delete/delete.pdf",
        content_type="application/pdf",
        size_bytes=4,
    )
    documents.create(doomed_document)
    service = DocumentCommandService(
        document_repository=documents,
        binary_store=blobs,
        parser=FakeParser(),
        chunk_builder=FakeChunkBuilder(),
        embedding_provider=FakeEmbeddingProvider(),
        vector_index=index,
        document_processing_store=history,
    )

    was_deleted = service.delete("doc-delete")

    assert was_deleted is True
    assert history.deleted_doc_ids == ["doc-delete"]
def test_document_command_service_persists_processing_history_with_json_store(tmp_path: Path):
    """An upload through JSON-backed stores leaves one succeeded run with events and artifacts."""
    documents = JsonDocumentRepository(str(tmp_path / "documents.json"))
    history = JsonDocumentProcessingStore(str(tmp_path / "document_processing.json"))
    blobs = FakeBinaryStore()
    embeddings = FakeEmbeddingProvider()
    index = FakeVectorIndex()
    service = DocumentCommandService(
        document_repository=documents,
        binary_store=blobs,
        parser=FakeParser(),
        chunk_builder=FakeChunkBuilder(),
        embedding_provider=embeddings,
        vector_index=index,
        document_processing_store=history,
    )
    upload_request = {
        "doc_id": "doc-json-flow",
        "file_name": "test.pdf",
        "content": b"dummy pdf bytes",
        "content_type": "application/pdf",
        "doc_name": "测试法规",
        "regulation_type": "车辆安全",
        "version": "2026",
        "generate_summary": False,
    }

    outcome = service.upload_and_process(**upload_request)

    # Read everything back before asserting, mirroring what a later reader would see.
    persisted = documents.get("doc-json-flow")
    run_rows = history.list_runs_by_document("doc-json-flow")
    event_rows = history.list_status_events_by_document("doc-json-flow")
    artifact_rows = history.list_artifacts_by_document("doc-json-flow")

    assert outcome.status == "indexed"
    assert persisted is not None and persisted.status == DocumentStatus.INDEXED
    assert len(run_rows) == 1
    only_run = run_rows[0]
    assert only_run.trigger_type == "upload"
    assert only_run.run_status == "succeeded"
    transitions = [row.to_status for row in event_rows]
    assert transitions == ["pending", "stored", "parsed", "indexed"]
    recorded_types = {row.artifact_type for row in artifact_rows}
    assert recorded_types == {
        "layouts",
        "structure_nodes",
        "semantic_blocks",
        "vector_chunks",
    }
def test_document_command_service_retry_creates_second_json_processing_run(tmp_path: Path):
    """Two retries of the same document leave two ``retry`` runs in the JSON store."""
    documents = JsonDocumentRepository(str(tmp_path / "documents.json"))
    history = JsonDocumentProcessingStore(str(tmp_path / "document_processing.json"))
    blobs = FakeBinaryStore()

    # Seed the repository and the binary store so retry has something to reprocess.
    object_name = "doc-json-retry/retry.pdf"
    seeded_document = Document(
        doc_id="doc-json-retry",
        doc_name="Retry Doc",
        file_name="retry.pdf",
        object_name=object_name,
        content_type="application/pdf",
        size_bytes=4,
        regulation_type="车辆安全",
        version="2026",
        metadata={"generate_summary": False},
    )
    documents.create(seeded_document)
    blobs.save(
        object_name=object_name,
        data=b"data",
        content_type="application/pdf",
        metadata={"doc_id": "doc-json-retry"},
    )

    service = DocumentCommandService(
        document_repository=documents,
        binary_store=blobs,
        parser=FakeParser(),
        chunk_builder=FakeChunkBuilder(),
        embedding_provider=FakeEmbeddingProvider(),
        vector_index=FakeVectorIndex(),
        document_processing_store=history,
    )

    first_attempt = service.retry("doc-json-retry")
    second_attempt = service.retry("doc-json-retry")
    run_rows = history.list_runs_by_document("doc-json-retry")

    assert first_attempt.status == "indexed"
    assert second_attempt.status == "indexed"
    assert len(run_rows) == 2
    trigger_types = {row.trigger_type for row in run_rows}
    assert trigger_types == {"retry"}
def test_document_command_service_delete_removes_json_processing_history(tmp_path: Path):
    """Deleting an uploaded document empties its runs, events, and artifacts in the JSON store."""
    documents = JsonDocumentRepository(str(tmp_path / "documents.json"))
    history = JsonDocumentProcessingStore(str(tmp_path / "document_processing.json"))
    blobs = FakeBinaryStore()
    service = DocumentCommandService(
        document_repository=documents,
        binary_store=blobs,
        parser=FakeParser(),
        chunk_builder=FakeChunkBuilder(),
        embedding_provider=FakeEmbeddingProvider(),
        vector_index=FakeVectorIndex(),
        document_processing_store=history,
    )
    upload_request = {
        "doc_id": "doc-json-delete",
        "file_name": "delete.pdf",
        "content": b"delete me",
        "content_type": "application/pdf",
        "doc_name": "Delete Doc",
        "regulation_type": "车辆安全",
        "version": "2026",
        "generate_summary": False,
    }
    # Produce history first so the delete has something to remove.
    service.upload_and_process(**upload_request)

    was_deleted = service.delete("doc-json-delete")

    assert was_deleted is True
    assert history.list_runs_by_document("doc-json-delete") == []
    assert history.list_status_events_by_document("doc-json-delete") == []
    assert history.list_artifacts_by_document("doc-json-delete") == []
def test_bootstrap_returns_json_processing_store_for_json_backend(tmp_path: Path):
    """With the ``json`` backend configured, bootstrap builds a ``JsonDocumentProcessingStore``."""
    saved_backend = bootstrap.settings.document_repository_backend
    saved_path = bootstrap.settings.document_processing_metadata_path
    metadata_path = str(tmp_path / "document_processing.json")

    # Drop any cached store so the patched settings are actually read.
    bootstrap.get_document_processing_store.cache_clear()
    try:
        bootstrap.settings.document_repository_backend = "json"
        bootstrap.settings.document_processing_metadata_path = metadata_path

        store = bootstrap.get_document_processing_store()

        assert store.__class__.__name__ == "JsonDocumentProcessingStore"
    finally:
        # Restore the shared settings and clear the cache again so the store
        # built from the temporary path is not reused afterwards.
        bootstrap.settings.document_repository_backend = saved_backend
        bootstrap.settings.document_processing_metadata_path = saved_path
        bootstrap.get_document_processing_store.cache_clear()
def test_bootstrap_defaults_to_aliyun_parser_and_chunk_builder(): def test_bootstrap_defaults_to_aliyun_parser_and_chunk_builder():
bootstrap.get_parser.cache_clear() bootstrap.get_parser.cache_clear()

View File

@@ -0,0 +1,184 @@
"""Test JSON-backed document processing history storage."""
from __future__ import annotations
import json
from datetime import UTC, datetime
from pathlib import Path
from app.domain.documents import DocumentArtifact, DocumentProcessingRun, DocumentStatusEvent
from app.infrastructure.storage.json_document_processing_store import JsonDocumentProcessingStore
# Keep JSON processing-store tests focused on local file persistence behavior.
def test_json_document_processing_store_initializes_missing_file(tmp_path: Path):
    """Constructing the store on a missing path writes the empty canonical payload."""
    backing_file = tmp_path / "document_processing.json"

    store = JsonDocumentProcessingStore(str(backing_file))

    with backing_file.open(encoding="utf-8") as handle:
        on_disk = json.load(handle)
    expected_empty = {"runs": {}, "status_events": {}, "artifacts": {}}
    assert on_disk == expected_empty
    assert store.list_runs_by_document("missing") == []
def test_json_document_processing_store_supports_full_run_lifecycle(tmp_path: Path):
    """Drive one run through every stage and read runs, events, and artifacts back."""
    store = JsonDocumentProcessingStore(str(tmp_path / "document_processing.json"))
    doc_id, run_id, event_id = "doc-json", "run-json", "evt-json"
    moment = datetime.now(UTC)

    new_run = DocumentProcessingRun(
        run_id=run_id,
        doc_id=doc_id,
        trigger_type="upload",
        run_status="running",
        parser_backend="aliyun",
        chunk_backend="aliyun",
        embedding_model="text-embedding-v3",
        started_at=moment,
        metadata={"origin": "json-test"},
    )
    created_run = store.create_run(new_run)
    stored_run = store.mark_run_stored(run_id, stored_at=moment, metadata={"stored": True})
    parsed_run = store.mark_run_parsed(
        run_id,
        parser_backend="fake_parser",
        layout_count=1,
        structure_node_count=2,
        semantic_block_count=3,
        vector_chunk_count=4,
        parsed_at=moment,
        metadata={"parse_task_id": "task-json"},
    )
    indexed_run = store.mark_run_indexed(
        run_id,
        chunk_count=5,
        index_name="regulations_dense_1024_v1",
        indexed_at=moment,
        finished_at=moment,
        metadata={"collection": "regulations_dense_1024_v1"},
    )

    indexed_event = DocumentStatusEvent(
        event_id=event_id,
        doc_id=doc_id,
        run_id=run_id,
        from_status="parsed",
        to_status="indexed",
        stage="index",
        message="Indexed",
        metadata={"chunk_count": 5},
        occurred_at=moment,
    )
    appended_event = store.append_status_event(indexed_event)

    layouts_artifact = DocumentArtifact(
        artifact_id="art-layouts",
        doc_id=doc_id,
        run_id=run_id,
        artifact_type="layouts",
        object_name="artifacts/doc-json/layouts.json",
        content_type="application/json",
        created_at=moment,
    )
    vectors_artifact = DocumentArtifact(
        artifact_id="art-vectors",
        doc_id=doc_id,
        run_id=run_id,
        artifact_type="vector_chunks",
        object_name="artifacts/doc-json/vector_chunks.json",
        content_type="application/json",
        created_at=moment,
    )
    written_artifacts = store.replace_artifacts_for_run(run_id, [layouts_artifact, vectors_artifact])

    # Read everything back through the query side of the store.
    fetched_run = store.get_run(run_id)
    runs_for_doc = store.list_runs_by_document(doc_id)
    events_for_run = store.list_status_events_by_run(run_id)
    artifacts_for_doc = store.list_artifacts_by_document(doc_id)

    assert created_run.run_id == run_id
    assert stored_run is not None and stored_run.metadata["stored"] is True
    assert parsed_run is not None and parsed_run.structure_node_count == 2
    assert indexed_run is not None and indexed_run.run_status == "succeeded"
    assert fetched_run is not None and fetched_run.chunk_count == 5
    assert runs_for_doc[0].started_at == moment
    assert events_for_run[0].event_id == appended_event.event_id
    assert artifacts_for_doc[0].doc_id == doc_id
    written_types = {item.artifact_type for item in written_artifacts}
    read_back_types = {item.artifact_type for item in artifacts_for_doc}
    assert written_types == read_back_types
def test_json_document_processing_store_replaces_artifacts_and_deletes_by_document(tmp_path: Path):
    """A second artifact batch supersedes the first, and a document delete clears all history."""
    store = JsonDocumentProcessingStore(str(tmp_path / "document_processing.json"))
    doc_id, run_id = "doc-delete", "run-delete"

    def layouts_artifact(artifact_id: str, object_name: str) -> DocumentArtifact:
        # Both batches differ only in id and object name.
        return DocumentArtifact(
            artifact_id=artifact_id,
            doc_id=doc_id,
            run_id=run_id,
            artifact_type="layouts",
            object_name=object_name,
            content_type="application/json",
        )

    retry_run = DocumentProcessingRun(
        run_id=run_id,
        doc_id=doc_id,
        trigger_type="retry",
        run_status="running",
    )
    store.create_run(retry_run)
    stored_event = DocumentStatusEvent(
        event_id="evt-delete",
        doc_id=doc_id,
        run_id=run_id,
        from_status="pending",
        to_status="stored",
        stage="store",
        occurred_at=datetime.now(UTC),
    )
    store.append_status_event(stored_event)

    first_batch = store.replace_artifacts_for_run(
        run_id,
        [layouts_artifact("art-first", "artifacts/doc-delete/layouts-v1.json")],
    )
    second_batch = store.replace_artifacts_for_run(
        run_id,
        [layouts_artifact("art-second", "artifacts/doc-delete/layouts-v2.json")],
    )
    failed_run = store.mark_run_failed(run_id, failure_stage="parse", error_message="boom")
    artifacts_for_run = store.list_artifacts_by_run(run_id)

    assert len(first_batch) == 1
    assert len(second_batch) == 1
    assert len(artifacts_for_run) == 1
    assert artifacts_for_run[0].object_name.endswith("layouts-v2.json")
    assert failed_run is not None and failed_run.run_status == "failed"

    store.delete_by_document(doc_id)

    assert store.list_runs_by_document(doc_id) == []
    assert store.list_status_events_by_document(doc_id) == []
    assert store.list_artifacts_by_document(doc_id) == []

129
uv.lock generated
View File

@@ -35,7 +35,6 @@ dependencies = [
{ name = "python-dotenv" }, { name = "python-dotenv" },
{ name = "python-multipart" }, { name = "python-multipart" },
{ name = "redis" }, { name = "redis" },
{ name = "sqlalchemy" },
{ name = "tenacity" }, { name = "tenacity" },
{ name = "uvicorn", extra = ["standard"] }, { name = "uvicorn", extra = ["standard"] },
] ]
@@ -68,7 +67,6 @@ requires-dist = [
{ name = "python-dotenv", specifier = ">=1.0.0" }, { name = "python-dotenv", specifier = ">=1.0.0" },
{ name = "python-multipart", specifier = ">=0.0.6" }, { name = "python-multipart", specifier = ">=0.0.6" },
{ name = "redis", specifier = ">=4.5.0" }, { name = "redis", specifier = ">=4.5.0" },
{ name = "sqlalchemy", specifier = ">=2.0.0" },
{ name = "tenacity", specifier = ">=8.2.0" }, { name = "tenacity", specifier = ">=8.2.0" },
{ name = "uvicorn", extras = ["standard"], specifier = ">=0.23.0" }, { name = "uvicorn", extras = ["standard"], specifier = ">=0.23.0" },
] ]
@@ -815,7 +813,7 @@ name = "exceptiongroup"
version = "1.3.1" version = "1.3.1"
source = { registry = "https://pypi.org/simple" } source = { registry = "https://pypi.org/simple" }
dependencies = [ dependencies = [
{ name = "typing-extensions", marker = "python_full_version < '3.13'" }, { name = "typing-extensions", marker = "python_full_version < '3.11'" },
] ]
sdist = { url = "https://files.pythonhosted.org/packages/50/79/66800aadf48771f6b62f7eb014e352e5d06856655206165d775e675a02c9/exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219", size = 30371, upload-time = "2025-11-21T23:01:54.787Z" } sdist = { url = "https://files.pythonhosted.org/packages/50/79/66800aadf48771f6b62f7eb014e352e5d06856655206165d775e675a02c9/exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219", size = 30371, upload-time = "2025-11-21T23:01:54.787Z" }
wheels = [ wheels = [
@@ -959,76 +957,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/9a/9a/e35b4a917281c0b8419d4207f4334c8e8c5dbf4f3f5f9ada73958d937dcc/frozenlist-1.8.0-py3-none-any.whl", hash = "sha256:0c18a16eab41e82c295618a77502e17b195883241c563b00f0aa5106fc4eaa0d", size = 13409, upload-time = "2025-10-06T05:38:16.721Z" }, { url = "https://files.pythonhosted.org/packages/9a/9a/e35b4a917281c0b8419d4207f4334c8e8c5dbf4f3f5f9ada73958d937dcc/frozenlist-1.8.0-py3-none-any.whl", hash = "sha256:0c18a16eab41e82c295618a77502e17b195883241c563b00f0aa5106fc4eaa0d", size = 13409, upload-time = "2025-10-06T05:38:16.721Z" },
] ]
[[package]]
name = "greenlet"
version = "3.5.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/6d/6e/802acd792aebb2256fbbee8cacf2727faaeb6f240ac11008f09eae4414bc/greenlet-3.5.1.tar.gz", hash = "sha256:5a56aeb7d5d9cc4b3a735efb5095bd4b4f6f0e4f93e5ca876d0e2315137b7829", size = 197356, upload-time = "2026-05-20T15:05:03.917Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/1d/21/117c8710abb7f146d804a124c07eb5964a60b90d02b72452885aecc18efa/greenlet-3.5.1-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:7eacb17a9d41538a2bc4912eba5ef13823c83cb69e4d141d0813debe7163187f", size = 283510, upload-time = "2026-05-20T13:12:26.475Z" },
{ url = "https://files.pythonhosted.org/packages/b9/f7/6762a56fa5f6c2295c449c6524e10ce481e381c994cc44d9d03aef0700fb/greenlet-3.5.1-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e5cc9606aa5f4e0bde0d3bd502b44f743864c3ffa5cfa1011b1e30f5aa02366f", size = 599696, upload-time = "2026-05-20T14:00:02.906Z" },
{ url = "https://files.pythonhosted.org/packages/0f/05/85a511e68ee109aff0aa00b4b497806091dd2d82ce209e49c6e801bd5d92/greenlet-3.5.1-cp310-cp310-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c3d35f87c7253b715d13d679e0783d845910144f282cb939fe1ba4ac8616269c", size = 612618, upload-time = "2026-05-20T14:05:39.202Z" },
{ url = "https://files.pythonhosted.org/packages/89/b8/8b83d18ae07c46c019617f35afd7b47aab7f9b4fbb12fc637d681e10bdd8/greenlet-3.5.1-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:540dae7b956209af4d70a3be35927b4055f617763771e5e84a5255bea934d2f5", size = 612947, upload-time = "2026-05-20T13:14:23.469Z" },
{ url = "https://files.pythonhosted.org/packages/5d/14/ad1f9fc9b82384c010212464a3702bd911f95dab2f1180bc6fbcfb1f958c/greenlet-3.5.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ed8cdb691169715a9a492844a83246f090182247d1a5031dc78a403f68ba1e97", size = 1571425, upload-time = "2026-05-20T14:02:22.671Z" },
{ url = "https://files.pythonhosted.org/packages/46/1c/43b8203cf10f4292c9e3d270e9e5f5ade79115a0a0ca5ea6f1be5f8915a7/greenlet-3.5.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:9d59e840387076a51016777a9328b3f2c427c6f9208a6e958bad251be50a648d", size = 1638688, upload-time = "2026-05-20T13:14:30.026Z" },
{ url = "https://files.pythonhosted.org/packages/ac/6e/0344b1e99f58f71715456e46492101fd2daa408957b8186ade0a4b515da7/greenlet-3.5.1-cp310-cp310-win_amd64.whl", hash = "sha256:b9152fca4a6466e114aaec745ae61cba739903a109754a9d4e1262f01e9259b1", size = 237763, upload-time = "2026-05-20T13:11:35.659Z" },
{ url = "https://files.pythonhosted.org/packages/42/3c/ff890b466eaba2b0f5e6bdfff025f8c75f41b8ffdc3dbc3d24ad261e764a/greenlet-3.5.1-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:73f78f9b9f0a5c06e5c946ba1e8e36f5114923b6be109ee618c54f079c3ea14f", size = 284764, upload-time = "2026-05-20T13:09:10.204Z" },
{ url = "https://files.pythonhosted.org/packages/81/0e/5e5457be3d256918f6a4756f073548a3f0190836e2cc94aa6d0d617a940b/greenlet-3.5.1-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a0cbed8bb44e23c5b199f888f4e4ce096b45ad9f25ff74a7ad0213875e936bb2", size = 603479, upload-time = "2026-05-20T14:00:04.757Z" },
{ url = "https://files.pythonhosted.org/packages/6d/e1/f89a21d58d308298e6f275f13a1b472ed96c680b601a371b08be6a725989/greenlet-3.5.1-cp311-cp311-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a203a8bd0acb0701653d3bbb26e404854a68674139ed5cbb778830f42b09bb33", size = 615495, upload-time = "2026-05-20T14:05:40.87Z" },
{ url = "https://files.pythonhosted.org/packages/75/de/af6cef182862d2ccd6975440d21c9058a77c3f9b469abf94e322dfd2e0e3/greenlet-3.5.1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8a271fcd66c74615cda6a964fda3f304267a12e50a084472218a39bb0376f563", size = 614754, upload-time = "2026-05-20T13:14:24.947Z" },
{ url = "https://files.pythonhosted.org/packages/1a/c6/50e520283a9f19388a7326b05f9e8637e566003475eacaadad04f558c68d/greenlet-3.5.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ded7b068c7c31c1a8657d4fd42d886b3e051ae29f88b80c5ff9d502257b0f071", size = 1574097, upload-time = "2026-05-20T14:02:24.003Z" },
{ url = "https://files.pythonhosted.org/packages/21/1c/13abd1f4860d987fa5e1170a01930d6e6cd40d328de487a3c9fdaff0ffd0/greenlet-3.5.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:d0932b81d72f552ded9d810d00021b64d89f2195a91ce115b893f943b7a4ab3c", size = 1641058, upload-time = "2026-05-20T13:14:31.83Z" },
{ url = "https://files.pythonhosted.org/packages/f5/56/5f332b7705545eac2dc01b4e9254d24a793f2656d55d5cc6b94ee59d22ae/greenlet-3.5.1-cp311-cp311-win_amd64.whl", hash = "sha256:88e300d136eac057b2397aa1cfd7328b4c87c7eb66a09c7bc6a1292234db474e", size = 238089, upload-time = "2026-05-20T13:14:03.229Z" },
{ url = "https://files.pythonhosted.org/packages/d9/a9/a3c2fa886c5b94863fb0e61b3bc14610b7aa94cf4f17f8741b11708305fc/greenlet-3.5.1-cp311-cp311-win_arm64.whl", hash = "sha256:cc6ab7e555c8a112ad3a76e368e86e12a2754bcae1652a5602e133ec7b635523", size = 234989, upload-time = "2026-05-20T13:08:27.715Z" },
{ url = "https://files.pythonhosted.org/packages/c4/37/4549f149c9797c21b32c2683c33522af22522099de128b2406672526d005/greenlet-3.5.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:fa4f98af3a528f0c3fd592a26df7f376f93329c8f4d987f6bb979057af8bf5e2", size = 286220, upload-time = "2026-05-20T13:07:28.463Z" },
{ url = "https://files.pythonhosted.org/packages/38/ff/a4f436709716965eaab9f36ea7b906c8a927fbe32fb1372a2071d964f6b1/greenlet-3.5.1-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ffea73584b216150eab159b6d12348fb253e68757974de1e2c40d8a318ac89ed", size = 601585, upload-time = "2026-05-20T14:00:06.141Z" },
{ url = "https://files.pythonhosted.org/packages/65/ad/54bc3fcee3ad368a61b19b67d88117f7a8c29727bf71fffdeda81fbd946e/greenlet-3.5.1-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:1072b4f9edcc1e192d9283a66a3e68d6b84c561de33a83d7858beb9ba1effe10", size = 614215, upload-time = "2026-05-20T14:05:42.675Z" },
{ url = "https://files.pythonhosted.org/packages/40/69/b91cda0647df839483201545913514c2827ebea5e5ccdf931842763bc127/greenlet-3.5.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:add5217d68b31130f0beca584d7fef4878327d2e31642b66618a14eef312b63b", size = 611358, upload-time = "2026-05-20T13:14:26.37Z" },
{ url = "https://files.pythonhosted.org/packages/59/90/3cf77e080350cd02fa307bb2abf05df48f4482c240275bbd2c203ba8bb1c/greenlet-3.5.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a5ea42a752d47a145eae922b605cd1634665ac3d5ec1e72402d5048e8d60d207", size = 1570475, upload-time = "2026-05-20T14:02:25.29Z" },
{ url = "https://files.pythonhosted.org/packages/65/2c/18cece62045e74598c3c393f70dce4a63f56222015ba29a5d4eeb04f764c/greenlet-3.5.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c5551170cf4f5ff5623e9af81323751979fee2c731e2287b61f73cd27257b823", size = 1635625, upload-time = "2026-05-20T13:14:34.027Z" },
{ url = "https://files.pythonhosted.org/packages/30/f5/310d104ddf41eb5a70f4c268d22508dfb0c3c8e86fec152be34d0d2ed819/greenlet-3.5.1-cp312-cp312-win_amd64.whl", hash = "sha256:3c8bb982ad117d29478ef8f5533e97df21f1e2befd17a299257b0c96d1371c0b", size = 238791, upload-time = "2026-05-20T13:10:39.018Z" },
{ url = "https://files.pythonhosted.org/packages/62/90/ceca11f504cd23a8047a3dea31919adc48df9b626dd0c13f0d858734fdfd/greenlet-3.5.1-cp312-cp312-win_arm64.whl", hash = "sha256:80eb4b04dadc4e67df3fae179a32c4706a3f495bc7f22fc8a81115d5f5512188", size = 235580, upload-time = "2026-05-20T13:08:45.056Z" },
{ url = "https://files.pythonhosted.org/packages/27/69/7f7e5372d998b81001899b1c0823c957aa413ba0f2662e65821611cc31e4/greenlet-3.5.1-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:51518ff74664078fc51bffcc6fc529b0df5ae58da192691cee765d45ce944a2b", size = 285060, upload-time = "2026-05-20T13:08:51.899Z" },
{ url = "https://files.pythonhosted.org/packages/b1/bf/387f9b6b865fd2ae0d0be09e0004827295a01b71be76ed350dd1e28a91a4/greenlet-3.5.1-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1ffdb3c0bb002c99cd8f298957e046c3dbf6006b5b7cdf11a4e19194624a0a0a", size = 604370, upload-time = "2026-05-20T14:00:07.492Z" },
{ url = "https://files.pythonhosted.org/packages/32/f5/169ce3d4e4c67291bd18f8cbe0299c9f3e45102c7f1fb3c14780c93e4532/greenlet-3.5.1-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7715a5a2c3378ba602c3a440558261e13a820bb53a82693aacd7b7f6d964e283", size = 616987, upload-time = "2026-05-20T14:05:44.237Z" },
{ url = "https://files.pythonhosted.org/packages/ee/e5/7f2e41d5273be07e77560d61ea4e56485b4d6c316d2a84518c62d1364061/greenlet-3.5.1-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dc71ff466927a201b08305acac451ebe1aedfcea002f62f1f2f2ac2ac1e6a135", size = 613911, upload-time = "2026-05-20T13:14:27.539Z" },
{ url = "https://files.pythonhosted.org/packages/c5/a4/fbdc67579b73615a1f91615e814303cc71e06128f7baaba87be79b8fb90c/greenlet-3.5.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cd443683db272ebaaca03af98c0b063ab30db70ea8a31a1559f35e3f7b744ccd", size = 1570689, upload-time = "2026-05-20T14:02:27.225Z" },
{ url = "https://files.pythonhosted.org/packages/e6/b4/77abbe35078be39718a46cd49caf16bceb35662f97a34101dca28aa98e47/greenlet-3.5.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:089fff7a6ce8d9316d1f65ebc00273a56be258c1725b32b94de90a3a979557e1", size = 1635602, upload-time = "2026-05-20T13:14:36.344Z" },
{ url = "https://files.pythonhosted.org/packages/37/f7/129f27ca700845b8ee8ca88ce7f43435a1239c2eddb7677fc938822762cf/greenlet-3.5.1-cp313-cp313-win_amd64.whl", hash = "sha256:110a1ca7b49b014b097f6078272c3f4ed31af45b254de5228b79adba879f6af9", size = 238683, upload-time = "2026-05-20T13:11:50.57Z" },
{ url = "https://files.pythonhosted.org/packages/6d/5c/a485a36e87df8d8fd0632ee01511244f5156a20ed3746cc6599340326395/greenlet-3.5.1-cp313-cp313-win_arm64.whl", hash = "sha256:f16ba1efc0715b680a18b8123d90dad887c6112ae3555b4b5c32c149540c6b4e", size = 235499, upload-time = "2026-05-20T13:12:42.028Z" },
{ url = "https://files.pythonhosted.org/packages/8a/cb/c62454606daf5640369c94d8a9dd540599b1bfc090e2d2180cb77f4038d2/greenlet-3.5.1-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:d8ab31c9de8651a2facdd5c5bb0011f2380dd1a7af78ce2adf4b56095294fc07", size = 285579, upload-time = "2026-05-20T13:08:56.396Z" },
{ url = "https://files.pythonhosted.org/packages/ec/71/c4270398c2eba968a6071af1dfbdcaeee6ec1c24bc8b435b8cc452700da6/greenlet-3.5.1-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5e300185139abc337ade480c327183adf42a875ac7181bfe66d7d4efea31fbea", size = 651106, upload-time = "2026-05-20T14:00:09.448Z" },
{ url = "https://files.pythonhosted.org/packages/1a/ab/71e34b78a44ec271fb5f550c17bc46d301ddc5953890d935f270b0dcdb5a/greenlet-3.5.1-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7ffdb990dcaa0234cf9845aead5df2e3c3a8b6507d409274dd87e0d5ab05ffc2", size = 663478, upload-time = "2026-05-20T14:05:45.88Z" },
{ url = "https://files.pythonhosted.org/packages/77/96/4efd6fa5c62c85426a0c19077a586258ebc3a2a146ff2493e4312a697a22/greenlet-3.5.1-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2f82b3597e9d83b63408affed0b48fd0f54935edac4302237b9a837be0dae33c", size = 660800, upload-time = "2026-05-20T13:14:29.129Z" },
{ url = "https://files.pythonhosted.org/packages/7a/e0/6c71401a25cac7000261304e866a2f2cc04dc74810d40e2f118aa4799495/greenlet-3.5.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c0141e37414c10164e702b8fb1473304221ad98f71600850c6ef7ff4880feba0", size = 1617518, upload-time = "2026-05-20T14:02:28.662Z" },
{ url = "https://files.pythonhosted.org/packages/41/26/c5c06643e8c0af9e7bf18e16cb51d0ab7625155f0392e1c9015d66d556cd/greenlet-3.5.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:50ae25a67bea74ea41fb14b960bc532df73eb713417b2d61892dced82fe8d3bc", size = 1681593, upload-time = "2026-05-20T13:14:39.417Z" },
{ url = "https://files.pythonhosted.org/packages/8a/bd/e11a108317485075e68af9d23039619b86b28130c3b50d227d42edece64b/greenlet-3.5.1-cp314-cp314-win_amd64.whl", hash = "sha256:8a17c42330e261299766b75ac1ea32caa437a9453c8f65d16a13140db378ecd3", size = 239800, upload-time = "2026-05-20T13:09:30.128Z" },
{ url = "https://files.pythonhosted.org/packages/47/f8/8e8e8417b7bf28639a5a56356ef934d0375e1d0c70a57e04d7701e870ffe/greenlet-3.5.1-cp314-cp314-win_arm64.whl", hash = "sha256:7b5f5fae05b8ac6d176a61b60c394a8cbdc2b5b91b81793066e68745cf165e54", size = 236862, upload-time = "2026-05-20T13:09:10.498Z" },
{ url = "https://files.pythonhosted.org/packages/90/12/41bf27fde4d3605d3773ae57751eda182b8be2f5398011c041173b1d9534/greenlet-3.5.1-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:ea8da1e900d758d078810d4255d8c6aa572181896a31ec79d779eb79c3adc9ad", size = 293637, upload-time = "2026-05-20T13:12:35.529Z" },
{ url = "https://files.pythonhosted.org/packages/44/44/ba14b23e9757707050c2f397d305bbcae62e5d7cad122f8b6baec5ae4a1f/greenlet-3.5.1-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a19570c52a21420dcbc94e661994bc325c0b5b11304540fed514586da5dc8f2e", size = 650840, upload-time = "2026-05-20T14:00:11.079Z" },
{ url = "https://files.pythonhosted.org/packages/a8/37/5ddc2b686a6844f91abecef43411842426da2e1573f60b49ecf2547f4ae1/greenlet-3.5.1-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3d955c89b75eeca4723d7cc14135f393cd47c32e2a6cb4a8e4c6e760a26b0986", size = 656416, upload-time = "2026-05-20T14:05:47.118Z" },
{ url = "https://files.pythonhosted.org/packages/e1/f0/d17510297c35a2992712f0bf84de3779749999f7d3d63aa1f09db7c62dbe/greenlet-3.5.1-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:de2daaaebd1a5aa88c49045b6baf9310b3263796bd88db713edf37cf53e7bb4e", size = 654397, upload-time = "2026-05-20T13:14:30.696Z" },
{ url = "https://files.pythonhosted.org/packages/37/eb/147387705bb89092645b012586e7273cb5ed3c90ef7eaf3a69173eaf0209/greenlet-3.5.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:3bfbd69cc349e43bf3a8ae1c85548ff0718efc887615c2db16c3833d7b0b072d", size = 1614469, upload-time = "2026-05-20T14:02:30.192Z" },
{ url = "https://files.pythonhosted.org/packages/a6/4e/37ee0da7732b7aa9896f17e15579a9df34b9fcb9dd494f0adfa749af6623/greenlet-3.5.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:4378720dd888136c27215a0214d32a4d37c3852765d45bc37aad0623423cfd78", size = 1675115, upload-time = "2026-05-20T13:14:40.972Z" },
{ url = "https://files.pythonhosted.org/packages/57/f3/97dfcf4a6eb5077f8a672234216fb5923eb89f2cab7081cb10b2cf75b605/greenlet-3.5.1-cp314-cp314t-win_amd64.whl", hash = "sha256:45718441607f9325d948db98cbc691276059316d0358c188c246da4e1d4d23d2", size = 245246, upload-time = "2026-05-20T13:12:22.646Z" },
{ url = "https://files.pythonhosted.org/packages/5d/73/d7f72e34b582f694f4a9b248162db7b09cc458a259ba8f0c0bfa1a34ea7d/greenlet-3.5.1-cp315-cp315-macosx_11_0_universal2.whl", hash = "sha256:2baee5ca02031757ffe8cc3d69f0cc0aec7065ce362622da74f32d3bcab1c541", size = 285575, upload-time = "2026-05-20T13:12:07.043Z" },
{ url = "https://files.pythonhosted.org/packages/df/59/fa9c6e87dc8ad27a95dabe2f29f372b733d05a8a67470f6c901ed9975655/greenlet-3.5.1-cp315-cp315-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9b1ec3274918a81d3ea778b9e75b56b72b33f300edb6cf7f3a7fe1dae56683de", size = 656428, upload-time = "2026-05-20T14:00:12.556Z" },
{ url = "https://files.pythonhosted.org/packages/f6/f9/e753408871eaa61dfe35e619cfc67512b036fde99893685d50eea9e07146/greenlet-3.5.1-cp315-cp315-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:111e2390ffffc47d5840b01711dd7fac07d4c09283d0283e7f3264b14e284c64", size = 667064, upload-time = "2026-05-20T14:05:48.662Z" },
{ url = "https://files.pythonhosted.org/packages/96/27/5565b5b40389f1c7753003a07e21892fda8660926787036d5bc0308b8113/greenlet-3.5.1-cp315-cp315-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e630136e905fe5ff43e86945ae41220b6d1470956a39220e708110ac48d01ea5", size = 665697, upload-time = "2026-05-20T13:14:32.943Z" },
{ url = "https://files.pythonhosted.org/packages/cf/82/e7de4178c0c2d1c9a5a3be3cc0b33e46a85b3ee4a77c071bf7ad8600e079/greenlet-3.5.1-cp315-cp315-musllinux_1_2_aarch64.whl", hash = "sha256:975eac34b44a7077ca4d421348455b94f0f518246a7f14bc6d2fdcfe5b584368", size = 1621256, upload-time = "2026-05-20T14:02:31.91Z" },
{ url = "https://files.pythonhosted.org/packages/00/10/f2dddcf7dacac17dfc68691809589adad06135eb28930429cf58a6467a2f/greenlet-3.5.1-cp315-cp315-musllinux_1_2_x86_64.whl", hash = "sha256:9ab3c3a0b2ae6198e67c898dad5215a49f9ae0d0081b3c3ec59f333e39eeca26", size = 1685956, upload-time = "2026-05-20T13:14:42.55Z" },
{ url = "https://files.pythonhosted.org/packages/22/17/4a232b32133230ada52f70e9d7f5b65b0caef8772f01849bd8d149e7e4ca/greenlet-3.5.1-cp315-cp315-win_amd64.whl", hash = "sha256:cbfc69be86e10dcfef5b1e6269d1d6926552aa89ee39e1de3353360c1b6989ab", size = 239802, upload-time = "2026-05-20T13:13:15.481Z" },
{ url = "https://files.pythonhosted.org/packages/c2/ae/4e623a7e6d4d2a5f4cb8e4c82de4169fc637942caae68d6e676b8a128ac5/greenlet-3.5.1-cp315-cp315-win_arm64.whl", hash = "sha256:92fd6d44ac5e5a887c8a5dc4a8ba0ba908527c31c12f78c6bc7dcfe8aab279f6", size = 236853, upload-time = "2026-05-20T13:15:37.301Z" },
{ url = "https://files.pythonhosted.org/packages/7a/57/816d9cff29119da3505b3d6a5e14a8af89006ac36f47f891ff293ee05af1/greenlet-3.5.1-cp315-cp315t-macosx_11_0_universal2.whl", hash = "sha256:a6fdf2433a5441ef9a95464f7c3e674775da1c8c1177fff311cee1acad4626ed", size = 293877, upload-time = "2026-05-20T13:10:19.078Z" },
{ url = "https://files.pythonhosted.org/packages/23/a1/59b0a7c7d140ff1a75626680b9a9899b79a9176cab298b394968fb023295/greenlet-3.5.1-cp315-cp315t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7546556f0d649f99f6a361098a55f761181bb2ea12ff150bb16d26092ad88244", size = 655333, upload-time = "2026-05-20T14:00:14.758Z" },
{ url = "https://files.pythonhosted.org/packages/72/1b/5efe127597625042218939d01855109f352779050768b670b52edcc16a6c/greenlet-3.5.1-cp315-cp315t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d5ee3ea898009fa898f85f9982255d35278c477bebe185beca249cab42d4526c", size = 659443, upload-time = "2026-05-20T14:05:50.159Z" },
{ url = "https://files.pythonhosted.org/packages/6c/6d/c404246ea4d22d097a7426d0efb5b781bd7eb67715f09e79001bd552ab18/greenlet-3.5.1-cp315-cp315t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a5c81f74d204d3edd136ebfd50dce53acbb776995d721a0fe801626cfc93b8cd", size = 658356, upload-time = "2026-05-20T13:14:35.091Z" },
{ url = "https://files.pythonhosted.org/packages/51/02/f8ee37fb6d2219329f350af241c27fcf12df57e723d11f6fc6d3bacdadaa/greenlet-3.5.1-cp315-cp315t-musllinux_1_2_aarch64.whl", hash = "sha256:2c18ef16bf6d4dd410e4dd52996888ea1497be26892fe5bbc73580aba4287b8e", size = 1619216, upload-time = "2026-05-20T14:02:33.403Z" },
{ url = "https://files.pythonhosted.org/packages/93/c5/3dc9475ace2c7a3680da12372cddd7f1ac874eb410a1ac48d3e9dab83782/greenlet-3.5.1-cp315-cp315t-musllinux_1_2_x86_64.whl", hash = "sha256:17d86354f0ae6b61bf9be5148d0dd34e06c3cb7c602c671f79f29ac3b150e659", size = 1678427, upload-time = "2026-05-20T13:14:43.71Z" },
{ url = "https://files.pythonhosted.org/packages/df/4e/750c15c317a41ffb36f0bf40b933e3d744a7dede61889f74443ea69690cf/greenlet-3.5.1-cp315-cp315t-win_amd64.whl", hash = "sha256:e7516cf6ae6b8a582c2770a0caed47b8a48373ed732c33d69a72913ae6ac923e", size = 245225, upload-time = "2026-05-20T13:13:59.366Z" },
{ url = "https://files.pythonhosted.org/packages/4f/fd/d3baea2eeb7b617efd47e87ca06e2ec2c6118d303aa9e918e0ce16eadc10/greenlet-3.5.1-cp315-cp315t-win_arm64.whl", hash = "sha256:5028648bf2253ec4745add746129d3904121fa7fe871a76bed23c5720573ce0a", size = 239590, upload-time = "2026-05-20T13:13:37.382Z" },
]
[[package]] [[package]]
name = "grpcio" name = "grpcio"
version = "1.80.0" version = "1.80.0"
@@ -2756,61 +2684,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" },
] ]
[[package]]
name = "sqlalchemy"
version = "2.0.50"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "greenlet", marker = "platform_machine == 'AMD64' or platform_machine == 'WIN32' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'ppc64le' or platform_machine == 'win32' or platform_machine == 'x86_64'" },
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/57/da/6fbf010c8ebb347679d0d100b22fe9ba5e13fd04046c5df7280d2f0bf706/sqlalchemy-2.0.50.tar.gz", hash = "sha256:af5607d11ef90fd6a5c0549fe0045dce1663d427426bcfb506dcb5346a85a3b9", size = 9907424, upload-time = "2026-05-24T19:20:04.018Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/70/a9/812a775bd8c1af0966d660238d005baf25e9bced1f038c8e71f00aa637a7/sqlalchemy-2.0.50-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7af6eeb84985bf840ba779018ff9424d61ff69b52e66b8789d3c8da7bf5341b2", size = 2161617, upload-time = "2026-05-24T20:00:00.761Z" },
{ url = "https://files.pythonhosted.org/packages/d5/74/5a6bc5496e9be8f740fbf80f9e6bd4ab965c8a80870eb07ab015e360957a/sqlalchemy-2.0.50-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0fe7822866f3a9fc5f3db21a290ce8961a53050115f05edf9402b6a5feb92a9f", size = 3244104, upload-time = "2026-05-24T20:07:38.158Z" },
{ url = "https://files.pythonhosted.org/packages/81/55/b260d8df2adc9bb0bf294f67b5f802ff0d84d99442b536b9efd0ea72d447/sqlalchemy-2.0.50-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e8e1b0f6a4dcd9b4839e2320afb5df37a6981cbc20ff9c423ae11c5537bdbd21", size = 3243039, upload-time = "2026-05-24T20:14:23.765Z" },
{ url = "https://files.pythonhosted.org/packages/e5/6d/58714005cbf370f16c3f30d30324a43be10069efcfe764f7236a2e851947/sqlalchemy-2.0.50-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:e195687f1af431c9515416288373b323b6eb599f774409814e89e9d603a56e39", size = 3195017, upload-time = "2026-05-24T20:07:40.086Z" },
{ url = "https://files.pythonhosted.org/packages/30/e8/67527fee039bd3e1a6ce3f03d2b62fd87ab9099c17052810d79496727b66/sqlalchemy-2.0.50-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ea1a8a2db4b2217d456c8d7a873bfc605f06fe3584d315264ea18c2a17585d0b", size = 3215308, upload-time = "2026-05-24T20:14:26.034Z" },
{ url = "https://files.pythonhosted.org/packages/94/b2/dd3155a6a6706cb89adecf5ee6e0512f7b0ee5cf3e6f4cde67d3c20ebfda/sqlalchemy-2.0.50-cp310-cp310-win32.whl", hash = "sha256:68b154b08088b4ec32bb4d2958bfbb50e57549f91a4cd3e7f928e3553ed69031", size = 2121637, upload-time = "2026-05-24T20:08:06.401Z" },
{ url = "https://files.pythonhosted.org/packages/93/a1/a09c463ee3e7764b5ce5bd19a7f0b6eefbde62e637439ab58498cdbd6b47/sqlalchemy-2.0.50-cp310-cp310-win_amd64.whl", hash = "sha256:66e374271ecb7101273f57af1a62446a953d327eec4f8089147de57c591bbacc", size = 2144673, upload-time = "2026-05-24T20:08:07.936Z" },
{ url = "https://files.pythonhosted.org/packages/b6/5d/3172686af1770e4de2805f919a51441085f589ddadf3dd76ec582f84f497/sqlalchemy-2.0.50-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1aa6e403663a9c43c8fef7ce4bdb4cf48bcd8d352e91deda2a99f963270bd508", size = 2161366, upload-time = "2026-05-24T20:00:02.061Z" },
{ url = "https://files.pythonhosted.org/packages/0f/90/e98dedea3c3e663a17afcd003a34ba45efdac2cea3b6f2e4585e2b1e2537/sqlalchemy-2.0.50-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:51b637a84f9fa35ae1f9017e786cb142974a25305085e1b378b3647a67f65ad3", size = 3318926, upload-time = "2026-05-24T20:07:42.369Z" },
{ url = "https://files.pythonhosted.org/packages/3b/4f/501308c2babb62c11753ecb4ee88ba9eef019419a4d6cbf7cb13e2bad353/sqlalchemy-2.0.50-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2dab927761d9108550f0cf8e66ff21af56f907a0ce0a689793db615e2b55f62c", size = 3319199, upload-time = "2026-05-24T20:14:28.551Z" },
{ url = "https://files.pythonhosted.org/packages/ac/39/d88996c5e03ed6248c3a788d20f0b8d8b376b9f8a495e4bab9df7c72d2f8/sqlalchemy-2.0.50-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:545eae198d37bcf837a10ede3684e2af32458d6f35c597c35c2de7502dc38fc4", size = 3270301, upload-time = "2026-05-24T20:07:44.917Z" },
{ url = "https://files.pythonhosted.org/packages/42/1b/1ae0e65161b51cc43e5ca75430ef79d80e23b5042d645586c2c342c3b92e/sqlalchemy-2.0.50-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0fec460e18cdbb4c7773531122ce9a27e96c6ca17af3933941d94da475ad2c86", size = 3293465, upload-time = "2026-05-24T20:14:30.501Z" },
{ url = "https://files.pythonhosted.org/packages/83/29/17c0003f2c0dfa6d1b97672475707e3ec5980db09defd7fa20beb6833bbd/sqlalchemy-2.0.50-cp311-cp311-win32.whl", hash = "sha256:e6e814658818fd165e749e3d8490ef16cc7f379a118c37ada8b0589ffbaaac22", size = 2120694, upload-time = "2026-05-24T20:08:09.237Z" },
{ url = "https://files.pythonhosted.org/packages/c9/18/280d00654cc19d1fccf236fa5070f6dd04b84dde6f1b2e637bde0ff340a7/sqlalchemy-2.0.50-cp311-cp311-win_amd64.whl", hash = "sha256:1c5f858fe79c9f5d8fda065c06186356acb7f8df3cd52dbd5ee3f200e4b144f5", size = 2145315, upload-time = "2026-05-24T20:08:10.952Z" },
{ url = "https://files.pythonhosted.org/packages/be/b0/a9d19b43f38f878b1278bca5b00b909f7540d41494396dd2561f9ad0956d/sqlalchemy-2.0.50-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:23ae23d8b9d344d30d0a92f06d45825024a5790f1c1dd4cf452636a50d3e58cb", size = 2159807, upload-time = "2026-05-24T19:27:53.086Z" },
{ url = "https://files.pythonhosted.org/packages/f5/2c/191dd58a248fd2cfd4780fa82c375c505e4ad98c8b522fa69ec492130d77/sqlalchemy-2.0.50-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:47b71b933e7b4ebad407c8fdfd70d2c4f08b78b3238bb30eebdd6eb32ca51b89", size = 3343358, upload-time = "2026-05-24T20:09:29.279Z" },
{ url = "https://files.pythonhosted.org/packages/8a/2b/514fce8a7df81cf5bad7ff7865de7ac0c5776a38cc043475c4703eb7fe8b/sqlalchemy-2.0.50-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:110fdac56ace278949f00de805edacbd6141e382d992f9ba28238b3a0827a600", size = 3357994, upload-time = "2026-05-24T20:17:13.495Z" },
{ url = "https://files.pythonhosted.org/packages/35/a6/a0e283f5494f92b0d77e319ff77e437b1ffe4a051ba67c81d53234825475/sqlalchemy-2.0.50-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0f5e4ac70e9e757f6b3e87c0491ff034442ecd8dfd36d041a50564c322dafc0e", size = 3289399, upload-time = "2026-05-24T20:09:32.239Z" },
{ url = "https://files.pythonhosted.org/packages/b7/96/1b07325ba71752d6a028b77d07bed1483ad545f794e8b1dc89b3ba3b3c68/sqlalchemy-2.0.50-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:724f3dcbe53dd0151e3cb5e7ec4ba4c620bede579caacd16275dc35ce06e8615", size = 3321216, upload-time = "2026-05-24T20:17:15.581Z" },
{ url = "https://files.pythonhosted.org/packages/ed/8e/bad6ed253e8a99edfc99af02f7173ec48a1d3ed1b9b35a1b8bc1700900cc/sqlalchemy-2.0.50-cp312-cp312-win32.whl", hash = "sha256:1208050441471d003b7c8cb4054fb084f185cf35ac3f0ea270803865bca9939a", size = 2119194, upload-time = "2026-05-24T19:50:04.943Z" },
{ url = "https://files.pythonhosted.org/packages/b6/2d/314a6690dda4b9cfc571eab1a63cf6fe6e1470aa3759ccda6aa016ee0f5a/sqlalchemy-2.0.50-cp312-cp312-win_amd64.whl", hash = "sha256:9d1af51558029a156a70986b7df88f042b3d158d7c8d8fb5072912d4b32d89c7", size = 2146186, upload-time = "2026-05-24T19:50:06.74Z" },
{ url = "https://files.pythonhosted.org/packages/0b/c4/c42356b527296e9862f67990efce31ef78b4cf69cd3f80873a528a060320/sqlalchemy-2.0.50-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:06a9210bdc5f4298cff0781087e2ff45683922252dacc452846373a58761f093", size = 2156697, upload-time = "2026-05-24T19:27:54.764Z" },
{ url = "https://files.pythonhosted.org/packages/60/a1/b1a70e3c4365ac7fe9e347f3710f19b562c866fb96d45e3c891588789a7b/sqlalchemy-2.0.50-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b53784972ade4f8174b9aa661f31a06f8a936d2cfdd602913ff3c6dd40ae873", size = 3284260, upload-time = "2026-05-24T20:09:34.195Z" },
{ url = "https://files.pythonhosted.org/packages/3f/4a/f3ac3caa19f263d57b0a47f8c91bbf56583dc2d3fc63acfbf644abb24fe0/sqlalchemy-2.0.50-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:31648fa14460537e768a7303b078e4344d208e0d23e06867c1f376a227ed82db", size = 3302280, upload-time = "2026-05-24T20:17:17.825Z" },
{ url = "https://files.pythonhosted.org/packages/66/55/ccada3e3d62254587819749a0bc69f41173eb48a6e385d10e66d32a9c88e/sqlalchemy-2.0.50-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:03f4323c980ad0e918cc9e5369b015f759f4e534db5bbaf4dc36832c10d05064", size = 3231580, upload-time = "2026-05-24T20:09:36.406Z" },
{ url = "https://files.pythonhosted.org/packages/05/f6/6809349130a2de0e109e7f00fd7d431da9565b9b2868b32ee684754f672b/sqlalchemy-2.0.50-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:2b9dcc43afef8ac157cd92fce96985d6b8b0cfbd3df4d666f66b4d55a75d202f", size = 3269375, upload-time = "2026-05-24T20:17:20.34Z" },
{ url = "https://files.pythonhosted.org/packages/48/84/278a811ef4e07be9c89dc5cdd7be833268509a66a68c4897cf585e67428f/sqlalchemy-2.0.50-cp313-cp313-win32.whl", hash = "sha256:60922d6599065ddca2c6f376b9aa2f41a6b85a271725e0909490bbc50b1998a5", size = 2117229, upload-time = "2026-05-24T19:50:08.215Z" },
{ url = "https://files.pythonhosted.org/packages/f6/1c/067cc6187ed32d2ec222fe6d2643acc1659a6d0659f8a7cbc5ad3ae83280/sqlalchemy-2.0.50-cp313-cp313-win_amd64.whl", hash = "sha256:287086e67275a212c4582d166a6fb03a65ccc5551d80866270ce0dd9f34eccd3", size = 2143126, upload-time = "2026-05-24T19:50:09.691Z" },
{ url = "https://files.pythonhosted.org/packages/df/32/10ac51b4be7cdecd7e93d069251c86dfbf70b7adbd7c67b48ccea6c49e1c/sqlalchemy-2.0.50-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c966932507a4d7d0a37314927dbfcd89720e3f37d2a1e3352e7ae7939fa8e8a0", size = 2158519, upload-time = "2026-05-24T19:27:56.472Z" },
{ url = "https://files.pythonhosted.org/packages/5a/76/e703d2f7681d7d66c4c891af3f07c7ccf4c76ad7f18351de035b5eda007a/sqlalchemy-2.0.50-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:faffef4bcc20a1892e65e155293d99d60855bbbc79250ab712819cfd56a8e6bb", size = 3282063, upload-time = "2026-05-24T20:09:38.57Z" },
{ url = "https://files.pythonhosted.org/packages/31/26/ef168b184a25701f9995e8fb7e503fafd7a99c1c77cda1bc1a26ea2ed486/sqlalchemy-2.0.50-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6c206aec519a2e7bd08abbfb33436e325fd22c632d9c21a9047e376ce241646e", size = 3287069, upload-time = "2026-05-24T20:17:21.942Z" },
{ url = "https://files.pythonhosted.org/packages/c2/15/765acc2bc693bccc43ca4a95d5b69750da8aaf6db1b5c616536e087f8920/sqlalchemy-2.0.50-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:bef4ac756363227ef6402a75fee025a4bc690f92328e825868939b3b3a446a6d", size = 3230453, upload-time = "2026-05-24T20:09:40.398Z" },
{ url = "https://files.pythonhosted.org/packages/63/61/08e03c3adbf5db0087a0b6816746fec8f3032fb2f7fc899a9bb9b2a48ce4/sqlalchemy-2.0.50-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:96fbee6b19c19cd1556c8bf9419447cf2ec149ffcab7ab64348c23e54ef8547f", size = 3252413, upload-time = "2026-05-24T20:17:24.067Z" },
{ url = "https://files.pythonhosted.org/packages/03/0c/370a1f2db38436c615e10134c8a37de3688e74084792380695f3f5083860/sqlalchemy-2.0.50-cp314-cp314-win32.whl", hash = "sha256:8f00e3eb43ba30eb1b238ee03a8a62309486d1321eda3328bb611e0340033ad8", size = 2120063, upload-time = "2026-05-24T19:50:11.08Z" },
{ url = "https://files.pythonhosted.org/packages/7f/a0/fe92bb9817863bc13ba093bda931979a26cc2ca69f8e8f26d07add3d7c6f/sqlalchemy-2.0.50-cp314-cp314-win_amd64.whl", hash = "sha256:15708c613cd5005b7dffe1f66ee6a63ee8f5e46799f71c70ebad74178c676a39", size = 2145830, upload-time = "2026-05-24T19:50:12.452Z" },
{ url = "https://files.pythonhosted.org/packages/cc/ff/e5640a98a0b2f491eb8fde10fb6c773621a2e44340de231fafcc9370f4a9/sqlalchemy-2.0.50-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:3699dac4be410e97049a1658e9480da9cde956594aa0f3aebc60b88f21c5ba70", size = 2178435, upload-time = "2026-05-24T19:42:58.889Z" },
{ url = "https://files.pythonhosted.org/packages/b7/85/337116e186f1236375b5fb70c21cfac98e8e8ab0d3a47be838dc47a59e08/sqlalchemy-2.0.50-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f96233858e3df43932ac11589e22520da6e8aeb624b03fedfeebb0e8ea213086", size = 3566059, upload-time = "2026-05-24T20:01:20.848Z" },
{ url = "https://files.pythonhosted.org/packages/96/34/bb0e190e161c3c2c24314a65add57218be14a4a9486886b7f5047c1ff7c8/sqlalchemy-2.0.50-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c4e70c46fad30c3bcc6a4708bc0130a3173e11a5b25f0ea4a9d8911b450f1f52", size = 3535366, upload-time = "2026-05-24T20:03:56.768Z" },
{ url = "https://files.pythonhosted.org/packages/df/5a/a7f759f97e4fd499c5d4e4488c760d5a7fbecf3028b465a04274fcd52384/sqlalchemy-2.0.50-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1918a3cf564d16d95bca7301005f41ab2ad50b07cd3b9da50d3ed986db148d6a", size = 3474879, upload-time = "2026-05-24T20:01:23.058Z" },
{ url = "https://files.pythonhosted.org/packages/9d/d9/2907ea38eb60687d297bf9c39e5ee58053c87b57fe8a9cae97090cecbf10/sqlalchemy-2.0.50-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:b00098cdbdbd38c7be3d568b0c9c3122b8c0ec62b911b57cd5e6e0254d60a76d", size = 3486117, upload-time = "2026-05-24T20:03:59.052Z" },
{ url = "https://files.pythonhosted.org/packages/f2/e3/5aa06f167559f8c0bdae487e297d23ba548150ab016a3418265d617a4985/sqlalchemy-2.0.50-cp314-cp314t-win32.whl", hash = "sha256:1fbd55a969d7ac44a98e3dec75016074f809fa08f871585ace58dde110d1bf3e", size = 2150823, upload-time = "2026-05-24T20:08:58.644Z" },
{ url = "https://files.pythonhosted.org/packages/65/9b/112fb8f977582d7489d036e409e3723948bcf5320b3ac465f3c481bbe8f9/sqlalchemy-2.0.50-cp314-cp314t-win_amd64.whl", hash = "sha256:c5c3cdb753a9004183e1ccb634b41611654c989e61bc68617ce878e46d6f1e51", size = 2185794, upload-time = "2026-05-24T20:09:00.319Z" },
{ url = "https://files.pythonhosted.org/packages/d0/10/f7220e9b784d295d241c86ed99aeb537f92afcd469a64861f2717e9bb077/sqlalchemy-2.0.50-py3-none-any.whl", hash = "sha256:92064363517a3ff8212b5a93b8c62876579d8dfd1ca5b561335f30152d884fa9", size = 1943861, upload-time = "2026-05-24T19:59:01.119Z" },
]
[[package]] [[package]]
name = "starlette" name = "starlette"
version = "1.1.0" version = "1.1.0"