Fix centered content layout widths
This commit is contained in:
@@ -7,16 +7,22 @@ import tempfile
|
|||||||
import uuid
|
import uuid
|
||||||
import json
|
import json
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
from datetime import UTC, datetime
|
||||||
|
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
from app.config.settings import settings
|
||||||
|
|
||||||
from app.domain.documents import (
|
from app.domain.documents import (
|
||||||
ChunkBuilder,
|
ChunkBuilder,
|
||||||
Document,
|
Document,
|
||||||
|
DocumentArtifact,
|
||||||
DocumentBinaryStore,
|
DocumentBinaryStore,
|
||||||
DocumentParser,
|
DocumentParser,
|
||||||
|
DocumentProcessingRun,
|
||||||
|
DocumentProcessingStore,
|
||||||
DocumentRepository,
|
DocumentRepository,
|
||||||
DocumentStatus,
|
DocumentStatus,
|
||||||
|
DocumentStatusEvent,
|
||||||
ParseArtifactStore,
|
ParseArtifactStore,
|
||||||
ParsedDocument,
|
ParsedDocument,
|
||||||
)
|
)
|
||||||
@@ -39,6 +45,7 @@ class DocumentProcessResult:
|
|||||||
|
|
||||||
class DocumentCommandService:
|
class DocumentCommandService:
|
||||||
"""Provide the Document Command Service service."""
|
"""Provide the Document Command Service service."""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
*,
|
*,
|
||||||
@@ -49,6 +56,7 @@ class DocumentCommandService:
|
|||||||
embedding_provider: EmbeddingProvider,
|
embedding_provider: EmbeddingProvider,
|
||||||
vector_index: VectorIndex,
|
vector_index: VectorIndex,
|
||||||
parse_artifact_store: ParseArtifactStore | None = None,
|
parse_artifact_store: ParseArtifactStore | None = None,
|
||||||
|
document_processing_store: DocumentProcessingStore | None = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Initialize the Document Command Service instance."""
|
"""Initialize the Document Command Service instance."""
|
||||||
self.document_repository = document_repository
|
self.document_repository = document_repository
|
||||||
@@ -58,6 +66,11 @@ class DocumentCommandService:
|
|||||||
self.embedding_provider = embedding_provider
|
self.embedding_provider = embedding_provider
|
||||||
self.vector_index = vector_index
|
self.vector_index = vector_index
|
||||||
self.parse_artifact_store = parse_artifact_store
|
self.parse_artifact_store = parse_artifact_store
|
||||||
|
self.document_processing_store = document_processing_store
|
||||||
|
|
||||||
|
def _utcnow(self) -> datetime:
|
||||||
|
"""Return the current UTC timestamp for persisted processing metadata."""
|
||||||
|
return datetime.now(UTC)
|
||||||
|
|
||||||
def _save_parse_artifacts(self, *, doc_id: str, parsed_document: ParsedDocument) -> dict[str, str]:
|
def _save_parse_artifacts(self, *, doc_id: str, parsed_document: ParsedDocument) -> dict[str, str]:
|
||||||
"""Persist parse artifacts so troubleshooting does not depend on provider retention windows."""
|
"""Persist parse artifacts so troubleshooting does not depend on provider retention windows."""
|
||||||
@@ -80,6 +93,143 @@ class DocumentCommandService:
|
|||||||
artifact_keys[name] = object_name
|
artifact_keys[name] = object_name
|
||||||
return artifact_keys
|
return artifact_keys
|
||||||
|
|
||||||
|
def _safe_create_processing_run(self, *, doc_id: str, trigger_type: str, generate_summary: bool) -> str | None:
    """Register a new processing run if a processing store is configured.

    Returns the ``run_id`` of the record returned by the store, or ``None``
    when no store is configured or the store call raises (the failure is
    logged as a warning and otherwise ignored).
    """
    store = self.document_processing_store
    if not store:
        return None

    # Snapshot the configured backends/model so the run records what it was started with.
    pending_run = DocumentProcessingRun(
        run_id=str(uuid.uuid4()),
        doc_id=doc_id,
        trigger_type=trigger_type,
        run_status="running",
        parser_backend=settings.parser_backend,
        chunk_backend=settings.chunk_backend,
        embedding_model=settings.embedding_model,
        metadata={"generate_summary": generate_summary},
    )
    try:
        return store.create_run(pending_run).run_id
    except Exception:
        logger.warning("DocumentProcessingStore.create_run failed for doc_id={}", doc_id)
    return None
|
||||||
|
|
||||||
|
def _safe_append_status_event(
    self,
    *,
    doc_id: str,
    run_id: str | None,
    from_status: str,
    to_status: str,
    stage: str,
    message: str = "",
    metadata: dict | None = None,
) -> None:
    """Record one status transition for a run on a best-effort basis.

    Does nothing when no processing store is configured or ``run_id`` is
    empty. An exception raised by the store is logged as a warning and
    swallowed so the caller's workflow continues.
    """
    store = self.document_processing_store
    if not (store and run_id):
        return

    status_event = DocumentStatusEvent(
        event_id=str(uuid.uuid4()),
        doc_id=doc_id,
        run_id=run_id,
        from_status=from_status,
        to_status=to_status,
        stage=stage,
        message=message,
        # A missing or empty mapping is stored as a fresh empty dict.
        metadata=metadata if metadata else {},
    )
    try:
        store.append_status_event(status_event)
    except Exception:
        logger.warning(
            "DocumentProcessingStore.append_status_event failed for doc_id={}, run_id={}",
            doc_id,
            run_id,
        )
|
||||||
|
|
||||||
|
def _safe_mark_run_stored(self, *, doc_id: str, run_id: str | None) -> None:
    """Flag the run as "stored" on a best-effort basis.

    Skipped when no processing store is configured or ``run_id`` is empty.
    Store failures are logged as warnings and never propagate.
    """
    store = self.document_processing_store
    if not (store and run_id):
        return
    try:
        stored_at = self._utcnow()
        store.mark_run_stored(run_id, stored_at=stored_at)
    except Exception:
        logger.warning("DocumentProcessingStore.mark_run_stored failed for doc_id={}, run_id={}", doc_id, run_id)
|
||||||
|
|
||||||
|
def _safe_mark_run_parsed(self, *, doc_id: str, run_id: str | None, parsed_document: ParsedDocument) -> None:
    """Record parse results for the run on a best-effort basis.

    Skipped when no processing store is configured or ``run_id`` is empty.
    Any exception raised while collecting the counts or calling the store is
    logged as a warning and swallowed.
    """
    store = self.document_processing_store
    if not (store and run_id):
        return
    try:
        parse_metadata = parsed_document.metadata
        # Prefer the parser-reported layout count; fall back to the number of raw
        # layouts when the key is absent, and to 0 when the value is falsy.
        reported_layouts = parse_metadata.get("layout_count", len(parsed_document.raw_layouts))
        store.mark_run_parsed(
            run_id,
            parser_backend=parsed_document.parser_name,
            layout_count=int(reported_layouts or 0),
            structure_node_count=len(parsed_document.structure_nodes),
            semantic_block_count=len(parsed_document.semantic_blocks),
            vector_chunk_count=len(parsed_document.vector_chunks),
            parsed_at=self._utcnow(),
            metadata={"parse_task_id": parse_metadata.get("task_id", "")},
        )
    except Exception:
        logger.warning("DocumentProcessingStore.mark_run_parsed failed for doc_id={}, run_id={}", doc_id, run_id)
|
||||||
|
|
||||||
|
def _safe_replace_processing_artifacts(self, *, doc_id: str, run_id: str | None, artifact_keys: dict[str, str]) -> None:
    """Replace the run's artifact references on a best-effort basis.

    ``artifact_keys`` maps an artifact type to its object name; each entry
    becomes one ``DocumentArtifact``. Skipped when no processing store is
    configured or ``run_id`` is empty. Store failures are logged as warnings
    and swallowed.
    """
    store = self.document_processing_store
    if not (store and run_id):
        return

    artifact_refs = []
    for kind, stored_name in artifact_keys.items():
        artifact_refs.append(
            DocumentArtifact(
                artifact_id=str(uuid.uuid4()),
                doc_id=doc_id,
                run_id=run_id,
                artifact_type=kind,
                object_name=stored_name,
                content_type="application/json",
                # Size and checksum are not computed here; placeholders are stored.
                byte_size=0,
                checksum="",
            )
        )

    try:
        store.replace_artifacts_for_run(run_id, artifact_refs)
    except Exception:
        logger.warning(
            "DocumentProcessingStore.replace_artifacts_for_run failed for doc_id={}, run_id={}",
            doc_id,
            run_id,
        )
|
||||||
|
|
||||||
|
def _safe_mark_run_indexed(self, *, doc_id: str, run_id: str | None, chunk_count: int, index_name: str) -> None:
    """Flag the run as indexed (and finished) on a best-effort basis.

    Skipped when no processing store is configured or ``run_id`` is empty.
    Store failures are logged as warnings and swallowed.
    """
    store = self.document_processing_store
    if not (store and run_id):
        return

    # One timestamp is taken so ``indexed_at`` and ``finished_at`` are identical.
    completed_at = self._utcnow()
    try:
        store.mark_run_indexed(
            run_id,
            chunk_count=chunk_count,
            index_name=index_name,
            indexed_at=completed_at,
            finished_at=completed_at,
        )
    except Exception:
        logger.warning("DocumentProcessingStore.mark_run_indexed failed for doc_id={}, run_id={}", doc_id, run_id)
|
||||||
|
|
||||||
|
def _safe_mark_run_failed(self, *, doc_id: str, run_id: str | None, failure_stage: str, error_message: str) -> None:
    """Flag the run as failed on a best-effort basis.

    Skipped when no processing store is configured or ``run_id`` is empty.
    Store failures are logged as warnings and swallowed, so this helper never
    raises from the caller's own error-handling path.
    """
    store = self.document_processing_store
    if not (store and run_id):
        return
    try:
        failure_details = {
            "failure_stage": failure_stage,
            "error_message": error_message,
            "finished_at": self._utcnow(),
        }
        store.mark_run_failed(run_id, **failure_details)
    except Exception:
        logger.warning("DocumentProcessingStore.mark_run_failed failed for doc_id={}, run_id={}", doc_id, run_id)
|
||||||
|
|
||||||
def upload_and_process(
|
def upload_and_process(
|
||||||
self,
|
self,
|
||||||
*,
|
*,
|
||||||
@@ -91,11 +241,15 @@ class DocumentCommandService:
|
|||||||
regulation_type: str,
|
regulation_type: str,
|
||||||
version: str,
|
version: str,
|
||||||
generate_summary: bool,
|
generate_summary: bool,
|
||||||
|
trigger_type: str = "upload",
|
||||||
) -> DocumentProcessResult:
|
) -> DocumentProcessResult:
|
||||||
"""Handle upload and process for the Document Command Service instance."""
|
"""Handle upload and process for the Document Command Service instance."""
|
||||||
doc_id = doc_id or str(uuid.uuid4())[:8]
|
doc_id = doc_id or str(uuid.uuid4())[:8]
|
||||||
final_doc_name = doc_name or file_name
|
final_doc_name = doc_name or file_name
|
||||||
object_name = f"{doc_id}/{file_name}"
|
object_name = f"{doc_id}/{file_name}"
|
||||||
|
run_id: str | None = None
|
||||||
|
current_status = DocumentStatus.PENDING
|
||||||
|
current_stage = "store"
|
||||||
|
|
||||||
document = Document(
|
document = Document(
|
||||||
doc_id=doc_id,
|
doc_id=doc_id,
|
||||||
@@ -109,6 +263,19 @@ class DocumentCommandService:
|
|||||||
metadata={"generate_summary": generate_summary},
|
metadata={"generate_summary": generate_summary},
|
||||||
)
|
)
|
||||||
self.document_repository.create(document)
|
self.document_repository.create(document)
|
||||||
|
run_id = self._safe_create_processing_run(
|
||||||
|
doc_id=doc_id,
|
||||||
|
trigger_type=trigger_type,
|
||||||
|
generate_summary=generate_summary,
|
||||||
|
)
|
||||||
|
self._safe_append_status_event(
|
||||||
|
doc_id=doc_id,
|
||||||
|
run_id=run_id,
|
||||||
|
from_status="",
|
||||||
|
to_status=DocumentStatus.PENDING.value,
|
||||||
|
stage="document_created",
|
||||||
|
message="Document record created",
|
||||||
|
)
|
||||||
|
|
||||||
temp_path = ""
|
temp_path = ""
|
||||||
try:
|
try:
|
||||||
@@ -119,6 +286,17 @@ class DocumentCommandService:
|
|||||||
metadata={"doc_id": doc_id},
|
metadata={"doc_id": doc_id},
|
||||||
)
|
)
|
||||||
self.document_repository.update_status(doc_id, DocumentStatus.STORED)
|
self.document_repository.update_status(doc_id, DocumentStatus.STORED)
|
||||||
|
current_status = DocumentStatus.STORED
|
||||||
|
current_stage = "parse"
|
||||||
|
self._safe_mark_run_stored(doc_id=doc_id, run_id=run_id)
|
||||||
|
self._safe_append_status_event(
|
||||||
|
doc_id=doc_id,
|
||||||
|
run_id=run_id,
|
||||||
|
from_status=DocumentStatus.PENDING.value,
|
||||||
|
to_status=DocumentStatus.STORED.value,
|
||||||
|
stage="store",
|
||||||
|
message="Source file stored",
|
||||||
|
)
|
||||||
|
|
||||||
suffix = os.path.splitext(file_name)[1]
|
suffix = os.path.splitext(file_name)[1]
|
||||||
with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
|
with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
|
||||||
@@ -130,7 +308,13 @@ class DocumentCommandService:
|
|||||||
doc_id=doc_id,
|
doc_id=doc_id,
|
||||||
doc_name=final_doc_name,
|
doc_name=final_doc_name,
|
||||||
)
|
)
|
||||||
|
self._safe_mark_run_parsed(doc_id=doc_id, run_id=run_id, parsed_document=parsed_document)
|
||||||
|
|
||||||
|
artifact_keys: dict[str, str] = {}
|
||||||
|
try:
|
||||||
artifact_keys = self._save_parse_artifacts(doc_id=doc_id, parsed_document=parsed_document)
|
artifact_keys = self._save_parse_artifacts(doc_id=doc_id, parsed_document=parsed_document)
|
||||||
|
except Exception:
|
||||||
|
logger.warning("Parse artifact binary persistence failed for doc_id={}", doc_id)
|
||||||
self.document_repository.update_status(
|
self.document_repository.update_status(
|
||||||
doc_id,
|
doc_id,
|
||||||
DocumentStatus.PARSED,
|
DocumentStatus.PARSED,
|
||||||
@@ -146,6 +330,18 @@ class DocumentCommandService:
|
|||||||
"processing_stage": "parsed",
|
"processing_stage": "parsed",
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
current_status = DocumentStatus.PARSED
|
||||||
|
current_stage = "embed"
|
||||||
|
self._safe_replace_processing_artifacts(doc_id=doc_id, run_id=run_id, artifact_keys=artifact_keys)
|
||||||
|
self._safe_append_status_event(
|
||||||
|
doc_id=doc_id,
|
||||||
|
run_id=run_id,
|
||||||
|
from_status=DocumentStatus.STORED.value,
|
||||||
|
to_status=DocumentStatus.PARSED.value,
|
||||||
|
stage="parse",
|
||||||
|
message="Document parsed",
|
||||||
|
metadata={"artifact_count": len(artifact_keys)},
|
||||||
|
)
|
||||||
if self.parse_artifact_store:
|
if self.parse_artifact_store:
|
||||||
try:
|
try:
|
||||||
self.parse_artifact_store.save(
|
self.parse_artifact_store.save(
|
||||||
@@ -165,6 +361,7 @@ class DocumentCommandService:
|
|||||||
raise ValueError("解析完成但没有生成可入库的 chunks")
|
raise ValueError("解析完成但没有生成可入库的 chunks")
|
||||||
|
|
||||||
vectors = self.embedding_provider.embed_texts([chunk.embedding_text for chunk in chunks])
|
vectors = self.embedding_provider.embed_texts([chunk.embedding_text for chunk in chunks])
|
||||||
|
current_stage = "index"
|
||||||
inserted = self.vector_index.upsert(chunks, vectors)
|
inserted = self.vector_index.upsert(chunks, vectors)
|
||||||
if inserted != len(chunks):
|
if inserted != len(chunks):
|
||||||
logger.warning("Milvus upsert count mismatched: inserted={}, chunks={}", inserted, len(chunks))
|
logger.warning("Milvus upsert count mismatched: inserted={}, chunks={}", inserted, len(chunks))
|
||||||
@@ -182,6 +379,23 @@ class DocumentCommandService:
|
|||||||
"processing_stage": "indexed",
|
"processing_stage": "indexed",
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
current_status = DocumentStatus.INDEXED
|
||||||
|
index_name = health.get("collection_name", "")
|
||||||
|
self._safe_mark_run_indexed(
|
||||||
|
doc_id=doc_id,
|
||||||
|
run_id=run_id,
|
||||||
|
chunk_count=len(chunks),
|
||||||
|
index_name=index_name,
|
||||||
|
)
|
||||||
|
self._safe_append_status_event(
|
||||||
|
doc_id=doc_id,
|
||||||
|
run_id=run_id,
|
||||||
|
from_status=DocumentStatus.PARSED.value,
|
||||||
|
to_status=DocumentStatus.INDEXED.value,
|
||||||
|
stage="index",
|
||||||
|
message="Document indexed",
|
||||||
|
metadata={"chunk_count": len(chunks), "index_name": index_name},
|
||||||
|
)
|
||||||
stored = self.document_repository.get(doc_id)
|
stored = self.document_repository.get(doc_id)
|
||||||
return DocumentProcessResult(
|
return DocumentProcessResult(
|
||||||
doc_id=doc_id,
|
doc_id=doc_id,
|
||||||
@@ -194,6 +408,7 @@ class DocumentCommandService:
|
|||||||
)
|
)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.exception("文档处理失败: doc_id={}", doc_id)
|
logger.exception("文档处理失败: doc_id={}", doc_id)
|
||||||
|
failure_stage = current_stage
|
||||||
self.document_repository.update_status(
|
self.document_repository.update_status(
|
||||||
doc_id,
|
doc_id,
|
||||||
DocumentStatus.FAILED,
|
DocumentStatus.FAILED,
|
||||||
@@ -201,8 +416,23 @@ class DocumentCommandService:
|
|||||||
metadata={
|
metadata={
|
||||||
"failure_reason": str(exc),
|
"failure_reason": str(exc),
|
||||||
"processing_stage": "failed",
|
"processing_stage": "failed",
|
||||||
|
"failure_stage": failure_stage,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
self._safe_mark_run_failed(
|
||||||
|
doc_id=doc_id,
|
||||||
|
run_id=run_id,
|
||||||
|
failure_stage=failure_stage,
|
||||||
|
error_message=str(exc),
|
||||||
|
)
|
||||||
|
self._safe_append_status_event(
|
||||||
|
doc_id=doc_id,
|
||||||
|
run_id=run_id,
|
||||||
|
from_status=current_status.value,
|
||||||
|
to_status=DocumentStatus.FAILED.value,
|
||||||
|
stage=failure_stage,
|
||||||
|
message=str(exc),
|
||||||
|
)
|
||||||
return DocumentProcessResult(
|
return DocumentProcessResult(
|
||||||
doc_id=doc_id,
|
doc_id=doc_id,
|
||||||
doc_name=final_doc_name,
|
doc_name=final_doc_name,
|
||||||
@@ -235,6 +465,11 @@ class DocumentCommandService:
|
|||||||
self.parse_artifact_store.delete(doc_id)
|
self.parse_artifact_store.delete(doc_id)
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.warning("ParseArtifactStore delete failed for doc_id={}", doc_id)
|
logger.warning("ParseArtifactStore delete failed for doc_id={}", doc_id)
|
||||||
|
if self.document_processing_store:
|
||||||
|
try:
|
||||||
|
self.document_processing_store.delete_by_document(doc_id)
|
||||||
|
except Exception:
|
||||||
|
logger.warning("DocumentProcessingStore delete failed for doc_id={}", doc_id)
|
||||||
self.document_repository.delete(doc_id)
|
self.document_repository.delete(doc_id)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
@@ -253,6 +488,7 @@ class DocumentCommandService:
|
|||||||
regulation_type=document.regulation_type,
|
regulation_type=document.regulation_type,
|
||||||
version=document.version,
|
version=document.version,
|
||||||
generate_summary=bool(document.metadata.get("generate_summary", False)),
|
generate_summary=bool(document.metadata.get("generate_summary", False)),
|
||||||
|
trigger_type="retry",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -78,6 +78,7 @@ class Settings(BaseSettings):
|
|||||||
chunk_overlap: int = Field(default=50, description="分块重叠大小")
|
chunk_overlap: int = Field(default=50, description="分块重叠大小")
|
||||||
max_file_size_mb: int = Field(default=100, description="最大文件大小(MB)")
|
max_file_size_mb: int = Field(default=100, description="最大文件大小(MB)")
|
||||||
document_metadata_path: str = Field(default="backend/data/documents.json", description="文档元数据存储路径")
|
document_metadata_path: str = Field(default="backend/data/documents.json", description="文档元数据存储路径")
|
||||||
|
document_processing_metadata_path: str = Field(default="backend/data/document_processing.json", description="文档处理历史存储路径")
|
||||||
parser_backend: str = Field(default="aliyun", description="解析后端(local/aliyun)")
|
parser_backend: str = Field(default="aliyun", description="解析后端(local/aliyun)")
|
||||||
chunk_backend: str = Field(default="aliyun", description="分块后端(local/aliyun)")
|
chunk_backend: str = Field(default="aliyun", description="分块后端(local/aliyun)")
|
||||||
document_repository_backend: str = Field(default="json", description="文档元数据存储后端 (json/postgres)")
|
document_repository_backend: str = Field(default="json", description="文档元数据存储后端 (json/postgres)")
|
||||||
|
|||||||
@@ -1,18 +1,29 @@
|
|||||||
"""Initialize the app.domain.documents package."""
|
"""Initialize the app.domain.documents package."""
|
||||||
|
|
||||||
from .models import Chunk, Document, DocumentStatus, ParsedDocument
|
from .models import Chunk, Document, DocumentArtifact, DocumentProcessingRun, DocumentStatus, DocumentStatusEvent, ParsedDocument
|
||||||
from .ports import ChunkBuilder, DocumentBinaryStore, DocumentParser, DocumentRepository, ParseArtifactStore
|
from .ports import (
|
||||||
|
ChunkBuilder,
|
||||||
|
DocumentBinaryStore,
|
||||||
|
DocumentParser,
|
||||||
|
DocumentProcessingStore,
|
||||||
|
DocumentRepository,
|
||||||
|
ParseArtifactStore,
|
||||||
|
)
|
||||||
# Keep package boundaries explicit so backend imports stay predictable.
|
# Keep package boundaries explicit so backend imports stay predictable.
|
||||||
|
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"Chunk",
|
"Chunk",
|
||||||
"Document",
|
"Document",
|
||||||
|
"DocumentArtifact",
|
||||||
|
"DocumentProcessingRun",
|
||||||
"DocumentStatus",
|
"DocumentStatus",
|
||||||
|
"DocumentStatusEvent",
|
||||||
"ParsedDocument",
|
"ParsedDocument",
|
||||||
"ChunkBuilder",
|
"ChunkBuilder",
|
||||||
"DocumentBinaryStore",
|
"DocumentBinaryStore",
|
||||||
"DocumentParser",
|
"DocumentParser",
|
||||||
|
"DocumentProcessingStore",
|
||||||
"DocumentRepository",
|
"DocumentRepository",
|
||||||
"ParseArtifactStore",
|
"ParseArtifactStore",
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -76,3 +76,61 @@ class Chunk:
|
|||||||
semantic_id: str = ""
|
semantic_id: str = ""
|
||||||
block_type: str = ""
|
block_type: str = ""
|
||||||
metadata: dict[str, Any] = field(default_factory=dict)
|
metadata: dict[str, Any] = field(default_factory=dict)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class DocumentProcessingRun:
    """Record of a single processing attempt made for one document.

    The four leading fields are required; everything else has a default so a
    run can be created at start time and filled in as stages complete.
    """

    # --- identity and classification (required) ---
    run_id: str
    doc_id: str
    trigger_type: str
    run_status: str

    # --- backend / model names; empty string when not recorded ---
    parser_backend: str = ""
    chunk_backend: str = ""
    embedding_model: str = ""
    index_name: str = ""

    # --- stage timestamps; only ``started_at`` is populated on creation ---
    started_at: datetime = field(default_factory=utcnow)
    stored_at: datetime | None = None
    parsed_at: datetime | None = None
    indexed_at: datetime | None = None
    finished_at: datetime | None = None

    # --- counters, all starting at zero ---
    layout_count: int = 0
    structure_node_count: int = 0
    semantic_block_count: int = 0
    vector_chunk_count: int = 0
    chunk_count: int = 0

    # --- failure details; empty strings when nothing was recorded ---
    failure_stage: str = ""
    error_message: str = ""

    # Free-form extra data; each instance gets its own dict.
    metadata: dict[str, Any] = field(default_factory=dict)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class DocumentStatusEvent:
    """One status transition recorded for a document during a processing run."""

    # --- identity (required) ---
    event_id: str
    doc_id: str
    run_id: str

    # --- transition description (required) ---
    from_status: str
    to_status: str
    stage: str

    # --- optional details ---
    message: str = ""
    # Free-form extra data; each instance gets its own dict.
    metadata: dict[str, Any] = field(default_factory=dict)
    # Filled by the ``utcnow`` factory when the caller does not supply a value.
    occurred_at: datetime = field(default_factory=utcnow)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class DocumentArtifact:
    """Reference to one artifact persisted for a processing run."""

    # --- identity (required) ---
    artifact_id: str
    doc_id: str
    run_id: str

    # --- what the artifact is and where it is stored (required) ---
    artifact_type: str
    object_name: str
    content_type: str

    # --- optional integrity information; defaults mean "not recorded" ---
    byte_size: int = 0
    checksum: str = ""

    # Free-form extra data; each instance gets its own dict.
    metadata: dict[str, Any] = field(default_factory=dict)
    # Filled by the ``utcnow`` factory when the caller does not supply a value.
    created_at: datetime = field(default_factory=utcnow)
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ from __future__ import annotations
|
|||||||
|
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
|
|
||||||
from .models import Chunk, Document, DocumentStatus, ParsedDocument
|
from .models import Chunk, Document, DocumentArtifact, DocumentProcessingRun, DocumentStatus, DocumentStatusEvent, ParsedDocument
|
||||||
# Keep domain contracts explicit so adapters can swap implementations cleanly.
|
# Keep domain contracts explicit so adapters can swap implementations cleanly.
|
||||||
|
|
||||||
|
|
||||||
@@ -128,3 +128,111 @@ class ParseArtifactStore(ABC):
|
|||||||
def get_structure_nodes(self, doc_id: str) -> list[dict]:
|
def get_structure_nodes(self, doc_id: str) -> list[dict]:
|
||||||
"""Return all structure nodes for a document."""
|
"""Return all structure nodes for a document."""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class DocumentProcessingStore(ABC):
    """Port for persisting processing runs, status events, and artifact references.

    Every method is abstract; concrete adapters supply the storage. The
    ``mark_run_*`` methods take the run identifier positionally and all other
    values as keyword-only arguments.
    """

    # ------------------------------------------------------------------ runs

    @abstractmethod
    def create_run(self, run: DocumentProcessingRun) -> DocumentProcessingRun:
        """Persist a new processing run and return the stored record."""

    @abstractmethod
    def mark_run_stored(
        self,
        run_id: str,
        *,
        stored_at: object | None = None,
        metadata: dict | None = None,
    ) -> DocumentProcessingRun | None:
        """Record that the source file for ``run_id`` has been persisted.

        Returns a run record or ``None``; the meaning of ``None`` is left to
        the adapter (presumably "run not found" — confirm per implementation).
        """

    @abstractmethod
    def mark_run_parsed(
        self,
        run_id: str,
        *,
        parser_backend: str,
        layout_count: int,
        structure_node_count: int,
        semantic_block_count: int,
        vector_chunk_count: int,
        parsed_at: object | None = None,
        metadata: dict | None = None,
    ) -> DocumentProcessingRun | None:
        """Record the parser used and the parse output counts for ``run_id``."""

    @abstractmethod
    def mark_run_indexed(
        self,
        run_id: str,
        *,
        chunk_count: int,
        index_name: str,
        indexed_at: object | None = None,
        finished_at: object | None = None,
        metadata: dict | None = None,
    ) -> DocumentProcessingRun | None:
        """Record that ``run_id`` was indexed successfully."""

    @abstractmethod
    def mark_run_failed(
        self,
        run_id: str,
        *,
        failure_stage: str,
        error_message: str,
        finished_at: object | None = None,
        metadata: dict | None = None,
    ) -> DocumentProcessingRun | None:
        """Record that ``run_id`` failed, with the stage and error message."""

    # ---------------------------------------------------- events and artifacts

    @abstractmethod
    def append_status_event(self, event: DocumentStatusEvent) -> DocumentStatusEvent:
        """Append one status event and return the stored event."""

    @abstractmethod
    def replace_artifacts_for_run(self, run_id: str, artifacts: list[DocumentArtifact]) -> list[DocumentArtifact]:
        """Replace every artifact reference of ``run_id`` with ``artifacts``."""

    @abstractmethod
    def delete_by_document(self, doc_id: str) -> None:
        """Remove all processing data stored for ``doc_id``."""

    # --------------------------------------------------------------- queries

    @abstractmethod
    def list_runs_by_document(self, doc_id: str) -> list[DocumentProcessingRun]:
        """Return the processing runs recorded for ``doc_id``."""

    @abstractmethod
    def get_run(self, run_id: str) -> DocumentProcessingRun | None:
        """Return the run identified by ``run_id``, or ``None``."""

    @abstractmethod
    def list_status_events_by_document(self, doc_id: str) -> list[DocumentStatusEvent]:
        """Return the status events recorded for ``doc_id``."""

    @abstractmethod
    def list_status_events_by_run(self, run_id: str) -> list[DocumentStatusEvent]:
        """Return the status events recorded for ``run_id``."""

    @abstractmethod
    def list_artifacts_by_document(self, doc_id: str) -> list[DocumentArtifact]:
        """Return the artifact references recorded for ``doc_id``."""

    @abstractmethod
    def list_artifacts_by_run(self, run_id: str) -> list[DocumentArtifact]:
        """Return the artifact references recorded for ``run_id``."""
|
||||||
|
|||||||
@@ -0,0 +1,373 @@
|
|||||||
|
"""Implement infrastructure support for json document processing history."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from datetime import UTC, datetime
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from app.domain.documents import DocumentArtifact, DocumentProcessingRun, DocumentProcessingStore, DocumentStatusEvent
|
||||||
|
# Keep JSON persistence behavior aligned with the lightweight document repository adapter.
|
||||||
|
|
||||||
|
|
||||||
|
class JsonDocumentProcessingStore(DocumentProcessingStore):
|
||||||
|
"""Persist processing history in a standalone JSON file."""
|
||||||
|
|
||||||
|
def __init__(self, file_path: str) -> None:
    """Bind the store to ``file_path``, creating the folder and file if absent.

    Missing parent directories are created. An existing file is left
    untouched; otherwise the canonical empty payload is written.
    """
    target = Path(file_path)
    self.file_path = target
    target.parent.mkdir(parents=True, exist_ok=True)
    if target.exists():
        return
    self._save(self._empty_payload())
|
||||||
|
|
||||||
|
def _empty_payload(self) -> dict[str, dict[str, dict[str, Any]]]:
    """Build a fresh payload containing the three top-level sections, all empty."""
    section_names = ("runs", "status_events", "artifacts")
    return {section: {} for section in section_names}
|
||||||
|
|
||||||
|
def _load(self) -> dict[str, dict[str, dict[str, Any]]]:
    """Read the JSON file and return a payload that always has every section.

    Returns:
        A dict with the keys produced by ``_empty_payload``. A missing file,
        an empty file, a top-level JSON value that is not an object, and any
        section that is missing or not an object all normalize to empty
        sections.

    Raises:
        json.JSONDecodeError: if the file holds text that is not valid JSON.
            Corrupt content is surfaced rather than silently discarded.
    """
    try:
        # Read directly instead of checking ``exists()`` first, so a file that
        # disappears between the check and the read cannot raise.
        raw_text = self.file_path.read_text(encoding="utf-8")
    except FileNotFoundError:
        return self._empty_payload()

    payload = json.loads(raw_text or "{}")
    normalized = self._empty_payload()
    if not isinstance(payload, dict):
        # Valid JSON such as ``[]`` or ``null`` has no sections to read; calling
        # ``.get`` on it would raise AttributeError.
        return normalized

    for key in normalized:
        section = payload.get(key)
        if isinstance(section, dict):
            normalized[key] = section
    return normalized
|
||||||
|
|
||||||
|
def _save(self, payload: dict[str, dict[str, dict[str, Any]]]) -> None:
    """Write the whole payload to disk as indented UTF-8 JSON.

    The text is first written to a sibling ``<name>.tmp`` file and then moved
    over the real file, so an interrupted write cannot leave a truncated
    history file that ``json.loads`` would later reject.

    Raises:
        TypeError: if ``payload`` contains a value ``json.dumps`` cannot
            serialize; the existing file is not touched in that case.
        OSError: if writing or replacing the file fails.
    """
    # Serialize before touching the filesystem so a bad payload changes nothing.
    serialized = json.dumps(payload, ensure_ascii=False, indent=2)
    staging_path = self.file_path.with_name(f"{self.file_path.name}.tmp")
    try:
        staging_path.write_text(serialized, encoding="utf-8")
        staging_path.replace(self.file_path)
    finally:
        # After a successful replace the staging file is gone; this only
        # cleans up when the write or the replace failed part-way.
        staging_path.unlink(missing_ok=True)
|
||||||
|
|
||||||
|
def _serialize_datetime(self, value: datetime | None) -> str | None:
    """Turn a datetime into its ISO 8601 text; ``None`` passes through unchanged."""
    if value is None:
        return None
    return value.isoformat()
|
||||||
|
|
||||||
|
def _deserialize_datetime(self, value: str | None) -> datetime | None:
    """Parse ISO 8601 text into a datetime; ``None`` or an empty string yields ``None``."""
    if not value:
        return None
    return datetime.fromisoformat(value)
|
||||||
|
|
||||||
|
def _serialize_run(self, run: DocumentProcessingRun) -> dict[str, Any]:
    """Flatten a processing run into a JSON-ready dict.

    Timestamps go through ``_serialize_datetime``; every other field is copied
    as-is (``metadata`` is stored by reference, not copied).
    """
    to_iso = self._serialize_datetime
    record: dict[str, Any] = {
        "run_id": run.run_id,
        "doc_id": run.doc_id,
        "trigger_type": run.trigger_type,
        "run_status": run.run_status,
        "parser_backend": run.parser_backend,
        "chunk_backend": run.chunk_backend,
        "embedding_model": run.embedding_model,
        "index_name": run.index_name,
        "started_at": to_iso(run.started_at),
        "stored_at": to_iso(run.stored_at),
        "parsed_at": to_iso(run.parsed_at),
        "indexed_at": to_iso(run.indexed_at),
        "finished_at": to_iso(run.finished_at),
        "layout_count": run.layout_count,
        "structure_node_count": run.structure_node_count,
        "semantic_block_count": run.semantic_block_count,
        "vector_chunk_count": run.vector_chunk_count,
        "chunk_count": run.chunk_count,
        "failure_stage": run.failure_stage,
        "error_message": run.error_message,
        "metadata": run.metadata,
    }
    return record
|
||||||
|
|
||||||
|
def _deserialize_run(self, payload: dict[str, Any]) -> DocumentProcessingRun:
    """Build a ``DocumentProcessingRun`` from one stored JSON mapping.

    ``run_id``, ``doc_id``, ``trigger_type`` and ``run_status`` are required
    keys. A missing ``started_at`` falls back to the current UTC time.
    """
    parse = self._deserialize_datetime

    def counter(key: str) -> int:
        # Absent and null counters both read as zero.
        return int(payload.get(key, 0) or 0)

    kwargs: dict[str, Any] = {
        "run_id": payload["run_id"],
        "doc_id": payload["doc_id"],
        "trigger_type": payload["trigger_type"],
        "run_status": payload["run_status"],
    }
    for key in ("parser_backend", "chunk_backend", "embedding_model", "index_name"):
        kwargs[key] = payload.get(key, "")
    kwargs["started_at"] = parse(payload.get("started_at")) or datetime.now(UTC)
    for key in ("stored_at", "parsed_at", "indexed_at", "finished_at"):
        kwargs[key] = parse(payload.get(key))
    for key in (
        "layout_count",
        "structure_node_count",
        "semantic_block_count",
        "vector_chunk_count",
        "chunk_count",
    ):
        kwargs[key] = counter(key)
    kwargs["failure_stage"] = payload.get("failure_stage", "")
    kwargs["error_message"] = payload.get("error_message", "")
    kwargs["metadata"] = payload.get("metadata", {})
    return DocumentProcessingRun(**kwargs)
|
||||||
|
|
||||||
|
def _serialize_event(self, event: DocumentStatusEvent) -> dict[str, Any]:
|
||||||
|
"""Serialize one status event to a JSON-compatible payload."""
|
||||||
|
return {
|
||||||
|
"event_id": event.event_id,
|
||||||
|
"doc_id": event.doc_id,
|
||||||
|
"run_id": event.run_id,
|
||||||
|
"from_status": event.from_status,
|
||||||
|
"to_status": event.to_status,
|
||||||
|
"stage": event.stage,
|
||||||
|
"message": event.message,
|
||||||
|
"metadata": event.metadata,
|
||||||
|
"occurred_at": self._serialize_datetime(event.occurred_at),
|
||||||
|
}
|
||||||
|
|
||||||
|
def _deserialize_event(self, payload: dict[str, Any]) -> DocumentStatusEvent:
    """Build a ``DocumentStatusEvent`` from one stored JSON mapping.

    ``event_id``, ``doc_id``, ``run_id`` and ``to_status`` are required keys.
    A missing ``occurred_at`` falls back to the current UTC time.
    """
    kwargs: dict[str, Any] = {
        "event_id": payload["event_id"],
        "doc_id": payload["doc_id"],
        "run_id": payload["run_id"],
        "from_status": payload.get("from_status", ""),
        "to_status": payload["to_status"],
        "stage": payload.get("stage", ""),
        "message": payload.get("message", ""),
        "metadata": payload.get("metadata", {}),
    }
    occurred = self._deserialize_datetime(payload.get("occurred_at"))
    if not occurred:
        occurred = datetime.now(UTC)
    kwargs["occurred_at"] = occurred
    return DocumentStatusEvent(**kwargs)
|
||||||
|
|
||||||
|
def _serialize_artifact(self, artifact: DocumentArtifact) -> dict[str, Any]:
|
||||||
|
"""Serialize one artifact reference to a JSON-compatible payload."""
|
||||||
|
return {
|
||||||
|
"artifact_id": artifact.artifact_id,
|
||||||
|
"doc_id": artifact.doc_id,
|
||||||
|
"run_id": artifact.run_id,
|
||||||
|
"artifact_type": artifact.artifact_type,
|
||||||
|
"object_name": artifact.object_name,
|
||||||
|
"content_type": artifact.content_type,
|
||||||
|
"byte_size": artifact.byte_size,
|
||||||
|
"checksum": artifact.checksum,
|
||||||
|
"metadata": artifact.metadata,
|
||||||
|
"created_at": self._serialize_datetime(artifact.created_at),
|
||||||
|
}
|
||||||
|
|
||||||
|
def _deserialize_artifact(self, payload: dict[str, Any]) -> DocumentArtifact:
    """Build a ``DocumentArtifact`` from one stored JSON mapping.

    ``artifact_id``, ``doc_id``, ``run_id``, ``artifact_type`` and
    ``object_name`` are required keys. A missing ``created_at`` falls back to
    the current UTC time.
    """
    kwargs: dict[str, Any] = {
        "artifact_id": payload["artifact_id"],
        "doc_id": payload["doc_id"],
        "run_id": payload["run_id"],
        "artifact_type": payload["artifact_type"],
        "object_name": payload["object_name"],
        "content_type": payload.get("content_type", ""),
        # Absent and null sizes both read as zero.
        "byte_size": int(payload.get("byte_size", 0) or 0),
        "checksum": payload.get("checksum", ""),
        "metadata": payload.get("metadata", {}),
    }
    created = self._deserialize_datetime(payload.get("created_at"))
    if not created:
        created = datetime.now(UTC)
    kwargs["created_at"] = created
    return DocumentArtifact(**kwargs)
|
||||||
|
|
||||||
|
def _merge_metadata(self, original: dict[str, Any], update: dict | None) -> dict[str, Any]:
|
||||||
|
"""Merge metadata updates onto an existing payload."""
|
||||||
|
merged = dict(original)
|
||||||
|
if update:
|
||||||
|
merged.update(update)
|
||||||
|
return merged
|
||||||
|
|
||||||
|
def create_run(self, run: DocumentProcessingRun) -> DocumentProcessingRun:
    """Store ``run`` under its ``run_id`` and return the same object."""
    store = self._load()
    record = self._serialize_run(run)
    store["runs"][run.run_id] = record
    self._save(store)
    return run
|
||||||
|
|
||||||
|
def mark_run_stored(
|
||||||
|
self,
|
||||||
|
run_id: str,
|
||||||
|
*,
|
||||||
|
stored_at: datetime | None = None,
|
||||||
|
metadata: dict | None = None,
|
||||||
|
) -> DocumentProcessingRun | None:
|
||||||
|
"""Mark a run as having persisted the source file."""
|
||||||
|
payload = self._load()
|
||||||
|
run_payload = payload["runs"].get(run_id)
|
||||||
|
if not run_payload:
|
||||||
|
return None
|
||||||
|
run = self._deserialize_run(run_payload)
|
||||||
|
run.stored_at = stored_at or datetime.now(UTC)
|
||||||
|
run.metadata = self._merge_metadata(run.metadata, metadata)
|
||||||
|
payload["runs"][run_id] = self._serialize_run(run)
|
||||||
|
self._save(payload)
|
||||||
|
return run
|
||||||
|
|
||||||
|
def mark_run_parsed(
|
||||||
|
self,
|
||||||
|
run_id: str,
|
||||||
|
*,
|
||||||
|
parser_backend: str,
|
||||||
|
layout_count: int,
|
||||||
|
structure_node_count: int,
|
||||||
|
semantic_block_count: int,
|
||||||
|
vector_chunk_count: int,
|
||||||
|
parsed_at: datetime | None = None,
|
||||||
|
metadata: dict | None = None,
|
||||||
|
) -> DocumentProcessingRun | None:
|
||||||
|
"""Record parse completion details for a run."""
|
||||||
|
payload = self._load()
|
||||||
|
run_payload = payload["runs"].get(run_id)
|
||||||
|
if not run_payload:
|
||||||
|
return None
|
||||||
|
run = self._deserialize_run(run_payload)
|
||||||
|
run.parser_backend = parser_backend
|
||||||
|
run.layout_count = layout_count
|
||||||
|
run.structure_node_count = structure_node_count
|
||||||
|
run.semantic_block_count = semantic_block_count
|
||||||
|
run.vector_chunk_count = vector_chunk_count
|
||||||
|
run.parsed_at = parsed_at or datetime.now(UTC)
|
||||||
|
run.metadata = self._merge_metadata(run.metadata, metadata)
|
||||||
|
payload["runs"][run_id] = self._serialize_run(run)
|
||||||
|
self._save(payload)
|
||||||
|
return run
|
||||||
|
|
||||||
|
def mark_run_indexed(
|
||||||
|
self,
|
||||||
|
run_id: str,
|
||||||
|
*,
|
||||||
|
chunk_count: int,
|
||||||
|
index_name: str,
|
||||||
|
indexed_at: datetime | None = None,
|
||||||
|
finished_at: datetime | None = None,
|
||||||
|
metadata: dict | None = None,
|
||||||
|
) -> DocumentProcessingRun | None:
|
||||||
|
"""Mark a run as successfully indexed."""
|
||||||
|
payload = self._load()
|
||||||
|
run_payload = payload["runs"].get(run_id)
|
||||||
|
if not run_payload:
|
||||||
|
return None
|
||||||
|
run = self._deserialize_run(run_payload)
|
||||||
|
now = datetime.now(UTC)
|
||||||
|
run.run_status = "succeeded"
|
||||||
|
run.chunk_count = chunk_count
|
||||||
|
run.index_name = index_name
|
||||||
|
run.indexed_at = indexed_at or now
|
||||||
|
run.finished_at = finished_at or now
|
||||||
|
run.metadata = self._merge_metadata(run.metadata, metadata)
|
||||||
|
payload["runs"][run_id] = self._serialize_run(run)
|
||||||
|
self._save(payload)
|
||||||
|
return run
|
||||||
|
|
||||||
|
def mark_run_failed(
|
||||||
|
self,
|
||||||
|
run_id: str,
|
||||||
|
*,
|
||||||
|
failure_stage: str,
|
||||||
|
error_message: str,
|
||||||
|
finished_at: datetime | None = None,
|
||||||
|
metadata: dict | None = None,
|
||||||
|
) -> DocumentProcessingRun | None:
|
||||||
|
"""Mark a run as failed."""
|
||||||
|
payload = self._load()
|
||||||
|
run_payload = payload["runs"].get(run_id)
|
||||||
|
if not run_payload:
|
||||||
|
return None
|
||||||
|
run = self._deserialize_run(run_payload)
|
||||||
|
run.run_status = "failed"
|
||||||
|
run.failure_stage = failure_stage
|
||||||
|
run.error_message = error_message
|
||||||
|
run.finished_at = finished_at or datetime.now(UTC)
|
||||||
|
run.metadata = self._merge_metadata(run.metadata, metadata)
|
||||||
|
payload["runs"][run_id] = self._serialize_run(run)
|
||||||
|
self._save(payload)
|
||||||
|
return run
|
||||||
|
|
||||||
|
def append_status_event(self, event: DocumentStatusEvent) -> DocumentStatusEvent:
    """Store ``event`` under its ``event_id`` and return the same object."""
    store = self._load()
    record = self._serialize_event(event)
    store["status_events"][event.event_id] = record
    self._save(store)
    return event
|
||||||
|
|
||||||
|
def replace_artifacts_for_run(self, run_id: str, artifacts: list[DocumentArtifact]) -> list[DocumentArtifact]:
    """Swap the stored artifacts of one run for ``artifacts``.

    Records belonging to other runs are kept; the given list is returned
    unchanged.
    """
    store = self._load()
    survivors = {}
    for artifact_id, record in store["artifacts"].items():
        if record.get("run_id") != run_id:
            survivors[artifact_id] = record
    store["artifacts"] = survivors
    for artifact in artifacts:
        survivors[artifact.artifact_id] = self._serialize_artifact(artifact)
    self._save(store)
    return artifacts
|
||||||
|
|
||||||
|
def delete_by_document(self, doc_id: str) -> None:
    """Drop every run, status event and artifact recorded for ``doc_id``."""
    store = self._load()
    # The three sections share one record shape for this purpose: each entry
    # carries the owning document under "doc_id".
    for section in ("runs", "status_events", "artifacts"):
        store[section] = {
            key: record
            for key, record in store[section].items()
            if record.get("doc_id") != doc_id
        }
    self._save(store)
|
||||||
|
|
||||||
|
def list_runs_by_document(self, doc_id: str) -> list[DocumentProcessingRun]:
    """Return the runs recorded for ``doc_id``, ordered by ``started_at``."""
    matching = (
        record
        for record in self._load()["runs"].values()
        if record.get("doc_id") == doc_id
    )
    return sorted(map(self._deserialize_run, matching), key=lambda item: item.started_at)
|
||||||
|
|
||||||
|
def get_run(self, run_id: str) -> DocumentProcessingRun | None:
    """Look up one run by id; ``None`` when no record is stored for it."""
    record = self._load()["runs"].get(run_id)
    if not record:
        return None
    return self._deserialize_run(record)
|
||||||
|
|
||||||
|
def list_status_events_by_document(self, doc_id: str) -> list[DocumentStatusEvent]:
    """Return the status events of ``doc_id``, ordered by ``occurred_at``."""
    matching = (
        record
        for record in self._load()["status_events"].values()
        if record.get("doc_id") == doc_id
    )
    return sorted(map(self._deserialize_event, matching), key=lambda item: item.occurred_at)
|
||||||
|
|
||||||
|
def list_status_events_by_run(self, run_id: str) -> list[DocumentStatusEvent]:
    """Return the status events of ``run_id``, ordered by ``occurred_at``."""
    matching = (
        record
        for record in self._load()["status_events"].values()
        if record.get("run_id") == run_id
    )
    return sorted(map(self._deserialize_event, matching), key=lambda item: item.occurred_at)
|
||||||
|
|
||||||
|
def list_artifacts_by_document(self, doc_id: str) -> list[DocumentArtifact]:
    """Return the artifacts of ``doc_id``, ordered by ``created_at``."""
    matching = (
        record
        for record in self._load()["artifacts"].values()
        if record.get("doc_id") == doc_id
    )
    return sorted(map(self._deserialize_artifact, matching), key=lambda item: item.created_at)
|
||||||
|
|
||||||
|
def list_artifacts_by_run(self, run_id: str) -> list[DocumentArtifact]:
    """Return the artifacts of ``run_id``, ordered by ``created_at``."""
    matching = (
        record
        for record in self._load()["artifacts"].values()
        if record.get("run_id") == run_id
    )
    return sorted(map(self._deserialize_artifact, matching), key=lambda item: item.created_at)
|
||||||
@@ -0,0 +1,466 @@
|
|||||||
|
"""Implement infrastructure support for postgres document processing history."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from contextlib import contextmanager
|
||||||
|
from datetime import UTC, datetime
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import psycopg2
|
||||||
|
import psycopg2.extras
|
||||||
|
from psycopg2.pool import ThreadedConnectionPool
|
||||||
|
|
||||||
|
from app.config.settings import settings
|
||||||
|
from app.domain.documents import DocumentArtifact, DocumentProcessingRun, DocumentProcessingStore, DocumentStatusEvent
|
||||||
|
# Keep SQL mapping local to this adapter so the domain stays storage-agnostic.
|
||||||
|
|
||||||
|
# DDL for document_processing_runs: one row per run_id, holding the run's
# status, backend names, stage timestamps, counters, failure details and a
# JSONB metadata blob. Rows are removed with their parent document
# (ON DELETE CASCADE on doc_id). Also creates the (doc_id, started_at DESC) index.
_CREATE_RUNS_TABLE = """
CREATE TABLE IF NOT EXISTS document_processing_runs (
run_id VARCHAR(128) PRIMARY KEY,
doc_id VARCHAR(128) NOT NULL,
trigger_type VARCHAR(32) NOT NULL,
run_status VARCHAR(32) NOT NULL DEFAULT 'running',
parser_backend VARCHAR(128) NOT NULL DEFAULT '',
chunk_backend VARCHAR(128) NOT NULL DEFAULT '',
embedding_model VARCHAR(256) NOT NULL DEFAULT '',
index_name VARCHAR(128) NOT NULL DEFAULT '',
started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
stored_at TIMESTAMPTZ,
parsed_at TIMESTAMPTZ,
indexed_at TIMESTAMPTZ,
finished_at TIMESTAMPTZ,
layout_count INTEGER NOT NULL DEFAULT 0,
structure_node_count INTEGER NOT NULL DEFAULT 0,
semantic_block_count INTEGER NOT NULL DEFAULT 0,
vector_chunk_count INTEGER NOT NULL DEFAULT 0,
chunk_count INTEGER NOT NULL DEFAULT 0,
failure_stage VARCHAR(64) NOT NULL DEFAULT '',
error_message TEXT NOT NULL DEFAULT '',
metadata JSONB NOT NULL DEFAULT '{}',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
CONSTRAINT fk_dpr_doc FOREIGN KEY (doc_id) REFERENCES documents(doc_id) ON DELETE CASCADE
);
CREATE INDEX IF NOT EXISTS idx_document_processing_runs_doc_id ON document_processing_runs(doc_id, started_at DESC);
"""

# DDL for document_status_history: one row per event_id describing a
# from_status -> to_status transition. Rows cascade-delete with both the
# parent document and the parent run. Indexed by doc_id and by run_id,
# each paired with occurred_at ASC.
_CREATE_EVENTS_TABLE = """
CREATE TABLE IF NOT EXISTS document_status_history (
event_id VARCHAR(128) PRIMARY KEY,
doc_id VARCHAR(128) NOT NULL,
run_id VARCHAR(128) NOT NULL,
from_status VARCHAR(32) NOT NULL DEFAULT '',
to_status VARCHAR(32) NOT NULL,
stage VARCHAR(64) NOT NULL DEFAULT '',
message TEXT NOT NULL DEFAULT '',
metadata JSONB NOT NULL DEFAULT '{}',
occurred_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
CONSTRAINT fk_dsh_doc FOREIGN KEY (doc_id) REFERENCES documents(doc_id) ON DELETE CASCADE,
CONSTRAINT fk_dsh_run FOREIGN KEY (run_id) REFERENCES document_processing_runs(run_id) ON DELETE CASCADE
);
CREATE INDEX IF NOT EXISTS idx_document_status_history_doc_id ON document_status_history(doc_id, occurred_at ASC);
CREATE INDEX IF NOT EXISTS idx_document_status_history_run_id ON document_status_history(run_id, occurred_at ASC);
"""

# DDL for document_artifacts: one row per artifact_id pointing at a stored
# object (object_name) with its type, size, checksum and JSONB metadata.
# Rows cascade-delete with both the parent document and the parent run.
# Indexed by doc_id and by run_id, each paired with created_at ASC.
_CREATE_ARTIFACTS_TABLE = """
CREATE TABLE IF NOT EXISTS document_artifacts (
artifact_id VARCHAR(128) PRIMARY KEY,
doc_id VARCHAR(128) NOT NULL,
run_id VARCHAR(128) NOT NULL,
artifact_type VARCHAR(64) NOT NULL,
object_name VARCHAR(1024) NOT NULL,
content_type VARCHAR(128) NOT NULL DEFAULT '',
byte_size BIGINT NOT NULL DEFAULT 0,
checksum VARCHAR(256) NOT NULL DEFAULT '',
metadata JSONB NOT NULL DEFAULT '{}',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
CONSTRAINT fk_da_doc FOREIGN KEY (doc_id) REFERENCES documents(doc_id) ON DELETE CASCADE,
CONSTRAINT fk_da_run FOREIGN KEY (run_id) REFERENCES document_processing_runs(run_id) ON DELETE CASCADE
);
CREATE INDEX IF NOT EXISTS idx_document_artifacts_doc_id ON document_artifacts(doc_id, created_at ASC);
CREATE INDEX IF NOT EXISTS idx_document_artifacts_run_id ON document_artifacts(run_id, created_at ASC);
"""
|
||||||
|
|
||||||
|
|
||||||
|
class PostgresDocumentProcessingStore(DocumentProcessingStore):
|
||||||
|
"""Persist processing history in PostgreSQL using handwritten SQL."""
|
||||||
|
|
||||||
|
def __init__(self) -> None:
|
||||||
|
"""Initialize the store and ensure the required tables exist."""
|
||||||
|
self._pool = ThreadedConnectionPool(
|
||||||
|
minconn=1,
|
||||||
|
maxconn=5,
|
||||||
|
host=settings.postgres_host,
|
||||||
|
port=settings.postgres_port,
|
||||||
|
user=settings.postgres_user,
|
||||||
|
password=settings.postgres_password,
|
||||||
|
dbname=settings.postgres_db,
|
||||||
|
)
|
||||||
|
self._ensure_schema()
|
||||||
|
|
||||||
|
def _ensure_schema(self) -> None:
|
||||||
|
"""Create processing history tables and indexes if they are missing."""
|
||||||
|
with self._conn() as conn:
|
||||||
|
with conn.cursor() as cur:
|
||||||
|
cur.execute(_CREATE_RUNS_TABLE)
|
||||||
|
cur.execute(_CREATE_EVENTS_TABLE)
|
||||||
|
cur.execute(_CREATE_ARTIFACTS_TABLE)
|
||||||
|
conn.commit()
|
||||||
|
|
||||||
|
@contextmanager
|
||||||
|
def _conn(self):
|
||||||
|
"""Borrow one connection from the pool and return it afterwards."""
|
||||||
|
conn = self._pool.getconn()
|
||||||
|
try:
|
||||||
|
yield conn
|
||||||
|
finally:
|
||||||
|
self._pool.putconn(conn)
|
||||||
|
|
||||||
|
def _normalize_metadata(self, value: Any) -> dict[str, Any]:
|
||||||
|
"""Return a JSON-object payload regardless of the row representation."""
|
||||||
|
if isinstance(value, dict):
|
||||||
|
return value
|
||||||
|
if not value:
|
||||||
|
return {}
|
||||||
|
return json.loads(value)
|
||||||
|
|
||||||
|
def _row_to_run(self, row: dict[str, Any]) -> DocumentProcessingRun:
|
||||||
|
"""Map one run row into the domain dataclass."""
|
||||||
|
return DocumentProcessingRun(
|
||||||
|
run_id=row["run_id"],
|
||||||
|
doc_id=row["doc_id"],
|
||||||
|
trigger_type=row["trigger_type"],
|
||||||
|
run_status=row["run_status"],
|
||||||
|
parser_backend=row["parser_backend"],
|
||||||
|
chunk_backend=row["chunk_backend"],
|
||||||
|
embedding_model=row["embedding_model"],
|
||||||
|
index_name=row["index_name"],
|
||||||
|
started_at=row["started_at"],
|
||||||
|
stored_at=row["stored_at"],
|
||||||
|
parsed_at=row["parsed_at"],
|
||||||
|
indexed_at=row["indexed_at"],
|
||||||
|
finished_at=row["finished_at"],
|
||||||
|
layout_count=row["layout_count"],
|
||||||
|
structure_node_count=row["structure_node_count"],
|
||||||
|
semantic_block_count=row["semantic_block_count"],
|
||||||
|
vector_chunk_count=row["vector_chunk_count"],
|
||||||
|
chunk_count=row["chunk_count"],
|
||||||
|
failure_stage=row["failure_stage"],
|
||||||
|
error_message=row["error_message"],
|
||||||
|
metadata=self._normalize_metadata(row["metadata"]),
|
||||||
|
)
|
||||||
|
|
||||||
|
def _row_to_event(self, row: dict[str, Any]) -> DocumentStatusEvent:
|
||||||
|
"""Map one event row into the domain dataclass."""
|
||||||
|
return DocumentStatusEvent(
|
||||||
|
event_id=row["event_id"],
|
||||||
|
doc_id=row["doc_id"],
|
||||||
|
run_id=row["run_id"],
|
||||||
|
from_status=row["from_status"],
|
||||||
|
to_status=row["to_status"],
|
||||||
|
stage=row["stage"],
|
||||||
|
message=row["message"],
|
||||||
|
metadata=self._normalize_metadata(row["metadata"]),
|
||||||
|
occurred_at=row["occurred_at"],
|
||||||
|
)
|
||||||
|
|
||||||
|
def _row_to_artifact(self, row: dict[str, Any]) -> DocumentArtifact:
|
||||||
|
"""Map one artifact row into the domain dataclass."""
|
||||||
|
return DocumentArtifact(
|
||||||
|
artifact_id=row["artifact_id"],
|
||||||
|
doc_id=row["doc_id"],
|
||||||
|
run_id=row["run_id"],
|
||||||
|
artifact_type=row["artifact_type"],
|
||||||
|
object_name=row["object_name"],
|
||||||
|
content_type=row["content_type"],
|
||||||
|
byte_size=row["byte_size"],
|
||||||
|
checksum=row["checksum"],
|
||||||
|
metadata=self._normalize_metadata(row["metadata"]),
|
||||||
|
created_at=row["created_at"],
|
||||||
|
)
|
||||||
|
|
||||||
|
def _update_run(
|
||||||
|
self,
|
||||||
|
run_id: str,
|
||||||
|
*,
|
||||||
|
assignments: dict[str, Any],
|
||||||
|
metadata: dict | None = None,
|
||||||
|
) -> DocumentProcessingRun | None:
|
||||||
|
"""Update one run row and return the latest stored state."""
|
||||||
|
set_clauses = []
|
||||||
|
params: dict[str, Any] = {"run_id": run_id, "updated_at": datetime.now(UTC)}
|
||||||
|
for key, value in assignments.items():
|
||||||
|
set_clauses.append(f"{key} = %({key})s")
|
||||||
|
params[key] = value
|
||||||
|
set_clauses.append("updated_at = %(updated_at)s")
|
||||||
|
if metadata is not None:
|
||||||
|
set_clauses.append("metadata = COALESCE(metadata, '{}'::jsonb) || %(metadata)s::jsonb")
|
||||||
|
params["metadata"] = json.dumps(metadata, ensure_ascii=False)
|
||||||
|
sql = f"""
|
||||||
|
UPDATE document_processing_runs
|
||||||
|
SET {", ".join(set_clauses)}
|
||||||
|
WHERE run_id = %(run_id)s
|
||||||
|
RETURNING *
|
||||||
|
"""
|
||||||
|
with self._conn() as conn:
|
||||||
|
with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
|
||||||
|
cur.execute(sql, params)
|
||||||
|
row = cur.fetchone()
|
||||||
|
conn.commit()
|
||||||
|
return self._row_to_run(dict(row)) if row else None
|
||||||
|
|
||||||
|
def create_run(self, run: DocumentProcessingRun) -> DocumentProcessingRun:
|
||||||
|
"""Create a new processing run record."""
|
||||||
|
sql = """
|
||||||
|
INSERT INTO document_processing_runs
|
||||||
|
(run_id, doc_id, trigger_type, run_status, parser_backend, chunk_backend,
|
||||||
|
embedding_model, index_name, started_at, stored_at, parsed_at, indexed_at,
|
||||||
|
finished_at, layout_count, structure_node_count, semantic_block_count,
|
||||||
|
vector_chunk_count, chunk_count, failure_stage, error_message, metadata)
|
||||||
|
VALUES
|
||||||
|
(%(run_id)s, %(doc_id)s, %(trigger_type)s, %(run_status)s, %(parser_backend)s,
|
||||||
|
%(chunk_backend)s, %(embedding_model)s, %(index_name)s, %(started_at)s,
|
||||||
|
%(stored_at)s, %(parsed_at)s, %(indexed_at)s, %(finished_at)s, %(layout_count)s,
|
||||||
|
%(structure_node_count)s, %(semantic_block_count)s, %(vector_chunk_count)s,
|
||||||
|
%(chunk_count)s, %(failure_stage)s, %(error_message)s, %(metadata)s)
|
||||||
|
"""
|
||||||
|
with self._conn() as conn:
|
||||||
|
with conn.cursor() as cur:
|
||||||
|
cur.execute(
|
||||||
|
sql,
|
||||||
|
{
|
||||||
|
"run_id": run.run_id,
|
||||||
|
"doc_id": run.doc_id,
|
||||||
|
"trigger_type": run.trigger_type,
|
||||||
|
"run_status": run.run_status,
|
||||||
|
"parser_backend": run.parser_backend,
|
||||||
|
"chunk_backend": run.chunk_backend,
|
||||||
|
"embedding_model": run.embedding_model,
|
||||||
|
"index_name": run.index_name,
|
||||||
|
"started_at": run.started_at,
|
||||||
|
"stored_at": run.stored_at,
|
||||||
|
"parsed_at": run.parsed_at,
|
||||||
|
"indexed_at": run.indexed_at,
|
||||||
|
"finished_at": run.finished_at,
|
||||||
|
"layout_count": run.layout_count,
|
||||||
|
"structure_node_count": run.structure_node_count,
|
||||||
|
"semantic_block_count": run.semantic_block_count,
|
||||||
|
"vector_chunk_count": run.vector_chunk_count,
|
||||||
|
"chunk_count": run.chunk_count,
|
||||||
|
"failure_stage": run.failure_stage,
|
||||||
|
"error_message": run.error_message,
|
||||||
|
"metadata": json.dumps(run.metadata, ensure_ascii=False),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
conn.commit()
|
||||||
|
return run
|
||||||
|
|
||||||
|
def mark_run_stored(
|
||||||
|
self,
|
||||||
|
run_id: str,
|
||||||
|
*,
|
||||||
|
stored_at: datetime | None = None,
|
||||||
|
metadata: dict | None = None,
|
||||||
|
) -> DocumentProcessingRun | None:
|
||||||
|
"""Mark a run as having persisted its source file."""
|
||||||
|
return self._update_run(
|
||||||
|
run_id,
|
||||||
|
assignments={"stored_at": stored_at or datetime.now(UTC)},
|
||||||
|
metadata=metadata,
|
||||||
|
)
|
||||||
|
|
||||||
|
def mark_run_parsed(
|
||||||
|
self,
|
||||||
|
run_id: str,
|
||||||
|
*,
|
||||||
|
parser_backend: str,
|
||||||
|
layout_count: int,
|
||||||
|
structure_node_count: int,
|
||||||
|
semantic_block_count: int,
|
||||||
|
vector_chunk_count: int,
|
||||||
|
parsed_at: datetime | None = None,
|
||||||
|
metadata: dict | None = None,
|
||||||
|
) -> DocumentProcessingRun | None:
|
||||||
|
"""Record parse completion metrics for a run."""
|
||||||
|
return self._update_run(
|
||||||
|
run_id,
|
||||||
|
assignments={
|
||||||
|
"parser_backend": parser_backend,
|
||||||
|
"parsed_at": parsed_at or datetime.now(UTC),
|
||||||
|
"layout_count": layout_count,
|
||||||
|
"structure_node_count": structure_node_count,
|
||||||
|
"semantic_block_count": semantic_block_count,
|
||||||
|
"vector_chunk_count": vector_chunk_count,
|
||||||
|
},
|
||||||
|
metadata=metadata,
|
||||||
|
)
|
||||||
|
|
||||||
|
def mark_run_indexed(
|
||||||
|
self,
|
||||||
|
run_id: str,
|
||||||
|
*,
|
||||||
|
chunk_count: int,
|
||||||
|
index_name: str,
|
||||||
|
indexed_at: datetime | None = None,
|
||||||
|
finished_at: datetime | None = None,
|
||||||
|
metadata: dict | None = None,
|
||||||
|
) -> DocumentProcessingRun | None:
|
||||||
|
"""Mark a run as successfully indexed."""
|
||||||
|
now = datetime.now(UTC)
|
||||||
|
return self._update_run(
|
||||||
|
run_id,
|
||||||
|
assignments={
|
||||||
|
"run_status": "succeeded",
|
||||||
|
"chunk_count": chunk_count,
|
||||||
|
"index_name": index_name,
|
||||||
|
"indexed_at": indexed_at or now,
|
||||||
|
"finished_at": finished_at or now,
|
||||||
|
},
|
||||||
|
metadata=metadata,
|
||||||
|
)
|
||||||
|
|
||||||
|
def mark_run_failed(
|
||||||
|
self,
|
||||||
|
run_id: str,
|
||||||
|
*,
|
||||||
|
failure_stage: str,
|
||||||
|
error_message: str,
|
||||||
|
finished_at: datetime | None = None,
|
||||||
|
metadata: dict | None = None,
|
||||||
|
) -> DocumentProcessingRun | None:
|
||||||
|
"""Mark a run as failed and persist the terminal error details."""
|
||||||
|
return self._update_run(
|
||||||
|
run_id,
|
||||||
|
assignments={
|
||||||
|
"run_status": "failed",
|
||||||
|
"failure_stage": failure_stage,
|
||||||
|
"error_message": error_message,
|
||||||
|
"finished_at": finished_at or datetime.now(UTC),
|
||||||
|
},
|
||||||
|
metadata=metadata,
|
||||||
|
)
|
||||||
|
|
||||||
|
def append_status_event(self, event: DocumentStatusEvent) -> DocumentStatusEvent:
|
||||||
|
"""Append a document status event."""
|
||||||
|
sql = """
|
||||||
|
INSERT INTO document_status_history
|
||||||
|
(event_id, doc_id, run_id, from_status, to_status, stage, message, metadata, occurred_at)
|
||||||
|
VALUES
|
||||||
|
(%(event_id)s, %(doc_id)s, %(run_id)s, %(from_status)s, %(to_status)s,
|
||||||
|
%(stage)s, %(message)s, %(metadata)s, %(occurred_at)s)
|
||||||
|
"""
|
||||||
|
with self._conn() as conn:
|
||||||
|
with conn.cursor() as cur:
|
||||||
|
cur.execute(
|
||||||
|
sql,
|
||||||
|
{
|
||||||
|
"event_id": event.event_id,
|
||||||
|
"doc_id": event.doc_id,
|
||||||
|
"run_id": event.run_id,
|
||||||
|
"from_status": event.from_status,
|
||||||
|
"to_status": event.to_status,
|
||||||
|
"stage": event.stage,
|
||||||
|
"message": event.message,
|
||||||
|
"metadata": json.dumps(event.metadata, ensure_ascii=False),
|
||||||
|
"occurred_at": event.occurred_at,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
conn.commit()
|
||||||
|
return event
|
||||||
|
|
||||||
|
def replace_artifacts_for_run(self, run_id: str, artifacts: list[DocumentArtifact]) -> list[DocumentArtifact]:
|
||||||
|
"""Replace all artifact references for one run using a delete-then-insert strategy."""
|
||||||
|
with self._conn() as conn:
|
||||||
|
with conn.cursor() as cur:
|
||||||
|
cur.execute("DELETE FROM document_artifacts WHERE run_id = %s", (run_id,))
|
||||||
|
if artifacts:
|
||||||
|
psycopg2.extras.execute_values(
|
||||||
|
cur,
|
||||||
|
"""
|
||||||
|
INSERT INTO document_artifacts
|
||||||
|
(artifact_id, doc_id, run_id, artifact_type, object_name,
|
||||||
|
content_type, byte_size, checksum, metadata, created_at)
|
||||||
|
VALUES %s
|
||||||
|
""",
|
||||||
|
[
|
||||||
|
(
|
||||||
|
artifact.artifact_id,
|
||||||
|
artifact.doc_id,
|
||||||
|
artifact.run_id,
|
||||||
|
artifact.artifact_type,
|
||||||
|
artifact.object_name,
|
||||||
|
artifact.content_type,
|
||||||
|
artifact.byte_size,
|
||||||
|
artifact.checksum,
|
||||||
|
json.dumps(artifact.metadata, ensure_ascii=False),
|
||||||
|
artifact.created_at,
|
||||||
|
)
|
||||||
|
for artifact in artifacts
|
||||||
|
],
|
||||||
|
)
|
||||||
|
conn.commit()
|
||||||
|
return artifacts
|
||||||
|
|
||||||
|
def delete_by_document(self, doc_id: str) -> None:
|
||||||
|
"""Delete all processing rows for a document explicitly."""
|
||||||
|
with self._conn() as conn:
|
||||||
|
with conn.cursor() as cur:
|
||||||
|
cur.execute("DELETE FROM document_status_history WHERE doc_id = %s", (doc_id,))
|
||||||
|
cur.execute("DELETE FROM document_artifacts WHERE doc_id = %s", (doc_id,))
|
||||||
|
cur.execute("DELETE FROM document_processing_runs WHERE doc_id = %s", (doc_id,))
|
||||||
|
conn.commit()
|
||||||
|
|
||||||
|
def list_runs_by_document(self, doc_id: str) -> list[DocumentProcessingRun]:
|
||||||
|
"""List processing runs for a document in chronological order."""
|
||||||
|
sql = "SELECT * FROM document_processing_runs WHERE doc_id = %s ORDER BY started_at ASC"
|
||||||
|
with self._conn() as conn:
|
||||||
|
with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
|
||||||
|
cur.execute(sql, (doc_id,))
|
||||||
|
rows = cur.fetchall()
|
||||||
|
return [self._row_to_run(dict(row)) for row in rows]
|
||||||
|
|
||||||
|
def get_run(self, run_id: str) -> DocumentProcessingRun | None:
|
||||||
|
"""Return one processing run by identifier."""
|
||||||
|
sql = "SELECT * FROM document_processing_runs WHERE run_id = %s"
|
||||||
|
with self._conn() as conn:
|
||||||
|
with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
|
||||||
|
cur.execute(sql, (run_id,))
|
||||||
|
row = cur.fetchone()
|
||||||
|
return self._row_to_run(dict(row)) if row else None
|
||||||
|
|
||||||
|
def list_status_events_by_document(self, doc_id: str) -> list[DocumentStatusEvent]:
    """Fetch the status events stored for ``doc_id``, ordered by ascending ``occurred_at``."""
    query = "SELECT * FROM document_status_history WHERE doc_id = %s ORDER BY occurred_at ASC"
    with self._conn() as conn, conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cursor:
        cursor.execute(query, (doc_id,))
        records = cursor.fetchall()
    events = []
    for record in records:
        events.append(self._row_to_event(dict(record)))
    return events
|
||||||
|
|
||||||
|
def list_status_events_by_run(self, run_id: str) -> list[DocumentStatusEvent]:
    """Fetch the status events stored for ``run_id``, ordered by ascending ``occurred_at``."""
    query = "SELECT * FROM document_status_history WHERE run_id = %s ORDER BY occurred_at ASC"
    with self._conn() as conn, conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cursor:
        cursor.execute(query, (run_id,))
        records = cursor.fetchall()
    as_dicts = (dict(record) for record in records)
    return [self._row_to_event(payload) for payload in as_dicts]
|
||||||
|
|
||||||
|
def list_artifacts_by_document(self, doc_id: str) -> list[DocumentArtifact]:
    """Fetch the artifact references stored for ``doc_id``, ordered by ascending ``created_at``."""
    query = "SELECT * FROM document_artifacts WHERE doc_id = %s ORDER BY created_at ASC"
    with self._conn() as conn, conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cursor:
        cursor.execute(query, (doc_id,))
        records = cursor.fetchall()
    artifacts = []
    for record in records:
        artifacts.append(self._row_to_artifact(dict(record)))
    return artifacts
|
||||||
|
|
||||||
|
def list_artifacts_by_run(self, run_id: str) -> list[DocumentArtifact]:
    """Fetch the artifact references stored for ``run_id``, ordered by ascending ``created_at``."""
    query = "SELECT * FROM document_artifacts WHERE run_id = %s ORDER BY created_at ASC"
    with self._conn() as conn, conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cursor:
        cursor.execute(query, (run_id,))
        records = cursor.fetchall()
    as_dicts = (dict(record) for record in records)
    return [self._row_to_artifact(payload) for payload in as_dicts]
|
||||||
@@ -20,8 +20,10 @@ from app.infrastructure.parser.local_document_parser import LocalDocumentParser
|
|||||||
from app.infrastructure.parser.vector_chunk_builder import AliyunVectorChunkBuilder
|
from app.infrastructure.parser.vector_chunk_builder import AliyunVectorChunkBuilder
|
||||||
from app.infrastructure.perception.mock_event_store import MockEventStore
|
from app.infrastructure.perception.mock_event_store import MockEventStore
|
||||||
from app.infrastructure.session.in_memory_conversation_store import InMemoryConversationStore
|
from app.infrastructure.session.in_memory_conversation_store import InMemoryConversationStore
|
||||||
|
from app.infrastructure.storage.json_document_processing_store import JsonDocumentProcessingStore
|
||||||
from app.infrastructure.storage.json_document_repository import JsonDocumentRepository
|
from app.infrastructure.storage.json_document_repository import JsonDocumentRepository
|
||||||
from app.infrastructure.storage.minio_binary_store import MinioDocumentBinaryStore
|
from app.infrastructure.storage.minio_binary_store import MinioDocumentBinaryStore
|
||||||
|
from app.infrastructure.storage.postgres_document_processing_store import PostgresDocumentProcessingStore
|
||||||
from app.infrastructure.storage.postgres_document_repository import PostgresDocumentRepository
|
from app.infrastructure.storage.postgres_document_repository import PostgresDocumentRepository
|
||||||
from app.infrastructure.storage.postgres_parse_artifact_store import PostgresParseArtifactStore
|
from app.infrastructure.storage.postgres_parse_artifact_store import PostgresParseArtifactStore
|
||||||
from app.infrastructure.vectorstore.bm25_retriever import BM25Retriever
|
from app.infrastructure.vectorstore.bm25_retriever import BM25Retriever
|
||||||
@@ -148,6 +150,14 @@ def get_parse_artifact_store():
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
@lru_cache
def get_document_processing_store():
    """Build the processing-history store matching ``settings.document_repository_backend``.

    The ``"postgres"` backend maps to ``PostgresDocumentProcessingStore``; any other
    value falls back to the JSON store at ``settings.document_processing_metadata_path``.
    The result is memoized by ``lru_cache``, so repeated calls reuse the same instance.
    """
    use_postgres = settings.document_repository_backend == "postgres"
    if not use_postgres:
        return JsonDocumentProcessingStore(settings.document_processing_metadata_path)
    return PostgresDocumentProcessingStore()
|
||||||
|
|
||||||
|
|
||||||
@lru_cache
|
@lru_cache
|
||||||
def get_binary_store() -> DocumentBinaryStore:
|
def get_binary_store() -> DocumentBinaryStore:
|
||||||
"""Return binary store."""
|
"""Return binary store."""
|
||||||
@@ -226,6 +236,7 @@ def get_document_command_service() -> DocumentCommandService:
|
|||||||
embedding_provider=get_embedding_provider(),
|
embedding_provider=get_embedding_provider(),
|
||||||
vector_index=get_vector_index(),
|
vector_index=get_vector_index(),
|
||||||
parse_artifact_store=get_parse_artifact_store(),
|
parse_artifact_store=get_parse_artifact_store(),
|
||||||
|
document_processing_store=get_document_processing_store(),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import { Outlet, useLocation } from 'react-router-dom';
|
import { useLocation } from 'react-router-dom';
|
||||||
|
|
||||||
import { FooterLayout } from './FooterLayout';
|
import { FooterLayout } from './FooterLayout';
|
||||||
import { HeaderLayout } from './HeaderLayout';
|
import { HeaderLayout } from './HeaderLayout';
|
||||||
@@ -15,7 +15,6 @@ export function AppShell() {
|
|||||||
<HeaderLayout activeTab={activeTab} />
|
<HeaderLayout activeTab={activeTab} />
|
||||||
<ContentLayout tab={activeTab}>
|
<ContentLayout tab={activeTab}>
|
||||||
<KeepAliveViewport activeTab={activeTab} />
|
<KeepAliveViewport activeTab={activeTab} />
|
||||||
<Outlet />
|
|
||||||
</ContentLayout>
|
</ContentLayout>
|
||||||
<FooterLayout />
|
<FooterLayout />
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -22,12 +22,12 @@ export function ContentLayout({ children, tab }: ContentLayoutProps) {
|
|||||||
<div
|
<div
|
||||||
className={[
|
className={[
|
||||||
shellFrameClassName,
|
shellFrameClassName,
|
||||||
'relative flex min-h-0 flex-1 py-8',
|
'relative flex min-h-0 flex-1 justify-center py-8',
|
||||||
].join(' ')}
|
].join(' ')}
|
||||||
>
|
>
|
||||||
<div
|
<div
|
||||||
className={[
|
className={[
|
||||||
'relative flex w-full min-h-0 flex-1',
|
'relative flex min-h-0 w-full',
|
||||||
widthClass,
|
widthClass,
|
||||||
tab.fillHeight ? 'overflow-hidden' : '',
|
tab.fillHeight ? 'overflow-hidden' : '',
|
||||||
].join(' ')}
|
].join(' ')}
|
||||||
|
|||||||
@@ -44,7 +44,7 @@ export const appTabs: AppTabConfig[] = [
|
|||||||
label: '合规分析',
|
label: '合规分析',
|
||||||
component: CompliancePage,
|
component: CompliancePage,
|
||||||
keepAlive: true,
|
keepAlive: true,
|
||||||
contentWidth: 'full',
|
contentWidth: 'wide',
|
||||||
fillHeight: true,
|
fillHeight: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -61,7 +61,7 @@ export const appTabs: AppTabConfig[] = [
|
|||||||
label: '法规对话',
|
label: '法规对话',
|
||||||
component: RagChatPage,
|
component: RagChatPage,
|
||||||
keepAlive: true,
|
keepAlive: true,
|
||||||
contentWidth: 'full',
|
contentWidth: 'wide',
|
||||||
fillHeight: true,
|
fillHeight: true,
|
||||||
},
|
},
|
||||||
];
|
];
|
||||||
|
|||||||
@@ -31,7 +31,6 @@ dependencies = [
|
|||||||
"redis>=4.5.0",
|
"redis>=4.5.0",
|
||||||
"minio>=7.1.0",
|
"minio>=7.1.0",
|
||||||
"psycopg2-binary>=2.9.0",
|
"psycopg2-binary>=2.9.0",
|
||||||
"sqlalchemy>=2.0.0",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
[dependency-groups]
|
[dependency-groups]
|
||||||
|
|||||||
216
tests/test_document_processing_store.py
Normal file
216
tests/test_document_processing_store.py
Normal file
@@ -0,0 +1,216 @@
|
|||||||
|
"""Test PostgreSQL-backed document processing history storage."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import uuid
|
||||||
|
from datetime import UTC, datetime
|
||||||
|
|
||||||
|
import psycopg2
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from app.domain.documents import Document, DocumentArtifact, DocumentProcessingRun, DocumentStatus, DocumentStatusEvent
|
||||||
|
from app.infrastructure.storage.postgres_document_processing_store import PostgresDocumentProcessingStore
|
||||||
|
from app.infrastructure.storage.postgres_document_repository import PostgresDocumentRepository
|
||||||
|
# Keep these tests focused on relational storage behavior only.
|
||||||
|
|
||||||
|
|
||||||
|
def _build_document(doc_id: str) -> Document:
    """Assemble the pending PDF document row that run, event, and artifact rows point at."""
    fields = {
        "doc_id": doc_id,
        "doc_name": "Processing Test",
        "file_name": "processing-test.pdf",
        "object_name": f"{doc_id}/processing-test.pdf",
        "content_type": "application/pdf",
        "size_bytes": 128,
        "status": DocumentStatus.PENDING,
    }
    return Document(**fields)
|
||||||
|
|
||||||
|
|
||||||
|
def _connectivity_ready() -> bool:
    """Probe the configured PostgreSQL backend by building a repository and listing one row.

    Returns ``False`` when either step raises ``psycopg2.Error``; any other exception
    propagates to the caller.
    """
    try:
        PostgresDocumentRepository().list(limit=1)
    except psycopg2.Error:
        return False
    return True
|
||||||
|
|
||||||
|
|
||||||
|
# Module-wide skip marker: the connectivity probe is evaluated once, when this module is
# imported, and every test here is skipped if it reports the backend as unreachable.
pytestmark = pytest.mark.skipif(not _connectivity_ready(), reason="PostgreSQL test backend is not reachable")
|
||||||
|
|
||||||
|
|
||||||
|
def test_postgres_document_processing_store_supports_full_run_lifecycle():
    """Drive one run through stored, parsed, and indexed, then read each record type back."""
    document_repo = PostgresDocumentRepository()
    processing_store = PostgresDocumentProcessingStore()
    doc_id = f"proc-{uuid.uuid4().hex[:10]}"
    run_id = f"run-{uuid.uuid4().hex[:10]}"
    now = datetime.now(UTC)

    def _artifact(artifact_type: str, object_name: str) -> DocumentArtifact:
        """Build a JSON artifact reference bound to this test's document and run."""
        return DocumentArtifact(
            artifact_id=f"art-{uuid.uuid4().hex[:10]}",
            doc_id=doc_id,
            run_id=run_id,
            artifact_type=artifact_type,
            object_name=object_name,
            content_type="application/json",
            created_at=now,
        )

    document_repo.create(_build_document(doc_id))
    try:
        initial_run = DocumentProcessingRun(
            run_id=run_id,
            doc_id=doc_id,
            trigger_type="upload",
            run_status="running",
            parser_backend="aliyun",
            chunk_backend="aliyun",
            embedding_model="text-embedding-v3",
            started_at=now,
            metadata={"origin": "test"},
        )
        created = processing_store.create_run(initial_run)

        stored = processing_store.mark_run_stored(run_id, stored_at=now, metadata={"stored": True})
        parsed = processing_store.mark_run_parsed(
            run_id,
            parser_backend="fake_parser",
            layout_count=2,
            structure_node_count=3,
            semantic_block_count=4,
            vector_chunk_count=5,
            parsed_at=now,
            metadata={"parse_task_id": "task-1"},
        )
        indexed = processing_store.mark_run_indexed(
            run_id,
            chunk_count=6,
            index_name="regulations_dense_1024_v1",
            indexed_at=now,
            finished_at=now,
            metadata={"collection": "regulations_dense_1024_v1"},
        )

        indexed_event = DocumentStatusEvent(
            event_id=f"evt-{uuid.uuid4().hex[:10]}",
            doc_id=doc_id,
            run_id=run_id,
            from_status="parsed",
            to_status="indexed",
            stage="index",
            message="Indexed successfully",
            metadata={"chunk_count": 6},
            occurred_at=now,
        )
        event = processing_store.append_status_event(indexed_event)

        artifacts = processing_store.replace_artifacts_for_run(
            run_id,
            [
                _artifact("layouts", f"artifacts/{doc_id}/layouts.json"),
                _artifact("vector_chunks", f"artifacts/{doc_id}/vector_chunks.json"),
            ],
        )

        # Read everything back before asserting, as the original flow did.
        fetched = processing_store.get_run(run_id)
        run_rows = processing_store.list_runs_by_document(doc_id)
        event_rows = processing_store.list_status_events_by_document(doc_id)
        artifact_rows = processing_store.list_artifacts_by_run(run_id)

        assert created.run_id == run_id
        assert stored is not None
        assert stored.stored_at is not None
        assert parsed is not None
        assert parsed.parser_backend == "fake_parser"
        assert indexed is not None
        assert indexed.run_status == "succeeded"
        assert fetched is not None
        assert fetched.chunk_count == 6
        assert isinstance(run_rows[0], DocumentProcessingRun)
        assert isinstance(event_rows[0], DocumentStatusEvent)
        assert isinstance(artifact_rows[0], DocumentArtifact)
        assert event_rows[0].event_id == event.event_id
        written_types = {artifact.artifact_type for artifact in artifacts}
        read_types = {artifact.artifact_type for artifact in artifact_rows}
        assert written_types == read_types
    finally:
        processing_store.delete_by_document(doc_id)
        document_repo.delete(doc_id)
|
||||||
|
|
||||||
|
|
||||||
|
def test_postgres_document_processing_store_replaces_artifacts_and_deletes_document_data():
    """Replace artifact rows idempotently and remove all history rows for one document.

    Teardown always clears the processing rows before deleting the parent document,
    so a failing assertion does not leave run, event, or artifact rows behind.
    """
    repository = PostgresDocumentRepository()
    store = PostgresDocumentProcessingStore()
    doc_id = f"proc-{uuid.uuid4().hex[:10]}"
    run_id = f"run-{uuid.uuid4().hex[:10]}"

    repository.create(_build_document(doc_id))
    try:
        store.create_run(
            DocumentProcessingRun(
                run_id=run_id,
                doc_id=doc_id,
                trigger_type="retry",
                run_status="running",
            )
        )

        first = store.replace_artifacts_for_run(
            run_id,
            [
                DocumentArtifact(
                    artifact_id=f"art-{uuid.uuid4().hex[:10]}",
                    doc_id=doc_id,
                    run_id=run_id,
                    artifact_type="layouts",
                    object_name=f"artifacts/{doc_id}/layouts-v1.json",
                    content_type="application/json",
                )
            ],
        )
        # A second call for the same run must replace the first batch, not append to it.
        second = store.replace_artifacts_for_run(
            run_id,
            [
                DocumentArtifact(
                    artifact_id=f"art-{uuid.uuid4().hex[:10]}",
                    doc_id=doc_id,
                    run_id=run_id,
                    artifact_type="layouts",
                    object_name=f"artifacts/{doc_id}/layouts-v2.json",
                    content_type="application/json",
                )
            ],
        )
        store.append_status_event(
            DocumentStatusEvent(
                event_id=f"evt-{uuid.uuid4().hex[:10]}",
                doc_id=doc_id,
                run_id=run_id,
                from_status="pending",
                to_status="failed",
                stage="parse",
                message="failed",
            )
        )
        failed = store.mark_run_failed(run_id, failure_stage="parse", error_message="boom")

        artifact_rows = store.list_artifacts_by_run(run_id)
        assert len(first) == 1
        assert len(second) == 1
        assert len(artifact_rows) == 1
        assert artifact_rows[0].object_name.endswith("layouts-v2.json")
        assert failed is not None and failed.run_status == "failed"

        store.delete_by_document(doc_id)

        assert store.list_runs_by_document(doc_id) == []
        assert store.list_status_events_by_document(doc_id) == []
        assert store.list_artifacts_by_document(doc_id) == []
    finally:
        # Clear processing rows first (a no-op when the body already did), then the
        # document itself; the nested finally guarantees the document delete still runs.
        try:
            store.delete_by_document(doc_id)
        finally:
            repository.delete(doc_id)
|
||||||
@@ -3,13 +3,18 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
from app.application.documents.services import DocumentCommandService
|
from app.application.documents.services import DocumentCommandService
|
||||||
from app.domain.documents import Chunk, Document, DocumentStatus, ParsedDocument
|
from app.domain.documents import Chunk, Document, DocumentArtifact, DocumentProcessingRun, DocumentStatus, DocumentStatusEvent, ParsedDocument
|
||||||
|
from app.infrastructure.storage.json_document_processing_store import JsonDocumentProcessingStore
|
||||||
|
from app.infrastructure.storage.json_document_repository import JsonDocumentRepository
|
||||||
from app.shared import bootstrap
|
from app.shared import bootstrap
|
||||||
|
|
||||||
|
|
||||||
class FakeRepository:
|
class FakeRepository:
|
||||||
|
"""Store document rows in memory for application service tests."""
|
||||||
|
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
self.documents: dict[str, Document] = {}
|
self.documents: dict[str, Document] = {}
|
||||||
|
|
||||||
@@ -25,9 +30,14 @@ class FakeRepository:
|
|||||||
return self.documents.get(doc_id)
|
return self.documents.get(doc_id)
|
||||||
|
|
||||||
def list(self, limit: int | None = None) -> list[Document]:
|
def list(self, limit: int | None = None) -> list[Document]:
|
||||||
|
"""Return stored documents in insertion order."""
|
||||||
values = list(self.documents.values())
|
values = list(self.documents.values())
|
||||||
return values[:limit] if limit is not None else values
|
return values[:limit] if limit is not None else values
|
||||||
|
|
||||||
|
def delete(self, doc_id: str) -> bool:
|
||||||
|
"""Delete one document from the in-memory repository."""
|
||||||
|
return self.documents.pop(doc_id, None) is not None
|
||||||
|
|
||||||
def update_status(
|
def update_status(
|
||||||
self,
|
self,
|
||||||
doc_id: str,
|
doc_id: str,
|
||||||
@@ -62,6 +72,8 @@ class FakeRepository:
|
|||||||
|
|
||||||
|
|
||||||
class FakeBinaryStore:
|
class FakeBinaryStore:
|
||||||
|
"""Store binary payloads in memory for upload and retry tests."""
|
||||||
|
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
self.saved: dict[str, bytes] = {}
|
self.saved: dict[str, bytes] = {}
|
||||||
|
|
||||||
@@ -76,6 +88,8 @@ class FakeBinaryStore:
|
|||||||
|
|
||||||
|
|
||||||
class FakeParser:
|
class FakeParser:
|
||||||
|
"""Return a stable parsed document for deterministic service tests."""
|
||||||
|
|
||||||
def parse(self, *, file_path: str, doc_id: str, doc_name: str) -> ParsedDocument:
|
def parse(self, *, file_path: str, doc_id: str, doc_name: str) -> ParsedDocument:
|
||||||
return ParsedDocument(
|
return ParsedDocument(
|
||||||
doc_id=doc_id,
|
doc_id=doc_id,
|
||||||
@@ -101,6 +115,8 @@ class FakeParser:
|
|||||||
|
|
||||||
|
|
||||||
class FakeChunkBuilder:
|
class FakeChunkBuilder:
|
||||||
|
"""Build one deterministic chunk from the fake parsed document."""
|
||||||
|
|
||||||
def build(self, *, parsed_document: ParsedDocument, regulation_type: str, version: str) -> list[Chunk]:
|
def build(self, *, parsed_document: ParsedDocument, regulation_type: str, version: str) -> list[Chunk]:
|
||||||
return [
|
return [
|
||||||
Chunk(
|
Chunk(
|
||||||
@@ -122,6 +138,8 @@ class FakeChunkBuilder:
|
|||||||
|
|
||||||
|
|
||||||
class FakeEmbeddingProvider:
|
class FakeEmbeddingProvider:
|
||||||
|
"""Capture embedding calls and return fixed-length vectors."""
|
||||||
|
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
self.calls: list[list[str]] = []
|
self.calls: list[list[str]] = []
|
||||||
|
|
||||||
@@ -134,6 +152,8 @@ class FakeEmbeddingProvider:
|
|||||||
|
|
||||||
|
|
||||||
class FakeVectorIndex:
|
class FakeVectorIndex:
|
||||||
|
"""Capture vector upserts for service assertions."""
|
||||||
|
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
self.upserts: list[tuple[list[Chunk], list[list[float]]]] = []
|
self.upserts: list[tuple[list[Chunk], list[list[float]]]] = []
|
||||||
|
|
||||||
@@ -151,11 +171,159 @@ class FakeVectorIndex:
|
|||||||
return {"collection_name": "regulations_dense_1024_v1"}
|
return {"collection_name": "regulations_dense_1024_v1"}
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class FakeProcessingStore:
    """Record processing history method calls for orchestration assertions.

    Every field is a call log. A field left as ``None`` is replaced with a fresh
    list per instance; a list passed to the constructor is kept as-is.
    """

    runs: list[DocumentProcessingRun] | None = None
    status_events: list[DocumentStatusEvent] | None = None
    artifact_batches: list[list[DocumentArtifact]] | None = None
    deleted_doc_ids: list[str] | None = None
    stored_run_ids: list[str] | None = None
    parsed_calls: list[dict] | None = None
    indexed_calls: list[dict] | None = None
    failed_calls: list[dict] | None = None

    def __post_init__(self) -> None:
        """Give each unset call log its own empty list without discarding supplied values."""
        for name in (
            "runs",
            "status_events",
            "artifact_batches",
            "deleted_doc_ids",
            "stored_run_ids",
            "parsed_calls",
            "indexed_calls",
            "failed_calls",
        ):
            if getattr(self, name) is None:
                setattr(self, name, [])

    def create_run(self, run: DocumentProcessingRun) -> DocumentProcessingRun:
        """Store the created run and return it unchanged."""
        self.runs.append(run)
        return run

    def mark_run_stored(
        self,
        run_id: str,
        *,
        stored_at=None,
        metadata: dict | None = None,
    ) -> DocumentProcessingRun | None:
        """Record that one run reached the stored stage and return the matching run, if any."""
        self.stored_run_ids.append(run_id)
        return self.get_run(run_id)

    def mark_run_parsed(
        self,
        run_id: str,
        *,
        parser_backend: str,
        layout_count: int,
        structure_node_count: int,
        semantic_block_count: int,
        vector_chunk_count: int,
        parsed_at=None,
        metadata: dict | None = None,
    ) -> DocumentProcessingRun | None:
        """Record parse metrics for one run and return the matching run, if any."""
        self.parsed_calls.append(
            {
                "run_id": run_id,
                "parser_backend": parser_backend,
                "layout_count": layout_count,
                "structure_node_count": structure_node_count,
                "semantic_block_count": semantic_block_count,
                "vector_chunk_count": vector_chunk_count,
                "metadata": metadata or {},
            }
        )
        return self.get_run(run_id)

    def mark_run_indexed(
        self,
        run_id: str,
        *,
        chunk_count: int,
        index_name: str,
        indexed_at=None,
        finished_at=None,
        metadata: dict | None = None,
    ) -> DocumentProcessingRun | None:
        """Record index completion for one run and return the matching run, if any."""
        self.indexed_calls.append(
            {
                "run_id": run_id,
                "chunk_count": chunk_count,
                "index_name": index_name,
                "metadata": metadata or {},
            }
        )
        return self.get_run(run_id)

    def mark_run_failed(
        self,
        run_id: str,
        *,
        failure_stage: str,
        error_message: str,
        finished_at=None,
        metadata: dict | None = None,
    ) -> DocumentProcessingRun | None:
        """Record terminal failure details for one run and return the matching run, if any."""
        self.failed_calls.append(
            {
                "run_id": run_id,
                "failure_stage": failure_stage,
                "error_message": error_message,
                "metadata": metadata or {},
            }
        )
        return self.get_run(run_id)

    def append_status_event(self, event: DocumentStatusEvent) -> DocumentStatusEvent:
        """Store one status event and return it unchanged."""
        self.status_events.append(event)
        return event

    def replace_artifacts_for_run(self, run_id: str, artifacts: list[DocumentArtifact]) -> list[DocumentArtifact]:
        """Store one artifact replacement batch; earlier batches are kept for inspection."""
        self.artifact_batches.append(artifacts)
        return artifacts

    def delete_by_document(self, doc_id: str) -> None:
        """Record an explicit document-history delete request."""
        self.deleted_doc_ids.append(doc_id)

    def list_runs_by_document(self, doc_id: str) -> list[DocumentProcessingRun]:
        """Return the recorded runs whose ``doc_id`` matches."""
        return [run for run in self.runs if run.doc_id == doc_id]

    def get_run(self, run_id: str) -> DocumentProcessingRun | None:
        """Return the first recorded run with ``run_id``, or ``None`` when there is none."""
        return next((run for run in self.runs if run.run_id == run_id), None)

    def list_status_events_by_document(self, doc_id: str) -> list[DocumentStatusEvent]:
        """Return the recorded status events whose ``doc_id`` matches."""
        return [event for event in self.status_events if event.doc_id == doc_id]

    def list_status_events_by_run(self, run_id: str) -> list[DocumentStatusEvent]:
        """Return the recorded status events whose ``run_id`` matches."""
        return [event for event in self.status_events if event.run_id == run_id]

    def list_artifacts_by_document(self, doc_id: str) -> list[DocumentArtifact]:
        """Return artifacts from every recorded batch whose ``doc_id`` matches."""
        return [artifact for batch in self.artifact_batches for artifact in batch if artifact.doc_id == doc_id]

    def list_artifacts_by_run(self, run_id: str) -> list[DocumentArtifact]:
        """Return artifacts from every recorded batch whose ``run_id`` matches."""
        return [artifact for batch in self.artifact_batches for artifact in batch if artifact.run_id == run_id]
|
||||||
|
|
||||||
|
|
||||||
|
class FailingParser:
    """Parser stand-in whose ``parse`` always raises, used to exercise failure handling."""

    def parse(self, *, file_path: str, doc_id: str, doc_name: str) -> ParsedDocument:
        """Raise ``RuntimeError`` with a fixed message regardless of the arguments."""
        failure = RuntimeError("parser exploded")
        raise failure
|
||||||
|
|
||||||
|
|
||||||
def test_document_command_service_uses_1024_dense_embedding_and_updates_status():
|
def test_document_command_service_uses_1024_dense_embedding_and_updates_status():
|
||||||
repository = FakeRepository()
|
repository = FakeRepository()
|
||||||
binary_store = FakeBinaryStore()
|
binary_store = FakeBinaryStore()
|
||||||
embedding_provider = FakeEmbeddingProvider()
|
embedding_provider = FakeEmbeddingProvider()
|
||||||
vector_index = FakeVectorIndex()
|
vector_index = FakeVectorIndex()
|
||||||
|
processing_store = FakeProcessingStore()
|
||||||
service = DocumentCommandService(
|
service = DocumentCommandService(
|
||||||
document_repository=repository,
|
document_repository=repository,
|
||||||
binary_store=binary_store,
|
binary_store=binary_store,
|
||||||
@@ -163,6 +331,7 @@ def test_document_command_service_uses_1024_dense_embedding_and_updates_status()
|
|||||||
chunk_builder=FakeChunkBuilder(),
|
chunk_builder=FakeChunkBuilder(),
|
||||||
embedding_provider=embedding_provider,
|
embedding_provider=embedding_provider,
|
||||||
vector_index=vector_index,
|
vector_index=vector_index,
|
||||||
|
document_processing_store=processing_store,
|
||||||
)
|
)
|
||||||
|
|
||||||
result = service.upload_and_process(
|
result = service.upload_and_process(
|
||||||
@@ -188,6 +357,264 @@ def test_document_command_service_uses_1024_dense_embedding_and_updates_status()
|
|||||||
assert stored.index_name == "regulations_dense_1024_v1"
|
assert stored.index_name == "regulations_dense_1024_v1"
|
||||||
assert stored.metadata["parse_task_id"] == "task-123"
|
assert stored.metadata["parse_task_id"] == "task-123"
|
||||||
assert stored.metadata["artifact_keys"]["vector_chunks"].endswith("/vector_chunks.json")
|
assert stored.metadata["artifact_keys"]["vector_chunks"].endswith("/vector_chunks.json")
|
||||||
|
assert len(processing_store.runs) == 1
|
||||||
|
assert processing_store.runs[0].trigger_type == "upload"
|
||||||
|
assert processing_store.stored_run_ids == [processing_store.runs[0].run_id]
|
||||||
|
assert processing_store.parsed_calls[0]["vector_chunk_count"] == 1
|
||||||
|
assert processing_store.indexed_calls[0]["index_name"] == "regulations_dense_1024_v1"
|
||||||
|
assert [event.to_status for event in processing_store.status_events] == ["pending", "stored", "parsed", "indexed"]
|
||||||
|
assert {artifact.artifact_type for artifact in processing_store.artifact_batches[0]} == {
|
||||||
|
"layouts",
|
||||||
|
"structure_nodes",
|
||||||
|
"semantic_blocks",
|
||||||
|
"vector_chunks",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def test_document_command_service_retry_marks_processing_run_as_retry():
    """Retrying an existing document should index it and record a run triggered by ``retry``."""
    repository = FakeRepository()
    binary_store = FakeBinaryStore()
    embedding_provider = FakeEmbeddingProvider()
    vector_index = FakeVectorIndex()
    processing_store = FakeProcessingStore()

    # Seed both the document row and its stored binary before the retry runs.
    existing_document = Document(
        doc_id="doc-retry",
        doc_name="Retry Doc",
        file_name="retry.pdf",
        object_name="doc-retry/retry.pdf",
        content_type="application/pdf",
        size_bytes=4,
        regulation_type="车辆安全",
        version="2026",
        metadata={"generate_summary": False},
    )
    repository.create(existing_document)
    binary_store.save(
        object_name="doc-retry/retry.pdf",
        data=b"data",
        content_type="application/pdf",
        metadata={"doc_id": "doc-retry"},
    )

    dependencies = {
        "document_repository": repository,
        "binary_store": binary_store,
        "parser": FakeParser(),
        "chunk_builder": FakeChunkBuilder(),
        "embedding_provider": embedding_provider,
        "vector_index": vector_index,
        "document_processing_store": processing_store,
    }
    service = DocumentCommandService(**dependencies)

    result = service.retry("doc-retry")

    assert result.status == "indexed"
    first_run = processing_store.runs[0]
    assert first_run.trigger_type == "retry"
|
||||||
|
|
||||||
|
|
||||||
|
def test_document_command_service_records_failed_processing_stage():
    """A parser failure should yield a failed result with ``parse`` recorded as the failure stage."""
    repository = FakeRepository()
    binary_store = FakeBinaryStore()
    embedding_provider = FakeEmbeddingProvider()
    vector_index = FakeVectorIndex()
    processing_store = FakeProcessingStore()
    dependencies = {
        "document_repository": repository,
        "binary_store": binary_store,
        "parser": FailingParser(),
        "chunk_builder": FakeChunkBuilder(),
        "embedding_provider": embedding_provider,
        "vector_index": vector_index,
        "document_processing_store": processing_store,
    }
    service = DocumentCommandService(**dependencies)

    upload_request = {
        "doc_id": "doc-fail",
        "file_name": "test.pdf",
        "content": b"dummy pdf bytes",
        "content_type": "application/pdf",
        "doc_name": "测试法规",
        "regulation_type": "车辆安全",
        "version": "2026",
        "generate_summary": False,
    }
    result = service.upload_and_process(**upload_request)

    assert result.status == "failed"
    first_failure = processing_store.failed_calls[0]
    assert first_failure["failure_stage"] == "parse"
    last_event = processing_store.status_events[-1]
    assert last_event.to_status == "failed"
    assert repository.get("doc-fail").metadata["failure_stage"] == "parse"
|
||||||
|
|
||||||
|
|
||||||
|
def test_document_command_service_delete_cleans_processing_history_when_present():
|
||||||
|
repository = FakeRepository()
|
||||||
|
binary_store = FakeBinaryStore()
|
||||||
|
vector_index = FakeVectorIndex()
|
||||||
|
processing_store = FakeProcessingStore()
|
||||||
|
repository.create(
|
||||||
|
Document(
|
||||||
|
doc_id="doc-delete",
|
||||||
|
doc_name="Delete Doc",
|
||||||
|
file_name="delete.pdf",
|
||||||
|
object_name="doc-delete/delete.pdf",
|
||||||
|
content_type="application/pdf",
|
||||||
|
size_bytes=4,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
service = DocumentCommandService(
|
||||||
|
document_repository=repository,
|
||||||
|
binary_store=binary_store,
|
||||||
|
parser=FakeParser(),
|
||||||
|
chunk_builder=FakeChunkBuilder(),
|
||||||
|
embedding_provider=FakeEmbeddingProvider(),
|
||||||
|
vector_index=vector_index,
|
||||||
|
document_processing_store=processing_store,
|
||||||
|
)
|
||||||
|
|
||||||
|
deleted = service.delete("doc-delete")
|
||||||
|
|
||||||
|
assert deleted is True
|
||||||
|
assert processing_store.deleted_doc_ids == ["doc-delete"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_document_command_service_persists_processing_history_with_json_store(tmp_path: Path):
|
||||||
|
repository = JsonDocumentRepository(str(tmp_path / "documents.json"))
|
||||||
|
processing_store = JsonDocumentProcessingStore(str(tmp_path / "document_processing.json"))
|
||||||
|
binary_store = FakeBinaryStore()
|
||||||
|
embedding_provider = FakeEmbeddingProvider()
|
||||||
|
vector_index = FakeVectorIndex()
|
||||||
|
service = DocumentCommandService(
|
||||||
|
document_repository=repository,
|
||||||
|
binary_store=binary_store,
|
||||||
|
parser=FakeParser(),
|
||||||
|
chunk_builder=FakeChunkBuilder(),
|
||||||
|
embedding_provider=embedding_provider,
|
||||||
|
vector_index=vector_index,
|
||||||
|
document_processing_store=processing_store,
|
||||||
|
)
|
||||||
|
|
||||||
|
result = service.upload_and_process(
|
||||||
|
doc_id="doc-json-flow",
|
||||||
|
file_name="test.pdf",
|
||||||
|
content=b"dummy pdf bytes",
|
||||||
|
content_type="application/pdf",
|
||||||
|
doc_name="测试法规",
|
||||||
|
regulation_type="车辆安全",
|
||||||
|
version="2026",
|
||||||
|
generate_summary=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
stored = repository.get("doc-json-flow")
|
||||||
|
runs = processing_store.list_runs_by_document("doc-json-flow")
|
||||||
|
events = processing_store.list_status_events_by_document("doc-json-flow")
|
||||||
|
artifacts = processing_store.list_artifacts_by_document("doc-json-flow")
|
||||||
|
|
||||||
|
assert result.status == "indexed"
|
||||||
|
assert stored is not None and stored.status == DocumentStatus.INDEXED
|
||||||
|
assert len(runs) == 1
|
||||||
|
assert runs[0].trigger_type == "upload"
|
||||||
|
assert runs[0].run_status == "succeeded"
|
||||||
|
assert [event.to_status for event in events] == ["pending", "stored", "parsed", "indexed"]
|
||||||
|
assert {artifact.artifact_type for artifact in artifacts} == {
|
||||||
|
"layouts",
|
||||||
|
"structure_nodes",
|
||||||
|
"semantic_blocks",
|
||||||
|
"vector_chunks",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def test_document_command_service_retry_creates_second_json_processing_run(tmp_path: Path):
|
||||||
|
repository = JsonDocumentRepository(str(tmp_path / "documents.json"))
|
||||||
|
processing_store = JsonDocumentProcessingStore(str(tmp_path / "document_processing.json"))
|
||||||
|
binary_store = FakeBinaryStore()
|
||||||
|
repository.create(
|
||||||
|
Document(
|
||||||
|
doc_id="doc-json-retry",
|
||||||
|
doc_name="Retry Doc",
|
||||||
|
file_name="retry.pdf",
|
||||||
|
object_name="doc-json-retry/retry.pdf",
|
||||||
|
content_type="application/pdf",
|
||||||
|
size_bytes=4,
|
||||||
|
regulation_type="车辆安全",
|
||||||
|
version="2026",
|
||||||
|
metadata={"generate_summary": False},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
binary_store.save(
|
||||||
|
object_name="doc-json-retry/retry.pdf",
|
||||||
|
data=b"data",
|
||||||
|
content_type="application/pdf",
|
||||||
|
metadata={"doc_id": "doc-json-retry"},
|
||||||
|
)
|
||||||
|
service = DocumentCommandService(
|
||||||
|
document_repository=repository,
|
||||||
|
binary_store=binary_store,
|
||||||
|
parser=FakeParser(),
|
||||||
|
chunk_builder=FakeChunkBuilder(),
|
||||||
|
embedding_provider=FakeEmbeddingProvider(),
|
||||||
|
vector_index=FakeVectorIndex(),
|
||||||
|
document_processing_store=processing_store,
|
||||||
|
)
|
||||||
|
|
||||||
|
first = service.retry("doc-json-retry")
|
||||||
|
second = service.retry("doc-json-retry")
|
||||||
|
runs = processing_store.list_runs_by_document("doc-json-retry")
|
||||||
|
|
||||||
|
assert first.status == "indexed"
|
||||||
|
assert second.status == "indexed"
|
||||||
|
assert len(runs) == 2
|
||||||
|
assert {run.trigger_type for run in runs} == {"retry"}
|
||||||
|
|
||||||
|
|
||||||
|
def test_document_command_service_delete_removes_json_processing_history(tmp_path: Path):
|
||||||
|
repository = JsonDocumentRepository(str(tmp_path / "documents.json"))
|
||||||
|
processing_store = JsonDocumentProcessingStore(str(tmp_path / "document_processing.json"))
|
||||||
|
binary_store = FakeBinaryStore()
|
||||||
|
service = DocumentCommandService(
|
||||||
|
document_repository=repository,
|
||||||
|
binary_store=binary_store,
|
||||||
|
parser=FakeParser(),
|
||||||
|
chunk_builder=FakeChunkBuilder(),
|
||||||
|
embedding_provider=FakeEmbeddingProvider(),
|
||||||
|
vector_index=FakeVectorIndex(),
|
||||||
|
document_processing_store=processing_store,
|
||||||
|
)
|
||||||
|
|
||||||
|
service.upload_and_process(
|
||||||
|
doc_id="doc-json-delete",
|
||||||
|
file_name="delete.pdf",
|
||||||
|
content=b"delete me",
|
||||||
|
content_type="application/pdf",
|
||||||
|
doc_name="Delete Doc",
|
||||||
|
regulation_type="车辆安全",
|
||||||
|
version="2026",
|
||||||
|
generate_summary=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
deleted = service.delete("doc-json-delete")
|
||||||
|
|
||||||
|
assert deleted is True
|
||||||
|
assert processing_store.list_runs_by_document("doc-json-delete") == []
|
||||||
|
assert processing_store.list_status_events_by_document("doc-json-delete") == []
|
||||||
|
assert processing_store.list_artifacts_by_document("doc-json-delete") == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_bootstrap_returns_json_processing_store_for_json_backend(tmp_path: Path):
|
||||||
|
original_backend = bootstrap.settings.document_repository_backend
|
||||||
|
original_path = bootstrap.settings.document_processing_metadata_path
|
||||||
|
bootstrap.get_document_processing_store.cache_clear()
|
||||||
|
try:
|
||||||
|
bootstrap.settings.document_repository_backend = "json"
|
||||||
|
bootstrap.settings.document_processing_metadata_path = str(tmp_path / "document_processing.json")
|
||||||
|
|
||||||
|
store = bootstrap.get_document_processing_store()
|
||||||
|
|
||||||
|
assert store.__class__.__name__ == "JsonDocumentProcessingStore"
|
||||||
|
finally:
|
||||||
|
bootstrap.settings.document_repository_backend = original_backend
|
||||||
|
bootstrap.settings.document_processing_metadata_path = original_path
|
||||||
|
bootstrap.get_document_processing_store.cache_clear()
|
||||||
|
|
||||||
def test_bootstrap_defaults_to_aliyun_parser_and_chunk_builder():
|
def test_bootstrap_defaults_to_aliyun_parser_and_chunk_builder():
|
||||||
bootstrap.get_parser.cache_clear()
|
bootstrap.get_parser.cache_clear()
|
||||||
|
|||||||
184
tests/test_json_document_processing_store.py
Normal file
184
tests/test_json_document_processing_store.py
Normal file
@@ -0,0 +1,184 @@
|
|||||||
|
"""Test JSON-backed document processing history storage."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from datetime import UTC, datetime
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from app.domain.documents import DocumentArtifact, DocumentProcessingRun, DocumentStatusEvent
|
||||||
|
from app.infrastructure.storage.json_document_processing_store import JsonDocumentProcessingStore
|
||||||
|
# Keep JSON processing-store tests focused on local file persistence behavior.
|
||||||
|
|
||||||
|
|
||||||
|
def test_json_document_processing_store_initializes_missing_file(tmp_path: Path):
|
||||||
|
"""Create the backing file with the canonical empty payload on first use."""
|
||||||
|
file_path = tmp_path / "document_processing.json"
|
||||||
|
|
||||||
|
store = JsonDocumentProcessingStore(str(file_path))
|
||||||
|
|
||||||
|
payload = json.loads(file_path.read_text(encoding="utf-8"))
|
||||||
|
assert payload == {"runs": {}, "status_events": {}, "artifacts": {}}
|
||||||
|
assert store.list_runs_by_document("missing") == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_json_document_processing_store_supports_full_run_lifecycle(tmp_path: Path):
|
||||||
|
"""Persist runs, events, and artifacts and read them back as dataclasses."""
|
||||||
|
file_path = tmp_path / "document_processing.json"
|
||||||
|
store = JsonDocumentProcessingStore(str(file_path))
|
||||||
|
doc_id = "doc-json"
|
||||||
|
run_id = "run-json"
|
||||||
|
event_id = "evt-json"
|
||||||
|
base_time = datetime.now(UTC)
|
||||||
|
|
||||||
|
created = store.create_run(
|
||||||
|
DocumentProcessingRun(
|
||||||
|
run_id=run_id,
|
||||||
|
doc_id=doc_id,
|
||||||
|
trigger_type="upload",
|
||||||
|
run_status="running",
|
||||||
|
parser_backend="aliyun",
|
||||||
|
chunk_backend="aliyun",
|
||||||
|
embedding_model="text-embedding-v3",
|
||||||
|
started_at=base_time,
|
||||||
|
metadata={"origin": "json-test"},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
stored = store.mark_run_stored(run_id, stored_at=base_time, metadata={"stored": True})
|
||||||
|
parsed = store.mark_run_parsed(
|
||||||
|
run_id,
|
||||||
|
parser_backend="fake_parser",
|
||||||
|
layout_count=1,
|
||||||
|
structure_node_count=2,
|
||||||
|
semantic_block_count=3,
|
||||||
|
vector_chunk_count=4,
|
||||||
|
parsed_at=base_time,
|
||||||
|
metadata={"parse_task_id": "task-json"},
|
||||||
|
)
|
||||||
|
indexed = store.mark_run_indexed(
|
||||||
|
run_id,
|
||||||
|
chunk_count=5,
|
||||||
|
index_name="regulations_dense_1024_v1",
|
||||||
|
indexed_at=base_time,
|
||||||
|
finished_at=base_time,
|
||||||
|
metadata={"collection": "regulations_dense_1024_v1"},
|
||||||
|
)
|
||||||
|
event = store.append_status_event(
|
||||||
|
DocumentStatusEvent(
|
||||||
|
event_id=event_id,
|
||||||
|
doc_id=doc_id,
|
||||||
|
run_id=run_id,
|
||||||
|
from_status="parsed",
|
||||||
|
to_status="indexed",
|
||||||
|
stage="index",
|
||||||
|
message="Indexed",
|
||||||
|
metadata={"chunk_count": 5},
|
||||||
|
occurred_at=base_time,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
artifacts = store.replace_artifacts_for_run(
|
||||||
|
run_id,
|
||||||
|
[
|
||||||
|
DocumentArtifact(
|
||||||
|
artifact_id="art-layouts",
|
||||||
|
doc_id=doc_id,
|
||||||
|
run_id=run_id,
|
||||||
|
artifact_type="layouts",
|
||||||
|
object_name="artifacts/doc-json/layouts.json",
|
||||||
|
content_type="application/json",
|
||||||
|
created_at=base_time,
|
||||||
|
),
|
||||||
|
DocumentArtifact(
|
||||||
|
artifact_id="art-vectors",
|
||||||
|
doc_id=doc_id,
|
||||||
|
run_id=run_id,
|
||||||
|
artifact_type="vector_chunks",
|
||||||
|
object_name="artifacts/doc-json/vector_chunks.json",
|
||||||
|
content_type="application/json",
|
||||||
|
created_at=base_time,
|
||||||
|
),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
fetched = store.get_run(run_id)
|
||||||
|
run_rows = store.list_runs_by_document(doc_id)
|
||||||
|
event_rows = store.list_status_events_by_run(run_id)
|
||||||
|
artifact_rows = store.list_artifacts_by_document(doc_id)
|
||||||
|
|
||||||
|
assert created.run_id == run_id
|
||||||
|
assert stored is not None and stored.metadata["stored"] is True
|
||||||
|
assert parsed is not None and parsed.structure_node_count == 2
|
||||||
|
assert indexed is not None and indexed.run_status == "succeeded"
|
||||||
|
assert fetched is not None and fetched.chunk_count == 5
|
||||||
|
assert run_rows[0].started_at == base_time
|
||||||
|
assert event_rows[0].event_id == event.event_id
|
||||||
|
assert artifact_rows[0].doc_id == doc_id
|
||||||
|
assert {artifact.artifact_type for artifact in artifacts} == {artifact.artifact_type for artifact in artifact_rows}
|
||||||
|
|
||||||
|
|
||||||
|
def test_json_document_processing_store_replaces_artifacts_and_deletes_by_document(tmp_path: Path):
|
||||||
|
"""Replace one run's artifacts idempotently and remove all history for a document."""
|
||||||
|
file_path = tmp_path / "document_processing.json"
|
||||||
|
store = JsonDocumentProcessingStore(str(file_path))
|
||||||
|
doc_id = "doc-delete"
|
||||||
|
run_id = "run-delete"
|
||||||
|
|
||||||
|
store.create_run(
|
||||||
|
DocumentProcessingRun(
|
||||||
|
run_id=run_id,
|
||||||
|
doc_id=doc_id,
|
||||||
|
trigger_type="retry",
|
||||||
|
run_status="running",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
store.append_status_event(
|
||||||
|
DocumentStatusEvent(
|
||||||
|
event_id="evt-delete",
|
||||||
|
doc_id=doc_id,
|
||||||
|
run_id=run_id,
|
||||||
|
from_status="pending",
|
||||||
|
to_status="stored",
|
||||||
|
stage="store",
|
||||||
|
occurred_at=datetime.now(UTC),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
first = store.replace_artifacts_for_run(
|
||||||
|
run_id,
|
||||||
|
[
|
||||||
|
DocumentArtifact(
|
||||||
|
artifact_id="art-first",
|
||||||
|
doc_id=doc_id,
|
||||||
|
run_id=run_id,
|
||||||
|
artifact_type="layouts",
|
||||||
|
object_name="artifacts/doc-delete/layouts-v1.json",
|
||||||
|
content_type="application/json",
|
||||||
|
)
|
||||||
|
],
|
||||||
|
)
|
||||||
|
second = store.replace_artifacts_for_run(
|
||||||
|
run_id,
|
||||||
|
[
|
||||||
|
DocumentArtifact(
|
||||||
|
artifact_id="art-second",
|
||||||
|
doc_id=doc_id,
|
||||||
|
run_id=run_id,
|
||||||
|
artifact_type="layouts",
|
||||||
|
object_name="artifacts/doc-delete/layouts-v2.json",
|
||||||
|
content_type="application/json",
|
||||||
|
)
|
||||||
|
],
|
||||||
|
)
|
||||||
|
failed = store.mark_run_failed(run_id, failure_stage="parse", error_message="boom")
|
||||||
|
|
||||||
|
artifact_rows = store.list_artifacts_by_run(run_id)
|
||||||
|
assert len(first) == 1
|
||||||
|
assert len(second) == 1
|
||||||
|
assert len(artifact_rows) == 1
|
||||||
|
assert artifact_rows[0].object_name.endswith("layouts-v2.json")
|
||||||
|
assert failed is not None and failed.run_status == "failed"
|
||||||
|
|
||||||
|
store.delete_by_document(doc_id)
|
||||||
|
|
||||||
|
assert store.list_runs_by_document(doc_id) == []
|
||||||
|
assert store.list_status_events_by_document(doc_id) == []
|
||||||
|
assert store.list_artifacts_by_document(doc_id) == []
|
||||||
129
uv.lock
generated
129
uv.lock
generated
@@ -35,7 +35,6 @@ dependencies = [
|
|||||||
{ name = "python-dotenv" },
|
{ name = "python-dotenv" },
|
||||||
{ name = "python-multipart" },
|
{ name = "python-multipart" },
|
||||||
{ name = "redis" },
|
{ name = "redis" },
|
||||||
{ name = "sqlalchemy" },
|
|
||||||
{ name = "tenacity" },
|
{ name = "tenacity" },
|
||||||
{ name = "uvicorn", extra = ["standard"] },
|
{ name = "uvicorn", extra = ["standard"] },
|
||||||
]
|
]
|
||||||
@@ -68,7 +67,6 @@ requires-dist = [
|
|||||||
{ name = "python-dotenv", specifier = ">=1.0.0" },
|
{ name = "python-dotenv", specifier = ">=1.0.0" },
|
||||||
{ name = "python-multipart", specifier = ">=0.0.6" },
|
{ name = "python-multipart", specifier = ">=0.0.6" },
|
||||||
{ name = "redis", specifier = ">=4.5.0" },
|
{ name = "redis", specifier = ">=4.5.0" },
|
||||||
{ name = "sqlalchemy", specifier = ">=2.0.0" },
|
|
||||||
{ name = "tenacity", specifier = ">=8.2.0" },
|
{ name = "tenacity", specifier = ">=8.2.0" },
|
||||||
{ name = "uvicorn", extras = ["standard"], specifier = ">=0.23.0" },
|
{ name = "uvicorn", extras = ["standard"], specifier = ">=0.23.0" },
|
||||||
]
|
]
|
||||||
@@ -815,7 +813,7 @@ name = "exceptiongroup"
|
|||||||
version = "1.3.1"
|
version = "1.3.1"
|
||||||
source = { registry = "https://pypi.org/simple" }
|
source = { registry = "https://pypi.org/simple" }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
{ name = "typing-extensions", marker = "python_full_version < '3.13'" },
|
{ name = "typing-extensions", marker = "python_full_version < '3.11'" },
|
||||||
]
|
]
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/50/79/66800aadf48771f6b62f7eb014e352e5d06856655206165d775e675a02c9/exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219", size = 30371, upload-time = "2025-11-21T23:01:54.787Z" }
|
sdist = { url = "https://files.pythonhosted.org/packages/50/79/66800aadf48771f6b62f7eb014e352e5d06856655206165d775e675a02c9/exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219", size = 30371, upload-time = "2025-11-21T23:01:54.787Z" }
|
||||||
wheels = [
|
wheels = [
|
||||||
@@ -959,76 +957,6 @@ wheels = [
|
|||||||
{ url = "https://files.pythonhosted.org/packages/9a/9a/e35b4a917281c0b8419d4207f4334c8e8c5dbf4f3f5f9ada73958d937dcc/frozenlist-1.8.0-py3-none-any.whl", hash = "sha256:0c18a16eab41e82c295618a77502e17b195883241c563b00f0aa5106fc4eaa0d", size = 13409, upload-time = "2025-10-06T05:38:16.721Z" },
|
{ url = "https://files.pythonhosted.org/packages/9a/9a/e35b4a917281c0b8419d4207f4334c8e8c5dbf4f3f5f9ada73958d937dcc/frozenlist-1.8.0-py3-none-any.whl", hash = "sha256:0c18a16eab41e82c295618a77502e17b195883241c563b00f0aa5106fc4eaa0d", size = 13409, upload-time = "2025-10-06T05:38:16.721Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "greenlet"
|
|
||||||
version = "3.5.1"
|
|
||||||
source = { registry = "https://pypi.org/simple" }
|
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/6d/6e/802acd792aebb2256fbbee8cacf2727faaeb6f240ac11008f09eae4414bc/greenlet-3.5.1.tar.gz", hash = "sha256:5a56aeb7d5d9cc4b3a735efb5095bd4b4f6f0e4f93e5ca876d0e2315137b7829", size = 197356, upload-time = "2026-05-20T15:05:03.917Z" }
|
|
||||||
wheels = [
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/1d/21/117c8710abb7f146d804a124c07eb5964a60b90d02b72452885aecc18efa/greenlet-3.5.1-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:7eacb17a9d41538a2bc4912eba5ef13823c83cb69e4d141d0813debe7163187f", size = 283510, upload-time = "2026-05-20T13:12:26.475Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/b9/f7/6762a56fa5f6c2295c449c6524e10ce481e381c994cc44d9d03aef0700fb/greenlet-3.5.1-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e5cc9606aa5f4e0bde0d3bd502b44f743864c3ffa5cfa1011b1e30f5aa02366f", size = 599696, upload-time = "2026-05-20T14:00:02.906Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/0f/05/85a511e68ee109aff0aa00b4b497806091dd2d82ce209e49c6e801bd5d92/greenlet-3.5.1-cp310-cp310-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c3d35f87c7253b715d13d679e0783d845910144f282cb939fe1ba4ac8616269c", size = 612618, upload-time = "2026-05-20T14:05:39.202Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/89/b8/8b83d18ae07c46c019617f35afd7b47aab7f9b4fbb12fc637d681e10bdd8/greenlet-3.5.1-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:540dae7b956209af4d70a3be35927b4055f617763771e5e84a5255bea934d2f5", size = 612947, upload-time = "2026-05-20T13:14:23.469Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/5d/14/ad1f9fc9b82384c010212464a3702bd911f95dab2f1180bc6fbcfb1f958c/greenlet-3.5.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ed8cdb691169715a9a492844a83246f090182247d1a5031dc78a403f68ba1e97", size = 1571425, upload-time = "2026-05-20T14:02:22.671Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/46/1c/43b8203cf10f4292c9e3d270e9e5f5ade79115a0a0ca5ea6f1be5f8915a7/greenlet-3.5.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:9d59e840387076a51016777a9328b3f2c427c6f9208a6e958bad251be50a648d", size = 1638688, upload-time = "2026-05-20T13:14:30.026Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/ac/6e/0344b1e99f58f71715456e46492101fd2daa408957b8186ade0a4b515da7/greenlet-3.5.1-cp310-cp310-win_amd64.whl", hash = "sha256:b9152fca4a6466e114aaec745ae61cba739903a109754a9d4e1262f01e9259b1", size = 237763, upload-time = "2026-05-20T13:11:35.659Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/42/3c/ff890b466eaba2b0f5e6bdfff025f8c75f41b8ffdc3dbc3d24ad261e764a/greenlet-3.5.1-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:73f78f9b9f0a5c06e5c946ba1e8e36f5114923b6be109ee618c54f079c3ea14f", size = 284764, upload-time = "2026-05-20T13:09:10.204Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/81/0e/5e5457be3d256918f6a4756f073548a3f0190836e2cc94aa6d0d617a940b/greenlet-3.5.1-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a0cbed8bb44e23c5b199f888f4e4ce096b45ad9f25ff74a7ad0213875e936bb2", size = 603479, upload-time = "2026-05-20T14:00:04.757Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/6d/e1/f89a21d58d308298e6f275f13a1b472ed96c680b601a371b08be6a725989/greenlet-3.5.1-cp311-cp311-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a203a8bd0acb0701653d3bbb26e404854a68674139ed5cbb778830f42b09bb33", size = 615495, upload-time = "2026-05-20T14:05:40.87Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/75/de/af6cef182862d2ccd6975440d21c9058a77c3f9b469abf94e322dfd2e0e3/greenlet-3.5.1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8a271fcd66c74615cda6a964fda3f304267a12e50a084472218a39bb0376f563", size = 614754, upload-time = "2026-05-20T13:14:24.947Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/1a/c6/50e520283a9f19388a7326b05f9e8637e566003475eacaadad04f558c68d/greenlet-3.5.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ded7b068c7c31c1a8657d4fd42d886b3e051ae29f88b80c5ff9d502257b0f071", size = 1574097, upload-time = "2026-05-20T14:02:24.003Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/21/1c/13abd1f4860d987fa5e1170a01930d6e6cd40d328de487a3c9fdaff0ffd0/greenlet-3.5.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:d0932b81d72f552ded9d810d00021b64d89f2195a91ce115b893f943b7a4ab3c", size = 1641058, upload-time = "2026-05-20T13:14:31.83Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/f5/56/5f332b7705545eac2dc01b4e9254d24a793f2656d55d5cc6b94ee59d22ae/greenlet-3.5.1-cp311-cp311-win_amd64.whl", hash = "sha256:88e300d136eac057b2397aa1cfd7328b4c87c7eb66a09c7bc6a1292234db474e", size = 238089, upload-time = "2026-05-20T13:14:03.229Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/d9/a9/a3c2fa886c5b94863fb0e61b3bc14610b7aa94cf4f17f8741b11708305fc/greenlet-3.5.1-cp311-cp311-win_arm64.whl", hash = "sha256:cc6ab7e555c8a112ad3a76e368e86e12a2754bcae1652a5602e133ec7b635523", size = 234989, upload-time = "2026-05-20T13:08:27.715Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/c4/37/4549f149c9797c21b32c2683c33522af22522099de128b2406672526d005/greenlet-3.5.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:fa4f98af3a528f0c3fd592a26df7f376f93329c8f4d987f6bb979057af8bf5e2", size = 286220, upload-time = "2026-05-20T13:07:28.463Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/38/ff/a4f436709716965eaab9f36ea7b906c8a927fbe32fb1372a2071d964f6b1/greenlet-3.5.1-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ffea73584b216150eab159b6d12348fb253e68757974de1e2c40d8a318ac89ed", size = 601585, upload-time = "2026-05-20T14:00:06.141Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/65/ad/54bc3fcee3ad368a61b19b67d88117f7a8c29727bf71fffdeda81fbd946e/greenlet-3.5.1-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:1072b4f9edcc1e192d9283a66a3e68d6b84c561de33a83d7858beb9ba1effe10", size = 614215, upload-time = "2026-05-20T14:05:42.675Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/40/69/b91cda0647df839483201545913514c2827ebea5e5ccdf931842763bc127/greenlet-3.5.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:add5217d68b31130f0beca584d7fef4878327d2e31642b66618a14eef312b63b", size = 611358, upload-time = "2026-05-20T13:14:26.37Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/59/90/3cf77e080350cd02fa307bb2abf05df48f4482c240275bbd2c203ba8bb1c/greenlet-3.5.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a5ea42a752d47a145eae922b605cd1634665ac3d5ec1e72402d5048e8d60d207", size = 1570475, upload-time = "2026-05-20T14:02:25.29Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/65/2c/18cece62045e74598c3c393f70dce4a63f56222015ba29a5d4eeb04f764c/greenlet-3.5.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c5551170cf4f5ff5623e9af81323751979fee2c731e2287b61f73cd27257b823", size = 1635625, upload-time = "2026-05-20T13:14:34.027Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/30/f5/310d104ddf41eb5a70f4c268d22508dfb0c3c8e86fec152be34d0d2ed819/greenlet-3.5.1-cp312-cp312-win_amd64.whl", hash = "sha256:3c8bb982ad117d29478ef8f5533e97df21f1e2befd17a299257b0c96d1371c0b", size = 238791, upload-time = "2026-05-20T13:10:39.018Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/62/90/ceca11f504cd23a8047a3dea31919adc48df9b626dd0c13f0d858734fdfd/greenlet-3.5.1-cp312-cp312-win_arm64.whl", hash = "sha256:80eb4b04dadc4e67df3fae179a32c4706a3f495bc7f22fc8a81115d5f5512188", size = 235580, upload-time = "2026-05-20T13:08:45.056Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/27/69/7f7e5372d998b81001899b1c0823c957aa413ba0f2662e65821611cc31e4/greenlet-3.5.1-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:51518ff74664078fc51bffcc6fc529b0df5ae58da192691cee765d45ce944a2b", size = 285060, upload-time = "2026-05-20T13:08:51.899Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/b1/bf/387f9b6b865fd2ae0d0be09e0004827295a01b71be76ed350dd1e28a91a4/greenlet-3.5.1-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1ffdb3c0bb002c99cd8f298957e046c3dbf6006b5b7cdf11a4e19194624a0a0a", size = 604370, upload-time = "2026-05-20T14:00:07.492Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/32/f5/169ce3d4e4c67291bd18f8cbe0299c9f3e45102c7f1fb3c14780c93e4532/greenlet-3.5.1-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7715a5a2c3378ba602c3a440558261e13a820bb53a82693aacd7b7f6d964e283", size = 616987, upload-time = "2026-05-20T14:05:44.237Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/ee/e5/7f2e41d5273be07e77560d61ea4e56485b4d6c316d2a84518c62d1364061/greenlet-3.5.1-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dc71ff466927a201b08305acac451ebe1aedfcea002f62f1f2f2ac2ac1e6a135", size = 613911, upload-time = "2026-05-20T13:14:27.539Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/c5/a4/fbdc67579b73615a1f91615e814303cc71e06128f7baaba87be79b8fb90c/greenlet-3.5.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cd443683db272ebaaca03af98c0b063ab30db70ea8a31a1559f35e3f7b744ccd", size = 1570689, upload-time = "2026-05-20T14:02:27.225Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/e6/b4/77abbe35078be39718a46cd49caf16bceb35662f97a34101dca28aa98e47/greenlet-3.5.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:089fff7a6ce8d9316d1f65ebc00273a56be258c1725b32b94de90a3a979557e1", size = 1635602, upload-time = "2026-05-20T13:14:36.344Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/37/f7/129f27ca700845b8ee8ca88ce7f43435a1239c2eddb7677fc938822762cf/greenlet-3.5.1-cp313-cp313-win_amd64.whl", hash = "sha256:110a1ca7b49b014b097f6078272c3f4ed31af45b254de5228b79adba879f6af9", size = 238683, upload-time = "2026-05-20T13:11:50.57Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/6d/5c/a485a36e87df8d8fd0632ee01511244f5156a20ed3746cc6599340326395/greenlet-3.5.1-cp313-cp313-win_arm64.whl", hash = "sha256:f16ba1efc0715b680a18b8123d90dad887c6112ae3555b4b5c32c149540c6b4e", size = 235499, upload-time = "2026-05-20T13:12:42.028Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/8a/cb/c62454606daf5640369c94d8a9dd540599b1bfc090e2d2180cb77f4038d2/greenlet-3.5.1-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:d8ab31c9de8651a2facdd5c5bb0011f2380dd1a7af78ce2adf4b56095294fc07", size = 285579, upload-time = "2026-05-20T13:08:56.396Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/ec/71/c4270398c2eba968a6071af1dfbdcaeee6ec1c24bc8b435b8cc452700da6/greenlet-3.5.1-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5e300185139abc337ade480c327183adf42a875ac7181bfe66d7d4efea31fbea", size = 651106, upload-time = "2026-05-20T14:00:09.448Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/1a/ab/71e34b78a44ec271fb5f550c17bc46d301ddc5953890d935f270b0dcdb5a/greenlet-3.5.1-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7ffdb990dcaa0234cf9845aead5df2e3c3a8b6507d409274dd87e0d5ab05ffc2", size = 663478, upload-time = "2026-05-20T14:05:45.88Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/77/96/4efd6fa5c62c85426a0c19077a586258ebc3a2a146ff2493e4312a697a22/greenlet-3.5.1-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2f82b3597e9d83b63408affed0b48fd0f54935edac4302237b9a837be0dae33c", size = 660800, upload-time = "2026-05-20T13:14:29.129Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/7a/e0/6c71401a25cac7000261304e866a2f2cc04dc74810d40e2f118aa4799495/greenlet-3.5.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c0141e37414c10164e702b8fb1473304221ad98f71600850c6ef7ff4880feba0", size = 1617518, upload-time = "2026-05-20T14:02:28.662Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/41/26/c5c06643e8c0af9e7bf18e16cb51d0ab7625155f0392e1c9015d66d556cd/greenlet-3.5.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:50ae25a67bea74ea41fb14b960bc532df73eb713417b2d61892dced82fe8d3bc", size = 1681593, upload-time = "2026-05-20T13:14:39.417Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/8a/bd/e11a108317485075e68af9d23039619b86b28130c3b50d227d42edece64b/greenlet-3.5.1-cp314-cp314-win_amd64.whl", hash = "sha256:8a17c42330e261299766b75ac1ea32caa437a9453c8f65d16a13140db378ecd3", size = 239800, upload-time = "2026-05-20T13:09:30.128Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/47/f8/8e8e8417b7bf28639a5a56356ef934d0375e1d0c70a57e04d7701e870ffe/greenlet-3.5.1-cp314-cp314-win_arm64.whl", hash = "sha256:7b5f5fae05b8ac6d176a61b60c394a8cbdc2b5b91b81793066e68745cf165e54", size = 236862, upload-time = "2026-05-20T13:09:10.498Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/90/12/41bf27fde4d3605d3773ae57751eda182b8be2f5398011c041173b1d9534/greenlet-3.5.1-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:ea8da1e900d758d078810d4255d8c6aa572181896a31ec79d779eb79c3adc9ad", size = 293637, upload-time = "2026-05-20T13:12:35.529Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/44/44/ba14b23e9757707050c2f397d305bbcae62e5d7cad122f8b6baec5ae4a1f/greenlet-3.5.1-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a19570c52a21420dcbc94e661994bc325c0b5b11304540fed514586da5dc8f2e", size = 650840, upload-time = "2026-05-20T14:00:11.079Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/a8/37/5ddc2b686a6844f91abecef43411842426da2e1573f60b49ecf2547f4ae1/greenlet-3.5.1-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3d955c89b75eeca4723d7cc14135f393cd47c32e2a6cb4a8e4c6e760a26b0986", size = 656416, upload-time = "2026-05-20T14:05:47.118Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/e1/f0/d17510297c35a2992712f0bf84de3779749999f7d3d63aa1f09db7c62dbe/greenlet-3.5.1-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:de2daaaebd1a5aa88c49045b6baf9310b3263796bd88db713edf37cf53e7bb4e", size = 654397, upload-time = "2026-05-20T13:14:30.696Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/37/eb/147387705bb89092645b012586e7273cb5ed3c90ef7eaf3a69173eaf0209/greenlet-3.5.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:3bfbd69cc349e43bf3a8ae1c85548ff0718efc887615c2db16c3833d7b0b072d", size = 1614469, upload-time = "2026-05-20T14:02:30.192Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/a6/4e/37ee0da7732b7aa9896f17e15579a9df34b9fcb9dd494f0adfa749af6623/greenlet-3.5.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:4378720dd888136c27215a0214d32a4d37c3852765d45bc37aad0623423cfd78", size = 1675115, upload-time = "2026-05-20T13:14:40.972Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/57/f3/97dfcf4a6eb5077f8a672234216fb5923eb89f2cab7081cb10b2cf75b605/greenlet-3.5.1-cp314-cp314t-win_amd64.whl", hash = "sha256:45718441607f9325d948db98cbc691276059316d0358c188c246da4e1d4d23d2", size = 245246, upload-time = "2026-05-20T13:12:22.646Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/5d/73/d7f72e34b582f694f4a9b248162db7b09cc458a259ba8f0c0bfa1a34ea7d/greenlet-3.5.1-cp315-cp315-macosx_11_0_universal2.whl", hash = "sha256:2baee5ca02031757ffe8cc3d69f0cc0aec7065ce362622da74f32d3bcab1c541", size = 285575, upload-time = "2026-05-20T13:12:07.043Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/df/59/fa9c6e87dc8ad27a95dabe2f29f372b733d05a8a67470f6c901ed9975655/greenlet-3.5.1-cp315-cp315-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9b1ec3274918a81d3ea778b9e75b56b72b33f300edb6cf7f3a7fe1dae56683de", size = 656428, upload-time = "2026-05-20T14:00:12.556Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/f6/f9/e753408871eaa61dfe35e619cfc67512b036fde99893685d50eea9e07146/greenlet-3.5.1-cp315-cp315-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:111e2390ffffc47d5840b01711dd7fac07d4c09283d0283e7f3264b14e284c64", size = 667064, upload-time = "2026-05-20T14:05:48.662Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/96/27/5565b5b40389f1c7753003a07e21892fda8660926787036d5bc0308b8113/greenlet-3.5.1-cp315-cp315-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e630136e905fe5ff43e86945ae41220b6d1470956a39220e708110ac48d01ea5", size = 665697, upload-time = "2026-05-20T13:14:32.943Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/cf/82/e7de4178c0c2d1c9a5a3be3cc0b33e46a85b3ee4a77c071bf7ad8600e079/greenlet-3.5.1-cp315-cp315-musllinux_1_2_aarch64.whl", hash = "sha256:975eac34b44a7077ca4d421348455b94f0f518246a7f14bc6d2fdcfe5b584368", size = 1621256, upload-time = "2026-05-20T14:02:31.91Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/00/10/f2dddcf7dacac17dfc68691809589adad06135eb28930429cf58a6467a2f/greenlet-3.5.1-cp315-cp315-musllinux_1_2_x86_64.whl", hash = "sha256:9ab3c3a0b2ae6198e67c898dad5215a49f9ae0d0081b3c3ec59f333e39eeca26", size = 1685956, upload-time = "2026-05-20T13:14:42.55Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/22/17/4a232b32133230ada52f70e9d7f5b65b0caef8772f01849bd8d149e7e4ca/greenlet-3.5.1-cp315-cp315-win_amd64.whl", hash = "sha256:cbfc69be86e10dcfef5b1e6269d1d6926552aa89ee39e1de3353360c1b6989ab", size = 239802, upload-time = "2026-05-20T13:13:15.481Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/c2/ae/4e623a7e6d4d2a5f4cb8e4c82de4169fc637942caae68d6e676b8a128ac5/greenlet-3.5.1-cp315-cp315-win_arm64.whl", hash = "sha256:92fd6d44ac5e5a887c8a5dc4a8ba0ba908527c31c12f78c6bc7dcfe8aab279f6", size = 236853, upload-time = "2026-05-20T13:15:37.301Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/7a/57/816d9cff29119da3505b3d6a5e14a8af89006ac36f47f891ff293ee05af1/greenlet-3.5.1-cp315-cp315t-macosx_11_0_universal2.whl", hash = "sha256:a6fdf2433a5441ef9a95464f7c3e674775da1c8c1177fff311cee1acad4626ed", size = 293877, upload-time = "2026-05-20T13:10:19.078Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/23/a1/59b0a7c7d140ff1a75626680b9a9899b79a9176cab298b394968fb023295/greenlet-3.5.1-cp315-cp315t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7546556f0d649f99f6a361098a55f761181bb2ea12ff150bb16d26092ad88244", size = 655333, upload-time = "2026-05-20T14:00:14.758Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/72/1b/5efe127597625042218939d01855109f352779050768b670b52edcc16a6c/greenlet-3.5.1-cp315-cp315t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d5ee3ea898009fa898f85f9982255d35278c477bebe185beca249cab42d4526c", size = 659443, upload-time = "2026-05-20T14:05:50.159Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/6c/6d/c404246ea4d22d097a7426d0efb5b781bd7eb67715f09e79001bd552ab18/greenlet-3.5.1-cp315-cp315t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a5c81f74d204d3edd136ebfd50dce53acbb776995d721a0fe801626cfc93b8cd", size = 658356, upload-time = "2026-05-20T13:14:35.091Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/51/02/f8ee37fb6d2219329f350af241c27fcf12df57e723d11f6fc6d3bacdadaa/greenlet-3.5.1-cp315-cp315t-musllinux_1_2_aarch64.whl", hash = "sha256:2c18ef16bf6d4dd410e4dd52996888ea1497be26892fe5bbc73580aba4287b8e", size = 1619216, upload-time = "2026-05-20T14:02:33.403Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/93/c5/3dc9475ace2c7a3680da12372cddd7f1ac874eb410a1ac48d3e9dab83782/greenlet-3.5.1-cp315-cp315t-musllinux_1_2_x86_64.whl", hash = "sha256:17d86354f0ae6b61bf9be5148d0dd34e06c3cb7c602c671f79f29ac3b150e659", size = 1678427, upload-time = "2026-05-20T13:14:43.71Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/df/4e/750c15c317a41ffb36f0bf40b933e3d744a7dede61889f74443ea69690cf/greenlet-3.5.1-cp315-cp315t-win_amd64.whl", hash = "sha256:e7516cf6ae6b8a582c2770a0caed47b8a48373ed732c33d69a72913ae6ac923e", size = 245225, upload-time = "2026-05-20T13:13:59.366Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/4f/fd/d3baea2eeb7b617efd47e87ca06e2ec2c6118d303aa9e918e0ce16eadc10/greenlet-3.5.1-cp315-cp315t-win_arm64.whl", hash = "sha256:5028648bf2253ec4745add746129d3904121fa7fe871a76bed23c5720573ce0a", size = 239590, upload-time = "2026-05-20T13:13:37.382Z" },
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "grpcio"
|
name = "grpcio"
|
||||||
version = "1.80.0"
|
version = "1.80.0"
|
||||||
@@ -2756,61 +2684,6 @@ wheels = [
|
|||||||
{ url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" },
|
{ url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "sqlalchemy"
|
|
||||||
version = "2.0.50"
|
|
||||||
source = { registry = "https://pypi.org/simple" }
|
|
||||||
dependencies = [
|
|
||||||
{ name = "greenlet", marker = "platform_machine == 'AMD64' or platform_machine == 'WIN32' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'ppc64le' or platform_machine == 'win32' or platform_machine == 'x86_64'" },
|
|
||||||
{ name = "typing-extensions" },
|
|
||||||
]
|
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/57/da/6fbf010c8ebb347679d0d100b22fe9ba5e13fd04046c5df7280d2f0bf706/sqlalchemy-2.0.50.tar.gz", hash = "sha256:af5607d11ef90fd6a5c0549fe0045dce1663d427426bcfb506dcb5346a85a3b9", size = 9907424, upload-time = "2026-05-24T19:20:04.018Z" }
|
|
||||||
wheels = [
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/70/a9/812a775bd8c1af0966d660238d005baf25e9bced1f038c8e71f00aa637a7/sqlalchemy-2.0.50-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7af6eeb84985bf840ba779018ff9424d61ff69b52e66b8789d3c8da7bf5341b2", size = 2161617, upload-time = "2026-05-24T20:00:00.761Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/d5/74/5a6bc5496e9be8f740fbf80f9e6bd4ab965c8a80870eb07ab015e360957a/sqlalchemy-2.0.50-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0fe7822866f3a9fc5f3db21a290ce8961a53050115f05edf9402b6a5feb92a9f", size = 3244104, upload-time = "2026-05-24T20:07:38.158Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/81/55/b260d8df2adc9bb0bf294f67b5f802ff0d84d99442b536b9efd0ea72d447/sqlalchemy-2.0.50-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e8e1b0f6a4dcd9b4839e2320afb5df37a6981cbc20ff9c423ae11c5537bdbd21", size = 3243039, upload-time = "2026-05-24T20:14:23.765Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/e5/6d/58714005cbf370f16c3f30d30324a43be10069efcfe764f7236a2e851947/sqlalchemy-2.0.50-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:e195687f1af431c9515416288373b323b6eb599f774409814e89e9d603a56e39", size = 3195017, upload-time = "2026-05-24T20:07:40.086Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/30/e8/67527fee039bd3e1a6ce3f03d2b62fd87ab9099c17052810d79496727b66/sqlalchemy-2.0.50-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ea1a8a2db4b2217d456c8d7a873bfc605f06fe3584d315264ea18c2a17585d0b", size = 3215308, upload-time = "2026-05-24T20:14:26.034Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/94/b2/dd3155a6a6706cb89adecf5ee6e0512f7b0ee5cf3e6f4cde67d3c20ebfda/sqlalchemy-2.0.50-cp310-cp310-win32.whl", hash = "sha256:68b154b08088b4ec32bb4d2958bfbb50e57549f91a4cd3e7f928e3553ed69031", size = 2121637, upload-time = "2026-05-24T20:08:06.401Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/93/a1/a09c463ee3e7764b5ce5bd19a7f0b6eefbde62e637439ab58498cdbd6b47/sqlalchemy-2.0.50-cp310-cp310-win_amd64.whl", hash = "sha256:66e374271ecb7101273f57af1a62446a953d327eec4f8089147de57c591bbacc", size = 2144673, upload-time = "2026-05-24T20:08:07.936Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/b6/5d/3172686af1770e4de2805f919a51441085f589ddadf3dd76ec582f84f497/sqlalchemy-2.0.50-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1aa6e403663a9c43c8fef7ce4bdb4cf48bcd8d352e91deda2a99f963270bd508", size = 2161366, upload-time = "2026-05-24T20:00:02.061Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/0f/90/e98dedea3c3e663a17afcd003a34ba45efdac2cea3b6f2e4585e2b1e2537/sqlalchemy-2.0.50-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:51b637a84f9fa35ae1f9017e786cb142974a25305085e1b378b3647a67f65ad3", size = 3318926, upload-time = "2026-05-24T20:07:42.369Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/3b/4f/501308c2babb62c11753ecb4ee88ba9eef019419a4d6cbf7cb13e2bad353/sqlalchemy-2.0.50-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2dab927761d9108550f0cf8e66ff21af56f907a0ce0a689793db615e2b55f62c", size = 3319199, upload-time = "2026-05-24T20:14:28.551Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/ac/39/d88996c5e03ed6248c3a788d20f0b8d8b376b9f8a495e4bab9df7c72d2f8/sqlalchemy-2.0.50-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:545eae198d37bcf837a10ede3684e2af32458d6f35c597c35c2de7502dc38fc4", size = 3270301, upload-time = "2026-05-24T20:07:44.917Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/42/1b/1ae0e65161b51cc43e5ca75430ef79d80e23b5042d645586c2c342c3b92e/sqlalchemy-2.0.50-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0fec460e18cdbb4c7773531122ce9a27e96c6ca17af3933941d94da475ad2c86", size = 3293465, upload-time = "2026-05-24T20:14:30.501Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/83/29/17c0003f2c0dfa6d1b97672475707e3ec5980db09defd7fa20beb6833bbd/sqlalchemy-2.0.50-cp311-cp311-win32.whl", hash = "sha256:e6e814658818fd165e749e3d8490ef16cc7f379a118c37ada8b0589ffbaaac22", size = 2120694, upload-time = "2026-05-24T20:08:09.237Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/c9/18/280d00654cc19d1fccf236fa5070f6dd04b84dde6f1b2e637bde0ff340a7/sqlalchemy-2.0.50-cp311-cp311-win_amd64.whl", hash = "sha256:1c5f858fe79c9f5d8fda065c06186356acb7f8df3cd52dbd5ee3f200e4b144f5", size = 2145315, upload-time = "2026-05-24T20:08:10.952Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/be/b0/a9d19b43f38f878b1278bca5b00b909f7540d41494396dd2561f9ad0956d/sqlalchemy-2.0.50-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:23ae23d8b9d344d30d0a92f06d45825024a5790f1c1dd4cf452636a50d3e58cb", size = 2159807, upload-time = "2026-05-24T19:27:53.086Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/f5/2c/191dd58a248fd2cfd4780fa82c375c505e4ad98c8b522fa69ec492130d77/sqlalchemy-2.0.50-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:47b71b933e7b4ebad407c8fdfd70d2c4f08b78b3238bb30eebdd6eb32ca51b89", size = 3343358, upload-time = "2026-05-24T20:09:29.279Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/8a/2b/514fce8a7df81cf5bad7ff7865de7ac0c5776a38cc043475c4703eb7fe8b/sqlalchemy-2.0.50-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:110fdac56ace278949f00de805edacbd6141e382d992f9ba28238b3a0827a600", size = 3357994, upload-time = "2026-05-24T20:17:13.495Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/35/a6/a0e283f5494f92b0d77e319ff77e437b1ffe4a051ba67c81d53234825475/sqlalchemy-2.0.50-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0f5e4ac70e9e757f6b3e87c0491ff034442ecd8dfd36d041a50564c322dafc0e", size = 3289399, upload-time = "2026-05-24T20:09:32.239Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/b7/96/1b07325ba71752d6a028b77d07bed1483ad545f794e8b1dc89b3ba3b3c68/sqlalchemy-2.0.50-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:724f3dcbe53dd0151e3cb5e7ec4ba4c620bede579caacd16275dc35ce06e8615", size = 3321216, upload-time = "2026-05-24T20:17:15.581Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/ed/8e/bad6ed253e8a99edfc99af02f7173ec48a1d3ed1b9b35a1b8bc1700900cc/sqlalchemy-2.0.50-cp312-cp312-win32.whl", hash = "sha256:1208050441471d003b7c8cb4054fb084f185cf35ac3f0ea270803865bca9939a", size = 2119194, upload-time = "2026-05-24T19:50:04.943Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/b6/2d/314a6690dda4b9cfc571eab1a63cf6fe6e1470aa3759ccda6aa016ee0f5a/sqlalchemy-2.0.50-cp312-cp312-win_amd64.whl", hash = "sha256:9d1af51558029a156a70986b7df88f042b3d158d7c8d8fb5072912d4b32d89c7", size = 2146186, upload-time = "2026-05-24T19:50:06.74Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/0b/c4/c42356b527296e9862f67990efce31ef78b4cf69cd3f80873a528a060320/sqlalchemy-2.0.50-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:06a9210bdc5f4298cff0781087e2ff45683922252dacc452846373a58761f093", size = 2156697, upload-time = "2026-05-24T19:27:54.764Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/60/a1/b1a70e3c4365ac7fe9e347f3710f19b562c866fb96d45e3c891588789a7b/sqlalchemy-2.0.50-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b53784972ade4f8174b9aa661f31a06f8a936d2cfdd602913ff3c6dd40ae873", size = 3284260, upload-time = "2026-05-24T20:09:34.195Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/3f/4a/f3ac3caa19f263d57b0a47f8c91bbf56583dc2d3fc63acfbf644abb24fe0/sqlalchemy-2.0.50-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:31648fa14460537e768a7303b078e4344d208e0d23e06867c1f376a227ed82db", size = 3302280, upload-time = "2026-05-24T20:17:17.825Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/66/55/ccada3e3d62254587819749a0bc69f41173eb48a6e385d10e66d32a9c88e/sqlalchemy-2.0.50-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:03f4323c980ad0e918cc9e5369b015f759f4e534db5bbaf4dc36832c10d05064", size = 3231580, upload-time = "2026-05-24T20:09:36.406Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/05/f6/6809349130a2de0e109e7f00fd7d431da9565b9b2868b32ee684754f672b/sqlalchemy-2.0.50-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:2b9dcc43afef8ac157cd92fce96985d6b8b0cfbd3df4d666f66b4d55a75d202f", size = 3269375, upload-time = "2026-05-24T20:17:20.34Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/48/84/278a811ef4e07be9c89dc5cdd7be833268509a66a68c4897cf585e67428f/sqlalchemy-2.0.50-cp313-cp313-win32.whl", hash = "sha256:60922d6599065ddca2c6f376b9aa2f41a6b85a271725e0909490bbc50b1998a5", size = 2117229, upload-time = "2026-05-24T19:50:08.215Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/f6/1c/067cc6187ed32d2ec222fe6d2643acc1659a6d0659f8a7cbc5ad3ae83280/sqlalchemy-2.0.50-cp313-cp313-win_amd64.whl", hash = "sha256:287086e67275a212c4582d166a6fb03a65ccc5551d80866270ce0dd9f34eccd3", size = 2143126, upload-time = "2026-05-24T19:50:09.691Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/df/32/10ac51b4be7cdecd7e93d069251c86dfbf70b7adbd7c67b48ccea6c49e1c/sqlalchemy-2.0.50-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c966932507a4d7d0a37314927dbfcd89720e3f37d2a1e3352e7ae7939fa8e8a0", size = 2158519, upload-time = "2026-05-24T19:27:56.472Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/5a/76/e703d2f7681d7d66c4c891af3f07c7ccf4c76ad7f18351de035b5eda007a/sqlalchemy-2.0.50-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:faffef4bcc20a1892e65e155293d99d60855bbbc79250ab712819cfd56a8e6bb", size = 3282063, upload-time = "2026-05-24T20:09:38.57Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/31/26/ef168b184a25701f9995e8fb7e503fafd7a99c1c77cda1bc1a26ea2ed486/sqlalchemy-2.0.50-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6c206aec519a2e7bd08abbfb33436e325fd22c632d9c21a9047e376ce241646e", size = 3287069, upload-time = "2026-05-24T20:17:21.942Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/c2/15/765acc2bc693bccc43ca4a95d5b69750da8aaf6db1b5c616536e087f8920/sqlalchemy-2.0.50-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:bef4ac756363227ef6402a75fee025a4bc690f92328e825868939b3b3a446a6d", size = 3230453, upload-time = "2026-05-24T20:09:40.398Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/63/61/08e03c3adbf5db0087a0b6816746fec8f3032fb2f7fc899a9bb9b2a48ce4/sqlalchemy-2.0.50-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:96fbee6b19c19cd1556c8bf9419447cf2ec149ffcab7ab64348c23e54ef8547f", size = 3252413, upload-time = "2026-05-24T20:17:24.067Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/03/0c/370a1f2db38436c615e10134c8a37de3688e74084792380695f3f5083860/sqlalchemy-2.0.50-cp314-cp314-win32.whl", hash = "sha256:8f00e3eb43ba30eb1b238ee03a8a62309486d1321eda3328bb611e0340033ad8", size = 2120063, upload-time = "2026-05-24T19:50:11.08Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/7f/a0/fe92bb9817863bc13ba093bda931979a26cc2ca69f8e8f26d07add3d7c6f/sqlalchemy-2.0.50-cp314-cp314-win_amd64.whl", hash = "sha256:15708c613cd5005b7dffe1f66ee6a63ee8f5e46799f71c70ebad74178c676a39", size = 2145830, upload-time = "2026-05-24T19:50:12.452Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/cc/ff/e5640a98a0b2f491eb8fde10fb6c773621a2e44340de231fafcc9370f4a9/sqlalchemy-2.0.50-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:3699dac4be410e97049a1658e9480da9cde956594aa0f3aebc60b88f21c5ba70", size = 2178435, upload-time = "2026-05-24T19:42:58.889Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/b7/85/337116e186f1236375b5fb70c21cfac98e8e8ab0d3a47be838dc47a59e08/sqlalchemy-2.0.50-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f96233858e3df43932ac11589e22520da6e8aeb624b03fedfeebb0e8ea213086", size = 3566059, upload-time = "2026-05-24T20:01:20.848Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/96/34/bb0e190e161c3c2c24314a65add57218be14a4a9486886b7f5047c1ff7c8/sqlalchemy-2.0.50-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c4e70c46fad30c3bcc6a4708bc0130a3173e11a5b25f0ea4a9d8911b450f1f52", size = 3535366, upload-time = "2026-05-24T20:03:56.768Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/df/5a/a7f759f97e4fd499c5d4e4488c760d5a7fbecf3028b465a04274fcd52384/sqlalchemy-2.0.50-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1918a3cf564d16d95bca7301005f41ab2ad50b07cd3b9da50d3ed986db148d6a", size = 3474879, upload-time = "2026-05-24T20:01:23.058Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/9d/d9/2907ea38eb60687d297bf9c39e5ee58053c87b57fe8a9cae97090cecbf10/sqlalchemy-2.0.50-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:b00098cdbdbd38c7be3d568b0c9c3122b8c0ec62b911b57cd5e6e0254d60a76d", size = 3486117, upload-time = "2026-05-24T20:03:59.052Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/f2/e3/5aa06f167559f8c0bdae487e297d23ba548150ab016a3418265d617a4985/sqlalchemy-2.0.50-cp314-cp314t-win32.whl", hash = "sha256:1fbd55a969d7ac44a98e3dec75016074f809fa08f871585ace58dde110d1bf3e", size = 2150823, upload-time = "2026-05-24T20:08:58.644Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/65/9b/112fb8f977582d7489d036e409e3723948bcf5320b3ac465f3c481bbe8f9/sqlalchemy-2.0.50-cp314-cp314t-win_amd64.whl", hash = "sha256:c5c3cdb753a9004183e1ccb634b41611654c989e61bc68617ce878e46d6f1e51", size = 2185794, upload-time = "2026-05-24T20:09:00.319Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/d0/10/f7220e9b784d295d241c86ed99aeb537f92afcd469a64861f2717e9bb077/sqlalchemy-2.0.50-py3-none-any.whl", hash = "sha256:92064363517a3ff8212b5a93b8c62876579d8dfd1ca5b561335f30152d884fa9", size = 1943861, upload-time = "2026-05-24T19:59:01.119Z" },
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "starlette"
|
name = "starlette"
|
||||||
version = "1.1.0"
|
version = "1.1.0"
|
||||||
|
|||||||
Reference in New Issue
Block a user