Fix centered content layout widths

This commit is contained in:
ash66
2026-05-26 12:34:12 +08:00
parent 34d72d7ce9
commit fec22a3a2c
16 changed files with 2102 additions and 140 deletions

View File

@@ -1,18 +1,29 @@
"""Initialize the app.domain.documents package."""
from .models import Chunk, Document, DocumentStatus, ParsedDocument
from .ports import ChunkBuilder, DocumentBinaryStore, DocumentParser, DocumentRepository, ParseArtifactStore
from .models import Chunk, Document, DocumentArtifact, DocumentProcessingRun, DocumentStatus, DocumentStatusEvent, ParsedDocument
from .ports import (
ChunkBuilder,
DocumentBinaryStore,
DocumentParser,
DocumentProcessingStore,
DocumentRepository,
ParseArtifactStore,
)
# Keep package boundaries explicit so backend imports stay predictable.
__all__ = [
    # Domain models re-exported from .models
    "Chunk",
    "Document",
    "DocumentArtifact",
    "DocumentProcessingRun",
    "DocumentStatus",
    "DocumentStatusEvent",
    "ParsedDocument",
    # Port interfaces re-exported from .ports
    "ChunkBuilder",
    "DocumentBinaryStore",
    "DocumentParser",
    "DocumentProcessingStore",
    "DocumentRepository",
    "ParseArtifactStore",
]

View File

@@ -76,3 +76,61 @@ class Chunk:
semantic_id: str = ""
block_type: str = ""
metadata: dict[str, Any] = field(default_factory=dict)
@dataclass
class DocumentProcessingRun:
    """Represent one processing attempt for a document.

    The four leading fields have no defaults and must be supplied by the
    caller. Every other field starts out empty (``""``, ``0``, ``None`` or a
    fresh ``dict``), except ``started_at`` which defaults to ``utcnow()`` at
    construction time.
    """

    # Required identity and state fields (no defaults).
    run_id: str
    doc_id: str
    trigger_type: str
    run_status: str

    # Names of the components used by the run; default to empty strings.
    parser_backend: str = ""
    chunk_backend: str = ""
    embedding_model: str = ""
    index_name: str = ""

    # Timestamps: only started_at is populated on construction, the
    # remaining ones default to None.
    started_at: datetime = field(default_factory=utcnow)
    stored_at: datetime | None = None
    parsed_at: datetime | None = None
    indexed_at: datetime | None = None
    finished_at: datetime | None = None

    # Counters, all defaulting to zero.
    layout_count: int = 0
    structure_node_count: int = 0
    semantic_block_count: int = 0
    vector_chunk_count: int = 0
    chunk_count: int = 0

    # Failure details; empty strings by default.
    failure_stage: str = ""
    error_message: str = ""

    # Free-form extra data; default_factory gives each instance its own dict.
    metadata: dict[str, Any] = field(default_factory=dict)
@dataclass
class DocumentStatusEvent:
    """Represent a document lifecycle event emitted during processing.

    The identifiers, the ``from_status``/``to_status`` pair and ``stage`` are
    required. ``message`` and ``metadata`` default to empty values, and
    ``occurred_at`` defaults to ``utcnow()`` at construction time.
    """

    # Required fields (no defaults).
    event_id: str
    doc_id: str
    run_id: str
    from_status: str
    to_status: str
    stage: str

    # Optional details.
    message: str = ""
    # default_factory gives each event its own dict instead of a shared one.
    metadata: dict[str, Any] = field(default_factory=dict)
    occurred_at: datetime = field(default_factory=utcnow)
@dataclass
class DocumentArtifact:
    """Represent a persisted artifact reference for one processing run.

    The identifiers, ``artifact_type``, ``object_name`` and ``content_type``
    are required. ``byte_size``, ``checksum`` and ``metadata`` default to
    empty values, and ``created_at`` defaults to ``utcnow()`` at construction
    time.
    """

    # Required fields (no defaults).
    artifact_id: str
    doc_id: str
    run_id: str
    artifact_type: str
    object_name: str
    content_type: str

    # Optional details.
    byte_size: int = 0
    checksum: str = ""
    # default_factory gives each artifact its own dict instead of a shared one.
    metadata: dict[str, Any] = field(default_factory=dict)
    created_at: datetime = field(default_factory=utcnow)

View File

@@ -4,7 +4,7 @@ from __future__ import annotations
from abc import ABC, abstractmethod
from .models import Chunk, Document, DocumentStatus, ParsedDocument
from .models import Chunk, Document, DocumentArtifact, DocumentProcessingRun, DocumentStatus, DocumentStatusEvent, ParsedDocument
# Keep domain contracts explicit so adapters can swap implementations cleanly.
@@ -128,3 +128,111 @@ class ParseArtifactStore(ABC):
def get_structure_nodes(self, doc_id: str) -> list[dict]:
"""Return all structure nodes for a document."""
pass
class DocumentProcessingStore(ABC):
    """Persist document processing runs, events, and artifact references.

    This is an abstract port: every method is abstract, so a concrete adapter
    must implement all of them before it can be instantiated.

    NOTE(review): the ``mark_run_*`` methods and ``get_run`` may return
    ``None``; presumably that means no run matched ``run_id`` -- confirm
    against the concrete implementations.
    """

    @abstractmethod
    def create_run(self, run: DocumentProcessingRun) -> DocumentProcessingRun:
        """Create a new processing run record.

        Args:
            run: The run to persist.

        Returns:
            The run record as reported by the store.
        """
        pass

    @abstractmethod
    def mark_run_stored(
        self,
        run_id: str,
        *,
        stored_at: object | None = None,
        metadata: dict | None = None,
    ) -> DocumentProcessingRun | None:
        """Mark a run as having persisted the source file.

        Args:
            run_id: Identifier of the run to update.
            stored_at: Optional timestamp for the storage step (keyword-only).
            metadata: Optional extra data for the run (keyword-only).

        Returns:
            The run record, or ``None``.
        """
        pass

    @abstractmethod
    def mark_run_parsed(
        self,
        run_id: str,
        *,
        parser_backend: str,
        layout_count: int,
        structure_node_count: int,
        semantic_block_count: int,
        vector_chunk_count: int,
        parsed_at: object | None = None,
        metadata: dict | None = None,
    ) -> DocumentProcessingRun | None:
        """Record parse completion details for a run.

        All arguments after ``run_id`` are keyword-only; the backend name and
        the four counts are required.

        Args:
            run_id: Identifier of the run to update.
            parser_backend: Name of the parser backend that was used.
            layout_count: Layout count to record.
            structure_node_count: Structure node count to record.
            semantic_block_count: Semantic block count to record.
            vector_chunk_count: Vector chunk count to record.
            parsed_at: Optional timestamp for the parse step.
            metadata: Optional extra data for the run.

        Returns:
            The run record, or ``None``.
        """
        pass

    @abstractmethod
    def mark_run_indexed(
        self,
        run_id: str,
        *,
        chunk_count: int,
        index_name: str,
        indexed_at: object | None = None,
        finished_at: object | None = None,
        metadata: dict | None = None,
    ) -> DocumentProcessingRun | None:
        """Mark a run as successfully indexed.

        All arguments after ``run_id`` are keyword-only; ``chunk_count`` and
        ``index_name`` are required.

        Args:
            run_id: Identifier of the run to update.
            chunk_count: Chunk count to record.
            index_name: Name of the index that was written.
            indexed_at: Optional timestamp for the indexing step.
            finished_at: Optional timestamp for the end of the run.
            metadata: Optional extra data for the run.

        Returns:
            The run record, or ``None``.
        """
        pass

    @abstractmethod
    def mark_run_failed(
        self,
        run_id: str,
        *,
        failure_stage: str,
        error_message: str,
        finished_at: object | None = None,
        metadata: dict | None = None,
    ) -> DocumentProcessingRun | None:
        """Mark a run as failed.

        All arguments after ``run_id`` are keyword-only; ``failure_stage``
        and ``error_message`` are required.

        Args:
            run_id: Identifier of the run to update.
            failure_stage: Stage at which the run failed.
            error_message: Description of the failure.
            finished_at: Optional timestamp for the end of the run.
            metadata: Optional extra data for the run.

        Returns:
            The run record, or ``None``.
        """
        pass

    @abstractmethod
    def append_status_event(self, event: DocumentStatusEvent) -> DocumentStatusEvent:
        """Append a document status event.

        Args:
            event: The event to persist.

        Returns:
            The event record as reported by the store.
        """
        pass

    @abstractmethod
    def replace_artifacts_for_run(self, run_id: str, artifacts: list[DocumentArtifact]) -> list[DocumentArtifact]:
        """Replace all artifacts for a run with the provided list.

        Args:
            run_id: Identifier of the run whose artifacts are replaced.
            artifacts: The new artifact references for the run.

        Returns:
            The artifact records as reported by the store.
        """
        pass

    @abstractmethod
    def delete_by_document(self, doc_id: str) -> None:
        """Delete all processing data for a document.

        Args:
            doc_id: Identifier of the document to purge.
        """
        pass

    @abstractmethod
    def list_runs_by_document(self, doc_id: str) -> list[DocumentProcessingRun]:
        """List all processing runs for a document.

        Args:
            doc_id: Identifier of the document.
        """
        pass

    @abstractmethod
    def get_run(self, run_id: str) -> DocumentProcessingRun | None:
        """Return one processing run by identifier.

        Args:
            run_id: Identifier of the run.

        Returns:
            The run record, or ``None``.
        """
        pass

    @abstractmethod
    def list_status_events_by_document(self, doc_id: str) -> list[DocumentStatusEvent]:
        """List status events for a document.

        Args:
            doc_id: Identifier of the document.
        """
        pass

    @abstractmethod
    def list_status_events_by_run(self, run_id: str) -> list[DocumentStatusEvent]:
        """List status events for a run.

        Args:
            run_id: Identifier of the run.
        """
        pass

    @abstractmethod
    def list_artifacts_by_document(self, doc_id: str) -> list[DocumentArtifact]:
        """List artifact references for a document.

        Args:
            doc_id: Identifier of the document.
        """
        pass

    @abstractmethod
    def list_artifacts_by_run(self, run_id: str) -> list[DocumentArtifact]:
        """List artifact references for a run.

        Args:
            run_id: Identifier of the run.
        """
        pass