Fix centered content layout widths
This commit is contained in:
@@ -1,18 +1,29 @@
|
||||
"""Initialize the app.domain.documents package."""
|
||||
|
||||
from .models import Chunk, Document, DocumentStatus, ParsedDocument
|
||||
from .ports import ChunkBuilder, DocumentBinaryStore, DocumentParser, DocumentRepository, ParseArtifactStore
|
||||
from .models import Chunk, Document, DocumentArtifact, DocumentProcessingRun, DocumentStatus, DocumentStatusEvent, ParsedDocument
|
||||
from .ports import (
|
||||
ChunkBuilder,
|
||||
DocumentBinaryStore,
|
||||
DocumentParser,
|
||||
DocumentProcessingStore,
|
||||
DocumentRepository,
|
||||
ParseArtifactStore,
|
||||
)
|
||||
# Keep package boundaries explicit so backend imports stay predictable.
|
||||
|
||||
|
||||
# Public names of the package, grouped by the submodule they are imported from.
__all__ = [
    # Imported from .models
    "Chunk",
    "Document",
    "DocumentArtifact",
    "DocumentProcessingRun",
    "DocumentStatus",
    "DocumentStatusEvent",
    "ParsedDocument",
    # Imported from .ports
    "ChunkBuilder",
    "DocumentBinaryStore",
    "DocumentParser",
    "DocumentProcessingStore",
    "DocumentRepository",
    "ParseArtifactStore",
]
|
||||
|
||||
@@ -76,3 +76,61 @@ class Chunk:
|
||||
semantic_id: str = ""
|
||||
block_type: str = ""
|
||||
metadata: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass
class DocumentProcessingRun:
    """Record of a single processing attempt made against one document.

    The first four fields are required; every remaining field carries a
    default value.
    """

    # Required identifiers and state.
    run_id: str
    doc_id: str
    trigger_type: str
    run_status: str

    # Backend / model / index labels; each defaults to an empty string.
    parser_backend: str = ""
    chunk_backend: str = ""
    embedding_model: str = ""
    index_name: str = ""

    # Timestamps. ``started_at`` defaults to whatever ``utcnow()`` returns at
    # construction time; the others default to ``None``.
    started_at: datetime = field(default_factory=utcnow)
    stored_at: datetime | None = None
    parsed_at: datetime | None = None
    indexed_at: datetime | None = None
    finished_at: datetime | None = None

    # Counters; each defaults to 0.
    layout_count: int = 0
    structure_node_count: int = 0
    semantic_block_count: int = 0
    vector_chunk_count: int = 0
    chunk_count: int = 0

    # Failure details; both default to an empty string.
    failure_stage: str = ""
    error_message: str = ""

    # Free-form extra data; a fresh dict is created per instance.
    metadata: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass
class DocumentStatusEvent:
    """Lifecycle event for a document, emitted while it is being processed.

    The first six fields are required; ``message``, ``metadata`` and
    ``occurred_at`` have defaults.
    """

    # Required identifiers.
    event_id: str
    doc_id: str
    run_id: str

    # Required transition description.
    from_status: str
    to_status: str
    stage: str

    # Optional text; defaults to an empty string.
    message: str = ""

    # Free-form extra data; a fresh dict is created per instance.
    metadata: dict[str, Any] = field(default_factory=dict)

    # Defaults to whatever ``utcnow()`` returns at construction time.
    occurred_at: datetime = field(default_factory=utcnow)
|
||||
|
||||
|
||||
@dataclass
class DocumentArtifact:
    """Reference to an artifact persisted for a single processing run.

    The first six fields are required; the remaining fields have defaults.
    """

    # Required identifiers.
    artifact_id: str
    doc_id: str
    run_id: str

    # Required description of the stored object.
    artifact_type: str
    object_name: str
    content_type: str

    # Optional size and checksum; default to 0 and an empty string.
    byte_size: int = 0
    checksum: str = ""

    # Free-form extra data; a fresh dict is created per instance.
    metadata: dict[str, Any] = field(default_factory=dict)

    # Defaults to whatever ``utcnow()`` returns at construction time.
    created_at: datetime = field(default_factory=utcnow)
|
||||
|
||||
@@ -4,7 +4,7 @@ from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
from .models import Chunk, Document, DocumentStatus, ParsedDocument
|
||||
from .models import Chunk, Document, DocumentArtifact, DocumentProcessingRun, DocumentStatus, DocumentStatusEvent, ParsedDocument
|
||||
# Keep domain contracts explicit so adapters can swap implementations cleanly.
|
||||
|
||||
|
||||
@@ -128,3 +128,111 @@ class ParseArtifactStore(ABC):
|
||||
def get_structure_nodes(self, doc_id: str) -> list[dict]:
|
||||
"""Return all structure nodes for a document."""
|
||||
pass
|
||||
|
||||
|
||||
class DocumentProcessingStore(ABC):
    """Abstract store for processing runs, status events, and artifact references.

    Every method is abstract; concrete adapters supply the persistence logic.
    The ``mark_run_*`` methods take ``run_id`` positionally and all other
    arguments as keywords, and are annotated to return the run or ``None``.
    """

    @abstractmethod
    def create_run(self, run: DocumentProcessingRun) -> DocumentProcessingRun:
        """Persist a new processing run and return it."""

    @abstractmethod
    def mark_run_stored(
        self,
        run_id: str,
        *,
        stored_at: object | None = None,
        metadata: dict | None = None,
    ) -> DocumentProcessingRun | None:
        """Record that the source file for the given run has been persisted."""

    @abstractmethod
    def mark_run_parsed(
        self,
        run_id: str,
        *,
        parser_backend: str,
        layout_count: int,
        structure_node_count: int,
        semantic_block_count: int,
        vector_chunk_count: int,
        parsed_at: object | None = None,
        metadata: dict | None = None,
    ) -> DocumentProcessingRun | None:
        """Store the parse-completion details (backend and counts) for a run."""

    @abstractmethod
    def mark_run_indexed(
        self,
        run_id: str,
        *,
        chunk_count: int,
        index_name: str,
        indexed_at: object | None = None,
        finished_at: object | None = None,
        metadata: dict | None = None,
    ) -> DocumentProcessingRun | None:
        """Record that the given run was indexed successfully."""

    @abstractmethod
    def mark_run_failed(
        self,
        run_id: str,
        *,
        failure_stage: str,
        error_message: str,
        finished_at: object | None = None,
        metadata: dict | None = None,
    ) -> DocumentProcessingRun | None:
        """Record that the given run failed, with its stage and error message."""

    @abstractmethod
    def append_status_event(self, event: DocumentStatusEvent) -> DocumentStatusEvent:
        """Add one status event to the store and return it."""

    @abstractmethod
    def replace_artifacts_for_run(self, run_id: str, artifacts: list[DocumentArtifact]) -> list[DocumentArtifact]:
        """Swap the run's stored artifacts for exactly the supplied list."""

    @abstractmethod
    def delete_by_document(self, doc_id: str) -> None:
        """Remove every piece of processing data held for a document."""

    @abstractmethod
    def list_runs_by_document(self, doc_id: str) -> list[DocumentProcessingRun]:
        """Return every processing run recorded for a document."""

    @abstractmethod
    def get_run(self, run_id: str) -> DocumentProcessingRun | None:
        """Look up a single processing run by its identifier."""

    @abstractmethod
    def list_status_events_by_document(self, doc_id: str) -> list[DocumentStatusEvent]:
        """Return the status events recorded for a document."""

    @abstractmethod
    def list_status_events_by_run(self, run_id: str) -> list[DocumentStatusEvent]:
        """Return the status events recorded for a run."""

    @abstractmethod
    def list_artifacts_by_document(self, doc_id: str) -> list[DocumentArtifact]:
        """Return the artifact references recorded for a document."""

    @abstractmethod
    def list_artifacts_by_run(self, run_id: str) -> list[DocumentArtifact]:
        """Return the artifact references recorded for a run."""
|
||||
|
||||
Reference in New Issue
Block a user