Fix SSE route dependency and align architecture docs
This commit is contained in:
77
backend/app/domain/documents/models.py
Normal file
77
backend/app/domain/documents/models.py
Normal file
@@ -0,0 +1,77 @@
|
||||
"""Define domain models for documents."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import UTC, datetime
|
||||
from enum import Enum
|
||||
from typing import Any
|
||||
# Keep module behavior explicit so the backend flow stays easy to audit.
|
||||
|
||||
|
||||
def utcnow() -> datetime:
|
||||
return datetime.now(UTC)
|
||||
|
||||
|
||||
|
||||
class DocumentStatus(str, Enum):
    """Define the Document Status enumeration.

    The enum mixes in ``str``, so each member compares equal to its raw
    lowercase string value (``DocumentStatus.PENDING == "pending"``) and
    can be looked up from that string (``DocumentStatus("pending")``).

    NOTE(review): the member order presumably mirrors the ingestion
    pipeline (pending -> stored -> parsed -> indexed, with ``FAILED`` as
    the error outcome) -- confirm against the services that set it.
    """

    PENDING = "pending"
    STORED = "stored"
    PARSED = "parsed"
    INDEXED = "indexed"
    FAILED = "failed"
|
||||
|
||||
|
||||
@dataclass
class Document:
    """Represent the Document type.

    The first six fields (``doc_id`` through ``size_bytes``) are required;
    every other field has a default so a record can be created before any
    processing results exist.

    Defaults visible in this definition:
      * ``status`` starts as ``DocumentStatus.PENDING``.
      * Text fields default to ``""`` and counters/latency default to ``0``.
      * ``created_at`` and ``updated_at`` are each filled by calling
        ``utcnow()`` at construction time (two separate calls, so the two
        values may differ slightly).
      * ``metadata`` is a fresh, independent ``dict`` per instance.

    NOTE(review): ``object_name`` presumably refers to the key in an
    object store and ``index_name`` to a search/vector index -- confirm
    against the storage and indexing services.
    """

    # Required identity and upload attributes.
    doc_id: str
    doc_name: str
    file_name: str
    object_name: str
    content_type: str
    size_bytes: int

    # Optional attributes; all have "empty" defaults.
    status: DocumentStatus = DocumentStatus.PENDING
    regulation_type: str = ""
    version: str = ""
    summary: str = ""
    summary_latency_ms: int = 0
    chunk_count: int = 0
    parser_name: str = ""
    index_name: str = ""
    error_message: str = ""

    # Factories are used so each instance gets its own timestamp / dict.
    created_at: datetime = field(default_factory=utcnow)
    updated_at: datetime = field(default_factory=utcnow)
    metadata: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass
class ParsedDocument:
    """Represent the Parsed Document type.

    Holds three lists of plain dictionaries (``structure_nodes``,
    ``semantic_blocks``, ``vector_chunks``) together with the name of the
    parser that produced them. All of these, plus ``doc_id`` and
    ``doc_name``, are required.

    ``raw_text`` defaults to ``""`` and ``metadata`` defaults to a fresh,
    independent ``dict`` per instance.

    NOTE(review): the key schema of the dictionaries inside the three
    lists is not defined in this module -- confirm against the parser
    implementation before relying on specific keys.
    """

    # Required fields.
    doc_id: str
    doc_name: str
    structure_nodes: list[dict[str, Any]]
    semantic_blocks: list[dict[str, Any]]
    vector_chunks: list[dict[str, Any]]
    parser_name: str

    # Optional fields.
    raw_text: str = ""
    metadata: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass
class Chunk:
    """Represent the Chunk type.

    ``chunk_id``, ``doc_id``, ``doc_name``, ``content`` and
    ``embedding_text`` are required. The remaining fields default to an
    empty string, ``0``, or a fresh empty container, so each instance owns
    its own ``section_path`` list and ``metadata`` dict.

    NOTE(review): ``embedding_text`` is presumably the text sent to the
    embedding model while ``content`` is the text shown to users, and
    ``page_number == 0`` presumably means "unknown" -- confirm with the
    chunking/indexing code.
    """

    # Required fields.
    chunk_id: str
    doc_id: str
    doc_name: str
    content: str
    embedding_text: str

    # Optional location / classification attributes.
    section_title: str = ""
    section_path: list[str] = field(default_factory=list)
    page_number: int = 0
    regulation_type: str = ""
    version: str = ""
    semantic_id: str = ""
    block_type: str = ""
    metadata: dict[str, Any] = field(default_factory=dict)
|
||||
Reference in New Issue
Block a user