Fix SSE route dependency and align architecture docs

This commit is contained in:
ash66
2026-05-18 16:32:42 +08:00
parent 86b9ac806a
commit 3f69cad404
149 changed files with 4786 additions and 5957 deletions

View File

@@ -0,0 +1,5 @@
"""Initialize the app.domain package."""
# The top-level domain package deliberately re-exports nothing: callers import
# from the concrete subpackages so package boundaries stay explicit.
__all__ = []

View File

@@ -0,0 +1,15 @@
"""Initialize the app.domain.conversation package."""
from .models import AnswerResult, AnswerSource, ConversationMessage, ConversationSession
from .ports import AnswerGenerator, ConversationStore
# Keep package boundaries explicit so backend imports stay predictable.
# Public surface of the conversation package: the names re-exported from
# ``.models`` and ``.ports`` above, kept in alphabetical order.
__all__ = [
    "AnswerGenerator",
    "AnswerResult",
    "AnswerSource",
    "ConversationMessage",
    "ConversationSession",
    "ConversationStore",
]

View File

@@ -0,0 +1,53 @@
"""Define domain models for conversation."""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any
# Keep module behavior explicit so the backend flow stays easy to audit.
@dataclass
class AnswerSource:
    """Hold one source entry attached to a generated answer.

    Instances are the element type of ``AnswerResult.sources``. All fields
    except ``metadata`` are required at construction time.
    """

    # Identifiers of the originating document and chunk.
    doc_id: str
    doc_name: str
    chunk_id: str
    # Location hints inside the document.
    section_title: str
    page_number: int
    # NOTE(review): presumably the retrieval relevance score — confirm scale
    # and direction with the producing adapter.
    score: float
    content: str
    # Free-form extras; a fresh dict per instance (never shared between them).
    metadata: dict[str, Any] = field(default_factory=dict)
@dataclass
class ConversationMessage:
    """Hold a single message stored in a conversation session.

    Instances are the element type of ``ConversationSession.messages``.
    """

    # NOTE(review): presumably a speaker label such as "user"/"assistant" —
    # the allowed values are not defined in this module; confirm with callers.
    role: str
    content: str
    # Integer timestamp; the unit (seconds vs. milliseconds) is not fixed
    # here — TODO confirm against the store implementation.
    timestamp: int
    # Sources are kept as plain dicts here, not ``AnswerSource`` instances.
    # A fresh list is created per instance.
    sources: list[dict[str, Any]] = field(default_factory=list)
@dataclass
class ConversationSession:
    """Hold a conversation session and its ordered list of messages.

    Only ``session_id`` is required; every other field has a default.
    """

    session_id: str
    # Fresh list per instance; elements are ``ConversationMessage`` objects.
    messages: list[ConversationMessage] = field(default_factory=list)
    # Integer timestamps defaulting to 0; the unit is not fixed in this
    # module — TODO confirm against the store implementation.
    created_at: int = 0
    updated_at: int = 0
    # Free-form extras; a fresh dict per instance.
    metadata: dict[str, Any] = field(default_factory=dict)
@dataclass
class AnswerResult:
    """Hold the outcome of one answer-generation call.

    This is the return type of ``AnswerGenerator.generate`` and the final
    return value of ``AnswerGenerator.stream_generate``. Only ``answer`` is
    required; every other field has a default.
    """

    answer: str
    # Fresh list per instance; elements are ``AnswerSource`` objects.
    sources: list[AnswerSource] = field(default_factory=list)
    model: str = ""
    latency_ms: int = 0
    retrieved_count: int = 0
    context_tokens: int = 0
    # NOTE(review): presumably set when context was cut to fit a budget —
    # confirm with the generator implementation.
    truncated: bool = False
    # ``None`` by default; presumably an error description on failure — confirm.
    error: str | None = None

View File

@@ -0,0 +1,78 @@
"""Define domain ports for conversation."""
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import Generator
from app.domain.retrieval.models import RetrievedChunk
from .models import AnswerResult, ConversationSession
# Keep domain contracts explicit so adapters can swap implementations cleanly.
class AnswerGenerator(ABC):
    """Abstract port for producing answers from a query and retrieved chunks.

    Both methods take keyword-only arguments with identical names and
    defaults; they differ only in how the result is delivered.
    """

    @abstractmethod
    def generate(
        self,
        *,
        query: str,
        retrieved_chunks: list[RetrievedChunk],
        history: list[dict[str, str]] | None = None,
        provider: str | None = None,
        model: str | None = None,
        prompt_template: str | None = None,
    ) -> AnswerResult:
        """Produce a complete answer in a single call.

        Args:
            query: The question text.
            retrieved_chunks: Chunks supplied as context for the answer.
            history: Optional earlier turns as string-to-string dicts
                (presumably role/content pairs — confirm with callers).
            provider: Optional provider selector; ``None`` presumably means
                the implementation's default — confirm.
            model: Optional model selector; same ``None`` caveat.
            prompt_template: Optional prompt template; same ``None`` caveat.

        Returns:
            The finished ``AnswerResult``.
        """

    @abstractmethod
    def stream_generate(
        self,
        *,
        query: str,
        retrieved_chunks: list[RetrievedChunk],
        history: list[dict[str, str]] | None = None,
        provider: str | None = None,
        model: str | None = None,
        prompt_template: str | None = None,
    ) -> Generator[dict, None, AnswerResult]:
        """Produce an answer incrementally.

        Takes the same keyword-only arguments as ``generate``.

        Returns:
            A generator that yields ``dict`` events and whose return value
            (carried by ``StopIteration.value``) is the final
            ``AnswerResult``. The event schema is not defined by this port.
        """
class ConversationStore(ABC):
    """Abstract port for persisting conversation sessions and their messages."""

    @abstractmethod
    def create_session(self, metadata: dict | None = None) -> ConversationSession:
        """Create a new session and return it.

        Args:
            metadata: Optional metadata dict to associate with the session.
        """

    @abstractmethod
    def get_session(self, session_id: str) -> ConversationSession | None:
        """Look up a session by id.

        Returns:
            The session, or ``None`` (presumably when the id is unknown —
            confirm with implementations).
        """

    @abstractmethod
    def save_message(
        self,
        session_id: str,
        *,
        role: str,
        content: str,
        sources: list[dict] | None = None,
    ) -> ConversationSession | None:
        """Record a message against a session.

        Args:
            session_id: Target session id (the only positional argument).
            role: Message role label (keyword-only).
            content: Message text (keyword-only).
            sources: Optional list of source dicts (keyword-only).

        Returns:
            A ``ConversationSession`` or ``None`` (presumably ``None`` when
            the session does not exist — confirm with implementations).
        """

    @abstractmethod
    def delete_session(self, session_id: str) -> bool:
        """Delete a session by id and report the outcome as a boolean."""

    @abstractmethod
    def list_sessions(self) -> list[dict]:
        """Return the known sessions as a list of dicts.

        The dict schema is not defined by this port.
        """

View File

@@ -0,0 +1,17 @@
"""Initialize the app.domain.documents package."""
from .models import Chunk, Document, DocumentStatus, ParsedDocument
from .ports import ChunkBuilder, DocumentBinaryStore, DocumentParser, DocumentRepository
# Keep package boundaries explicit so backend imports stay predictable.
__all__ = [
    # Re-exported from ``.models``.
    "Chunk",
    "Document",
    "DocumentStatus",
    "ParsedDocument",
    # Re-exported from ``.ports``.
    "ChunkBuilder",
    "DocumentBinaryStore",
    "DocumentParser",
    "DocumentRepository",
]

View File

@@ -0,0 +1,77 @@
"""Define domain models for documents."""
from __future__ import annotations
from dataclasses import dataclass, field
from datetime import UTC, datetime
from enum import Enum
from typing import Any
# Keep module behavior explicit so the backend flow stays easy to audit.
def utcnow() -> datetime:
return datetime.now(UTC)
class DocumentStatus(str, Enum):
    """Enumerate the status values a ``Document`` can carry.

    Members are also ``str`` instances, so each compares equal to its
    lowercase string value.
    """

    PENDING = "pending"
    STORED = "stored"
    PARSED = "parsed"
    INDEXED = "indexed"
    FAILED = "failed"
@dataclass
class Document:
    """Hold the stored record for one document.

    The first six fields are required; the rest have defaults, so a new
    record starts in ``DocumentStatus.PENDING`` with empty descriptive fields.
    """

    doc_id: str
    doc_name: str
    file_name: str
    # NOTE(review): presumably the key under which the raw file is kept in
    # the binary store — confirm against ``DocumentBinaryStore`` usage.
    object_name: str
    content_type: str
    size_bytes: int
    status: DocumentStatus = DocumentStatus.PENDING
    regulation_type: str = ""
    version: str = ""
    summary: str = ""
    summary_latency_ms: int = 0
    chunk_count: int = 0
    parser_name: str = ""
    index_name: str = ""
    error_message: str = ""
    # Both timestamps are filled by ``utcnow`` at construction time, so they
    # are timezone-aware UTC values (two separate calls, not one shared value).
    created_at: datetime = field(default_factory=utcnow)
    updated_at: datetime = field(default_factory=utcnow)
    # Free-form extras; a fresh dict per instance.
    metadata: dict[str, Any] = field(default_factory=dict)
@dataclass
class ParsedDocument:
    """Hold the output of parsing one document.

    This is the return type of ``DocumentParser.parse`` and the input of
    ``ChunkBuilder.build``. The three list fields are required and hold plain
    dicts whose schema is not defined in this module.
    """

    doc_id: str
    doc_name: str
    structure_nodes: list[dict[str, Any]]
    semantic_blocks: list[dict[str, Any]]
    vector_chunks: list[dict[str, Any]]
    parser_name: str
    raw_text: str = ""
    # Free-form extras; a fresh dict per instance.
    metadata: dict[str, Any] = field(default_factory=dict)
@dataclass
class Chunk:
    """Hold one chunk of a document prepared for indexing.

    This is the element type returned by ``ChunkBuilder.build``. The first
    five fields are required; the rest have defaults.
    """

    chunk_id: str
    doc_id: str
    doc_name: str
    content: str
    # NOTE(review): presumably the text actually sent to the embedding
    # provider, which may differ from ``content`` — confirm with the builder.
    embedding_text: str
    section_title: str = ""
    # Fresh list per instance.
    section_path: list[str] = field(default_factory=list)
    page_number: int = 0
    regulation_type: str = ""
    version: str = ""
    semantic_id: str = ""
    block_type: str = ""
    # Free-form extras; a fresh dict per instance.
    metadata: dict[str, Any] = field(default_factory=dict)

View File

@@ -0,0 +1,96 @@
"""Define domain ports for documents."""
from __future__ import annotations
from abc import ABC, abstractmethod
from .models import Chunk, Document, DocumentStatus, ParsedDocument
# Keep domain contracts explicit so adapters can swap implementations cleanly.
class DocumentRepository(ABC):
    """Abstract port for persisting and querying ``Document`` records."""

    @abstractmethod
    def create(self, document: Document) -> Document:
        """Persist a new document record and return a ``Document``."""

    @abstractmethod
    def update(self, document: Document) -> Document:
        """Persist changes to a document record and return a ``Document``."""

    @abstractmethod
    def get(self, doc_id: str) -> Document | None:
        """Look up a document by id.

        Returns:
            The document, or ``None`` (presumably when the id is unknown —
            confirm with implementations).
        """

    # NOTE: this method name shadows the builtin ``list`` inside the class
    # namespace. Annotations are never evaluated at runtime here because the
    # module uses ``from __future__ import annotations``, but static checkers
    # may resolve a ``list[...]`` annotation written later in this class body
    # to this method — keep that in mind when adding methods below.
    @abstractmethod
    def list(self, limit: int | None = None) -> list[Document]:
        """Return stored documents.

        Args:
            limit: Optional cap on the number of results; ``None`` presumably
                means no cap — confirm with implementations.
        """

    @abstractmethod
    def update_status(
        self,
        doc_id: str,
        status: DocumentStatus,
        *,
        error_message: str = "",
        chunk_count: int | None = None,
        summary: str | None = None,
        summary_latency_ms: int | None = None,
        parser_name: str | None = None,
        index_name: str | None = None,
        metadata: dict | None = None,
    ) -> Document | None:
        """Set a document's status, optionally with related fields.

        Args:
            doc_id: Id of the document to update.
            status: The new status.
            error_message: Keyword-only; defaults to the empty string.
            chunk_count: Keyword-only optional value.
            summary: Keyword-only optional value.
            summary_latency_ms: Keyword-only optional value.
            parser_name: Keyword-only optional value.
            index_name: Keyword-only optional value.
            metadata: Keyword-only optional dict.

        The ``None`` defaults presumably mean "leave the stored value
        unchanged" — confirm with implementations.

        Returns:
            A ``Document`` or ``None`` (presumably ``None`` when the id is
            unknown — confirm with implementations).
        """
class DocumentBinaryStore(ABC):
    """Abstract port for storing raw document bytes under an object name."""

    @abstractmethod
    def save(
        self,
        *,
        object_name: str,
        data: bytes,
        content_type: str,
        metadata: dict[str, str] | None = None,
    ) -> None:
        """Store ``data`` under ``object_name``.

        All arguments are keyword-only.

        Args:
            object_name: Name to store the bytes under.
            data: The raw bytes.
            content_type: Content type recorded for the object.
            metadata: Optional string-to-string metadata.
        """

    @abstractmethod
    def read(self, object_name: str) -> bytes:
        """Return the bytes stored under ``object_name``."""

    @abstractmethod
    def delete(self, object_name: str) -> None:
        """Remove the object stored under ``object_name``."""
class DocumentParser(ABC):
    """Abstract port for turning a file on disk into a ``ParsedDocument``."""

    @abstractmethod
    def parse(self, *, file_path: str, doc_id: str, doc_name: str) -> ParsedDocument:
        """Parse the file at ``file_path``.

        All arguments are keyword-only and required.

        Args:
            file_path: Path of the file to parse.
            doc_id: Document id to associate with the result.
            doc_name: Document name to associate with the result.

        Returns:
            The parsed representation of the document.
        """
class ChunkBuilder(ABC):
    """Abstract port for deriving ``Chunk`` objects from a parsed document."""

    @abstractmethod
    def build(
        self,
        *,
        parsed_document: ParsedDocument,
        regulation_type: str,
        version: str,
    ) -> list[Chunk]:
        """Build the chunks for one parsed document.

        All arguments are keyword-only and required.

        Args:
            parsed_document: Parser output to build chunks from.
            regulation_type: Regulation type label for the document.
            version: Version label for the document.

        Returns:
            The list of chunks.
        """

View File

@@ -0,0 +1,8 @@
"""Initialize the app.domain.retrieval package."""
from .models import RetrievalQuery, RetrievedChunk
from .ports import EmbeddingProvider, Retriever, VectorIndex
# Keep package boundaries explicit so backend imports stay predictable.
__all__ = [
    # Re-exported from ``.models``.
    "RetrievalQuery",
    "RetrievedChunk",
    # Re-exported from ``.ports``.
    "EmbeddingProvider",
    "Retriever",
    "VectorIndex",
]

View File

@@ -0,0 +1,29 @@
"""Define domain models for retrieval."""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any
# Keep module behavior explicit so the backend flow stays easy to audit.
@dataclass
class RetrievalQuery:
    """Bundle the parameters of one retrieval request.

    This is the argument type of ``Retriever.retrieve``.
    """

    query: str
    top_k: int
    # Optional filter given as a single string; its syntax is not defined in
    # this module (presumably backend-specific — confirm with the index adapter).
    filters: str | None = None
@dataclass
class RetrievedChunk:
    """Hold one chunk returned by a retrieval or vector-index search.

    The first five fields are required; the rest have defaults.
    """

    chunk_id: str
    doc_id: str
    doc_name: str
    content: str
    # NOTE(review): scale and direction (higher-is-better vs. distance) are
    # not defined in this module — confirm with the index adapter.
    score: float
    section_title: str = ""
    page_number: int = 0
    # Free-form extras; a fresh dict per instance.
    metadata: dict[str, Any] = field(default_factory=dict)

View File

@@ -0,0 +1,60 @@
"""Define domain ports for retrieval."""
from __future__ import annotations
from abc import ABC, abstractmethod
from app.domain.documents.models import Chunk
from .models import RetrievalQuery, RetrievedChunk
# Keep domain contracts explicit so adapters can swap implementations cleanly.
class EmbeddingProvider(ABC):
    """Abstract port for turning text into embedding vectors."""

    @abstractmethod
    def embed_texts(self, texts: list[str]) -> list[list[float]]:
        """Embed a batch of texts.

        Returns:
            A list of float vectors (presumably one per input text, in the
            same order — confirm with implementations).
        """

    @abstractmethod
    def embed_query(self, text: str) -> list[float]:
        """Embed a single query string and return its float vector."""
class VectorIndex(ABC):
    """Abstract port for a vector index over document chunks."""

    @abstractmethod
    def upsert(self, chunks: list[Chunk], vectors: list[list[float]]) -> int:
        """Insert or update chunks together with their vectors.

        Args:
            chunks: The chunks to store.
            vectors: The float vectors (presumably aligned one-to-one with
                ``chunks`` — confirm with implementations).

        Returns:
            An integer count (presumably the number of entries written —
            confirm with implementations).
        """

    @abstractmethod
    def delete_by_document(self, doc_id: str) -> int:
        """Remove the entries belonging to ``doc_id``.

        Returns:
            An integer count (presumably the number of entries removed —
            confirm with implementations).
        """

    @abstractmethod
    def search(self, query_vector: list[float], top_k: int, filters: str | None = None) -> list[RetrievedChunk]:
        """Search the index with a query vector.

        Args:
            query_vector: The embedded query.
            top_k: Requested number of results.
            filters: Optional filter string; its syntax is not defined by
                this port.

        Returns:
            The matching chunks.
        """

    @abstractmethod
    def health(self) -> dict:
        """Return health information as a dict.

        The dict schema is not defined by this port.
        """
class Retriever(ABC):
    """Abstract port for retrieving chunks relevant to a text query.

    The two methods accept the same information in two shapes: ``retrieve``
    takes a ``RetrievalQuery`` object, ``search`` takes its three values as
    separate arguments.
    """

    @abstractmethod
    def retrieve(self, query: RetrievalQuery) -> list[RetrievedChunk]:
        """Return the chunks retrieved for a ``RetrievalQuery``."""

    @abstractmethod
    def search(self, query: str, top_k: int, filters: str | None = None) -> list[RetrievedChunk]:
        """Return the chunks retrieved for a plain-text query.

        Args:
            query: The query text.
            top_k: Requested number of results.
            filters: Optional filter string; its syntax is not defined by
                this port.
        """