Fix SSE route dependency and align architecture docs
This commit is contained in:
5
backend/app/domain/__init__.py
Normal file
5
backend/app/domain/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
"""Initialize the app.domain package."""
|
||||
# Keep package boundaries explicit so backend imports stay predictable.
|
||||
|
||||
|
||||
# The top-level domain package re-exports nothing; callers import from the
# individual sub-packages instead.
__all__: list[str] = []
|
||||
15
backend/app/domain/conversation/__init__.py
Normal file
15
backend/app/domain/conversation/__init__.py
Normal file
@@ -0,0 +1,15 @@
|
||||
"""Initialize the app.domain.conversation package."""
|
||||
|
||||
from .models import AnswerResult, AnswerSource, ConversationMessage, ConversationSession
|
||||
from .ports import AnswerGenerator, ConversationStore
|
||||
# Keep package boundaries explicit so backend imports stay predictable.
|
||||
|
||||
|
||||
# Public surface of the conversation package: every name imported above from
# ``.models`` and ``.ports`` is re-exported, listed alphabetically.
__all__ = [
    "AnswerGenerator",
    "AnswerResult",
    "AnswerSource",
    "ConversationMessage",
    "ConversationSession",
    "ConversationStore",
]
|
||||
53
backend/app/domain/conversation/models.py
Normal file
53
backend/app/domain/conversation/models.py
Normal file
@@ -0,0 +1,53 @@
|
||||
"""Define domain models for conversation."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
# Keep module behavior explicit so the backend flow stays easy to audit.
|
||||
|
||||
|
||||
|
||||
@dataclass
class AnswerSource:
    """Hold one source passage that is reported alongside an answer.

    All fields except ``metadata`` are required and positional in the
    order declared below.
    """

    # Identifiers of the originating document and chunk.
    doc_id: str
    doc_name: str
    chunk_id: str

    # Where the passage sits inside the document.
    section_title: str
    page_number: int

    # Score and text of the passage (the score scale is not defined here).
    score: float
    content: str

    # Free-form extras; the factory gives every instance its own dict.
    metadata: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass
class ConversationMessage:
    """Hold a single message of a conversation.

    ``role``, ``content`` and ``timestamp`` are required; ``sources``
    defaults to a fresh empty list per instance.
    """

    role: str
    content: str
    # NOTE(review): the unit (seconds vs. milliseconds) is not fixed here.
    timestamp: int
    # Plain dicts rather than typed source objects; the dict schema is not
    # defined in this module.
    sources: list[dict[str, Any]] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass
class ConversationSession:
    """Hold a conversation session and the messages recorded in it.

    Only ``session_id`` is required. Container fields are created per
    instance, and both timestamps default to ``0``.
    """

    session_id: str
    messages: list[ConversationMessage] = field(default_factory=list)

    # NOTE(review): the timestamp unit is not fixed here; 0 is the default.
    created_at: int = 0
    updated_at: int = 0

    # Free-form extras attached to the session.
    metadata: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass
class AnswerResult:
    """Hold a generated answer together with its bookkeeping data.

    Only ``answer`` is required; every other field has a neutral default
    (empty list, empty string, zero, ``False`` or ``None``).
    """

    answer: str
    sources: list[AnswerSource] = field(default_factory=list)

    # Name of the model that produced the answer; empty when not recorded.
    model: str = ""

    # Numeric counters, all zero by default.
    latency_ms: int = 0
    retrieved_count: int = 0
    context_tokens: int = 0

    # NOTE(review): presumably flags that context was cut to fit; confirm.
    truncated: bool = False

    # Error text, or None when no error is recorded.
    error: str | None = None
|
||||
78
backend/app/domain/conversation/ports.py
Normal file
78
backend/app/domain/conversation/ports.py
Normal file
@@ -0,0 +1,78 @@
|
||||
"""Define domain ports for conversation."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Generator
|
||||
|
||||
from app.domain.retrieval.models import RetrievedChunk
|
||||
|
||||
from .models import AnswerResult, ConversationSession
|
||||
# Keep domain contracts explicit so adapters can swap implementations cleanly.
|
||||
|
||||
|
||||
|
||||
class AnswerGenerator(ABC):
    """Abstract port for turning a query plus retrieved chunks into an answer.

    Subclasses must override both methods. All arguments are keyword-only.
    """

    @abstractmethod
    def generate(
        self,
        *,
        query: str,
        retrieved_chunks: list[RetrievedChunk],
        history: list[dict[str, str]] | None = None,
        provider: str | None = None,
        model: str | None = None,
        prompt_template: str | None = None,
    ) -> AnswerResult:
        """Produce the complete answer for ``query`` in one call.

        Args:
            query: The user question.
            retrieved_chunks: Chunks made available to the generator.
            history: Optional earlier turns as string-to-string dicts.
            provider: Optional provider override.
            model: Optional model override.
            prompt_template: Optional prompt template override.

        Returns:
            The generated answer as an ``AnswerResult``.
        """

    @abstractmethod
    def stream_generate(
        self,
        *,
        query: str,
        retrieved_chunks: list[RetrievedChunk],
        history: list[dict[str, str]] | None = None,
        provider: str | None = None,
        model: str | None = None,
        prompt_template: str | None = None,
    ) -> Generator[dict, None, AnswerResult]:
        """Produce the answer incrementally.

        Takes the same arguments as ``generate``. Per the annotation,
        implementations yield dict events and finish by returning an
        ``AnswerResult``; the event schema is not defined in this module.
        """
|
||||
|
||||
|
||||
class ConversationStore(ABC):
    """Abstract port for persisting conversation sessions.

    This class only declares the contract; concrete adapters supply the
    storage behavior.
    """

    @abstractmethod
    def create_session(self, metadata: dict | None = None) -> ConversationSession:
        """Create a new session, optionally tagged with ``metadata``."""

    @abstractmethod
    def get_session(self, session_id: str) -> ConversationSession | None:
        """Look up a session by id.

        May return ``None`` (presumably when no such session exists).
        """

    @abstractmethod
    def save_message(
        self,
        session_id: str,
        *,
        role: str,
        content: str,
        sources: list[dict] | None = None,
    ) -> ConversationSession | None:
        """Record a message in the session identified by ``session_id``.

        ``role``, ``content`` and ``sources`` are keyword-only. May return
        ``None`` (presumably when the session is unknown).
        """

    @abstractmethod
    def delete_session(self, session_id: str) -> bool:
        """Delete a session and report the outcome as a boolean."""

    @abstractmethod
    def list_sessions(self) -> list[dict]:
        """Return the known sessions as a list of dicts."""
|
||||
17
backend/app/domain/documents/__init__.py
Normal file
17
backend/app/domain/documents/__init__.py
Normal file
@@ -0,0 +1,17 @@
|
||||
"""Initialize the app.domain.documents package."""
|
||||
|
||||
from .models import Chunk, Document, DocumentStatus, ParsedDocument
|
||||
from .ports import ChunkBuilder, DocumentBinaryStore, DocumentParser, DocumentRepository
|
||||
# Keep package boundaries explicit so backend imports stay predictable.
|
||||
|
||||
|
||||
# Public surface of the documents package, in the same order as the two
# import statements above.
__all__ = [
    # Re-exported from .models
    "Chunk",
    "Document",
    "DocumentStatus",
    "ParsedDocument",
    # Re-exported from .ports
    "ChunkBuilder",
    "DocumentBinaryStore",
    "DocumentParser",
    "DocumentRepository",
]
|
||||
77
backend/app/domain/documents/models.py
Normal file
77
backend/app/domain/documents/models.py
Normal file
@@ -0,0 +1,77 @@
|
||||
"""Define domain models for documents."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import UTC, datetime
|
||||
from enum import Enum
|
||||
from typing import Any
|
||||
# Keep module behavior explicit so the backend flow stays easy to audit.
|
||||
|
||||
|
||||
def utcnow() -> datetime:
|
||||
return datetime.now(UTC)
|
||||
|
||||
|
||||
|
||||
class DocumentStatus(str, Enum):
    """Enumerate the status values a document record can carry.

    Members are also ``str`` instances, so each compares equal to its
    lowercase value.
    """

    # NOTE(review): presumably the ingestion lifecycle in this order; confirm.
    PENDING = "pending"
    STORED = "stored"
    PARSED = "parsed"
    INDEXED = "indexed"

    # Status for a document whose processing failed.
    FAILED = "failed"
|
||||
|
||||
|
||||
@dataclass
class Document:
    """Hold the record kept for one uploaded document.

    The six leading fields are required. Everything else defaults to an
    empty or zero value, a ``PENDING`` status, the current UTC time for
    both timestamps, and a fresh ``metadata`` dict.
    """

    # Required identity and storage attributes.
    doc_id: str
    doc_name: str
    file_name: str
    object_name: str
    content_type: str
    size_bytes: int

    # Processing state; new records start as PENDING.
    status: DocumentStatus = DocumentStatus.PENDING

    # Optional descriptive attributes, empty until filled in.
    regulation_type: str = ""
    version: str = ""
    summary: str = ""
    summary_latency_ms: int = 0

    # Processing results, empty or zero until filled in.
    chunk_count: int = 0
    parser_name: str = ""
    index_name: str = ""
    error_message: str = ""

    # Timestamps are taken from utcnow() when the instance is created.
    created_at: datetime = field(default_factory=utcnow)
    updated_at: datetime = field(default_factory=utcnow)

    # Free-form extras; the factory gives every instance its own dict.
    metadata: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass
class ParsedDocument:
    """Hold the output of parsing one document.

    ``raw_text`` and ``metadata`` are optional; all other fields are
    required in the order declared.
    """

    doc_id: str
    doc_name: str

    # Three parser outputs, each a list of dicts whose schema is not
    # defined in this module.
    structure_nodes: list[dict[str, Any]]
    semantic_blocks: list[dict[str, Any]]
    vector_chunks: list[dict[str, Any]]

    # Name of the parser that produced this result.
    parser_name: str

    raw_text: str = ""
    metadata: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass
class Chunk:
    """Hold one chunk of a document together with its embedding text.

    The first five fields are required; the rest default to empty or zero
    values, with fresh containers per instance.
    """

    # Required identity and text fields.
    chunk_id: str
    doc_id: str
    doc_name: str
    content: str
    # Kept separately from ``content``; presumably the text that is embedded.
    embedding_text: str

    # Optional location of the chunk inside the document.
    section_title: str = ""
    section_path: list[str] = field(default_factory=list)
    page_number: int = 0

    # Optional classification attributes.
    regulation_type: str = ""
    version: str = ""
    semantic_id: str = ""
    block_type: str = ""

    # Free-form extras.
    metadata: dict[str, Any] = field(default_factory=dict)
|
||||
96
backend/app/domain/documents/ports.py
Normal file
96
backend/app/domain/documents/ports.py
Normal file
@@ -0,0 +1,96 @@
|
||||
"""Define domain ports for documents."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
from .models import Chunk, Document, DocumentStatus, ParsedDocument
|
||||
# Keep domain contracts explicit so adapters can swap implementations cleanly.
|
||||
|
||||
|
||||
|
||||
class DocumentRepository(ABC):
    """Abstract port for storing and querying ``Document`` records.

    This class only declares the contract; concrete adapters supply the
    persistence behavior.
    """

    @abstractmethod
    def create(self, document: Document) -> Document:
        """Persist a new document record and return the stored record."""

    @abstractmethod
    def update(self, document: Document) -> Document:
        """Persist changes to an existing record and return the stored record."""

    @abstractmethod
    def get(self, doc_id: str) -> Document | None:
        """Fetch a record by id.

        May return ``None`` (presumably when no such record exists).
        """

    # NOTE: the method name shadows the builtin ``list`` inside this class
    # body. The ``list[...]`` annotations stay safe because the module
    # postpones annotation evaluation.
    @abstractmethod
    def list(self, limit: int | None = None) -> list[Document]:
        """Return stored records, optionally bounded by ``limit``."""

    @abstractmethod
    def update_status(
        self,
        doc_id: str,
        status: DocumentStatus,
        *,
        error_message: str = "",
        chunk_count: int | None = None,
        summary: str | None = None,
        summary_latency_ms: int | None = None,
        parser_name: str | None = None,
        index_name: str | None = None,
        metadata: dict | None = None,
    ) -> Document | None:
        """Set the status of a record, with optional keyword-only details.

        Args:
            doc_id: Id of the record to update.
            status: New status value.
            error_message: Error text to store; empty by default.
            chunk_count: Optional chunk count.
            summary: Optional summary text.
            summary_latency_ms: Optional summary latency.
            parser_name: Optional parser name.
            index_name: Optional index name.
            metadata: Optional metadata dict.

        Returns:
            The updated record, or ``None`` (presumably when ``doc_id``
            is unknown). How ``None`` arguments are treated is left to
            the implementation.
        """
|
||||
|
||||
|
||||
class DocumentBinaryStore(ABC):
    """Abstract port for storing raw document bytes under an object name.

    This class only declares the contract; concrete adapters supply the
    storage behavior.
    """

    @abstractmethod
    def save(
        self,
        *,
        object_name: str,
        data: bytes,
        content_type: str,
        metadata: dict[str, str] | None = None,
    ) -> None:
        """Store ``data`` under ``object_name``.

        All arguments are keyword-only. ``metadata`` is an optional
        string-to-string mapping passed along with the object.
        """

    @abstractmethod
    def read(self, object_name: str) -> bytes:
        """Return the bytes stored under ``object_name``."""

    @abstractmethod
    def delete(self, object_name: str) -> None:
        """Remove the object stored under ``object_name``."""
|
||||
|
||||
|
||||
class DocumentParser(ABC):
    """Abstract port for parsing a file into a ``ParsedDocument``."""

    @abstractmethod
    def parse(self, *, file_path: str, doc_id: str, doc_name: str) -> ParsedDocument:
        """Parse the file at ``file_path``.

        All arguments are keyword-only. ``doc_id`` and ``doc_name``
        identify the document the result belongs to.
        """
|
||||
|
||||
|
||||
class ChunkBuilder(ABC):
    """Abstract port for turning a parsed document into ``Chunk`` objects."""

    @abstractmethod
    def build(
        self,
        *,
        parsed_document: ParsedDocument,
        regulation_type: str,
        version: str,
    ) -> list[Chunk]:
        """Build the chunks for ``parsed_document``.

        All arguments are keyword-only. ``regulation_type`` and
        ``version`` are supplied by the caller alongside the parsed
        document; how they are applied is left to the implementation.
        """
|
||||
8
backend/app/domain/retrieval/__init__.py
Normal file
8
backend/app/domain/retrieval/__init__.py
Normal file
@@ -0,0 +1,8 @@
|
||||
"""Initialize the app.domain.retrieval package."""
|
||||
|
||||
from .models import RetrievalQuery, RetrievedChunk
|
||||
from .ports import EmbeddingProvider, Retriever, VectorIndex
|
||||
# Keep package boundaries explicit so backend imports stay predictable.
|
||||
|
||||
|
||||
# Public surface of the retrieval package, in the same order as the two
# import statements above.
__all__ = [
    # Re-exported from .models
    "RetrievalQuery",
    "RetrievedChunk",
    # Re-exported from .ports
    "EmbeddingProvider",
    "Retriever",
    "VectorIndex",
]
|
||||
29
backend/app/domain/retrieval/models.py
Normal file
29
backend/app/domain/retrieval/models.py
Normal file
@@ -0,0 +1,29 @@
|
||||
"""Define domain models for retrieval."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
# Keep module behavior explicit so the backend flow stays easy to audit.
|
||||
|
||||
|
||||
|
||||
@dataclass
class RetrievalQuery:
    """Hold the parameters of one retrieval request."""

    # Query text to search for.
    query: str
    # Requested number of results.
    top_k: int
    # Optional filter expression; its syntax is not defined in this module.
    filters: str | None = None
|
||||
|
||||
|
||||
@dataclass
class RetrievedChunk:
    """Hold one chunk returned by a search, together with its score.

    The first five fields are required; location fields and ``metadata``
    are optional.
    """

    # Required identity, text and score (the score scale is not defined here).
    chunk_id: str
    doc_id: str
    doc_name: str
    content: str
    score: float

    # Optional location of the chunk inside the document.
    section_title: str = ""
    page_number: int = 0

    # Free-form extras; the factory gives every instance its own dict.
    metadata: dict[str, Any] = field(default_factory=dict)
|
||||
60
backend/app/domain/retrieval/ports.py
Normal file
60
backend/app/domain/retrieval/ports.py
Normal file
@@ -0,0 +1,60 @@
|
||||
"""Define domain ports for retrieval."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
from app.domain.documents.models import Chunk
|
||||
|
||||
from .models import RetrievalQuery, RetrievedChunk
|
||||
# Keep domain contracts explicit so adapters can swap implementations cleanly.
|
||||
|
||||
|
||||
|
||||
class EmbeddingProvider(ABC):
    """Abstract port for turning text into embedding vectors."""

    @abstractmethod
    def embed_texts(self, texts: list[str]) -> list[list[float]]:
        """Embed a batch of texts and return a list of float vectors."""

    @abstractmethod
    def embed_query(self, text: str) -> list[float]:
        """Embed a single query text and return one float vector."""
|
||||
|
||||
|
||||
class VectorIndex(ABC):
    """Abstract port for a vector index holding chunk embeddings.

    This class only declares the contract; concrete adapters supply the
    index behavior.
    """

    @abstractmethod
    def upsert(self, chunks: list[Chunk], vectors: list[list[float]]) -> int:
        """Write ``chunks`` with their ``vectors`` into the index.

        Returns an int (presumably the number of entries written).
        """

    @abstractmethod
    def delete_by_document(self, doc_id: str) -> int:
        """Remove the entries belonging to ``doc_id``.

        Returns an int (presumably the number of entries removed).
        """

    @abstractmethod
    def search(self, query_vector: list[float], top_k: int, filters: str | None = None) -> list[RetrievedChunk]:
        """Search the index with ``query_vector``.

        ``top_k`` is the requested number of results and ``filters`` is an
        optional filter expression.
        """

    @abstractmethod
    def health(self) -> dict:
        """Return health information for the index as a dict."""
|
||||
|
||||
|
||||
class Retriever(ABC):
    """Abstract port for retrieving chunks relevant to a text query.

    Two entry points are declared and both must be overridden: one takes
    a ``RetrievalQuery`` object, the other takes its parts as arguments.
    """

    @abstractmethod
    def retrieve(self, query: RetrievalQuery) -> list[RetrievedChunk]:
        """Retrieve chunks for a ``RetrievalQuery`` object."""

    @abstractmethod
    def search(self, query: str, top_k: int, filters: str | None = None) -> list[RetrievedChunk]:
        """Retrieve chunks for a query string.

        ``top_k`` is the requested number of results and ``filters`` is an
        optional filter expression.
        """
|
||||
Reference in New Issue
Block a user