fix 文档管理模块 & 法规对话模块

This commit is contained in:
2026-05-20 23:34:08 +08:00
parent c22b03dc07
commit b065d55c86
39 changed files with 1671 additions and 540 deletions

View File

@@ -1,8 +1,8 @@
"""Initialize the app.domain.retrieval package."""
from .models import RetrievalQuery, RetrievedChunk
from .ports import EmbeddingProvider, Retriever, VectorIndex
from .ports import EmbeddingProvider, Reranker, Retriever, VectorIndex
# Keep package boundaries explicit so backend imports stay predictable.
# NOTE(review): __all__ is assigned twice in a row below; only the second
# assignment (the one that also lists "Reranker") takes effect. The first
# looks like a leftover pre-change line -- confirm and remove it.
__all__ = ["RetrievalQuery", "RetrievedChunk", "EmbeddingProvider", "Retriever", "VectorIndex"]
__all__ = ["RetrievalQuery", "RetrievedChunk", "EmbeddingProvider", "Reranker", "Retriever", "VectorIndex"]

View File

@@ -10,7 +10,6 @@ from .models import RetrievalQuery, RetrievedChunk
# Keep domain contracts explicit so adapters can swap implementations cleanly.
class EmbeddingProvider(ABC):
"""Provide the Embedding Provider provider."""
@abstractmethod
@@ -41,12 +40,35 @@ class VectorIndex(ABC):
"""Handle search for the Vector Index instance."""
pass
@abstractmethod
def count_by_document(self) -> dict[str, int]:
    """Return a mapping of doc_id -> chunk count from the vector store.

    Abstract: concrete vector-index adapters must override this method.

    Returns:
        A dict keyed by document id whose values are the number of chunks
        held for that document.
    """
@abstractmethod
def list_document_metadata(self) -> list[dict]:
    """Return per-document metadata rows from the vector store.

    Abstract: concrete vector-index adapters must override this method.

    Returns:
        A list with one dict per document. Each row contains at minimum
        ``doc_id``, ``doc_name`` and ``chunk_count``; ``regulation_type``
        and ``version`` are optional fields.
    """
@abstractmethod
def health(self) -> dict:
    """Report health information for the vector index.

    Abstract: concrete vector-index adapters must override this method.

    Returns:
        A dict describing the index's health. The keys are not fixed by
        this interface -- TODO confirm the expected schema with the
        concrete implementations.
    """
class Reranker(ABC):
    """Re-score and re-order a candidate list using a cross-encoder model.

    This is a domain port: it only declares the contract, and adapters
    supply the actual scoring model.
    """

    @abstractmethod
    def rerank(self, query: str, chunks: list[RetrievedChunk], top_k: int) -> list[RetrievedChunk]:
        """Return top_k chunks sorted by cross-encoder score (descending).

        Args:
            query: The query text the candidates are scored against.
            chunks: Candidate chunks to re-score.
            top_k: Number of chunks to return.

        Returns:
            The ``top_k`` chunks, highest score first.
        """
class Retriever(ABC):
"""Provide the Retriever retriever."""
@abstractmethod