Fix SSE route dependency and align architecture docs

This commit is contained in:
ash66
2026-05-18 16:32:42 +08:00
parent 86b9ac806a
commit 3f69cad404
149 changed files with 4786 additions and 5957 deletions

View File

@@ -1,4 +1,4 @@
"""Milvus向量数据库客户端 - 存储与检索服务"""
"""Provide service-layer logic for milvus client."""
from pymilvus import (
connections,
@@ -17,11 +17,13 @@ import numpy as np
from ..embedding.text_chunker import TextChunk
from ..embedding.bge_m3_embedder import EmbeddingResult
from app.config.settings import settings
# Data structures for search results and stored Milvus documents.
@dataclass
class SearchResult:
"""检索结果"""
"""Represent the Search Result type."""
id: int
content: str
score: float
@@ -30,7 +32,7 @@ class SearchResult:
@dataclass
class MilvusDocument:
"""Milvus文档数据结构"""
"""Represent the Milvus Document type."""
doc_id: str
chunk_id: str
content: str
@@ -46,7 +48,7 @@ class MilvusDocument:
class MilvusClient:
"""Milvus向量数据库客户端"""
"""Represent the Milvus Client type."""
COLLECTION_NAME = "regulations"
@@ -73,6 +75,7 @@ class MilvusClient:
collection_name: str = None,
db_name: str = None
):
"""Initialize the Milvus Client instance."""
self.host = host or settings.milvus_host
self.port = port or settings.milvus_port
self.collection_name = collection_name or settings.milvus_collection
@@ -84,7 +87,7 @@ class MilvusClient:
logger.info(f"Milvus客户端配置: {self.host}:{self.port}, Collection: {self.collection_name}")
def connect(self) -> bool:
"""连接到Milvus服务器"""
"""Handle connect for the Milvus Client instance."""
try:
connections.connect(
alias="default",
@@ -101,7 +104,7 @@ class MilvusClient:
return False
def disconnect(self):
"""断开连接"""
"""Handle disconnect for the Milvus Client instance."""
try:
connections.disconnect("default")
self.connected = False
@@ -110,7 +113,7 @@ class MilvusClient:
logger.warning(f"断开连接时出错: {e}")
def create_collection(self, recreate: bool = False) -> bool:
"""创建Collection"""
"""Create collection for the Milvus Client instance."""
if not self.connected:
logger.warning("未连接到Milvus请先调用connect()")
return False
@@ -146,7 +149,7 @@ class MilvusClient:
return False
def _create_indexes(self):
"""创建向量索引"""
"""Handle create indexes for this module for the Milvus Client instance."""
if not self.collection:
return
@@ -177,13 +180,13 @@ class MilvusClient:
logger.warning(f"创建索引时出错: {e}")
def load_collection(self):
"""Load the collection into memory."""
"""Load collection for the Milvus Client instance."""
# load() is only called when self.collection is truthy.
if self.collection:
self.collection.load()
logger.info(f"Collection已加载: {self.collection_name}")
def release_collection(self):
"""Release the collection's memory."""
"""Handle release collection for the Milvus Client instance."""
# release() is only called when self.collection is truthy.
if self.collection:
self.collection.release()
logger.info(f"Collection已释放: {self.collection_name}")
@@ -193,7 +196,7 @@ class MilvusClient:
chunks: List[TextChunk],
embeddings: EmbeddingResult
) -> List[int]:
"""插入文档分块和嵌入向量"""
"""Handle insert chunks for the Milvus Client instance."""
if not self.collection:
logger.warning("Collection未初始化")
return []
@@ -246,7 +249,7 @@ class MilvusClient:
top_k: int = 10,
filters: Optional[str] = None
) -> List[SearchResult]:
"""混合检索Dense + Sparse"""
"""Handle hybrid search for the Milvus Client instance."""
if not self.collection:
logger.warning("Collection未初始化")
return []
@@ -254,10 +257,10 @@ class MilvusClient:
try:
self.collection.load()
# 使用简单的Dense检索兼容所有版本
# Use plain dense retrieval, which is compatible with all versions.
dense_results = self.dense_search(query_dense, top_k, filters)
# 可选合并Sparse结果
# Optionally merge in the sparse results.
if query_sparse:
sparse_results = self.sparse_search(query_sparse, top_k, filters)
merged = self._merge_results(dense_results, sparse_results, top_k)
@@ -277,7 +280,7 @@ class MilvusClient:
top_k: int,
dense_weight: float = 0.6
) -> List[SearchResult]:
"""手动融合Dense和Sparse结果"""
"""Handle merge results for this module for the Milvus Client instance."""
sparse_weight = 1 - dense_weight
merged_dict = {}
@@ -318,7 +321,7 @@ class MilvusClient:
top_k: int = 10,
filters: Optional[str] = None
) -> List[SearchResult]:
"""纯Dense向量检索"""
"""Handle dense search for the Milvus Client instance."""
if not self.collection:
return []
@@ -375,7 +378,7 @@ class MilvusClient:
top_k: int = 10,
filters: Optional[str] = None
) -> List[SearchResult]:
"""纯Sparse向量检索"""
"""Handle sparse search for the Milvus Client instance."""
if not self.collection:
return []
@@ -427,7 +430,7 @@ class MilvusClient:
return []
def delete_by_doc_id(self, doc_id: str) -> int:
"""根据doc_id删除记录"""
"""Delete by doc id for the Milvus Client instance."""
if not self.collection:
return 0
@@ -441,7 +444,7 @@ class MilvusClient:
return 0
def get_collection_stats(self) -> Dict[str, Any]:
"""获取Collection统计信息"""
"""Return collection stats for the Milvus Client instance."""
if not self.collection:
return {}
@@ -458,7 +461,7 @@ class MilvusClient:
def create_milvus_client() -> MilvusClient:
"""便捷函数创建Milvus客户端"""
"""Create milvus client."""
client = MilvusClient()
client.connect()
client.create_collection(recreate=False)
@@ -470,7 +473,7 @@ def insert_documents(
chunks: List[TextChunk],
embeddings: EmbeddingResult
) -> List[int]:
"""便捷函数:插入文档"""
"""Handle insert documents."""
return client.insert_chunks(chunks, embeddings)
@@ -480,5 +483,5 @@ def search_regulations(
query_sparse: Dict[int, float],
top_k: int = 10
) -> List[SearchResult]:
"""便捷函数:检索法规"""
"""Search regulations."""
return client.hybrid_search(query_dense, query_sparse, top_k)