Fix SSE route dependency and align architecture docs

This commit is contained in:
ash66
2026-05-18 16:32:42 +08:00
parent 86b9ac806a
commit 3f69cad404
149 changed files with 4786 additions and 5957 deletions

View File

@@ -1,6 +1,18 @@
"""存储服务"""
"""Storage services package (Milvus and MinIO clients)."""
# Keep package boundaries explicit so backend imports stay predictable.
from .milvus_client import MilvusClient
from .minio_client import MinIOClient
__all__ = ["MilvusClient", "MinIOClient"]
def __getattr__(name: str):
    """Lazily resolve the package's public client classes (PEP 562).

    Python calls this hook only when ``name`` is not found through normal
    module attribute lookup. Each client module is imported inside the
    matching branch, i.e. when its class is first requested rather than
    when this package itself is imported.

    Args:
        name: Attribute being looked up on the package.

    Returns:
        The ``MilvusClient`` or ``MinIOClient`` class.

    Raises:
        AttributeError: If ``name`` is not one of the lazily exported names.
    """
    if name == "MilvusClient":
        from .milvus_client import MilvusClient

        return MilvusClient
    if name == "MinIOClient":
        from .minio_client import MinIOClient

        return MinIOClient
    # Use the interpreter's usual wording so a miss reads like any other
    # missing-attribute error instead of a bare attribute name.
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

View File

@@ -1,4 +1,4 @@
"""Milvus向量数据库客户端 - 存储与检索服务"""
"""Milvus vector database client - storage and retrieval service."""
from pymilvus import (
connections,
@@ -17,11 +17,13 @@ import numpy as np
from ..embedding.text_chunker import TextChunk
from ..embedding.bge_m3_embedder import EmbeddingResult
from app.config.settings import settings
# Keep service responsibilities explicit so downstream behavior stays predictable.
@dataclass
class SearchResult:
"""检索结果"""
"""A single retrieval (search) result."""
id: int
content: str
score: float
@@ -30,7 +32,7 @@ class SearchResult:
@dataclass
class MilvusDocument:
"""Milvus文档数据结构"""
"""Data structure for a document stored in Milvus."""
doc_id: str
chunk_id: str
content: str
@@ -46,7 +48,7 @@ class MilvusDocument:
class MilvusClient:
"""Milvus向量数据库客户端"""
"""Client for the Milvus vector database."""
COLLECTION_NAME = "regulations"
@@ -73,6 +75,7 @@ class MilvusClient:
collection_name: str = None,
db_name: str = None
):
"""Initialize the Milvus Client instance."""
self.host = host or settings.milvus_host
self.port = port or settings.milvus_port
self.collection_name = collection_name or settings.milvus_collection
@@ -84,7 +87,7 @@ class MilvusClient:
logger.info(f"Milvus客户端配置: {self.host}:{self.port}, Collection: {self.collection_name}")
def connect(self) -> bool:
"""连接到Milvus服务器"""
"""Connect to the Milvus server."""
try:
connections.connect(
alias="default",
@@ -101,7 +104,7 @@ class MilvusClient:
return False
def disconnect(self):
"""断开连接"""
"""Disconnect from the Milvus server."""
try:
connections.disconnect("default")
self.connected = False
@@ -110,7 +113,7 @@ class MilvusClient:
logger.warning(f"断开连接时出错: {e}")
def create_collection(self, recreate: bool = False) -> bool:
"""创建Collection"""
"""Create collection for the Milvus Client instance."""
if not self.connected:
logger.warning("未连接到Milvus请先调用connect()")
return False
@@ -146,7 +149,7 @@ class MilvusClient:
return False
def _create_indexes(self):
"""创建向量索引"""
"""Create the vector indexes."""
if not self.collection:
return
@@ -177,13 +180,13 @@ class MilvusClient:
logger.warning(f"创建索引时出错: {e}")
def load_collection(self):
"""加载Collection到内存"""
"""Load the collection into memory."""
if self.collection:
self.collection.load()
logger.info(f"Collection已加载: {self.collection_name}")
def release_collection(self):
"""释放Collection内存"""
"""Release the collection from memory."""
if self.collection:
self.collection.release()
logger.info(f"Collection已释放: {self.collection_name}")
@@ -193,7 +196,7 @@ class MilvusClient:
chunks: List[TextChunk],
embeddings: EmbeddingResult
) -> List[int]:
"""插入文档分块和嵌入向量"""
"""Insert document chunks together with their embedding vectors."""
if not self.collection:
logger.warning("Collection未初始化")
return []
@@ -246,7 +249,7 @@ class MilvusClient:
top_k: int = 10,
filters: Optional[str] = None
) -> List[SearchResult]:
"""混合检索Dense + Sparse"""
"""Run a hybrid search (dense + sparse)."""
if not self.collection:
logger.warning("Collection未初始化")
return []
@@ -254,10 +257,10 @@ class MilvusClient:
try:
self.collection.load()
# 使用简单的Dense检索兼容所有版本
# Use plain dense retrieval, which is compatible with all versions.
dense_results = self.dense_search(query_dense, top_k, filters)
# 可选合并Sparse结果
# Optionally merge in the sparse results.
if query_sparse:
sparse_results = self.sparse_search(query_sparse, top_k, filters)
merged = self._merge_results(dense_results, sparse_results, top_k)
@@ -277,7 +280,7 @@ class MilvusClient:
top_k: int,
dense_weight: float = 0.6
) -> List[SearchResult]:
"""手动融合Dense和Sparse结果"""
"""Manually fuse the dense and sparse results."""
sparse_weight = 1 - dense_weight
merged_dict = {}
@@ -318,7 +321,7 @@ class MilvusClient:
top_k: int = 10,
filters: Optional[str] = None
) -> List[SearchResult]:
"""纯Dense向量检索"""
"""Dense-vector-only search."""
if not self.collection:
return []
@@ -375,7 +378,7 @@ class MilvusClient:
top_k: int = 10,
filters: Optional[str] = None
) -> List[SearchResult]:
"""纯Sparse向量检索"""
"""Sparse-vector-only search."""
if not self.collection:
return []
@@ -427,7 +430,7 @@ class MilvusClient:
return []
def delete_by_doc_id(self, doc_id: str) -> int:
"""根据doc_id删除记录"""
"""Delete records by doc_id."""
if not self.collection:
return 0
@@ -441,7 +444,7 @@ class MilvusClient:
return 0
def get_collection_stats(self) -> Dict[str, Any]:
"""获取Collection统计信息"""
"""Return statistics for the collection."""
if not self.collection:
return {}
@@ -458,7 +461,7 @@ class MilvusClient:
def create_milvus_client() -> MilvusClient:
"""便捷函数创建Milvus客户端"""
"""Convenience function: create a Milvus client."""
client = MilvusClient()
client.connect()
client.create_collection(recreate=False)
@@ -470,7 +473,7 @@ def insert_documents(
chunks: List[TextChunk],
embeddings: EmbeddingResult
) -> List[int]:
"""便捷函数:插入文档"""
"""Convenience function: insert documents."""
return client.insert_chunks(chunks, embeddings)
@@ -480,5 +483,5 @@ def search_regulations(
query_sparse: Dict[int, float],
top_k: int = 10
) -> List[SearchResult]:
"""便捷函数:检索法规"""
"""Convenience function: search regulations."""
return client.hybrid_search(query_dense, query_sparse, top_k)

View File

@@ -1,4 +1,4 @@
"""MinIO对象存储客户端 - 文档文件存储"""
"""MinIO object storage client - document file storage."""
from minio import Minio
from minio.error import S3Error
@@ -8,10 +8,12 @@ from io import BytesIO
import os
from app.config.settings import settings
# Keep service responsibilities explicit so downstream behavior stays predictable.
class MinIOClient:
"""MinIO对象存储客户端"""
"""Client for MinIO object storage."""
def __init__(
self,
@@ -21,16 +23,7 @@ class MinIOClient:
bucket: str = None,
secure: bool = None
):
"""
初始化MinIO客户端
Args:
endpoint: MinIO服务地址
access_key: 访问密钥
secret_key: 秘密密钥
bucket: 存储桶名称
secure: 是否使用HTTPS
"""
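"""Initialize the MinIO client.

Args:
    endpoint: MinIO service address.
    access_key: Access key.
    secret_key: Secret key.
    bucket: Bucket name.
    secure: Whether to use HTTPS.
"""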
self.endpoint = endpoint or settings.minio_endpoint
self.access_key = access_key or settings.minio_access_key
self.secret_key = secret_key or settings.minio_secret_key
@@ -43,7 +36,7 @@ class MinIOClient:
logger.info(f"MinIO客户端配置: {self.endpoint}, bucket={self.bucket}")
def connect(self) -> bool:
"""连接MinIO服务"""
"""Connect to the MinIO service."""
try:
self.client = Minio(
self.endpoint,
@@ -60,7 +53,7 @@ class MinIOClient:
return False
def ensure_bucket(self) -> bool:
"""确保存储桶存在"""
"""Ensure the storage bucket exists."""
if not self.connected:
logger.warning("未连接MinIO请先调用connect()")
return False
@@ -82,17 +75,7 @@ class MinIOClient:
object_name: str,
metadata: Dict[str, Any] = None
) -> bool:
"""
上传本地文件到MinIO
Args:
file_path: 本地文件路径
object_name: MinIO对象名称
metadata: 元数据
Returns:
bool: 是否成功
"""
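"""Upload a local file to MinIO.

Args:
    file_path: Path of the local file.
    object_name: Name of the MinIO object.
    metadata: Metadata.

Returns:
    bool: Whether the upload succeeded.
"""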
if not self.connected:
self.connect()
self.ensure_bucket()
@@ -125,18 +108,7 @@ class MinIOClient:
content_type: str = "application/octet-stream",
metadata: Dict[str, Any] = None
) -> bool:
"""
上传字节数据到MinIO
Args:
data: 文件字节数据
object_name: MinIO对象名称
content_type: 内容类型
metadata: 元数据注意MinIO仅支持US-ASCII字符
Returns:
bool: 是否成功
"""
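"""Upload bytes to MinIO.

Args:
    data: File content as bytes.
    object_name: Name of the MinIO object.
    content_type: Content type.
    metadata: Metadata (note: MinIO only supports US-ASCII characters).

Returns:
    bool: Whether the upload succeeded.
"""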
if not self.connected:
self.connect()
self.ensure_bucket()
@@ -144,18 +116,18 @@ class MinIOClient:
try:
data_stream = BytesIO(data)
# 处理metadata仅保留ASCII安全字符
# Sanitize metadata: keep only ASCII-safe characters.
safe_metadata = None
if metadata:
safe_metadata = {}
for key, value in metadata.items():
if isinstance(value, str):
# 只保留ASCII字符或转换为安全格式
# Keep only ASCII characters, or convert the value to a safe form.
try:
value.encode('ascii')
safe_metadata[key] = value
except UnicodeEncodeError:
# 中文字符跳过或用占位符
# Non-ASCII (e.g. Chinese) values are replaced with an empty placeholder.
safe_metadata[key] = ""
else:
safe_metadata[key] = str(value)
@@ -181,16 +153,7 @@ class MinIOClient:
object_name: str,
file_path: str
) -> bool:
"""
从MinIO下载文件到本地
Args:
object_name: MinIO对象名称
file_path: 本地保存路径
Returns:
bool: 是否成功
"""
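"""Download a file from MinIO to the local filesystem.

Args:
    object_name: Name of the MinIO object.
    file_path: Local path to save the file to.

Returns:
    bool: Whether the download succeeded.
"""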
if not self.connected:
self.connect()
@@ -212,16 +175,7 @@ class MinIOClient:
object_name: str,
expires: int = 3600
) -> Optional[str]:
"""
获取对象下载URL临时URL
Args:
object_name: MinIO对象名称
expires: URL有效期
Returns:
str: 下载URL
"""
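"""Get a temporary download URL for an object.

Args:
    object_name: Name of the MinIO object.
    expires: Validity period of the URL.

Returns:
    str: The download URL.
"""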
if not self.connected:
self.connect()
@@ -238,15 +192,7 @@ class MinIOClient:
return None
def get_object_data(self, object_name: str) -> Optional[bytes]:
"""
获取对象数据(字节)
Args:
object_name: MinIO对象名称
Returns:
bytes: 文件数据
"""
"""Get an object's data as bytes.

Args:
    object_name: Name of the MinIO object.

Returns:
    bytes: The file data.
"""
if not self.connected:
self.connect()
@@ -262,15 +208,7 @@ class MinIOClient:
return None
def delete_object(self, object_name: str) -> bool:
"""
删除对象
Args:
object_name: MinIO对象名称
Returns:
bool: 是否成功
"""
"""Delete an object.

Args:
    object_name: Name of the MinIO object.

Returns:
    bool: Whether the deletion succeeded.
"""
if not self.connected:
self.connect()
@@ -284,15 +222,7 @@ class MinIOClient:
return False
def list_objects(self, prefix: str = "") -> list:
"""
列出存储桶中的对象
Args:
prefix: 对象名称前缀
Returns:
list: 对象列表
"""
"""List the objects in the bucket.

Args:
    prefix: Object name prefix.

Returns:
    list: The list of objects.
"""
if not self.connected:
self.connect()
@@ -305,15 +235,7 @@ class MinIOClient:
return []
def object_exists(self, object_name: str) -> bool:
"""
检查对象是否存在
Args:
object_name: MinIO对象名称
Returns:
bool: 是否存在
"""
"""Check whether an object exists.

Args:
    object_name: Name of the MinIO object.

Returns:
    bool: Whether the object exists.
"""
if not self.connected:
self.connect()
@@ -325,7 +247,7 @@ class MinIOClient:
return False
def _get_content_type(self, file_path: str) -> str:
"""根据文件扩展名获取Content-Type"""
"""Determine the Content-Type from the file extension."""
ext = os.path.splitext(file_path)[1].lower()
content_types = {
'.pdf': 'application/pdf',
@@ -338,13 +260,13 @@ class MinIOClient:
return content_types.get(ext, 'application/octet-stream')
def close(self):
"""关闭连接MinIO客户端无需显式关闭"""
"""Mark the client as closed (the MinIO client needs no explicit close)."""
self.connected = False
logger.info("MinIO客户端已关闭")
def create_minio_client() -> MinIOClient:
"""便捷函数创建MinIO客户端"""
"""Convenience function: create a MinIO client."""
client = MinIOClient()
client.connect()
client.ensure_bucket()