Fix SSE route dependency and align architecture docs
This commit is contained in:
@@ -1,6 +1,18 @@
|
||||
"""存储服务"""
|
||||
"""Initialize the app.services.storage package."""
|
||||
# Keep package boundaries explicit so backend imports stay predictable.
|
||||
|
||||
from .milvus_client import MilvusClient
|
||||
from .minio_client import MinIOClient
|
||||
|
||||
__all__ = ["MilvusClient", "MinIOClient"]
|
||||
|
||||
|
||||
def __getattr__(name: str):
    """Resolve the storage client classes on demand (module-level ``__getattr__``, PEP 562).

    Python calls this hook only after normal attribute lookup on the module
    has failed, so the matching client module is imported at the moment the
    attribute is requested.

    Args:
        name: Attribute being looked up on this package.

    Returns:
        The ``MilvusClient`` or ``MinIOClient`` class, depending on ``name``.

    Raises:
        AttributeError: If ``name`` is not one of the two client names.
    """
    if name == "MilvusClient":
        from .milvus_client import MilvusClient

        return MilvusClient
    if name == "MinIOClient":
        from .minio_client import MinIOClient

        return MinIOClient
    # Use the interpreter's standard wording so a typo or stale import is
    # reported with both the module and the missing attribute.
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""Milvus向量数据库客户端 - 存储与检索服务"""
|
||||
"""Provide service-layer logic for milvus client."""
|
||||
|
||||
from pymilvus import (
|
||||
connections,
|
||||
@@ -17,11 +17,13 @@ import numpy as np
|
||||
from ..embedding.text_chunker import TextChunk
|
||||
from ..embedding.bge_m3_embedder import EmbeddingResult
|
||||
from app.config.settings import settings
|
||||
# Keep service responsibilities explicit so downstream behavior stays predictable.
|
||||
|
||||
|
||||
|
||||
@dataclass
|
||||
class SearchResult:
|
||||
"""检索结果"""
|
||||
"""Represent the Search Result type."""
|
||||
id: int
|
||||
content: str
|
||||
score: float
|
||||
@@ -30,7 +32,7 @@ class SearchResult:
|
||||
|
||||
@dataclass
|
||||
class MilvusDocument:
|
||||
"""Milvus文档数据结构"""
|
||||
"""Represent the Milvus Document type."""
|
||||
doc_id: str
|
||||
chunk_id: str
|
||||
content: str
|
||||
@@ -46,7 +48,7 @@ class MilvusDocument:
|
||||
|
||||
|
||||
class MilvusClient:
|
||||
"""Milvus向量数据库客户端"""
|
||||
"""Represent the Milvus Client type."""
|
||||
|
||||
COLLECTION_NAME = "regulations"
|
||||
|
||||
@@ -73,6 +75,7 @@ class MilvusClient:
|
||||
collection_name: str = None,
|
||||
db_name: str = None
|
||||
):
|
||||
"""Initialize the Milvus Client instance."""
|
||||
self.host = host or settings.milvus_host
|
||||
self.port = port or settings.milvus_port
|
||||
self.collection_name = collection_name or settings.milvus_collection
|
||||
@@ -84,7 +87,7 @@ class MilvusClient:
|
||||
logger.info(f"Milvus客户端配置: {self.host}:{self.port}, Collection: {self.collection_name}")
|
||||
|
||||
def connect(self) -> bool:
|
||||
"""连接到Milvus服务器"""
|
||||
"""Handle connect for the Milvus Client instance."""
|
||||
try:
|
||||
connections.connect(
|
||||
alias="default",
|
||||
@@ -101,7 +104,7 @@ class MilvusClient:
|
||||
return False
|
||||
|
||||
def disconnect(self):
|
||||
"""断开连接"""
|
||||
"""Handle disconnect for the Milvus Client instance."""
|
||||
try:
|
||||
connections.disconnect("default")
|
||||
self.connected = False
|
||||
@@ -110,7 +113,7 @@ class MilvusClient:
|
||||
logger.warning(f"断开连接时出错: {e}")
|
||||
|
||||
def create_collection(self, recreate: bool = False) -> bool:
|
||||
"""创建Collection"""
|
||||
"""Create collection for the Milvus Client instance."""
|
||||
if not self.connected:
|
||||
logger.warning("未连接到Milvus,请先调用connect()")
|
||||
return False
|
||||
@@ -146,7 +149,7 @@ class MilvusClient:
|
||||
return False
|
||||
|
||||
def _create_indexes(self):
|
||||
"""创建向量索引"""
|
||||
"""Handle create indexes for this module for the Milvus Client instance."""
|
||||
if not self.collection:
|
||||
return
|
||||
|
||||
@@ -177,13 +180,13 @@ class MilvusClient:
|
||||
logger.warning(f"创建索引时出错: {e}")
|
||||
|
||||
def load_collection(self):
    """Load the current collection and log it; do nothing when no collection is set."""
    collection = self.collection
    if not collection:
        # Nothing has been created or attached yet.
        return
    collection.load()
    logger.info(f"Collection已加载: {self.collection_name}")
|
||||
|
||||
def release_collection(self):
    """Release the current collection and log it; do nothing when no collection is set."""
    collection = self.collection
    if not collection:
        # Nothing to release.
        return
    collection.release()
    logger.info(f"Collection已释放: {self.collection_name}")
|
||||
@@ -193,7 +196,7 @@ class MilvusClient:
|
||||
chunks: List[TextChunk],
|
||||
embeddings: EmbeddingResult
|
||||
) -> List[int]:
|
||||
"""插入文档分块和嵌入向量"""
|
||||
"""Handle insert chunks for the Milvus Client instance."""
|
||||
if not self.collection:
|
||||
logger.warning("Collection未初始化")
|
||||
return []
|
||||
@@ -246,7 +249,7 @@ class MilvusClient:
|
||||
top_k: int = 10,
|
||||
filters: Optional[str] = None
|
||||
) -> List[SearchResult]:
|
||||
"""混合检索:Dense + Sparse"""
|
||||
"""Handle hybrid search for the Milvus Client instance."""
|
||||
if not self.collection:
|
||||
logger.warning("Collection未初始化")
|
||||
return []
|
||||
@@ -254,10 +257,10 @@ class MilvusClient:
|
||||
try:
|
||||
self.collection.load()
|
||||
|
||||
# 使用简单的Dense检索(兼容所有版本)
|
||||
# Use plain dense retrieval (compatible with all versions).
|
||||
dense_results = self.dense_search(query_dense, top_k, filters)
|
||||
|
||||
# 可选:合并Sparse结果
|
||||
# Optional: merge in the sparse results when a sparse query is provided.
|
||||
if query_sparse:
|
||||
sparse_results = self.sparse_search(query_sparse, top_k, filters)
|
||||
merged = self._merge_results(dense_results, sparse_results, top_k)
|
||||
@@ -277,7 +280,7 @@ class MilvusClient:
|
||||
top_k: int,
|
||||
dense_weight: float = 0.6
|
||||
) -> List[SearchResult]:
|
||||
"""手动融合Dense和Sparse结果"""
|
||||
"""Handle merge results for this module for the Milvus Client instance."""
|
||||
sparse_weight = 1 - dense_weight
|
||||
merged_dict = {}
|
||||
|
||||
@@ -318,7 +321,7 @@ class MilvusClient:
|
||||
top_k: int = 10,
|
||||
filters: Optional[str] = None
|
||||
) -> List[SearchResult]:
|
||||
"""纯Dense向量检索"""
|
||||
"""Handle dense search for the Milvus Client instance."""
|
||||
if not self.collection:
|
||||
return []
|
||||
|
||||
@@ -375,7 +378,7 @@ class MilvusClient:
|
||||
top_k: int = 10,
|
||||
filters: Optional[str] = None
|
||||
) -> List[SearchResult]:
|
||||
"""纯Sparse向量检索"""
|
||||
"""Handle sparse search for the Milvus Client instance."""
|
||||
if not self.collection:
|
||||
return []
|
||||
|
||||
@@ -427,7 +430,7 @@ class MilvusClient:
|
||||
return []
|
||||
|
||||
def delete_by_doc_id(self, doc_id: str) -> int:
|
||||
"""根据doc_id删除记录"""
|
||||
"""Delete by doc id for the Milvus Client instance."""
|
||||
if not self.collection:
|
||||
return 0
|
||||
|
||||
@@ -441,7 +444,7 @@ class MilvusClient:
|
||||
return 0
|
||||
|
||||
def get_collection_stats(self) -> Dict[str, Any]:
|
||||
"""获取Collection统计信息"""
|
||||
"""Return collection stats for the Milvus Client instance."""
|
||||
if not self.collection:
|
||||
return {}
|
||||
|
||||
@@ -458,7 +461,7 @@ class MilvusClient:
|
||||
|
||||
|
||||
def create_milvus_client() -> MilvusClient:
|
||||
"""便捷函数:创建Milvus客户端"""
|
||||
"""Create milvus client."""
|
||||
client = MilvusClient()
|
||||
client.connect()
|
||||
client.create_collection(recreate=False)
|
||||
@@ -470,7 +473,7 @@ def insert_documents(
|
||||
chunks: List[TextChunk],
|
||||
embeddings: EmbeddingResult
|
||||
) -> List[int]:
|
||||
"""便捷函数:插入文档"""
|
||||
"""Handle insert documents."""
|
||||
return client.insert_chunks(chunks, embeddings)
|
||||
|
||||
|
||||
@@ -480,5 +483,5 @@ def search_regulations(
|
||||
query_sparse: Dict[int, float],
|
||||
top_k: int = 10
|
||||
) -> List[SearchResult]:
|
||||
"""便捷函数:检索法规"""
|
||||
"""Search regulations."""
|
||||
return client.hybrid_search(query_dense, query_sparse, top_k)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""MinIO对象存储客户端 - 文档文件存储"""
|
||||
"""Provide service-layer logic for minio client."""
|
||||
|
||||
from minio import Minio
|
||||
from minio.error import S3Error
|
||||
@@ -8,10 +8,12 @@ from io import BytesIO
|
||||
import os
|
||||
|
||||
from app.config.settings import settings
|
||||
# Keep service responsibilities explicit so downstream behavior stays predictable.
|
||||
|
||||
|
||||
|
||||
class MinIOClient:
|
||||
"""MinIO对象存储客户端"""
|
||||
"""Represent the Min I O Client type."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
@@ -21,16 +23,7 @@ class MinIOClient:
|
||||
bucket: str = None,
|
||||
secure: bool = None
|
||||
):
|
||||
"""
|
||||
初始化MinIO客户端
|
||||
|
||||
Args:
|
||||
endpoint: MinIO服务地址
|
||||
access_key: 访问密钥
|
||||
secret_key: 秘密密钥
|
||||
bucket: 存储桶名称
|
||||
secure: 是否使用HTTPS
|
||||
"""
|
||||
"""Initialize the Min I O Client instance."""
|
||||
self.endpoint = endpoint or settings.minio_endpoint
|
||||
self.access_key = access_key or settings.minio_access_key
|
||||
self.secret_key = secret_key or settings.minio_secret_key
|
||||
@@ -43,7 +36,7 @@ class MinIOClient:
|
||||
logger.info(f"MinIO客户端配置: {self.endpoint}, bucket={self.bucket}")
|
||||
|
||||
def connect(self) -> bool:
|
||||
"""连接MinIO服务"""
|
||||
"""Handle connect for the Min I O Client instance."""
|
||||
try:
|
||||
self.client = Minio(
|
||||
self.endpoint,
|
||||
@@ -60,7 +53,7 @@ class MinIOClient:
|
||||
return False
|
||||
|
||||
def ensure_bucket(self) -> bool:
|
||||
"""确保存储桶存在"""
|
||||
"""Handle ensure bucket for the Min I O Client instance."""
|
||||
if not self.connected:
|
||||
logger.warning("未连接MinIO,请先调用connect()")
|
||||
return False
|
||||
@@ -82,17 +75,7 @@ class MinIOClient:
|
||||
object_name: str,
|
||||
metadata: Dict[str, Any] = None
|
||||
) -> bool:
|
||||
"""
|
||||
上传本地文件到MinIO
|
||||
|
||||
Args:
|
||||
file_path: 本地文件路径
|
||||
object_name: MinIO对象名称
|
||||
metadata: 元数据
|
||||
|
||||
Returns:
|
||||
bool: 是否成功
|
||||
"""
|
||||
"""Handle upload file for the Min I O Client instance."""
|
||||
if not self.connected:
|
||||
self.connect()
|
||||
self.ensure_bucket()
|
||||
@@ -125,18 +108,7 @@ class MinIOClient:
|
||||
content_type: str = "application/octet-stream",
|
||||
metadata: Dict[str, Any] = None
|
||||
) -> bool:
|
||||
"""
|
||||
上传字节数据到MinIO
|
||||
|
||||
Args:
|
||||
data: 文件字节数据
|
||||
object_name: MinIO对象名称
|
||||
content_type: 内容类型
|
||||
metadata: 元数据(注意:MinIO仅支持US-ASCII字符)
|
||||
|
||||
Returns:
|
||||
bool: 是否成功
|
||||
"""
|
||||
"""Handle upload bytes for the Min I O Client instance."""
|
||||
if not self.connected:
|
||||
self.connect()
|
||||
self.ensure_bucket()
|
||||
@@ -144,18 +116,18 @@ class MinIOClient:
|
||||
try:
|
||||
data_stream = BytesIO(data)
|
||||
|
||||
# 处理metadata:仅保留ASCII安全字符
|
||||
# Sanitize metadata: keep only ASCII-safe values.
|
||||
safe_metadata = None
|
||||
if metadata:
|
||||
safe_metadata = {}
|
||||
for key, value in metadata.items():
|
||||
if isinstance(value, str):
|
||||
# 只保留ASCII字符或转换为安全格式
|
||||
# Keep the string unchanged only if it is pure ASCII.
|
||||
try:
|
||||
value.encode('ascii')
|
||||
safe_metadata[key] = value
|
||||
except UnicodeEncodeError:
|
||||
# 中文字符跳过或用占位符
|
||||
# Non-ASCII values (e.g. Chinese text) are replaced with an empty-string placeholder.
|
||||
safe_metadata[key] = ""
|
||||
else:
|
||||
safe_metadata[key] = str(value)
|
||||
@@ -181,16 +153,7 @@ class MinIOClient:
|
||||
object_name: str,
|
||||
file_path: str
|
||||
) -> bool:
|
||||
"""
|
||||
从MinIO下载文件到本地
|
||||
|
||||
Args:
|
||||
object_name: MinIO对象名称
|
||||
file_path: 本地保存路径
|
||||
|
||||
Returns:
|
||||
bool: 是否成功
|
||||
"""
|
||||
"""Handle download file for the Min I O Client instance."""
|
||||
if not self.connected:
|
||||
self.connect()
|
||||
|
||||
@@ -212,16 +175,7 @@ class MinIOClient:
|
||||
object_name: str,
|
||||
expires: int = 3600
|
||||
) -> Optional[str]:
|
||||
"""
|
||||
获取对象下载URL(临时URL)
|
||||
|
||||
Args:
|
||||
object_name: MinIO对象名称
|
||||
expires: URL有效期(秒)
|
||||
|
||||
Returns:
|
||||
str: 下载URL
|
||||
"""
|
||||
"""Return object url for the Min I O Client instance."""
|
||||
if not self.connected:
|
||||
self.connect()
|
||||
|
||||
@@ -238,15 +192,7 @@ class MinIOClient:
|
||||
return None
|
||||
|
||||
def get_object_data(self, object_name: str) -> Optional[bytes]:
|
||||
"""
|
||||
获取对象数据(字节)
|
||||
|
||||
Args:
|
||||
object_name: MinIO对象名称
|
||||
|
||||
Returns:
|
||||
bytes: 文件数据
|
||||
"""
|
||||
"""Return object data for the Min I O Client instance."""
|
||||
if not self.connected:
|
||||
self.connect()
|
||||
|
||||
@@ -262,15 +208,7 @@ class MinIOClient:
|
||||
return None
|
||||
|
||||
def delete_object(self, object_name: str) -> bool:
|
||||
"""
|
||||
删除对象
|
||||
|
||||
Args:
|
||||
object_name: MinIO对象名称
|
||||
|
||||
Returns:
|
||||
bool: 是否成功
|
||||
"""
|
||||
"""Delete object for the Min I O Client instance."""
|
||||
if not self.connected:
|
||||
self.connect()
|
||||
|
||||
@@ -284,15 +222,7 @@ class MinIOClient:
|
||||
return False
|
||||
|
||||
def list_objects(self, prefix: str = "") -> list:
|
||||
"""
|
||||
列出存储桶中的对象
|
||||
|
||||
Args:
|
||||
prefix: 对象名称前缀
|
||||
|
||||
Returns:
|
||||
list: 对象列表
|
||||
"""
|
||||
"""List objects for the Min I O Client instance."""
|
||||
if not self.connected:
|
||||
self.connect()
|
||||
|
||||
@@ -305,15 +235,7 @@ class MinIOClient:
|
||||
return []
|
||||
|
||||
def object_exists(self, object_name: str) -> bool:
|
||||
"""
|
||||
检查对象是否存在
|
||||
|
||||
Args:
|
||||
object_name: MinIO对象名称
|
||||
|
||||
Returns:
|
||||
bool: 是否存在
|
||||
"""
|
||||
"""Handle object exists for the Min I O Client instance."""
|
||||
if not self.connected:
|
||||
self.connect()
|
||||
|
||||
@@ -325,7 +247,7 @@ class MinIOClient:
|
||||
return False
|
||||
|
||||
def _get_content_type(self, file_path: str) -> str:
|
||||
"""根据文件扩展名获取Content-Type"""
|
||||
"""Handle get content type for this module for the Min I O Client instance."""
|
||||
ext = os.path.splitext(file_path)[1].lower()
|
||||
content_types = {
|
||||
'.pdf': 'application/pdf',
|
||||
@@ -338,13 +260,13 @@ class MinIOClient:
|
||||
return content_types.get(ext, 'application/octet-stream')
|
||||
|
||||
def close(self):
    """Mark this client as closed.

    Per the original docstring, the MinIO client needs no explicit close,
    so this only resets the ``connected`` flag and logs the event.
    """
    self.connected = False
    logger.info("MinIO客户端已关闭")
|
||||
|
||||
|
||||
def create_minio_client() -> MinIOClient:
|
||||
"""便捷函数:创建MinIO客户端"""
|
||||
"""Create minio client."""
|
||||
client = MinIOClient()
|
||||
client.connect()
|
||||
client.ensure_bucket()
|
||||
|
||||
Reference in New Issue
Block a user