81 lines
2.2 KiB
Python
81 lines
2.2 KiB
Python
|
|
"""知识库检索接口"""
|
|||
|
|
|
|||
|
|
from fastapi import APIRouter, HTTPException
|
|||
|
|
from loguru import logger
|
|||
|
|
|
|||
|
|
from ..models import SearchRequest, SearchResponse, SearchResultItem, ErrorResponse
|
|||
|
|
from app.services.document_processor import DocumentProcessor
|
|||
|
|
|
|||
|
|
# Router for the knowledge-base endpoints in this module; every route
# registered on it is served under the "/knowledge" prefix and tagged
# "knowledge".
router = APIRouter(prefix="/knowledge", tags=["knowledge"])
|
|||
|
|
|
|||
|
|
|
|||
|
|
@router.post("/search", response_model=SearchResponse)
async def search_knowledge(request: SearchRequest):
    """Search the regulation knowledge base.

    Creates a ``DocumentProcessor``, runs its ``search`` with the query,
    ``top_k`` and ``filters`` taken from the request, and wraps every
    returned hit in a ``SearchResultItem``.

    The original description states that retrieval is hybrid (dense vectors +
    sparse vectors + RRF fusion); that strategy is implemented inside
    ``DocumentProcessor`` and is not visible in this handler.

    Args:
        request: Search parameters. ``query``, ``top_k`` and ``filters`` are
            read from it.

    Returns:
        A ``SearchResponse`` echoing the query, with the number of hits and
        the converted result items.

    Raises:
        HTTPException: 400 when the query is empty or only whitespace;
            500 when creating the processor, searching, closing it, or
            converting the results fails.
    """
    if not request.query or not request.query.strip():
        raise HTTPException(
            status_code=400,
            detail="查询文本不能为空",
        )

    logger.info(f"收到检索请求: {request.query}")

    try:
        processor = DocumentProcessor()
        try:
            results = processor.search(
                query=request.query,
                top_k=request.top_k,
                filters=request.filters,
            )
        finally:
            # Release the processor even when search() raises; previously
            # close() was skipped on failure, leaking whatever it holds.
            processor.close()

        # Convert the raw hits into response items, filling in defaults
        # for any missing key.
        result_items = [
            SearchResultItem(
                id=hit.get("id", 0),
                content=hit.get("content", ""),
                score=hit.get("score", 0.0),
                metadata=hit.get("metadata", {}),
            )
            for hit in results
        ]

        return SearchResponse(
            query=request.query,
            total=len(result_items),
            results=result_items,
        )
    except Exception as e:
        logger.error(f"检索失败: {e}")
        # NOTE(review): the exception text is returned to the client as-is;
        # confirm that exposing internal error details is acceptable.
        raise HTTPException(
            status_code=500,
            detail=f"检索失败: {str(e)}",
        ) from e
|
|||
|
|
|
|||
|
|
|
|||
|
|
@router.post("/retrieval", response_model=SearchResponse)
async def knowledge_retrieval(request: SearchRequest):
    """Knowledge retrieval endpoint (named to match the architecture document).

    The target flow described for this endpoint is:
        1. Intent recognition
        2. BM25 keyword retrieval + vector semantic retrieval (dual recall)
        3. Cross-Encoder re-ranking
        4. Return the results

    The current implementation does not perform these steps itself: it
    forwards the request unchanged to ``search_knowledge`` and returns that
    handler's response. A re-ranking step may be added later.

    Args:
        request: Retrieval request, passed through to ``search_knowledge``.

    Returns:
        Whatever ``search_knowledge`` returns for the same request.
    """
    # Current version: plain delegation to the hybrid-search handler.
    response = await search_knowledge(request)
    return response
|