82 lines
2.2 KiB
Python
82 lines
2.2 KiB
Python
# src/api/routes/knowledge.py
|
||
"""知识库检索接口"""
|
||
|
||
from fastapi import APIRouter, HTTPException
|
||
from loguru import logger
|
||
|
||
from ..models import SearchRequest, SearchResponse, SearchResultItem, ErrorResponse
|
||
from app.services.document_processor import DocumentProcessor
|
||
|
||
router = APIRouter(prefix="/knowledge", tags=["knowledge"])
|
||
|
||
|
||
@router.post("/search", response_model=SearchResponse)
async def search_knowledge(request: SearchRequest):
    """Search the regulation knowledge base.

    Builds a ``DocumentProcessor``, runs its ``search`` with the request's
    query, ``top_k`` and ``filters``, and converts every returned record
    into a ``SearchResultItem``. Per the original description the
    underlying retrieval is hybrid (dense vectors + sparse vectors fused
    with RRF); that logic lives in ``DocumentProcessor`` and is not visible
    here.

    Args:
        request: Search request carrying ``query``, ``top_k`` and
            ``filters``.

    Returns:
        A ``SearchResponse`` echoing the query, with the number of hits and
        the converted result items.

    Raises:
        HTTPException: 400 when the query is empty or whitespace-only;
            500 when building the processor, searching, closing, or
            converting the results fails.
    """
    # Reject missing / blank queries before touching the search backend.
    if not request.query or not request.query.strip():
        raise HTTPException(
            status_code=400,
            detail="查询文本不能为空",
        )

    logger.info(f"收到检索请求: {request.query}")

    try:
        processor = DocumentProcessor()
        try:
            results = processor.search(
                query=request.query,
                top_k=request.top_k,
                filters=request.filters,
            )
        finally:
            # Always release the processor, even when search() raises;
            # previously close() was skipped on failure.
            processor.close()

        # Records are accessed via .get(); missing keys fall back to
        # neutral defaults.
        result_items = [
            SearchResultItem(
                id=r.get("id", 0),
                content=r.get("content", ""),
                score=r.get("score", 0.0),
                metadata=r.get("metadata", {}),
            )
            for r in results
        ]

        return SearchResponse(
            query=request.query,
            total=len(result_items),
            results=result_items,
        )

    except Exception as e:
        # Route-level boundary: record the traceback and surface a 500.
        # NOTE(review): the exception text is echoed to the client, which
        # may expose internal details — confirm this is intended.
        logger.exception(f"检索失败: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"检索失败: {str(e)}",
        ) from e
|
||
|
||
|
||
@router.post("/retrieval", response_model=SearchResponse)
async def knowledge_retrieval(request: SearchRequest):
    """Knowledge retrieval endpoint (named to align with the architecture doc).

    The documented target pipeline for this endpoint is:

    1. Intent recognition
    2. BM25 keyword retrieval + vector semantic retrieval (dual recall)
    3. Cross-Encoder reranking
    4. Return the results

    The current version does not implement those stages itself; it simply
    forwards the request to ``search_knowledge`` and returns its response.
    A reranking step may be added later.

    Args:
        request: Retrieval request, passed through unchanged.

    Returns:
        Whatever ``search_knowledge`` returns for the same request.
    """
    # Delegate to the hybrid-search endpoint handler for now.
    response = await search_knowledge(request)
    return response
|