2026-05-18 16:32:42 +08:00
|
|
|
|
"""Define API routes for documents."""
|
|
|
|
|
|
|
|
|
|
|
|
from __future__ import annotations
|
2026-05-14 15:07:34 +08:00
|
|
|
|
|
|
|
|
|
|
from io import BytesIO
|
|
|
|
|
|
from urllib.parse import quote
|
|
|
|
|
|
|
2026-05-18 16:32:42 +08:00
|
|
|
|
from fastapi import APIRouter, File, Form, HTTPException, UploadFile
|
|
|
|
|
|
from fastapi.responses import StreamingResponse
|
|
|
|
|
|
from loguru import logger
|
2026-05-14 15:07:34 +08:00
|
|
|
|
|
2026-05-18 16:32:42 +08:00
|
|
|
|
from app.api.models import DocumentUploadResponse
|
|
|
|
|
|
from app.application.documents import DocumentProcessResult
|
|
|
|
|
|
from app.shared.bootstrap import get_document_command_service, get_document_query_service
|
|
|
|
|
|
# Keep route handlers close to their transport-layer wiring for easier auditing.
|
2026-05-14 15:07:34 +08:00
|
|
|
|
|
|
|
|
|
|
|
2026-05-18 16:32:42 +08:00
|
|
|
|
# Router shared by the document endpoints in this module: handlers registered
# on it are served under the "/documents" prefix and tagged "documents".
router = APIRouter(prefix="/documents", tags=["documents"])
|
2026-05-14 15:07:34 +08:00
|
|
|
|
|
|
|
|
|
|
|
2026-05-18 16:32:42 +08:00
|
|
|
|
def _document_response(result: DocumentProcessResult) -> DocumentUploadResponse:
    """Convert a processing result into the API response model.

    Copies the identically named attributes of ``result`` onto a new
    ``DocumentUploadResponse``; no value is transformed along the way.
    """
    copied_fields = (
        "doc_id",
        "doc_name",
        "status",
        "message",
        "num_chunks",
        "summary",
        "summary_latency_ms",
    )
    payload = {name: getattr(result, name) for name in copied_fields}
    return DocumentUploadResponse(**payload)
|
2026-05-14 15:07:34 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.post("/upload", response_model=DocumentUploadResponse)
async def upload_document(
    file: UploadFile = File(..., description="上传的文档文件"),
    doc_id: str | None = Form(None, description="客户端预分配的文档ID,不传则自动生成"),
    doc_name: str | None = Form(None, description="文档名称"),
    regulation_type: str | None = Form(None, description="法规类型"),
    version: str | None = Form(None, description="文档版本"),
    generate_summary: bool = Form(False, description="是否生成摘要"),
):
    """Accept an uploaded file and pass it to the document command service.

    Args:
        file: The uploaded document; it must carry a file name and be non-empty.
        doc_id: Optional document ID supplied by the client (the form
            description says one is generated when it is omitted).
        doc_name: Optional document name.
        regulation_type: Optional regulation type; forwarded as "" when absent.
        version: Optional document version; forwarded as "" when absent.
        generate_summary: Whether a summary should be generated.

    Returns:
        The processing result converted by ``_document_response``.

    Raises:
        HTTPException: 400 when the file name is missing or the file is empty;
            500 when the service reports status "failed" or any other
            exception escapes the service call.
    """
    # Validate the file name before reading, so a request that is going to be
    # rejected does not have its whole payload loaded into memory first.
    if not file.filename:
        raise HTTPException(status_code=400, detail="文件名不能为空")

    content = await file.read()
    if not content:
        raise HTTPException(status_code=400, detail="上传文件为空")

    try:
        result = get_document_command_service().upload_and_process(
            doc_id=doc_id,
            file_name=file.filename,
            content=content,
            content_type=file.content_type or "application/octet-stream",
            doc_name=doc_name,
            regulation_type=regulation_type or "",
            version=version or "",
            generate_summary=generate_summary,
        )
        if result.status == "failed":
            raise HTTPException(status_code=500, detail=result.message)
        return _document_response(result)
    except HTTPException:
        # Let deliberate HTTP errors through instead of re-wrapping them below.
        raise
    except Exception as exc:
        logger.exception("文档上传失败")
        # Chain the original error so the cause stays attached to the traceback.
        raise HTTPException(status_code=500, detail=str(exc)) from exc
|
2026-05-14 15:07:34 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.get("/status/{doc_id}", response_model=DocumentUploadResponse)
async def get_document_status(doc_id: str):
    """Look up one document and report its current processing state.

    Raises:
        HTTPException: 404 when the query service returns nothing for ``doc_id``.
    """
    record = get_document_query_service().get(doc_id)
    if not record:
        raise HTTPException(status_code=404, detail="文档不存在")

    # Fall back to a generic success message when no error was recorded.
    status_message = record.error_message or "查询成功"
    payload = {
        "doc_id": record.doc_id,
        "doc_name": record.doc_name,
        "status": record.status.value,
        "message": status_message,
        "num_chunks": record.chunk_count,
        "summary": record.summary,
        "summary_latency_ms": record.summary_latency_ms,
        "regulation_type": record.regulation_type,
        "version": record.version,
    }
    return DocumentUploadResponse(**payload)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.get("/download/{doc_id}")
async def download_document(doc_id: str):
    """Send the stored file for ``doc_id`` back as an attachment.

    The file name is sent through the ``filename*`` parameter of the
    Content-Disposition header, percent-encoded as UTF-8.

    Raises:
        HTTPException: 404 when the query service raises ``FileNotFoundError``;
            500 for any other exception raised while preparing the response.
    """
    try:
        document, file_data = get_document_query_service().download(doc_id)
        # safe="" also escapes "/", which quote() leaves untouched by default
        # but which is not a permitted character in an RFC 5987 ext-value.
        encoded_name = quote(document.file_name, safe="")
        return StreamingResponse(
            BytesIO(file_data),
            media_type=document.content_type or "application/octet-stream",
            headers={"Content-Disposition": f"attachment; filename*=UTF-8''{encoded_name}"},
        )
    except FileNotFoundError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    except Exception as exc:
        logger.exception("文档下载失败")
        # Chain the original error so the cause stays attached to the traceback.
        raise HTTPException(status_code=500, detail=str(exc)) from exc
|
2026-05-14 15:07:34 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.get("/list")
async def list_documents():
    """Return a compact listing of the documents from the query service.

    The response holds one entry per document under "documents" and the
    number of documents under "total".
    """
    records = get_document_query_service().list_documents()

    entries = []
    for record in records:
        entries.append(
            {
                "doc_id": record.doc_id,
                "doc_name": record.doc_name,
                "status": record.status.value,
                "chunk_count": record.chunk_count,
                "updated_at": record.updated_at.isoformat(),
            }
        )

    return {"documents": entries, "total": len(records)}
|
2026-05-14 15:07:34 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.get("/management-list")
async def get_document_management_list():
    """Return the detailed document listing used for management views.

    Each entry carries the compact listing fields plus size, summary,
    regulation type and version; "total" is the number of documents.
    """
    records = get_document_query_service().list_documents()

    entries = []
    for record in records:
        entries.append(
            {
                "doc_id": record.doc_id,
                "doc_name": record.doc_name,
                "status": record.status.value,
                "chunk_count": record.chunk_count,
                "size_bytes": record.size_bytes,
                "summary": record.summary,
                "updated_at": record.updated_at.isoformat(),
                "regulation_type": record.regulation_type,
                "version": record.version,
            }
        )

    return {"documents": entries, "total": len(records)}
|
2026-05-20 23:34:08 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.delete("/{doc_id}")
async def delete_document(doc_id: str):
    """Ask the command service to delete a document.

    Raises:
        HTTPException: 404 when the service reports that nothing was deleted.
    """
    if get_document_command_service().delete(doc_id):
        return {"doc_id": doc_id, "deleted": True}
    raise HTTPException(status_code=404, detail="文档不存在")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.post("/{doc_id}/retry", response_model=DocumentUploadResponse)
async def retry_document(doc_id: str):
    """Ask the command service to process a document again.

    Returns:
        The retry result converted by ``_document_response``.

    Raises:
        HTTPException: 500 when the service reports status "failed" or any
            other exception escapes the service call.
    """
    try:
        result = get_document_command_service().retry(doc_id)
        if result.status == "failed":
            raise HTTPException(status_code=500, detail=result.message)
        return _document_response(result)
    except HTTPException:
        # Let deliberate HTTP errors through instead of re-wrapping them below.
        raise
    except Exception as exc:
        logger.exception("文档重试失败")
        # Chain the original error so the cause stays attached to the traceback.
        raise HTTPException(status_code=500, detail=str(exc)) from exc
|