Files
AIRegulation-DocAnalysis/backend/app/api/routes/docs.py
2026-05-14 15:07:34 +08:00

115 lines
3.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from fastapi import APIRouter, UploadFile, File, HTTPException
import os
import uuid
from datetime import datetime
from app.schemas.doc import (
DocumentUploadResponse,
DocumentListResponse,
DocumentInfo,
ParseResponse,
EmbedResponse,
)
from app.services.mock_data import get_mock_documents, generate_doc_id
router = APIRouter(prefix="/docs", tags=["文档管理"])

# In-memory, process-local registry of document records keyed by document id.
# It is seeded at import time with the preset mock documents, so those are
# visible before anything has been uploaded.
documents_store: dict[str, dict] = {
    mock_doc["id"]: mock_doc for mock_doc in get_mock_documents()
}
@router.post("/upload", response_model=DocumentUploadResponse)
async def upload_document(file: UploadFile = File(...)):
    """Upload a regulation document and register it in the in-memory store.

    The upload's extension is validated, its bytes are written under the raw
    data directory as ``<doc_id>_<filename>``, and a record with status
    ``"uploaded"`` is added to ``documents_store``.

    Args:
        file: The multipart upload. Its filename is supplied by the client
            and is therefore treated as untrusted.

    Returns:
        A ``DocumentUploadResponse`` carrying the new document id, the
        sanitized filename and the size of the upload in bytes.

    Raises:
        HTTPException: 400 when the filename is missing or its extension is
            not one of ``.pdf``, ``.docx``, ``.doc``, ``.txt``.
    """
    allowed_ext = (".pdf", ".docx", ".doc", ".txt")

    # The filename comes from the client: it may be absent, and it may contain
    # directory components ("../../etc/x.txt", "C:\\dir\\x.txt"). Keep only the
    # final path component so the file can never be written outside raw_dir.
    client_name = file.filename or ""
    filename = os.path.basename(client_name.replace("\\", "/"))

    # A missing or extension-less name yields ext == "" and is rejected here
    # with a 400 instead of crashing inside os.path.splitext.
    ext = os.path.splitext(filename)[1].lower()
    if ext not in allowed_ext:
        raise HTTPException(400, f"Unsupported file format: {ext}")

    doc_id = generate_doc_id()

    # Persist the raw bytes; the doc_id prefix keeps same-named uploads apart.
    raw_dir = "/airegulation/demo-mao/backend/data/raw"
    os.makedirs(raw_dir, exist_ok=True)
    file_path = os.path.join(raw_dir, f"{doc_id}_{filename}")
    content = await file.read()
    with open(file_path, "wb") as f:
        f.write(content)

    # Register the document; "chunks" stays 0 until the document is parsed.
    size = len(content)
    documents_store[doc_id] = {
        "id": doc_id,
        "name": filename,
        "path": file_path,
        "size": size,
        "status": "uploaded",
        "chunks": 0,
        "created_at": datetime.now(),
    }
    return DocumentUploadResponse(
        doc_id=doc_id,
        filename=filename,
        size=size,
    )
@router.get("/list", response_model=DocumentListResponse)
async def list_documents():
    """Return a summary of every record currently in ``documents_store``.

    Records are not filtered by status. ``created_at`` is optional on a
    record and is passed through as ``None`` when absent.
    """
    summaries = []
    for record in documents_store.values():
        summary = DocumentInfo(
            id=record["id"],
            name=record["name"],
            chunks=record["chunks"],
            status=record["status"],
            created_at=record.get("created_at"),
        )
        summaries.append(summary)
    return DocumentListResponse(docs=summaries)
@router.post("/parse/{doc_id}", response_model=ParseResponse)
async def parse_document(doc_id: str):
    """Mark a stored document as parsed and assign it a chunk count.

    Parsing is simulated: no file content is read. The chunk count is derived
    from the recorded size (one chunk per 8000 bytes, never fewer than 20);
    records without a "size" entry are treated as 100000 bytes.

    Raises:
        HTTPException: 404 when ``doc_id`` is not in ``documents_store``.
    """
    try:
        record = documents_store[doc_id]
    except KeyError:
        raise HTTPException(404, "Document not found") from None

    # Status is updated before the chunk count is computed, as a single
    # in-place mutation of the stored record.
    record["status"] = "parsed"
    chunk_count = max(20, record.get("size", 100000) // 8000)
    record["chunks"] = chunk_count
    return ParseResponse(doc_id=doc_id, chunks=chunk_count)
@router.post("/embed/{doc_id}", response_model=EmbedResponse)
async def embed_document(doc_id: str):
    """Mark a stored document as indexed and report its vector count.

    Embedding is simulated: the record's status becomes ``"indexed"`` and the
    reported number of vectors is simply the record's current chunk count.

    Raises:
        HTTPException: 404 when ``doc_id`` is not in ``documents_store``.
    """
    try:
        record = documents_store[doc_id]
    except KeyError:
        raise HTTPException(404, "Document not found") from None

    record["status"] = "indexed"
    vector_count = record["chunks"]
    return EmbedResponse(doc_id=doc_id, vectors=vector_count)
@router.delete("/delete/{doc_id}")
async def delete_document(doc_id: str):
    """Remove a document's record from ``documents_store``.

    Only the in-memory record is dropped; this handler does not touch any
    file on disk.

    Raises:
        HTTPException: 404 when ``doc_id`` is not in ``documents_store``.
    """
    try:
        del documents_store[doc_id]
    except KeyError:
        raise HTTPException(404, "Document not found") from None
    return {"success": True}