2026-05-18 16:32:42 +08:00
|
|
|
"""Define API routes for docs."""
|
|
|
|
|
|
2026-05-14 15:07:34 +08:00
|
|
|
from fastapi import APIRouter, UploadFile, File, HTTPException
|
|
|
|
|
import os
|
|
|
|
|
import uuid
|
|
|
|
|
from datetime import datetime
|
|
|
|
|
from app.schemas.doc import (
|
|
|
|
|
DocumentUploadResponse,
|
|
|
|
|
DocumentListResponse,
|
|
|
|
|
DocumentInfo,
|
|
|
|
|
ParseResponse,
|
|
|
|
|
EmbedResponse,
|
|
|
|
|
)
|
|
|
|
|
from app.services.mock_data import get_mock_documents, generate_doc_id
|
2026-05-18 16:32:42 +08:00
|
|
|
# Keep route handlers close to their transport-layer wiring for easier auditing.
|
|
|
|
|
|
2026-05-14 15:07:34 +08:00
|
|
|
|
|
|
|
|
# Router for the document endpoints; every route registered on it is served
# under the "/docs" prefix.
router = APIRouter(prefix="/docs", tags=["文档管理"])
|
|
|
|
|
|
2026-05-18 16:32:42 +08:00
|
|
|
# Keep route handlers close to their transport-layer wiring for easier auditing.
|
2026-05-14 15:07:34 +08:00
|
|
|
# In-memory registry of document records, keyed by document id. The route
# handlers in this module read and mutate it directly.
documents_store: dict[str, dict] = {}

# Seed the registry with the mock documents at import time.
for doc in get_mock_documents():
    documents_store.update({doc["id"]: doc})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.post("/upload", response_model=DocumentUploadResponse)
async def upload_document(file: UploadFile = File(...)):
    """Save an uploaded file under the raw-data directory and register it.

    Args:
        file: The multipart upload. Only names ending in ``.pdf``, ``.docx``,
            ``.doc`` or ``.txt`` (case-insensitive) are accepted.

    Returns:
        A ``DocumentUploadResponse`` carrying the generated document id, the
        stored filename and the payload size in bytes.

    Raises:
        HTTPException: 400 when the filename is missing or its extension is
            not in the allowed list.
    """
    allowed_ext = (".pdf", ".docx", ".doc", ".txt")

    # The filename is client-controlled: it may be absent and it may carry
    # directory components. Keep only the final path component so the value
    # interpolated into the on-disk path cannot point outside raw_dir, and so
    # a missing name is rejected with a 400 instead of raising TypeError.
    filename = os.path.basename(file.filename or "")
    ext = os.path.splitext(filename)[1].lower()
    if ext not in allowed_ext:
        raise HTTPException(400, f"Unsupported file format: {ext}")

    doc_id = generate_doc_id()

    raw_dir = "/airegulation/demo-mao/backend/data/raw"
    os.makedirs(raw_dir, exist_ok=True)
    # Prefix with the document id so two uploads with the same name do not
    # overwrite each other (assuming generate_doc_id returns distinct ids).
    file_path = os.path.join(raw_dir, f"{doc_id}_{filename}")

    # The whole upload is read into memory before being written to disk.
    content = await file.read()
    with open(file_path, "wb") as f:
        f.write(content)
    size = len(content)

    documents_store[doc_id] = {
        "id": doc_id,
        "name": filename,
        "path": file_path,
        "size": size,
        "status": "uploaded",
        "chunks": 0,
        "created_at": datetime.now(),
    }

    return DocumentUploadResponse(
        doc_id=doc_id,
        filename=filename,
        size=size,
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.get("/list", response_model=DocumentListResponse)
async def list_documents():
    """Return every document currently held in the in-memory store.

    Each stored record is projected onto a ``DocumentInfo``; ``created_at``
    is optional on a record and is passed as ``None`` when absent.
    """
    listing = []
    for record in documents_store.values():
        info = DocumentInfo(
            id=record["id"],
            name=record["name"],
            chunks=record["chunks"],
            status=record["status"],
            created_at=record.get("created_at"),
        )
        listing.append(info)
    return DocumentListResponse(docs=listing)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.post("/parse/{doc_id}", response_model=ParseResponse)
async def parse_document(doc_id: str):
    """Mark a stored document as parsed and assign it a chunk count.

    The chunk count is derived from the recorded size (one chunk per 8000
    bytes, never fewer than 20); records without a size are treated as
    100000 bytes.

    Raises:
        HTTPException: 404 when ``doc_id`` is not in the store.
    """
    try:
        record = documents_store[doc_id]
    except KeyError:
        raise HTTPException(404, "Document not found") from None

    record["status"] = "parsed"

    size_in_bytes = record.get("size", 100000)
    chunk_count = max(20, size_in_bytes // 8000)
    record["chunks"] = chunk_count

    return ParseResponse(doc_id=doc_id, chunks=chunk_count)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.post("/embed/{doc_id}", response_model=EmbedResponse)
async def embed_document(doc_id: str):
    """Mark a stored document as indexed and report its vector count.

    The reported vector count is the record's current ``chunks`` value.

    Raises:
        HTTPException: 404 when ``doc_id`` is not in the store.
    """
    try:
        record = documents_store[doc_id]
    except KeyError:
        raise HTTPException(404, "Document not found") from None

    # Status is updated before the chunk count is read, as in the original.
    record["status"] = "indexed"
    vector_count = record["chunks"]

    return EmbedResponse(doc_id=doc_id, vectors=vector_count)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.delete("/delete/{doc_id}")
async def delete_document(doc_id: str):
    """Remove a document record from the in-memory store.

    Only the store entry is removed; this handler does not touch any file
    on disk.

    Raises:
        HTTPException: 404 when ``doc_id`` is not in the store.
    """
    try:
        del documents_store[doc_id]
    except KeyError:
        raise HTTPException(404, "Document not found") from None
    return {"success": True}
|