"""Define API routes for docs.""" from fastapi import APIRouter, UploadFile, File, HTTPException import os import uuid from datetime import datetime from app.schemas.doc import ( DocumentUploadResponse, DocumentListResponse, DocumentInfo, ParseResponse, EmbedResponse, ) from app.services.mock_data import get_mock_documents, generate_doc_id # Keep route handlers close to their transport-layer wiring for easier auditing. router = APIRouter(prefix="/docs", tags=["文档管理"]) # Keep route handlers close to their transport-layer wiring for easier auditing. documents_store: dict[str, dict] = {} # Keep route handlers close to their transport-layer wiring for easier auditing. for doc in get_mock_documents(): documents_store[doc["id"]] = doc @router.post("/upload", response_model=DocumentUploadResponse) async def upload_document(file: UploadFile = File(...)): """Handle upload document.""" # Keep route handlers close to their transport-layer wiring for easier auditing. allowed_ext = [".pdf", ".docx", ".doc", ".txt"] ext = os.path.splitext(file.filename)[1].lower() if ext not in allowed_ext: raise HTTPException(400, f"Unsupported file format: {ext}") # Keep route handlers close to their transport-layer wiring for easier auditing. doc_id = generate_doc_id() # Keep route handlers close to their transport-layer wiring for easier auditing. raw_dir = "/airegulation/demo-mao/backend/data/raw" os.makedirs(raw_dir, exist_ok=True) file_path = os.path.join(raw_dir, f"{doc_id}_{file.filename}") content = await file.read() with open(file_path, "wb") as f: f.write(content) # Keep route handlers close to their transport-layer wiring for easier auditing. documents_store[doc_id] = { "id": doc_id, "name": file.filename, "path": file_path, "size": len(content), "status": "uploaded", "chunks": 0, "created_at": datetime.now(), } return DocumentUploadResponse( doc_id=doc_id, filename=file.filename, size=len(content), ) @router.get("/list", response_model=DocumentListResponse) async def list_documents(): """List documents.""" docs = [ DocumentInfo( id=d["id"], name=d["name"], chunks=d["chunks"], status=d["status"], created_at=d.get("created_at"), ) for d in documents_store.values() ] return DocumentListResponse(docs=docs) @router.post("/parse/{doc_id}", response_model=ParseResponse) async def parse_document(doc_id: str): """Parse document.""" if doc_id not in documents_store: raise HTTPException(404, "Document not found") doc = documents_store[doc_id] # Keep route handlers close to their transport-layer wiring for easier auditing. doc["status"] = "parsed" # Keep route handlers close to their transport-layer wiring for easier auditing. file_size = doc.get("size", 100000) doc["chunks"] = max(20, file_size // 8000) return ParseResponse(doc_id=doc_id, chunks=doc["chunks"]) @router.post("/embed/{doc_id}", response_model=EmbedResponse) async def embed_document(doc_id: str): """Embed document.""" if doc_id not in documents_store: raise HTTPException(404, "Document not found") doc = documents_store[doc_id] # Keep route handlers close to their transport-layer wiring for easier auditing. doc["status"] = "indexed" return EmbedResponse(doc_id=doc_id, vectors=doc["chunks"]) @router.delete("/delete/{doc_id}") async def delete_document(doc_id: str): """Delete document.""" if doc_id not in documents_store: raise HTTPException(404, "Document not found") del documents_store[doc_id] return {"success": True}