初始化

2026-05-11 11:22:55 +08:00
parent 5f6c571434
commit 80dcd070f7
39 changed files with 1997 additions and 0 deletions
--- a/app/api/routes/docs.py
+++ b/app/api/routes/docs.py
@@ -0,0 +1,115 @@
+from fastapi import APIRouter, UploadFile, File, HTTPException
+import os
+import uuid
+from datetime import datetime
+from app.schemas.doc import (
+    DocumentUploadResponse,
+    DocumentListResponse,
+    DocumentInfo,
+    ParseResponse,
+    EmbedResponse,
+)
+from app.services.mock_data import get_mock_documents, generate_doc_id
+
+router = APIRouter(prefix="/docs", tags=["文档管理"])
+
+# 临时存储文档信息（包含预设的mock文档）
+documents_store: dict[str, dict] = {}
+
+# 初始化时加载mock文档
+for doc in get_mock_documents():
+    documents_store[doc["id"]] = doc
+
+
+@router.post("/upload", response_model=DocumentUploadResponse)
+async def upload_document(file: UploadFile = File(...)):
+    """上传法规文档"""
+    # 检查文件格式
+    allowed_ext = [".pdf", ".docx", ".doc", ".txt"]
+    ext = os.path.splitext(file.filename)[1].lower()
+    if ext not in allowed_ext:
+        raise HTTPException(400, f"Unsupported file format: {ext}")
+
+    # 生成文档ID
+    doc_id = generate_doc_id()
+
+    # 保存文件
+    raw_dir = "/airegulation/demo-mao/backend/data/raw"
+    os.makedirs(raw_dir, exist_ok=True)
+    file_path = os.path.join(raw_dir, f"{doc_id}_{file.filename}")
+
+    content = await file.read()
+    with open(file_path, "wb") as f:
+        f.write(content)
+
+    # 记录文档信息
+    documents_store[doc_id] = {
+        "id": doc_id,
+        "name": file.filename,
+        "path": file_path,
+        "size": len(content),
+        "status": "uploaded",
+        "chunks": 0,
+        "created_at": datetime.now(),
+    }
+
+    return DocumentUploadResponse(
+        doc_id=doc_id,
+        filename=file.filename,
+        size=len(content),
+    )
+
+
+@router.get("/list", response_model=DocumentListResponse)
+async def list_documents():
+    """获取已索引文档列表"""
+    docs = [
+        DocumentInfo(
+            id=d["id"],
+            name=d["name"],
+            chunks=d["chunks"],
+            status=d["status"],
+            created_at=d.get("created_at"),
+        )
+        for d in documents_store.values()
+    ]
+    return DocumentListResponse(docs=docs)
+
+
+@router.post("/parse/{doc_id}", response_model=ParseResponse)
+async def parse_document(doc_id: str):
+    """解析文档并分块"""
+    if doc_id not in documents_store:
+        raise HTTPException(404, "Document not found")
+
+    doc = documents_store[doc_id]
+    # 模拟解析逻辑
+    doc["status"] = "parsed"
+    # 根据文件大小计算chunks数量
+    file_size = doc.get("size", 100000)
+    doc["chunks"] = max(20, file_size // 8000)
+
+    return ParseResponse(doc_id=doc_id, chunks=doc["chunks"])
+
+
+@router.post("/embed/{doc_id}", response_model=EmbedResponse)
+async def embed_document(doc_id: str):
+    """嵌入并存入向量库"""
+    if doc_id not in documents_store:
+        raise HTTPException(404, "Document not found")
+
+    doc = documents_store[doc_id]
+    # 模拟嵌入逻辑
+    doc["status"] = "indexed"
+
+    return EmbedResponse(doc_id=doc_id, vectors=doc["chunks"])
+
+
+@router.delete("/delete/{doc_id}")
+async def delete_document(doc_id: str):
+    """删除文档"""
+    if doc_id not in documents_store:
+        raise HTTPException(404, "Document not found")
+
+    del documents_store[doc_id]
+    return {"success": True}