Fix SSE route dependency and align architecture docs

This commit is contained in:
ash66
2026-05-18 16:32:42 +08:00
parent 86b9ac806a
commit 3f69cad404
149 changed files with 4786 additions and 5957 deletions

View File

@@ -1,3 +1,5 @@
"""Define API routes for docs."""
from fastapi import APIRouter, UploadFile, File, HTTPException
import os
import uuid
@@ -10,30 +12,32 @@ from app.schemas.doc import (
EmbedResponse,
)
from app.services.mock_data import get_mock_documents, generate_doc_id
# Router for the document-management endpoints; every route below is served
# under the /docs prefix.
router = APIRouter(prefix="/docs", tags=["文档管理"])

# Temporary in-memory registry of document records keyed by document id.
# It also holds the preset mock documents; nothing here is persisted.
documents_store: dict[str, dict] = {}

# Seed the registry with the mock documents when the module is imported.
for doc in get_mock_documents():
    documents_store[doc["id"]] = doc
@router.post("/upload", response_model=DocumentUploadResponse)
async def upload_document(file: UploadFile = File(...)):
"""上传法规文档"""
# 检查文件格式
"""Handle upload document."""
# Only files with one of these extensions are accepted for upload.
allowed_ext = [".pdf", ".docx", ".doc", ".txt"]
ext = os.path.splitext(file.filename)[1].lower()
if ext not in allowed_ext:
raise HTTPException(400, f"Unsupported file format: {ext}")
# Generate a new document ID.
doc_id = generate_doc_id()
# Save the uploaded file under the raw-data directory.
raw_dir = "/airegulation/demo-mao/backend/data/raw"
os.makedirs(raw_dir, exist_ok=True)
file_path = os.path.join(raw_dir, f"{doc_id}_{file.filename}")
@@ -42,7 +46,7 @@ async def upload_document(file: UploadFile = File(...)):
with open(file_path, "wb") as f:
f.write(content)
# Record the document's metadata in the in-memory store.
documents_store[doc_id] = {
"id": doc_id,
"name": file.filename,
@@ -62,7 +66,7 @@ async def upload_document(file: UploadFile = File(...)):
@router.get("/list", response_model=DocumentListResponse)
async def list_documents():
"""获取已索引文档列表"""
"""List documents."""
docs = [
DocumentInfo(
id=d["id"],
@@ -78,14 +82,14 @@ async def list_documents():
@router.post("/parse/{doc_id}", response_model=ParseResponse)
async def parse_document(doc_id: str):
"""解析文档并分块"""
"""Parse document."""
if doc_id not in documents_store:
raise HTTPException(404, "Document not found")
doc = documents_store[doc_id]
# Simulated parsing: only the status flag is updated.
doc["status"] = "parsed"
# Derive the chunk count from the file size: one chunk per 8000 units of
# size (presumably bytes — confirm), never fewer than 20.
file_size = doc.get("size", 100000)
doc["chunks"] = max(20, file_size // 8000)
@@ -94,12 +98,12 @@ async def parse_document(doc_id: str):
@router.post("/embed/{doc_id}", response_model=EmbedResponse)
async def embed_document(doc_id: str):
    """Embed a document and mark it as indexed.

    The embedding step is simulated: no vectors are computed here. The
    stored record's status is set to "indexed" and its existing chunk
    count is reported back as the number of vectors.

    Args:
        doc_id: Identifier of the document in ``documents_store``.

    Returns:
        An ``EmbedResponse`` carrying ``doc_id`` and the record's
        ``"chunks"`` value as ``vectors``.

    Raises:
        HTTPException: 404 when ``doc_id`` is not in ``documents_store``.
    """
    if doc_id not in documents_store:
        raise HTTPException(404, "Document not found")
    doc = documents_store[doc_id]
    # Simulated embedding: only the status flag changes.
    doc["status"] = "indexed"
    # NOTE(review): "chunks" is read without a default — presumably every
    # record has it set before this endpoint is called; confirm.
    return EmbedResponse(doc_id=doc_id, vectors=doc["chunks"])
@@ -107,7 +111,7 @@ async def embed_document(doc_id: str):
@router.delete("/delete/{doc_id}")
async def delete_document(doc_id: str):
"""删除文档"""
"""Delete document."""
if doc_id not in documents_store:
raise HTTPException(404, "Document not found")