Refactor code structure for improved readability and maintainability

This commit is contained in:
ash66
2026-05-14 18:09:15 +08:00
parent 10d04c4083
commit 35cd927d02
105 changed files with 9043 additions and 7720 deletions

View File

@@ -1,6 +1,6 @@
# AI+合规智能中枢后端
`backend` 已承接原 `src` 的完整 FastAPI 后端能力,当前正式入口为 `app.main:app`
`backend` 是当前正式使用的 FastAPI 后端目录,入口为 `app.main:app`
## 启动
@@ -9,10 +9,10 @@ pip install -r backend/requirements.txt
PYTHONPATH=backend uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload
```
也可以直接使用根目录脚本:
也可以直接使用根目录统一脚本:
```bash
./start_api.sh
./dev.sh start api --foreground
```
## 主要接口
@@ -45,6 +45,5 @@ backend/
## 说明
- `backend/app/api/main.py` 来自原 `src/api/main.py`,已切换为 `app.*` 导入。
- 路由前缀保持为 `/api/v1`,以兼容当前前端。
- `backend/app/api/routes/docs.py`、`rag.py`、`compliance.py`、`status.py` 仍保留在仓库中,但不再作为主路由入口。

View File

@@ -1,2 +1 @@
# src/api/__init__.py
"""API接口模块"""
"""API接口模块"""

View File

@@ -1,4 +1,3 @@
# src/api/models/__init__.py
"""API数据模型"""
from .document import (
@@ -19,4 +18,4 @@ __all__ = [
"SearchResponse",
"DocumentStatusResponse",
"ErrorResponse"
]
]

View File

@@ -1,4 +1,3 @@
# src/api/models/document.py
"""文档相关Pydantic数据模型"""
from pydantic import BaseModel, Field
@@ -60,4 +59,4 @@ class ErrorResponse(BaseModel):
"""错误响应"""
error: str = Field(..., description="错误类型")
message: str = Field(..., description="错误消息")
timestamp: datetime = Field(default_factory=datetime.now, description="时间戳")
timestamp: datetime = Field(default_factory=datetime.now, description="时间戳")

View File

@@ -1,4 +1,3 @@
# src/api/routes/__init__.py
"""API路由模块"""
from fastapi import APIRouter
@@ -14,4 +13,4 @@ api_router.include_router(documents_router)
api_router.include_router(knowledge_router)
api_router.include_router(agent_router)
__all__ = ["api_router", "documents_router", "knowledge_router", "agent_router"]
__all__ = ["api_router", "documents_router", "knowledge_router", "agent_router"]

View File

@@ -1,4 +1,3 @@
# src/api/routes/agent.py
"""Agent API接口 - 问答对话接口"""
from fastapi import APIRouter, HTTPException, Depends

View File

@@ -1,4 +1,3 @@
# src/api/routes/documents.py
"""文档上传与处理接口"""
from fastapi import APIRouter, UploadFile, File, Form, HTTPException

View File

@@ -1,4 +1,3 @@
# src/api/routes/knowledge.py
"""知识库检索接口"""
from fastapi import APIRouter, HTTPException

View File

@@ -1,4 +1,3 @@
# src/config/__init__.py
"""配置模块"""
from .settings import Settings, get_settings, settings

View File

@@ -1,4 +1,3 @@
# src/config/logging.py
"""日志配置"""
from loguru import logger
@@ -29,4 +28,4 @@ def setup_logging(level: str = "INFO"):
compression="zip"
)
return logger
return logger

View File

@@ -1,4 +1,3 @@
# src/config/settings.py
"""配置管理 - 环境变量和默认配置"""
from pydantic_settings import BaseSettings

View File

@@ -1,7 +1,6 @@
# src/services/agent/__init__.py
"""Agent服务模块"""
from .qa_agent import QAAgent, ask_compliance_question
from .session_manager import SessionManager, ChatSession
__all__ = ["QAAgent", "ask_compliance_question", "SessionManager", "ChatSession"]
__all__ = ["QAAgent", "ask_compliance_question", "SessionManager", "ChatSession"]

View File

@@ -1,4 +1,3 @@
# src/services/agent/qa_agent.py
"""RAG问答Agent - 合规智能问答核心实现"""
import time

View File

@@ -1,4 +1,3 @@
# src/services/agent/session_manager.py
"""多轮对话会话管理"""
import time
@@ -244,4 +243,4 @@ class SessionManager:
def clear_all_sessions(self):
"""清空所有会话"""
self._sessions.clear()
logger.info("所有会话已清空")
logger.info("所有会话已清空")

View File

@@ -1,4 +1,3 @@
# src/services/document_processor.py
"""文档处理主流程 - 解析→摘要→分块→嵌入→入库"""
import os

View File

@@ -1,7 +1,6 @@
# src/services/embedding/__init__.py
"""嵌入和分块服务"""
from .text_chunker import RegulationChunker
from .bge_m3_embedder import BGEM3Embedder
__all__ = ["RegulationChunker", "BGEM3Embedder"]
__all__ = ["RegulationChunker", "BGEM3Embedder"]

View File

@@ -1,4 +1,3 @@
# src/services/embedding/bge_m3_embedder.py
"""BGE-M3嵌入服务 - Dense+Sparse双路向量生成"""
import numpy as np
@@ -293,4 +292,4 @@ def embed_single_text(
) -> Dict:
"""便捷函数:对单个文本生成嵌入"""
embedder = BGEM3Embedder(model_name=model_name, **kwargs)
return embedder.embed_single(text)
return embedder.embed_single(text)

View File

@@ -1,4 +1,3 @@
# src/services/embedding/text_chunker.py
"""智能分块器 - 章节级+条款级双粒度切割"""
import re
@@ -446,4 +445,4 @@ def chunk_regulation_document(
doc_name,
regulation_type,
version
)
)

View File

@@ -1,4 +1,3 @@
# src/services/llm/__init__.py
"""LLM服务模块"""
from .llm_factory import LLMFactory, get_llm_client
@@ -12,4 +11,4 @@ __all__ = [
"BaseLLMClient", "LLMResponse", "LLMConfig", "LLMProvider",
"DeepSeekClient", "QwenClient", "QwenVLClient",
"DocumentSummarizer", "summarize_document", "DocumentSummary"
]
]

View File

@@ -1,4 +1,3 @@
# src/services/llm/base_client.py
"""LLM客户端基类 - 统一接口定义"""
from abc import ABC, abstractmethod
@@ -113,4 +112,4 @@ class BaseLLMClient(ABC):
# 中文字符约1.5 token,英文约0.25 token
chinese_chars = sum(1 for c in text if '\u4e00' <= c <= '\u9fff')
other_chars = len(text) - chinese_chars
return int(chinese_chars * 1.5 + other_chars * 0.25)
return int(chinese_chars * 1.5 + other_chars * 0.25)

View File

@@ -1,4 +1,3 @@
# src/services/llm/deepseek_client.py
"""DeepSeek LLM客户端 - OpenAI兼容API"""
import time

View File

@@ -1,4 +1,3 @@
# src/services/llm/document_summarizer.py
"""文档摘要生成服务 - LLM生成法规文档摘要"""
from typing import Dict, Optional

View File

@@ -1,4 +1,3 @@
# src/services/llm/llm_factory.py
"""LLM工厂 - 统一创建和管理LLM客户端"""
from typing import Optional, Dict, Any

View File

@@ -1,4 +1,3 @@
# src/services/llm/qwen_client.py
"""Qwen LLM客户端 - 支持OpenAI兼容API格式"""
import time

View File

@@ -1,7 +1,6 @@
# src/services/parser/__init__.py
"""文档解析服务"""
from .pdf_parser import PDFParser
from .docx_parser import DocxParser
__all__ = ["PDFParser", "DocxParser"]
__all__ = ["PDFParser", "DocxParser"]

View File

@@ -1,4 +1,3 @@
# src/services/parser/docx_parser.py
"""Word文档解析 - 使用python-docx"""
from docx import Document
@@ -284,4 +283,4 @@ def parse_docx(file_path: str) -> DocxDocumentContent:
def parse_docx_to_markdown(file_path: str) -> str:
"""便捷函数:解析Word并返回Markdown"""
parser = DocxParser()
return parser.parse_to_markdown(file_path)
return parser.parse_to_markdown(file_path)

View File

@@ -1,4 +1,3 @@
# src/services/parser/mineru_parser.py
"""MinerU多模态PDF解析 - 版面感知解析"""
from typing import Optional, Dict
@@ -201,4 +200,4 @@ def parse_with_mineru(file_path: str) -> MinerUResult:
def parse_pdf_smart(file_path: str) -> str:
"""便捷函数:智能解析PDF,自动选择最佳解析器"""
orchestrator = ParserOrchestrator()
return orchestrator.parse_pdf(file_path)
return orchestrator.parse_pdf(file_path)

View File

@@ -1,4 +1,3 @@
# src/services/parser/pdf_parser.py
"""PDF文档解析 - 使用PyMuPDF基础解析"""
import fitz # PyMuPDF
@@ -265,4 +264,4 @@ def parse_pdf(file_path: str, **kwargs) -> PDFDocumentContent:
def parse_pdf_to_markdown(file_path: str) -> str:
"""便捷函数:解析PDF并返回Markdown"""
parser = PDFParser()
return parser.parse_to_markdown(file_path)
return parser.parse_to_markdown(file_path)

View File

@@ -1,4 +1,3 @@
# src/services/rag/__init__.py
"""RAG服务模块"""
from .retriever import Retriever, retrieve_regulations
@@ -9,4 +8,4 @@ __all__ = [
"Retriever", "retrieve_regulations",
"ContextBuilder", "build_rag_context",
"PromptTemplates", "get_prompt_template"
]
]

View File

@@ -1,4 +1,3 @@
# src/services/rag/context_builder.py
"""RAG上下文构建服务 - 构建LLM输入上下文"""
from typing import List, Dict, Optional

View File

@@ -1,4 +1,3 @@
# src/services/rag/prompt_templates.py
"""RAG Prompt模板 - 合规问答专用Prompt"""
from typing import Dict, Optional
@@ -293,4 +292,4 @@ def get_prompt_template(name: str) -> PromptTemplate:
template = PromptTemplates.get_template(name)
if not template:
raise ValueError(f"不存在的模板: {name}")
return template
return template

View File

@@ -1,4 +1,3 @@
# src/services/rag/retriever.py
"""RAG检索服务 - 封装Milvus检索"""
from typing import List, Dict, Optional, Any

View File

@@ -1,7 +1,6 @@
# src/services/storage/__init__.py
"""存储服务"""
from .milvus_client import MilvusClient
from .minio_client import MinIOClient
__all__ = ["MilvusClient", "MinIOClient"]
__all__ = ["MilvusClient", "MinIOClient"]

View File

@@ -1,4 +1,3 @@
# src/services/storage/milvus_client.py
"""Milvus向量数据库客户端 - 存储与检索服务"""
from pymilvus import (

View File

@@ -1,4 +1,3 @@
# src/services/storage/minio_client.py
"""MinIO对象存储客户端 - 文档文件存储"""
from minio import Minio

View File

@@ -1,2 +1 @@
# src/workers/__init__.py
"""异步任务Worker模块"""
"""异步任务Worker模块"""

View File

@@ -1,35 +0,0 @@
[project]
name = "ai-regulations-backend"
version = "0.1.0"
description = "Migrated FastAPI backend for AI regulations demo"
readme = "README.md"
requires-python = ">=3.10"
dependencies = [
"fastapi>=0.110.0",
"uvicorn[standard]>=0.27.0",
"python-multipart>=0.0.9",
"pydantic>=2.0.0",
"pydantic-settings>=2.0.0",
"python-dotenv>=1.0.0",
"loguru>=0.7.0",
"httpx>=0.25.0",
"tiktoken>=0.5.0",
"tenacity>=8.2.0",
"pymilvus>=2.4.0",
"minio>=7.1.0",
"pymupdf>=1.24.0",
"python-docx>=1.1.0",
"FlagEmbedding>=1.2.0",
"sentence-transformers>=2.2.0",
"torch>=2.0.0",
"numpy>=1.24.0",
"langchain>=0.1.0",
"langchain-milvus>=0.1.0",
]
[project.scripts]
backend = "main:main"
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"