初始化
This commit is contained in:
52
.env
Normal file
52
.env
Normal file
@@ -0,0 +1,52 @@
|
||||
# DashScope API
|
||||
DASHSCOPE_API_KEY=your_api_key_here
|
||||
|
||||
# PostgreSQL
|
||||
POSTGRES_HOST=localhost
|
||||
POSTGRES_PORT=5432
|
||||
POSTGRES_USER=postgresql
|
||||
POSTGRES_PASSWORD=postgresql123456
|
||||
POSTGRES_DB=mydb
|
||||
|
||||
# Redis
|
||||
REDIS_HOST=localhost
|
||||
REDIS_PORT=6379
|
||||
REDIS_PASSWORD=redis@123
|
||||
|
||||
# Milvus
|
||||
MILVUS_HOST=localhost
|
||||
MILVUS_PORT=19530
|
||||
|
||||
# MinIO
|
||||
MINIO_ENDPOINT=localhost:9000
|
||||
MINIO_ACCESS_KEY=minioadmin
|
||||
MINIO_SECRET_KEY=minioadmin
|
||||
|
||||
# Neo4j
|
||||
NEO4J_URI=bolt://localhost:7687
|
||||
NEO4J_USER=neo4j
|
||||
NEO4J_PASSWORD=neo4j123
|
||||
|
||||
# RabbitMQ
|
||||
RABBITMQ_HOST=localhost
|
||||
RABBITMQ_PORT=5672
|
||||
RABBITMQ_USER=admin
|
||||
RABBITMQ_PASSWORD=admin@123
|
||||
|
||||
# LLM配置
|
||||
LLM_MODEL=qwen-max
|
||||
EMBEDDING_MODEL=text-embedding-v3
|
||||
EMBEDDING_DIM=1536
|
||||
|
||||
# 检索配置
|
||||
VECTOR_TOP_K=10
|
||||
BM25_TOP_K=10
|
||||
FINAL_TOP_K=5
|
||||
|
||||
# 分块配置
|
||||
CHUNK_SIZE=800
|
||||
CHUNK_OVERLAP=50
|
||||
|
||||
# 服务配置
|
||||
API_HOST=0.0.0.0
|
||||
API_PORT=8000
|
||||
24
.env.example
Normal file
24
.env.example
Normal file
@@ -0,0 +1,24 @@
|
||||
# DashScope API
|
||||
DASHSCOPE_API_KEY=your_api_key_here
|
||||
|
||||
# Milvus
|
||||
MILVUS_HOST=localhost
|
||||
MILVUS_PORT=19530
|
||||
|
||||
# LLM配置
|
||||
LLM_MODEL=qwen-max
|
||||
EMBEDDING_MODEL=text-embedding-v3
|
||||
EMBEDDING_DIM=1536
|
||||
|
||||
# 检索配置
|
||||
VECTOR_TOP_K=10
|
||||
BM25_TOP_K=10
|
||||
FINAL_TOP_K=5
|
||||
|
||||
# 分块配置
|
||||
CHUNK_SIZE=800
|
||||
CHUNK_OVERLAP=50
|
||||
|
||||
# 服务配置
|
||||
API_HOST=0.0.0.0
|
||||
API_PORT=8000
|
||||
10
.gitignore
vendored
Normal file
10
.gitignore
vendored
Normal file
@@ -0,0 +1,10 @@
|
||||
# Python-generated files
|
||||
__pycache__/
|
||||
*.py[oc]
|
||||
build/
|
||||
dist/
|
||||
wheels/
|
||||
*.egg-info
|
||||
|
||||
# Virtual environments
|
||||
.venv
|
||||
1
.python-version
Normal file
1
.python-version
Normal file
@@ -0,0 +1 @@
|
||||
3.9
|
||||
18
Dockerfile
Normal file
18
Dockerfile
Normal file
@@ -0,0 +1,18 @@
|
||||
FROM python:3.11-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# 安装依赖
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# 复制代码
|
||||
COPY app/ ./app/
|
||||
COPY data/ ./data/
|
||||
|
||||
# 环境变量
|
||||
ENV API_HOST=0.0.0.0
|
||||
ENV API_PORT=8000
|
||||
|
||||
# 启动命令
|
||||
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
3
app/__init__.py
Normal file
3
app/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
from .main import app
|
||||
|
||||
__all__ = ["app"]
|
||||
3
app/api/__init__.py
Normal file
3
app/api/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
from .routes import api_router
|
||||
|
||||
__all__ = ["api_router"]
|
||||
13
app/api/routes/__init__.py
Normal file
13
app/api/routes/__init__.py
Normal file
@@ -0,0 +1,13 @@
|
||||
from fastapi import APIRouter
|
||||
from .docs import router as docs_router
|
||||
from .rag import router as rag_router
|
||||
from .compliance import router as compliance_router
|
||||
from .status import router as status_router
|
||||
|
||||
api_router = APIRouter()
|
||||
api_router.include_router(docs_router)
|
||||
api_router.include_router(rag_router)
|
||||
api_router.include_router(compliance_router)
|
||||
api_router.include_router(status_router)
|
||||
|
||||
__all__ = ["api_router"]
|
||||
96
app/api/routes/compliance.py
Normal file
96
app/api/routes/compliance.py
Normal file
@@ -0,0 +1,96 @@
|
||||
from fastapi import APIRouter, UploadFile, File, HTTPException
|
||||
from sse_starlette.sse import EventSourceResponse
|
||||
import uuid
|
||||
import os
|
||||
import json
|
||||
import asyncio
|
||||
from app.schemas.compliance import (
|
||||
AnalyzeResponse,
|
||||
ComplianceChatRequest,
|
||||
)
|
||||
from app.services.mock_data import (
|
||||
generate_task_id,
|
||||
get_mock_compliance_result,
|
||||
get_mock_compliance_chat_response,
|
||||
)
|
||||
|
||||
router = APIRouter(prefix="/compliance", tags=["合规分析"])
|
||||
|
||||
# 临时存储分析任务
|
||||
tasks_store: dict[str, dict] = {}
|
||||
|
||||
|
||||
@router.post("/analyze", response_model=AnalyzeResponse)
|
||||
async def analyze_document(file: UploadFile = File(...)):
|
||||
"""上传设计方案进行分析"""
|
||||
# 生成任务ID
|
||||
task_id = generate_task_id()
|
||||
|
||||
# 保存文件
|
||||
raw_dir = "/airegulation/demo-mao/backend/data/raw"
|
||||
os.makedirs(raw_dir, exist_ok=True)
|
||||
file_path = os.path.join(raw_dir, f"compliance_{task_id}_{file.filename}")
|
||||
|
||||
content = await file.read()
|
||||
with open(file_path, "wb") as f:
|
||||
f.write(content)
|
||||
|
||||
# 记录任务
|
||||
tasks_store[task_id] = {
|
||||
"task_id": task_id,
|
||||
"file_path": file_path,
|
||||
"status": "processing",
|
||||
"result": None,
|
||||
}
|
||||
|
||||
# 模拟异步处理完成(立即返回结果)
|
||||
# 实际应用中这应该是后台任务
|
||||
tasks_store[task_id]["status"] = "completed"
|
||||
tasks_store[task_id]["result"] = get_mock_compliance_result(task_id)
|
||||
|
||||
return AnalyzeResponse(task_id=task_id)
|
||||
|
||||
|
||||
@router.get("/result/{task_id}")
|
||||
async def get_result(task_id: str):
|
||||
"""获取分析结果"""
|
||||
if task_id not in tasks_store:
|
||||
# 如果任务ID不存在,返回默认mock结果
|
||||
return get_mock_compliance_result(task_id)
|
||||
|
||||
task = tasks_store[task_id]
|
||||
|
||||
if task["status"] == "processing":
|
||||
return {"status": "processing", "message": "分析进行中"}
|
||||
|
||||
return task["result"]
|
||||
|
||||
|
||||
@router.post("/chat/{segment_id}")
|
||||
async def compliance_chat(segment_id: int, request: ComplianceChatRequest):
|
||||
"""针对段落进行合规对话"""
|
||||
# 根据segment_id获取对应的intent
|
||||
intent_map = {
|
||||
1: "车身结构设计",
|
||||
2: "动力系统配置",
|
||||
3: "安全配置设计",
|
||||
}
|
||||
intent = intent_map.get(segment_id, "车身结构设计")
|
||||
|
||||
async def generate():
|
||||
# 获取预设响应
|
||||
response = get_mock_compliance_chat_response(intent, request.query)
|
||||
|
||||
# 流式输出响应
|
||||
sentences = response.split("\n\n")
|
||||
for sentence in sentences:
|
||||
if sentence.strip():
|
||||
chunks = sentence.split("\n")
|
||||
for chunk in chunks:
|
||||
if chunk.strip():
|
||||
await asyncio.sleep(0.05)
|
||||
yield {"event": "message", "data": json.dumps({"type": "chunk", "text": chunk + "\n"})}
|
||||
|
||||
yield {"event": "message", "data": json.dumps({"type": "done"})}
|
||||
|
||||
return EventSourceResponse(generate())
|
||||
115
app/api/routes/docs.py
Normal file
115
app/api/routes/docs.py
Normal file
@@ -0,0 +1,115 @@
|
||||
from fastapi import APIRouter, UploadFile, File, HTTPException
|
||||
import os
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from app.schemas.doc import (
|
||||
DocumentUploadResponse,
|
||||
DocumentListResponse,
|
||||
DocumentInfo,
|
||||
ParseResponse,
|
||||
EmbedResponse,
|
||||
)
|
||||
from app.services.mock_data import get_mock_documents, generate_doc_id
|
||||
|
||||
router = APIRouter(prefix="/docs", tags=["文档管理"])
|
||||
|
||||
# 临时存储文档信息(包含预设的mock文档)
|
||||
documents_store: dict[str, dict] = {}
|
||||
|
||||
# 初始化时加载mock文档
|
||||
for doc in get_mock_documents():
|
||||
documents_store[doc["id"]] = doc
|
||||
|
||||
|
||||
@router.post("/upload", response_model=DocumentUploadResponse)
|
||||
async def upload_document(file: UploadFile = File(...)):
|
||||
"""上传法规文档"""
|
||||
# 检查文件格式
|
||||
allowed_ext = [".pdf", ".docx", ".doc", ".txt"]
|
||||
ext = os.path.splitext(file.filename)[1].lower()
|
||||
if ext not in allowed_ext:
|
||||
raise HTTPException(400, f"Unsupported file format: {ext}")
|
||||
|
||||
# 生成文档ID
|
||||
doc_id = generate_doc_id()
|
||||
|
||||
# 保存文件
|
||||
raw_dir = "/airegulation/demo-mao/backend/data/raw"
|
||||
os.makedirs(raw_dir, exist_ok=True)
|
||||
file_path = os.path.join(raw_dir, f"{doc_id}_{file.filename}")
|
||||
|
||||
content = await file.read()
|
||||
with open(file_path, "wb") as f:
|
||||
f.write(content)
|
||||
|
||||
# 记录文档信息
|
||||
documents_store[doc_id] = {
|
||||
"id": doc_id,
|
||||
"name": file.filename,
|
||||
"path": file_path,
|
||||
"size": len(content),
|
||||
"status": "uploaded",
|
||||
"chunks": 0,
|
||||
"created_at": datetime.now(),
|
||||
}
|
||||
|
||||
return DocumentUploadResponse(
|
||||
doc_id=doc_id,
|
||||
filename=file.filename,
|
||||
size=len(content),
|
||||
)
|
||||
|
||||
|
||||
@router.get("/list", response_model=DocumentListResponse)
|
||||
async def list_documents():
|
||||
"""获取已索引文档列表"""
|
||||
docs = [
|
||||
DocumentInfo(
|
||||
id=d["id"],
|
||||
name=d["name"],
|
||||
chunks=d["chunks"],
|
||||
status=d["status"],
|
||||
created_at=d.get("created_at"),
|
||||
)
|
||||
for d in documents_store.values()
|
||||
]
|
||||
return DocumentListResponse(docs=docs)
|
||||
|
||||
|
||||
@router.post("/parse/{doc_id}", response_model=ParseResponse)
|
||||
async def parse_document(doc_id: str):
|
||||
"""解析文档并分块"""
|
||||
if doc_id not in documents_store:
|
||||
raise HTTPException(404, "Document not found")
|
||||
|
||||
doc = documents_store[doc_id]
|
||||
# 模拟解析逻辑
|
||||
doc["status"] = "parsed"
|
||||
# 根据文件大小计算chunks数量
|
||||
file_size = doc.get("size", 100000)
|
||||
doc["chunks"] = max(20, file_size // 8000)
|
||||
|
||||
return ParseResponse(doc_id=doc_id, chunks=doc["chunks"])
|
||||
|
||||
|
||||
@router.post("/embed/{doc_id}", response_model=EmbedResponse)
|
||||
async def embed_document(doc_id: str):
|
||||
"""嵌入并存入向量库"""
|
||||
if doc_id not in documents_store:
|
||||
raise HTTPException(404, "Document not found")
|
||||
|
||||
doc = documents_store[doc_id]
|
||||
# 模拟嵌入逻辑
|
||||
doc["status"] = "indexed"
|
||||
|
||||
return EmbedResponse(doc_id=doc_id, vectors=doc["chunks"])
|
||||
|
||||
|
||||
@router.delete("/delete/{doc_id}")
|
||||
async def delete_document(doc_id: str):
|
||||
"""删除文档"""
|
||||
if doc_id not in documents_store:
|
||||
raise HTTPException(404, "Document not found")
|
||||
|
||||
del documents_store[doc_id]
|
||||
return {"success": True}
|
||||
74
app/api/routes/rag.py
Normal file
74
app/api/routes/rag.py
Normal file
@@ -0,0 +1,74 @@
|
||||
from fastapi import APIRouter
|
||||
from sse_starlette.sse import EventSourceResponse
|
||||
from app.schemas.rag import RagChatRequest, QuickQuestionsResponse, QuickQuestion
|
||||
from app.services.mock_data import (
|
||||
get_mock_quick_questions,
|
||||
get_mock_retrieval,
|
||||
get_mock_rag_answer,
|
||||
)
|
||||
import json
|
||||
import asyncio
|
||||
|
||||
router = APIRouter(prefix="/rag", tags=["RAG问答"])
|
||||
|
||||
|
||||
@router.post("/chat")
|
||||
async def rag_chat(request: RagChatRequest):
|
||||
"""SSE流式问答"""
|
||||
|
||||
async def generate():
|
||||
# 发送检索开始事件
|
||||
yield {"event": "message", "data": json.dumps({"type": "retrieving"})}
|
||||
|
||||
# 模拟检索延迟
|
||||
await asyncio.sleep(0.3)
|
||||
|
||||
# 执行检索
|
||||
docs = get_mock_retrieval(request.query, top_k=request.top_k)
|
||||
|
||||
retrieved_data = [
|
||||
{
|
||||
"id": d["id"],
|
||||
"score": d["score"],
|
||||
"preview": d["preview"],
|
||||
"doc_name": d.get("doc_name", ""),
|
||||
"clause": d.get("clause", ""),
|
||||
}
|
||||
for d in docs
|
||||
]
|
||||
yield {"event": "message", "data": json.dumps({"type": "retrieved", "docs": retrieved_data})}
|
||||
|
||||
# 发送生成开始事件
|
||||
yield {"event": "message", "data": json.dumps({"type": "generating", "text": "正在生成答案..."})}
|
||||
|
||||
# 模拟生成延迟
|
||||
await asyncio.sleep(0.2)
|
||||
|
||||
# 获取预设答案
|
||||
answer = get_mock_rag_answer(request.query)
|
||||
|
||||
# 流式输出答案(按句子分割)
|
||||
sentences = answer.split("\n\n")
|
||||
for sentence in sentences:
|
||||
if sentence.strip():
|
||||
# 进一步分割长句子
|
||||
chunks = sentence.split("\n")
|
||||
for chunk in chunks:
|
||||
if chunk.strip():
|
||||
await asyncio.sleep(0.05) # 模拟生成延迟
|
||||
yield {"event": "message", "data": json.dumps({"type": "chunk", "text": chunk + "\n"})}
|
||||
|
||||
# 发送完成事件
|
||||
yield {"event": "message", "data": json.dumps({"type": "done"})}
|
||||
|
||||
return EventSourceResponse(generate())
|
||||
|
||||
|
||||
@router.get("/quick-questions", response_model=QuickQuestionsResponse)
|
||||
async def get_quick_questions():
|
||||
"""获取预设快捷问题"""
|
||||
questions = [
|
||||
QuickQuestion(id=q["id"], question=q["question"], category=q["category"])
|
||||
for q in get_mock_quick_questions()
|
||||
]
|
||||
return QuickQuestionsResponse(questions=questions)
|
||||
28
app/api/routes/status.py
Normal file
28
app/api/routes/status.py
Normal file
@@ -0,0 +1,28 @@
|
||||
from fastapi import APIRouter
|
||||
from app.core.config import settings
|
||||
from app.services.mock_data import MOCK_SYSTEM_STATS, MOCK_SYSTEM_CONFIG
|
||||
|
||||
router = APIRouter(prefix="/status", tags=["系统状态"])
|
||||
|
||||
|
||||
@router.get("/stats")
|
||||
async def get_stats():
|
||||
"""获取系统统计"""
|
||||
# 返回预设统计数据
|
||||
return MOCK_SYSTEM_STATS
|
||||
|
||||
|
||||
@router.get("/config")
|
||||
async def get_config():
|
||||
"""获取当前配置"""
|
||||
return MOCK_SYSTEM_CONFIG
|
||||
|
||||
|
||||
@router.get("/milvus/health")
|
||||
async def milvus_health():
|
||||
"""Milvus健康检查"""
|
||||
# 模拟连接状态(假数据模式下始终返回连接成功)
|
||||
return {
|
||||
"connected": True,
|
||||
"collections": ["vehicle_regulations"],
|
||||
}
|
||||
3
app/core/__init__.py
Normal file
3
app/core/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
from .config import settings, Settings
|
||||
|
||||
__all__ = ["settings", "Settings"]
|
||||
41
app/core/config.py
Normal file
41
app/core/config.py
Normal file
@@ -0,0 +1,41 @@
|
||||
from pydantic_settings import BaseSettings
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class Settings(BaseSettings):
|
||||
# DashScope API
|
||||
dashscope_api_key: str = ""
|
||||
|
||||
# Milvus
|
||||
milvus_host: str = "localhost"
|
||||
milvus_port: int = 19530
|
||||
|
||||
# LLM配置
|
||||
llm_model: str = "qwen-max"
|
||||
embedding_model: str = "text-embedding-v3"
|
||||
embedding_dim: int = 1536
|
||||
|
||||
# 检索配置
|
||||
vector_top_k: int = 10
|
||||
bm25_top_k: int = 10
|
||||
final_top_k: int = 5
|
||||
|
||||
# 分块配置
|
||||
chunk_size: int = 800
|
||||
chunk_overlap: int = 50
|
||||
|
||||
# 服务配置
|
||||
api_host: str = "0.0.0.0"
|
||||
api_port: int = 8000
|
||||
|
||||
# Collection名称
|
||||
regulations_collection: str = "vehicle_regulations"
|
||||
compliance_collection: str = "compliance_cache"
|
||||
|
||||
class Config:
|
||||
env_file = ".env"
|
||||
env_file_encoding = "utf-8"
|
||||
case_sensitive = False
|
||||
|
||||
|
||||
settings = Settings()
|
||||
68
app/main.py
Normal file
68
app/main.py
Normal file
@@ -0,0 +1,68 @@
|
||||
from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from app.api.routes import api_router
|
||||
from app.core.config import settings
|
||||
from app.utils.logger import logger
|
||||
from app.services import milvus_service
|
||||
|
||||
# 创建应用
|
||||
app = FastAPI(
|
||||
title="车辆法规智能检索系统",
|
||||
description="基于RAG技术的法规检索与合规分析后端API",
|
||||
version="1.0.0",
|
||||
)
|
||||
|
||||
# CORS配置
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"],
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
# 注册路由
|
||||
app.include_router(api_router, prefix="/api")
|
||||
|
||||
|
||||
@app.on_event("startup")
|
||||
async def startup_event():
|
||||
"""启动时初始化"""
|
||||
logger.info("Starting application...")
|
||||
|
||||
# 初始化Milvus集合(仅在服务可用时)
|
||||
try:
|
||||
if milvus_service is not None:
|
||||
milvus_service.create_regulations_collection()
|
||||
logger.info("Milvus collection initialized")
|
||||
else:
|
||||
logger.warning("Milvus service not available, using mock data")
|
||||
except Exception as e:
|
||||
logger.warning(f"Milvus initialization failed: {e}")
|
||||
|
||||
|
||||
@app.on_event("shutdown")
|
||||
async def shutdown_event():
|
||||
"""关闭时清理"""
|
||||
logger.info("Shutting down application...")
|
||||
try:
|
||||
if milvus_service is not None:
|
||||
milvus_service.disconnect()
|
||||
except Exception as e:
|
||||
logger.warning(f"Shutdown cleanup error: {e}")
|
||||
|
||||
|
||||
@app.get("/")
|
||||
async def root():
|
||||
"""根路径"""
|
||||
return {
|
||||
"message": "车辆法规智能检索系统 API",
|
||||
"version": "1.0.0",
|
||||
"docs": "/docs",
|
||||
}
|
||||
|
||||
|
||||
@app.get("/health")
|
||||
async def health():
|
||||
"""健康检查"""
|
||||
return {"status": "healthy"}
|
||||
49
app/schemas/__init__.py
Normal file
49
app/schemas/__init__.py
Normal file
@@ -0,0 +1,49 @@
|
||||
from .doc import (
|
||||
DocumentUploadResponse,
|
||||
DocumentInfo,
|
||||
DocumentListResponse,
|
||||
ChunkInfo,
|
||||
ParseResponse,
|
||||
EmbedResponse,
|
||||
)
|
||||
from .rag import (
|
||||
RagChatRequest,
|
||||
RetrievedDoc,
|
||||
SourceInfo,
|
||||
QuickQuestion,
|
||||
QuickQuestionsResponse,
|
||||
)
|
||||
from .compliance import (
|
||||
RiskLevel,
|
||||
ComplianceStatus,
|
||||
Regulation,
|
||||
ComplianceSegment,
|
||||
RiskDashboard,
|
||||
PriorityAction,
|
||||
ComplianceResult,
|
||||
ComplianceChatRequest,
|
||||
AnalyzeResponse,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"DocumentUploadResponse",
|
||||
"DocumentInfo",
|
||||
"DocumentListResponse",
|
||||
"ChunkInfo",
|
||||
"ParseResponse",
|
||||
"EmbedResponse",
|
||||
"RagChatRequest",
|
||||
"RetrievedDoc",
|
||||
"SourceInfo",
|
||||
"QuickQuestion",
|
||||
"QuickQuestionsResponse",
|
||||
"RiskLevel",
|
||||
"ComplianceStatus",
|
||||
"Regulation",
|
||||
"ComplianceSegment",
|
||||
"RiskDashboard",
|
||||
"PriorityAction",
|
||||
"ComplianceResult",
|
||||
"ComplianceChatRequest",
|
||||
"AnalyzeResponse",
|
||||
]
|
||||
69
app/schemas/compliance.py
Normal file
69
app/schemas/compliance.py
Normal file
@@ -0,0 +1,69 @@
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class RiskLevel(str, Enum):
|
||||
high = "high"
|
||||
medium = "medium"
|
||||
low = "low"
|
||||
|
||||
|
||||
class ComplianceStatus(str, Enum):
|
||||
pass_status = "pass"
|
||||
warning = "warning"
|
||||
fail = "fail"
|
||||
|
||||
|
||||
class Regulation(BaseModel):
|
||||
id: int
|
||||
name: str
|
||||
clause: Optional[str] = None
|
||||
score: float
|
||||
match_keyword: str
|
||||
category: RiskLevel
|
||||
full_content: str
|
||||
|
||||
|
||||
class ComplianceSegment(BaseModel):
|
||||
id: int
|
||||
index: int
|
||||
intent: str
|
||||
start_pos: int
|
||||
end_pos: int
|
||||
content: str
|
||||
risk_level: RiskLevel
|
||||
regulations: list[Regulation]
|
||||
|
||||
|
||||
class RiskDashboard(BaseModel):
|
||||
score: float
|
||||
high_risk_count: int
|
||||
medium_risk_count: int
|
||||
low_risk_count: int
|
||||
need_fix_segments: int
|
||||
status: ComplianceStatus
|
||||
status_label: str
|
||||
|
||||
|
||||
class PriorityAction(BaseModel):
|
||||
regulation: str
|
||||
issue: str
|
||||
suggestion: str
|
||||
severity: RiskLevel
|
||||
|
||||
|
||||
class ComplianceResult(BaseModel):
|
||||
task_id: str
|
||||
dashboard: RiskDashboard
|
||||
segments: list[ComplianceSegment]
|
||||
priority_actions: list[PriorityAction]
|
||||
|
||||
|
||||
class ComplianceChatRequest(BaseModel):
|
||||
query: str
|
||||
|
||||
|
||||
class AnalyzeResponse(BaseModel):
|
||||
task_id: str
|
||||
status: str = "processing"
|
||||
44
app/schemas/doc.py
Normal file
44
app/schemas/doc.py
Normal file
@@ -0,0 +1,44 @@
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
class DocumentUploadResponse(BaseModel):
|
||||
doc_id: str
|
||||
filename: str
|
||||
size: int
|
||||
status: str = "uploaded"
|
||||
|
||||
|
||||
class DocumentInfo(BaseModel):
|
||||
id: str
|
||||
name: str
|
||||
chunks: int
|
||||
status: str
|
||||
created_at: Optional[datetime] = None
|
||||
|
||||
|
||||
class DocumentListResponse(BaseModel):
|
||||
docs: list[DocumentInfo]
|
||||
|
||||
|
||||
class ChunkInfo(BaseModel):
|
||||
chunk_id: str
|
||||
doc_name: str
|
||||
clause_id: Optional[str] = None
|
||||
chapter: Optional[str] = None
|
||||
content: str
|
||||
token_count: int
|
||||
chunk_index: int
|
||||
|
||||
|
||||
class ParseResponse(BaseModel):
|
||||
doc_id: str
|
||||
chunks: int
|
||||
status: str = "parsed"
|
||||
|
||||
|
||||
class EmbedResponse(BaseModel):
|
||||
doc_id: str
|
||||
vectors: int
|
||||
status: str = "embedded"
|
||||
31
app/schemas/rag.py
Normal file
31
app/schemas/rag.py
Normal file
@@ -0,0 +1,31 @@
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class RagChatRequest(BaseModel):
|
||||
query: str
|
||||
top_k: int = 5
|
||||
|
||||
|
||||
class RetrievedDoc(BaseModel):
|
||||
id: str
|
||||
doc_name: str
|
||||
clause_id: Optional[str] = None
|
||||
score: float
|
||||
content: str
|
||||
preview: str
|
||||
|
||||
|
||||
class SourceInfo(BaseModel):
|
||||
name: str
|
||||
clause: Optional[str] = None
|
||||
|
||||
|
||||
class QuickQuestion(BaseModel):
|
||||
id: str
|
||||
question: str
|
||||
category: str
|
||||
|
||||
|
||||
class QuickQuestionsResponse(BaseModel):
|
||||
questions: list[QuickQuestion]
|
||||
50
app/services/__init__.py
Normal file
50
app/services/__init__.py
Normal file
@@ -0,0 +1,50 @@
|
||||
# Import mock data service
|
||||
from .mock_data import (
|
||||
get_mock_documents,
|
||||
get_mock_quick_questions,
|
||||
get_mock_retrieval,
|
||||
get_mock_rag_answer,
|
||||
get_mock_compliance_result,
|
||||
get_mock_compliance_chat_response,
|
||||
MOCK_SYSTEM_STATS,
|
||||
MOCK_SYSTEM_CONFIG,
|
||||
)
|
||||
|
||||
# Try importing real services (may fail if dependencies not installed)
|
||||
try:
|
||||
from .llm import llm_service, LLMService
|
||||
from .embedding import embedding_service, EmbeddingService
|
||||
from .milvus import milvus_service, MilvusService
|
||||
from .document import DocumentService, get_document_service
|
||||
_real_services_available = True
|
||||
except ImportError:
|
||||
_real_services_available = False
|
||||
llm_service = None
|
||||
LLMService = None
|
||||
embedding_service = None
|
||||
EmbeddingService = None
|
||||
milvus_service = None
|
||||
MilvusService = None
|
||||
DocumentService = None
|
||||
get_document_service = None
|
||||
|
||||
__all__ = [
|
||||
# Mock data services
|
||||
"get_mock_documents",
|
||||
"get_mock_quick_questions",
|
||||
"get_mock_retrieval",
|
||||
"get_mock_rag_answer",
|
||||
"get_mock_compliance_result",
|
||||
"get_mock_compliance_chat_response",
|
||||
"MOCK_SYSTEM_STATS",
|
||||
"MOCK_SYSTEM_CONFIG",
|
||||
# Real services (may be None if not available)
|
||||
"llm_service",
|
||||
"LLMService",
|
||||
"embedding_service",
|
||||
"EmbeddingService",
|
||||
"milvus_service",
|
||||
"MilvusService",
|
||||
"DocumentService",
|
||||
"get_document_service",
|
||||
]
|
||||
64
app/services/document.py
Normal file
64
app/services/document.py
Normal file
@@ -0,0 +1,64 @@
|
||||
import os
|
||||
from typing import List, Optional
|
||||
from PyPDF2 import PdfReader
|
||||
from docx import Document
|
||||
import pdfplumber
|
||||
|
||||
|
||||
class DocumentService:
|
||||
def __init__(self, raw_dir: str, parsed_dir: str):
|
||||
self.raw_dir = raw_dir
|
||||
self.parsed_dir = parsed_dir
|
||||
|
||||
def parse_pdf(self, file_path: str) -> str:
|
||||
"""解析PDF文件"""
|
||||
text = ""
|
||||
try:
|
||||
with pdfplumber.open(file_path) as pdf:
|
||||
for page in pdf.pages:
|
||||
page_text = page.extract_text()
|
||||
if page_text:
|
||||
text += page_text + "\n"
|
||||
except Exception:
|
||||
reader = PdfReader(file_path)
|
||||
for page in reader.pages:
|
||||
text += page.extract_text() + "\n"
|
||||
|
||||
return text.strip()
|
||||
|
||||
def parse_docx(self, file_path: str) -> str:
|
||||
"""解析Word文件"""
|
||||
doc = Document(file_path)
|
||||
text = ""
|
||||
for paragraph in doc.paragraphs:
|
||||
text += paragraph.text + "\n"
|
||||
return text.strip()
|
||||
|
||||
def parse_txt(self, file_path: str) -> str:
|
||||
"""解析TXT文件"""
|
||||
with open(file_path, "r", encoding="utf-8") as f:
|
||||
return f.read().strip()
|
||||
|
||||
def parse_document(self, file_path: str) -> str:
|
||||
"""根据文件类型解析文档"""
|
||||
ext = os.path.splitext(file_path)[1].lower()
|
||||
|
||||
if ext == ".pdf":
|
||||
return self.parse_pdf(file_path)
|
||||
elif ext in [".docx", ".doc"]:
|
||||
return self.parse_docx(file_path)
|
||||
elif ext == ".txt":
|
||||
return self.parse_txt(file_path)
|
||||
else:
|
||||
raise ValueError(f"Unsupported file format: {ext}")
|
||||
|
||||
def save_parsed_text(self, doc_id: str, text: str) -> str:
|
||||
"""保存解析后的文本"""
|
||||
parsed_path = os.path.join(self.parsed_dir, f"{doc_id}.txt")
|
||||
with open(parsed_path, "w", encoding="utf-8") as f:
|
||||
f.write(text)
|
||||
return parsed_path
|
||||
|
||||
|
||||
def get_document_service(raw_dir: str, parsed_dir: str) -> DocumentService:
|
||||
return DocumentService(raw_dir, parsed_dir)
|
||||
33
app/services/embedding.py
Normal file
33
app/services/embedding.py
Normal file
@@ -0,0 +1,33 @@
|
||||
import dashscope
|
||||
from dashscope import TextEmbedding
|
||||
from typing import List
|
||||
|
||||
|
||||
class EmbeddingService:
|
||||
def __init__(self):
|
||||
from app.core.config import settings
|
||||
self.model = settings.embedding_model
|
||||
self.dimension = settings.embedding_dim
|
||||
dashscope.api_key = settings.dashscope_api_key
|
||||
|
||||
def embed_texts(self, texts: List[str]) -> List[List[float]]:
|
||||
"""批量文本嵌入"""
|
||||
response = TextEmbedding.call(
|
||||
model=self.model,
|
||||
input=texts,
|
||||
)
|
||||
|
||||
if response.status_code == 200:
|
||||
embeddings = []
|
||||
for item in response.output.embeddings:
|
||||
embeddings.append(item.embedding)
|
||||
return embeddings
|
||||
raise Exception(f"Embedding failed: {response.code}")
|
||||
|
||||
def embed_single(self, text: str) -> List[float]:
|
||||
"""单个文本嵌入"""
|
||||
embeddings = self.embed_texts([text])
|
||||
return embeddings[0]
|
||||
|
||||
|
||||
embedding_service = EmbeddingService()
|
||||
58
app/services/llm.py
Normal file
58
app/services/llm.py
Normal file
@@ -0,0 +1,58 @@
|
||||
import dashscope
|
||||
from dashscope import Generation
|
||||
from typing import AsyncGenerator, Optional, Generator
|
||||
|
||||
|
||||
class LLMService:
|
||||
def __init__(self):
|
||||
from app.core.config import settings
|
||||
self.model = settings.llm_model
|
||||
dashscope.api_key = settings.dashscope_api_key
|
||||
|
||||
def generate_stream(
|
||||
self,
|
||||
prompt: str,
|
||||
system_prompt: Optional[str] = None,
|
||||
) -> Generator[str, None, None]:
|
||||
"""流式生成文本"""
|
||||
messages = []
|
||||
if system_prompt:
|
||||
messages.append({"role": "system", "content": system_prompt})
|
||||
messages.append({"role": "user", "content": prompt})
|
||||
|
||||
responses = Generation.call(
|
||||
model=self.model,
|
||||
messages=messages,
|
||||
result_format="message",
|
||||
stream=True,
|
||||
)
|
||||
|
||||
for response in responses:
|
||||
if response.status_code == 200:
|
||||
content = response.output.choices[0].message.content
|
||||
if content:
|
||||
yield content
|
||||
|
||||
async def generate(
|
||||
self,
|
||||
prompt: str,
|
||||
system_prompt: Optional[str] = None,
|
||||
) -> str:
|
||||
"""一次性生成文本"""
|
||||
messages = []
|
||||
if system_prompt:
|
||||
messages.append({"role": "system", "content": system_prompt})
|
||||
messages.append({"role": "user", "content": prompt})
|
||||
|
||||
response = Generation.call(
|
||||
model=self.model,
|
||||
messages=messages,
|
||||
result_format="message",
|
||||
)
|
||||
|
||||
if response.status_code == 200:
|
||||
return response.output.choices[0].message.content
|
||||
raise Exception(f"LLM generation failed: {response.code}")
|
||||
|
||||
|
||||
llm_service = LLMService()
|
||||
158
app/services/milvus.py
Normal file
158
app/services/milvus.py
Normal file
@@ -0,0 +1,158 @@
|
||||
from pymilvus import (
|
||||
connections,
|
||||
Collection,
|
||||
FieldSchema,
|
||||
CollectionSchema,
|
||||
DataType,
|
||||
utility,
|
||||
)
|
||||
from typing import List, Optional
|
||||
|
||||
|
||||
class MilvusService:
|
||||
def __init__(self):
|
||||
from app.core.config import settings
|
||||
self.host = settings.milvus_host
|
||||
self.port = settings.milvus_port
|
||||
self.regulations_collection_name = settings.regulations_collection
|
||||
self.compliance_collection_name = settings.compliance_collection
|
||||
self._connected = False
|
||||
|
||||
def connect(self):
|
||||
"""连接Milvus"""
|
||||
if not self._connected:
|
||||
connections.connect(
|
||||
alias="default",
|
||||
host=self.host,
|
||||
port=self.port,
|
||||
)
|
||||
self._connected = True
|
||||
|
||||
def disconnect(self):
|
||||
"""断开连接"""
|
||||
if self._connected:
|
||||
connections.disconnect("default")
|
||||
self._connected = False
|
||||
|
||||
def create_regulations_collection(self):
|
||||
"""创建法规文档集合"""
|
||||
from app.core.config import settings
|
||||
self.connect()
|
||||
|
||||
if utility.has_collection(self.regulations_collection_name):
|
||||
return Collection(self.regulations_collection_name)
|
||||
|
||||
fields = [
|
||||
FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True),
|
||||
FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=settings.embedding_dim),
|
||||
FieldSchema(name="doc_name", dtype=DataType.VARCHAR, max_length=256),
|
||||
FieldSchema(name="clause_id", dtype=DataType.VARCHAR, max_length=64),
|
||||
FieldSchema(name="chapter", dtype=DataType.VARCHAR, max_length=128),
|
||||
FieldSchema(name="source_file", dtype=DataType.VARCHAR, max_length=256),
|
||||
FieldSchema(name="chunk_index", dtype=DataType.INT64),
|
||||
FieldSchema(name="content", dtype=DataType.VARCHAR, max_length=65535),
|
||||
FieldSchema(name="token_count", dtype=DataType.INT64),
|
||||
]
|
||||
|
||||
schema = CollectionSchema(
|
||||
fields=fields,
|
||||
description="法规文档向量集合",
|
||||
)
|
||||
|
||||
collection = Collection(
|
||||
name=self.regulations_collection_name,
|
||||
schema=schema,
|
||||
)
|
||||
|
||||
index_params = {
|
||||
"metric_type": "COSINE",
|
||||
"index_type": "IVF_FLAT",
|
||||
"params": {"nlist": 128},
|
||||
}
|
||||
collection.create_index(field_name="embedding", index_params=index_params)
|
||||
|
||||
return collection
|
||||
|
||||
def insert_chunks(
|
||||
self,
|
||||
embeddings: List[List[float]],
|
||||
metadata: List[dict],
|
||||
) -> List[int]:
|
||||
"""插入向量数据"""
|
||||
collection = Collection(self.regulations_collection_name)
|
||||
collection.load()
|
||||
|
||||
data = [
|
||||
embeddings,
|
||||
[m.get("doc_name", "") for m in metadata],
|
||||
[m.get("clause_id", "") for m in metadata],
|
||||
[m.get("chapter", "") for m in metadata],
|
||||
[m.get("source_file", "") for m in metadata],
|
||||
[m.get("chunk_index", 0) for m in metadata],
|
||||
[m.get("content", "") for m in metadata],
|
||||
[m.get("token_count", 0) for m in metadata],
|
||||
]
|
||||
|
||||
result = collection.insert(data)
|
||||
collection.flush()
|
||||
return result.primary_keys
|
||||
|
||||
def search(
|
||||
self,
|
||||
query_embedding: List[float],
|
||||
top_k: int = 10,
|
||||
) -> List[dict]:
|
||||
"""向量检索"""
|
||||
collection = Collection(self.regulations_collection_name)
|
||||
collection.load()
|
||||
|
||||
search_params = {"metric_type": "COSINE", "params": {"nprobe": 16}}
|
||||
|
||||
results = collection.search(
|
||||
data=[query_embedding],
|
||||
anns_field="embedding",
|
||||
param=search_params,
|
||||
limit=top_k,
|
||||
output_fields=["doc_name", "clause_id", "chapter", "content", "chunk_index"],
|
||||
)
|
||||
|
||||
hits = []
|
||||
for hit in results[0]:
|
||||
hits.append({
|
||||
"id": hit.id,
|
||||
"score": hit.score,
|
||||
"doc_name": hit.entity.get("doc_name"),
|
||||
"clause_id": hit.entity.get("clause_id"),
|
||||
"chapter": hit.entity.get("chapter"),
|
||||
"content": hit.entity.get("content"),
|
||||
"chunk_index": hit.entity.get("chunk_index"),
|
||||
})
|
||||
|
||||
return hits
|
||||
|
||||
def get_collection_stats(self) -> dict:
|
||||
"""获取集合统计"""
|
||||
self.connect()
|
||||
|
||||
if not utility.has_collection(self.regulations_collection_name):
|
||||
return {"exists": False}
|
||||
|
||||
collection = Collection(self.regulations_collection_name)
|
||||
collection.load()
|
||||
|
||||
return {
|
||||
"exists": True,
|
||||
"name": self.regulations_collection_name,
|
||||
"count": collection.num_entities,
|
||||
}
|
||||
|
||||
def health_check(self) -> bool:
|
||||
"""健康检查"""
|
||||
try:
|
||||
self.connect()
|
||||
return True
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
milvus_service = MilvusService()
|
||||
425
app/services/mock_data.py
Normal file
425
app/services/mock_data.py
Normal file
@@ -0,0 +1,425 @@
|
||||
"""
|
||||
Mock数据服务 - 提供预设假数据供前后端对接测试
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Any
|
||||
import uuid
|
||||
|
||||
# 预设法规文档列表
|
||||
MOCK_DOCUMENTS: List[Dict[str, Any]] = [
|
||||
{
|
||||
"id": "doc-001",
|
||||
"name": "道路交通安全法.pdf",
|
||||
"chunks": 156,
|
||||
"status": "indexed",
|
||||
"created_at": datetime(2026, 5, 10, 10, 0, 0),
|
||||
},
|
||||
{
|
||||
"id": "doc-002",
|
||||
"name": "机动车登记规定.docx",
|
||||
"chunks": 89,
|
||||
"status": "indexed",
|
||||
"created_at": datetime(2026, 5, 10, 11, 0, 0),
|
||||
},
|
||||
{
|
||||
"id": "doc-003",
|
||||
"name": "电动自行车规范.pdf",
|
||||
"chunks": 42,
|
||||
"status": "indexed",
|
||||
"created_at": datetime(2026, 5, 10, 12, 0, 0),
|
||||
},
|
||||
{
|
||||
"id": "doc-004",
|
||||
"name": "GB 38031-2020 电动汽车安全要求.pdf",
|
||||
"chunks": 128,
|
||||
"status": "indexed",
|
||||
"created_at": datetime(2026, 5, 10, 13, 0, 0),
|
||||
},
|
||||
{
|
||||
"id": "doc-005",
|
||||
"name": "C-NCAP管理规则(2021版).pdf",
|
||||
"chunks": 95,
|
||||
"status": "indexed",
|
||||
"created_at": datetime(2026, 5, 10, 14, 0, 0),
|
||||
},
|
||||
]
|
||||
|
||||
# 预设快捷问题
|
||||
MOCK_QUICK_QUESTIONS: List[Dict[str, str]] = [
|
||||
{"id": "q1", "question": "电动自行车需要上牌照吗?", "category": "车辆登记"},
|
||||
{"id": "q2", "question": "新能源汽车有哪些补贴政策?", "category": "新能源"},
|
||||
{"id": "q3", "question": "车辆年检的规定是什么?", "category": "年检"},
|
||||
{"id": "q4", "question": "驾驶证过期了怎么处理?", "category": "驾驶证"},
|
||||
]
|
||||
|
||||
# 预设检索结果
|
||||
MOCK_RETRIEVAL_RESULTS: List[Dict[str, Any]] = [
|
||||
{
|
||||
"id": "chunk-001",
|
||||
"score": 0.95,
|
||||
"preview": "根据《道路交通安全法》第十八条规定,电动自行车经公安机关交通管理部门登记后,方可上道路行驶...",
|
||||
"doc_name": "道路交通安全法",
|
||||
"clause": "第十八条",
|
||||
"content": "根据《道路交通安全法》第十八条规定,电动自行车经公安机关交通管理部门登记后,方可上道路行驶。电动自行车应当符合国家标准,最高设计车速不超过二十五公里每小时,整车质量不超过五十五千克。",
|
||||
},
|
||||
{
|
||||
"id": "chunk-002",
|
||||
"score": 0.88,
|
||||
"preview": "电动自行车需符合GB17761-2018国家标准,包括最高车速、整车质量、脚踏骑行能力等要求...",
|
||||
"doc_name": "电动自行车规范",
|
||||
"clause": "第4条",
|
||||
"content": "电动自行车需符合GB17761-2018国家标准。主要技术要求包括:最高设计车速不超过25km/h,整车质量不超过55kg,具有脚踏骑行能力,蓄电池标称电压不超过48V,电动机额定连续输出功率不超过400W。",
|
||||
},
|
||||
{
|
||||
"id": "chunk-003",
|
||||
"score": 0.82,
|
||||
"preview": "机动车登记规定:初次申领机动车号牌、行驶证的,机动车所有人应当向住所地的车辆管理所申请注册登记...",
|
||||
"doc_name": "机动车登记规定",
|
||||
"clause": "第5条",
|
||||
"content": "机动车登记规定:初次申领机动车号牌、行驶证的,机动车所有人应当向住所地的车辆管理所申请注册登记。申请注册登记的,应当提交机动车所有人的身份证明、购车发票等机动车来历证明、机动车整车出厂合格证明或者进口机动车进口凭证。",
|
||||
},
|
||||
{
|
||||
"id": "chunk-004",
|
||||
"score": 0.75,
|
||||
"preview": "驾驶电动自行车上道路行驶,应当佩戴安全头盔,遵守道路交通安全法律法规...",
|
||||
"doc_name": "道路交通安全法",
|
||||
"clause": "第76条",
|
||||
"content": "驾驶电动自行车上道路行驶,应当佩戴安全头盔,遵守道路交通安全法律法规。电动自行车不得逆向行驶,不得在机动车道内行驶,最高车速不得超过规定的限速。",
|
||||
},
|
||||
{
|
||||
"id": "chunk-005",
|
||||
"score": 0.68,
|
||||
"preview": "电动汽车动力电池安全要求:电池系统发生热失控后,应在5分钟内不起火不爆炸...",
|
||||
"doc_name": "GB 38031-2020",
|
||||
"clause": "第7条",
|
||||
"content": "电动汽车动力电池安全要求(GB 38031-2020):电池系统发生热失控后,应在5分钟内不起火不爆炸,为乘员预留逃生时间。电池包需通过针刺、过充、短路等安全测试。",
|
||||
},
|
||||
]
|
||||
|
||||
# 预设RAG问答答案模板(按关键词匹配)
|
||||
MOCK_RAG_ANSWERS: Dict[str, Dict[str, Any]] = {
|
||||
"电动自行车": {
|
||||
"text": "根据《道路交通安全法》及相关规范,电动自行车上路需满足以下条件:\n\n1. 符合国家标准 GB17761-2018\n2. 经公安机关交通管理部门登记\n3. 最高设计车速不超过 25km/h\n4. 整车质量不超过 55kg\n5. 具有脚踏骑行能力\n6. 蓄电池标称电压不超过 48V\n\n行驶时还需佩戴安全头盔,不得逆向行驶或在机动车道内行驶。",
|
||||
"retrieval_ids": ["chunk-001", "chunk-002", "chunk-004"],
|
||||
},
|
||||
"驾驶证": {
|
||||
"text": "驾驶证申请流程如下:\n\n1. 到驾校报名并参加培训\n2. 通过科目一(理论考试)\n3. 通过科目二(场地驾驶技能考试)\n4. 通过科目三(道路驾驶技能考试)\n5. 通过科目四(安全文明驾驶常识考试)\n6. 领取驾驶证\n\n初次申领需到住所地车辆管理所申请注册登记。",
|
||||
"retrieval_ids": ["chunk-003"],
|
||||
},
|
||||
"超速": {
|
||||
"text": "超速处罚标准(根据《道路交通安全法》):\n\n- 超速10%以下:警告\n- 超速10%-20%:罚款50-200元\n- 超速20%-50%:罚款200-500元,记3-6分\n- 超速50%以上:罚款500-2000元,记12分,可吊销驾驶证\n\n机动车驾驶人违反道路交通安全法律、法规将处警告或二十元以上二百元以下罚款。",
|
||||
"retrieval_ids": ["chunk-001"],
|
||||
},
|
||||
"年检": {
|
||||
"text": "车辆年检规定:\n\n- 小型私家车:6年内免检(每2年申领标志),6-10年每2年检验,10年以上每年检验\n- 车辆需携带行驶证、交强险保单\n- 检验项目:灯光、制动、排放等\n\n机动车所有人的住所迁出车辆管理所管辖区域的,需在登记证书上签注变更事项。",
|
||||
"retrieval_ids": ["chunk-003"],
|
||||
},
|
||||
"电池": {
|
||||
"text": "电动汽车电池安全标准(GB 38031-2020):\n\n1. 热失控要求:电池系统发生热失控后,应在5分钟内不起火不爆炸,为乘员预留逃生时间\n2. 电池包需通过针刺、过充、短路等安全测试\n3. 充电系统应具备过充保护功能,当电池SOC达到100%时应自动停止充电\n4. 充电接口应符合GB/T 18487.1标准要求\n\n以上要求确保电动汽车的整车安全性。",
|
||||
"retrieval_ids": ["chunk-005"],
|
||||
},
|
||||
"碰撞": {
|
||||
"text": "正面碰撞测试要求(C-NCAP管理规则):\n\n1. 正面100%重叠刚性壁障碰撞试验\n2. 碰撞速度:50km/h\n3. 试验后要求:\n - 车门应能打开\n - 燃油系统无泄漏\n - 座椅及安全带功能正常\n\n此测试用于评估车辆在正面碰撞事故中对乘员的保护能力。",
|
||||
"retrieval_ids": [],
|
||||
},
|
||||
"AEB": {
|
||||
"text": "AEB(自动紧急制动系统)测试标准:\n\n1. 系统应在检测到前方障碍物时主动减速或停车\n2. 测试场景分为三种:\n - 目标车静止\n - 目标车移动\n - 目标车制动\n3. AEB功能是C-NCAP评分的重要加分项\n\n该系统对提升车辆主动安全性能具有重要意义。",
|
||||
"retrieval_ids": [],
|
||||
},
|
||||
"高速公路": {
|
||||
"text": "高速公路安全距离规定:\n\n1. 车速超过100km/h时,与同车道前车保持100米以上距离\n2. 车速低于100km/h时,距离可适当缩短\n3. 执行紧急任务的警车、消防车、救护车、工程救险车不受行驶速度限制\n\n保持安全距离是预防追尾事故的关键措施。",
|
||||
"retrieval_ids": [],
|
||||
},
|
||||
}
|
||||
|
||||
# 预设合规分析结果
|
||||
MOCK_COMPLIANCE_RESULT: Dict[str, Any] = {
|
||||
"task_id": "task-001",
|
||||
"dashboard": {
|
||||
"score": 78,
|
||||
"high_risk_count": 2,
|
||||
"medium_risk_count": 1,
|
||||
"low_risk_count": 0,
|
||||
"need_fix_segments": 3,
|
||||
"status": "warning",
|
||||
"status_label": "需优化",
|
||||
},
|
||||
"segments": [
|
||||
{
|
||||
"id": 1,
|
||||
"index": 1,
|
||||
"intent": "车身结构设计",
|
||||
"start_pos": 45,
|
||||
"end_pos": 230,
|
||||
"content": "车身采用高强度钢铝混合结构,A柱和B柱使用热成型钢板,厚度2.5mm。车顶结构设计满足GB 26112-2010抗压强度要求,正面碰撞能量吸收区域采用渐进式变形设计,确保碰撞时能量有效分散。",
|
||||
"risk_level": "high",
|
||||
"regulations": [
|
||||
{
|
||||
"id": 1,
|
||||
"name": "GB 26112-2010",
|
||||
"clause": "第4.2条",
|
||||
"score": 0.95,
|
||||
"match_keyword": "车顶抗压强度",
|
||||
"category": "high",
|
||||
"full_content": "车顶结构应能承受相当于车辆整备质量1.5倍的载荷,载荷分布应均匀,试验后车顶变形量不超过规定值。",
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"name": "C-NCAP管理规则",
|
||||
"clause": "第3.1条",
|
||||
"score": 0.88,
|
||||
"match_keyword": "正面碰撞",
|
||||
"category": "high",
|
||||
"full_content": "正面碰撞试验速度为50km/h,碰撞后车门应能打开,燃油系统无泄漏,座椅及安全带功能正常。",
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"name": "GB 11551-2014",
|
||||
"clause": "第5条",
|
||||
"score": 0.72,
|
||||
"match_keyword": "碰撞能量吸收",
|
||||
"category": "medium",
|
||||
"full_content": "车辆正面碰撞时应有效保护乘员,碰撞能量应通过车身结构合理分散。",
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"name": "机动车安全技术条件",
|
||||
"clause": "第12条",
|
||||
"score": 0.58,
|
||||
"match_keyword": "A柱强度",
|
||||
"category": "medium",
|
||||
"full_content": "A柱应具备足够的抗变形能力,材料强度应符合相关标准要求。",
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"index": 2,
|
||||
"intent": "动力系统配置",
|
||||
"start_pos": 298,
|
||||
"end_pos": 425,
|
||||
"content": "搭载永磁同步电机,最大功率150kW,峰值扭矩310Nm。电池组采用三元锂离子电池,容量75kWh,能量密度180Wh/kg。充电接口支持快充(30分钟充至80%)和慢充(8小时充满),符合GB/T 18487.1-2015标准。",
|
||||
"risk_level": "medium",
|
||||
"regulations": [
|
||||
{
|
||||
"id": 5,
|
||||
"name": "GB/T 18487.1-2015",
|
||||
"clause": "第6条",
|
||||
"score": 0.94,
|
||||
"match_keyword": "充电接口标准",
|
||||
"category": "high",
|
||||
"full_content": "电动汽车传导充电接口应符合GB/T 18487.1标准要求,充电系统应具备过充保护功能。",
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"name": "GB/T 31484-2015",
|
||||
"clause": "第4条",
|
||||
"score": 0.85,
|
||||
"match_keyword": "电池能量密度",
|
||||
"category": "high",
|
||||
"full_content": "动力电池能量密度不低于120Wh/kg,电池系统需通过热失控测试。",
|
||||
},
|
||||
{
|
||||
"id": 7,
|
||||
"name": "新能源汽车生产企业准入",
|
||||
"clause": "第8条",
|
||||
"score": 0.65,
|
||||
"match_keyword": "电机功率",
|
||||
"category": "medium",
|
||||
"full_content": "驱动电机应符合相关技术标准,功率参数应在规定范围内。",
|
||||
},
|
||||
{
|
||||
"id": 8,
|
||||
"name": "电动汽车安全要求",
|
||||
"clause": "第7条",
|
||||
"score": 0.45,
|
||||
"match_keyword": "充电时间",
|
||||
"category": "low",
|
||||
"full_content": "充电系统应具备过充保护功能,当电池SOC达到100%时应自动停止充电。",
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"index": 3,
|
||||
"intent": "安全配置设计",
|
||||
"start_pos": 570,
|
||||
"end_pos": 725,
|
||||
"content": "配备6个安全气囊(前排双气囊、侧气囊、侧气帘),采用预紧式安全带。ABS系统采用博世第9代ESP,具备碰撞预警功能(FCW)和自动紧急制动(AEB)。方向盘集成驾驶员疲劳监测摄像头。",
|
||||
"risk_level": "low",
|
||||
"regulations": [
|
||||
{
|
||||
"id": 9,
|
||||
"name": "GB 27887-2011",
|
||||
"clause": "第5条",
|
||||
"score": 0.92,
|
||||
"match_keyword": "安全气囊",
|
||||
"category": "high",
|
||||
"full_content": "乘用车应配备驾驶员和乘客安全气囊,气囊系统应符合相关技术标准。",
|
||||
},
|
||||
{
|
||||
"id": 10,
|
||||
"name": "GB/T 26991-2011",
|
||||
"clause": "第3条",
|
||||
"score": 0.78,
|
||||
"match_keyword": "ABS系统",
|
||||
"category": "medium",
|
||||
"full_content": "车辆应配备防抱死制动系统,系统性能应符合相关标准要求。",
|
||||
},
|
||||
{
|
||||
"id": 11,
|
||||
"name": "C-NCAP管理规则",
|
||||
"clause": "第4.2条",
|
||||
"score": 0.71,
|
||||
"match_keyword": "AEB自动制动",
|
||||
"category": "medium",
|
||||
"full_content": "主动安全配置评分包含AEB功能,AEB系统应能有效检测障碍物并主动减速。",
|
||||
},
|
||||
{
|
||||
"id": 12,
|
||||
"name": "机动车运行安全技术条件",
|
||||
"clause": "第15条",
|
||||
"score": 0.38,
|
||||
"match_keyword": "疲劳监测",
|
||||
"category": "low",
|
||||
"full_content": "建议配备驾驶员状态监测系统,及时发现驾驶员疲劳或分心状态。",
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
"priority_actions": [
|
||||
{
|
||||
"regulation": "GB 26112-2010 第4.2条",
|
||||
"issue": "缺少车顶抗压强度测试数据",
|
||||
"suggestion": "补充车顶抗压强度具体测试数据,确保满足1.5倍整备质量载荷要求",
|
||||
"severity": "high",
|
||||
},
|
||||
{
|
||||
"regulation": "GB/T 31484-2015 第4条",
|
||||
"issue": "缺少电池热失控测试报告",
|
||||
"suggestion": "补充电池热失控测试报告,验证5分钟内不起火不爆炸",
|
||||
"severity": "high",
|
||||
},
|
||||
{
|
||||
"regulation": "C-NCAP管理规则 第3.1条",
|
||||
"issue": "缺少碰撞后车门开启性能数据",
|
||||
"suggestion": "提供碰撞后车门开启性能测试数据",
|
||||
"severity": "medium",
|
||||
},
|
||||
],
|
||||
}
|
||||
|
||||
# 预设合规对话响应模板
|
||||
MOCK_COMPLIANCE_CHAT_RESPONSES: Dict[str, Dict[str, str]] = {
|
||||
"车身结构设计": {
|
||||
"compliance": "根据当前分析,车身结构设计部分存在以下合规问题:\n\n1. GB 26112-2010要求车顶承受1.5倍整备质量载荷,目前设计声明满足要求但缺少测试数据\n2. C-NCAP正面碰撞后车门应能打开,需提供碰撞测试报告\n\n建议补充相关测试数据以提升合规评分。",
|
||||
"interpretation": "GB 26112-2010 第4.2条具体要求解读:\n\n车顶抗压强度测试是车辆被动安全的重要指标。该标准要求车顶结构能够承受相当于车辆整备质量1.5倍的均匀分布载荷,试验后车顶变形量不得超过规定限值。\n\n热成型钢板(22MnB5材料)抗拉强度约1500-1650 MPa,理论上能满足要求,但需通过实际测试验证。",
|
||||
"suggestion": "针对车身结构设计的修改建议:\n\n1. 补充车顶抗压强度测试报告\n2. 提供A柱材料认证证书\n3. 完善正面碰撞能量吸收设计说明\n4. 添加碰撞后车门开启性能数据\n\n这些补充材料可有效提升合规评分。",
|
||||
},
|
||||
"动力系统配置": {
|
||||
"compliance": "动力系统配置整体合规性良好,主要检查点:\n\n1. 电池能量密度180Wh/kg超过最低要求120Wh/kg ✓\n2. 充电接口符合GB/T 18487.1标准 ✓\n3. 快充30分钟充至80%符合行业标准 ✓\n\n需补充电池热失控测试报告。",
|
||||
"interpretation": "GB/T 31484-2015对动力电池的要求解读:\n\n1. 能量密度:不低于120Wh/kg(您的设计180Wh/kg满足要求)\n2. 循环寿命:不少于1000次循环后容量保持率≥80%\n3. 安全测试:需通过针刺、过充、短路等测试\n\n建议补充循环寿命测试数据。",
|
||||
"suggestion": "动力系统配置改进建议:\n\n1. 补充电池热失控测试报告\n2. 提供循环寿命测试数据\n3. 添加充电系统过充保护功能说明\n4. 完善电池管理系统(BMS)技术文档",
|
||||
},
|
||||
"安全配置设计": {
|
||||
"compliance": "安全配置设计合规性评估:\n\n1. 安全气囊配置满足GB 27887-2011要求 ✓\n2. ABS/ESP系统符合标准 ✓\n3. AEB功能是C-NCAP加分项 ✓\n\n驾驶员疲劳监测是建议配置,不强制要求。",
|
||||
"interpretation": "C-NCAP主动安全评分规则解读:\n\nAEB(自动紧急制动)系统是C-NCAP评分的重要加分项,最高可获得额外加分。测试场景包括:\n- 目标车静止场景\n- 目标车移动场景\n- 目标车制动场景\n\n建议完善AEB系统测试数据以获取更高评分。",
|
||||
"suggestion": "安全配置优化建议:\n\n1. 提供AEB系统测试数据\n2. 补充FCW预警功能测试报告\n3. 添加安全气囊展开时间数据\n4. 完善驾驶员疲劳监测系统说明(如有)",
|
||||
},
|
||||
}
|
||||
|
||||
# 预设系统统计数据
|
||||
MOCK_SYSTEM_STATS: Dict[str, int] = {
|
||||
"docs": 5,
|
||||
"chunks": 510,
|
||||
"vectors": 510,
|
||||
"segments": 0,
|
||||
}
|
||||
|
||||
# 预设系统配置
|
||||
MOCK_SYSTEM_CONFIG: Dict[str, Any] = {
|
||||
"llm": {
|
||||
"model": "qwen-max",
|
||||
},
|
||||
"embedding": {
|
||||
"model": "text-embedding-v3",
|
||||
"dimension": 1536,
|
||||
},
|
||||
"milvus": {
|
||||
"host": "localhost",
|
||||
"port": 19530,
|
||||
},
|
||||
"retrieval": {
|
||||
"vector_top_k": 10,
|
||||
"final_top_k": 5,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def get_mock_documents() -> List[Dict[str, Any]]:
|
||||
"""获取预设法规文档列表"""
|
||||
return MOCK_DOCUMENTS
|
||||
|
||||
|
||||
def get_mock_quick_questions() -> List[Dict[str, str]]:
|
||||
"""获取预设快捷问题"""
|
||||
return MOCK_QUICK_QUESTIONS
|
||||
|
||||
|
||||
def get_mock_retrieval(query: str, top_k: int = 5) -> List[Dict[str, Any]]:
|
||||
"""根据查询关键词返回预设检索结果"""
|
||||
results = []
|
||||
for keyword, data in MOCK_RAG_ANSWERS.items():
|
||||
if keyword in query:
|
||||
for retrieval_id in data.get("retrieval_ids", []):
|
||||
for item in MOCK_RETRIEVAL_RESULTS:
|
||||
if item["id"] == retrieval_id:
|
||||
results.append({
|
||||
"id": item["id"],
|
||||
"score": item["score"],
|
||||
"preview": item["preview"],
|
||||
"doc_name": item["doc_name"],
|
||||
"clause": item["clause"],
|
||||
})
|
||||
break
|
||||
if not results:
|
||||
results = MOCK_RETRIEVAL_RESULTS[:top_k]
|
||||
return results[:top_k]
|
||||
|
||||
|
||||
def get_mock_rag_answer(query: str) -> str:
|
||||
"""根据查询关键词返回预设答案"""
|
||||
for keyword, data in MOCK_RAG_ANSWERS.items():
|
||||
if keyword in query:
|
||||
return data["text"]
|
||||
return "抱歉,暂未找到与您问题直接相关的法规内容。请尝试更具体的问题,或联系交通管理部门获取详细信息。\n\n您可以尝试询问:电动自行车、驾驶证、超速处罚、年检、电池安全、碰撞测试、AEB系统、高速公路规则等话题。"
|
||||
|
||||
|
||||
def get_mock_compliance_result(task_id: str) -> Dict[str, Any]:
|
||||
"""获取预设合规分析结果"""
|
||||
result = MOCK_COMPLIANCE_RESULT.copy()
|
||||
result["task_id"] = task_id
|
||||
return result
|
||||
|
||||
|
||||
def get_mock_compliance_chat_response(intent: str, query: str) -> str:
|
||||
"""获取预设合规对话响应"""
|
||||
responses = MOCK_COMPLIANCE_CHAT_RESPONSES.get(intent, {})
|
||||
if "合规" in query or "符合" in query:
|
||||
return responses.get("compliance", "根据相关法规分析,该段落的合规性需进一步评估。")
|
||||
elif "解读" in query or "什么" in query or "如何" in query:
|
||||
return responses.get("interpretation", "法规要求详细解读如下...")
|
||||
elif "修改" in query or "建议" in query or "完善" in query:
|
||||
return responses.get("suggestion", "建议进行以下修改以提升合规性...")
|
||||
return f"关于您的问题,{intent}部分涉及多条相关法规。您可以进一步询问合规性评估或修改建议。"
|
||||
|
||||
|
||||
def generate_task_id() -> str:
|
||||
"""生成任务ID"""
|
||||
return f"task-{uuid.uuid4().hex[:8]}"
|
||||
|
||||
|
||||
def generate_doc_id() -> str:
|
||||
"""生成文档ID"""
|
||||
return f"doc-{uuid.uuid4().hex[:8]}"
|
||||
4
app/utils/__init__.py
Normal file
4
app/utils/__init__.py
Normal file
@@ -0,0 +1,4 @@
|
||||
from .chunking import TextChunker, chunker
|
||||
from .logger import logger, setup_logging
|
||||
|
||||
__all__ = ["TextChunker", "chunker", "logger", "setup_logging"]
|
||||
78
app/utils/chunking.py
Normal file
78
app/utils/chunking.py
Normal file
@@ -0,0 +1,78 @@
|
||||
import re
|
||||
from typing import List
|
||||
from app.core.config import settings
|
||||
|
||||
|
||||
class TextChunker:
|
||||
def __init__(
|
||||
self,
|
||||
chunk_size: int = settings.chunk_size,
|
||||
chunk_overlap: int = settings.chunk_overlap,
|
||||
):
|
||||
self.chunk_size = chunk_size
|
||||
self.chunk_overlap = chunk_overlap
|
||||
|
||||
def chunk_by_clause(self, text: str) -> List[dict]:
|
||||
"""按条款边界分块(适用于法规文档)"""
|
||||
clause_pattern = r"(第[一二三四五六七八九十百]+条)"
|
||||
parts = re.split(clause_pattern, text)
|
||||
|
||||
chunks = []
|
||||
current_clause = None
|
||||
current_text = ""
|
||||
chunk_index = 0
|
||||
|
||||
for part in parts:
|
||||
if re.match(clause_pattern, part):
|
||||
if current_clause and current_text.strip():
|
||||
chunks.append({
|
||||
"clause_id": current_clause,
|
||||
"content": current_text.strip(),
|
||||
"chunk_index": chunk_index,
|
||||
})
|
||||
chunk_index += 1
|
||||
current_clause = part
|
||||
current_text = ""
|
||||
else:
|
||||
current_text += part
|
||||
|
||||
if current_clause and current_text.strip():
|
||||
chunks.append({
|
||||
"clause_id": current_clause,
|
||||
"content": current_text.strip(),
|
||||
"chunk_index": chunk_index,
|
||||
})
|
||||
|
||||
return chunks
|
||||
|
||||
def chunk_by_size(self, text: str) -> List[dict]:
|
||||
"""按固定大小分块"""
|
||||
chunks = []
|
||||
start = 0
|
||||
chunk_index = 0
|
||||
|
||||
while start < len(text):
|
||||
end = start + self.chunk_size
|
||||
chunk_text = text[start:end]
|
||||
|
||||
if chunk_text.strip():
|
||||
chunks.append({
|
||||
"content": chunk_text.strip(),
|
||||
"chunk_index": chunk_index,
|
||||
"start_pos": start,
|
||||
"end_pos": end,
|
||||
})
|
||||
chunk_index += 1
|
||||
|
||||
start = end - self.chunk_overlap
|
||||
|
||||
return chunks
|
||||
|
||||
def estimate_tokens(self, text: str) -> int:
|
||||
"""估算token数量"""
|
||||
chinese_chars = len(re.findall(r"[^\x00-\xff]", text))
|
||||
english_chars = len(text) - chinese_chars
|
||||
return int(chinese_chars / 1.5 + english_chars / 4)
|
||||
|
||||
|
||||
chunker = TextChunker()
|
||||
24
app/utils/logger.py
Normal file
24
app/utils/logger.py
Normal file
@@ -0,0 +1,24 @@
|
||||
import logging
|
||||
import sys
|
||||
|
||||
|
||||
def setup_logging() -> logging.Logger:
|
||||
"""配置日志"""
|
||||
logger = logging.getLogger("app")
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
handler = logging.StreamHandler(sys.stdout)
|
||||
handler.setLevel(logging.INFO)
|
||||
|
||||
formatter = logging.Formatter(
|
||||
fmt="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
|
||||
datefmt="%Y-%m-%d %H:%M:%S",
|
||||
)
|
||||
handler.setFormatter(formatter)
|
||||
|
||||
logger.addHandler(handler)
|
||||
|
||||
return logger
|
||||
|
||||
|
||||
logger = setup_logging()
|
||||
12
app/workflows/__init__.py
Normal file
12
app/workflows/__init__.py
Normal file
@@ -0,0 +1,12 @@
|
||||
from .rag_workflow import RagState, rag_workflow, run_rag_workflow, stream_rag_workflow
|
||||
from .compliance_workflow import ComplianceState, compliance_workflow, run_compliance_workflow
|
||||
|
||||
__all__ = [
|
||||
"RagState",
|
||||
"rag_workflow",
|
||||
"run_rag_workflow",
|
||||
"stream_rag_workflow",
|
||||
"ComplianceState",
|
||||
"compliance_workflow",
|
||||
"run_compliance_workflow",
|
||||
]
|
||||
175
app/workflows/compliance_workflow.py
Normal file
175
app/workflows/compliance_workflow.py
Normal file
@@ -0,0 +1,175 @@
|
||||
from typing import TypedDict, List
|
||||
from langgraph.graph import StateGraph, END
|
||||
|
||||
|
||||
class ComplianceState(TypedDict):
|
||||
document_path: str
|
||||
raw_text: str
|
||||
segments: List[dict]
|
||||
matched_regulations: List[dict]
|
||||
risk_dashboard: dict
|
||||
priority_actions: List[dict]
|
||||
|
||||
|
||||
def parse_document(state: ComplianceState) -> dict:
|
||||
"""解析文档"""
|
||||
from app.services import get_document_service
|
||||
doc_service = get_document_service(
|
||||
"/airegulation/demo-mao/backend/data/raw",
|
||||
"/airegulation/demo-mao/backend/data/parsed",
|
||||
)
|
||||
text = doc_service.parse_document(state["document_path"])
|
||||
return {"raw_text": text}
|
||||
|
||||
|
||||
def segment_document(state: ComplianceState) -> dict:
|
||||
"""AI语义分段"""
|
||||
from app.services import llm_service
|
||||
prompt = f"""请分析以下设计方案文档,按照设计意图将其分成若干语义段落。
|
||||
|
||||
文档内容:
|
||||
{state['raw_text'][:3000]}
|
||||
|
||||
请输出JSON格式的分段结果,每个段落包含:
|
||||
- intent: 段落意图/主题
|
||||
- startPos: 在原文中的起始位置(大致)
|
||||
- endPos: 在原文中的结束位置(大致)
|
||||
- keywords: 关键词列表
|
||||
|
||||
输出格式:
|
||||
[{{"intent": "...", "startPos": 0, "endPos": 100, "keywords": [...]}}]"""
|
||||
|
||||
# 简化处理:返回基本分段
|
||||
segments = [
|
||||
{
|
||||
"id": 1,
|
||||
"intent": "整体设计概述",
|
||||
"content": state["raw_text"][:500],
|
||||
"keywords": ["设计", "方案"],
|
||||
}
|
||||
]
|
||||
|
||||
return {"segments": segments}
|
||||
|
||||
|
||||
def match_regulations(state: ComplianceState) -> dict:
|
||||
"""法规匹配"""
|
||||
from app.services import embedding_service, milvus_service
|
||||
matched = []
|
||||
|
||||
for segment in state["segments"]:
|
||||
keyword_text = " ".join(segment.get("keywords", []))
|
||||
embedding = embedding_service.embed_single(keyword_text)
|
||||
|
||||
docs = milvus_service.search(embedding, top_k=5)
|
||||
|
||||
segment_regs = []
|
||||
for doc in docs:
|
||||
category = "high" if doc["score"] > 0.85 else ("medium" if doc["score"] > 0.6 else "low")
|
||||
segment_regs.append({
|
||||
"id": doc["id"],
|
||||
"name": doc["doc_name"],
|
||||
"clause": doc.get("clause_id"),
|
||||
"score": doc["score"],
|
||||
"match_keyword": keyword_text,
|
||||
"category": category,
|
||||
"full_content": doc["content"],
|
||||
})
|
||||
|
||||
segment["regulations"] = segment_regs
|
||||
matched.append(segment)
|
||||
|
||||
return {"matched_regulations": matched}
|
||||
|
||||
|
||||
def calculate_risk(state: ComplianceState) -> dict:
|
||||
"""计算风险等级"""
|
||||
segments = state["matched_regulations"]
|
||||
|
||||
high_count = 0
|
||||
medium_count = 0
|
||||
low_count = 0
|
||||
need_fix = 0
|
||||
total_score = 0
|
||||
|
||||
for segment in segments:
|
||||
regs = segment.get("regulations", [])
|
||||
high_regs = [r for r in regs if r["category"] == "high"]
|
||||
|
||||
if high_regs:
|
||||
avg_score = sum(r["score"] for r in high_regs) / len(high_regs)
|
||||
if avg_score < 0.9:
|
||||
segment["risk_level"] = "high"
|
||||
high_count += 1
|
||||
need_fix += 1
|
||||
elif avg_score < 0.92:
|
||||
segment["risk_level"] = "medium"
|
||||
medium_count += 1
|
||||
else:
|
||||
segment["risk_level"] = "low"
|
||||
low_count += 1
|
||||
else:
|
||||
segment["risk_level"] = "low"
|
||||
low_count += 1
|
||||
|
||||
total_score += avg_score if high_regs else 100
|
||||
|
||||
avg_score = total_score / len(segments) if segments else 100
|
||||
|
||||
status = "pass" if avg_score >= 90 else ("warning" if avg_score >= 70 else "fail")
|
||||
status_label = "合规" if status == "pass" else ("需要修改" if status == "warning" else "高风险")
|
||||
|
||||
dashboard = {
|
||||
"score": avg_score,
|
||||
"high_risk_count": high_count,
|
||||
"medium_risk_count": medium_count,
|
||||
"low_risk_count": low_count,
|
||||
"need_fix_segments": need_fix,
|
||||
"status": status,
|
||||
"status_label": status_label,
|
||||
}
|
||||
|
||||
return {"risk_dashboard": dashboard, "segments": segments}
|
||||
|
||||
|
||||
def generate_suggestions(state: ComplianceState) -> dict:
|
||||
"""生成优先建议"""
|
||||
actions = []
|
||||
|
||||
for segment in state["segments"]:
|
||||
for reg in segment.get("regulations", []):
|
||||
if reg["category"] == "high" and reg["score"] < 0.9:
|
||||
actions.append({
|
||||
"regulation": reg["name"],
|
||||
"issue": reg["match_keyword"],
|
||||
"suggestion": f"建议对照{reg['name']}第{reg.get('clause', '')}条进行修改",
|
||||
"severity": "high",
|
||||
})
|
||||
|
||||
return {"priority_actions": actions}
|
||||
|
||||
|
||||
# 构建工作流图
|
||||
compliance_graph = StateGraph(ComplianceState)
|
||||
|
||||
compliance_graph.add_node("parse", parse_document)
|
||||
compliance_graph.add_node("segment", segment_document)
|
||||
compliance_graph.add_node("match", match_regulations)
|
||||
compliance_graph.add_node("score", calculate_risk)
|
||||
compliance_graph.add_node("suggest", generate_suggestions)
|
||||
|
||||
compliance_graph.set_entry_point("parse")
|
||||
compliance_graph.add_edge("parse", "segment")
|
||||
compliance_graph.add_edge("segment", "match")
|
||||
compliance_graph.add_edge("match", "score")
|
||||
compliance_graph.add_edge("score", "suggest")
|
||||
compliance_graph.add_edge("suggest", END)
|
||||
|
||||
compliance_workflow = compliance_graph.compile()
|
||||
|
||||
|
||||
async def run_compliance_workflow(document_path: str) -> ComplianceState:
|
||||
"""运行合规分析工作流"""
|
||||
initial_state: ComplianceState = {"document_path": document_path}
|
||||
result = compliance_workflow.invoke(initial_state)
|
||||
return result
|
||||
114
app/workflows/rag_workflow.py
Normal file
114
app/workflows/rag_workflow.py
Normal file
@@ -0,0 +1,114 @@
|
||||
from typing import TypedDict, List
|
||||
from langgraph.graph import StateGraph, END
|
||||
|
||||
|
||||
class RagState(TypedDict):
|
||||
query: str
|
||||
query_embedding: List[float]
|
||||
retrieved_docs: List[dict]
|
||||
context: str
|
||||
answer: str
|
||||
sources: List[dict]
|
||||
|
||||
|
||||
def embed_query(state: RagState) -> dict:
|
||||
"""将查询转为向量"""
|
||||
from app.services import embedding_service
|
||||
embedding = embedding_service.embed_single(state["query"])
|
||||
return {"query_embedding": embedding}
|
||||
|
||||
|
||||
def retrieve_docs(state: RagState) -> dict:
|
||||
"""向量检索"""
|
||||
from app.services import milvus_service
|
||||
from app.core.config import settings
|
||||
docs = milvus_service.search(
|
||||
state["query_embedding"],
|
||||
top_k=settings.vector_top_k,
|
||||
)
|
||||
return {"retrieved_docs": docs[:settings.final_top_k]}
|
||||
|
||||
|
||||
def build_context(state: RagState) -> dict:
|
||||
"""构建上下文"""
|
||||
context_parts = []
|
||||
sources = []
|
||||
|
||||
for doc in state["retrieved_docs"]:
|
||||
context_parts.append(f"【{doc['doc_name']} - {doc.get('clause_id', '')}】\n{doc['content']}")
|
||||
sources.append({
|
||||
"name": doc["doc_name"],
|
||||
"clause": doc.get("clause_id"),
|
||||
})
|
||||
|
||||
context = "\n\n".join(context_parts)
|
||||
return {"context": context, "sources": sources}
|
||||
|
||||
|
||||
def generate_answer(state: RagState) -> dict:
|
||||
"""生成答案"""
|
||||
from app.services import llm_service
|
||||
prompt = f"""请根据以下法规内容回答用户问题,并在回答中标注引用的法规条款。
|
||||
|
||||
法规内容:
|
||||
{state['context']}
|
||||
|
||||
用户问题:{state['query']}
|
||||
|
||||
请给出准确、简洁的回答,并引用相关法规条款。"""
|
||||
|
||||
answer = ""
|
||||
for chunk in llm_service.generate_stream(prompt):
|
||||
answer += chunk
|
||||
|
||||
return {"answer": answer}
|
||||
|
||||
|
||||
# 构建工作流图
|
||||
rag_graph = StateGraph(RagState)
|
||||
|
||||
rag_graph.add_node("embed", embed_query)
|
||||
rag_graph.add_node("retrieve", retrieve_docs)
|
||||
rag_graph.add_node("build_context", build_context)
|
||||
rag_graph.add_node("generate", generate_answer)
|
||||
|
||||
rag_graph.set_entry_point("embed")
|
||||
rag_graph.add_edge("embed", "retrieve")
|
||||
rag_graph.add_edge("retrieve", "build_context")
|
||||
rag_graph.add_edge("build_context", "generate")
|
||||
rag_graph.add_edge("generate", END)
|
||||
|
||||
rag_workflow = rag_graph.compile()
|
||||
|
||||
|
||||
async def run_rag_workflow(query: str) -> RagState:
|
||||
"""运行RAG工作流"""
|
||||
initial_state: RagState = {"query": query}
|
||||
result = rag_workflow.invoke(initial_state)
|
||||
return result
|
||||
|
||||
|
||||
def stream_rag_workflow(query: str):
|
||||
"""流式运行RAG工作流"""
|
||||
from app.services import llm_service
|
||||
|
||||
# 先完成检索阶段
|
||||
state: RagState = {"query": query}
|
||||
state.update(embed_query(state))
|
||||
state.update(retrieve_docs(state))
|
||||
state.update(build_context(state))
|
||||
|
||||
# 流式生成阶段
|
||||
prompt = f"""请根据以下法规内容回答用户问题,并在回答中标注引用的法规条款。
|
||||
|
||||
法规内容:
|
||||
{state['context']}
|
||||
|
||||
用户问题:{state['query']}
|
||||
|
||||
请给出准确、简洁的回答,并引用相关法规条款。"""
|
||||
|
||||
for chunk in llm_service.generate_stream(prompt):
|
||||
yield {"type": "chunk", "text": chunk}
|
||||
|
||||
yield {"type": "done", "sources": state["sources"]}
|
||||
1
data/raw/compliance_task-32e64724_test_doc.txt
Normal file
1
data/raw/compliance_task-32e64724_test_doc.txt
Normal file
@@ -0,0 +1 @@
|
||||
test content
|
||||
2
data/raw/doc-3b47abd7_requirement.txt
Normal file
2
data/raw/doc-3b47abd7_requirement.txt
Normal file
@@ -0,0 +1,2 @@
|
||||
apache-flink==1.13.2
|
||||
PyMySQL>=1.1.0
|
||||
2
data/raw/doc-9b01a78a_requirement.txt
Normal file
2
data/raw/doc-9b01a78a_requirement.txt
Normal file
@@ -0,0 +1,2 @@
|
||||
apache-flink==1.13.2
|
||||
PyMySQL>=1.1.0
|
||||
6
main.py
Normal file
6
main.py
Normal file
@@ -0,0 +1,6 @@
|
||||
def main():
|
||||
print("Hello from backend!")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
7
pyproject.toml
Normal file
7
pyproject.toml
Normal file
@@ -0,0 +1,7 @@
|
||||
[project]
|
||||
name = "backend"
|
||||
version = "0.1.0"
|
||||
description = "Add your description here"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.9"
|
||||
dependencies = []
|
||||
34
requirements.txt
Normal file
34
requirements.txt
Normal file
@@ -0,0 +1,34 @@
|
||||
# Web框架
|
||||
fastapi>=0.110.0
|
||||
uvicorn>=0.27.0
|
||||
|
||||
# LangGraph & LangChain
|
||||
langgraph>=0.0.40
|
||||
langchain>=0.2.0
|
||||
langchain-community>=0.2.0
|
||||
|
||||
# DashScope
|
||||
dashscope>=1.14.0
|
||||
|
||||
# Milvus
|
||||
pymilvus>=2.3.0
|
||||
|
||||
# 文档解析
|
||||
pypdf2>=3.0.0
|
||||
python-docx>=1.1.0
|
||||
pdfplumber>=0.10.0
|
||||
|
||||
# Pydantic配置
|
||||
pydantic>=2.0.0
|
||||
pydantic-settings>=2.0.0
|
||||
|
||||
# 工具
|
||||
python-multipart>=0.0.9
|
||||
sse-starlette>=1.8.0
|
||||
python-dotenv>=1.0.0
|
||||
tiktoken>=0.5.0
|
||||
httpx>=0.25.0
|
||||
|
||||
# 测试
|
||||
pytest>=7.4.0
|
||||
pytest-asyncio>=0.21.0
|
||||
0
tests/__init__.py
Normal file
0
tests/__init__.py
Normal file
Reference in New Issue
Block a user