first commit

This commit is contained in:
2026-04-23 09:58:47 +08:00
commit 448e078d99
49 changed files with 5188 additions and 0 deletions

View File

@@ -0,0 +1,117 @@
#!/usr/bin/env bash
# ══════════════════════════════════════════════════
# 00_install_docker_ubuntu.sh
# Ubuntu 22.04 LTS 安装 Docker CE + nvidia-container-toolkit
# 用法：bash scripts/00_install_docker_ubuntu.sh
# ══════════════════════════════════════════════════
# Strict mode: stop on command failure, on unset variables, and on a failing
# pipeline stage.
set -euo pipefail

# ANSI colour sequences; they are expanded by `echo -e` in the helpers below.
RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m'; BLUE='\033[0;34m'; NC='\033[0m'

# Logging helpers. Each prints a colour-tagged line built from all arguments.
info() { echo -e "${BLUE}[INFO]${NC} $*"; }
ok() { echo -e "${GREEN}[OK]${NC} $*"; }
warn() { echo -e "${YELLOW}[WARN]${NC} $*"; }
# error: print a fatal diagnostic on stderr (so it survives stdout capture or
# redirection) and terminate the script with status 1.
error() { echo -e "${RED}[ERROR]${NC} $*" >&2; exit 1; }
# ── Require root privileges ─────────────────────
if [[ $EUID -ne 0 ]]; then
  error "请以 root 或 sudo 运行sudo bash scripts/00_install_docker_ubuntu.sh"
fi
# ── Detect the distribution ─────────────────────
# Sourcing os-release defines NAME / ID / VERSION_ID as shell variables.
# Not every distribution sets all of them, so each use is defaulted; otherwise
# `set -u` would abort here before the non-Ubuntu warning could be shown.
. /etc/os-release
info "检测到 OS${NAME:-} ${VERSION_ID:-}"
if [[ "${ID:-}" != "ubuntu" ]]; then
  warn "非 Ubuntu 系统脚本可能不适用。继续y/n"
  read -r ans
  # Anything other than a literal "y" is treated as a refusal.
  if [[ "$ans" != "y" ]]; then
    exit 0
  fi
fi
# ── Step 1: optionally switch APT to the Aliyun mirror ──
# Enabled only when the caller exports USE_MIRROR=true.
info "Step 1/5配置 APT 源..."
if [[ "${USE_MIRROR:-false}" == "true" ]]; then
  # The optional two-letter prefix covers country mirrors such as
  # cn.archive.ubuntu.com; replacing only the bare host name would leave
  # "cn.mirrors.aliyun.com", which does not exist. Dots are escaped so they
  # match literally.
  sed -i -E \
    -e 's#([a-z]{2}\.)?archive\.ubuntu\.com#mirrors.aliyun.com#g' \
    -e 's#security\.ubuntu\.com#mirrors.aliyun.com#g' \
    /etc/apt/sources.list
  ok "已切换到阿里云镜像"
fi
apt-get update -qq
# ── Step 2: install the packages needed to add third-party APT repos ──
info "Step 2/5安装依赖包..."
prereq_packages=(
  ca-certificates
  curl
  gnupg
  lsb-release
  apt-transport-https
)
apt-get install -y -qq "${prereq_packages[@]}"
# ── Step 3: install Docker CE from Docker's official APT repository ──
info "Step 3/5安装 Docker CE..."
if command -v docker &>/dev/null; then
  # An existing docker binary is left untouched; only report its version.
  DOCKER_VER=$(docker --version)
  warn "Docker 已安装：$DOCKER_VER"
  warn "跳过 Docker 安装。如需重装请先运行apt-get remove docker docker-engine docker.io containerd"
else
  # Import Docker's signing key. --batch/--yes make gpg overwrite a keyring
  # left behind by an earlier, partially failed run instead of prompting.
  install -m 0755 -d /etc/apt/keyrings
  curl -fsSL https://download.docker.com/linux/ubuntu/gpg | \
    gpg --batch --yes --dearmor -o /etc/apt/keyrings/docker.gpg
  chmod a+r /etc/apt/keyrings/docker.gpg
  # Register the repository for this architecture and release codename.
  echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | \
    tee /etc/apt/sources.list.d/docker.list > /dev/null
  apt-get update -qq
  apt-get install -y -qq docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
  # Start the daemon now and on every boot.
  systemctl enable docker
  systemctl start docker
  ok "Docker CE 安装完成"
fi
# Verify that both the engine CLI and the compose plugin respond.
docker --version
docker compose version
# ── Step 4: add the invoking (non-root) user to the docker group ──
info "Step 4/5配置 Docker 用户组..."
# Prefer the account that called sudo. USER may be unset (containers, cron),
# so default it to empty rather than tripping `set -u`.
CURRENT_USER=${SUDO_USER:-${USER:-}}
if [[ -n "$CURRENT_USER" && "$CURRENT_USER" != "root" ]]; then
  usermod -aG docker "$CURRENT_USER"
  ok "用户 $CURRENT_USER 已加入 docker 组（重新登录后生效）"
fi
# ── Step 5: install nvidia-container-toolkit when an NVIDIA driver is present ──
info "Step 5/5检查 NVIDIA GPU..."
if command -v nvidia-smi &>/dev/null; then
  info "检测到 NVIDIA GPU安装 nvidia-container-toolkit..."
  nvidia-smi --query-gpu=name --format=csv,noheader
  # Import the NVIDIA repository key. --batch/--yes let a re-run overwrite the
  # existing keyring instead of stopping at gpg's overwrite prompt.
  curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | \
    gpg --batch --yes --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
  # -f makes curl fail on an HTTP error, so an error page is never written
  # into the APT source list. sed pins each entry to the keyring above.
  curl -fsSL https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
    sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
    tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
  apt-get update -qq
  apt-get install -y -qq nvidia-container-toolkit
  # Register the NVIDIA runtime with Docker, then restart to apply it.
  nvidia-ctk runtime configure --runtime=docker
  systemctl restart docker
  ok "nvidia-container-toolkit 安装完成"
else
  warn "未检测到 NVIDIA GPU跳过 nvidia-container-toolkit 安装"
  warn "如有 GPU 请手动安装驱动后重新运行本脚本"
fi
# ── Final summary: installed versions and the re-login reminder ──
summary_rule="${GREEN}══════════════════════════════════════════${NC}"
echo
echo -e "$summary_rule"
echo -e "${GREEN} Docker 安装完成！${NC}"
echo -e "$summary_rule"
echo
echo " Docker 版本：$(docker --version)"
echo " Compose 版本：$(docker compose version)"
echo
echo -e "${YELLOW} 注意：${NC}请重新登录以使 docker 组权限生效"
echo " 验证命令docker run hello-world"

View File

@@ -0,0 +1,105 @@
# ══════════════════════════════════════════════════
# 00_install_docker_windows.ps1
# Windows 11 安装 Docker Desktop + WSL2 配置
# 用法：以管理员身份运行 PowerShell，执行
# .\scripts\00_install_docker_windows.ps1
# ══════════════════════════════════════════════════
#Requires -RunAsAdministrator
# Treat errors raised by cmdlets as terminating.
$ErrorActionPreference = "Stop"

# Console helpers: each prints its arguments behind a coloured level tag.
function Write-Info {
    Write-Host "[INFO] $args" -ForegroundColor Cyan
}
function Write-Ok {
    Write-Host "[OK] $args" -ForegroundColor Green
}
function Write-Warn {
    Write-Host "[WARN] $args" -ForegroundColor Yellow
}
# Write-Err prints the message and terminates the script with exit code 1.
function Write-Err {
    Write-Host "[ERR] $args" -ForegroundColor Red
    exit 1
}

# Opening banner.
$bannerRule = "============================================"
Write-Info $bannerRule
Write-Info "AI合规智能中枢 — Windows Docker 环境安装"
Write-Info $bannerRule
# ── Step 1: enable the Windows features WSL2 depends on ──
Write-Info "Step 1/4检查并启用 WSL2..."
# Query both features up front, then enable whichever is not yet enabled.
$wslFeature = Get-WindowsOptionalFeature -Online -FeatureName Microsoft-Windows-Subsystem-Linux
$vmFeature = Get-WindowsOptionalFeature -Online -FeatureName VirtualMachinePlatform
$featurePlan = @(
    @{ State = $wslFeature.State; Name = "Microsoft-Windows-Subsystem-Linux"; Notice = "启用 WSL 功能..." },
    @{ State = $vmFeature.State; Name = "VirtualMachinePlatform"; Notice = "启用虚拟机平台..." }
)
foreach ($entry in $featurePlan) {
    if ($entry.State -eq "Enabled") {
        continue
    }
    Write-Info $entry.Notice
    # -NoRestart defers the reboot; the script reminds the user at the end.
    Enable-WindowsOptionalFeature -Online -FeatureName $entry.Name -NoRestart
}
# Update the WSL kernel and make version 2 the default for new distros.
Write-Info "更新 WSL2 内核..."
wsl --update
wsl --set-default-version 2
Write-Ok "WSL2 配置完成"
# ── Step 2: make sure an Ubuntu WSL distribution is installed ──
Write-Info "Step 2/4检查 Ubuntu WSL..."
$wslList = wsl --list --quiet 2>$null
# wsl.exe output can reach PowerShell with NUL characters between letters,
# which defeats a plain regex match, so strip NULs and blank lines first.
$wslNames = @($wslList | ForEach-Object { ($_ -replace "`0", "").Trim() } | Where-Object { $_ })
# With an array on the left, -match returns the matching elements, so the
# cast is true only when some distro name contains "Ubuntu". The previous
# `-notmatch` returned every NON-matching name and therefore triggered a
# reinstall whenever any other distro (e.g. docker-desktop) was listed.
$hasUbuntu = [bool]($wslNames -match "Ubuntu")
if (-not $hasUbuntu) {
    Write-Info "安装 Ubuntu 22.04..."
    wsl --install -d Ubuntu-22.04
    Write-Ok "Ubuntu 22.04 安装完成（首次运行需要设置用户名和密码）"
} else {
    Write-Ok "Ubuntu WSL 已安装"
    wsl --list --verbose
}
# ── Step 3: install Docker Desktop unless a docker CLI already exists ──
Write-Info "Step 3/4检查 Docker Desktop..."
$dockerCmd = Get-Command docker -ErrorAction SilentlyContinue
if (-not $dockerCmd) {
    # Prefer an unattended winget install; otherwise guide a manual install.
    $winget = Get-Command winget -ErrorAction SilentlyContinue
    if (-not $winget) {
        Write-Warn "未找到 winget请手动安装 Docker Desktop"
        Write-Warn "下载地址https://www.docker.com/products/docker-desktop/"
        Write-Warn "安装时勾选Use WSL 2 instead of Hyper-V"
        # Open the download page, then wait for the user to finish.
        Start-Process "https://www.docker.com/products/docker-desktop/"
        Read-Host "安装完成后按 Enter 继续"
    } else {
        Write-Info "通过 winget 安装 Docker Desktop..."
        winget install -e --id Docker.DockerDesktop --accept-package-agreements --accept-source-agreements
        Write-Ok "Docker Desktop 安装完成"
    }
} else {
    Write-Ok "Docker 已安装：$(docker --version)"
}
# ── Step 4: remind the user which Docker Desktop settings to confirm ──
Write-Info "Step 4/4提示 Docker Desktop 配置..."
$settingsChecklist = @(
    "",
    "请确认 Docker Desktop 已进行以下配置：",
    " 1. Settings → General → 勾选 'Use WSL 2 based engine'",
    " 2. Settings → Resources → WSL Integration → 开启 Ubuntu-22.04",
    " 3. 如有 NVIDIA GPU",
    " Settings → General → 勾选 'Use GPU with WSL 2'",
    ""
)
foreach ($line in $settingsChecklist) {
    Write-Warn $line
}
# ── Verification ──
Write-Info "验证安装..."
try {
    # Both commands must resolve before either result is reported.
    $dockerVer = docker --version
    $composeVer = docker compose version
    Write-Ok "Docker: $dockerVer"
    Write-Ok "Compose: $composeVer"
}
catch {
    # A freshly installed docker may not be on PATH until after a restart.
    Write-Warn "Docker 命令不可用，可能需要重启后再验证"
    Write-Warn "重启后运行docker run hello-world"
}

# ── Closing banner and follow-up steps ──
$closingRule = "============================================"
Write-Host ""
Write-Host $closingRule -ForegroundColor Green
Write-Host " 安装完成！" -ForegroundColor Green
Write-Host $closingRule -ForegroundColor Green
Write-Host ""
Write-Host "后续步骤（在 WSL2 Ubuntu 中执行）：" -ForegroundColor Yellow
$nextSteps = @(
    " 1. 打开 Ubuntu WSL 终端",
    " 2. cd /mnt/c/Projects/AIProjects/AIRegulations/Depolyment",
    " 3. bash scripts/01_setup_project.sh",
    ""
)
foreach ($step in $nextSteps) {
    Write-Host $step
}
Write-Host "如需重启系统请现在重启，然后继续操作。" -ForegroundColor Yellow

View File

@@ -0,0 +1,73 @@
#!/usr/bin/env bash
# ══════════════════════════════════════════════════
# 01_setup_project.sh
# 初始化项目:创建目录、生成 .env 文件
# 用法：bash scripts/01_setup_project.sh
# ══════════════════════════════════════════════════
# Strict mode: stop on errors, unset variables, and failed pipeline stages.
set -euo pipefail

# Resolve this script's directory and its parent, the project root.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"

# ANSI colour sequences used by the log helpers and the banners.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Log helpers: print a colour-tagged line built from all arguments.
info() {
  printf '%b\n' "${BLUE}[INFO]${NC} $*"
}
ok() {
  printf '%b\n' "${GREEN}[OK]${NC} $*"
}
warn() {
  printf '%b\n' "${YELLOW}[WARN]${NC} $*"
}

# Everything below uses paths relative to the project root.
cd "$PROJECT_DIR"
info "项目目录：$PROJECT_DIR"
# ── Create the runtime directory tree (relative to the project root) ──
info "创建运行时目录..."
runtime_dirs=(
  data/uploads
  data/parsed
  logs
  models
  services/embedding
  services/mcp-server
)
for backend_pkg in core api services models; do
  runtime_dirs+=("services/compliance-backend/app/${backend_pkg}")
done
mkdir -p "${runtime_dirs[@]}"
ok "目录结构创建完成"
# ── Create .env from the template on first run ──
if [[ ! -f ".env" ]]; then
  # Fail with a clear message rather than a bare `cp` error when the
  # template is missing.
  if [[ ! -f ".env.example" ]]; then
    warn "未找到 .env.example，无法创建 .env 文件"
    exit 1
  fi
  cp .env.example .env
  warn "已创建 .env 文件，请编辑并填写必要配置："
  warn " 必填DEEPSEEK_API_KEY或 DASHSCOPE_API_KEY"
  warn " 可选：修改各组件密码"
  echo ""
  echo -e "${YELLOW}是否现在编辑 .env 文件？(y/n)${NC}"
  # Without a terminal, read hits EOF; treat that as "no" instead of letting
  # `set -e` abort the remaining setup steps.
  read -r ans || ans=""
  if [[ "$ans" == "y" ]]; then
    # Left unquoted on purpose so EDITOR may carry flags. A missing or
    # failing editor must not abort the rest of the setup.
    ${EDITOR:-nano} .env || warn "编辑器异常退出，请稍后手动编辑 .env"
  fi
else
  ok ".env 文件已存在，跳过复制"
fi
# ── Check that at least one LLM API key is configured ──
info "验证 .env 配置..."
# Best effort: load .env into this shell, ignoring any error while sourcing.
source .env 2>/dev/null || true
if [[ -n "${DEEPSEEK_API_KEY:-}" || -n "${DASHSCOPE_API_KEY:-}" ]]; then
  ok "LLM API Key 已配置"
else
  warn "⚠️ 未设置 LLM API Key"
  warn " 请在 .env 中设置 DEEPSEEK_API_KEY 或 DASHSCOPE_API_KEY"
  warn " DeepSeek 申请https://platform.deepseek.com"
fi
# ── Make sure Docker and the compose plugin are available ──
info "检查 Docker 环境..."
command -v docker &>/dev/null || {
  warn "Docker 未安装请先运行bash scripts/00_install_docker_ubuntu.sh"
  exit 1
}
# With `set -e`, a missing compose plugin stops the script on this line.
docker compose version > /dev/null
ok "Docker Compose 可用：$(docker compose version)"

# ── Closing banner and next steps ──
done_rule="${GREEN}══════════════════════════════════════════${NC}"
echo
echo -e "$done_rule"
echo -e "${GREEN} 项目初始化完成！${NC}"
echo -e "$done_rule"
echo
echo "下一步操作："
echo " 1. 拉取镜像可选较慢bash scripts/02_pull_images.sh"
echo " 2. 启动全部服务： bash scripts/06_start_all.sh"
echo " 3. 检查健康状态： bash scripts/check_health.sh"

46
scripts/02_pull_images.sh Normal file
View File

@@ -0,0 +1,46 @@
#!/usr/bin/env bash
# ══════════════════════════════════════════════════
# 02_pull_images.sh
# 预拉取所有 Docker 镜像(离线/弱网环境准备)
# 用法：bash scripts/02_pull_images.sh
# ══════════════════════════════════════════════════
# Strict mode: stop on errors, unset variables, and failed pipeline stages.
set -euo pipefail

# Run from the project root (parent of this script's directory).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
cd "$PROJECT_DIR"

# ANSI colour sequences.
BLUE='\033[0;34m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

# Log helpers: print a colour-tagged line built from all arguments.
info() {
  printf '%b\n' "${BLUE}[INFO]${NC} $*"
}
ok() {
  printf '%b\n' "${GREEN}[OK]${NC} $*"
}
# Base images to pre-pull, in order.
IMAGES=(
  "pgvector/pgvector:pg16"
  "redis:7-alpine"
  "quay.io/coreos/etcd:v3.5.5"
  "minio/minio:RELEASE.2023-03-13T19-46-17Z"
  "milvusdb/milvus:v2.4.13"
  "neo4j:5.20-community"
  "nginx:1.25-alpine"
  "grafana/grafana:11.0.0"
  "prom/prometheus:v2.51.0"
)
image_total=${#IMAGES[@]}
info "开始拉取 ${image_total} 个基础镜像..."
echo

# Pull one by one; with `set -e` the first failed pull stops the script.
for (( idx = 0; idx < image_total; idx++ )); do
  img=${IMAGES[idx]}
  info "拉取：$img"
  docker pull "$img"
  ok "完成：$img"
  echo
done

info "所有基础镜像拉取完成"
echo
info "自定义服务镜像embedding/mcp/backend将在 build 时自动拉取基础层"
echo
# Hint for slow registries: configure a Docker registry mirror.
echo -e "${YELLOW}提示：如在国内网络环境下 quay.io 或 milvusdb 拉取慢，${NC}"
echo -e "${YELLOW}可配置 Docker 镜像加速器：/etc/docker/daemon.json${NC}"
echo ' {"registry-mirrors": ["https://docker.mirrors.ustc.edu.cn"]}'

93
scripts/03_start_infra.sh Normal file
View File

@@ -0,0 +1,93 @@
#!/usr/bin/env bash
# ══════════════════════════════════════════════════
# 03_start_infra.sh
# 分步启动基础设施(含健康等待),顺序:
# PostgreSQL + Redis → etcd + MinIO → Milvus → Neo4j
# 用法：bash scripts/03_start_infra.sh
# ══════════════════════════════════════════════════
# Strict mode: stop on errors, unset variables, and failed pipeline stages.
set -euo pipefail

# Run from the project root (parent of this script's directory).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
cd "$PROJECT_DIR"

# ANSI colour sequences, expanded by `echo -e`.
RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m'; BLUE='\033[0;34m'; NC='\033[0m'

# Log helpers: print a colour-tagged line built from all arguments.
info() { echo -e "${BLUE}[INFO]${NC} $*"; }
ok() { echo -e "${GREEN}[OK]${NC} $*"; }
warn() { echo -e "${YELLOW}[WARN]${NC} $*"; }
# error: print a fatal diagnostic on stderr and exit with status 1.
error() { echo -e "${RED}[ERROR]${NC} $*" >&2; exit 1; }
#######################################
# Block until a compose service's container reports "healthy".
# The container is looked up with `docker compose ps -q` and its health is
# read with `docker inspect`. This does not depend on the JSON layout of
# `docker compose ps --format json`, nor on python3 being on the host.
# Arguments:
#   $1 - compose service name
#   $2 - maximum wait in seconds (default 120)
# Outputs:
#   progress dots and status messages
# Returns:
#   0 once healthy; on timeout calls error(), which exits the script
#######################################
wait_healthy() {
  local service=$1
  local max_wait=${2:-120}
  local interval=5
  local elapsed=0
  local container_id status
  info "等待 $service 健康就绪..."
  while [[ $elapsed -lt $max_wait ]]; do
    status="unknown"
    # Only the first container is inspected; an empty id means the service
    # has not been created yet.
    container_id=$(docker compose ps -q "$service" 2>/dev/null | head -n 1) || container_id=""
    if [[ -n "$container_id" ]]; then
      # A container with no healthcheck has no .State.Health; report "none".
      status=$(docker inspect \
        --format '{{if .State.Health}}{{.State.Health.Status}}{{else}}none{{end}}' \
        "$container_id" 2>/dev/null) || status="unknown"
    fi
    if [[ "$status" == "healthy" ]]; then
      ok "$service 已就绪"
      return 0
    fi
    echo -n "."
    sleep "$interval"
    elapsed=$((elapsed + interval))
  done
  echo ""
  error "$service 等待超时（${max_wait}s请检查docker compose logs $service"
}
# launch_tier: start the given services together, then wait for each one.
# Each argument is "<service>:<timeout-seconds>".
launch_tier() {
  local spec
  local -a tier_services=()
  for spec in "$@"; do
    tier_services+=("${spec%%:*}")
  done
  docker compose up -d "${tier_services[@]}"
  for spec in "$@"; do
    wait_healthy "${spec%%:*}" "${spec##*:}"
  done
}

info "══════════════════════════════════════════"
info " 启动基础设施层"
info "══════════════════════════════════════════"

# ── Step 1: PostgreSQL + Redis ──
info "Step 1/4启动 PostgreSQL 和 Redis..."
launch_tier postgres:90 redis:30
ok "数据层就绪"

# ── Step 2: etcd + MinIO (needed by Milvus) ──
info "Step 2/4启动 etcd 和 MinIOMilvus 依赖）..."
launch_tier etcd:60 minio:60
ok "对象存储层就绪"

# ── Step 3: Milvus ──
# Written out in full because a notice is printed between start and wait.
info "Step 3/4启动 Milvus向量数据库..."
docker compose up -d milvus
info "Milvus 初始化需要约 60 秒，请耐心等待..."
wait_healthy milvus 180
ok "Milvus 就绪"

# ── Step 4: Neo4j ──
info "Step 4/4启动 Neo4j知识图谱..."
launch_tier neo4j:120
ok "Neo4j 就绪"
# ── Summary: list the local endpoints ──
infra_rule="${GREEN}══════════════════════════════════════════${NC}"
echo
echo -e "$infra_rule"
echo -e "${GREEN} 基础设施启动完成！${NC}"
echo -e "$infra_rule"
echo
cat <<'ENDPOINTS'
 PostgreSQL : localhost:5432
 Redis : localhost:6379
 Milvus : localhost:19530 (gRPC), localhost:9091 (HTTP)
 Neo4j : localhost:7474 (Browser), localhost:7687 (Bolt)
 MinIO 控制台: localhost:9001 (admin/minioadmin)
ENDPOINTS
echo
echo "下一步bash scripts/04_build_services.sh"

View File

@@ -0,0 +1,59 @@
#!/usr/bin/env bash
# ══════════════════════════════════════════════════
# 04_build_services.sh
# 构建自定义服务 Docker 镜像
# embedding-service / mcp-server / compliance-backend
# 用法：bash scripts/04_build_services.sh
# ══════════════════════════════════════════════════
# Strict mode: stop on errors, unset variables, and failed pipeline stages.
set -euo pipefail

# Run from the project root (parent of this script's directory).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
cd "$PROJECT_DIR"

# ANSI colour sequences.
BLUE='\033[0;34m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

# Log helpers: print a colour-tagged line built from all arguments.
info() {
  printf '%b\n' "${BLUE}[INFO]${NC} $*"
}
ok() {
  printf '%b\n' "${GREEN}[OK]${NC} $*"
}
warn() {
  printf '%b\n' "${YELLOW}[WARN]${NC} $*"
}
# build_timed: build one compose service and report how long it took.
#   $1 - compose service name
#   $2 - label shown in the "building" message
# START and END stay global, as in the original straight-line code.
build_timed() {
  local svc=$1
  local label=$2
  info "构建 ${label}..."
  START=$(date +%s)
  docker compose build "$svc"
  END=$(date +%s)
  ok "${svc} 构建完成（$(( END - START ))s"
  echo ""
}

info "══════════════════════════════════════════"
info " 构建自定义服务镜像"
info "══════════════════════════════════════════"
warn "首次构建较慢（需下载 Python 依赖 + AI 模型）"
warn "BGE-M3 模型约 2.5GBMinerU 模型约 2GB"
echo ""

# Embedding service, parsing service, then the business backend.
build_timed embedding-service "embedding-serviceBGE-M3"
build_timed mcp-server "mcp-serverMinerU"
build_timed compliance-backend "compliance-backend"

# ── List the images just built (no match is not an error) ──
info "已构建的镜像："
docker images | grep -E "compliance-(embedding|mcp|backend)" || true
echo ""
built_rule="${GREEN}══════════════════════════════════════════${NC}"
echo -e "$built_rule"
echo -e "${GREEN} 所有服务镜像构建完成！${NC}"
echo -e "$built_rule"
echo ""
echo "下一步bash scripts/05_init_db.sh"

124
scripts/05_init_db.sh Normal file
View File

@@ -0,0 +1,124 @@
#!/usr/bin/env bash
# ══════════════════════════════════════════════════
# 05_init_db.sh
# 初始化数据库PostgreSQL Schema + Milvus Collections + Neo4j Constraints
# 用法：bash scripts/05_init_db.sh
# 前提：postgres / milvus / neo4j 已运行且健康
# ══════════════════════════════════════════════════
# Strict mode: stop on errors, unset variables, and failed pipeline stages.
set -euo pipefail

# Run from the project root (parent of this script's directory).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
cd "$PROJECT_DIR"

# ANSI colour sequences, expanded by `echo -e`.
RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m'; BLUE='\033[0;34m'; NC='\033[0m'

# Log helpers: print a colour-tagged line built from all arguments.
info() { echo -e "${BLUE}[INFO]${NC} $*"; }
ok() { echo -e "${GREEN}[OK]${NC} $*"; }
warn() { echo -e "${YELLOW}[WARN]${NC} $*"; }
# error: print a fatal diagnostic on stderr and exit with status 1.
error() { echo -e "${RED}[ERROR]${NC} $*" >&2; exit 1; }

# Best effort: load .env, then fall back to defaults for anything it leaves
# unset.
source .env 2>/dev/null || true
POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-compliance123}
NEO4J_PASSWORD=${NEO4J_PASSWORD:-neo4j123}
# ── Step 1: apply the PostgreSQL schema ──
info "Step 1/3初始化 PostgreSQL Schema..."
# Capture the status first so `grep -q` exiting early cannot fail the
# pipeline under pipefail. -w matches "healthy" as a whole word: a plain
# substring match would also accept "unhealthy".
pg_status=$(docker compose ps postgres) || pg_status=""
if grep -qw "healthy" <<<"$pg_status"; then
  # Best effort: show the last lines of psql output and warn if the
  # pipeline fails.
  docker compose exec -T postgres psql \
    -U compliance -d compliance_db \
    -f /docker-entrypoint-initdb.d/01_init_schema.sql \
    2>&1 | tail -5 || warn "SQL 可能部分已存在IF NOT EXISTS这是正常的"
  ok "PostgreSQL Schema 初始化完成"
else
  error "PostgreSQL 未运行请先执行bash scripts/03_start_infra.sh"
fi
# ── Step 2: create the Milvus collections ──
info "Step 2/3初始化 Milvus Collections..."
# -w matches "healthy" as a whole word so that "unhealthy" is rejected.
milvus_status=$(docker compose ps milvus) || milvus_status=""
if grep -qw "healthy" <<<"$milvus_status"; then
  # Run the initialiser in a throw-away compliance-backend container, which
  # provides pymilvus. The Python source must start at column 0.
  docker compose run --rm --no-deps compliance-backend \
    python3 -c "
from pymilvus import connections, Collection, CollectionSchema, FieldSchema, DataType, utility

connections.connect(host='milvus', port='19530')
print('Milvus 连接成功')


def create_collection(name, description):
    # Idempotent: an existing collection is left untouched.
    if utility.has_collection(name):
        print(f' Collection {name} 已存在，跳过')
        return
    fields = [
        FieldSchema(name='id', dtype=DataType.VARCHAR, is_primary=True, max_length=128),
        FieldSchema(name='file_id', dtype=DataType.VARCHAR, max_length=128),
        FieldSchema(name='workspace_id', dtype=DataType.VARCHAR, max_length=128),
        FieldSchema(name='chunk_idx', dtype=DataType.INT64),
        FieldSchema(name='content', dtype=DataType.VARCHAR, max_length=65535),
        FieldSchema(name='dense_vec', dtype=DataType.FLOAT_VECTOR, dim=1024),  # BGE-M3 dense
        FieldSchema(name='metadata', dtype=DataType.JSON),
    ]
    schema = CollectionSchema(fields, description=description)
    col = Collection(name, schema)
    # HNSW index with cosine similarity on the dense vector field.
    index_params = {
        'metric_type': 'COSINE',
        'index_type': 'HNSW',
        'params': {'M': 16, 'efConstruction': 200}
    }
    col.create_index('dense_vec', index_params)
    col.load()
    print(f' Collection {name} 创建完成')


create_collection('regulation_chunks', '法规条款向量库')
create_collection('doc_chunks', '企业文档向量库')
create_collection('case_library', '行业案例库')
print('Milvus 初始化完成')
" 2>&1
  ok "Milvus Collections 初始化完成"
else
  error "Milvus 未运行请先执行bash scripts/03_start_infra.sh"
fi
# ── Step 3Neo4j 约束和索引 ────────────────────
info "Step 3/3初始化 Neo4j 约束和索引..."
sleep 5 # Neo4j 可能还在预热
docker compose exec -T neo4j cypher-shell \
-u neo4j -p "$NEO4J_PASSWORD" \
--format plain <<'CYPHER'
// 节点约束(唯一性)
CREATE CONSTRAINT regulation_id IF NOT EXISTS
FOR (r:Regulation) REQUIRE r.id IS UNIQUE;
CREATE CONSTRAINT clause_id IF NOT EXISTS
FOR (c:Clause) REQUIRE c.id IS UNIQUE;
CREATE CONSTRAINT obligation_id IF NOT EXISTS
FOR (o:Obligation) REQUIRE o.id IS UNIQUE;
// 全文索引(模糊查询)
CREATE FULLTEXT INDEX regulation_fulltext IF NOT EXISTS
FOR (r:Regulation) ON EACH [r.title, r.code, r.domain];
CREATE FULLTEXT INDEX clause_fulltext IF NOT EXISTS
FOR (c:Clause) ON EACH [c.content, c.title];
// 插入示例节点(验证连通性)
MERGE (d:Domain {name: 'vehicle_safety', label: '车辆安全法规'});
MERGE (d:Domain {name: 'data_security', label: '数据安全法规'});
MERGE (d:Domain {name: 'ehs', label: 'EHS安全法规'});
MERGE (d:Domain {name: 'carbon', label: '碳排放法规'});
RETURN '初始化完成' AS result;
CYPHER
ok "Neo4j 约束和索引初始化完成"
# ── Summary of what was initialised ──
db_rule="${GREEN}══════════════════════════════════════════${NC}"
echo
echo -e "$db_rule"
echo -e "${GREEN} 数据库初始化完成！${NC}"
echo -e "$db_rule"
echo
cat <<'SUMMARY'
 PostgreSQL: 所有表已创建
 Milvus: regulation_chunks / doc_chunks / case_library
 Neo4j: 约束 + 全文索引 + 基础域节点
SUMMARY
echo
echo "下一步bash scripts/06_start_all.sh"

98
scripts/06_start_all.sh Normal file
View File

@@ -0,0 +1,98 @@
#!/usr/bin/env bash
# ══════════════════════════════════════════════════
# 06_start_all.sh
# 一键启动所有服务(完整流程)
# 用法：bash scripts/06_start_all.sh
# ══════════════════════════════════════════════════
# Strict mode: stop on errors, unset variables, and failed pipeline stages.
set -euo pipefail

# Run from the project root (parent of this script's directory).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
cd "$PROJECT_DIR"

# ANSI colour sequences, expanded by `echo -e`.
RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m'; BLUE='\033[0;34m'; NC='\033[0m'

# Log helpers: print a colour-tagged line built from all arguments.
info() { echo -e "${BLUE}[INFO]${NC} $*"; }
ok() { echo -e "${GREEN}[OK]${NC} $*"; }
warn() { echo -e "${YELLOW}[WARN]${NC} $*"; }
# error: print a fatal diagnostic on stderr and exit with status 1.
error() { echo -e "${RED}[ERROR]${NC} $*" >&2; exit 1; }
# ── Title box ──
echo
for box_line in \
  "╔══════════════════════════════════════════╗" \
  "║ AI合规智能中枢 — 全服务启动 ║" \
  "╚══════════════════════════════════════════╝"; do
  echo -e "${BLUE}${box_line}${NC}"
done
echo

# ── Pre-flight checks ──
# A missing .env is fatal; a missing LLM key only produces a warning.
[[ -f ".env" ]] || error ".env 文件不存在请先运行bash scripts/01_setup_project.sh"
source .env 2>/dev/null || true
if [[ -z "${DEEPSEEK_API_KEY:-}" && -z "${DASHSCOPE_API_KEY:-}" ]]; then
  warn "⚠️ 未设置 LLM API KeyLLM 功能将不可用"
  warn "请在 .env 中设置 DEEPSEEK_API_KEY 或 DASHSCOPE_API_KEY"
  echo
fi
# ── Phase 1: infrastructure ──
info "Phase 1/4启动基础设施..."
bash "$SCRIPT_DIR/03_start_infra.sh"
echo ""
# ── Phase 2: build the service images ──
info "Phase 2/4构建服务镜像首次较慢..."
# The filter group always succeeds, so under pipefail the pipeline's status
# is the build's own status. The previous `| grep ... || true` discarded a
# failed build and reported success.
if ! docker compose build embedding-service mcp-server compliance-backend 2>&1 | \
    { grep -E "(Step|Successfully|=>|ERROR)" || true; }; then
  error "镜像构建失败，请运行 docker compose build 查看完整输出"
fi
ok "镜像构建完成"
echo ""
# ── Phase 3: initialise the databases ──
info "Phase 3/4初始化数据库..."
bash "$SCRIPT_DIR/05_init_db.sh"
echo ""
#######################################
# Poll an HTTP endpoint until it answers successfully.
# Arguments:
#   $1 - service name used in messages
#   $2 - URL to probe
#   $3 - number of attempts
#   $4 - seconds to sleep between attempts
# Returns:
#   0 when the endpoint responded; 1 when all attempts were used up
#######################################
wait_for_http() {
  local name=$1 url=$2 attempts=$3 delay=$4
  local attempt
  for (( attempt = 1; attempt <= attempts; attempt++ )); do
    if curl -sf "$url" > /dev/null 2>&1; then
      ok "$name 就绪"
      return 0
    fi
    echo -n "."
    sleep "$delay"
  done
  echo ""
  # Previously a timeout fell through silently and looked like success.
  warn "$name 在预期时间内未就绪，请检查docker compose logs $name"
  return 1
}

# ── Phase 4: start the model services, then the business services ──
info "Phase 4/4启动 AI 模型服务和业务服务..."
docker compose up -d embedding-service mcp-server
info "等待 AI 模型加载BGE-M3/MinerU 约需 2-3 分钟）..."
sleep 30
# A timeout stays non-fatal, as before, but is now reported.
wait_for_http "embedding-service" "http://localhost:8010/health" 20 10 || true
docker compose up -d compliance-backend celery-worker celery-beat nginx
info "等待业务服务启动..."
sleep 15
wait_for_http "compliance-backend" "http://localhost:8000/health" 12 5 || true
# ── Final status: service table, URLs, and the smoke-test hint ──
echo
for box_line in \
  "╔══════════════════════════════════════════╗" \
  "║ 所有服务启动完成！ ║" \
  "╚══════════════════════════════════════════╝"; do
  echo -e "${GREEN}${box_line}${NC}"
done
echo
docker compose ps --format "table {{.Service}}\t{{.Status}}\t{{.Ports}}"
echo
echo -e "${BLUE}访问地址：${NC}"
cat <<'URLS'
 API 网关 : http://localhost
 API 文档 : http://localhost/docs
 Neo4j 浏览器 : http://localhost:7474
 MinIO 控制台 : http://localhost:9001
URLS
echo
echo -e "${YELLOW}运行冒烟测试：${NC}"
echo " bash scripts/07_smoke_test.sh"

183
scripts/07_smoke_test.sh Normal file
View File

@@ -0,0 +1,183 @@
#!/usr/bin/env bash
# ══════════════════════════════════════════════════
# 07_smoke_test.sh
# 端到端冒烟测试:验证三条业务闭环
# 用法：bash scripts/07_smoke_test.sh
# ══════════════════════════════════════════════════
# Strict mode: stop on errors, unset variables, and failed pipeline stages.
set -euo pipefail

# Run from the project root (parent of this script's directory).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
cd "$PROJECT_DIR"

# ANSI colour sequences.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Failure counter (incremented by fail) and the gateway base URL.
FAILED=0
API_BASE="http://localhost"

# Result helpers: info = section header, ok = pass, warn = soft miss.
info() {
  printf '%b\n' "${BLUE}[INFO]${NC} $*"
}
ok() {
  printf '%b\n' "${GREEN}[✓]${NC} $*"
}
warn() {
  printf '%b\n' "${YELLOW}[~]${NC} $*"
}
# fail: report a hard failure and count it for the final summary.
fail() {
  printf '%b\n' "${RED}[✗]${NC} $*"
  FAILED=$((FAILED + 1))
}

# Title banner.
title_rule="${BLUE}══════════════════════════════════════════${NC}"
echo
echo -e "$title_rule"
echo -e "${BLUE} AI合规智能中枢 端到端冒烟测试${NC}"
echo -e "$title_rule"
echo
# ── Basic health checks ──
info "=== 基础设施健康检查 ==="

# check_service: probe a URL; pass on a successful response, otherwise
# record a failure.
#   $1 - display name
#   $2 - URL to probe
check_service() {
  local name=$1
  local url=$2
  if ! curl -sf "$url" > /dev/null 2>&1; then
    fail "$name$url 不可达）"
    return
  fi
  ok "$name"
}

# Endpoints as "name|url" pairs, probed in order.
health_targets=(
  "API 网关 (Nginx)|http://localhost/health"
  "业务后端 (FastAPI)|http://localhost:8000/health"
  "嵌入服务 (BGE-M3)|http://localhost:8010/health"
  "解析服务 (MinerU)|http://localhost:8011/health"
  "Milvus HTTP|http://localhost:9091/healthz"
  "Neo4j Browser|http://localhost:7474"
)
for target in "${health_targets[@]}"; do
  check_service "${target%%|*}" "${target#*|}"
done
echo ""
# ── Embedding service test ──
info "=== 嵌入服务测试 ==="
# An unreachable service yields "{}", which then fails the check below.
EMBED_RESP=$(curl -sf -X POST http://localhost:8010/embed \
  -H "Content-Type: application/json" \
  -d '{"texts": ["GB 18384 电动汽车碰撞安全要求"], "batch_size": 1}' 2>/dev/null || echo "{}")
# Pass only when the first entry of "dense" has exactly 1024 elements.
if ! python3 -c "import sys,json; d=json.load(sys.stdin); assert len(d.get('dense',[])[0])==1024" <<<"$EMBED_RESP" 2>/dev/null; then
  fail "BGE-M3 嵌入失败，响应：${EMBED_RESP:0:200}"
else
  ok "BGE-M3 嵌入：返回 1024 维向量"
fi
echo ""
# ── Create the sample document that gets uploaded below ──
# Despite the variable name, the fixture is a plain-text file.
info "=== 创建测试文档 ==="
TEST_PDF="$PROJECT_DIR/data/uploads/test_regulation.txt"
# The uploads directory may not exist yet; without this the redirect below
# fails and `set -e` aborts the whole run.
mkdir -p "$(dirname "$TEST_PDF")"
cat > "$TEST_PDF" << 'EOF'
GB 18384-2020 电动汽车安全要求
第一章 总则
本标准规定了电动汽车的安全要求适用于M1类纯电动汽车。
第二章 电气安全
2.1 绝缘电阻要求
直流电路绝缘电阻不得低于100Ω/V。
2.2 碰撞安全
车辆碰撞后，高压电系统应自动断电。
碰撞后5秒内高压系统电压应降至60V以下。
第三章 防水要求
高压系统防护等级应达到IP67。
EOF
ok "测试文档创建：$TEST_PDF"
echo ""
# ── Loop 1: upload → vectorise → question answering ──
info "=== 闭环①：法规入库 → 检索问答 ==="
# Create a workspace. Every request falls back to "{}" / "" on failure so
# that `set -e` does not end the run.
WORKSPACE_RESP=$(curl -sf -X POST "$API_BASE/api/kb/workspaces" \
  -H "Content-Type: application/json" \
  -d '{"name": "测试法规库", "domain": "vehicle_safety"}' 2>/dev/null || echo "{}")
WS_ID=$(echo "$WORKSPACE_RESP" | python3 -c "import sys,json; print(json.load(sys.stdin).get('id',''))" 2>/dev/null || echo "")
if [[ -n "$WS_ID" ]]; then
  ok "工作空间创建：$WS_ID"
else
  warn "工作空间创建失败（可能接口未完全实现），跳过后续上传测试"
  # Placeholder id so the upload endpoint is still exercised.
  WS_ID="test-workspace"
fi
# Upload the sample document.
UPLOAD_RESP=$(curl -sf -X POST "$API_BASE/api/kb/files/upload" \
  -F "file=@$TEST_PDF" \
  -F "workspace_id=$WS_ID" 2>/dev/null || echo "{}")
TASK_ID=$(echo "$UPLOAD_RESP" | python3 -c "import sys,json; print(json.load(sys.stdin).get('task_id',''))" 2>/dev/null || echo "")
if [[ -n "$TASK_ID" ]]; then
  ok "文件上传任务已创建：$TASK_ID"
  # Poll the task for at most 24 × 5 s = 120 s.
  info "等待向量化完成..."
  TASK_STATUS="unknown"
  for i in {1..24}; do
    TASK_STATUS=$(curl -sf "$API_BASE/api/kb/tasks/$TASK_ID" 2>/dev/null | \
      python3 -c "import sys,json; print(json.load(sys.stdin).get('status','unknown'))" 2>/dev/null || echo "unknown")
    if [[ "$TASK_STATUS" == "completed" ]]; then
      ok "向量化完成（${i}×5s"
      break
    elif [[ "$TASK_STATUS" == "failed" ]]; then
      fail "向量化失败"
      break
    fi
    echo -n "."
    sleep 5
  done
  echo ""
  # Report a timeout explicitly; previously it looked like success.
  if [[ "$TASK_STATUS" != "completed" && "$TASK_STATUS" != "failed" ]]; then
    warn "向量化在 120 秒内未完成（最后状态：$TASK_STATUS）"
  fi
  # Ask a question against the workspace.
  QA_RESP=$(curl -sf -X POST "$API_BASE/api/kb/qa" \
    -H "Content-Type: application/json" \
    -d "{\"query\": \"碰撞后高压系统电压要求\", \"workspace_id\": \"$WS_ID\", \"top_k\": 3}" 2>/dev/null || echo "{}")
  ANSWER=$(echo "$QA_RESP" | python3 -c "import sys,json; print(json.load(sys.stdin).get('answer','')[:100])" 2>/dev/null || echo "")
  if [[ -n "$ANSWER" ]]; then
    ok "问答成功：${ANSWER}..."
  else
    warn "问答返回空LLM API 可能未配置或响应缓慢）"
  fi
else
  warn "文件上传失败（接口可能未实现）"
fi
echo ""
# ── Loop 2: compliance review ──
info "=== 闭环②：文档上传 → 合规审查 ==="
CHECK_RESP=$(curl -sf -X POST "$API_BASE/api/compliance/check" \
  -H "Content-Type: application/json" \
  -d '{"query": "供应商文件是否符合GB 18384碰撞安全要求", "domains": ["vehicle_safety"]}' 2>/dev/null || echo "{}")
RISK=$(python3 -c "import sys,json; print(json.load(sys.stdin).get('risk_level','unknown'))" <<<"$CHECK_RESP" 2>/dev/null || echo "unknown")
# An absent or empty risk level is a soft miss, not a hard failure.
if [[ "$RISK" == "unknown" || -z "$RISK" ]]; then
  warn "合规审查接口返回空（功能可能未完全实现）"
else
  ok "合规审查完成，风险等级：$RISK"
fi
echo ""

# ── Loop 3: regulation monitoring source ──
info "=== 闭环③：法规监控源配置 ==="
SOURCE_RESP=$(curl -sf -X POST "$API_BASE/api/regulation/sources" \
  -H "Content-Type: application/json" \
  -d '{"name": "测试监控源", "url": "https://std.samr.gov.cn", "domain": "vehicle_safety"}' 2>/dev/null || echo "{}")
SOURCE_ID=$(python3 -c "import sys,json; print(json.load(sys.stdin).get('id',''))" <<<"$SOURCE_RESP" 2>/dev/null || echo "")
if [[ -z "$SOURCE_ID" ]]; then
  warn "监控源配置返回空（功能可能未完全实现）"
else
  ok "监控源配置成功：$SOURCE_ID"
fi
echo ""
# ── Summary ──
result_rule="${BLUE}══════════════════════════════════════════${NC}"
echo
echo -e "$result_rule"
if (( FAILED != 0 )); then
  echo -e "${YELLOW} 完成，${FAILED} 项失败${NC}（部分功能可能尚未实现）"
else
  echo -e "${GREEN} 全部检查通过！${NC}"
fi
echo -e "$result_rule"
echo
# Where to look when something failed.
echo "查看服务日志："
for log_target in compliance-backend celery-worker; do
  echo " docker compose logs -f ${log_target}"
done

66
scripts/check_health.sh Normal file
View File

@@ -0,0 +1,66 @@
#!/usr/bin/env bash
# ══════════════════════════════════════════════════
# check_health.sh
# 检查所有服务的健康状态和资源使用
# 用法：bash scripts/check_health.sh
# ══════════════════════════════════════════════════
# Resolve the project root (parent of this script's directory).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
# This script does not use `set -e`, so check the cd explicitly: the compose
# and .env lookups below are relative and would otherwise run in whatever
# directory the caller happened to be in.
cd "$PROJECT_DIR" || { echo "cannot enter project directory: $PROJECT_DIR" >&2; exit 1; }

# ANSI colour sequences, expanded by `echo -e`.
GREEN='\033[0;32m'; RED='\033[0;31m'; YELLOW='\033[1;33m'; BLUE='\033[0;34m'; NC='\033[0m'

# Report banner.
echo ""
echo -e "${BLUE}══════════════════════════════════════════${NC}"
echo -e "${BLUE} 服务健康检查报告${NC}"
echo -e "${BLUE}══════════════════════════════════════════${NC}"
echo ""
# Compose service table.
echo -e "${BLUE}【Docker Compose 服务状态】${NC}"
docker compose ps --format "table {{.Service}}\t{{.Status}}\t{{.Ports}}"
echo ""

# HTTP endpoint probes.
echo -e "${BLUE}【HTTP 健康端点】${NC}"

# check_http: probe a URL with a 5-second limit and print OK or FAIL.
#   $1 - display name
#   $2 - URL to probe
check_http() {
  local name=$1
  local url=$2
  local verdict="${RED}[FAIL]${NC}"
  if curl -sf --max-time 5 "$url" > /dev/null 2>&1; then
    verdict="${GREEN}[OK]${NC}"
  fi
  echo -e " ${verdict} $name ($url)"
}

# Endpoints as "name|url" pairs, probed in order.
http_targets=(
  "API 网关|http://localhost/health"
  "业务后端|http://localhost:8000/health"
  "嵌入服务|http://localhost:8010/health"
  "解析服务|http://localhost:8011/health"
  "Milvus|http://localhost:9091/healthz"
  "Neo4j|http://localhost:7474"
)
for target in "${http_targets[@]}"; do
  check_http "${target%%|*}" "${target#*|}"
done
echo ""
# Container resource usage (at most the first 15 lines).
echo -e "${BLUE}【容器资源使用】${NC}"
stats_format="table {{.Name}}\t{{.CPUPerc}}\t{{.MemUsage}}\t{{.MemPerc}}"
docker stats --no-stream --format "$stats_format" 2>/dev/null | head -15
echo ""

# Disk usage of the project filesystem and of Docker itself.
echo -e "${BLUE}【磁盘使用】${NC}"
df -h . | tail -1 | awk '{print " 项目目录：已用 "$3"，可用 "$4"" $5 " 使用率）"}'
docker system df 2>/dev/null | head -6
echo ""

# LLM key check: DeepSeek takes precedence over DashScope when both are set.
echo -e "${BLUE}【LLM API 配置】${NC}"
source .env 2>/dev/null || true
llm_report=" ${YELLOW}[WARN]${NC} 未配置 LLM API KeyLLM 功能不可用）"
if [[ -n "${DASHSCOPE_API_KEY:-}" ]]; then
  llm_report=" ${GREEN}[OK]${NC} DashScope (Qwen) API Key 已配置"
fi
if [[ -n "${DEEPSEEK_API_KEY:-}" ]]; then
  llm_report=" ${GREEN}[OK]${NC} DeepSeek API Key 已配置"
fi
echo -e "$llm_report"
echo ""

View File

@@ -0,0 +1,91 @@
#!/usr/bin/env bash
# ══════════════════════════════════════════════════
# download_models.sh
# 预下载 AI 模型到 ./models 目录(加速容器启动)
# 支持 HuggingFace 镜像加速(国内网络)
# 用法：bash scripts/download_models.sh
# ══════════════════════════════════════════════════
# Strict mode: stop on errors, unset variables, and failed pipeline stages.
set -euo pipefail

# Run from the project root (parent of this script's directory).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
cd "$PROJECT_DIR"

# ANSI colour sequences.
BLUE='\033[0;34m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

# Log helpers: print a colour-tagged line built from all arguments.
info() {
  printf '%b\n' "${BLUE}[INFO]${NC} $*"
}
ok() {
  printf '%b\n' "${GREEN}[OK]${NC} $*"
}
warn() {
  printf '%b\n' "${YELLOW}[WARN]${NC} $*"
}

# Models are stored under <project>/models.
MODELS_DIR="$PROJECT_DIR/models"
mkdir -p "$MODELS_DIR"

# Use the HuggingFace mirror unless the caller already set HF_ENDPOINT, and
# point the HuggingFace home at the models directory.
export HF_ENDPOINT="${HF_ENDPOINT:-https://hf-mirror.com}"
export HF_HOME="$MODELS_DIR"
info "HuggingFace 镜像：$HF_ENDPOINT"
info "模型保存路径：$MODELS_DIR"
echo
#######################################
# Download a model snapshot with huggingface_hub, falling back to ModelScope.
# The repo id and cache directory are passed to Python as argv, and the
# endpoint/home as environment variables, instead of being spliced into the
# Python source. A quote in a path can therefore no longer break the snippet.
# Globals:
#   HF_ENDPOINT (read), MODELS_DIR (read)
# Arguments:
#   $1 - HuggingFace repo id
#   $2 - model id to use for the ModelScope fallback
#######################################
download_hf() {
  local repo=$1
  local local_name=$2
  info "下载 $repo..."
  if HF_ENDPOINT="$HF_ENDPOINT" HF_HOME="$MODELS_DIR" python3 -c '
import sys
from huggingface_hub import snapshot_download

repo_id, cache_dir = sys.argv[1], sys.argv[2]
snapshot_download(repo_id=repo_id, cache_dir=cache_dir)
print("下载完成")
' "$repo" "$MODELS_DIR" 2>&1; then
    ok "$repo 下载成功"
  else
    warn "$repo HuggingFace 下载失败，尝试 ModelScope..."
    download_modelscope "$repo" "$local_name"
  fi
}
#######################################
# Fallback: download a model through ModelScope.
# The Python snippet catches its own errors and only prints them, so the
# trailing warning fires only when python3 itself cannot be run. The model id
# and cache directory are passed as argv rather than spliced into the Python
# source.
# Globals:
#   MODELS_DIR (read)
# Arguments:
#   $1 - HuggingFace repo id (used as the default ModelScope id)
#   $2 - ModelScope model id (optional; defaults to $1)
#######################################
download_modelscope() {
  local ms_name=${2:-$1}
  python3 -c '
import sys

model_id, cache_dir = sys.argv[1], sys.argv[2]
try:
    from modelscope import snapshot_download
    snapshot_download(model_id=model_id, cache_dir=cache_dir)
    print("ModelScope 下载完成")
except ImportError:
    print("ModelScope 未安装，跳过")
except Exception as e:
    print(f"ModelScope 下载失败: {e}")
' "$ms_name" "${MODELS_DIR}/modelscope" 2>&1 || warn "ModelScope 下载也失败，模型将在容器启动时自动下载"
}
# ── Make sure huggingface_hub is importable; try to install it otherwise ──
python3 -c "import huggingface_hub" 2>/dev/null || {
  warn "未安装 huggingface_hub尝试安装..."
  pip3 install -q huggingface_hub modelscope 2>/dev/null || \
    warn "安装失败，模型将在容器首次启动时下载"
}

# ── Download each model; the same id is used for the ModelScope fallback ──
model_titles=(
  "=== 下载 BGE-M3 嵌入模型（约 2.5GB==="
  "=== 下载 BGE-Reranker 精排模型（约 1.1GB==="
)
model_repos=(
  "BAAI/bge-m3"
  "BAAI/bge-reranker-v2-m3"
)
for idx in "${!model_repos[@]}"; do
  info "${model_titles[idx]}"
  download_hf "${model_repos[idx]}" "${model_repos[idx]}"
  echo ""
done

# MinerU models are not fetched here; only the instructions are printed.
info "=== MinerU 模型说明 ==="
warn "MinerU 模型（约 2GB将在 mcp-server 容器首次启动时自动下载"
warn "如需预下载，请在 mcp-server 容器内运行mineru-models-download"
echo ""
models_rule="${GREEN}══════════════════════════════════════════${NC}"
echo -e "$models_rule"
echo -e "${GREEN} 模型下载完成！${NC}"
echo -e "$models_rule"
echo ""
echo "已下载到：$MODELS_DIR"
du -sh "$MODELS_DIR" 2>/dev/null || true

37
scripts/reset_all.sh Normal file
View File

@@ -0,0 +1,37 @@
#!/usr/bin/env bash
# ══════════════════════════════════════════════════
# reset_all.sh
# ⚠️ 危险操作:停止所有服务并删除所有数据(慎用!)
# 用法：bash scripts/reset_all.sh
# ══════════════════════════════════════════════════
# Resolve the project root (parent of this script's directory).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
# This script does not use `set -e`. If the cd failed unnoticed, the
# destructive commands below would run in whatever directory the caller
# was in, so stop here instead.
cd "$PROJECT_DIR" || { echo "cannot enter project directory: $PROJECT_DIR" >&2; exit 1; }

RED='\033[0;31m'; YELLOW='\033[1;33m'; NC='\033[0m'

# Warning box, then require the literal answer "yes" to continue.
echo ""
echo -e "${RED}╔══════════════════════════════════════════╗${NC}"
echo -e "${RED}║ ⚠️ 警告：此操作将删除所有数据！ ║${NC}"
echo -e "${RED}║ 包括PostgreSQL / Milvus / Neo4j 数据 ║${NC}"
echo -e "${RED}║ 以及所有上传的文件和日志 ║${NC}"
echo -e "${RED}╚══════════════════════════════════════════╝${NC}"
echo ""
echo -e "${YELLOW}确认要重置所有数据吗？(输入 'yes' 确认，其他取消)${NC}"
read -r CONFIRM
if [[ "$CONFIRM" != "yes" ]]; then
  echo "已取消"
  exit 0
fi
echo ""
# Stop every service and drop its volumes.
echo "停止所有服务..."
docker compose down --volumes --remove-orphans
# Empty the data directories but keep the directories themselves. Paths are
# anchored to PROJECT_DIR; `:?` aborts if it were ever empty, so the globs
# cannot expand from the filesystem root.
echo "清理数据目录..."
rm -rf -- "${PROJECT_DIR:?}"/data/uploads/* "${PROJECT_DIR:?}"/data/parsed/* "${PROJECT_DIR:?}"/logs/*
echo "✓ 数据目录已清空（保留目录结构）"
echo ""
echo -e "${YELLOW}重置完成。重新启动bash scripts/06_start_all.sh${NC}"