#!/usr/bin/env bash
# ══════════════════════════════════════════════════
# 05_init_db.sh
# Initialize the databases: PostgreSQL schema + Milvus collections + Neo4j constraints
# Usage:        bash scripts/05_init_db.sh
# Prerequisite: postgres / milvus / neo4j are already running and healthy
# ══════════════════════════════════════════════════

set -euo pipefail

# Run from the project root (the parent of the directory holding this script)
# so that `docker compose` and `.env` resolve regardless of the caller's cwd.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
cd "$PROJECT_DIR"

# ANSI colour escapes; they are interpreted by printf '%b' / echo -e at print time.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Logging helpers. Progress messages go to stdout, diagnostics go to stderr.
# error() prints its message and terminates the script with status 1.
info()  { printf '%b\n' "${BLUE}[INFO]${NC} $*"; }
ok()    { printf '%b\n' "${GREEN}[OK]${NC} $*"; }
warn()  { printf '%b\n' "${YELLOW}[WARN]${NC} $*" >&2; }
error() { printf '%b\n' "${RED}[ERROR]${NC} $*" >&2; exit 1; }

# Best-effort load of local overrides: a missing or partially invalid .env is
# deliberately tolerated, the defaults below then apply.
# shellcheck disable=SC1091
source .env 2>/dev/null || true

# NOTE(review): these fallbacks are hard-coded credentials; they look like
# development defaults and should be overridden via .env elsewhere.
POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-compliance123}
NEO4J_PASSWORD=${NEO4J_PASSWORD:-neo4j123}

# ── Step 1: PostgreSQL Schema ───────────────────
info "Step 1/3:初始化 PostgreSQL Schema..."

# Capture the status table and match it in-shell:
#  * a bare "healthy" pattern would also match "(unhealthy)";
#  * `... | grep -q` under pipefail can fail spuriously when grep exits early
#    and the producer receives SIGPIPE.
postgres_status="$(docker compose ps postgres || true)"
if [[ "$postgres_status" != *"(healthy)"* ]]; then
  error "PostgreSQL 未运行,请先执行:bash scripts/03_start_infra.sh"
fi

# psql is run without ON_ERROR_STOP, so ordinary SQL errors (for example
# objects that already exist on a re-run) are printed but still exit 0.
# A non-zero status therefore means a fatal problem (connection failure,
# missing SQL file, docker exec failure) and must not be reported as success.
if ! docker compose exec -T postgres psql \
    -U compliance -d compliance_db \
    -f /docker-entrypoint-initdb.d/01_init_schema.sql \
    2>&1 | tail -5; then
  error "PostgreSQL Schema 初始化失败,请检查上方输出"
fi
ok "PostgreSQL Schema 初始化完成"

# ── Step 2: Milvus Collections ──────────────────
info "Step 2/3:初始化 Milvus Collections..."
# Capture the status table and match it in-shell: a bare "healthy" pattern
# would also match "(unhealthy)", and `... | grep -q` under pipefail can fail
# spuriously when the producer receives SIGPIPE.
milvus_status="$(docker compose ps milvus || true)"
if [[ "$milvus_status" != *"(healthy)"* ]]; then
  error "Milvus 未运行,请先执行:bash scripts/03_start_infra.sh"
fi

# The Python program is read from a quoted heredoc so the shell never expands
# or re-quotes anything inside it; it is handed to `python3 -c` verbatim.
milvus_init_py="$(cat <<'PY'
from pymilvus import connections, Collection, CollectionSchema, FieldSchema, DataType, utility

connections.connect(host='milvus', port='19530')
print('Milvus 连接成功')


def create_collection(name, description):
    """Create, index and load collection `name` unless it already exists."""
    if utility.has_collection(name):
        print(f' Collection {name} 已存在,跳过')
        return

    fields = [
        FieldSchema(name='id', dtype=DataType.VARCHAR, is_primary=True, max_length=128),
        FieldSchema(name='file_id', dtype=DataType.VARCHAR, max_length=128),
        FieldSchema(name='workspace_id', dtype=DataType.VARCHAR, max_length=128),
        FieldSchema(name='chunk_idx', dtype=DataType.INT64),
        FieldSchema(name='content', dtype=DataType.VARCHAR, max_length=65535),
        FieldSchema(name='dense_vec', dtype=DataType.FLOAT_VECTOR, dim=1024),  # BGE-M3 dense
        FieldSchema(name='metadata', dtype=DataType.JSON),
    ]
    schema = CollectionSchema(fields, description=description)
    col = Collection(name, schema)

    # Build the vector index (HNSW, adequate for the research/prototyping stage)
    index_params = {
        'metric_type': 'COSINE',
        'index_type': 'HNSW',
        'params': {'M': 16, 'efConstruction': 200}
    }
    col.create_index('dense_vec', index_params)
    col.load()
    print(f' Collection {name} 创建完成')


create_collection('regulation_chunks', '法规条款向量库')
create_collection('doc_chunks', '企业文档向量库')
create_collection('case_library', '行业案例库')
print('Milvus 初始化完成')
PY
)"

# Run the initializer in a throw-away backend container (--rm) without
# starting its dependencies (--no-deps); stderr is merged into stdout.
# Under `set -e` a failing run aborts the script before the success message.
docker compose run --rm --no-deps compliance-backend \
  python3 -c "$milvus_init_py" 2>&1
ok "Milvus Collections 初始化完成"

# ── Step 3: Neo4j constraints and indexes ───────
info "Step 3/3:初始化 Neo4j 约束和索引..."
# Neo4j may still be warming up. Instead of a fixed sleep, poll with a trivial
# query until cypher-shell succeeds, bounded by attempts * delay seconds.
# Both knobs can be overridden from the environment.
neo4j_wait_attempts=${NEO4J_WAIT_ATTEMPTS:-15}
neo4j_wait_delay=${NEO4J_WAIT_DELAY:-2}
neo4j_ready=false
for ((attempt = 1; attempt <= neo4j_wait_attempts; attempt++)); do
  # stdin is taken from /dev/null so the probe cannot consume the script's stdin.
  # NOTE(review): the password is passed on the command line and is therefore
  # visible in the process list while cypher-shell runs.
  if docker compose exec -T neo4j cypher-shell \
      -u neo4j -p "$NEO4J_PASSWORD" \
      'RETURN 1;' </dev/null >/dev/null 2>&1; then
    neo4j_ready=true
    break
  fi
  sleep "$neo4j_wait_delay"
done
if [[ "$neo4j_ready" != true ]]; then
  error "Neo4j 未就绪或认证失败,请检查服务状态与 NEO4J_PASSWORD"
fi

# Every statement is guarded by IF NOT EXISTS / MERGE, so re-running is safe.
# The heredoc delimiter is quoted: the shell does not expand the Cypher text.
docker compose exec -T neo4j cypher-shell \
  -u neo4j -p "$NEO4J_PASSWORD" \
  --format plain <<'CYPHER' || error "Neo4j 约束和索引初始化失败,请检查上方输出"
// Node constraints (uniqueness)
CREATE CONSTRAINT regulation_id IF NOT EXISTS FOR (r:Regulation) REQUIRE r.id IS UNIQUE;
CREATE CONSTRAINT clause_id IF NOT EXISTS FOR (c:Clause) REQUIRE c.id IS UNIQUE;
CREATE CONSTRAINT obligation_id IF NOT EXISTS FOR (o:Obligation) REQUIRE o.id IS UNIQUE;

// Full-text indexes (fuzzy search)
CREATE FULLTEXT INDEX regulation_fulltext IF NOT EXISTS FOR (r:Regulation) ON EACH [r.title, r.code, r.domain];
CREATE FULLTEXT INDEX clause_fulltext IF NOT EXISTS FOR (c:Clause) ON EACH [c.content, c.title];

// Insert sample nodes (connectivity check)
MERGE (d:Domain {name: 'vehicle_safety', label: '车辆安全法规'});
MERGE (d:Domain {name: 'data_security', label: '数据安全法规'});
MERGE (d:Domain {name: 'ehs', label: 'EHS安全法规'});
MERGE (d:Domain {name: 'carbon', label: '碳排放法规'});

RETURN '初始化完成' AS result;
CYPHER
ok "Neo4j 约束和索引初始化完成"

# Final summary banner and pointer to the next script.
echo ""
echo -e "${GREEN}══════════════════════════════════════════${NC}"
echo -e "${GREEN} 数据库初始化完成!${NC}"
echo -e "${GREEN}══════════════════════════════════════════${NC}"
echo ""
echo " PostgreSQL: 所有表已创建"
echo " Milvus: regulation_chunks / doc_chunks / case_library"
echo " Neo4j: 约束 + 全文索引 + 基础域节点"
echo ""
echo "下一步:bash scripts/06_start_all.sh"