Files
AIRegulation-Deployment/init-sql/01_init_schema.sql

193 lines
10 KiB
MySQL
Raw Normal View History

2026-04-23 09:58:47 +08:00
-- AI合规智能中枢 — PostgreSQL 初始化 Schema
-- 执行时机:容器首次启动时自动执行
-- 启用扩展
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
CREATE EXTENSION IF NOT EXISTS vector; -- pgvectorpgvector/pgvector:pg16 镜像已内置)
CREATE EXTENSION IF NOT EXISTS pg_trgm; -- 全文检索支持
-- ══════════════════════════════════════════════════
-- 工作空间(知识库)
-- ══════════════════════════════════════════════════
CREATE TABLE IF NOT EXISTS workspaces (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
name VARCHAR(255) NOT NULL,
description TEXT,
domain VARCHAR(100), -- vehicle_safety / data_security / ehs / carbon
created_by VARCHAR(255),
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- ══════════════════════════════════════════════════
-- 文件记录
-- ══════════════════════════════════════════════════
CREATE TABLE IF NOT EXISTS files (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
workspace_id UUID REFERENCES workspaces(id) ON DELETE CASCADE,
filename VARCHAR(500) NOT NULL,
original_name VARCHAR(500) NOT NULL,
file_type VARCHAR(50), -- pdf / docx / xlsx
file_size BIGINT,
storage_path TEXT, -- data/uploads/相对路径
parsed_path TEXT, -- data/parsed/相对路径
status VARCHAR(50) DEFAULT 'uploaded', -- uploaded/parsing/parsed/vectorized/failed
error_msg TEXT,
metadata JSONB DEFAULT '{}',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_files_workspace ON files(workspace_id);
CREATE INDEX IF NOT EXISTS idx_files_status ON files(status);
-- ══════════════════════════════════════════════════
-- 异步任务记录
-- ══════════════════════════════════════════════════
CREATE TABLE IF NOT EXISTS tasks (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
task_type VARCHAR(100) NOT NULL, -- parse / vectorize / compliance_check / regulation_fetch
status VARCHAR(50) DEFAULT 'pending', -- pending/running/completed/failed
payload JSONB DEFAULT '{}',
result JSONB,
error_msg TEXT,
progress INTEGER DEFAULT 0, -- 0-100
file_id UUID REFERENCES files(id),
celery_task_id VARCHAR(255),
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
completed_at TIMESTAMPTZ
);
CREATE INDEX IF NOT EXISTS idx_tasks_status ON tasks(status);
CREATE INDEX IF NOT EXISTS idx_tasks_type ON tasks(task_type);
CREATE INDEX IF NOT EXISTS idx_tasks_file ON tasks(file_id);
-- ══════════════════════════════════════════════════
-- 合规审查报告
-- ══════════════════════════════════════════════════
CREATE TABLE IF NOT EXISTS compliance_reports (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
file_id UUID REFERENCES files(id),
regulation_domains TEXT[], -- 适用法规域
overall_risk_level VARCHAR(20), -- high / medium / low
risk_score DECIMAL(5,2), -- 0-100
findings JSONB DEFAULT '[]', -- 问题列表
recommendations JSONB DEFAULT '[]', -- 整改建议
report_markdown TEXT, -- 完整报告Markdown格式
llm_model VARCHAR(100), -- 生成时使用的模型
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_reports_file ON compliance_reports(file_id);
CREATE INDEX IF NOT EXISTS idx_reports_risk ON compliance_reports(overall_risk_level);
-- ══════════════════════════════════════════════════
-- 法规监控源
-- ══════════════════════════════════════════════════
CREATE TABLE IF NOT EXISTS regulation_sources (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
name VARCHAR(255) NOT NULL,
url TEXT NOT NULL,
source_type VARCHAR(50) DEFAULT 'webpage', -- webpage / rss / api
domain VARCHAR(100), -- vehicle_safety / ehs 等
fetch_interval INTEGER DEFAULT 86400, -- 抓取间隔(秒),默认每天
is_active BOOLEAN DEFAULT TRUE,
last_fetched_at TIMESTAMPTZ,
last_hash VARCHAR(64), -- 内容hash用于变更检测
fetch_config JSONB DEFAULT '{}', -- 抓取配置CSS选择器等
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_sources_active ON regulation_sources(is_active);
CREATE INDEX IF NOT EXISTS idx_sources_domain ON regulation_sources(domain);
-- ══════════════════════════════════════════════════
-- 法规变更记录
-- ══════════════════════════════════════════════════
CREATE TABLE IF NOT EXISTS regulation_updates (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
source_id UUID REFERENCES regulation_sources(id),
title VARCHAR(500),
url TEXT,
change_type VARCHAR(50), -- new / revised / revoked / notice
summary TEXT, -- AI生成的变更摘要
raw_content TEXT, -- 原始抓取内容
diff_content TEXT, -- 与上次内容的差异
is_notified BOOLEAN DEFAULT FALSE,
importance VARCHAR(20) DEFAULT 'normal', -- high / normal / low
fetched_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
published_at TIMESTAMPTZ
);
CREATE INDEX IF NOT EXISTS idx_updates_source ON regulation_updates(source_id);
CREATE INDEX IF NOT EXISTS idx_updates_notified ON regulation_updates(is_notified);
CREATE INDEX IF NOT EXISTS idx_updates_fetched ON regulation_updates(fetched_at DESC);
-- ══════════════════════════════════════════════════
-- 推送订阅
-- ══════════════════════════════════════════════════
CREATE TABLE IF NOT EXISTS subscriptions (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
name VARCHAR(255),
channel VARCHAR(50) NOT NULL, -- email / webhook / feishu / dingtalk
target TEXT NOT NULL, -- 邮件地址 或 Webhook URL
domains TEXT[], -- 订阅的法规域,为空则订阅全部
importance_min VARCHAR(20) DEFAULT 'normal',
is_active BOOLEAN DEFAULT TRUE,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- ══════════════════════════════════════════════════
-- 全链路审计日志
-- ══════════════════════════════════════════════════
CREATE TABLE IF NOT EXISTS audit_logs (
id BIGSERIAL PRIMARY KEY,
action VARCHAR(100) NOT NULL, -- upload / query / compliance_check / etc
resource VARCHAR(100),
resource_id UUID,
user_id VARCHAR(255),
ip_address INET,
request JSONB,
response JSONB,
duration_ms INTEGER,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_audit_action ON audit_logs(action);
CREATE INDEX IF NOT EXISTS idx_audit_created ON audit_logs(created_at DESC);
CREATE INDEX IF NOT EXISTS idx_audit_user ON audit_logs(user_id);
-- ══════════════════════════════════════════════════
-- 更新时间自动维护
-- ══════════════════════════════════════════════════
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER AS $$
BEGIN
NEW.updated_at = NOW();
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER update_workspaces_updated_at
BEFORE UPDATE ON workspaces
FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();
CREATE TRIGGER update_files_updated_at
BEFORE UPDATE ON files
FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();
CREATE TRIGGER update_tasks_updated_at
BEFORE UPDATE ON tasks
FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();
-- ══════════════════════════════════════════════════
-- 初始数据:预置监控源
-- ══════════════════════════════════════════════════
INSERT INTO regulation_sources (name, url, domain, fetch_interval) VALUES
('国家标准全文公开系统', 'https://std.samr.gov.cn', 'vehicle_safety', 86400),
('工信部政策法规', 'https://www.miit.gov.cn/jgsj/fgs/zcfg/index.html', 'vehicle_safety', 86400),
('应急管理部政策法规', 'https://www.mem.gov.cn/gk/zcfg/', 'ehs', 86400),
('生态环境部政策法规', 'https://www.mee.gov.cn/ywgz/fgbz/fl/', 'carbon', 86400)
ON CONFLICT DO NOTHING;