Files
AIRegulation-Deployment/init-sql/01_init_schema.sql
2026-04-23 09:58:47 +08:00

193 lines
10 KiB
PL/PgSQL
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

-- AI合规智能中枢 — PostgreSQL 初始化 Schema
-- 执行时机:容器首次启动时自动执行
-- 启用扩展
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
CREATE EXTENSION IF NOT EXISTS vector; -- pgvectorpgvector/pgvector:pg16 镜像已内置)
CREATE EXTENSION IF NOT EXISTS pg_trgm; -- 全文检索支持
-- ══════════════════════════════════════════════════
-- 工作空间(知识库)
-- ══════════════════════════════════════════════════
CREATE TABLE IF NOT EXISTS workspaces (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
name VARCHAR(255) NOT NULL,
description TEXT,
domain VARCHAR(100), -- vehicle_safety / data_security / ehs / carbon
created_by VARCHAR(255),
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- ══════════════════════════════════════════════════
-- 文件记录
-- ══════════════════════════════════════════════════
CREATE TABLE IF NOT EXISTS files (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
workspace_id UUID REFERENCES workspaces(id) ON DELETE CASCADE,
filename VARCHAR(500) NOT NULL,
original_name VARCHAR(500) NOT NULL,
file_type VARCHAR(50), -- pdf / docx / xlsx
file_size BIGINT,
storage_path TEXT, -- data/uploads/相对路径
parsed_path TEXT, -- data/parsed/相对路径
status VARCHAR(50) DEFAULT 'uploaded', -- uploaded/parsing/parsed/vectorized/failed
error_msg TEXT,
metadata JSONB DEFAULT '{}',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_files_workspace ON files(workspace_id);
CREATE INDEX IF NOT EXISTS idx_files_status ON files(status);
-- ══════════════════════════════════════════════════
-- 异步任务记录
-- ══════════════════════════════════════════════════
CREATE TABLE IF NOT EXISTS tasks (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
task_type VARCHAR(100) NOT NULL, -- parse / vectorize / compliance_check / regulation_fetch
status VARCHAR(50) DEFAULT 'pending', -- pending/running/completed/failed
payload JSONB DEFAULT '{}',
result JSONB,
error_msg TEXT,
progress INTEGER DEFAULT 0, -- 0-100
file_id UUID REFERENCES files(id),
celery_task_id VARCHAR(255),
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
completed_at TIMESTAMPTZ
);
CREATE INDEX IF NOT EXISTS idx_tasks_status ON tasks(status);
CREATE INDEX IF NOT EXISTS idx_tasks_type ON tasks(task_type);
CREATE INDEX IF NOT EXISTS idx_tasks_file ON tasks(file_id);
-- ══════════════════════════════════════════════════
-- 合规审查报告
-- ══════════════════════════════════════════════════
CREATE TABLE IF NOT EXISTS compliance_reports (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
file_id UUID REFERENCES files(id),
regulation_domains TEXT[], -- 适用法规域
overall_risk_level VARCHAR(20), -- high / medium / low
risk_score DECIMAL(5,2), -- 0-100
findings JSONB DEFAULT '[]', -- 问题列表
recommendations JSONB DEFAULT '[]', -- 整改建议
report_markdown TEXT, -- 完整报告Markdown格式
llm_model VARCHAR(100), -- 生成时使用的模型
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_reports_file ON compliance_reports(file_id);
CREATE INDEX IF NOT EXISTS idx_reports_risk ON compliance_reports(overall_risk_level);
-- ══════════════════════════════════════════════════
-- 法规监控源
-- ══════════════════════════════════════════════════
CREATE TABLE IF NOT EXISTS regulation_sources (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
name VARCHAR(255) NOT NULL,
url TEXT NOT NULL,
source_type VARCHAR(50) DEFAULT 'webpage', -- webpage / rss / api
domain VARCHAR(100), -- vehicle_safety / ehs 等
fetch_interval INTEGER DEFAULT 86400, -- 抓取间隔(秒),默认每天
is_active BOOLEAN DEFAULT TRUE,
last_fetched_at TIMESTAMPTZ,
last_hash VARCHAR(64), -- 内容hash用于变更检测
fetch_config JSONB DEFAULT '{}', -- 抓取配置CSS选择器等
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_sources_active ON regulation_sources(is_active);
CREATE INDEX IF NOT EXISTS idx_sources_domain ON regulation_sources(domain);
-- ══════════════════════════════════════════════════
-- 法规变更记录
-- ══════════════════════════════════════════════════
CREATE TABLE IF NOT EXISTS regulation_updates (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
source_id UUID REFERENCES regulation_sources(id),
title VARCHAR(500),
url TEXT,
change_type VARCHAR(50), -- new / revised / revoked / notice
summary TEXT, -- AI生成的变更摘要
raw_content TEXT, -- 原始抓取内容
diff_content TEXT, -- 与上次内容的差异
is_notified BOOLEAN DEFAULT FALSE,
importance VARCHAR(20) DEFAULT 'normal', -- high / normal / low
fetched_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
published_at TIMESTAMPTZ
);
CREATE INDEX IF NOT EXISTS idx_updates_source ON regulation_updates(source_id);
CREATE INDEX IF NOT EXISTS idx_updates_notified ON regulation_updates(is_notified);
CREATE INDEX IF NOT EXISTS idx_updates_fetched ON regulation_updates(fetched_at DESC);
-- ══════════════════════════════════════════════════
-- 推送订阅
-- ══════════════════════════════════════════════════
CREATE TABLE IF NOT EXISTS subscriptions (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
name VARCHAR(255),
channel VARCHAR(50) NOT NULL, -- email / webhook / feishu / dingtalk
target TEXT NOT NULL, -- 邮件地址 或 Webhook URL
domains TEXT[], -- 订阅的法规域,为空则订阅全部
importance_min VARCHAR(20) DEFAULT 'normal',
is_active BOOLEAN DEFAULT TRUE,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- ══════════════════════════════════════════════════
-- 全链路审计日志
-- ══════════════════════════════════════════════════
CREATE TABLE IF NOT EXISTS audit_logs (
id BIGSERIAL PRIMARY KEY,
action VARCHAR(100) NOT NULL, -- upload / query / compliance_check / etc
resource VARCHAR(100),
resource_id UUID,
user_id VARCHAR(255),
ip_address INET,
request JSONB,
response JSONB,
duration_ms INTEGER,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_audit_action ON audit_logs(action);
CREATE INDEX IF NOT EXISTS idx_audit_created ON audit_logs(created_at DESC);
CREATE INDEX IF NOT EXISTS idx_audit_user ON audit_logs(user_id);
-- ══════════════════════════════════════════════════
-- 更新时间自动维护
-- ══════════════════════════════════════════════════
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER AS $$
BEGIN
NEW.updated_at = NOW();
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER update_workspaces_updated_at
BEFORE UPDATE ON workspaces
FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();
CREATE TRIGGER update_files_updated_at
BEFORE UPDATE ON files
FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();
CREATE TRIGGER update_tasks_updated_at
BEFORE UPDATE ON tasks
FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();
-- ══════════════════════════════════════════════════
-- 初始数据:预置监控源
-- ══════════════════════════════════════════════════
INSERT INTO regulation_sources (name, url, domain, fetch_interval) VALUES
('国家标准全文公开系统', 'https://std.samr.gov.cn', 'vehicle_safety', 86400),
('工信部政策法规', 'https://www.miit.gov.cn/jgsj/fgs/zcfg/index.html', 'vehicle_safety', 86400),
('应急管理部政策法规', 'https://www.mem.gov.cn/gk/zcfg/', 'ehs', 86400),
('生态环境部政策法规', 'https://www.mee.gov.cn/ywgz/fgbz/fl/', 'carbon', 86400)
ON CONFLICT DO NOTHING;