4 Commits

Author SHA1 Message Date
wangwei
9212747e1b update for 1. 优化 2.中英切换 2026-06-10 11:10:36 +08:00
wangwei
e7963b267e fix somethings 2026-06-08 11:16:28 +08:00
wangwei
9fea9c6a53 1. Add 登陆功能
2. 调整字体大小
3. 新增部分功能
2026-06-05 18:00:31 +08:00
wangwei
06e0967128 add 2026-06-05 09:00:36 +08:00
109 changed files with 22426 additions and 862 deletions

32
.env
View File

@@ -48,8 +48,16 @@ CHUNK_OVERLAP=50
MAX_FILE_SIZE_MB=100
PARSER_BACKEND=aliyun
CHUNK_BACKEND=aliyun
# 文档元数据存储后端:json默认或 postgres
DOCUMENT_REPOSITORY_BACKEND=json
# 文档元数据存储后端:启用 postgres 以激活合规分析历史记录(Direction B)及 Finding Chat 持久化(Direction C)
DOCUMENT_REPOSITORY_BACKEND=postgres
# Set to true only when a Celery worker is actually running (./dev.sh start worker).
# Default false: processing runs in FastAPI's threadpool — no external worker needed.
USE_CELERY_WORKER=false
# ===== 法规感知爬取配置 =====
PERCEPTION_CRAWL_TIMEOUT_SECONDS=120
PERCEPTION_MAX_EVENTS_PER_SOURCE=100
PERCEPTION_DIFF_SIMILARITY_THRESHOLD=0.85
# ===== API配置 =====
API_HOST=0.0.0.0
@@ -92,3 +100,23 @@ ALIYUN_LLM_ENHANCEMENT=true
ALIYUN_ENHANCEMENT_MODE=VLM
DOCUMENT_PARSE_ARTIFACT_PREFIX=artifacts
PARSER_FAILURE_MODE=fail
# ===== Reranker 配置 =====
RERANKER_ENABLED=true
RERANKER_BASE_URL=http://6.86.80.4:30080/v1
RERANKER_MODEL=BAAI/bge-reranker-v2-m3
RERANKER_API_KEY=
RERANKER_TOP_K=5
# ===== 会话持久化 =====
SESSION_BACKEND=redis
# ===== 认证配置 =====
# 生产环境请修改为强随机密钥: python -c "import secrets; print(secrets.token_hex(32))"
# ⚠ 切勿将真实密钥提交到版本库;下方的值已随 .env 入库,应视为已泄露并尽快轮换。
AUTH_SECRET_KEY=ai-compliance-hub-jwt-secret-2026-tsystems
AUTH_ALGORITHM=HS256
AUTH_TOKEN_EXPIRE_MINUTES=480
AUTH_ENABLED=true
# ===== CORS =====
CORS_ALLOW_ORIGINS=http://localhost:5173

View File

@@ -31,5 +31,5 @@ POSTGRES_PASSWORD=postgresql123456
POSTGRES_DB=compliance_db
# ===== 文档元数据后端 =====
# 改为 postgres 以启用 PG 持久化structure_nodes + semantic_blocks 入库
# 改为 postgres 以启用合规分析历史记录(Direction B)和 Finding Chat(Direction C)
DOCUMENT_REPOSITORY_BACKEND=json

View File

@@ -50,7 +50,19 @@ DOCUMENT_METADATA_PATH=backend/data/documents.json
PARSER_BACKEND=aliyun
CHUNK_BACKEND=aliyun
# 文档元数据存储后端:json(默认,无需数据库)或 postgres(启用 PG 持久化)
# ⚠ 以下功能需要 postgres(设为 json 时功能静默降级或报 500):
# - Direction B: 合规分析历史记录 (/compliance/history/*)
# - Direction B: DOCX 报告下载
# - Direction C: Finding Chat 消息持久化
DOCUMENT_REPOSITORY_BACKEND=json
# Set to true only when a Celery worker is running (./dev.sh start worker).
# Default false: document processing runs in FastAPI's threadpool (no external worker needed).
USE_CELERY_WORKER=false
# ===== 法规感知爬取配置 =====
PERCEPTION_CRAWL_TIMEOUT_SECONDS=120
PERCEPTION_MAX_EVENTS_PER_SOURCE=100
PERCEPTION_DIFF_SIMILARITY_THRESHOLD=0.85
# ===== 阿里云文档解析 =====
ALIBABA_ACCESS_KEY_ID=your_aliyun_access_key_id
@@ -96,11 +108,15 @@ RAG_TOP_K=10
RAG_RETRIEVAL_TOP_K=20
RAG_MAX_CONTEXT_TOKENS=4000
RAG_SUMMARY_MAX_TOKENS=1024
RAG_SKILLS_MAX_TOKENS=2048
# ===== Reranker配置(Cross-Encoder精排,默认关闭)=====
# 设置 RERANKER_ENABLED=true 并配置 RERANKER_BASE_URL 以启用精排
RERANKER_ENABLED=false
RERANKER_BASE_URL=
# ── Reranker (Cross-Encoder) ──────────────────────────────────────────────────
# Set RERANKER_ENABLED=true and point to a TEI or Cohere-compatible rerank API.
# Recommended model: BAAI/bge-reranker-v2-m3 (smaller, multilingual; the lighter choice) or
# BAAI/bge-reranker-v2.5-gemma2-lightweight (Gemma-2-9B based; heavier, higher quality).
# The endpoint must expose POST /rerank (TEI style) or POST /v1/rerank (Cohere style).
RERANKER_ENABLED=true
RERANKER_BASE_URL=http://6.86.80.4:30080/v1
RERANKER_MODEL=BAAI/bge-reranker-v2-m3
RERANKER_API_KEY=
RERANKER_TOP_K=5
@@ -108,3 +124,20 @@ RERANKER_TOP_K=5
# ===== 会话配置 =====
SESSION_MAX_SESSIONS=100
SESSION_TIMEOUT_MINUTES=30
# SESSION_BACKEND=redis 启用 Redis 持久化会话(需要 Redis 可用,推荐生产环境)
# SESSION_BACKEND=memory 使用内存会话(重启丢失,适合本地开发)
SESSION_BACKEND=memory
# ===== 认证配置 (Auth) =====
# 生产环境必须替换为强随机密钥:
# python -c "import secrets; print(secrets.token_hex(32))"
AUTH_SECRET_KEY=change-me-in-production-must-be-32-or-more-characters-long
AUTH_ALGORITHM=HS256
# Token 有效期(分钟),默认 8 小时
AUTH_TOKEN_EXPIRE_MINUTES=480
# 设为 false 可跳过认证(仅限本地开发调试,生产必须 true)
AUTH_ENABLED=true
# ===== CORS =====
# 逗号分隔的允许跨域来源列表,生产环境绝不能使用 *
CORS_ALLOW_ORIGINS=http://localhost:5173

View File

@@ -0,0 +1,56 @@
<h2>Compliance Analysis — 哪个方向最值得优化?</h2>
<p class="subtitle">基于代码深度分析,发现了 4 个有价值的改进方向。选择你最希望深入的那个。</p>
<div class="options">
<div class="option" data-choice="A" onclick="toggleSelect(this)">
<div class="letter">A</div>
<div class="content">
<h3>⚡ 分析质量提升</h3>
<p>并行子句处理(速度 35×)、跨编码器重排序、置信度过滤、修复 highlight_terms 失效 Bug、减少 LLM 静默失败。</p>
<div class="pros-cons" style="margin-top:10px">
<div class="pros"><h4>收益</h4><ul><li>更快、更准确的分析</li><li>消除当前 Bug</li></ul></div>
<div class="cons"><h4>难度</h4><ul><li>需要改造 pipeline.py</li></ul></div>
</div>
</div>
</div>
<div class="option" data-choice="B" onclick="toggleSelect(this)">
<div class="letter">B</div>
<div class="content">
<h3>📋 分析历史 &amp; 专业报告</h3>
<p>持久化分析记录(PostgreSQL)、历史对比、PDF/DOCX 专业报告导出、分析版本追踪。</p>
<div class="pros-cons" style="margin-top:10px">
<div class="pros"><h4>收益</h4><ul><li>结果不再丢失</li><li>可交付给客户的报告</li></ul></div>
<div class="cons"><h4>难度</h4><ul><li>需要新增数据库表</li></ul></div>
</div>
</div>
</div>
<div class="option" data-choice="C" onclick="toggleSelect(this)">
<div class="letter">C</div>
<div class="content">
<h3>💬 深度 Chat 增强</h3>
<p>每个 Finding 独立对话线程持久化、Chat 上下文绑定真实检索到的法规原文、多轮追问记忆、快捷建议问句生成。</p>
<div class="pros-cons" style="margin-top:10px">
<div class="pros"><h4>收益</h4><ul><li>Finding 解读深度大幅提升</li><li>用户粘性强</li></ul></div>
<div class="cons"><h4>难度</h4><ul><li>需重构 chat 端点</li></ul></div>
</div>
</div>
</div>
<div class="option" data-choice="D" onclick="toggleSelect(this)">
<div class="letter">D</div>
<div class="content">
<h3>📑 自定义规则 &amp; 模板</h3>
<p>用户自定义合规规则库、按行业预设模板(汽车/金融/医疗)、Prompt 版本管理、A/B 测试不同提示策略。</p>
<div class="pros-cons" style="margin-top:10px">
<div class="pros"><h4>收益</h4><ul><li>适应不同行业场景</li><li>可配置,无需改代码</li></ul></div>
<div class="cons"><h4>难度</h4><ul><li>需要规则管理 UI</li></ul></div>
</div>
</div>
</div>
</div>
<p class="subtitle" style="margin-top:20px">💡 也可以多选,或者在终端告诉我你有其他想法。</p>

View File

@@ -0,0 +1,3 @@
{"type":"click","text":"C\n \n 💬 深度 Chat 增强\n 每个 Finding 独立对话线程持久化、Chat 上下文绑定真实检索到的法规原文、多轮追问记忆、快捷建议问句生成。\n \n 收益Finding 解读深度大幅提升用户粘性强\n 难度需重构 chat 端点","choice":"C","id":null,"timestamp":1780897984866}
{"type":"click","text":"B\n \n 📋 分析历史 & 专业报告\n 持久化分析记录PostgreSQL、历史对比、PDF/DOCX 专业报告导出、分析版本追踪。\n \n 收益结果不再丢失可交付给客户的报告\n 难度需要新增数据库表","choice":"B","id":null,"timestamp":1780897985879}
{"type":"click","text":"A\n \n ⚡ 分析质量提升\n 并行子句处理(速度 35×、跨编码器重排序、置信度过滤、修复 highlight_terms 失效 Bug、减少 LLM 静默失败。\n \n 收益更快、更准确的分析消除当前 Bug\n 难度需要改造 pipeline.py","choice":"A","id":null,"timestamp":1780897986554}

View File

@@ -0,0 +1 @@
{"reason":"idle timeout","timestamp":1780894411095}

View File

@@ -0,0 +1 @@
1055

View File

@@ -0,0 +1,5 @@
"""FastAPI dependency functions for authentication and authorisation.
Import `get_current_user` or `require_role` into route modules to protect
endpoints. Both use the shared JWTHandler wired through bootstrap.
"""

View File

@@ -0,0 +1,72 @@
"""FastAPI dependencies for JWT authentication.
Usage in a route:
from app.api.dependencies.auth import get_current_user, require_role
from app.domain.auth.models import UserRole
@router.get("/protected")
async def protected(user: UserClaims = Depends(get_current_user)):
return {"user": user.username}
@router.delete("/admin-only")
async def admin_only(user: UserClaims = Depends(require_role(UserRole.ADMIN))):
...
"""
from __future__ import annotations
from fastapi import Depends, HTTPException, status
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from app.config.settings import settings
from app.domain.auth.models import UserClaims, UserRole
from app.shared.bootstrap import get_jwt_handler
# Use Bearer token scheme — client sends `Authorization: Bearer <token>`.
_bearer = HTTPBearer(auto_error=False)
async def get_current_user(
    credentials: HTTPAuthorizationCredentials | None = Depends(_bearer),
) -> UserClaims:
    """Resolve the caller's identity from the ``Authorization: Bearer`` header.

    Args:
        credentials: Bearer credentials parsed by the ``_bearer`` scheme, or
            ``None`` when the scheme produced no credentials.

    Returns:
        The ``UserClaims`` decoded from the token. When ``settings.auth_enabled``
        is false, a fixed synthetic admin identity is returned instead and the
        header is not inspected.

    Raises:
        HTTPException: 401 when no credentials were supplied, or when the JWT
            handler rejects the token with ``ValueError``.
    """
    if not settings.auth_enabled:
        # Development bypass — never enable this in production.
        return UserClaims(user_id="dev", username="dev-admin", role=UserRole.ADMIN)

    # Both failure modes answer with the same Bearer challenge header.
    bearer_challenge = {"WWW-Authenticate": "Bearer"}

    if credentials is None:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Missing authentication token",
            headers=bearer_challenge,
        )

    raw_token = credentials.credentials
    try:
        claims = get_jwt_handler().decode_token(raw_token)
    except ValueError as exc:
        # The handler's message is forwarded verbatim as the 401 detail.
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail=str(exc),
            headers=bearer_challenge,
        ) from exc
    return claims
def require_role(*roles: UserRole):
    """Build a dependency that only admits users holding one of ``roles``.

    Example:
        Depends(require_role(UserRole.ADMIN, UserRole.LEGAL))

    Args:
        *roles: The roles that are allowed through.

    Returns:
        An async dependency that resolves the current user via
        ``get_current_user`` and returns the same ``UserClaims`` when the
        user's role is in ``roles``.

    Raises:
        HTTPException: 403 (raised by the returned dependency) when the user's
            role is not one of ``roles``.
    """
    # Render roles by value once, so the error text does not depend on how the
    # enum type formats itself inside an f-string.
    allowed_values = [getattr(r, "value", r) for r in roles]

    async def _check(user: UserClaims = Depends(get_current_user)) -> UserClaims:
        """Verify the user holds one of the required roles."""
        if user.role not in roles:
            current_value = getattr(user.role, "value", user.role)
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail=f"Role '{current_value}' is not permitted. Required: {allowed_values}",
            )
        return user

    return _check

View File

@@ -8,6 +8,7 @@ from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from loguru import logger
from app.api.middleware.audit import AuditMiddleware
from app.api.models import ErrorResponse
from app.api.routes import api_router
from app.config.logging import setup_logging
@@ -46,14 +47,23 @@ app = FastAPI(
redoc_url="/redoc",
)
# Tighten CORS — only allow configured origins.
# Set CORS_ALLOW_ORIGINS in .env to the real frontend URL in production.
_ORIGINS = [o.strip() for o in settings.cors_allow_origins.split(",") if o.strip()]
if not _ORIGINS:
_ORIGINS = ["http://localhost:5173"]
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_origins=_ORIGINS,
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# Audit middleware logs every authenticated API call for compliance traceability.
app.add_middleware(AuditMiddleware)
app.include_router(api_router, prefix="/api/v1")

View File

@@ -0,0 +1 @@
"""HTTP middleware for cross-cutting concerns: audit logging."""

View File

@@ -0,0 +1,56 @@
"""Audit logging middleware.
Logs every API request with method, path, status code, response time,
and the authenticated user identity (extracted from the JWT when present).
Log lines are structured so they can be ingested by ELK / Loki.
"""
from __future__ import annotations
import time
from fastapi import Request, Response
from loguru import logger
from starlette.middleware.base import BaseHTTPMiddleware
class AuditMiddleware(BaseHTTPMiddleware):
    """Emit one structured ``AUDIT`` log line per API request.

    Requests to ``/`` and to the health/docs paths listed in
    ``_SKIP_PREFIXES`` are passed through without logging. Every other
    request is logged with method, path, status, elapsed milliseconds and the
    caller identity — including requests whose handler raised, which are
    recorded with status 500 before the exception is re-raised.
    """

    # Paths that produce no audit log entry.
    _SKIP_PREFIXES = ("/health", "/docs", "/redoc", "/openapi.json")

    async def dispatch(self, request: Request, call_next) -> Response:
        """Call the downstream handler and log the outcome.

        Args:
            request: The incoming request.
            call_next: Callable that forwards the request downstream.

        Returns:
            The downstream response, unmodified.

        Raises:
            Exception: Whatever the downstream handler raised, after the
                failure has been written to the audit log.
        """
        path = request.url.path
        if path == "/" or any(
            path == prefix or path.startswith(prefix + "/") for prefix in self._SKIP_PREFIXES
        ):
            return await call_next(request)

        start = time.perf_counter()
        try:
            response = await call_next(request)
        except Exception:
            # An unhandled error must still leave an audit trail; it surfaces
            # to the client as a server error, so record it as 500.
            self._log(request, path, 500, start)
            raise
        self._log(request, path, response.status_code, start)
        return response

    def _log(self, request: Request, path: str, status_code: int, start: float) -> None:
        """Write the audit line for one finished request."""
        elapsed_ms = int((time.perf_counter() - start) * 1000)
        user_id, username = self._identify(request)
        logger.info(
            "AUDIT method={} path={} status={} elapsed_ms={} user_id={} username={}",
            request.method,
            path,
            status_code,
            elapsed_ms,
            user_id,
            username,
        )

    @staticmethod
    def _identify(request: Request) -> tuple[str, str]:
        """Return ``(user_id, username)`` from the Bearer token, or anonymous."""
        auth_header = request.headers.get("authorization", "")
        if not auth_header.startswith("Bearer "):
            return "anonymous", "anonymous"
        try:
            # Imported lazily, as in the original implementation.
            from app.shared.bootstrap import get_jwt_handler

            claims = get_jwt_handler().decode_token(auth_header[7:])
            return claims.user_id, claims.username
        except Exception:
            # Deliberate best effort: a token that cannot be decoded must
            # never break the request, so the call is logged as anonymous.
            return "anonymous", "anonymous"

View File

@@ -1,6 +1,7 @@
"""Initialize the app.api.routes package."""
from fastapi import APIRouter
from .auth import router as auth_router
from .compliance import router as compliance_router
from .documents import router as documents_router
from .knowledge import router as knowledge_router
@@ -14,7 +15,8 @@ from .rag import router as rag_router
# Keep package boundaries explicit so backend imports stay predictable.
api_router = APIRouter()
# Keep package boundaries explicit so backend imports stay predictable.
# Auth routes first so /auth/token is easy to discover.
api_router.include_router(auth_router)
api_router.include_router(documents_router)
api_router.include_router(knowledge_router)
api_router.include_router(agent_router)
@@ -25,6 +27,7 @@ api_router.include_router(rag_router)
__all__ = [
"api_router",
"auth_router",
"documents_router",
"knowledge_router",
"agent_router",

View File

@@ -0,0 +1,63 @@
"""Authentication routes — token issuance and current-user lookup.
POST /auth/token — exchange username + password for a JWT.
GET /auth/me — return the current user identity (requires token).
"""
from __future__ import annotations
from fastapi import APIRouter, Depends, HTTPException, status
from fastapi.security import OAuth2PasswordRequestForm
from pydantic import BaseModel
from app.api.dependencies.auth import get_current_user
from app.config.settings import settings
from app.domain.auth.models import UserClaims
from app.shared.bootstrap import get_jwt_handler, get_user_store
router = APIRouter(prefix="/auth", tags=["认证"])
class TokenResponse(BaseModel):
    """Body returned by ``POST /auth/token``."""

    # The signed JWT the client sends back as a Bearer token.
    access_token: str
    # Token scheme; the login route always reports "bearer".
    token_type: str = "bearer"
    # Token lifetime in seconds (the login route multiplies minutes by 60).
    expires_in: int
@router.post("/token", response_model=TokenResponse)
async def login(form: OAuth2PasswordRequestForm = Depends()):
    """Exchange a username and password for a JWT.

    The request uses the standard OAuth2 password-grant form fields, which
    keeps the endpoint compatible with the Swagger UI Authorize button.

    Args:
        form: The submitted OAuth2 password form.

    Returns:
        A ``TokenResponse`` carrying the token and its lifetime in seconds.

    Raises:
        HTTPException: 401 when the user store rejects the credentials.
    """
    account = get_user_store().authenticate(form.username, form.password)
    if account is None:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Incorrect username or password",
            headers={"WWW-Authenticate": "Bearer"},
        )

    signed_token = get_jwt_handler().create_access_token(
        user_id=account.id,
        username=account.username,
        role=account.role,
    )
    # Settings hold the lifetime in minutes; the response reports seconds.
    lifetime_seconds = settings.auth_token_expire_minutes * 60
    return TokenResponse(
        access_token=signed_token,
        token_type="bearer",
        expires_in=lifetime_seconds,
    )
@router.get("/me")
async def get_me(current_user: UserClaims = Depends(get_current_user)):
    """Report who the presented token belongs to.

    Returns:
        A dict with ``user_id``, ``username`` and the role's value.
    """
    identity = dict(
        user_id=current_user.user_id,
        username=current_user.username,
        role=current_user.role.value,
    )
    return identity

View File

@@ -5,17 +5,21 @@ from __future__ import annotations
import asyncio
import json
from pathlib import Path
from typing import AsyncGenerator
from typing import AsyncGenerator, Optional
from fastapi import APIRouter, File, UploadFile
from fastapi import APIRouter, Depends, File, Form, UploadFile
from fastapi.responses import StreamingResponse
from loguru import logger
from app.api.dependencies.auth import get_current_user
from app.domain.auth.models import UserClaims
from app.schemas.compliance import (
AnalyzeResponse,
ComplianceChatRequest,
)
from app.services.mock_data import generate_task_id, get_mock_compliance_result
from app.shared.bootstrap import get_agent_conversation_service
from app.shared.bootstrap import get_agent_conversation_service, get_retrieval_service
from app.config.settings import settings
router = APIRouter(prefix="/compliance", tags=["合规分析"])
@@ -62,6 +66,172 @@ async def get_result(task_id: str):
return task["result"]
def _sse(data: dict) -> str:
return f"event: message\ndata: {json.dumps(data, ensure_ascii=False)}\n\n"
@router.post("/analyze-stream")
async def analyze_stream(
    text: Optional[str] = Form(None),
    doc_id: Optional[str] = Form(None),
    file: Optional[UploadFile] = File(None),
    domains: Optional[str] = Form(None),
    title: Optional[str] = Form(None),
    current_user: UserClaims = Depends(get_current_user),
):
    """Stream compliance analysis as SSE events.

    Stages: extracting → splitting → analyzing (all clauses) → concluding
    Events: stage | source | finding | done | saved | error

    Args:
        text: Pasted text to analyse; takes precedence over the other inputs.
        doc_id: Identifier of a stored document, used when ``text`` is empty.
        file: Uploaded file, used when neither ``text`` nor ``doc_id`` is given.
        domains: Forwarded to the clause pipeline (``None`` when empty).
        title: Used as the standard name, and as the document name when no
            file was uploaded.
        current_user: The authenticated caller; recorded as the author of the
            saved analysis.

    Returns:
        A ``text/event-stream`` response. Failures inside the pipeline are
        reported as ``error`` events rather than as an HTTP error status.
    """
    from app.application.compliance.pipeline import (
        extract_text_from_doc_id,
        extract_text_from_file,
        run_clauses_parallel,
        split_into_clauses,
        synthesize_conclusion,
    )
    from app.services.llm.llm_factory import get_llm_client
    from app.shared.bootstrap import get_retrieval_service

    # Read file content eagerly (before async generator)
    file_content: bytes | None = None
    file_name: str | None = None
    if file is not None:
        file_content = await file.read()
        file_name = file.filename

    async def generate() -> AsyncGenerator[str, None]:
        try:
            client = get_llm_client(provider=settings.llm_provider, model=settings.llm_model)
            retrieval_service = get_retrieval_service()

            # ── Stage 1: extract text ─────────────────────────────────────
            yield _sse({"type": "stage", "stage": "extracting", "label": "Extracting text…"})
            await asyncio.sleep(0)
            if text:
                para_text = text.strip()
            elif doc_id:
                try:
                    para_text = await asyncio.to_thread(extract_text_from_doc_id, doc_id)
                except Exception as exc:
                    yield _sse({"type": "error", "text": f"Document not found: {exc}"})
                    return
            elif file_content is not None:
                para_text = await asyncio.to_thread(
                    extract_text_from_file, file_content, file_name or "upload"
                )
            else:
                yield _sse({"type": "error", "text": "No input provided"})
                return
            if not para_text.strip():
                yield _sse({"type": "error", "text": "Could not extract text from the provided input"})
                return

            # ── Stage 2: split into clauses ───────────────────────────────
            yield _sse({"type": "stage", "stage": "splitting", "label": "Splitting into clauses…"})
            await asyncio.sleep(0)
            clauses: list[str] = await asyncio.to_thread(split_into_clauses, para_text, client)

            # ── Stage 3: retrieve + gap check (parallel across all clauses) ──
            findings: list[dict] = []
            yield _sse({
                "type": "stage",
                "stage": "analyzing",
                "label": f"Analyzing {len(clauses)} clauses in parallel…",
            })
            await asyncio.sleep(0)
            clause_results = await run_clauses_parallel(
                clauses, retrieval_service, client,
                top_k=5,
                domains=domains or None,
            )
            for res in clause_results:
                chunks = res["chunks"]
                finding = res["finding"]
                # Emit at most three source events per clause.
                for chunk in chunks[:3]:
                    yield _sse({
                        "type": "source",
                        "standard": getattr(chunk, "doc_title", "") or getattr(chunk, "doc_name", ""),
                        "clause": getattr(chunk, "section_title", "") or "",
                        "score": round(float(getattr(chunk, "score", 0)), 3),
                        "status": "retrieved",
                        "full_content": (getattr(chunk, "text", "") or "")[:300],
                    })
                if finding:
                    findings.append(finding)
                    yield _sse({"type": "finding", **finding})
                await asyncio.sleep(0)

            # ── Stage 4: synthesize conclusion ────────────────────────────
            yield _sse({"type": "stage", "stage": "concluding", "label": "Generating conclusion…"})
            await asyncio.sleep(0)
            conclusion_data = await asyncio.to_thread(
                synthesize_conclusion, para_text, findings, client
            )
            yield _sse({"type": "done", **conclusion_data})

            # Auto-save is best effort: the client already has its result, so
            # a persistence failure is logged and never turned into an error event.
            try:
                from app.shared.bootstrap import get_compliance_repository

                repo = get_compliance_repository()
                record = _build_analysis_record(
                    findings,
                    conclusion_data,
                    created_by=getattr(current_user, "username", None),
                    doc_name=file_name or (title or "Pasted text"),
                    standard_name=title or "",
                )
                analysis_id = await asyncio.to_thread(repo.save_analysis, record)
                yield _sse({"type": "saved", "analysis_id": analysis_id})
            except NotImplementedError:
                pass  # No postgres backend configured — skip saving
            except Exception as exc:
                logger.warning("Failed to auto-save compliance analysis: {}", exc)
        except Exception as exc:
            logger.exception("analyze-stream pipeline error")
            yield _sse({"type": "error", "text": str(exc)})

    return StreamingResponse(
        generate(),
        media_type="text/event-stream",
        headers={"Cache-Control": "no-cache", "Connection": "keep-alive", "X-Accel-Buffering": "no"},
    )


def _build_analysis_record(
    findings: list[dict],
    conclusion_data: dict,
    *,
    created_by: str | None,
    doc_name: str,
    standard_name: str,
):
    """Map pipeline output onto the repository's ``AnalysisRecord`` shape."""
    from datetime import datetime

    from app.domain.compliance.ports import AnalysisRecord, FindingRecord

    # Record and finding ids are left empty here, as in the original code.
    finding_records = [
        FindingRecord(
            id="",
            analysis_id="",
            seq=seq,
            title=item.get("title", ""),
            description=item.get("desc", ""),
            status=item.get("status", "ok"),
            clause_ref=item.get("clause_ref"),
        )
        for seq, item in enumerate(findings)
    ]
    return AnalysisRecord(
        id="",
        # NOTE(review): utcnow() yields a naive datetime and is deprecated
        # since Python 3.12; confirm the storage column type before switching
        # to a timezone-aware value.
        created_at=datetime.utcnow(),
        created_by=created_by,
        doc_name=doc_name,
        standard_name=standard_name,
        risk_score=conclusion_data.get("risk_score", 0),
        conclusion=conclusion_data.get("conclusion", ""),
        actions=conclusion_data.get("actions", []),
        para_text=conclusion_data.get("para_text", ""),
        highlight_terms=conclusion_data.get("highlight_terms", []),
        findings=finding_records,
    )
@router.post("/chat/{segment_id}")
async def compliance_chat(segment_id: int, request: ComplianceChatRequest):
"""Stream compliance Q&A grounded in real vector retrieval."""
@@ -98,3 +268,226 @@ async def compliance_chat(segment_id: int, request: ComplianceChatRequest):
media_type="text/event-stream",
headers={"Cache-Control": "no-cache", "Connection": "keep-alive", "X-Accel-Buffering": "no"},
)
@router.get("/history")
async def list_history(
    limit: int = 20,
    offset: int = 0,
    current_user: UserClaims = Depends(get_current_user),
):
    """List saved compliance analyses as lightweight summaries.

    Args:
        limit: Page size handed to the repository.
        offset: Number of records to skip, handed to the repository.
        current_user: The authenticated caller (authentication gate only).

    Returns:
        One summary dict per analysis, or an empty list when the repository
        raises ``NotImplementedError``.
    """
    from app.shared.bootstrap import get_compliance_repository

    # NOTE(review): limit/offset reach the repository unvalidated — confirm
    # it rejects or clamps negative and very large values.
    try:
        repository = get_compliance_repository()
        rows = await asyncio.to_thread(repository.list_analyses, limit, offset)
        summaries = []
        for row in rows:
            summaries.append(
                {
                    "id": row.id,
                    "created_at": row.created_at.isoformat(),
                    "created_by": row.created_by,
                    "doc_name": row.doc_name,
                    "standard_name": row.standard_name,
                    "risk_score": row.risk_score,
                    "finding_count": len(row.findings),
                }
            )
        return summaries
    except NotImplementedError:
        return []
@router.get("/history/{analysis_id}")
async def get_history_item(
    analysis_id: str,
    current_user: UserClaims = Depends(get_current_user),
):
    """Return one saved analysis, including its findings.

    Args:
        analysis_id: Identifier of the analysis to load.
        current_user: The authenticated caller (authentication gate only).

    Returns:
        A dict with the analysis fields and a ``findings`` list.

    Raises:
        HTTPException: 404 when the analysis does not exist, or when the
            repository raises ``NotImplementedError`` — the same condition
            under which ``list_history`` returns an empty list.
    """
    from app.shared.bootstrap import get_compliance_repository
    from fastapi import HTTPException

    try:
        repo = get_compliance_repository()
        record = await asyncio.to_thread(repo.get_analysis, analysis_id)
    except NotImplementedError:
        # Without a history backend nothing can have been saved, so answer
        # "not found" instead of letting the error surface as a 500.
        record = None
    if not record:
        raise HTTPException(status_code=404, detail="Analysis not found")

    return {
        "id": record.id,
        "created_at": record.created_at.isoformat(),
        "created_by": record.created_by,
        "doc_name": record.doc_name,
        "standard_name": record.standard_name,
        "risk_score": record.risk_score,
        "conclusion": record.conclusion,
        "actions": record.actions,
        "para_text": record.para_text,
        "highlight_terms": record.highlight_terms,
        "findings": [
            {
                "id": f.id,
                "seq": f.seq,
                "title": f.title,
                "description": f.description,
                "status": f.status,
                "clause_ref": f.clause_ref,
            }
            for f in record.findings
        ],
    }
@router.delete("/history/{analysis_id}", status_code=204)
async def delete_history_item(
    analysis_id: str,
    current_user: UserClaims = Depends(get_current_user),
):
    """Remove a saved analysis through the compliance repository.

    Args:
        analysis_id: Identifier of the analysis to delete.
        current_user: The authenticated caller (authentication gate only).

    Returns:
        Nothing; the route answers 204. The repository's return value is ignored.
    """
    from app.shared.bootstrap import get_compliance_repository

    # NOTE(review): unlike list_history, NotImplementedError is not handled
    # here, and no ownership check is made — confirm both are intended.
    repository = get_compliance_repository()
    remove = repository.delete_analysis
    await asyncio.to_thread(remove, analysis_id)
@router.get("/history/{analysis_id}/download")
async def download_history_docx(
    analysis_id: str,
    current_user: UserClaims = Depends(get_current_user),
):
    """Return a DOCX compliance report for the given analysis.

    Args:
        analysis_id: Identifier of the analysis to export.
        current_user: The authenticated caller (authentication gate only).

    Returns:
        A response carrying the DOCX bytes as an attachment.

    Raises:
        HTTPException: 404 when the analysis does not exist, or when the
            repository raises ``NotImplementedError``.
    """
    from urllib.parse import quote

    from app.shared.bootstrap import get_compliance_repository
    from app.infrastructure.compliance.docx_export import generate_docx
    from fastapi import HTTPException
    from fastapi.responses import Response

    try:
        repo = get_compliance_repository()
        record = await asyncio.to_thread(repo.get_analysis, analysis_id)
    except NotImplementedError:
        # No history backend, so there is nothing to export.
        record = None
    if not record:
        raise HTTPException(status_code=404, detail="Analysis not found")

    docx_bytes = await asyncio.to_thread(generate_docx, record)
    safe_name = (record.doc_name or "report").replace(" ", "_")[:50]
    filename = f"compliance_{safe_name}_{record.created_at.strftime('%Y%m%d')}.docx"

    # Header values must be latin-1 encodable and must not contain quotes, so
    # the raw document name cannot be used directly. Send an ASCII-only
    # fallback plus the exact UTF-8 name in the RFC 6266 `filename*` form.
    ascii_fallback = "".join(
        ch if ch.isascii() and (ch.isalnum() or ch in "._-") else "_" for ch in filename
    )
    disposition = (
        f'attachment; filename="{ascii_fallback}"; '
        f"filename*=UTF-8''{quote(filename, safe='')}"
    )
    return Response(
        content=docx_bytes,
        media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        headers={"Content-Disposition": disposition},
    )
@router.get("/analyses/{analysis_id}/findings/{finding_id}/chat")
async def get_finding_chat_history(
    analysis_id: str,
    finding_id: str,
    current_user: UserClaims = Depends(get_current_user),
):
    """Fetch the stored chat messages of one finding thread.

    Args:
        analysis_id: Analysis identifier from the URL.
        finding_id: Identifier of the finding whose messages are loaded.
        current_user: The authenticated caller (authentication gate only).

    Returns:
        Whatever the repository returns for the finding, or an empty list
        when the repository raises ``NotImplementedError``.
    """
    from app.shared.bootstrap import get_compliance_repository

    # NOTE(review): analysis_id is not used to scope the lookup — messages
    # are fetched by finding_id alone; confirm that is intended.
    try:
        repository = get_compliance_repository()
        return await asyncio.to_thread(repository.get_messages, finding_id)
    except NotImplementedError:
        return []
@router.post("/analyses/{analysis_id}/findings/{finding_id}/suggestions")
async def get_finding_suggestions(
    analysis_id: str,
    finding_id: str,
    current_user: UserClaims = Depends(get_current_user),
):
    """Ask the LLM for follow-up question suggestions about one finding.

    Args:
        analysis_id: Identifier of the analysis that owns the finding.
        finding_id: Identifier of the finding to generate questions for.
        current_user: The authenticated caller (authentication gate only).

    Returns:
        ``{"questions": ...}`` holding the output of ``generate_suggestions``.

    Raises:
        HTTPException: 404 when the analysis or the finding cannot be found.
    """
    from app.application.compliance.pipeline import generate_suggestions
    from app.shared.bootstrap import get_compliance_repository
    from app.services.llm.llm_factory import get_llm_client
    from fastapi import HTTPException

    repository = get_compliance_repository()
    analysis = await asyncio.to_thread(repository.get_analysis, analysis_id)
    if not analysis:
        raise HTTPException(status_code=404, detail="Analysis not found")

    # Take the first finding of this analysis whose id matches.
    finding = None
    for candidate in analysis.findings:
        if candidate.id == finding_id:
            finding = candidate
            break
    if not finding:
        raise HTTPException(status_code=404, detail="Finding not found")

    llm = get_llm_client(provider=settings.llm_provider, model=settings.llm_model)
    suggested = await asyncio.to_thread(generate_suggestions, finding, analysis, llm)
    return {"questions": suggested}
@router.post("/analyses/{analysis_id}/findings/{finding_id}/chat")
async def finding_chat(
    analysis_id: str,
    finding_id: str,
    request: ComplianceChatRequest,
    current_user: UserClaims = Depends(get_current_user),
):
    """Stream a grounded chat response for a specific finding.

    Loads the finding and analysis from the repository to build the grounded
    context, includes the most recent earlier turns of the thread in the
    prompt, and persists both the user message and the assistant response.

    Args:
        analysis_id: Identifier of the analysis that owns the finding.
        finding_id: Identifier of the finding being discussed.
        request: Chat request; ``request.query`` is the user's question.
        current_user: The authenticated caller (authentication gate only).

    Returns:
        A ``text/event-stream`` response emitting ``chunk``, ``done`` and
        ``error`` events.

    Raises:
        HTTPException: 404 when the analysis or the finding cannot be found.
    """
    from app.application.compliance.pipeline import build_finding_context
    from app.shared.bootstrap import get_compliance_repository
    from fastapi import HTTPException

    repo = get_compliance_repository()
    analysis = await asyncio.to_thread(repo.get_analysis, analysis_id)
    if not analysis:
        raise HTTPException(status_code=404, detail="Analysis not found")
    finding = next((f for f in analysis.findings if f.id == finding_id), None)
    if not finding:
        raise HTTPException(status_code=404, detail="Finding not found")

    # Load earlier turns BEFORE saving the new question, so the current
    # message does not appear twice in the prompt.
    prior_messages = await asyncio.to_thread(repo.get_messages, finding_id)

    # Persist user message
    await asyncio.to_thread(
        repo.save_message, analysis_id, finding_id, "user", request.query
    )

    # Keep the last 10 messages (= 5 turns). The original code built this
    # history and then discarded it; here it is folded into the prompt.
    history_lines = [f"{m['role']}: {m['content']}" for m in prior_messages[-10:]]

    # With no history the query text is identical to the original format.
    system_context = build_finding_context(finding, analysis)
    sections = [f"[Compliance Finding Context]\n{system_context}"]
    if history_lines:
        sections.append("[Previous conversation]\n" + "\n".join(history_lines))
    sections.append(f"User question: {request.query}")
    full_query = "\n\n".join(sections)

    assistant_buffer: list[str] = []

    async def generate() -> AsyncGenerator[str, None]:
        try:
            _, event_stream = get_agent_conversation_service().stream_chat(
                query=full_query,
                top_k=5,
                prompt_template="compliance_qa",
            )
            # NOTE(review): event_stream is consumed with a plain `for`, so
            # each step runs on the event loop — confirm it does not block.
            for event in event_stream:
                event_type = event.get("event", "")
                if event_type == "content":
                    text = event.get("data", "")
                    if text:
                        assistant_buffer.append(text)
                        yield _sse({"type": "chunk", "text": text})
                elif event_type == "done":
                    yield _sse({"type": "done"})
                await asyncio.sleep(0)
        except Exception as exc:
            logger.exception("finding_chat stream error")
            yield _sse({"type": "error", "text": str(exc)})
        finally:
            # Persist whatever was streamed, even if the stream ended early.
            full_response = "".join(assistant_buffer)
            if full_response:
                try:
                    await asyncio.to_thread(
                        repo.save_message, analysis_id, finding_id, "assistant", full_response
                    )
                except Exception as exc:
                    logger.warning("Failed to persist assistant message: {}", exc)

    return StreamingResponse(
        generate(),
        media_type="text/event-stream",
        headers={"Cache-Control": "no-cache", "Connection": "keep-alive", "X-Accel-Buffering": "no"},
    )

View File

@@ -5,12 +5,15 @@ from __future__ import annotations
from io import BytesIO
from urllib.parse import quote
from fastapi import APIRouter, File, Form, HTTPException, UploadFile
from fastapi import APIRouter, BackgroundTasks, Depends, File, Form, HTTPException, UploadFile
from fastapi.responses import StreamingResponse
from loguru import logger
from app.api.dependencies.auth import get_current_user
from app.api.models import DocumentUploadResponse
from app.application.documents import DocumentProcessResult
from app.config.settings import settings
from app.domain.auth.models import UserClaims
from app.shared.bootstrap import get_document_command_service, get_document_query_service
# Keep route handlers close to their transport-layer wiring for easier auditing.
@@ -31,16 +34,60 @@ def _document_response(result: DocumentProcessResult) -> DocumentUploadResponse:
)
def _run_process_in_background(
    *,
    doc_id: str,
    file_name: str,
    final_doc_name: str,
    content: bytes,
    regulation_type: str,
    version: str,
    generate_summary: bool,
    run_id: str | None,
) -> None:
    """Process an already-stored document; meant to run as a background task.

    All arguments are forwarded unchanged to the document command service's
    ``_process_document``. Any exception is logged with the document id and
    swallowed, so a failure never propagates to the caller.

    Args:
        doc_id: Identifier of the stored document.
        file_name: Original upload file name.
        final_doc_name: Display name chosen for the document.
        content: Raw file bytes.
        regulation_type: Regulation type label.
        version: Document version label.
        generate_summary: Whether a summary was requested.
        run_id: Processing run identifier, if any.
    """
    job = dict(
        doc_id=doc_id,
        file_name=file_name,
        final_doc_name=final_doc_name,
        content=content,
        regulation_type=regulation_type,
        version=version,
        generate_summary=generate_summary,
        run_id=run_id,
    )
    try:
        # NOTE(review): this reaches into a private method of the command
        # service; a public entry point would be a safer contract.
        get_document_command_service()._process_document(**job)
    except Exception:
        logger.exception("BackgroundTask document processing failed: doc_id={}", doc_id)
@router.post("/upload", response_model=DocumentUploadResponse)
async def upload_document(
background_tasks: BackgroundTasks,
file: UploadFile = File(..., description="上传的文档文件"),
doc_id: str | None = Form(None, description="客户端预分配的文档ID不传则自动生成"),
doc_name: str | None = Form(None, description="文档名称"),
regulation_type: str | None = Form(None, description="法规类型"),
version: str | None = Form(None, description="文档版本"),
generate_summary: bool = Form(False, description="是否生成摘要"),
sync: bool = Form(False, description="同步处理(演示/测试用,默认异步处理)"),
current_user: UserClaims = Depends(get_current_user),
):
"""Handle upload document."""
"""Upload a document and process it asynchronously.
Default path (sync=false):
1. Store binary to MinIO immediately — returns within seconds.
2. Schedule parse→embed→index as a FastAPI BackgroundTask (same process,
threadpool) OR enqueue to Celery workers when USE_CELERY_WORKER=true.
3. Poll GET /documents/status/{doc_id} for progress.
sync=true path: full inline processing, blocks until complete (demo / CI use).
"""
content = await file.read()
if not file.filename:
raise HTTPException(status_code=400, detail="文件名不能为空")
@@ -48,7 +95,11 @@ async def upload_document(
raise HTTPException(status_code=400, detail="上传文件为空")
try:
result = get_document_command_service().upload_and_process(
svc = get_document_command_service()
if sync:
# Synchronous fallback: full inline processing.
result = svc.upload_and_process(
doc_id=doc_id,
file_name=file.filename,
content=content,
@@ -58,9 +109,59 @@ async def upload_document(
version=version or "",
generate_summary=generate_summary,
)
else:
# Step 1: store binary and create the document record (fast, sync).
stored_doc_id, run_id = svc.store_document(
doc_id=doc_id,
file_name=file.filename,
content=content,
content_type=file.content_type or "application/octet-stream",
doc_name=doc_name,
regulation_type=regulation_type or "",
version=version or "",
generate_summary=generate_summary,
)
final_doc_name = doc_name or file.filename
# Step 2: schedule processing via Celery worker OR FastAPI BackgroundTask.
if settings.use_celery_worker:
from app.infrastructure.tasks.document_tasks import process_document_task
process_document_task.delay(
doc_id=stored_doc_id,
file_name=file.filename,
doc_name=final_doc_name,
regulation_type=regulation_type or "",
version=version or "",
generate_summary=generate_summary,
run_id=run_id,
)
processing_note = "已入 Celery 队列,由 Worker 处理。"
else:
# Default: run in FastAPI's threadpool — no external worker needed.
background_tasks.add_task(
_run_process_in_background,
doc_id=stored_doc_id,
file_name=file.filename,
final_doc_name=final_doc_name,
content=content,
regulation_type=regulation_type or "",
version=version or "",
generate_summary=generate_summary,
run_id=run_id,
)
processing_note = "正在后台处理。"
result = DocumentProcessResult(
doc_id=stored_doc_id,
doc_name=final_doc_name,
status="stored",
message=f"文件已存储,{processing_note}请轮询 GET /documents/status/{{doc_id}} 查看进度。",
)
if result.status == "failed":
raise HTTPException(status_code=500, detail=result.message)
return _document_response(result)
except HTTPException:
raise
except Exception as exc:
@@ -106,7 +207,7 @@ async def download_document(doc_id: str):
@router.get("/list")
async def list_documents():
async def list_documents(current_user: UserClaims = Depends(get_current_user)):
"""List documents."""
documents = get_document_query_service().list_documents()
return {
@@ -148,7 +249,7 @@ async def get_document_management_list():
@router.delete("/{doc_id}")
async def delete_document(doc_id: str):
async def delete_document(doc_id: str, current_user: UserClaims = Depends(get_current_user)):
"""Delete a document and its associated data."""
deleted = get_document_command_service().delete(doc_id)
if not deleted:

View File

@@ -4,10 +4,12 @@ from __future__ import annotations
import json
from fastapi import APIRouter, Query
from fastapi import APIRouter, Depends, Query
from fastapi.responses import StreamingResponse
from app.shared.bootstrap import get_perception_service
from app.shared.bootstrap import get_crawl_service, get_event_store, get_perception_service
from app.api.dependencies.auth import get_current_user
from app.domain.auth.models import UserClaims
from app.shared.async_utils import iter_in_thread
router = APIRouter(prefix="/perception", tags=["智能感知"])
@@ -65,3 +67,77 @@ async def analyze_event(event_id: str):
"X-Accel-Buffering": "no",
},
)
@router.post("/crawl")
async def run_crawl(
    body: dict | None = None,
    current_user: UserClaims = Depends(get_current_user),
):
    """Trigger a manual crawl of regulatory sources and stream SSE progress.

    Body (optional): {"sources": ["CATARC", "国标委·强制性", "EUR-Lex"]}
    Omit ``sources`` (or send no body) to crawl all registered sources.

    Raises:
        HTTPException: 422 when ``sources`` is present but is not a list of strings.
    """
    from fastapi import HTTPException

    sources = (body or {}).get("sources")
    # A bare string would otherwise be iterated character by character by the
    # crawl service, producing one "Unknown source" error per character.
    if sources is not None and (
        not isinstance(sources, list) or not all(isinstance(name, str) for name in sources)
    ):
        raise HTTPException(status_code=422, detail="'sources' must be a list of source names")

    crawl_svc = get_crawl_service()

    async def crawl_stream():
        # The crawl generator is synchronous; iter_in_thread drives it off the event loop.
        async for item in iter_in_thread(crawl_svc.run_crawl(sources=sources)):
            event_name = item.get("event", "message")
            data = item.get("data", "")
            if isinstance(data, (dict, list)):
                data = json.dumps(data, ensure_ascii=False)
            # SSE requires every line of a multi-line payload to carry its own
            # "data:" prefix, otherwise the frame is truncated at the first newline.
            payload = "\n".join(f"data: {line}" for line in str(data).split("\n"))
            yield f"event: {event_name}\n{payload}\n\n"

    return StreamingResponse(
        crawl_stream(),
        media_type="text/event-stream",
        headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
    )
@router.post("/events/{event_id}/process")
async def process_event(
    event_id: str,
    current_user: UserClaims = Depends(get_current_user),
):
    """Run the LLM pipeline (structure extraction + impact assessment) for one event.

    The enriched event is stamped with a UTC ``processed_at`` timestamp and
    written back to the event store. No diff is computed here; diffs are only
    produced by the crawl service when an already-known event changes.

    Raises:
        HTTPException: 404 when no event with ``event_id`` exists.
    """
    import asyncio
    from datetime import UTC, datetime

    from fastapi import HTTPException

    from app.infrastructure.perception.llm_pipeline import LlmPipeline
    from app.shared.bootstrap import get_retrieval_service

    event = get_perception_service().get_event(event_id)
    if not event:
        raise HTTPException(status_code=404, detail=f"Event {event_id} not found")

    def _enrich_and_store() -> str:
        # Same statement order as before; only the executing thread changes.
        store = get_event_store()
        pipeline = LlmPipeline()
        structure = pipeline.extract_structure(event)
        event.update(structure)
        event["affected_docs"] = pipeline.assess_impact(event, get_retrieval_service())
        event["processed_at"] = datetime.now(UTC).isoformat()
        store.upsert(event)
        return event["processed_at"]

    # The pipeline and store calls are synchronous; running them directly inside
    # this coroutine would stall the event loop for every other request. The crawl
    # route already drives the same calls from a worker thread via iter_in_thread.
    processed_at = await asyncio.to_thread(_enrich_and_store)
    return {"status": "ok", "event_id": event_id, "processed_at": processed_at}
@router.get("/events/{event_id}/diff")
async def get_event_diff(event_id: str):
    """Return the stored semantic-diff detail for an event.

    Responds with 404 when the event does not exist or when it carries no
    ``change_summary`` (i.e. no diff has been recorded for it).
    """
    # NOTE(review): unlike the POST routes in this router, this handler declares
    # no get_current_user dependency — confirm that unauthenticated access is intended.
    from fastapi import HTTPException

    event = get_perception_service().get_event(event_id)
    if not event:
        raise HTTPException(status_code=404, detail=f"Event {event_id} not found")

    summary = event.get("change_summary")
    if not summary:
        raise HTTPException(status_code=404, detail="No diff available for this event")

    sections = event.get("changed_sections") or []
    response = {
        "event_id": event_id,
        "change_summary": summary,
        "changed_sections": sections,
        "previous_hash": event.get("previous_hash"),
        "content_hash": event.get("content_hash"),
    }
    return response

View File

@@ -5,10 +5,12 @@ from __future__ import annotations
import json
from typing import AsyncGenerator
from fastapi import APIRouter
from fastapi import APIRouter, Depends
from fastapi.responses import StreamingResponse
from app.api.dependencies.auth import get_current_user
from app.config.settings import settings
from app.domain.auth.models import UserClaims
from app.schemas.rag import RagChatRequest, QuickQuestionsResponse, QuickQuestion
from app.shared.async_utils import iter_in_thread
from app.shared.bootstrap import get_agent_conversation_service
@@ -27,7 +29,10 @@ _DEFAULT_QUICK_QUESTIONS = [
@router.post("/chat")
async def rag_chat(request: RagChatRequest):
async def rag_chat(
request: RagChatRequest,
current_user: UserClaims = Depends(get_current_user),
):
"""Stream RAG Q&A using the real agent service."""
session_id, event_stream = get_agent_conversation_service().stream_chat(
query=request.query,

View File

@@ -0,0 +1 @@
"""Compliance application layer."""

View File

@@ -0,0 +1,370 @@
"""Compliance analysis pipeline helpers.
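All functions except run_clauses_parallel() are synchronous — call them via asyncio.to_thread() in async SSE generators. run_clauses_parallel() is a coroutine that performs that thread fan-out itself.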
"""
from __future__ import annotations
import asyncio
import json
import os
import re
import tempfile
from typing import TYPE_CHECKING
from loguru import logger
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
# Shared retry policy for LLM calls: up to 3 attempts with exponential back-off,
# each wait clamped to the 1–4 s range. Only ValueError, TimeoutError and
# ConnectionError trigger a retry; once attempts are exhausted the last exception
# is re-raised as-is (reraise=True) instead of being wrapped in a RetryError.
_llm_retry = retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=1, max=4),
    retry=retry_if_exception_type((ValueError, TimeoutError, ConnectionError)),
    reraise=True,
)
if TYPE_CHECKING:
from app.application.knowledge import KnowledgeRetrievalService
from app.domain.retrieval import RetrievedChunk
from app.domain.compliance.ports import AnalysisRecord, FindingRecord
from app.services.llm.base_client import BaseLLMClient
def _extract_json(text: str):
    """Extract the first JSON object or array embedded in an LLM response.

    Tolerates markdown code fences and surrounding prose. Resolution order:

    1. If a fenced block is present, only its content is considered.
    2. The candidate is parsed as a whole.
    3. Otherwise each ``[`` / ``{`` is tried in document order and the first
       position that decodes to a complete JSON value wins.

    Step 3 replaces a greedy "array first, then object" regex search, which
    returned a nested array (e.g. ``["a"]``) instead of the enclosing object
    whenever the object was followed by trailing prose.

    Args:
        text: Raw LLM response text.

    Returns:
        The decoded JSON value (normally a ``dict`` or ``list``).

    Raises:
        ValueError: If no valid JSON value can be found.
    """
    candidate = text.strip()
    fenced = re.search(r"```(?:json)?\s*([\s\S]*?)```", candidate)
    if fenced:
        candidate = fenced.group(1).strip()
    try:
        return json.loads(candidate)
    except json.JSONDecodeError:
        pass

    decoder = json.JSONDecoder()
    for opener in re.finditer(r"[\[{]", candidate):
        try:
            # raw_decode parses one complete value starting at the given index
            # and ignores whatever trails it.
            value, _end = decoder.raw_decode(candidate, opener.start())
        except json.JSONDecodeError:
            continue
        return value
    raise ValueError(f"No valid JSON found in LLM response: {text[:300]}")
def extract_text_from_doc_id(doc_id: str) -> str:
    """Assemble analysis text for a known document from retrieved chunks.

    Looks the document up, runs a retrieval query using its name (top 30 hits)
    and keeps the hits whose ``doc_id`` matches. At most 15 chunk texts are
    joined with blank lines.

    Raises:
        ValueError: If the document does not exist.
    """
    from app.shared.bootstrap import get_document_query_service, get_retrieval_service

    document = get_document_query_service().get(doc_id)
    if not document:
        raise ValueError(f"Document '{doc_id}' not found")

    retrieved = get_retrieval_service().retrieve(query=document.doc_name, top_k=30)
    own_chunks = [chunk for chunk in retrieved if chunk.doc_id == doc_id]
    # NOTE(review): when none of the hits belong to this document, the first 15
    # hits are used regardless of which document they came from — confirm intended.
    selected = own_chunks if own_chunks else retrieved[:15]
    texts = [chunk.text for chunk in selected[:15]]
    return "\n\n".join(texts)
def extract_text_from_file(content: bytes, filename: str) -> str:
    """Parse an uploaded file and return up to 4000 characters of its text.

    The bytes are written to a temporary file (the parser takes a path), parsed
    with the document command service's parser, and the temporary file is
    always removed afterwards.

    Args:
        content: Raw file bytes.
        filename: Original file name; its extension selects the temp-file
            suffix (``.pdf`` when missing).

    Returns:
        ``raw_text`` truncated to 4000 characters when the parser produced it,
        otherwise the text of the first 30 semantic blocks joined by newlines
        (also truncated). Returns ``""`` if anything fails — extraction is
        best-effort and failures are only logged.
    """
    from app.shared.bootstrap import get_document_command_service

    suffix = os.path.splitext(filename or "doc.pdf")[1] or ".pdf"
    tmp_path = ""
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            # Record the path before writing so the cleanup below still removes
            # the file when the write itself fails (previously it was leaked).
            tmp_path = tmp.name
            tmp.write(content)
        service = get_document_command_service()
        parsed = service.parser.parse(file_path=tmp_path, doc_id="tmp_analysis", doc_name=filename)
        if parsed.raw_text:
            return parsed.raw_text[:4000]
        return "\n".join(
            b.get("text", "") for b in parsed.semantic_blocks[:30] if b.get("text")
        )[:4000]
    except Exception as exc:
        logger.warning("File text extraction failed: {}", exc)
        return ""
    finally:
        if tmp_path:
            try:
                os.unlink(tmp_path)
            except OSError:
                # Best-effort cleanup; a leftover temp file must not mask the result.
                pass
def split_into_clauses(text: str, client: "BaseLLMClient") -> list[str]:
    """Split free text into compliance clauses, preferring the LLM's answer.

    The first 2000 characters are sent to the LLM, which is asked for a JSON
    array of 3-8 clauses; at most 8 non-empty entries are returned. If the call
    is unsuccessful, the reply is not a usable list, or parsing fails, the full
    text is split on sentence punctuation/newlines instead and up to 6
    fragments longer than 20 characters are returned.
    """
    prompt_parts = [
        "You are a compliance analysis expert. Split the following text into 3-8 ",
        "semantically complete compliance clauses. Each clause should be an independent ",
        "compliance requirement or technical statement.\n",
        "Return as JSON array of strings, e.g.:\n",
        '["Clause one...", "Clause two..."]\n',
        "Return ONLY the JSON array.\n\n",
        f"Text:\n{text[:2000]}",
    ]
    prompt = "".join(prompt_parts)

    reply = client.chat([{"role": "user", "content": prompt}], max_tokens=1000)
    if reply.is_success:
        try:
            parsed = _extract_json(reply.content)
            if isinstance(parsed, list):
                cleaned = []
                for item in parsed:
                    piece = str(item).strip()
                    if piece:
                        cleaned.append(piece)
                if cleaned:
                    return cleaned[:8]
        except (ValueError, TypeError):
            logger.warning("Clause split JSON parse failed, using fallback")

    # Heuristic fallback: sentence-level split of the whole text.
    fragments = []
    for sentence in re.split(r"[.?!;\n]+", text):
        trimmed = sentence.strip()
        if len(trimmed) > 20:
            fragments.append(trimmed)
    return fragments[:6]
def retrieve_for_clause(
    clause: str,
    retrieval_service: "KnowledgeRetrievalService",
    top_k: int = 5,
    domains: str | None = None,
) -> list["RetrievedChunk"]:
    """Retrieve the regulation chunks most relevant to one clause.

    Thin wrapper over ``retrieval_service.retrieve``: the clause is the query
    and ``domains`` is forwarded unchanged as the ``filters`` argument.
    """
    hits = retrieval_service.retrieve(
        query=clause,
        top_k=top_k,
        filters=domains,
    )
    return hits
def process_single_clause(
    clause: str,
    index: int,
    retrieval_service: "KnowledgeRetrievalService",
    client: "BaseLLMClient",
    top_k: int = 5,
    domains: str | None = None,
) -> dict:
    """Retrieve regulations for one clause, then judge its compliance.

    Synchronous by design so it can be dispatched with asyncio.to_thread().

    Returns:
        ``{"index": index, "chunks": <retrieved chunks>, "finding": <dict | None>}``;
        ``finding`` is whatever ``check_clause_compliance`` returns, which is
        ``None`` when the LLM check fails.
    """
    retrieved = retrieve_for_clause(clause, retrieval_service, top_k, domains)
    outcome = {"index": index, "chunks": retrieved}
    outcome["finding"] = check_clause_compliance(clause, retrieved, client)
    return outcome
async def run_clauses_parallel(
    clauses: list[str],
    retrieval_service: "KnowledgeRetrievalService",
    client: "BaseLLMClient",
    top_k: int = 5,
    domains: str | None = None,
) -> list[dict]:
    """Fan every clause out to a worker thread and collect ordered results.

    Each clause goes through ``process_single_clause`` via asyncio.to_thread();
    the returned list follows the input clause order. A clause whose processing
    raised is logged and represented as
    ``{"index": i, "chunks": [], "finding": None}`` so the remaining clauses
    are still reported.

    ``retrieval_service`` and ``client`` are shared by all worker threads
    without locking, so both must be thread-safe.
    """
    pending = []
    for position, clause_text in enumerate(clauses):
        pending.append(
            asyncio.to_thread(
                process_single_clause,
                clause_text,
                position,
                retrieval_service,
                client,
                top_k,
                domains,
            )
        )

    outcomes = await asyncio.gather(*pending, return_exceptions=True)

    ordered: list[dict] = []
    for position, outcome in enumerate(outcomes):
        if not isinstance(outcome, Exception):
            ordered.append(outcome)
            continue
        logger.warning("Clause {} processing failed: {}", position, outcome)
        ordered.append({"index": position, "chunks": [], "finding": None})
    return ordered
def check_clause_compliance(
    clause: str,
    chunks: list["RetrievedChunk"],
    client: "BaseLLMClient",
) -> dict | None:
    """Ask the LLM whether a business clause complies with retrieved regulations.

    Up to five chunks (300 characters of text each) are given as regulatory
    context. The LLM call goes through the shared retry policy; an unsuccessful
    response counts as a retryable failure.

    Args:
        clause: The business clause under review.
        chunks: Retrieved regulation chunks; may be empty.
        client: LLM client used for the judgement.

    Returns:
        ``{"title", "desc", "status", "clause_ref"}`` or ``None`` when the LLM
        call fails after retries or its reply is not a JSON object containing
        ``status``. ``status`` is normalised to a lower-case string; anything
        other than ``ok`` / ``warn`` / ``risk`` becomes ``"info"``, so
        downstream code that upper-cases it or uses it as a lookup key never
        sees ``None`` or an unexpected spelling.
    """
    reg_context = "\n".join(
        f"[{i+1}] {c.doc_title} {c.section_title or ''}: {c.text[:300]}"
        for i, c in enumerate(chunks[:5])
    ) if chunks else "(no regulatory context retrieved)"
    prompt = (
        "You are a compliance expert. Judge whether the following business clause "
        "complies with the retrieved regulations.\n\n"
        f"Business clause:\n{clause}\n\n"
        f"Retrieved regulations:\n{reg_context}\n\n"
        "Return JSON:\n"
        "{\n"
        '  "status": "ok" | "warn" | "risk",\n'
        '  "title": "Short finding title (max 30 chars)",\n'
        '  "desc": "Description (50-120 chars)",\n'
        '  "clause_ref": "Regulation clause reference e.g. Art.9.1 or Sec.3.1"\n'
        "}\n"
        "status: ok=compliant, warn=gap exists, risk=critical/missing\n"
        "Return ONLY the JSON object."
    )

    def _do_check():
        resp = client.chat([{"role": "user", "content": prompt}], max_tokens=500)
        if not resp.is_success:
            # ValueError is one of the exception types the retry policy retries on.
            raise ValueError("LLM returned non-success for gap check")
        return resp

    try:
        response = _llm_retry(_do_check)()
    except Exception as exc:
        logger.warning("check_clause_compliance LLM call failed after retries: {}", exc)
        return None

    try:
        result = _extract_json(response.content)
        if isinstance(result, dict) and "status" in result:
            raw_status = result.get("status")
            status = raw_status.strip().lower() if isinstance(raw_status, str) else ""
            if status not in ("ok", "warn", "risk"):
                # "info" was already this function's neutral default status.
                status = "info"
            return {
                "title": str(result.get("title", "Compliance finding")),
                "desc": str(result.get("desc", "")),
                "status": status,
                "clause_ref": result.get("clause_ref"),
            }
    except (ValueError, TypeError) as exc:
        logger.warning("Gap check JSON parse failed: {}", exc)
    return None
def synthesize_conclusion(
    para_text: str,
    findings: list[dict],
    client: "BaseLLMClient",
) -> dict:
    """Summarise clause findings into a conclusion, actions and a risk score.

    With no findings a fixed low-risk result is returned without calling the
    LLM. Otherwise the LLM is asked (through the shared retry policy) for a
    JSON summary; a static fallback is returned if the call or parsing fails.

    Args:
        para_text: Analysed source text (first 600 chars go into the prompt).
        findings: Finding dicts with ``status``, ``title`` and ``desc``.
        client: LLM client.

    Returns:
        Dict with ``conclusion``, ``actions``, ``risk_score`` (int clamped to
        0-100), ``highlight_terms`` and ``para_text``. A malformed
        ``risk_score``, ``actions`` or ``highlight_terms`` in the reply is
        replaced by its default individually instead of discarding the whole
        LLM answer.
    """
    if not findings:
        return {
            "conclusion": "No significant compliance gaps found. Continue monitoring regulation updates.",
            "actions": [{"label": "Next action", "value": "Monitor regulation updates"}],
            "risk_score": 10,
            "highlight_terms": [],
            "para_text": para_text[:800],
        }

    # str() guards against a null / non-string status coming from upstream.
    findings_text = "\n".join(
        f"- [{str(f.get('status', '')).upper()}] {f.get('title', '')}: {f.get('desc', '')}"
        for f in findings
    )
    prompt = (
        "You are a compliance analysis expert. Generate a summary report "
        "based on the following compliance findings.\n\n"
        f"Original text (first 600 chars):\n{para_text[:600]}\n\n"
        f"Findings:\n{findings_text}\n\n"
        "Return JSON:\n"
        "{\n"
        '  "conclusion": "Overall compliance conclusion (100-200 chars)",\n'
        '  "actions": [\n'
        '    {"label": "Action label", "value": "Description"},\n'
        '    {"label": "Priority", "value": "High/Medium/Low", "risk": true}\n'
        '  ],\n'
        '  "risk_score": 0-100 (integer, higher=riskier),\n'
        '  "highlight_terms": ["term1", "term2"],  // up to 10 key technical/legal terms actually present in the text\n'
        '  "para_text": "Original text or summary (max 600 chars)"\n'
        "}\n"
        "Return ONLY the JSON object."
    )
    fallback = {
        "conclusion": "Compliance analysis complete. Review findings and create remediation plan.",
        "actions": [
            {"label": "Next action", "value": "Review critical findings"},
            {"label": "Escalation", "value": "Legal review required", "risk": True},
        ],
        "risk_score": 60,
        "highlight_terms": [],
        "para_text": para_text[:800],
    }

    def _do_synthesize():
        resp = client.chat([{"role": "user", "content": prompt}], max_tokens=1200)
        if not resp.is_success:
            # ValueError is one of the exception types the retry policy retries on.
            raise ValueError("LLM returned non-success for synthesis")
        return resp

    def _risk_score(raw) -> int:
        # Accept ints, floats and numeric strings ("72", "72.5"); clamp to 0-100.
        try:
            score = int(float(raw))
        except (TypeError, ValueError, OverflowError):
            return fallback["risk_score"]
        return max(0, min(100, score))

    try:
        response = _llm_retry(_do_synthesize)()
    except Exception as exc:
        logger.warning("synthesize_conclusion LLM call failed after retries: {}", exc)
        return fallback

    try:
        result = _extract_json(response.content)
        if isinstance(result, dict):
            actions = result.get("actions", fallback["actions"])
            terms = result.get("highlight_terms", [])
            return {
                "conclusion": str(result.get("conclusion", fallback["conclusion"])),
                "actions": actions if isinstance(actions, list) else fallback["actions"],
                "risk_score": _risk_score(result.get("risk_score", fallback["risk_score"])),
                "highlight_terms": terms if isinstance(terms, list) else [],
                "para_text": str(result.get("para_text", para_text[:800])),
            }
    except (ValueError, TypeError) as exc:
        logger.warning("Conclusion synthesis JSON parse failed: {}", exc)
    return fallback
# Per-status instruction injected into the suggestion prompt as the "Task:" line.
# Keys are finding statuses; generate_suggestions() falls back to the "warn"
# entry for any status not listed here.
_SUGGESTION_FOCUS = {
    "risk": "Focus on remediation steps, required certifications, and timeline to resolve.",
    "warn": "Focus on identifying the specific compliance gap and how to close it.",
    "ok": "Focus on maintaining compliance evidence and monitoring future changes.",
}
# Static follow-up questions (three per status) returned when the LLM cannot
# supply usable suggestions. Same keys and same "warn" default as above.
_SUGGESTION_FALLBACK = {
    "risk": [
        "What specific certifications or documents are required to remediate this finding?",
        "What is the typical remediation timeline for this type of non-compliance?",
        "Which regulation clause defines the exact requirement?",
    ],
    "warn": [
        "What is the exact gap between the current state and the requirement?",
        "What evidence would demonstrate partial compliance?",
        "Which regulation clause applies to this warning?",
    ],
    "ok": [
        "What documentation should be maintained to evidence this compliance?",
        "How should this area be monitored as regulations evolve?",
        "Are there related clauses that may affect this compliant area?",
    ],
}
def build_finding_context(finding: "FindingRecord", analysis: "AnalysisRecord") -> str:
    """Render a finding plus its analysis metadata as LLM grounding text.

    Produces seven newline-terminated ``Label: value`` lines (document,
    standard, 1-based finding number with title, status, clause reference or
    ``N/A``, description, overall conclusion) so a finding chat thread does not
    depend on the frontend supplying segment context.
    """
    clause_ref = finding.clause_ref or "N/A"
    lines = [
        f"Document: {analysis.doc_name}",
        f"Standard: {analysis.standard_name}",
        f"Finding [{finding.seq + 1}]: {finding.title}",
        f"Status: {finding.status}",
        f"Clause reference: {clause_ref}",
        f"Description: {finding.description}",
        f"Overall conclusion: {analysis.conclusion}",
    ]
    return "\n".join(lines) + "\n"
def generate_suggestions(
    finding: "FindingRecord",
    analysis: "AnalysisRecord",
    client: "BaseLLMClient",
) -> list[str]:
    """Generate 3 context-aware follow-up questions for a finding chat thread.

    Args:
        finding: The finding the chat thread is about.
        analysis: The analysis the finding belongs to.
        client: LLM client.

    Returns:
        Exactly 3 question strings. The static template for the finding's
        status (``warn`` template for unknown statuses) is returned — as a
        fresh list — whenever the LLM call raises, is unsuccessful, or does not
        yield a JSON object with at least three ``questions``.
    """
    # Copy so callers can never mutate the shared module-level template list.
    fallback = list(_SUGGESTION_FALLBACK.get(finding.status, _SUGGESTION_FALLBACK["warn"]))
    context = build_finding_context(finding, analysis)
    focus = _SUGGESTION_FOCUS.get(finding.status, _SUGGESTION_FOCUS["warn"])
    prompt = (
        f"{context}\n\n"
        f"Task: {focus}\n"
        "Generate exactly 3 concise follow-up questions a compliance analyst would ask.\n"
        'Return JSON: {"questions": ["question 1", "question 2", "question 3"]}\n'
        "Return ONLY the JSON object."
    )
    try:
        response = client.chat([{"role": "user", "content": prompt}], max_tokens=300)
    except Exception as exc:
        # Suggestions are optional UI sugar; a transport error must not break the thread.
        logger.warning("generate_suggestions LLM call failed: {}", exc)
        return fallback
    if not response.is_success:
        return fallback
    try:
        result = _extract_json(response.content)
    except (ValueError, TypeError) as exc:
        logger.warning("generate_suggestions JSON parse failed: {}", exc)
        return fallback
    # The reply may legally be a bare JSON array; calling .get() on it used to
    # raise an uncaught AttributeError.
    if not isinstance(result, dict):
        return fallback
    questions = result.get("questions", [])
    if isinstance(questions, list) and len(questions) >= 3:
        return [str(q) for q in questions[:3]]
    return fallback

View File

@@ -277,7 +277,6 @@ class DocumentCommandService:
message="Document record created",
)
temp_path = ""
try:
self.binary_store.save(
object_name=object_name,
@@ -297,117 +296,20 @@ class DocumentCommandService:
stage="store",
message="Source file stored",
)
suffix = os.path.splitext(file_name)[1]
with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
temp_file.write(content)
temp_path = temp_file.name
parsed_document = self.parser.parse(
file_path=temp_path,
# Delegate parse → embed → index to the shared processing method.
# This same method is invoked by the Celery worker for async processing.
return self._process_document(
doc_id=doc_id,
doc_name=final_doc_name,
)
self._safe_mark_run_parsed(doc_id=doc_id, run_id=run_id, parsed_document=parsed_document)
artifact_keys: dict[str, str] = {}
try:
artifact_keys = self._save_parse_artifacts(doc_id=doc_id, parsed_document=parsed_document)
except Exception:
logger.warning("Parse artifact binary persistence failed for doc_id={}", doc_id)
self.document_repository.update_status(
doc_id,
DocumentStatus.PARSED,
parser_name=parsed_document.parser_name,
metadata={
"parser_backend": parsed_document.parser_name,
"parse_task_id": parsed_document.metadata.get("task_id", ""),
"layout_count": parsed_document.metadata.get("layout_count", len(parsed_document.raw_layouts)),
"structure_node_count": len(parsed_document.structure_nodes),
"semantic_block_count": len(parsed_document.semantic_blocks),
"vector_chunk_count": len(parsed_document.vector_chunks),
"artifact_keys": artifact_keys,
"processing_stage": "parsed",
},
)
current_status = DocumentStatus.PARSED
current_stage = "embed"
self._safe_replace_processing_artifacts(doc_id=doc_id, run_id=run_id, artifact_keys=artifact_keys)
self._safe_append_status_event(
doc_id=doc_id,
run_id=run_id,
from_status=DocumentStatus.STORED.value,
to_status=DocumentStatus.PARSED.value,
stage="parse",
message="Document parsed",
metadata={"artifact_count": len(artifact_keys)},
)
if self.parse_artifact_store:
try:
self.parse_artifact_store.save(
doc_id,
parsed_document.structure_nodes,
parsed_document.semantic_blocks,
)
except Exception:
logger.warning("ParseArtifactStore.save failed for doc_id={}", doc_id)
chunks = self.chunk_builder.build(
parsed_document=parsed_document,
file_name=file_name,
final_doc_name=final_doc_name,
content=content,
regulation_type=regulation_type,
version=version,
)
if not chunks:
raise ValueError("解析完成但没有生成可入库的 chunks")
vectors = self.embedding_provider.embed_texts([chunk.embedding_text for chunk in chunks])
current_stage = "index"
inserted = self.vector_index.upsert(chunks, vectors)
if inserted != len(chunks):
logger.warning("Milvus upsert count mismatched: inserted={}, chunks={}", inserted, len(chunks))
health = self.vector_index.health()
self.document_repository.update_status(
doc_id,
DocumentStatus.INDEXED,
chunk_count=len(chunks),
summary="",
summary_latency_ms=0,
index_name=health.get("collection_name", ""),
metadata={
"index_collection": health.get("collection_name", ""),
"processing_stage": "indexed",
},
)
current_status = DocumentStatus.INDEXED
index_name = health.get("collection_name", "")
self._safe_mark_run_indexed(
doc_id=doc_id,
generate_summary=generate_summary,
run_id=run_id,
chunk_count=len(chunks),
index_name=index_name,
)
self._safe_append_status_event(
doc_id=doc_id,
run_id=run_id,
from_status=DocumentStatus.PARSED.value,
to_status=DocumentStatus.INDEXED.value,
stage="index",
message="Document indexed",
metadata={"chunk_count": len(chunks), "index_name": index_name},
)
stored = self.document_repository.get(doc_id)
return DocumentProcessResult(
doc_id=doc_id,
doc_name=final_doc_name,
status=(stored.status.value if stored else DocumentStatus.INDEXED.value),
message="处理成功",
num_chunks=len(chunks),
summary=stored.summary if stored else "",
summary_latency_ms=stored.summary_latency_ms if stored else 0,
)
except Exception as exc:
logger.exception("文档处理失败: doc_id={}", doc_id)
logger.exception("文档存储失败: doc_id={}", doc_id)
failure_stage = current_stage
self.document_repository.update_status(
doc_id,
@@ -439,6 +341,183 @@ class DocumentCommandService:
status=DocumentStatus.FAILED.value,
message=f"文档处理失败: {exc}",
)
def store_document(
    self,
    *,
    doc_id: str | None = None,
    file_name: str,
    content: bytes,
    content_type: str,
    doc_name: str | None,
    regulation_type: str,
    version: str,
    generate_summary: bool,
) -> tuple[str, str | None]:
    """Store the binary file and create the Document record.

    Returns (doc_id, run_id). Does NOT parse, embed, or index.
    This is the fast synchronous first step; the caller is responsible for
    scheduling the follow-up processing.

    Raises:
        Exception: Whatever the repository or binary store raises. When the
            binary save fails, the document and its processing run are first
            marked FAILED (stage ``store``) so status polling does not show
            the upload as still in progress; the original error is then
            re-raised.
    """
    # NOTE(review): ids are the first 8 hex chars of a UUID4 — confirm the
    # collision risk is acceptable for the expected document volume.
    doc_id = doc_id or str(uuid.uuid4())[:8]
    final_doc_name = doc_name or file_name
    object_name = f"{doc_id}/{file_name}"

    document = Document(
        doc_id=doc_id,
        doc_name=final_doc_name,
        file_name=file_name,
        object_name=object_name,
        content_type=content_type,
        size_bytes=len(content),
        regulation_type=regulation_type,
        version=version,
        metadata={"generate_summary": generate_summary},
    )
    self.document_repository.create(document)
    run_id = self._safe_create_processing_run(
        doc_id=doc_id, trigger_type="upload", generate_summary=generate_summary
    )

    try:
        self.binary_store.save(
            object_name=object_name, data=content,
            content_type=content_type, metadata={"doc_id": doc_id},
        )
    except Exception as exc:
        logger.exception("文档存储失败: doc_id={}", doc_id)
        try:
            self.document_repository.update_status(
                doc_id, DocumentStatus.FAILED, error_message=str(exc),
                metadata={"failure_reason": str(exc), "processing_stage": "failed", "failure_stage": "store"},
            )
        except Exception:
            # Never let the bookkeeping failure hide the original storage error.
            logger.warning("Failed to mark document as FAILED for doc_id={}", doc_id)
        self._safe_mark_run_failed(
            doc_id=doc_id, run_id=run_id, failure_stage="store", error_message=str(exc)
        )
        self._safe_append_status_event(
            doc_id=doc_id, run_id=run_id,
            from_status=DocumentStatus.PENDING.value, to_status=DocumentStatus.FAILED.value,
            stage="store", message=str(exc),
        )
        raise

    self.document_repository.update_status(doc_id, DocumentStatus.STORED)
    self._safe_mark_run_stored(doc_id=doc_id, run_id=run_id)
    self._safe_append_status_event(
        doc_id=doc_id, run_id=run_id,
        from_status=DocumentStatus.PENDING.value, to_status=DocumentStatus.STORED.value,
        stage="store", message="Source file stored",
    )
    return doc_id, run_id
def _process_document(
self,
*,
doc_id: str,
file_name: str,
final_doc_name: str,
content: bytes,
regulation_type: str,
version: str,
generate_summary: bool,
run_id: str | None = None,
) -> DocumentProcessResult:
"""Run parse → chunk → embed → index for a document that is already stored.
Called both synchronously (from upload_and_process) and asynchronously
(from the Celery process_document_task worker). All side-effects write
through DocumentProcessingStore so callers can poll progress.
"""
current_status = DocumentStatus.STORED
current_stage = "parse"
temp_path = ""
try:
suffix = os.path.splitext(file_name)[1]
with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
temp_file.write(content)
temp_path = temp_file.name
parsed_document = self.parser.parse(
file_path=temp_path,
doc_id=doc_id,
doc_name=final_doc_name,
)
self._safe_mark_run_parsed(doc_id=doc_id, run_id=run_id, parsed_document=parsed_document)
artifact_keys: dict[str, str] = {}
try:
artifact_keys = self._save_parse_artifacts(doc_id=doc_id, parsed_document=parsed_document)
except Exception:
logger.warning("Parse artifact binary persistence failed for doc_id={}", doc_id)
self.document_repository.update_status(
doc_id,
DocumentStatus.PARSED,
parser_name=parsed_document.parser_name,
metadata={
"parser_backend": parsed_document.parser_name,
"parse_task_id": parsed_document.metadata.get("task_id", ""),
"layout_count": parsed_document.metadata.get("layout_count", len(parsed_document.raw_layouts)),
"structure_node_count": len(parsed_document.structure_nodes),
"semantic_block_count": len(parsed_document.semantic_blocks),
"vector_chunk_count": len(parsed_document.vector_chunks),
"artifact_keys": artifact_keys,
"processing_stage": "parsed",
},
)
current_status = DocumentStatus.PARSED
current_stage = "embed"
self._safe_replace_processing_artifacts(doc_id=doc_id, run_id=run_id, artifact_keys=artifact_keys)
self._safe_append_status_event(
doc_id=doc_id, run_id=run_id,
from_status=DocumentStatus.STORED.value, to_status=DocumentStatus.PARSED.value,
stage="parse", message="Document parsed", metadata={"artifact_count": len(artifact_keys)},
)
if self.parse_artifact_store:
try:
self.parse_artifact_store.save(
doc_id, parsed_document.structure_nodes, parsed_document.semantic_blocks,
)
except Exception:
logger.warning("ParseArtifactStore.save failed for doc_id={}", doc_id)
chunks = self.chunk_builder.build(
parsed_document=parsed_document,
regulation_type=regulation_type,
version=version,
)
if not chunks:
raise ValueError("解析完成但没有生成可入库的 chunks")
vectors = self.embedding_provider.embed_texts([chunk.embedding_text for chunk in chunks])
current_stage = "index"
inserted = self.vector_index.upsert(chunks, vectors)
if inserted != len(chunks):
logger.warning("Milvus upsert count mismatched: inserted={}, chunks={}", inserted, len(chunks))
health = self.vector_index.health()
index_name = health.get("collection_name", "")
self.document_repository.update_status(
doc_id, DocumentStatus.INDEXED,
chunk_count=len(chunks), summary="", summary_latency_ms=0,
index_name=index_name,
metadata={"index_collection": index_name, "processing_stage": "indexed"},
)
self._safe_mark_run_indexed(doc_id=doc_id, run_id=run_id, chunk_count=len(chunks), index_name=index_name)
self._safe_append_status_event(
doc_id=doc_id, run_id=run_id,
from_status=DocumentStatus.PARSED.value, to_status=DocumentStatus.INDEXED.value,
stage="index", message="Document indexed",
metadata={"chunk_count": len(chunks), "index_name": index_name},
)
stored = self.document_repository.get(doc_id)
return DocumentProcessResult(
doc_id=doc_id, doc_name=final_doc_name,
status=(stored.status.value if stored else DocumentStatus.INDEXED.value),
message="处理成功", num_chunks=len(chunks),
summary=stored.summary if stored else "",
summary_latency_ms=stored.summary_latency_ms if stored else 0,
)
except Exception as exc:
logger.exception("文档处理失败: doc_id={}", doc_id)
self.document_repository.update_status(
doc_id, DocumentStatus.FAILED, error_message=str(exc),
metadata={"failure_reason": str(exc), "processing_stage": "failed", "failure_stage": current_stage},
)
self._safe_mark_run_failed(
doc_id=doc_id, run_id=run_id, failure_stage=current_stage, error_message=str(exc)
)
self._safe_append_status_event(
doc_id=doc_id, run_id=run_id,
from_status=current_status.value, to_status=DocumentStatus.FAILED.value,
stage=current_stage, message=str(exc),
)
return DocumentProcessResult(
doc_id=doc_id, doc_name=final_doc_name,
status=DocumentStatus.FAILED.value, message=f"文档处理失败: {exc}",
)
finally:
if temp_path and os.path.exists(temp_path):
try:
@@ -446,7 +525,6 @@ class DocumentCommandService:
except OSError:
logger.warning("临时文件清理失败: {}", temp_path)
def delete(self, doc_id: str) -> bool:
"""Delete document record, binary file, and vector chunks."""
document = self.document_repository.get(doc_id)

View File

@@ -0,0 +1,147 @@
"""Orchestrates regulatory source crawlers and LLM enrichment pipeline."""
from __future__ import annotations
import hashlib
from typing import Any, Generator
from loguru import logger
from app.infrastructure.perception.base_event_store import BaseEventStore
from app.infrastructure.perception.crawlers.base import BaseCrawler, RawEvent
from app.infrastructure.perception.llm_pipeline import LlmPipeline
def _event_id(source: str, standard_code: str) -> str:
    """Derive a stable 12-hex-character event id from source and standard code.

    The id is the first 12 characters of the SHA-256 hex digest of
    ``"<source>-<standard_code>"``, so the same pair always maps to the same id.
    """
    identity = f"{source}-{standard_code}"
    digest = hashlib.sha256(identity.encode()).hexdigest()
    return digest[:12]
def _content_hash(raw_text: str) -> str:
    """Return the full SHA-256 hex digest of the text (default str encoding)."""
    hasher = hashlib.sha256()
    hasher.update(raw_text.encode())
    return hasher.hexdigest()
def _raw_to_dict(raw: RawEvent, event_id: str, content_hash: str) -> dict:
    """Convert a crawled RawEvent into the dict shape written to the event store.

    Crawler-provided fields are copied as-is; ``impact_level`` starts at
    ``"medium"`` and ``previous_hash`` at ``None`` (the caller overwrites the
    latter for updates).
    """
    event: dict = {"id": event_id}
    # Copied in this order so the resulting key order matches the stored layout.
    for field in (
        "source",
        "source_label",
        "standard_code",
        "title",
        "summary",
        "full_text_url",
        "status",
    ):
        event[field] = getattr(raw, field)
    event["impact_level"] = "medium"
    for field in ("published_at", "effective_at", "category", "tags"):
        event[field] = getattr(raw, field)
    event["content_hash"] = content_hash
    event["previous_hash"] = None
    return event
class CrawlService:
    """Orchestrate crawlers, hash-based change detection, and LLM enrichment."""

    def __init__(
        self,
        crawlers: dict[str, BaseCrawler],
        event_store: BaseEventStore,
        llm_pipeline: LlmPipeline,
        retrieval_service: Any,
        max_events_per_source: int = 100,
    ) -> None:
        """Wire the service.

        Args:
            crawlers: Registered crawlers keyed by source name.
            event_store: Store used to look up and upsert events.
            llm_pipeline: Pipeline providing structure extraction, impact
                assessment and diffing.
            retrieval_service: Passed through to impact assessment.
            max_events_per_source: ``limit`` handed to each crawler's
                ``fetch``. Defaults to 100, the value that used to be
                hard-coded, so existing callers are unaffected.
        """
        self._crawlers = crawlers
        self._store = event_store
        self._pipeline = llm_pipeline
        self._retrieval = retrieval_service
        self._max_events = max_events_per_source

    def run_crawl(
        self, sources: list[str] | None = None
    ) -> Generator[dict, None, None]:
        """Run a crawl for the selected sources, yielding SSE-ready progress dicts.

        Args:
            sources: Source keys to crawl; ``None`` or an empty list means all
                registered sources.

        Yields:
            ``{"event": "progress" | "error" | "done", "data": ...}`` dicts.
            Unknown sources and crawler failures produce an ``error`` item and
            the run continues with the next source. The final item is ``done``
            with the totals of new and updated events.
        """
        targets = sources or list(self._crawlers.keys())
        total_new = 0
        total_updated = 0

        for source_key in targets:
            crawler = self._crawlers.get(source_key)
            if not crawler:
                yield {"event": "error", "data": f"Unknown source: {source_key}"}
                continue

            yield {"event": "progress", "data": {"source": source_key, "stage": "fetching"}}
            try:
                raw_events = crawler.fetch(limit=self._max_events)
            except Exception as exc:
                logger.exception("Crawler failed source={}", source_key)
                yield {"event": "error", "data": {"source": source_key, "message": str(exc)}}
                continue

            yield {
                "event": "progress",
                "data": {"source": source_key, "stage": "processing", "fetched": len(raw_events)},
            }

            new_count = 0
            updated_count = 0
            for raw in raw_events:
                eid = _event_id(raw.source, raw.standard_code)
                new_hash = _content_hash(raw.raw_text or raw.title)
                existing = self._store.get(eid)
                # Unchanged content: skip before spending any LLM calls.
                if existing and existing.get("content_hash") == new_hash:
                    continue

                is_update = existing is not None
                # NOTE(review): the diff baseline is the previously stored
                # *summary*, while the new side is the full raw text — the old
                # raw text is not persisted. Confirm this comparison is intended.
                old_text = existing.get("summary", "") if is_update else ""
                previous_hash = existing.get("content_hash") if is_update else None

                event_dict = _raw_to_dict(raw, eid, new_hash)
                event_dict["previous_hash"] = previous_hash

                # Each enrichment step is best-effort: a failure is logged and
                # the event is still stored with whatever was gathered.
                try:
                    structure = self._pipeline.extract_structure(event_dict)
                    event_dict.update(structure)
                except Exception as exc:
                    logger.warning("Structure extraction failed id={} err={}", eid, exc)

                try:
                    affected = self._pipeline.assess_impact(event_dict, self._retrieval)
                    event_dict["affected_docs"] = affected
                except Exception as exc:
                    logger.warning("Impact assessment failed id={} err={}", eid, exc)

                if is_update and old_text and raw.raw_text:
                    try:
                        diff = self._pipeline.compute_diff(old_text, raw.raw_text)
                        event_dict["change_summary"] = diff.get("change_summary")
                        event_dict["changed_sections"] = diff.get("changed_sections")
                    except Exception as exc:
                        logger.warning("Diff failed id={} err={}", eid, exc)

                self._store.upsert(event_dict)
                if is_update:
                    updated_count += 1
                else:
                    new_count += 1

            total_new += new_count
            total_updated += updated_count
            yield {
                "event": "progress",
                "data": {
                    "source": source_key,
                    "stage": "done",
                    "new": new_count,
                    "updated": updated_count,
                },
            }

        yield {
            "event": "done",
            "data": {"total_new": total_new, "total_updated": total_updated},
        }

View File

@@ -6,7 +6,7 @@ import json
from typing import Generator
from app.application.knowledge.services import KnowledgeRetrievalService
from app.infrastructure.perception.mock_event_store import MockEventStore
from app.infrastructure.perception.base_event_store import BaseEventStore
from app.services.llm.llm_factory import get_llm_client
from app.config.settings import settings
@@ -22,7 +22,7 @@ class PerceptionService:
def __init__(
self,
event_store: MockEventStore,
event_store: BaseEventStore,
retrieval_service: KnowledgeRetrievalService,
) -> None:
self._store = event_store

View File

@@ -82,6 +82,22 @@ class Settings(BaseSettings):
parser_backend: str = Field(default="aliyun", description="解析后端(local/aliyun)")
chunk_backend: str = Field(default="aliyun", description="分块后端(local/aliyun)")
document_repository_backend: str = Field(default="json", description="文档元数据存储后端 (json/postgres)")
# When True, document processing is enqueued to Celery workers via Redis.
# When False (default), processing runs in a FastAPI BackgroundTask in the same process —
# no external worker needed. Switch to True only when a Celery worker is running.
use_celery_worker: bool = Field(default=False, description="使用 Celery Worker 异步处理文档 (需要 Worker 运行中)")
# ── Perception crawl ──────────────────────────────────────────────────────
perception_crawl_timeout_seconds: int = Field(
default=120, description="HTTP timeout for regulatory source crawlers."
)
perception_max_events_per_source: int = Field(
default=100, description="Maximum events fetched per source per crawl run."
)
perception_diff_similarity_threshold: float = Field(
default=0.85,
description="Cosine similarity below which a paragraph is flagged as changed.",
)
# Keep configuration setup explicit so runtime behavior is easy to reason about.
api_host: str = Field(default="0.0.0.0", description="API服务地址")
@@ -109,6 +125,7 @@ class Settings(BaseSettings):
rag_retrieval_top_k: int = Field(default=20, description="精排前召回候选数量reranker 启用时生效)")
rag_max_context_tokens: int = Field(default=2000, description="RAG最大上下文token数")
rag_summary_max_tokens: int = Field(default=10240, description="文档摘要最大token数")
rag_skills_max_tokens: int = Field(default=2048, description="技能类 RAG 最大 token 数")
reranker_enabled: bool = Field(default=False, description="是否启用 Cross-Encoder 精排")
reranker_base_url: str = Field(default="", description="Reranker API 地址")
@@ -124,6 +141,26 @@ class Settings(BaseSettings):
# Keep configuration setup explicit so runtime behavior is easy to reason about.
session_max_sessions: int = Field(default=100, description="最大会话数量")
session_timeout_minutes: int = Field(default=30, description="会话超时时间(分钟)")
session_backend: str = Field(
default="memory",
description="会话存储后端 (memory | redis)。redis 需要 Redis 可用。",
)
# ── Auth ──────────────────────────────────────────────────────────────────
# Generate a strong secret: python -c "import secrets; print(secrets.token_hex(32))"
auth_secret_key: str = Field(
default="change-me-in-production-must-be-32-or-more-characters-long",
description="JWT signing secret. MUST be changed in production.",
)
auth_algorithm: str = Field(default="HS256", description="JWT signing algorithm.")
auth_token_expire_minutes: int = Field(default=480, description="JWT TTL in minutes (default 8 hours).")
auth_enabled: bool = Field(default=True, description="Set False to bypass auth (development only).")
# ── CORS ──────────────────────────────────────────────────────────────────
cors_allow_origins: str = Field(
default="http://localhost:5173",
description="Comma-separated allowed CORS origins. Never use * in production.",
)
@lru_cache
def get_settings() -> Settings:

View File

@@ -0,0 +1,10 @@
"""Auth domain: role definitions and token claim models.
The domain layer defines what a user identity looks like (UserClaims) and
what roles exist (UserRole). Infrastructure details (JWT, bcrypt, PostgreSQL)
live under infrastructure/auth and never leak into this package.
"""
from .models import UserClaims, UserRole
__all__ = ["UserClaims", "UserRole"]

View File

@@ -0,0 +1,42 @@
"""Auth domain models: roles and token claims.
UserRole defines the four roles from PPT Slide 12.
UserClaims is what the JWT decodes to — it is the identity object passed
through FastAPI dependency injection to route handlers.
"""
from __future__ import annotations
import enum
from dataclasses import dataclass
class UserRole(str, enum.Enum):
    """Access roles mirroring the four-role RBAC matrix from the product spec.

    The ``str`` mix-in makes every member compare equal to its plain string
    value, so roles round-trip through JWT claims and database columns
    without extra conversion (``UserRole("admin") is UserRole.ADMIN``).

    ADMIN — full platform access including system management.
    LEGAL — knowledge query, document review, compliance checks.
    EHS — knowledge query, perception/regulatory signals.
    READONLY — knowledge query only.
    """

    ADMIN = "admin"
    LEGAL = "legal"
    EHS = "ehs"
    READONLY = "readonly"
@dataclass
class UserClaims:
    """Decoded JWT payload representing an authenticated user.

    Instances are created by JWTHandler.decode_token() and injected into
    route handlers via the get_current_user FastAPI dependency.
    """

    # Unique user identifier (UUID string stored in PostgreSQL users table).
    # Carried in the token's "sub" claim.
    user_id: str
    # Display name used for audit log entries.
    username: str
    # Role determines which resources the user may access.
    role: UserRole

View File

@@ -0,0 +1,66 @@
"""Domain ports for compliance history persistence."""
from __future__ import annotations
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from datetime import datetime
from typing import Optional
@dataclass
class FindingRecord:
    """Single finding row linked to an analysis."""

    # Identifier of this finding, as a string.
    id: str
    # Identifier of the AnalysisRecord this finding belongs to.
    analysis_id: str
    # Position of the finding within its analysis; used for ordering.
    # NOTE(review): appears to be zero-based — the DOCX report prints seq + 1.
    seq: int
    # Short headline of the finding.
    title: str
    # Longer explanation of the finding.
    description: str
    status: str  # "ok" | "warn" | "risk"
    # Optional reference to the clause the finding relates to.
    clause_ref: Optional[str] = None
@dataclass
class AnalysisRecord:
    """Full compliance analysis record with nested findings."""

    id: str  # UUID string; empty string means not yet persisted
    # Timestamp at which the analysis was created.
    created_at: datetime
    # Who created the analysis, if known.
    created_by: Optional[str]
    # Name of the analysed document.
    doc_name: str
    # Name of the standard the document was checked against.
    standard_name: str
    # Overall risk score.
    # NOTE(review): the DOCX report renders it as "<score> / 100" — confirm the range.
    risk_score: int
    # Free-text conclusion / executive summary.
    conclusion: str
    actions: list[dict]  # serialised action items
    # Paragraph text associated with the analysis.
    para_text: str
    highlight_terms: list[str]
    # Nested findings; a fresh empty list per instance by default.
    findings: list[FindingRecord] = field(default_factory=list)
class ComplianceRepository(ABC):
    """Port for persisting and retrieving compliance analysis records.

    Concrete adapters implement every method below; the domain layer depends
    only on this interface.
    """

    @abstractmethod
    def save_analysis(self, record: AnalysisRecord) -> str:
        """Persist a new analysis record and return the assigned UUID string."""

    @abstractmethod
    def list_analyses(self, limit: int = 50, offset: int = 0) -> list[AnalysisRecord]:
        """Return analyses ordered by created_at DESC, without nested findings."""

    @abstractmethod
    def get_analysis(self, analysis_id: str) -> Optional[AnalysisRecord]:
        """Return a single analysis with all nested findings, or None."""

    @abstractmethod
    def delete_analysis(self, analysis_id: str) -> None:
        """Delete an analysis and all related findings and chat messages (cascade)."""

    @abstractmethod
    def save_message(self, analysis_id: str, finding_id: str, role: str, content: str) -> str:
        """Persist a chat message and return its UUID string."""

    @abstractmethod
    def get_messages(self, finding_id: str) -> list[dict]:
        """Return chat messages for a finding ordered by created_at ASC.

        Each dict has keys: id, role, content, created_at (ISO string).
        """

View File

@@ -0,0 +1,5 @@
"""JWT token creation and validation infrastructure.
JWTHandler is the only component in this package. It is wired through
shared/bootstrap.py and injected into FastAPI dependencies.
"""

View File

@@ -0,0 +1,82 @@
"""JWT access token creation and decoding.
Uses python-jose for HS256 token signing. Token expiry is enforced at
decode time so expired tokens are rejected even if the signature is valid.
"""
from __future__ import annotations
from datetime import UTC, datetime, timedelta
from typing import Any
from jose import JWTError, jwt
from loguru import logger
from app.domain.auth.models import UserClaims, UserRole
class JWTHandler:
    """Create and validate HS256 JWT access tokens.

    A single shared instance is wired by bootstrap.py. Use
    get_jwt_handler() from shared.bootstrap for all token operations.
    """

    def __init__(
        self,
        *,
        secret_key: str,
        algorithm: str = "HS256",
        expire_minutes: int = 480,
    ) -> None:
        """Initialise the handler with signing credentials and token lifetime.

        Args:
            secret_key: Shared secret used to sign and verify tokens.
            algorithm: JWT signing algorithm name passed to python-jose.
            expire_minutes: Lifetime of newly issued tokens, in minutes.
        """
        self._secret = secret_key
        self._algorithm = algorithm
        self._expire_minutes = expire_minutes

    def create_access_token(
        self,
        *,
        user_id: str,
        username: str,
        role: str,
    ) -> str:
        """Return a signed JWT containing user identity and role claims.

        The token carries ``sub`` (user id), ``username``, ``role``, ``iat``
        and ``exp`` (``iat`` plus the configured lifetime).
        """
        now = datetime.now(UTC)
        payload: dict[str, Any] = {
            "sub": user_id,
            "username": username,
            "role": role,
            "iat": now,
            "exp": now + timedelta(minutes=self._expire_minutes),
        }
        return jwt.encode(payload, self._secret, algorithm=self._algorithm)

    def decode_token(self, token: str) -> UserClaims:
        """Decode and validate a JWT, returning UserClaims.

        Raises:
            ValueError: "Token expired" when the token is past its expiry,
                "Invalid token: ..." on tampering or any other validation
                failure, and "Token missing subject claim" when ``sub`` is
                absent. Callers therefore never need to know about jose.
        """
        # Imported locally so the file's top-level import line stays untouched.
        from jose import ExpiredSignatureError

        try:
            payload = jwt.decode(token, self._secret, algorithms=[self._algorithm])
        except ExpiredSignatureError as exc:
            # Dedicated exception type: more reliable than inspecting the
            # human-readable error message for the word "expired".
            raise ValueError("Token expired") from exc
        except JWTError as exc:
            raise ValueError(f"Invalid token: {exc}") from exc

        user_id = payload.get("sub")
        if not user_id:
            raise ValueError("Token missing subject claim")

        # An explicit null claim would otherwise leak None into UserClaims.
        username = payload.get("username") or ""
        role_str = payload.get("role", UserRole.READONLY.value)
        try:
            role = UserRole(role_str)
        except ValueError:
            # Unknown roles are downgraded to the least-privileged role
            # rather than rejected.
            logger.warning("Unknown role in token: {}, defaulting to readonly", role_str)
            role = UserRole.READONLY

        return UserClaims(user_id=user_id, username=username, role=role)

View File

@@ -0,0 +1,113 @@
"""PostgreSQL-backed user store for authentication.
Manages a `users` table with hashed passwords and roles.
Provides lookup by username for the login flow.
Table DDL is auto-applied on first connection.
"""
from __future__ import annotations
from dataclasses import dataclass
from typing import Optional
import psycopg2
import psycopg2.extras
from loguru import logger
from passlib.context import CryptContext
from app.config.settings import settings
# bcrypt context — work factor 12 is a good production default.
_PWD_CTX = CryptContext(schemes=["bcrypt"], deprecated="auto")
# DDL executed once to ensure the table exists.
_CREATE_TABLE_SQL = """
CREATE TABLE IF NOT EXISTS users (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
username VARCHAR(100) UNIQUE NOT NULL,
hashed_pw TEXT NOT NULL,
role VARCHAR(50) NOT NULL DEFAULT 'readonly',
is_active BOOLEAN NOT NULL DEFAULT TRUE,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
"""
@dataclass
class UserRecord:
    """A single row from the users table."""

    # Primary key of the row, converted to a string.
    id: str
    # Unique login name.
    username: str
    # Password hash as stored in the hashed_pw column (never the plain text).
    hashed_pw: str
    # Role name stored for the user; the table default is 'readonly'.
    role: str
    # False for deactivated accounts, which may not authenticate.
    is_active: bool
class PostgresUserStore:
    """Read and verify users stored in the PostgreSQL users table.

    The connection is opened when the store is constructed and shared for
    the lifetime of the instance.
    """

    def __init__(self) -> None:
        """Connect using the configured PostgreSQL settings and ensure the users table exists."""
        self._conn = psycopg2.connect(
            host=settings.postgres_host,
            port=settings.postgres_port,
            user=settings.postgres_user,
            password=settings.postgres_password,
            dbname=settings.postgres_db,
            cursor_factory=psycopg2.extras.RealDictCursor,
        )
        # Every statement commits on its own; no explicit transactions are used.
        self._conn.autocommit = True
        self._ensure_table()

    def _ensure_table(self) -> None:
        """Create the users table if it does not already exist."""
        with self._conn.cursor() as cur:
            # Enable pgcrypto so gen_random_uuid() is available for UUID primary keys.
            # Best effort: the role may lack the privilege, and the function may
            # already be available without the extension.
            try:
                cur.execute("CREATE EXTENSION IF NOT EXISTS pgcrypto;")
            except Exception as exc:
                logger.warning("Could not enable pgcrypto (continuing): {}", exc)
                self._conn.rollback()
            cur.execute(_CREATE_TABLE_SQL)

    def get_by_username(self, username: str) -> Optional[UserRecord]:
        """Return a UserRecord for the given username, or None if not found."""
        with self._conn.cursor() as cur:
            cur.execute(
                "SELECT id, username, hashed_pw, role, is_active "
                "FROM users WHERE username = %s",
                (username,),
            )
            row = cur.fetchone()
        if row is None:
            return None
        return UserRecord(
            id=str(row["id"]),
            username=row["username"],
            hashed_pw=row["hashed_pw"],
            role=row["role"],
            is_active=row["is_active"],
        )

    def verify_password(self, plain: str, hashed: str) -> bool:
        """Return True if `plain` matches the stored bcrypt hash.

        A stored value that is not a recognisable hash (or an input the
        hashing backend rejects) counts as a failed verification instead of
        propagating an exception into the login flow.
        """
        try:
            return _PWD_CTX.verify(plain, hashed)
        except (ValueError, TypeError) as exc:
            logger.warning("Password verification could not be performed: {}", exc)
            return False

    def authenticate(self, username: str, password: str) -> Optional[UserRecord]:
        """Return the UserRecord if credentials are valid, else None.

        Unknown and inactive accounts still pay the cost of one hash
        verification so response time does not reveal whether a username
        exists.
        """
        user = self.get_by_username(username)
        if user is None or not user.is_active:
            _PWD_CTX.dummy_verify()
            return None
        if not self.verify_password(password, user.hashed_pw):
            return None
        return user

    @staticmethod
    def hash_password(plain: str) -> str:
        """Hash a plain-text password with bcrypt."""
        return _PWD_CTX.hash(plain)

View File

@@ -0,0 +1,101 @@
"""DOCX report generator for compliance analysis results.
Uses python-docx (already in requirements.txt). Returns raw bytes so the
caller can stream the response without writing to disk.
"""
from __future__ import annotations
from datetime import datetime, timezone
from io import BytesIO
from docx import Document
from docx.shared import Pt, RGBColor
from docx.enum.text import WD_ALIGN_PARAGRAPH
from app.domain.compliance.ports import AnalysisRecord
_STATUS_LABEL = {"ok": "Compliant", "warn": "Warning", "risk": "Non-Compliant"}
_STATUS_COLOR = {
"ok": RGBColor(0x22, 0x8B, 0x22),
"warn": RGBColor(0xFF, 0x8C, 0x00),
"risk": RGBColor(0xDC, 0x14, 0x3C),
}
def generate_docx(record: AnalysisRecord) -> bytes:
    """Generate a compliance report DOCX and return its raw bytes.

    Structure:
    - Cover: document name, standard, date, risk score
    - Executive summary (conclusion)
    - Findings table
    - Recommended actions
    - Footer note

    Args:
        record: The analysis to render, including its nested findings.

    Returns:
        The serialised .docx document; nothing is written to disk.
    """
    doc = Document()

    # ── Cover ──────────────────────────────────────────────────────────────────
    title_para = doc.add_heading("Compliance Analysis Report", level=0)
    title_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
    doc.add_paragraph("")

    # The label says "UTC", so timezone-aware timestamps are converted first.
    # Naive timestamps are printed unchanged (they carry no offset to convert).
    created = record.created_at
    if created is not None and created.tzinfo is not None:
        created = created.astimezone(timezone.utc)
    created_text = created.strftime("%Y-%m-%d %H:%M UTC") if created else ""

    cover_rows = [
        ("Document", record.doc_name),
        ("Standard", record.standard_name),
        ("Date", created_text),
        ("Risk Score", f"{record.risk_score} / 100"),
    ]
    meta_table = doc.add_table(rows=len(cover_rows), cols=2)
    meta_table.style = "Table Grid"
    for row_idx, (label, value) in enumerate(cover_rows):
        meta_table.cell(row_idx, 0).text = label
        meta_table.cell(row_idx, 1).text = value

    # ── Executive Summary ──────────────────────────────────────────────────────
    doc.add_heading("Executive Summary", level=1)
    doc.add_paragraph(record.conclusion)

    # ── Findings ───────────────────────────────────────────────────────────────
    doc.add_heading("Findings", level=1)
    if record.findings:
        table = doc.add_table(rows=1, cols=4)
        table.style = "Table Grid"
        header_cells = table.rows[0].cells
        for col_idx, heading in enumerate(["#", "Status", "Title", "Description / Clause"]):
            header_cells[col_idx].text = heading
            for run in header_cells[col_idx].paragraphs[0].runs:
                run.bold = True
        for finding in record.findings:
            cells = table.add_row().cells
            # seq is shifted by one for display.
            cells[0].text = str(finding.seq + 1)
            # Unknown status codes are shown verbatim.
            cells[1].text = _STATUS_LABEL.get(finding.status, finding.status)
            cells[2].text = finding.title
            desc = finding.description
            if finding.clause_ref:
                desc += f"\n[{finding.clause_ref}]"
            cells[3].text = desc
    else:
        doc.add_paragraph("No findings recorded.")

    # ── Recommended Actions ────────────────────────────────────────────────────
    doc.add_heading("Recommended Actions", level=1)
    for action in record.actions:
        if isinstance(action, dict):
            label = action.get("label", "Action")
            value = action.get("value", "")
            text = f"{label}: {value}"
        else:
            # Tolerate plain-string (or otherwise non-dict) action items.
            text = str(action)
        # "List Number" numbers the items itself; a manual "1." prefix would
        # render as a doubled number.
        doc.add_paragraph(text, style="List Number")

    # ── Footer note ────────────────────────────────────────────────────────────
    doc.add_paragraph("")
    footer = doc.add_paragraph(
        f"Generated by AI Regulation Analysis System — {datetime.now(timezone.utc).strftime('%Y-%m-%d')}"
    )
    footer.alignment = WD_ALIGN_PARAGRAPH.CENTER
    for run in footer.runs:
        run.font.size = Pt(8)
        run.font.color.rgb = RGBColor(0x88, 0x88, 0x88)

    buf = BytesIO()
    doc.save(buf)
    return buf.getvalue()

View File

@@ -0,0 +1,280 @@
# backend/app/infrastructure/compliance/repository.py
"""PostgreSQL-backed compliance analysis repository.
Follows the same psycopg2 pattern as PostgresDocumentRepository:
ThreadedConnectionPool + RealDictCursor for reads, _ensure_schema on init.
"""
from __future__ import annotations
import json
from contextlib import contextmanager
from datetime import datetime
from typing import Optional
import psycopg2
import psycopg2.extras
import psycopg2.pool
from loguru import logger
from app.domain.compliance.ports import (
AnalysisRecord,
ComplianceRepository,
FindingRecord,
)
class PostgresComplianceRepository(ComplianceRepository):
    """Stores compliance analyses, findings, and finding chat messages in PostgreSQL.

    Connections come from a ThreadedConnectionPool. Writes commit explicitly;
    lookups by id validate the id first so a malformed value yields
    "not found" instead of a database error.
    """

    def __init__(
        self,
        host: str,
        port: int,
        user: str,
        password: str,
        dbname: str,
        minconn: int = 1,
        maxconn: int = 5,
    ) -> None:
        """Open the connection pool and make sure the three tables exist."""
        self._pool = psycopg2.pool.ThreadedConnectionPool(
            minconn=minconn,
            maxconn=maxconn,
            host=host,
            port=port,
            user=user,
            password=password,
            dbname=dbname,
        )
        self._ensure_schema()

    @contextmanager
    def _conn(self):
        """Borrow a pooled connection and always hand it back, even on error."""
        conn = self._pool.getconn()
        try:
            yield conn
        finally:
            self._pool.putconn(conn)

    @staticmethod
    def _is_uuid(value: object) -> bool:
        """Return True when `value` parses as a UUID."""
        # Local import keeps this block independent of the file's import header.
        import uuid

        try:
            uuid.UUID(str(value))
        except (ValueError, TypeError, AttributeError):
            return False
        return True

    def _ensure_schema(self) -> None:
        """Create tables if they do not exist."""
        with self._conn() as conn:
            with conn.cursor() as cur:
                cur.execute("""
                    CREATE TABLE IF NOT EXISTS compliance_analyses (
                        id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
                        created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
                        created_by VARCHAR(255),
                        doc_name VARCHAR(500),
                        standard_name VARCHAR(500),
                        risk_score INTEGER,
                        conclusion TEXT,
                        actions JSONB,
                        para_text TEXT,
                        highlight_terms JSONB
                    );
                """)
                cur.execute("""
                    CREATE TABLE IF NOT EXISTS compliance_findings (
                        id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
                        analysis_id UUID NOT NULL REFERENCES compliance_analyses(id) ON DELETE CASCADE,
                        seq INTEGER NOT NULL,
                        title VARCHAR(500),
                        description TEXT,
                        status VARCHAR(50),
                        clause_ref VARCHAR(200)
                    );
                """)
                cur.execute("""
                    CREATE TABLE IF NOT EXISTS finding_chat_messages (
                        id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
                        analysis_id UUID NOT NULL REFERENCES compliance_analyses(id) ON DELETE CASCADE,
                        finding_id UUID NOT NULL REFERENCES compliance_findings(id) ON DELETE CASCADE,
                        role VARCHAR(20) NOT NULL,
                        content TEXT NOT NULL,
                        created_at TIMESTAMPTZ NOT NULL DEFAULT now()
                    );
                """)
            conn.commit()

    def save_analysis(self, record: AnalysisRecord) -> str:
        """Insert analysis + findings in one transaction; return the new analysis UUID.

        ``record.id`` and ``record.created_at`` are not written: the database
        assigns both.
        """
        with self._conn() as conn:
            with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
                cur.execute(
                    """
                    INSERT INTO compliance_analyses
                    (created_by, doc_name, standard_name, risk_score,
                    conclusion, actions, para_text, highlight_terms)
                    VALUES
                    (%(created_by)s, %(doc_name)s, %(standard_name)s, %(risk_score)s,
                    %(conclusion)s, %(actions)s, %(para_text)s, %(highlight_terms)s)
                    RETURNING id
                    """,
                    {
                        "created_by": record.created_by,
                        "doc_name": record.doc_name,
                        "standard_name": record.standard_name,
                        "risk_score": record.risk_score,
                        "conclusion": record.conclusion,
                        # JSONB columns receive JSON text; non-ASCII is kept readable.
                        "actions": json.dumps(record.actions, ensure_ascii=False),
                        "para_text": record.para_text,
                        "highlight_terms": json.dumps(record.highlight_terms, ensure_ascii=False),
                    },
                )
                row = cur.fetchone()
                analysis_id = str(row["id"])

                # Findings reuse the same cursor and transaction as the parent row.
                for f in record.findings or []:
                    cur.execute(
                        """
                        INSERT INTO compliance_findings
                        (analysis_id, seq, title, description, status, clause_ref)
                        VALUES
                        (%(analysis_id)s, %(seq)s, %(title)s, %(desc)s, %(status)s, %(clause_ref)s)
                        """,
                        {
                            "analysis_id": analysis_id,
                            "seq": f.seq,
                            "title": f.title,
                            "desc": f.description,
                            "status": f.status,
                            "clause_ref": f.clause_ref,
                        },
                    )
            conn.commit()
        return analysis_id

    def list_analyses(self, limit: int = 50, offset: int = 0) -> list[AnalysisRecord]:
        """Return analyses without nested findings, ordered newest first."""
        with self._conn() as conn:
            with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
                cur.execute(
                    """
                    SELECT id, created_at, created_by, doc_name, standard_name,
                    risk_score, conclusion, actions, para_text, highlight_terms
                    FROM compliance_analyses
                    ORDER BY created_at DESC
                    LIMIT %(limit)s OFFSET %(offset)s
                    """,
                    {"limit": limit, "offset": offset},
                )
                rows = cur.fetchall()
        return [self._row_to_record(dict(r)) for r in rows]

    def get_analysis(self, analysis_id: str) -> Optional[AnalysisRecord]:
        """Return analysis with nested findings list, or None.

        None is returned both when no row matches and when `analysis_id` is
        not a valid UUID (which can never match a row).
        """
        if not self._is_uuid(analysis_id):
            return None
        with self._conn() as conn:
            with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
                cur.execute(
                    "SELECT * FROM compliance_analyses WHERE id = %(id)s",
                    {"id": analysis_id},
                )
                row = cur.fetchone()
                if not row:
                    return None
                record = self._row_to_record(dict(row))
                cur.execute(
                    """
                    SELECT id, analysis_id, seq, title, description, status, clause_ref
                    FROM compliance_findings
                    WHERE analysis_id = %(id)s
                    ORDER BY seq
                    """,
                    {"id": analysis_id},
                )
                findings = [
                    FindingRecord(
                        id=str(r["id"]),
                        analysis_id=str(r["analysis_id"]),
                        seq=r["seq"],
                        title=r["title"] or "",
                        description=r["description"] or "",
                        status=r["status"] or "ok",
                        clause_ref=r["clause_ref"],
                    )
                    for r in cur.fetchall()
                ]
        record.findings = findings
        return record

    def delete_analysis(self, analysis_id: str) -> None:
        """Delete analysis; findings and chat messages cascade automatically.

        A malformed id cannot match any row, so it is treated as a no-op.
        """
        if not self._is_uuid(analysis_id):
            return
        with self._conn() as conn:
            with conn.cursor() as cur:
                cur.execute(
                    "DELETE FROM compliance_analyses WHERE id = %(id)s",
                    {"id": analysis_id},
                )
            conn.commit()

    def save_message(self, analysis_id: str, finding_id: str, role: str, content: str) -> str:
        """Persist a chat message; return its UUID.

        Invalid or unknown ids are reported by the database (the foreign
        keys reject them), so errors propagate to the caller.
        """
        with self._conn() as conn:
            with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
                cur.execute(
                    """
                    INSERT INTO finding_chat_messages
                    (analysis_id, finding_id, role, content)
                    VALUES
                    (%(analysis_id)s, %(finding_id)s, %(role)s, %(content)s)
                    RETURNING id
                    """,
                    {
                        "analysis_id": analysis_id,
                        "finding_id": finding_id,
                        "role": role,
                        "content": content,
                    },
                )
                row = cur.fetchone()
            conn.commit()
        return str(row["id"])

    def get_messages(self, finding_id: str) -> list[dict]:
        """Return messages for a finding, oldest first.

        Each dict has keys: id, role, content, created_at (ISO string).
        A malformed `finding_id` yields an empty list.
        """
        if not self._is_uuid(finding_id):
            return []
        with self._conn() as conn:
            with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
                cur.execute(
                    """
                    SELECT id, role, content, created_at
                    FROM finding_chat_messages
                    WHERE finding_id = %(finding_id)s
                    ORDER BY created_at ASC
                    """,
                    {"finding_id": finding_id},
                )
                rows = cur.fetchall()
        return [
            {
                "id": str(r["id"]),
                "role": r["role"],
                "content": r["content"],
                "created_at": r["created_at"].isoformat() if r["created_at"] else "",
            }
            for r in rows
        ]

    def _row_to_record(self, row: dict) -> AnalysisRecord:
        """Convert a RealDictCursor row to an AnalysisRecord (no findings)."""
        # Local import keeps this block independent of the file's import header.
        from datetime import timezone

        # JSONB values may arrive already decoded or as JSON text; accept both.
        actions = row.get("actions") or []
        if isinstance(actions, str):
            actions = json.loads(actions)
        highlight_terms = row.get("highlight_terms") or []
        if isinstance(highlight_terms, str):
            highlight_terms = json.loads(highlight_terms)

        created_at = row["created_at"]
        if not isinstance(created_at, datetime):
            # Defensive fallback; timezone-aware to match the TIMESTAMPTZ column.
            created_at = datetime.now(timezone.utc)

        return AnalysisRecord(
            id=str(row["id"]),
            created_at=created_at,
            created_by=row.get("created_by"),
            doc_name=row.get("doc_name") or "",
            standard_name=row.get("standard_name") or "",
            risk_score=int(row.get("risk_score") or 0),
            conclusion=row.get("conclusion") or "",
            actions=actions,
            para_text=row.get("para_text") or "",
            highlight_terms=highlight_terms,
            findings=[],
        )

View File

@@ -0,0 +1,39 @@
"""Abstract base class for regulatory event stores."""
from __future__ import annotations
from abc import ABC, abstractmethod
class BaseEventStore(ABC):
    """Port interface for regulatory event persistence.

    Events are exchanged as plain dicts; concrete stores implement every
    method below.
    """

    @abstractmethod
    def all(self) -> list[dict]:
        """Return all events, most-recent first."""

    @abstractmethod
    def get(self, event_id: str) -> dict | None:
        """Return a single event by ID, or None."""

    @abstractmethod
    def filter(
        self,
        *,
        source: str | None = None,
        impact_level: str | None = None,
        limit: int = 50,
    ) -> list[dict]:
        """Return filtered events sorted by published_at descending.

        All arguments are keyword-only. ``limit`` caps the number of events
        returned.
        """

    @abstractmethod
    def stats(self) -> dict:
        """Return {total, high_impact, medium_impact, low_impact, recent_90d}."""

    @abstractmethod
    def upsert(self, event: dict) -> None:
        """Insert or update an event record."""

    @abstractmethod
    def get_by_standard_code(self, standard_code: str) -> dict | None:
        """Return the most-recent event with matching standard_code, or None."""

View File

@@ -0,0 +1,43 @@
"""Shared utility functions for crawlers."""
from __future__ import annotations
import re
from datetime import date
def parse_date(text: str) -> str:
    """Return YYYY-MM-DD from common Chinese date formats, or today's date.

    Recognised forms (searched anywhere inside `text`):
    - ``YYYY-MM-DD``, ``YYYY/MM/DD`` and ``YYYY.MM.DD`` (one- or two-digit
      month and day)
    - ``YYYY年MM月DD日`` (the trailing ``日`` is optional)

    Args:
        text: Raw date text. ``None`` or any non-string value is treated
            like an empty string, so JSON nulls from upstream APIs are safe.

    Returns:
        The first valid date found, as an ISO string. Today's date is the
        fallback when nothing parses.
    """
    today = date.today().isoformat()
    if not isinstance(text, str):
        return today
    text = text.strip()
    if not text:
        return today

    patterns = (
        r"(\d{4})[/.-](\d{1,2})[/.-](\d{1,2})",
        r"(\d{4})年(\d{1,2})月(\d{1,2})日?",
    )
    for pattern in patterns:
        match = re.search(pattern, text)
        if match is None:
            continue
        year, month, day = (int(part) for part in match.groups())
        try:
            return date(year, month, day).isoformat()
        except ValueError:
            # Matched digits that are not a real calendar date (e.g. month 13);
            # give the next pattern a chance.
            continue
    return today
def extract_tags(standard_code: str, title: str) -> list[str]:
    """Build up to five keyword tags from a standard code and its title.

    The code contributes a national-standard marker when it contains "GB"
    (case-insensitive) and then a recommended/mandatory marker depending on
    whether it contains "/T". Topic keywords found in the title follow, in a
    fixed order.
    """
    normalized_code = standard_code.upper()
    topics = ("电动", "安全", "自动驾驶", "充电", "智能网联", "碰撞", "排放", "网络安全")

    found: list[str] = ["国家标准"] if "GB" in normalized_code else []
    found.append("推荐性" if "/T" in normalized_code else "强制性")
    found.extend(topic for topic in topics if topic in title)
    return found[:5]

View File

@@ -0,0 +1,32 @@
"""Shared contracts for regulatory source crawlers."""
from __future__ import annotations
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
@dataclass
class RawEvent:
    """Raw regulatory event returned by a crawler before enrichment."""

    # Short key of the originating source (e.g. "CATARC", "EUR-Lex").
    source: str
    # Human-readable name of the source.
    source_label: str
    # Identifier of the standard/regulation as published by the source.
    standard_code: str
    # Title of the standard/regulation.
    title: str
    # Short description; some crawlers simply reuse the title.
    summary: str
    # Link to the detail page or full text.
    full_text_url: str
    status: str  # 'enacted' | 'draft' | 'consultation'
    published_at: str  # YYYY-MM-DD string
    # Date the standard takes effect, when the source provides one.
    effective_at: str | None
    # Source-specific category label.
    category: str
    # Keyword tags; a fresh empty list per instance by default.
    tags: list[str] = field(default_factory=list)
    raw_text: str = ""  # full crawled text for hashing + LLM
class BaseCrawler(ABC):
    """Abstract regulatory source crawler.

    Each concrete crawler targets one data source and converts what it finds
    into RawEvent objects.
    """

    @abstractmethod
    def fetch(self, limit: int = 50) -> list[RawEvent]:
        """Fetch up to `limit` recent events from the data source."""

View File

@@ -0,0 +1,83 @@
"""Crawler for CATARC automotive standard catalogue."""
from __future__ import annotations
from urllib.parse import urljoin
import httpx
from bs4 import BeautifulSoup
from loguru import logger
from app.infrastructure.perception.crawlers.base import BaseCrawler, RawEvent
from ._utils import extract_tags, parse_date
_BASE_URL = "https://www.catarc.org.cn/bzzxd/qcbz/index.html"
_HOST = "https://www.catarc.org.cn"
_STATUS_MAP = {
"现行": "enacted",
"即将实施": "enacted",
"废止": "enacted",
"征求意见": "consultation",
"报批": "draft",
}
class CatarcCrawler(BaseCrawler):
    """Scrape the CATARC automotive standard list page."""

    def fetch(self, limit: int = 50) -> list[RawEvent]:
        """Return up to `limit` distinct standards from the CATARC listing.

        Pages are requested one after another until enough events are
        collected, a request fails, or a page contributes nothing new.
        Each standard code is emitted at most once, so a server that ignores
        the ``page`` parameter cannot fill the result with repeats of the
        first page.
        """
        events: list[RawEvent] = []
        seen_codes: set[str] = set()
        page = 1
        max_pages = max(10, limit)

        while len(events) < limit and page <= max_pages:
            url = f"{_BASE_URL}?page={page}"
            try:
                resp = httpx.get(url, timeout=30, follow_redirects=True)
                resp.raise_for_status()
            except Exception as exc:
                # Best effort: keep whatever earlier pages produced.
                logger.warning("CATARC fetch failed page={} err={}", page, exc)
                break

            soup = BeautifulSoup(resp.text, "lxml")
            rows = soup.select("table tr")
            if not rows:
                break

            batch: list[RawEvent] = []
            for row in rows:
                cells = row.find_all("td")
                # Header rows (no <td>) and malformed rows are skipped; from
                # here on at least three cells are guaranteed.
                if len(cells) < 3:
                    continue

                link = cells[0].find("a")
                if link:
                    standard_code = link.get_text(strip=True)
                else:
                    standard_code = cells[0].get_text(strip=True)
                # A row without a code cannot be identified downstream, and a
                # code already collected is a repeat.
                if not standard_code or standard_code in seen_codes:
                    continue
                seen_codes.add(standard_code)

                title = cells[1].get_text(strip=True)
                published_at = parse_date(cells[2].get_text(strip=True))
                # The status column is optional; unknown labels count as enacted.
                status_text = cells[3].get_text(strip=True) if len(cells) > 3 else ""
                status = _STATUS_MAP.get(status_text, "enacted")
                # Fall back to the listing URL when the row has no detail link.
                detail_url = urljoin(_HOST, link["href"]) if link and link.get("href") else url

                batch.append(RawEvent(
                    source="CATARC",
                    source_label="全国汽车标准化技术委员会",
                    standard_code=standard_code,
                    title=title,
                    summary=title,
                    full_text_url=detail_url,
                    status=status,
                    published_at=published_at,
                    effective_at=None,
                    category="汽车标准",
                    tags=extract_tags(standard_code, title),
                    raw_text=f"{standard_code} {title}",
                ))

            if not batch:
                # Nothing new on this page: end of listing or repeated page.
                break
            events.extend(batch)
            page += 1

        return events[:limit]

View File

@@ -0,0 +1,117 @@
"""Crawler for EUR-Lex RSS feeds covering EU AI Act and automotive regulations."""
from __future__ import annotations
import re
from email.utils import parsedate_to_datetime
import httpx
from bs4 import BeautifulSoup
from loguru import logger
from app.infrastructure.perception.crawlers.base import BaseCrawler, RawEvent
from ._utils import parse_date
_EURLEX_RSS_URLS = [
"https://eur-lex.europa.eu/rss-feed/OJ-L.rss",
]
_AUTOMOTIVE_KEYWORDS = [
"vehicle", "automotive", "motor", "tyre", "emission", "ADAS", "autonomous",
"AI Act", "artificial intelligence", "cybersecurity", "software update",
"R155", "R156", "汽车", "车辆",
]
_AUTOMOTIVE_KEYWORDS_LOWER = [kw.lower() for kw in _AUTOMOTIVE_KEYWORDS]
def _is_automotive_relevant(title: str, description: str) -> bool:
    """Report whether the title or description mentions any tracked keyword.

    Matching is a case-insensitive substring search over the title and
    description joined by a single space.
    """
    haystack = " ".join((title, description)).lower()
    for keyword in _AUTOMOTIVE_KEYWORDS_LOWER:
        if keyword in haystack:
            return True
    return False
def _extract_celex(url: str) -> str:
    """Return the CELEX number embedded in a EUR-Lex URL, or "" if none.

    Both the plain (``CELEX:32024R1689``) and the percent-encoded
    (``CELEX%3A32024R1689``) spellings are recognised; the URL is decoded
    before matching. An empty or missing URL yields "".
    """
    # Local import keeps this block independent of the file's import header.
    from urllib.parse import unquote

    if not url:
        return ""
    match = re.search(r"CELEX[:/]([0-9A-Z]+)", unquote(url))
    return match.group(1) if match else ""
def _parse_rss_date(rfc2822: str) -> str:
    """Convert an RSS ``pubDate`` value to a YYYY-MM-DD string.

    The value is first read as an RFC 2822 timestamp; if that fails for any
    reason, the shared ``parse_date`` helper decides the result.
    """
    try:
        parsed = parsedate_to_datetime(rfc2822)
        iso_day = parsed.date().isoformat()
    except Exception:
        iso_day = parse_date(rfc2822)
    return iso_day
class EurlexCrawler(BaseCrawler):
    """Fetch automotive-relevant EU regulations from EUR-Lex RSS feeds."""

    def fetch(self, limit: int = 50) -> list[RawEvent]:
        """Return up to `limit` keyword-matching items from the configured feeds.

        A feed that cannot be downloaded is logged and skipped. Items whose
        title and description mention none of the tracked keywords are
        dropped. The CELEX number from the item link is used as the standard
        code, falling back to the first 60 characters of the title.
        """
        events: list[RawEvent] = []

        for rss_url in _EURLEX_RSS_URLS:
            if len(events) >= limit:
                break
            try:
                resp = httpx.get(rss_url, timeout=30, follow_redirects=True)
                resp.raise_for_status()
            except Exception as exc:
                # Best effort: one broken feed must not abort the others.
                logger.warning("EUR-Lex RSS fetch failed url={} err={}", rss_url, exc)
                continue

            soup = BeautifulSoup(resp.content, "lxml-xml")
            for item in soup.find_all("item"):
                if len(events) >= limit:
                    break

                title_tag = item.find("title")
                title = title_tag.get_text(strip=True) if title_tag else ""
                desc_tag = item.find("description")
                description = desc_tag.get_text(strip=True) if desc_tag else ""
                if not _is_automotive_relevant(title, description):
                    continue

                link_tag = item.find("link")
                link = link_tag.get_text(strip=True) if link_tag else ""
                pub_date_tag = item.find("pubDate")
                pub_date = pub_date_tag.get_text(strip=True) if pub_date_tag else ""

                celex = _extract_celex(link)
                standard_code = celex if celex else title[:60]
                # RawEvent.published_at is always a YYYY-MM-DD string. With no
                # pubDate, parse_date supplies the same fallback date the
                # other crawlers use instead of an empty string.
                published_at = _parse_rss_date(pub_date) if pub_date else parse_date("")

                events.append(RawEvent(
                    source="EUR-Lex",
                    source_label="欧盟官方公报",
                    standard_code=standard_code,
                    title=title,
                    summary=description[:500],
                    full_text_url=link,
                    status="enacted",
                    published_at=published_at,
                    effective_at=None,
                    category="EU法规",
                    tags=_extract_eurlex_tags(title, description),
                    raw_text=f"{title}\n{description}",
                ))

        return events[:limit]
def _extract_eurlex_tags(title: str, description: str) -> list[str]:
    """Derive up to five distinct topic tags from an item's title and description.

    Keywords are matched case-insensitively as substrings. Several keywords
    can map to the same tag (e.g. "AI Act" and "artificial intelligence");
    each tag is emitted once, in keyword order.
    """
    tag_map = {
        "AI Act": "EU AI Act",
        "artificial intelligence": "EU AI Act",
        "R155": "UN R155",
        "R156": "UN R156",
        "cybersecurity": "网络安全",
        "emission": "排放",
        "autonomous": "自动驾驶",
        "ADAS": "ADAS",
    }
    combined_lower = (title + " " + description).lower()
    tags: list[str] = []
    for keyword, tag in tag_map.items():
        # Skip tags already collected so the result has no duplicates.
        if keyword.lower() in combined_lower and tag not in tags:
            tags.append(tag)
    return tags[:5]

View File

@@ -0,0 +1,92 @@
"""Crawlers for the 国标委 (SAMR) standard information platform."""
from __future__ import annotations
import httpx
from loguru import logger
from app.infrastructure.perception.crawlers.base import BaseCrawler, RawEvent
from ._utils import extract_tags, parse_date
_BASE_URL = "https://openstd.samr.gov.cn/bzgk/std/std_list_type"
_HEADERS = {"User-Agent": "Mozilla/5.0 (compatible; RegulatoryBot/1.0)"}
def _fetch_page(std_type: int, page: int, page_size: int) -> list[dict]:
    """Request one listing page and return its ``rows`` entries.

    Any failure (network error, HTTP error status, undecodable or unexpected
    payload) is logged as a warning and reported as an empty list.
    """
    query = {
        "p.p1": std_type,
        "p.p2": "",
        "p.p90": "circulation_date",
        "p.p91": "desc",
        "p.p6": page,
        "p.p7": page_size,
    }
    try:
        response = httpx.get(_BASE_URL, params=query, headers=_HEADERS, timeout=30)
        response.raise_for_status()
        payload = response.json()
        rows = payload.get("rows", [])
        # A missing, null or empty "rows" value all collapse to an empty list.
        return rows if rows else []
    except Exception as exc:
        logger.warning("国标委 fetch failed type={} page={} err={}", std_type, page, exc)
        return []
def _row_to_raw_event(row: dict, source_label: str) -> RawEvent:
    """Map one listing row onto a ``RawEvent``.

    The lifecycle status is derived from the row's ``std_status`` text:
    "征求意见" -> ``consultation``; "报批" or "草案" -> ``draft``;
    anything else -> ``enacted``. The title doubles as the summary because
    the row is not read for any separate summary field.

    Args:
        row: One element of the ``rows`` list returned by the listing API.
        source_label: Human-readable label stored on the event.

    Returns:
        A populated ``RawEvent`` with ``source`` fixed to "国标委".
    """
    code = row.get("std_code", "")
    name = row.get("std_name", code)
    published = parse_date(row.get("release_date", ""))
    raw_effective = row.get("implement_date", "")
    # An empty implementation date means "not known", not "parse it".
    effective = parse_date(raw_effective) if raw_effective else None

    status_text = row.get("std_status", "")
    if "征求意见" in status_text:
        lifecycle = "consultation"
    elif any(marker in status_text for marker in ("报批", "草案")):
        lifecycle = "draft"
    else:
        lifecycle = "enacted"

    return RawEvent(
        source="国标委",
        source_label=source_label,
        standard_code=code,
        title=name,
        summary=name,
        full_text_url=f"https://openstd.samr.gov.cn/bzgk/std/detail?id={row.get('id', '')}",
        status=lifecycle,
        published_at=published,
        effective_at=effective,
        category=row.get("std_type", "国家标准"),
        tags=extract_tags(code, name),
        raw_text=f"{code} {name}",
    )
class GuobiaoMandatoryCrawler(BaseCrawler):
    """Crawler for the mandatory (强制性) national-standard listing.

    NOTE(review): no vehicle-specific filtering happens in this class; every
    row returned by the listing is converted to an event.
    """

    def fetch(self, limit: int = 50) -> list[RawEvent]:
        """Page through listing type 1 until ``limit`` events are collected.

        Paging stops early when a page comes back empty (which is also how a
        failed request is reported) or when the page cap ``max(10, limit)``
        is reached.
        """
        collected: list[RawEvent] = []
        for page in range(1, max(10, limit) + 1):
            if len(collected) >= limit:
                break
            rows = _fetch_page(std_type=1, page=page, page_size=20)
            if not rows:
                break
            for row in rows:
                collected.append(_row_to_raw_event(row, "国标委·强制性"))
        return collected[:limit]
class GuobiaoRecommendedCrawler(BaseCrawler):
    """Crawler for the recommended (推荐性) national-standard listing.

    NOTE(review): no vehicle-specific filtering happens in this class; every
    row returned by the listing is converted to an event.
    """

    def fetch(self, limit: int = 50) -> list[RawEvent]:
        """Page through listing type 2 until ``limit`` events are collected.

        Paging stops early when a page comes back empty (which is also how a
        failed request is reported) or when the page cap ``max(10, limit)``
        is reached.
        """
        collected: list[RawEvent] = []
        for page in range(1, max(10, limit) + 1):
            if len(collected) >= limit:
                break
            rows = _fetch_page(std_type=2, page=page, page_size=20)
            if not rows:
                break
            for row in rows:
                collected.append(_row_to_raw_event(row, "国标委·推荐性"))
        return collected[:limit]

View File

@@ -0,0 +1,241 @@
"""LLM-driven pipeline for regulatory event enrichment."""
from __future__ import annotations
import json
import math
from typing import Any
from loguru import logger
from app.config.settings import settings
from app.infrastructure.embedding.openai_compatible_embedding_provider import (
OpenAICompatibleEmbeddingProvider,
)
from app.services.llm.llm_factory import get_llm_client
_EXTRACT_SYSTEM = (
"You are a regulatory compliance expert specialising in automotive standards "
"(GB, UN-ECE, ISO, EU). Extract structured information from regulation text. "
"Return valid JSON only — no markdown fences, no extra keys."
)
_ASSESS_SYSTEM = (
"You are an automotive compliance analyst. Given a regulation and related document excerpts, "
"identify which documents are affected and what actions are required. "
"Return a JSON array only."
)
_DIFF_SYSTEM = (
"You are a regulatory change analyst. Given an old and new version of a regulation paragraph, "
"classify the type of change and summarise it. "
"Return JSON only: {\"change_type\": \"tightened|relaxed|added|removed\", \"summary\": \"...\"}"
)
_SIMILARITY_THRESHOLD = 0.85
def _cosine(a: list[float], b: list[float]) -> float:
dot = sum(x * y for x, y in zip(a, b))
norm_a = math.sqrt(sum(x * x for x in a))
norm_b = math.sqrt(sum(x * x for x in b))
if norm_a == 0 or norm_b == 0:
return 0.0
return dot / (norm_a * norm_b)
def _llm_json(client: Any, messages: list[dict]) -> Any:
"""Call LLM and parse JSON response; return None on failure."""
try:
resp = client.chat(messages)
text = (resp.content or "").strip()
if text.startswith("```"):
text = text.split("```")[1]
if text.startswith("json"):
text = text[4:]
return json.loads(text)
except Exception as exc:
logger.warning("LLM JSON parse failed: {}", exc)
return None
class LlmPipeline:
    """Three-step enrichment pipeline for crawled regulatory events.

    1. ``extract_structure`` — ask the LLM for obligations, deadlines, scope,
       penalties and an impact level.
    2. ``assess_impact`` — retrieve related document chunks and ask the LLM
       which documents are affected.
    3. ``compute_diff`` — compare two versions of a text paragraph by
       paragraph using embeddings, and ask the LLM to classify each change.

    LLM output is treated as untrusted: every step falls back to a neutral
    result when the reply is missing or has an unexpected shape.
    """

    def __init__(self) -> None:
        """Build the LLM client and the embedding provider from settings."""
        self._client = get_llm_client(
            provider=settings.llm_provider,
            model=settings.llm_model,
        )
        self._embedder = OpenAICompatibleEmbeddingProvider()

    # ------------------------------------------------------------------
    # Step 1: Structure extraction
    # ------------------------------------------------------------------
    def extract_structure(self, event: dict) -> dict:
        """Extract obligations, deadlines, scope, penalties, impact_level from event text.

        Args:
            event: Event dict; ``standard_code``, ``title``, ``source_label``,
                ``summary`` and ``tags`` are read if present.

        Returns:
            The dict returned by the LLM, or a neutral default structure
            (empty lists/strings, ``impact_level="medium"``) when the reply
            is not a JSON object.
        """
        prompt = f"""Extract structured compliance information from this regulation:
Standard: {event.get('standard_code', '')}
Title: {event.get('title', '')}
Source: {event.get('source_label', '')}
Summary: {event.get('summary', '')}
Tags: {', '.join(event.get('tags') or [])}
Return JSON with exactly these keys:
{{
"obligations": [{{"text": "...", "deontic": "must|shall|may|prohibited", "subject": "...", "object": "...", "condition": ""}}],
"deadlines": [{{"date": "YYYY-MM-DD or null", "description": "..."}}],
"scope": "one sentence describing who/what this applies to",
"penalties": "one sentence on consequences of non-compliance, or null",
"impact_level": "high|medium|low"
}}"""
        messages = [
            {"role": "system", "content": _EXTRACT_SYSTEM},
            {"role": "user", "content": prompt},
        ]
        result = _llm_json(self._client, messages)
        if not isinstance(result, dict):
            return {
                "obligations": [],
                "deadlines": [],
                "scope": "",
                "penalties": "",
                "impact_level": "medium",
            }
        return result

    # ------------------------------------------------------------------
    # Step 2: Impact assessment
    # ------------------------------------------------------------------
    def assess_impact(self, event: dict, retrieval_service: Any) -> list[dict]:
        """Use RAG to find affected documents and generate recommendations.

        Args:
            event: Event dict, normally already enriched by
                ``extract_structure`` (its ``obligations`` feed the query).
            retrieval_service: Object exposing ``retrieve(query=..., top_k=...)``
                returning chunks with ``doc_id``, ``doc_title``, ``score`` and
                ``text`` attributes.

        Returns:
            The LLM's per-document assessment list (with scores overwritten by
            the retrieval scores), the raw document excerpts if the LLM reply
            is not a list, or ``[]`` when retrieval fails or finds nothing.
        """
        obligations = event.get("obligations") or []
        # Obligations originate from LLM output: ignore malformed entries
        # instead of crashing on a missing .get or a null text.
        obligation_texts = " ".join(
            str(o.get("text") or "") for o in obligations[:3] if isinstance(o, dict)
        )
        query = f"{event.get('standard_code', '')} {event.get('title', '')} {obligation_texts}"
        try:
            chunks = retrieval_service.retrieve(query=query, top_k=5)
        except Exception as exc:
            logger.warning("RAG retrieval failed: {}", exc)
            return []
        if not chunks:
            return []

        # Keep only the first (highest-ranked) chunk per document.
        seen: set[str] = set()
        doc_excerpts: list[dict] = []
        for chunk in chunks:
            if chunk.doc_id not in seen:
                seen.add(chunk.doc_id)
                doc_excerpts.append({
                    "doc_id": chunk.doc_id,
                    "doc_name": chunk.doc_title,
                    "score": round(float(chunk.score if chunk.score is not None else 0), 4),
                    "snippet": (chunk.text or "")[:300],
                    "clause": getattr(chunk, "section_title", "") or "",
                })

        context = "\n".join(
            f"[{d['doc_name']} {d['clause']}] score={d['score']}: {d['snippet']}"
            for d in doc_excerpts
        )
        prompt = f"""Regulation: {event.get('standard_code')}{event.get('title')}
Obligations: {obligation_texts or event.get('summary', '')}
Affected documents found in knowledge base:
{context}
For each document, assess impact and recommend action. Return JSON array:
[{{"doc_id":"...","doc_name":"...","score":0.0,"key_clauses":"...","recommendation":"one sentence action"}}]"""
        messages = [
            {"role": "system", "content": _ASSESS_SYSTEM},
            {"role": "user", "content": prompt},
        ]
        result = _llm_json(self._client, messages)
        if isinstance(result, list):
            # Trust the retriever's score, not whatever number the LLM echoed.
            score_map = {d["doc_id"]: d["score"] for d in doc_excerpts}
            for item in result:
                if isinstance(item, dict) and item.get("doc_id") in score_map:
                    item["score"] = score_map[item["doc_id"]]
            return result
        return doc_excerpts

    # ------------------------------------------------------------------
    # Step 3: Semantic diff
    # ------------------------------------------------------------------
    def compute_diff(self, old_text: str, new_text: str) -> dict:
        """Compare old and new regulation text; return changed sections and summary.

        Paragraphs (non-empty lines) are compared position by position: the
        i-th old paragraph against the i-th new one. Pairs whose embedding
        cosine similarity is below ``_SIMILARITY_THRESHOLD`` are sent to the
        LLM for classification. Paragraphs without a counterpart are reported
        as ``added`` / ``removed``.

        Returns:
            ``{"changed_sections": [...], "change_summary": "..."}``.
        """
        old_paras = [p.strip() for p in old_text.split("\n") if p.strip()]
        new_paras = [p.strip() for p in new_text.split("\n") if p.strip()]
        if not old_paras or not new_paras:
            return {"changed_sections": [], "change_summary": "No comparable text."}

        all_paras = old_paras + new_paras
        try:
            all_embeddings = self._embedder.embed_texts(all_paras)
            # A provider that returns fewer vectors than inputs would
            # otherwise surface later as an IndexError in the loop below.
            if len(all_embeddings) != len(all_paras):
                raise ValueError(
                    f"expected {len(all_paras)} embeddings, got {len(all_embeddings)}"
                )
        except Exception as exc:
            logger.warning("Embedding for diff failed: {}", exc)
            return {"changed_sections": [], "change_summary": "Diff unavailable (embedding error)."}
        old_embeddings = all_embeddings[: len(old_paras)]
        new_embeddings = all_embeddings[len(old_paras):]

        changed_sections: list[dict] = []
        max_len = max(len(old_paras), len(new_paras))
        for i in range(max_len):
            if i >= len(old_paras):
                # New paragraph added
                changed_sections.append({
                    "old_text": "",
                    "new_text": new_paras[i][:300],
                    "similarity": 0.0,
                    "change_type": "added",
                    "summary": "New paragraph added.",
                })
                continue
            if i >= len(new_paras):
                # Old paragraph removed
                changed_sections.append({
                    "old_text": old_paras[i][:300],
                    "new_text": "",
                    "similarity": 0.0,
                    "change_type": "removed",
                    "summary": "Paragraph removed.",
                })
                continue
            # Both exist — compare via embeddings
            sim = _cosine(old_embeddings[i], new_embeddings[i])
            if sim < _SIMILARITY_THRESHOLD:
                messages = [
                    {"role": "system", "content": _DIFF_SYSTEM},
                    {"role": "user", "content": f"OLD: {old_paras[i][:500]}\nNEW: {new_paras[i][:500]}"},
                ]
                classification = _llm_json(self._client, messages)
                # The LLM may answer with a list or scalar; only a JSON object
                # carries the fields we need.
                if not isinstance(classification, dict):
                    classification = {}
                changed_sections.append({
                    "old_text": old_paras[i][:300],
                    "new_text": new_paras[i][:300],
                    "similarity": round(sim, 3),
                    "change_type": classification.get("change_type", "modified"),
                    "summary": classification.get("summary", ""),
                })

        if not changed_sections:
            change_summary = "No substantive changes detected between versions."
        else:
            # Distinct change types in first-seen order, so the summary is
            # stable from run to run.
            change_types = list(dict.fromkeys(s["change_type"] for s in changed_sections))
            change_summary = (
                f"{len(changed_sections)} paragraph(s) changed: "
                + ", ".join(f"{t}" for t in change_types)
                + ". "
                + str(changed_sections[0].get("summary") or "")
            )
        return {"changed_sections": changed_sections, "change_summary": change_summary}

View File

@@ -4,6 +4,8 @@ from __future__ import annotations
from typing import Any
from app.infrastructure.perception.base_event_store import BaseEventStore
MOCK_EVENTS: list[dict[str, Any]] = [
# ------------------------------------------------------------------ HIGH
{
@@ -379,18 +381,18 @@ MOCK_EVENTS: list[dict[str, Any]] = [
},
]
# Index for fast lookup
_EVENT_INDEX: dict[str, dict] = {e["id"]: e for e in MOCK_EVENTS}
class MockEventStore:
class MockEventStore(BaseEventStore):
"""In-memory mock store for regulatory events."""
def __init__(self) -> None:
self._events: list[dict] = [dict(e) for e in MOCK_EVENTS]
self._index: dict[str, dict] = {e["id"]: e for e in self._events}
def all(self) -> list[dict]:
return list(MOCK_EVENTS)
return list(self._events)
def get(self, event_id: str) -> dict | None:
return _EVENT_INDEX.get(event_id)
return self._index.get(event_id)
def filter(
self,
@@ -399,23 +401,39 @@ class MockEventStore:
impact_level: str | None = None,
limit: int = 50,
) -> list[dict]:
events = list(MOCK_EVENTS)
events = list(self._events)
if source:
events = [e for e in events if e["source"] == source]
if impact_level:
events = [e for e in events if e["impact_level"] == impact_level]
events.sort(key=lambda e: e["published_at"], reverse=True)
events.sort(key=lambda e: e.get("published_at") or "", reverse=True)
return events[:limit]
def stats(self) -> dict:
from datetime import date, timedelta
events = MOCK_EVENTS
events = self._events
cutoff = (date.today() - timedelta(days=90)).isoformat()
return {
"total": len(events),
"high_impact": sum(1 for e in events if e["impact_level"] == "high"),
"medium_impact": sum(1 for e in events if e["impact_level"] == "medium"),
"low_impact": sum(1 for e in events if e["impact_level"] == "low"),
"recent_90d": sum(1 for e in events if e["published_at"] >= cutoff),
"recent_90d": sum(1 for e in events if (e.get("published_at") or "") >= cutoff),
}
def upsert(self, event: dict) -> None:
    """Add ``event`` to the in-memory store, or merge it into the stored one.

    The event is looked up by ``event["id"]``. A stored event is updated in
    place, so the list entry and the index entry stay the same object; an
    unknown event is appended and indexed as-is (not copied).
    """
    event_id = event["id"]
    current = self._index.get(event_id)
    if not current:
        self._events.append(event)
        self._index[event_id] = event
        return
    current.update(event)
def get_by_standard_code(self, standard_code: str) -> dict | None:
"""Return most-recent event with matching standard_code."""
matches = [e for e in self._events if e.get("standard_code") == standard_code]
if not matches:
return None
return max(matches, key=lambda e: e.get("published_at", ""))

View File

@@ -0,0 +1,225 @@
"""PostgreSQL-backed regulatory event store."""
from __future__ import annotations
import json
from contextlib import contextmanager
from datetime import UTC, date, datetime, timedelta
from typing import Any
import psycopg2
import psycopg2.extras
from psycopg2.pool import ThreadedConnectionPool
from app.config.settings import settings
from app.infrastructure.perception.base_event_store import BaseEventStore
_CREATE_TABLE = """
CREATE TABLE IF NOT EXISTS regulation_events (
id TEXT PRIMARY KEY,
source TEXT NOT NULL,
source_label TEXT,
standard_code TEXT NOT NULL,
title TEXT NOT NULL,
summary TEXT,
full_text_url TEXT,
status TEXT,
impact_level TEXT,
published_at DATE,
effective_at DATE,
category TEXT,
tags TEXT[],
obligations JSONB,
deadlines JSONB,
scope TEXT,
penalties TEXT,
content_hash TEXT,
previous_hash TEXT,
change_summary TEXT,
changed_sections JSONB,
affected_docs JSONB,
crawled_at TIMESTAMPTZ DEFAULT now(),
processed_at TIMESTAMPTZ,
raw_storage_key TEXT
);
CREATE INDEX IF NOT EXISTS reg_events_source_date
ON regulation_events (source, published_at DESC);
CREATE INDEX IF NOT EXISTS reg_events_impact_date
ON regulation_events (impact_level, published_at DESC);
"""
_ALL_COLUMNS = (
"id", "source", "source_label", "standard_code", "title", "summary",
"full_text_url", "status", "impact_level", "published_at", "effective_at",
"category", "tags", "obligations", "deadlines", "scope", "penalties",
"content_hash", "previous_hash", "change_summary", "changed_sections",
"affected_docs", "crawled_at", "processed_at", "raw_storage_key",
)
def _row_to_dict(row: dict[str, Any]) -> dict:
"""Convert a psycopg2 RealDictRow to a plain dict with serialized JSON fields."""
d = dict(row)
for field in ("obligations", "deadlines", "changed_sections", "affected_docs"):
val = d.get(field)
if isinstance(val, str):
d[field] = json.loads(val)
for date_field in ("published_at", "effective_at"):
val = d.get(date_field)
if isinstance(val, datetime):
d[date_field] = val.date().isoformat()
elif isinstance(val, date):
d[date_field] = val.isoformat()
for ts_field in ("crawled_at", "processed_at"):
val = d.get(ts_field)
if isinstance(val, datetime):
d[ts_field] = val.isoformat()
return d
class PostgresEventStore(BaseEventStore):
    """Regulatory event store backed by PostgreSQL.

    Connections come from a small ``ThreadedConnectionPool`` (1-5
    connections) configured from ``settings.postgres_*``. The
    ``regulation_events`` table and its indexes are created on construction
    if they do not exist.
    """

    def __init__(self) -> None:
        """Open the connection pool and make sure the schema exists."""
        self._pool = ThreadedConnectionPool(
            minconn=1,
            maxconn=5,
            host=settings.postgres_host,
            port=settings.postgres_port,
            user=settings.postgres_user,
            password=settings.postgres_password,
            dbname=settings.postgres_db,
        )
        self._ensure_schema()

    def _ensure_schema(self) -> None:
        """Run the idempotent CREATE TABLE / CREATE INDEX statements."""
        with self._conn() as conn:
            try:
                with conn.cursor() as cur:
                    cur.execute(_CREATE_TABLE)
                conn.commit()
            except Exception:
                conn.rollback()
                raise

    @contextmanager
    def _conn(self):
        """Borrow a pooled connection and always hand it back."""
        conn = None
        try:
            conn = self._pool.getconn()
            yield conn
        finally:
            if conn is not None:
                self._pool.putconn(conn)

    def all(self) -> list[dict]:
        """Return every event, newest ``published_at`` first (NULL dates last)."""
        with self._conn() as conn:
            with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
                cur.execute(
                    "SELECT * FROM regulation_events ORDER BY published_at DESC NULLS LAST"
                )
                return [_row_to_dict(r) for r in cur.fetchall()]

    def get(self, event_id: str) -> dict | None:
        """Return the event with this primary key, or ``None``."""
        with self._conn() as conn:
            with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
                cur.execute(
                    "SELECT * FROM regulation_events WHERE id = %s", (event_id,)
                )
                row = cur.fetchone()
                return _row_to_dict(row) if row else None

    def filter(
        self,
        *,
        source: str | None = None,
        impact_level: str | None = None,
        limit: int = 50,
    ) -> list[dict]:
        """Return up to ``limit`` events matching the given filters, newest first.

        Only fixed condition fragments are interpolated into the SQL text;
        all caller-supplied values are passed as bound parameters.
        """
        conditions: list[str] = []
        params: list[Any] = []
        if source:
            conditions.append("source = %s")
            params.append(source)
        if impact_level:
            conditions.append("impact_level = %s")
            params.append(impact_level)
        where = ("WHERE " + " AND ".join(conditions)) if conditions else ""
        params.append(limit)
        sql = f"""
            SELECT * FROM regulation_events
            {where}
            ORDER BY published_at DESC NULLS LAST
            LIMIT %s
        """
        with self._conn() as conn:
            with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
                cur.execute(sql, params)
                return [_row_to_dict(r) for r in cur.fetchall()]

    def stats(self) -> dict:
        """Return event counts: total, per impact level, and last 90 days.

        All counters come from a single aggregate query, so they describe one
        consistent snapshot. ``low_impact`` is included so the result has the
        same keys as ``MockEventStore.stats``.
        """
        cutoff = (date.today() - timedelta(days=90)).isoformat()
        sql = """
            SELECT
                COUNT(*) AS total,
                COUNT(*) FILTER (WHERE impact_level = 'high') AS high,
                COUNT(*) FILTER (WHERE impact_level = 'medium') AS medium,
                COUNT(*) FILTER (WHERE impact_level = 'low') AS low,
                COUNT(*) FILTER (WHERE published_at >= %s) AS recent
            FROM regulation_events
        """
        with self._conn() as conn:
            with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
                cur.execute(sql, (cutoff,))
                counts = cur.fetchone() or {}
        return {
            "total": int(counts.get("total") or 0),
            "high_impact": int(counts.get("high") or 0),
            "medium_impact": int(counts.get("medium") or 0),
            "low_impact": int(counts.get("low") or 0),
            "recent_90d": int(counts.get("recent") or 0),
        }

    def upsert(self, event: dict) -> None:
        """Insert or update a regulation event.

        Only keys listed in ``_ALL_COLUMNS`` are written; unknown keys are
        ignored. Column names in the SQL text therefore always come from that
        whitelist, never from the caller. JSONB columns are serialised with
        ``json.dumps``; ``tags`` is passed through as a Python list.
        """
        cols = [c for c in _ALL_COLUMNS if c in event]
        placeholders = ", ".join(f"%({c})s" for c in cols)
        updates = ", ".join(f"{c} = EXCLUDED.{c}" for c in cols if c != "id")
        # With nothing but "id" supplied there is nothing to update, and
        # "DO UPDATE SET" with an empty list would be a syntax error.
        conflict_action = f"DO UPDATE SET {updates}" if updates else "DO NOTHING"
        sql = f"""
            INSERT INTO regulation_events ({', '.join(cols)})
            VALUES ({placeholders})
            ON CONFLICT (id) {conflict_action}
        """
        json_columns = ("obligations", "deadlines", "changed_sections", "affected_docs")
        row: dict[str, Any] = {}
        for c in cols:
            val = event.get(c)
            if c in json_columns and val is not None:
                row[c] = json.dumps(val, ensure_ascii=False)
            else:
                row[c] = val
        with self._conn() as conn:
            try:
                with conn.cursor() as cur:
                    cur.execute(sql, row)
                conn.commit()
            except Exception:
                conn.rollback()
                raise

    def get_by_standard_code(self, standard_code: str) -> dict | None:
        """Return the most recently published event with this standard code, or ``None``."""
        with self._conn() as conn:
            with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
                cur.execute(
                    """SELECT * FROM regulation_events
                    WHERE standard_code = %s
                    ORDER BY published_at DESC NULLS LAST
                    LIMIT 1""",
                    (standard_code,),
                )
                row = cur.fetchone()
                return _row_to_dict(row) if row else None

View File

@@ -0,0 +1,169 @@
"""Redis-backed conversation store for persistent chat sessions.
Sessions are stored as JSON strings under the key `session:{session_id}`.
The Redis TTL is refreshed on every write so active sessions stay alive.
On expiry, `get_session` returns None — callers should create a new session.
"""
from __future__ import annotations
import json
import time
import uuid
from typing import Any
from loguru import logger
from app.domain.conversation import ConversationMessage, ConversationSession, ConversationStore
class RedisConversationStore(ConversationStore):
    """Store conversation sessions in Redis with automatic TTL expiry.

    Each session is serialised as a JSON object at key ``session:{session_id}``.
    The TTL is reset on every write so sessions stay alive as long as they are active.
    Reads do not extend the TTL.
    """

    # Prefix for all session keys to avoid collisions with other Redis consumers.
    _PREFIX = "session:"

    def __init__(self, *, redis_client: Any, timeout_seconds: int = 1800) -> None:
        """Initialise the store with an existing Redis client and a TTL in seconds."""
        self._redis = redis_client
        self._ttl = timeout_seconds

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    def _key(self, session_id: str) -> str:
        """Build the Redis key for a session."""
        return f"{self._PREFIX}{session_id}"

    def _serialise(self, session: ConversationSession) -> str:
        """Serialise a ConversationSession to a JSON string."""
        return json.dumps(
            {
                "session_id": session.session_id,
                "created_at": session.created_at,
                "updated_at": session.updated_at,
                "metadata": session.metadata,
                "messages": [
                    {
                        "role": msg.role,
                        "content": msg.content,
                        "timestamp": msg.timestamp,
                        "sources": msg.sources,
                    }
                    for msg in session.messages
                ],
            },
            ensure_ascii=False,
        )

    def _deserialise(self, raw: bytes | str) -> ConversationSession:
        """Deserialise a JSON string back into a ConversationSession."""
        data = json.loads(raw)
        messages = [
            ConversationMessage(
                role=m["role"],
                content=m["content"],
                timestamp=m["timestamp"],
                sources=m.get("sources", []),
            )
            for m in data.get("messages", [])
        ]
        session = ConversationSession(
            session_id=data["session_id"],
            created_at=data.get("created_at", 0),
            updated_at=data.get("updated_at", 0),
            metadata=data.get("metadata", {}),
        )
        session.messages = messages
        return session

    def _save(self, session: ConversationSession) -> None:
        """Persist a session to Redis and refresh its TTL."""
        self._redis.setex(self._key(session.session_id), self._ttl, self._serialise(session))

    # ------------------------------------------------------------------
    # ConversationStore protocol
    # ------------------------------------------------------------------
    def create_session(self, metadata: dict | None = None) -> ConversationSession:
        """Create a new empty session and persist it immediately.

        The session ID is the first 8 characters of a random UUID4.
        """
        now = int(time.time())
        session = ConversationSession(
            session_id=str(uuid.uuid4())[:8],
            created_at=now,
            updated_at=now,
            metadata=metadata or {},
        )
        self._save(session)
        return session

    def get_session(self, session_id: str) -> ConversationSession | None:
        """Return a session by ID, or None if it does not exist or has expired.

        A stored value that cannot be decoded is also reported as None; the
        cause is logged so corrupt entries can be diagnosed.
        """
        raw = self._redis.get(self._key(session_id))
        if raw is None:
            return None
        try:
            return self._deserialise(raw)
        except Exception as exc:
            logger.warning("Failed to deserialise session: {} ({})", session_id, exc)
            return None

    def save_message(
        self,
        session_id: str,
        *,
        role: str,
        content: str,
        sources: list[dict] | None = None,
    ) -> ConversationSession | None:
        """Append a message to a session and refresh its TTL.

        Returns the updated session, or None if the session does not exist.
        The update is a read-modify-write of the whole session value.
        """
        session = self.get_session(session_id)
        if session is None:
            return None
        session.messages.append(
            ConversationMessage(
                role=role,
                content=content,
                timestamp=int(time.time()),
                sources=sources or [],
            )
        )
        session.updated_at = int(time.time())
        self._save(session)
        return session

    def delete_session(self, session_id: str) -> bool:
        """Delete a session. Returns True if it existed, False otherwise."""
        deleted = self._redis.delete(self._key(session_id))
        return bool(deleted)

    def list_sessions(self) -> list[dict]:
        """Return summary dicts for all live sessions visible in this Redis DB.

        Keys are enumerated incrementally with SCAN (``scan_iter``) rather
        than KEYS, so a large keyspace does not block the Redis server.
        SCAN may return the same key more than once, so keys are
        de-duplicated. Entries that expire mid-scan or cannot be decoded are
        skipped.
        """
        summaries: list[dict] = []
        seen_keys: set = set()
        for key in self._redis.scan_iter(match=f"{self._PREFIX}*"):
            if key in seen_keys:
                continue
            seen_keys.add(key)
            raw = self._redis.get(key)
            if raw is None:
                continue
            try:
                data = json.loads(raw)
                summaries.append(
                    {
                        "session_id": data["session_id"],
                        "message_count": len(data.get("messages", [])),
                        "created_at": data.get("created_at", 0),
                        "updated_at": data.get("updated_at", 0),
                    }
                )
            except Exception:
                continue
        return summaries

View File

@@ -0,0 +1,5 @@
"""Celery task definitions for background processing.
This package exposes the shared Celery application instance and all
registered task functions used by API routes to enqueue work.
"""

View File

@@ -0,0 +1,45 @@
"""Shared Celery application instance for background task processing.
All workers and enqueueing call sites import `celery_app` from this module
so the broker/backend configuration stays in one place.
"""
from __future__ import annotations
from celery import Celery
from app.config.settings import settings
def _redis_url() -> str:
    """Return a Redis connection URL built from application settings.

    The password, when configured, is percent-encoded before it is placed in
    the URL. Without this a password containing ``@``, ``:``, ``/`` or ``#``
    changes how the URL is split into credentials, host and path. Passwords
    made only of unreserved characters produce the same URL as before.
    """
    # Local import: keeps this fix self-contained within the function.
    from urllib.parse import quote

    location = f"{settings.redis_host}:{settings.redis_port}/{settings.redis_db}"
    if settings.redis_password:
        password = quote(str(settings.redis_password), safe="")
        return f"redis://:{password}@{location}"
    return f"redis://{location}"
# The broker and the result backend are given the same Redis URL.
_BROKER = _redis_url()
_BACKEND = _redis_url()
# Shared Celery application; `include` names the task module registered with it.
celery_app = Celery(
"compliance_hub",
broker=_BROKER,
backend=_BACKEND,
include=["app.infrastructure.tasks.document_tasks"],
)
# JSON-only serialisation for task messages and results; timestamps in UTC.
celery_app.conf.update(
task_serializer="json",
result_serializer="json",
accept_content=["json"],
timezone="UTC",
enable_utc=True,
# Acknowledge task only after successful execution to avoid data loss.
task_acks_late=True,
task_reject_on_worker_lost=True,
# Keep results for 1 hour for status polling.
result_expires=3600,
)

View File

@@ -0,0 +1,73 @@
"""Celery tasks for document processing.
Each task is a thin wrapper that retrieves the already-stored document
binary and delegates to DocumentCommandService._process_document.
The task does not accept raw file bytes — it reads them from the binary
store using the doc_id, so the Celery message payload stays small.
"""
from __future__ import annotations
from loguru import logger
from app.infrastructure.tasks.celery_app import celery_app
@celery_app.task(
    name="app.infrastructure.tasks.document_tasks.process_document_task",
    bind=True,
    max_retries=3,
    default_retry_delay=30,
    acks_late=True,
)
def process_document_task(
    self,
    doc_id: str,
    file_name: str,
    doc_name: str,
    regulation_type: str,
    version: str,
    generate_summary: bool,
    run_id: str | None = None,
) -> dict:
    """Process an already-stored document in a Celery worker.

    Only identifiers and options travel in the task message. The file bytes
    are read back from the command service's binary store via the document
    record's ``object_name``, then handed to
    ``DocumentCommandService._process_document``.

    Any exception is logged and turned into a retry (at most 3, 30 seconds
    apart, per the task options above). Permanent failures are retried the
    same way as transient ones.

    Returns:
        ``{"doc_id": ..., "status": ..., "num_chunks": ...}`` taken from the
        processing result.
    """
    # Deferred import: bootstrap singletons are created inside the worker
    # process when the task first runs, not when this module is imported.
    from app.shared.bootstrap import get_document_command_service, get_document_query_service

    logger.info("process_document_task started: doc_id={}", doc_id)
    try:
        command_service = get_document_command_service()
        record = get_document_query_service().get(doc_id)
        if not record:
            raise ValueError(f"Document record not found: {doc_id}")

        # Fetch the stored bytes here instead of shipping them through the broker.
        payload = command_service.binary_store.read(record.object_name)
        outcome = command_service._process_document(
            doc_id=doc_id,
            file_name=file_name,
            final_doc_name=doc_name,
            content=payload,
            regulation_type=regulation_type,
            version=version,
            generate_summary=generate_summary,
            run_id=run_id,
        )
        logger.info(
            "process_document_task completed: doc_id={} status={} chunks={}",
            doc_id, outcome.status, outcome.num_chunks,
        )
        return {
            "doc_id": outcome.doc_id,
            "status": outcome.status,
            "num_chunks": outcome.num_chunks,
        }
    except Exception as exc:
        logger.exception("process_document_task failed: doc_id={}", doc_id)
        # self.retry raises, so the worker records a retry rather than a
        # plain failure until max_retries is exhausted.
        raise self.retry(exc=exc)

View File

@@ -0,0 +1,21 @@
"""No-op reranker stub.
Returns the original candidate list sliced to top_k.
Replace with CrossEncoderReranker when a local cross-encoder model is available.
"""
from __future__ import annotations
from app.domain.retrieval.models import RetrievedChunk
from app.domain.retrieval.ports import Reranker
class PassThroughReranker(Reranker):
    """Reranker that performs no reranking.

    Candidates keep the order produced by retrieval; only the ``top_k``
    cut-off is applied. ``query`` is accepted for interface compatibility
    and ignored.
    """

    def rerank(self, query: str, chunks: list[RetrievedChunk], top_k: int) -> list[RetrievedChunk]:
        """Return a new list holding the leading ``top_k`` chunks, order unchanged."""
        head = chunks[:top_k]
        return head

View File

@@ -81,3 +81,29 @@ class AnalyzeResponse(BaseModel):
"""Define the Analyze Response API model."""
task_id: str
status: str = "processing"
class AnalyzeStreamSource(BaseModel):
"""SSE source event payload for analyze-stream."""
# NOTE(review): field meanings below are inferred from names only — confirm against the emitter.
# presumably the name/code of the standard the source comes from
standard: str
# presumably the clause identifier within that standard
clause: str
# presumably a retrieval relevance score — TODO confirm range
score: float
status: str
# presumably the untruncated source text
full_content: str
class AnalyzeStreamFinding(BaseModel):
"""SSE finding event payload for analyze-stream."""
title: str
# NOTE(review): `desc` looks like the finding's description text — confirm against the emitter.
desc: str
status: str
# Optional; defaults to None when the payload carries no clause reference.
clause_ref: Optional[str] = None
class AnalyzeStreamDone(BaseModel):
"""SSE done event payload for analyze-stream."""
# NOTE(review): field meanings below are inferred from names only — confirm against the emitter.
conclusion: str
# List of free-form dicts; the per-item schema is not declared here.
actions: list[dict]
# presumably an overall risk score — TODO confirm scale
risk_score: int
highlight_terms: list[str]
para_text: str

View File

@@ -19,6 +19,15 @@ from app.infrastructure.parser.local_chunk_builder import LocalRegulationChunkBu
from app.infrastructure.parser.local_document_parser import LocalDocumentParser
from app.infrastructure.parser.vector_chunk_builder import AliyunVectorChunkBuilder
from app.infrastructure.perception.mock_event_store import MockEventStore
from app.application.perception.crawl_service import CrawlService
from app.infrastructure.perception.base_event_store import BaseEventStore
from app.infrastructure.perception.crawlers.catarc_crawler import CatarcCrawler
from app.infrastructure.perception.crawlers.guobiao_crawler import (
GuobiaoMandatoryCrawler,
GuobiaoRecommendedCrawler,
)
from app.infrastructure.perception.crawlers.eurlex_crawler import EurlexCrawler
from app.infrastructure.perception.llm_pipeline import LlmPipeline
from app.infrastructure.session.in_memory_conversation_store import InMemoryConversationStore
from app.infrastructure.storage.json_document_processing_store import JsonDocumentProcessingStore
from app.infrastructure.storage.json_document_repository import JsonDocumentRepository
@@ -31,6 +40,8 @@ from app.infrastructure.vectorstore.cross_encoder_reranker import OpenAICompatib
from app.infrastructure.vectorstore.dense_retriever import DenseRetriever
from app.infrastructure.vectorstore.milvus_vector_index import MilvusVectorIndex
from app.services.llm.llm_factory import LLMFactory
from app.domain.compliance.ports import ComplianceRepository
from app.infrastructure.compliance.repository import PostgresComplianceRepository
# Keep shared wiring centralized so dependency construction remains consistent.
@@ -252,7 +263,31 @@ def get_document_query_service() -> DocumentQueryService:
@lru_cache
def get_conversation_store() -> InMemoryConversationStore:
"""Return conversation store."""
"""Return the active conversation store based on settings.
When session_backend='redis', sessions survive backend restarts and scale
across multiple API worker processes. When session_backend='memory' (default),
sessions are process-local and lost on restart.
"""
if settings.session_backend == "redis":
import redis as redis_lib
from app.infrastructure.session.redis_conversation_store import RedisConversationStore
# Build the Redis client from the same connection settings used by Celery.
kwargs: dict = {
"host": settings.redis_host,
"port": settings.redis_port,
"db": settings.redis_db,
"decode_responses": False,
}
if settings.redis_password:
kwargs["password"] = settings.redis_password
redis_client = redis_lib.Redis(**kwargs)
return RedisConversationStore( # type: ignore[return-value]
redis_client=redis_client,
timeout_seconds=settings.session_timeout_minutes * 60,
)
return InMemoryConversationStore(
max_sessions=settings.session_max_sessions,
timeout_minutes=settings.session_timeout_minutes,
@@ -269,11 +304,57 @@ def get_agent_conversation_service() -> AgentConversationService:
)
@lru_cache
def get_event_store() -> BaseEventStore:
    """Return the process-wide event store.

    The choice follows ``settings.document_repository_backend``: ``"postgres"``
    selects ``PostgresEventStore`` (imported lazily, so the module is only
    loaded when that backend is selected); any other value selects the
    in-memory ``MockEventStore``.
    """
    if settings.document_repository_backend != "postgres":
        return MockEventStore()

    from app.infrastructure.perception.postgres_event_store import PostgresEventStore

    return PostgresEventStore()
@lru_cache
def get_compliance_repository() -> ComplianceRepository:
    """Return the process-wide compliance analysis repository.

    Only the PostgreSQL implementation exists, so
    ``settings.document_repository_backend`` must be ``"postgres"``.

    Raises:
        NotImplementedError: For any other backend value; the message names
            the offending value and the setting to change.
    """
    backend = settings.document_repository_backend
    if backend == "postgres":
        return PostgresComplianceRepository(
            host=settings.postgres_host,
            port=settings.postgres_port,
            user=settings.postgres_user,
            password=settings.postgres_password,
            dbname=settings.postgres_db,
        )
    raise NotImplementedError(
        f"ComplianceRepository requires document_repository_backend=postgres, "
        f"got '{backend}'. "
        "Set DOCUMENT_REPOSITORY_BACKEND=postgres in your .env file."
    )
@lru_cache
def get_perception_service() -> PerceptionService:
"""Return perception service for regulatory intelligence."""
return PerceptionService(
event_store=MockEventStore(),
event_store=get_event_store(),
retrieval_service=get_retrieval_service(),
)
@lru_cache
def get_crawl_service() -> CrawlService:
    """Assemble the CrawlService with one crawler per source label.

    The crawler registry is keyed by the source label; the event store and
    retrieval service come from their own factories, and a fresh ``LlmPipeline``
    is created for the service.
    """
    registry = {}
    registry["CATARC"] = CatarcCrawler()
    registry["国标委·强制性"] = GuobiaoMandatoryCrawler()
    registry["国标委·推荐性"] = GuobiaoRecommendedCrawler()
    registry["EUR-Lex"] = EurlexCrawler()

    store = get_event_store()
    pipeline = LlmPipeline()
    retrieval = get_retrieval_service()
    return CrawlService(
        crawlers=registry,
        event_store=store,
        llm_pipeline=pipeline,
        retrieval_service=retrieval,
    )
@@ -284,6 +365,35 @@ def get_agent_session_service() -> AgentSessionService:
return AgentSessionService(conversation_store=get_conversation_store())
@lru_cache
def get_celery_app():
    """Return the shared Celery application instance.

    The import happens inside the function, so Celery is not required when
    running without workers (e.g., tests that mock bootstrap or dev without Redis).
    """
    from app.infrastructure.tasks.celery_app import celery_app as shared_celery_app

    return shared_celery_app
@lru_cache
def get_jwt_handler():
    """Return the shared JWTHandler used for token creation and validation.

    Secret key, algorithm and expiry (minutes) are read from the ``auth_*`` settings.
    """
    from app.infrastructure.auth.jwt_handler import JWTHandler

    handler_config = {
        "secret_key": settings.auth_secret_key,
        "algorithm": settings.auth_algorithm,
        "expire_minutes": settings.auth_token_expire_minutes,
    }
    return JWTHandler(**handler_config)
@lru_cache
def get_user_store():
    """Return the PostgreSQL user store; the class is imported on first call."""
    from app.infrastructure.auth.user_store import PostgresUserStore as _UserStore

    user_store = _UserStore()
    return user_store
def preload_runtime_dependencies() -> None:
"""Warm dependencies that are safe and useful to preload during startup."""
LLMFactory.preload_clients(["qwen", "deepseek"])

View File

@@ -1,30 +1,48 @@
# ── Web framework ─────────────────────────────────────────────────────────────
fastapi>=0.110.0
uvicorn[standard]>=0.27.0
python-multipart>=0.0.9
# ── Config & utilities ────────────────────────────────────────────────────────
pydantic>=2.0.0
pydantic-settings>=2.0.0
python-dotenv>=1.0.0
loguru>=0.7.0
httpx>=0.25.0
beautifulsoup4>=4.12.0
lxml>=5.0.0
tiktoken>=0.5.0
tenacity>=8.2.0
# ── Auth ──────────────────────────────────────────────────────────────────────
python-jose[cryptography]>=3.3.0
# passlib is incompatible with bcrypt>=4.0 (removed __about__, strict 72-byte limit).
# Pin bcrypt to 3.x until passlib ships a fix.
passlib[bcrypt]>=1.7.4
bcrypt>=3.2.0,<4.0.0
# ── Async task queue ──────────────────────────────────────────────────────────
celery>=5.3.0
redis>=4.5.0
# ── Storage & databases ───────────────────────────────────────────────────────
pymilvus>=2.4.0
minio>=7.1.0
psycopg2-binary>=2.9.0
# ── Document parsing ─────────────────────────────────────────────────────────
pymupdf>=1.24.0
python-docx>=1.1.0
numpy>=1.24.0
alibabacloud-docmind-api20220711>=1.0.6
alibabacloud-tea-openapi>=0.3.11
alibabacloud-tea-util>=0.3.13
# ── RAG / LangChain ───────────────────────────────────────────────────────────
langchain>=0.1.0
langchain-milvus>=0.1.0
numpy>=1.24.0
# ── Testing ───────────────────────────────────────────────────────────────────
pytest>=7.4.0
pytest-asyncio>=0.21.0
fakeredis>=2.0.0

View File

View File

@@ -0,0 +1,140 @@
import asyncio
import pytest
from unittest.mock import MagicMock, patch
from datetime import datetime
from app.infrastructure.vectorstore.pass_through_reranker import PassThroughReranker
from app.domain.retrieval.models import RetrievedChunk
from app.domain.compliance.ports import AnalysisRecord, FindingRecord
# ── helpers ──────────────────────────────────────────────────────────────────
def _make_chunk(score: float) -> RetrievedChunk:
    """Build a RetrievedChunk fixture in which only ``score`` varies."""
    fixed_fields = {
        "chunk_id": "c1",
        "doc_id": "d1",
        "doc_title": "Test Doc",
        "section_title": "S1",
        "text": "some text",
        "page_start": 1,
    }
    return RetrievedChunk(score=score, **fixed_fields)
def _make_mock_client(content: str = '{"status":"ok","title":"T","desc":"D","clause_ref":"A1"}'):
client = MagicMock()
response = MagicMock()
response.is_success = True
response.content = content
client.chat.return_value = response
return client
def _make_mock_retrieval():
svc = MagicMock()
svc.retrieve.return_value = []
return svc
# ── existing tests ────────────────────────────────────────────────────────────
def test_pass_through_returns_top_k():
    """With three chunks and top_k=2, two come back and the first has score 0.9."""
    candidates = [_make_chunk(value) for value in (0.9, 0.8, 0.7)]
    ranked = PassThroughReranker().rerank(query="test", chunks=candidates, top_k=2)

    assert len(ranked) == 2
    assert ranked[0].score == 0.9
def test_pass_through_returns_all_when_top_k_exceeds():
    """A top_k larger than the input leaves the single chunk in the result."""
    only_chunk = _make_chunk(0.5)
    ranked = PassThroughReranker().rerank(query="test", chunks=[only_chunk], top_k=10)

    assert len(ranked) == 1
# ── new tests ─────────────────────────────────────────────────────────────────
def test_process_single_clause_returns_finding():
    """process_single_clause returns a finding, echoes the index and reports no chunks."""
    from app.application.compliance.pipeline import process_single_clause

    llm = _make_mock_client()
    retrieval = _make_mock_retrieval()

    outcome = process_single_clause("test clause", 0, retrieval, llm)

    assert outcome["finding"] is not None
    assert outcome["index"] == 0
    assert outcome["chunks"] == []
def test_run_clauses_parallel_runs_all():
    """Three clauses produce three results whose ``index`` matches their position."""
    from app.application.compliance.pipeline import run_clauses_parallel

    llm = _make_mock_client()
    retrieval = _make_mock_retrieval()
    clause_texts = ["clause one", "clause two", "clause three"]

    outcomes = asyncio.run(run_clauses_parallel(clause_texts, retrieval, llm))

    assert len(outcomes) == 3
    assert all(item["index"] == position for position, item in enumerate(outcomes))
def test_run_clauses_parallel_handles_clause_failure():
    """When the LLM client raises, each clause still yields a result with no finding and no chunks."""
    from app.application.compliance.pipeline import run_clauses_parallel

    retrieval = _make_mock_retrieval()
    failing_llm = MagicMock()
    failing_llm.chat.side_effect = RuntimeError("LLM exploded")

    pending = run_clauses_parallel(["clause one", "clause two"], retrieval, failing_llm)
    outcomes = asyncio.run(pending)

    assert len(outcomes) == 2
    assert all(item["finding"] is None for item in outcomes)
    assert all(item["chunks"] == [] for item in outcomes)
# ── helpers for new tests ─────────────────────────────────────────────────────
def _sample_analysis() -> AnalysisRecord:
    """Return a fixed AnalysisRecord fixture (doc.pdf vs. EU AI Act, no findings)."""
    return AnalysisRecord(
        id="a1",
        created_at=datetime(2026, 6, 8),
        created_by="u",
        doc_name="doc.pdf",
        standard_name="EU AI Act",
        risk_score=72,
        conclusion="Gaps found.",
        actions=[],
        para_text="para",
        highlight_terms=[],
        findings=[],
    )
def _sample_finding(status: str = "risk") -> FindingRecord:
    """Return a fixed FindingRecord fixture; only ``status`` is configurable."""
    return FindingRecord(
        id="f1",
        analysis_id="a1",
        seq=0,
        title="Missing CSMS",
        description="No CSMS certification.",
        status=status,
        clause_ref="Art.9.1",
    )
# ── new tests ─────────────────────────────────────────────────────────────────
def test_build_finding_context_contains_required_fields():
    """The built context mentions the document, the standard, the finding title and the clause ref."""
    from app.application.compliance.pipeline import build_finding_context

    context = build_finding_context(_sample_finding(), _sample_analysis())

    assert "doc.pdf" in context
    assert "EU AI Act" in context
    assert "Missing CSMS" in context
    assert "Art.9.1" in context
def test_generate_suggestions_returns_three_questions():
    """A well-formed LLM reply with three questions yields three string suggestions."""
    from app.application.compliance.pipeline import generate_suggestions

    llm = _make_mock_client('{"questions": ["Q1?", "Q2?", "Q3?"]}')

    suggestions = generate_suggestions(_sample_finding("risk"), _sample_analysis(), llm)

    assert len(suggestions) == 3
    assert all(isinstance(item, str) for item in suggestions)
def test_generate_suggestions_falls_back_on_error():
    """An unsuccessful LLM response still results in three suggestions."""
    from app.application.compliance.pipeline import generate_suggestions

    failed_reply = MagicMock()
    failed_reply.is_success = False
    failing_llm = MagicMock()
    failing_llm.chat.return_value = failed_reply

    suggestions = generate_suggestions(_sample_finding(), _sample_analysis(), failing_llm)

    assert len(suggestions) == 3  # fallback always returns 3

View File

@@ -0,0 +1,98 @@
from unittest.mock import MagicMock, patch
from datetime import datetime
from app.domain.compliance.ports import (
AnalysisRecord,
FindingRecord,
ComplianceRepository,
)
def _mock_pool():
"""Return a mock psycopg2 ThreadedConnectionPool."""
conn = MagicMock()
cursor = MagicMock()
cursor.__enter__ = MagicMock(return_value=cursor)
cursor.__exit__ = MagicMock(return_value=False)
conn.cursor.return_value = cursor
pool = MagicMock()
pool.getconn.return_value = conn
return pool, conn, cursor
@patch("app.infrastructure.compliance.repository.psycopg2.pool.ThreadedConnectionPool")
def test_save_analysis_returns_uuid(mock_pool_cls):
    """save_analysis returns the ``id`` value fetched from the (mocked) cursor.

    The connection pool class is patched, so no database is contacted.
    """
    from app.infrastructure.compliance.repository import PostgresComplianceRepository

    pool, conn, cursor = _mock_pool()
    mock_pool_cls.return_value = pool
    cursor.fetchone.return_value = {"id": "abc-123"}

    repo = PostgresComplianceRepository(
        host="localhost", port=5432, user="u", password="p", dbname="db"
    )
    record = AnalysisRecord(
        id="",
        # Fixed timestamp: keeps the fixture deterministic and avoids the
        # deprecated datetime.utcnow().
        created_at=datetime(2026, 6, 8),
        created_by="user1",
        doc_name="doc.pdf",
        standard_name="EU AI Act",
        risk_score=50,
        conclusion="OK",
        actions=[],
        para_text="p",
        highlight_terms=[],
        findings=[],
    )

    result = repo.save_analysis(record)

    assert result == "abc-123"
def test_analysis_record_construction():
    """An AnalysisRecord built with one nested FindingRecord exposes the values it was given."""
    record = AnalysisRecord(
        id="",
        # Fixed timestamp: keeps the fixture deterministic and avoids the
        # deprecated datetime.utcnow().
        created_at=datetime(2026, 6, 8),
        created_by="user1",
        doc_name="test.pdf",
        standard_name="EU AI Act",
        risk_score=72,
        conclusion="Several gaps found.",
        actions=[{"label": "Fix", "value": "Update docs"}],
        para_text="The system shall...",
        highlight_terms=["CSMS", "ISO 21434"],
        findings=[
            FindingRecord(
                id="",
                analysis_id="",
                seq=0,
                title="Missing CSMS",
                description="No CSMS certification found.",
                status="risk",
                clause_ref="Art.9.1",
            )
        ],
    )

    assert record.doc_name == "test.pdf"
    assert len(record.findings) == 1
    assert record.findings[0].status == "risk"
def test_compliance_repository_is_abstract():
    """ComplianceRepository must be reported as abstract by ``inspect.isabstract``."""
    import inspect

    is_abstract = inspect.isabstract(ComplianceRepository)
    assert is_abstract
def test_generate_docx_returns_bytes():
    """generate_docx returns more than 1000 bytes that form a valid ZIP container."""
    import io
    import zipfile

    from app.infrastructure.compliance.docx_export import generate_docx

    finding = FindingRecord(
        id="f1", analysis_id="test-id", seq=0,
        title="Missing CSMS", description="No CSMS cert.",
        status="risk", clause_ref="Art.9.1",
    )
    record = AnalysisRecord(
        id="test-id",
        created_at=datetime(2026, 6, 8),
        created_by="user1",
        doc_name="test.pdf",
        standard_name="EU AI Act",
        risk_score=72,
        conclusion="Several gaps found.",
        actions=[{"label": "Fix", "value": "Update CSMS docs"}],
        para_text="The system shall implement CSMS.",
        highlight_terms=["CSMS"],
        findings=[finding],
    )

    payload = generate_docx(record)

    assert isinstance(payload, bytes)
    assert len(payload) > 1000  # DOCX is at minimum a ZIP with ~1 KB overhead
    # Verify it's a valid ZIP (DOCX = ZIP container)
    assert zipfile.is_zipfile(io.BytesIO(payload))

View File

View File

@@ -0,0 +1,95 @@
"""Contract tests: any BaseEventStore implementation must pass these."""
from app.infrastructure.perception.base_event_store import BaseEventStore
from app.infrastructure.perception.mock_event_store import MockEventStore
def _store() -> BaseEventStore:
    """Create the store implementation exercised by these contract tests."""
    store_under_test = MockEventStore()
    return store_under_test
def test_is_base_event_store():
    """The store under test is a BaseEventStore instance."""
    candidate = _store()
    assert isinstance(candidate, BaseEventStore)
def test_all_returns_list():
    """``all()`` returns a non-empty list."""
    events = _store().all()
    assert isinstance(events, list)
    assert len(events) > 0
def test_get_known_id():
    """``get()`` with the id of the first listed event returns that event."""
    store = _store()
    known_id = store.all()[0]["id"]

    fetched = store.get(known_id)

    assert fetched is not None
    assert fetched["id"] == known_id
def test_get_unknown_returns_none():
    """``get()`` with an id that is not stored returns None."""
    missing = _store().get("does-not-exist")
    assert missing is None
def test_filter_by_impact():
    """Every event returned for ``impact_level="high"`` carries that impact level."""
    high_events = _store().filter(impact_level="high", limit=100)
    for event in high_events:
        assert event["impact_level"] == "high"
def test_filter_limit():
    """``filter(limit=3)`` returns at most three events."""
    limited = _store().filter(limit=3)
    assert len(limited) <= 3
def test_stats_keys():
    """``stats()`` exposes the four expected counter keys."""
    required_keys = ("total", "high_impact", "medium_impact", "recent_90d")
    summary = _store().stats()
    for key in required_keys:
        assert key in summary, f"missing key: {key}"
def test_upsert_and_get():
    """An event written with ``upsert()`` can be read back by id with its title intact."""
    new_event = dict(
        id="test-upsert-001",
        source="TEST",
        source_label="Test Source",
        standard_code="TST-001",
        title="Test Event",
        summary="A test event",
        full_text_url="https://example.com",
        status="draft",
        impact_level="low",
        published_at="2026-01-01",
        effective_at=None,
        category="test",
        tags=["test"],
        content_hash="abc123",
        previous_hash=None,
    )
    store = _store()

    store.upsert(new_event)
    fetched = store.get("test-upsert-001")

    assert fetched is not None
    assert fetched["title"] == "Test Event"
def test_get_by_standard_code():
    """Looking up the first event's standard code returns an event with that code."""
    store = _store()
    code = store.all()[0]["standard_code"]

    match = store.get_by_standard_code(code)

    assert match is not None
    assert match["standard_code"] == code
def test_upsert_updates_existing():
    """Upserting under an existing id replaces the stored title."""
    store = _store()
    existing = store.all()[0]
    original_id = existing["id"]

    replacement = {
        "id": original_id,
        "title": "Updated Title",
        "impact_level": existing["impact_level"],
        "standard_code": existing.get("standard_code", ""),
        "source": existing["source"],
        "source_label": existing.get("source_label", ""),
        "summary": "Updated",
        "full_text_url": "",
        "status": existing["status"],
        "published_at": existing.get("published_at", ""),
        "effective_at": None,
        "category": existing.get("category", ""),
        "tags": [],
        "content_hash": "newhash",
        "previous_hash": None,
    }
    store.upsert(replacement)

    refreshed = store.get(original_id)
    assert refreshed is not None
    assert refreshed["title"] == "Updated Title"

View File

@@ -0,0 +1,111 @@
"""Integration tests for CrawlService."""
from __future__ import annotations
from unittest.mock import MagicMock
import hashlib
import pytest
from app.infrastructure.perception.crawlers.base import RawEvent
from app.infrastructure.perception.mock_event_store import MockEventStore
def _make_raw_event(code="TST-001"):
    """Build a RawEvent fixture for source "TEST"; ``code`` sets the standard code and title suffix."""
    return RawEvent(
        source="TEST",
        source_label="Test",
        standard_code=code,
        title=f"Test {code}",
        summary="Summary",
        full_text_url="https://example.com",
        status="enacted",
        published_at="2026-01-01",
        effective_at=None,
        category="test",
        tags=["test"],
        raw_text="full text",
    )
def _make_service(raw_events):
    """Build a CrawlService around mocks.

    The single "TEST" crawler's ``fetch()`` returns ``raw_events``; the LLM
    pipeline mock returns canned structure / impact / diff results; events are
    stored in a fresh MockEventStore.
    """
    from app.application.perception.crawl_service import CrawlService

    crawler = MagicMock()
    crawler.fetch.return_value = raw_events

    pipeline = MagicMock()
    pipeline.extract_structure.return_value = {
        "obligations": [],
        "deadlines": [],
        "scope": "test",
        "penalties": None,
        "impact_level": "low",
    }
    pipeline.assess_impact.return_value = []
    pipeline.compute_diff.return_value = {
        "changed_sections": [],
        "change_summary": "No changes.",
    }

    retrieval = MagicMock()
    event_store = MockEventStore()
    return CrawlService(
        crawlers={"TEST": crawler},
        event_store=event_store,
        llm_pipeline=pipeline,
        retrieval_service=retrieval,
    )
def test_crawl_yields_progress_and_done():
    """The stream produced by ``run_crawl()`` contains an item whose ``event`` is "done"."""
    service = _make_service([_make_raw_event("TST-001")])

    emitted = list(service.run_crawl())
    kinds = []
    for item in emitted:
        kinds.append(item.get("event"))

    assert "done" in kinds
def test_crawl_upserts_to_store():
    """After a crawl, the fetched event can be found in the store by its standard code."""
    from app.application.perception.crawl_service import CrawlService

    event_store = MockEventStore()

    crawler = MagicMock()
    crawler.fetch.return_value = [_make_raw_event("NEW-001")]

    pipeline = MagicMock()
    pipeline.extract_structure.return_value = {
        "obligations": [],
        "deadlines": [],
        "scope": "",
        "penalties": None,
        "impact_level": "medium",
    }
    pipeline.assess_impact.return_value = []
    pipeline.compute_diff.return_value = {
        "changed_sections": [],
        "change_summary": "",
    }

    service = CrawlService(
        crawlers={"TEST": crawler},
        event_store=event_store,
        llm_pipeline=pipeline,
        retrieval_service=MagicMock(),
    )
    # run_crawl() is consumed via list() to drive it to completion.
    list(service.run_crawl())

    stored = event_store.get_by_standard_code("NEW-001")
    assert stored is not None
    assert stored["title"] == "Test NEW-001"
def test_crawl_skips_unchanged_events():
    """An event whose stored content hash equals the crawled text's hash is not re-processed.

    The store is pre-seeded with an event for "SKIP-001" carrying the SHA-256
    of the raw text; after the crawl, the pipeline's ``extract_structure`` must
    not have been called.
    """
    from app.application.perception.crawl_service import CrawlService

    store = MockEventStore()
    raw = _make_raw_event("SKIP-001")
    content_hash = hashlib.sha256(raw.raw_text.encode()).hexdigest()
    store.upsert({
        # Plain literal: the original f-string had no placeholders.
        "id": hashlib.sha256("TEST-SKIP-001".encode()).hexdigest()[:12],
        "standard_code": "SKIP-001",
        "source": "TEST",
        "source_label": "Test",
        "title": "Test SKIP-001",
        "summary": "",
        "full_text_url": "",
        "status": "enacted",
        "impact_level": "low",
        "published_at": "2026-01-01",
        "effective_at": None,
        "category": "test",
        "tags": [],
        "content_hash": content_hash,
    })

    mock_pipeline = MagicMock()
    mock_crawler = MagicMock()
    mock_crawler.fetch.return_value = [raw]
    svc = CrawlService(
        crawlers={"TEST": mock_crawler},
        event_store=store,
        llm_pipeline=mock_pipeline,
        retrieval_service=MagicMock(),
    )

    list(svc.run_crawl())

    mock_pipeline.extract_structure.assert_not_called()

View File

@@ -0,0 +1,127 @@
"""Unit tests for crawlers — mock httpx responses."""
from __future__ import annotations
from unittest.mock import MagicMock, patch
import pytest
from app.infrastructure.perception.crawlers.base import RawEvent, BaseCrawler
def test_raw_event_fields():
    """A RawEvent keeps the ``source`` and ``tags`` values it was constructed with."""
    payload = {
        "source": "TEST",
        "source_label": "Test",
        "standard_code": "TST-001",
        "title": "Test",
        "summary": "Summary",
        "full_text_url": "https://example.com",
        "status": "enacted",
        "published_at": "2026-01-01",
        "effective_at": None,
        "category": "test",
        "tags": ["a"],
        "raw_text": "full text here",
    }
    event = RawEvent(**payload)

    assert event.source == "TEST"
    assert event.tags == ["a"]
# HTML fixture used as the mocked response text in test_catarc_crawler_parses_html:
# one table with two rows, each holding a linked standard code, a title, a date
# and a status cell.
CATARC_HTML = """
<html><body>
<table>
<tr>
<td><a href="/std/detail/123">GB 18384-2025</a></td>
<td>电动汽车安全要求</td>
<td>2025-11-15</td>
<td>现行</td>
</tr>
<tr>
<td><a href="/std/detail/456">GB/T 40429-2026</a></td>
<td>汽车驾驶自动化分级</td>
<td>2026-02-01</td>
<td>即将实施</td>
</tr>
</table>
</body></html>
"""
def test_catarc_crawler_parses_html():
    """With ``httpx.get`` patched to return CATARC_HTML, the crawler yields RawEvents including "GB 18384-2025"."""
    from app.infrastructure.perception.crawlers.catarc_crawler import CatarcCrawler

    fake_response = MagicMock()
    fake_response.status_code = 200
    fake_response.text = CATARC_HTML
    fake_response.raise_for_status = MagicMock()

    with patch("httpx.get", return_value=fake_response):
        parsed = CatarcCrawler().fetch(limit=10)

    assert isinstance(parsed, list)
    assert len(parsed) >= 1
    assert all(isinstance(item, RawEvent) for item in parsed)
    assert "GB 18384-2025" in [item.standard_code for item in parsed]
# JSON fixture returned by the mocked response's .json() in
# test_guobiao_crawler_parses_json: a single-row "rows" list describing one standard.
GUOBIAO_JSON = {
"rows": [
{
"std_code": "GB 18384-2025",
"std_name": "电动汽车安全要求",
"release_date": "2025-11-15",
"implement_date": "2026-07-01",
"std_status": "现行",
"std_type": "强制性",
},
]
}
def test_guobiao_crawler_parses_json():
    """With ``httpx.get`` patched to return GUOBIAO_JSON, the first event has source "国标委" and the fixture's code."""
    from app.infrastructure.perception.crawlers.guobiao_crawler import GuobiaoMandatoryCrawler

    fake_response = MagicMock()
    fake_response.status_code = 200
    fake_response.json.return_value = GUOBIAO_JSON
    fake_response.raise_for_status = MagicMock()

    with patch("httpx.get", return_value=fake_response):
        parsed = GuobiaoMandatoryCrawler().fetch(limit=10)

    assert len(parsed) >= 1
    first = parsed[0]
    assert first.source == "国标委"
    assert first.standard_code == "GB 18384-2025"
# RSS 2.0 fixture with a single <item>; test_eurlex_crawler_parses_rss assigns it
# to both .text and .content of the mocked response.
EURLEX_RSS = """<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>EUR-Lex</title>
<item>
<title>Regulation (EU) 2024/1689 — AI Act</title>
<link>https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX:32024R1689</link>
<description>The EU Artificial Intelligence Act enters into force.</description>
<pubDate>Fri, 12 Jul 2024 00:00:00 GMT</pubDate>
</item>
</channel>
</rss>"""
def test_eurlex_crawler_parses_rss():
    """With ``httpx.get`` patched to return EURLEX_RSS, the crawler yields events whose first source is "EUR-Lex"."""
    from app.infrastructure.perception.crawlers.eurlex_crawler import EurlexCrawler

    fake_response = MagicMock()
    fake_response.status_code = 200
    fake_response.text = EURLEX_RSS
    fake_response.content = EURLEX_RSS
    fake_response.raise_for_status = MagicMock()

    with patch("httpx.get", return_value=fake_response):
        parsed = EurlexCrawler().fetch(limit=5)

    assert isinstance(parsed, list)
    assert len(parsed) >= 1
    assert parsed[0].source == "EUR-Lex"

View File

@@ -0,0 +1,77 @@
"""Unit tests for LlmPipeline — mock LLM client and embedding provider."""
from __future__ import annotations
from unittest.mock import MagicMock, patch
import json
import pytest
def _make_pipeline():
    """Create an LlmPipeline with its LLM client factory and embedding provider patched.

    Returns ``(pipeline, mock_client, mock_emb)``: the mock client answers
    ``chat()`` with a canned structure-extraction JSON, and the mock embedding
    provider returns two 1024-dimensional vectors from ``embed_texts()``.
    """
    llm_client = MagicMock()
    llm_client.chat.return_value = MagicMock(content='{"obligations":[{"text":"test obligation","deontic":"must","subject":"OEM","object":"system","condition":""}],"deadlines":[{"date":"2026-07-01","description":"实施截止"}],"scope":"适用于M1类车辆","penalties":"罚款","impact_level":"high"}')

    embedder = MagicMock()
    embedder.embed_texts.return_value = [[0.1] * 1024, [0.9] * 1024]

    with patch("app.infrastructure.perception.llm_pipeline.get_llm_client") as llm_factory:
        with patch("app.infrastructure.perception.llm_pipeline.OpenAICompatibleEmbeddingProvider") as embedder_cls:
            llm_factory.return_value = llm_client
            embedder_cls.return_value = embedder

            from app.infrastructure.perception.llm_pipeline import LlmPipeline

            # Constructed while both patches are still active.
            return LlmPipeline(), llm_client, embedder
def test_extract_structure_returns_dict():
    """extract_structure returns a dict containing "obligations" and "impact_level"."""
    pipeline, _client, _embedder = _make_pipeline()
    regulation = dict(
        id="evt-001",
        standard_code="GB 18384-2025",
        title="电动汽车安全要求",
        summary="新增 IP67 级别防护",
        source_label="CATARC",
        tags=["电池安全"],
    )

    structure = pipeline.extract_structure(regulation)

    assert isinstance(structure, dict)
    assert "obligations" in structure
    assert "impact_level" in structure
def test_assess_impact_returns_list():
    """assess_impact returns a list when retrieval yields one chunk and the LLM replies with a JSON array."""
    pipeline, llm_client, _embedder = _make_pipeline()
    llm_client.chat.return_value = MagicMock(content='[{"doc_id":"d1","doc_name":"Safety Manual","score":0.85,"key_clauses":"§4.2","recommendation":"更新第4章"}]')

    hit = MagicMock()
    hit.doc_id = "d1"
    hit.doc_title = "Safety Manual"
    hit.score = 0.85
    hit.text = "relevant text"
    hit.section_title = "§4.2"

    retrieval = MagicMock()
    retrieval.retrieve.return_value = [hit]

    regulation = {
        "standard_code": "GB 18384-2025",
        "title": "电动汽车安全要求",
        "obligations": [{"text": "OEM shall comply"}],
    }

    impacted = pipeline.assess_impact(regulation, retrieval)

    assert isinstance(impacted, list)
def test_compute_diff_no_change():
    """For identical texts and identical embeddings, compute_diff returns a dict with both result keys."""
    pipeline, _client, embedder = _make_pipeline()
    embedder.embed_texts.return_value = [[0.5] * 1024, [0.5] * 1024]

    diff = pipeline.compute_diff("paragraph one", "paragraph one")

    assert isinstance(diff, dict)
    for expected_key in ("changed_sections", "change_summary"):
        assert expected_key in diff
def test_compute_diff_detects_change():
    """With two orthogonal embeddings, compute_diff still returns ``changed_sections`` as a list."""
    pipeline, llm_client, embedder = _make_pipeline()

    old_vector = [1.0] + [0.0] * 1023
    new_vector = [0.0] + [1.0] + [0.0] * 1022
    embedder.embed_texts.return_value = [old_vector, new_vector]
    llm_client.chat.return_value = MagicMock(content='{"change_type":"tightened","summary":"Requirement tightened"}')

    diff = pipeline.compute_diff("old paragraph text", "new tighter requirement text")

    assert isinstance(diff["changed_sections"], list)

View File

@@ -0,0 +1,98 @@
"""Unit tests for PostgresEventStore using a mocked psycopg2 pool."""
from __future__ import annotations
import json
from unittest.mock import MagicMock, patch
import pytest
# Patch psycopg2 before importing the module under test.
# dict.setdefault() registers each stand-in only when sys.modules has no entry
# for that name yet, so an already-imported psycopg2 module is left untouched.
import sys
mock_psycopg2 = MagicMock()
mock_psycopg2.extras = MagicMock()
sys.modules.setdefault("psycopg2", mock_psycopg2)
sys.modules.setdefault("psycopg2.extras", mock_psycopg2.extras)
sys.modules.setdefault("psycopg2.pool", MagicMock())
from app.infrastructure.perception.base_event_store import BaseEventStore
# One event row as a plain dict; test_filter_returns_list feeds it to the mocked
# cursor as the result of fetchall()/fetchone().
SAMPLE_ROW = {
"id": "pg-001",
"source": "国标委",
"source_label": "国家标准化管理委员会",
"standard_code": "GB 18384-2025",
"title": "电动汽车安全要求",
"summary": "新增要求",
"full_text_url": "https://openstd.samr.gov.cn",
"status": "enacted",
"impact_level": "high",
"published_at": "2025-11-15",
"effective_at": "2026-07-01",
"category": "电动汽车安全",
"tags": ["电池安全"],
"obligations": None,
"deadlines": None,
"scope": None,
"penalties": None,
"content_hash": "abc123",
"previous_hash": None,
"change_summary": None,
"changed_sections": None,
"affected_docs": None,
"crawled_at": "2026-06-05T10:00:00+00:00",
"processed_at": None,
"raw_storage_key": None,
}
def _make_store_with_pool(mock_pool):
    """Instantiate PostgresEventStore with the pool class and ``_ensure_schema`` patched.

    ``ThreadedConnectionPool`` is replaced so that it returns ``mock_pool``;
    ``_ensure_schema`` is replaced by a mock for the duration of construction.
    """
    pool_patch = patch("psycopg2.pool.ThreadedConnectionPool", return_value=mock_pool)
    schema_patch = patch(
        "app.infrastructure.perception.postgres_event_store.PostgresEventStore._ensure_schema"
    )
    with pool_patch, schema_patch:
        from app.infrastructure.perception.postgres_event_store import PostgresEventStore

        return PostgresEventStore()
def _cursor_returning(rows):
cursor = MagicMock()
cursor.__enter__ = lambda s: s
cursor.__exit__ = MagicMock(return_value=False)
cursor.fetchall.return_value = rows
cursor.fetchone.return_value = rows[0] if rows else None
return cursor
def test_is_base_event_store():
    """A PostgresEventStore built on a mocked pool is a BaseEventStore."""
    store = _make_store_with_pool(MagicMock())
    assert isinstance(store, BaseEventStore)
def test_filter_returns_list():
    """``filter()`` returns a list when the mocked cursor serves SAMPLE_ROW."""
    connection = MagicMock()
    connection.__enter__ = lambda self: self
    connection.__exit__ = MagicMock(return_value=False)
    connection.cursor.return_value = _cursor_returning([SAMPLE_ROW])

    pool = MagicMock()
    pool.getconn.return_value = connection

    store = _make_store_with_pool(pool)
    events = store.filter(limit=10)

    assert isinstance(events, list)
def test_stats_returns_correct_keys():
    """``stats()`` contains the four counter keys when every query returns ``{"count": 5}``."""
    cur = MagicMock()
    cur.__enter__ = lambda self: self
    cur.__exit__ = MagicMock(return_value=False)
    cur.fetchone.return_value = {"count": 5}

    connection = MagicMock()
    connection.__enter__ = lambda self: self
    connection.__exit__ = MagicMock(return_value=False)
    connection.cursor.return_value = cur

    pool = MagicMock()
    pool.getconn.return_value = connection

    summary = _make_store_with_pool(pool).stats()

    expected_keys = ("total", "high_impact", "medium_impact", "recent_90d")
    for key in expected_keys:
        assert key in summary

36
dev.sh
View File

@@ -549,7 +549,7 @@ AI+合规智能中枢统一脚本
用法:
./dev.sh help
./dev.sh setup
./dev.sh start [all|api|frontend] [--foreground] [--mode dev|static]
./dev.sh start [all|api|frontend|worker|beat] [--foreground] [--mode dev|static]
./dev.sh stop [all|api|frontend]
./dev.sh restart [all|api|frontend] [--mode dev|static]
./dev.sh status
@@ -563,6 +563,9 @@ AI+合规智能中枢统一脚本
进行一次性的本地初始化。
包含 Python 版本检查、.venv 虚拟环境创建、后端依赖安装、前端 npm install、
以及 6.86.80.8 基础服务端口连通性检查。
初始化完成后,首次运行前还需执行:
PYTHONPATH=backend .venv/bin/python scripts/seed_users.py
以创建 admin/legal/ehs/readonly 四个演示用户。
start
启动服务。默认行为等同于 ./dev.sh start all。
@@ -570,6 +573,8 @@ AI+合规智能中枢统一脚本
all 同时启动 API 和前端。
api 只启动后端 API。
frontend 只启动前端。
worker 启动 Celery 文档处理 worker前台运行需要 Redis
beat 启动 Celery Beat 定时调度器(前台运行,需要 Redis
可选参数:
--foreground 仅对 start api 生效,前台运行并开启 --reload便于调试。
--mode dev 前端使用 Vite 开发服务器,默认端口 5173。
@@ -578,6 +583,7 @@ AI+合规智能中枢统一脚本
stop
停止服务。默认行为等同于 ./dev.sh stop all。
会优先读取 logs/*.pidPID 文件失效时会回退到端口探测。
注意: worker 和 beat 为前台进程,直接 Ctrl+C 停止。
restart
先停止再启动,支持 all/api/frontend。
@@ -601,8 +607,11 @@ AI+合规智能中枢统一脚本
常用示例:
./dev.sh setup
PYTHONPATH=backend .venv/bin/python scripts/seed_users.py
./dev.sh start
./dev.sh start api --foreground
./dev.sh start worker
./dev.sh start beat
./dev.sh start frontend --mode static
./dev.sh restart frontend --mode dev
./dev.sh status
@@ -615,7 +624,7 @@ parse_target() {
local default_target="$1"
local candidate="${2:-}"
case "$candidate" in
all|api|frontend)
all|api|frontend|worker|beat)
echo "$candidate"
;;
*)
@@ -646,6 +655,27 @@ main() {
shift || true
fi
# worker and beat are pass-through — forward remaining args to celery directly.
case "$target" in
worker)
print_header "AI+合规智能中枢 - 启动 Celery Worker"
require_venv
export PYTHONPATH="backend${PYTHONPATH:+:$PYTHONPATH}"
"$VENV_PYTHON" -m celery -A app.infrastructure.tasks.celery_app worker \
--loglevel=info \
--concurrency=2 \
--queues=celery \
"$@"
;;
beat)
print_header "AI+合规智能中枢 - 启动 Celery Beat"
require_venv
export PYTHONPATH="backend${PYTHONPATH:+:$PYTHONPATH}"
"$VENV_PYTHON" -m celery -A app.infrastructure.tasks.celery_app beat \
--loglevel=info \
"$@"
;;
*)
while [ $# -gt 0 ]; do
case "$1" in
--foreground)
@@ -684,6 +714,8 @@ main() {
;;
esac
;;
esac
;;
stop)
target="$(parse_target all "${1:-}")"
print_header "AI+合规智能中枢 - 停止服务"

View File

@@ -58,7 +58,8 @@ services:
retries: 5
restart: unless-stopped
# PostgreSQL数据库 (可选,启用 DOCUMENT_REPOSITORY_BACKEND=postgres 时使用)
# PostgreSQL数据库 (启用 DOCUMENT_REPOSITORY_BACKEND=postgres 时使用
# 合规分析历史记录 Direction B、DOCX 报告下载及 Finding Chat 持久化 Direction C 均依赖此服务)
postgres:
image: postgres:15-alpine
container_name: postgres

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,289 @@
# AI+合规智能中枢 — 下一步开发与优化路线图(设计文档)
- 日期：2026-06-05
- 定位:试点 MVP 走向生产
- 范围:全景清单 + 异步任务化(设计①)+ 法规感知闭环(设计②)深入方案 + 三阶段实施路线图
- 作者：AI Regulations Team（brainstorming 产出）
---
## 0. 背景与目的
本文档基于对当前仓库前后端真实代码的逐文件探查，结合四份愿景文档（`AI_Regulations_Report.pptx`、`AI_Regulations_Architecture.docx`、`01_Architecture.html`、`02_Architecture_Detail.html`）与最新开源 AI 技术调研，给出**下一步可继续开发与优化的方向清单**，并对两个最高价值方向给出可落地的深入设计。
本文档是**方向性设计（spec）**，不是实施计划（plan）。阶段一、阶段二的具体落地由后续 writing-plans 环节拆分为分步计划。
### 0.1 现状一句话
后端是一套结构清晰的 DDD 风格 FastAPI RAG 系统（上传 → 解析 → 分块 → BGE-M3 嵌入 → Milvus → 混合检索 → 流式问答 + 合规分析），**真实可用**。但愿景文档中的多个旗舰能力（知识图谱、法规感知闭环、RBAC、EHS、异步化）目前为 **mock 或缺失**。
---
## 1. 现状盘点(基于真实代码)
### 1.1 已实现且真实可用
- **文档处理主链路**：`application/documents/services.py::DocumentCommandService.upload_and_process` — 存储 → 解析（阿里云 DocMind / 本地）→ 分块 → BGE-M3 嵌入 → Milvus 入库，含 `DocumentProcessingStore` 全程状态事件记录。
- **混合检索**：`application/knowledge/services.py::KnowledgeRetrievalService` — Dense（`DenseRetriever`）+ BM25（jieba）+ Reciprocal Rank Fusion + 可选 Cross-Encoder 重排。
- **流式 RAG 问答**：`application/agent/services.py::AgentConversationService.stream_chat` + `api/routes/rag.py` — 真实检索 + 引文 + 会话历史 + SSE。
- **合规分析管线**：`application/compliance/pipeline.py` — clause_split → retrieve → gap_check → conclusion；真实 LLM + 真实检索，SSE 流式（`api/routes/compliance.py::analyze_stream`）。
- **状态/健康面板**：`api/routes/status.py` + 前端 `StatusPage.tsx` — Milvus/MinIO/BM25/Reranker/会话实时状态。
- **存储后端**：PostgreSQL / MinIO 适配器齐全；JSON 与 Postgres 双后端可切换。
- **前端**：React 19 + Vite + Tailwind，6 个页面（Overview/Status/Perception/Docs/Compliance/RagChat）。
### 1.2 愿景已规划但代码缺失或为 mock
| 能力 | 愿景出处 | 代码现状 |
|------|---------|---------|
| 知识图谱 / Neo4j 多跳推理 | 架构图 L4/L5、Slide 5 | 全代码 0 处 neo4j/graph |
| 法规感知自动更新闭环 | 01_Architecture.html L157-193、Slide 11 | `PerceptionService` 用 `MockEventStore`（20 条死数据） |
| 认证 / RBAC / 审计日志 | Slide 12 四角色权限矩阵 | 全代码 0 处 auth/jwt/rbac；`main.py` CORS=`*` |
| 异步任务 / Worker 集群 | 架构图"Worker 集群"、Slide 9 | `app/workers/` 空目录;处理全同步 |
| EHS 隐患识别SIF/四维根因) | Slide 7 | 未实现 |
| 多渠道推送Email/Teams/飞书) | Slide 8 | 未实现 |
| 闭环整改跟踪、可观测性 | 架构图右栏 | 缺失 |
### 1.3 关键发现
- **`requirements.txt:28` 已有 `celery>=5.3.0` + `redis>=4.5.0`**，`docker-compose.yml` 已配 Redis 7，`settings.py` 已有 redis 配置 —— **异步化是"接线"，不是"从零搭建"**。
- **`DocumentProcessingStore` 已能记录 run 状态/状态事件** —— 是天然的任务进度表。
- **`PerceptionService.analyze_event` 的 LLM 影响分析与 RAG 关联检索是真的** —— 感知闭环缺的只是前半段(采集 → Diff → 入库)。
- 后端正处于 legacy 迁移期：`services/*`、`workflows/*` 为兼容层（见 `docs/architecture/backend-project-architecture.md`）。
---
## 2. 全景机会清单
类型标记：`[新能力]`=愿景缺口补齐，`[加固]`=已实现能力优化。价值 ★1-5，工作量 S/M/L。
### P0 — 生产地基(阻断"走向生产"的硬伤)
| # | 机会点 | 类型 | 现状证据 | 价值 | 工作量 |
|---|--------|------|---------|------|--------|
| 1 | 异步任务化（Celery + 已配 Redis）：解析/嵌入/感知/推送下沉 worker | 加固 | `workers/` 空；`documents.py:34` 上传同步阻塞 | ★★★★★ | L |
| 2 | 认证 + RBAC + 审计日志，收紧 CORS | 新能力 | 0 处 auth；`main.py` CORS=`*`；Slide 12 | ★★★★★ | M |
| 3 | 会话 & 任务持久化（内存 → Redis/PG） | 加固 | `bootstrap.py:254` 内存会话；`compliance.py:25` 内存字典 | ★★★★ | M |
| 4 | 基础可观测性（Prometheus + 结构化日志 + 追踪） | 加固 | 仅 loguru；架构图右栏全缺 | ★★★ | M |
### P1 — 高价值能力补齐 + RAG 质量
| # | 机会点 | 类型 | 现状证据 | 价值 | 工作量 |
|---|--------|------|---------|------|--------|
| 5 | 启用并升级 Reranker`bge-reranker-v2.5-gemma2-lightweight` | 加固 | `settings.py:113` 默认关;管线已写好 | ★★★★ | S |
| 6 | Agentic 检索(查询改写/意图理解/多路召回) | 加固 | `agent/services.py` 直接 retrieve无 rewrite/HyDE | ★★★★ | M |
| 7 | 知识图谱 / GraphRAGNeo4j + LightRAG v1.5 | 新能力 | 0 处 neo4jLightRAG v1.5 原生支持 | ★★★★★ | L |
| 8 | 法规感知自动更新闭环(真实采集 + 版本 Diff + 增量重索引) | 新能力 | `perception/services.py` 用 MockEventStore | ★★★★★ | L |
| 9 | 引文置信度评分Slide 5 承诺"置信度评分+页码溯源" | 加固 | `rag.py` sources 无 confidence | ★★★ | S |
| 10 | 检索评估 harnessrecall@k / faithfulness | 加固 | `tests/` 需真实服务,无离线 RAG 评估 | ★★★ | M |
### P2 — 视野扩展(独立子项目)
| # | 机会点 | 类型 | 价值 | 工作量 |
|---|--------|------|------|--------|
| 11 | EHS 隐患识别SIF 评分 + 四维根因 + ISO 45001 扫描Slide 7 | 新能力 | ★★★★ | L |
| 12 | 多渠道推送 + 订阅规则引擎Email/Teams/飞书Slide 8 | 新能力 | ★★★ | M |
| 13 | 闭环整改跟踪(任务派发 → 进度 → 验收归档) | 新能力 | ★★★ | M |
| 14 | 企业系统集成PLM/ERP/OA/MES Webhook | 新能力 | ★★ | L |
| 15 | MinerU 3.1 升级(已转 Apache 协议VLM 解析)作本地兜底 | 加固 | ★★ | S |
| 16 | 前端加固(清 mock 数据、补 error/loading 态、KG 可视化、登录态) | 加固 | ★★★ | M |
| 17 | 收口 legacy 迁移(`services/*``workflows/*` 按架构文档归位) | 加固 | ★★ | M |
---
## 3. 深入设计 ① — 异步任务化
### 3.1 问题
`upload_document`（`api/routes/documents.py:34`）在单个 HTTP 请求内同步跑完 存储 → 解析（阿里云云端可达 900 秒，`settings.py:49`）→ 嵌入 → Milvus 入库。大体量 GB 标准必然超时；`compliance.py` 的 `/analyze` 为假异步（立即返回 mock）；perception 爬取闭环无执行载体。PPT Slide 9 已将"大文件性能"列为关键挑战，对策正是"流式处理 + 异步队列 + 实时进度"。
### 3.2 关键前提:基建已就位
- `requirements.txt:28` 已含 `celery>=5.3.0` + `redis>=4.5.0`
- `docker-compose.yml:46` Redis 7 已配置;`settings.py:64` 已有 redis 连接配置
- `PostgresDocumentProcessingStore` 已记录 run 状态/状态事件 —— 天然任务进度表
- `app/workers/` 为空目录(唯一缺口)
### 3.3 架构(遵循 AGENTS.md 的 `api → application → domain ports → infrastructure`
```
api/routes/documents.py POST /upload
│ 1. 存二进制 + 建 Document 记录(快,同步)
│ 2. enqueue task → 立即返回 {doc_id, status:"queued", run_id}
infrastructure/tasks/ ← 新增
celery_app.py broker=redis, backend=redis
document_tasks.py @task process_document(doc_id) → DocumentCommandService
│ 复用现有 upload_and_process 的 parse→embed→index 段
application/documents/services.py拆分store 与 process 解耦)
│ 每阶段写 DocumentProcessingStore已存在→ 进度可查
api/routes/documents.py GET /status/{doc_id} ← 已存在,读 run 状态即可
```
### 3.4 落地步骤(增量、不破坏现有同步路径)
1. 新增 `infrastructure/tasks/celery_app.py` — Celery 实例broker/backend 指向已配 Redis。
2. 拆分 `upload_and_process``store_document`(同步快)+ `process_document`(可异步),复用现有逻辑,零重写解析/嵌入代码。
3. 新增 `document_tasks.py``@celery_app.task` 包裹 `process_document`,失败用 `tenacity`(已在 deps重试 + 死信。
4. 改 `documents.py` 上传 — 默认入队（保留 `?sync=true` 同步回退便于演示）；`GET /status/{doc_id}` 读 `DocumentProcessingStore` 返回阶段进度。
5. 前端 `DocsPage.tsx` — 上传后轮询/SSE 进度条(架构图 Worker"心跳/状态上报"已是既定设计)。
6. `dev.sh`/`dev.bat` 加 worker 启动:`celery -A app.infrastructure.tasks.celery_app worker`
### 3.5 工作量与风险
- **M（3-5 天）。**
- 最大风险Celery worker 进程内 `PYTHONPATH=backend` 与 bootstrap `lru_cache` 单例需重新初始化 —— 可控,因 bootstrap 已是懒加载。
- YAGNI 边界:本期仅异步化"文档处理"一条链compliance/perception 复用同一 Celery 基建后续接入。
---
## 4. 深入设计 ② — 法规感知自动更新闭环
### 4.1 问题
感知闭环是愿景旗舰能力（`01_Architecture.html` L157-193、Slide 11）。现状：`PerceptionService` 用 `MockEventStore`（`mock_event_store.py:7`，20 条手写死数据），`list_events`/`stats` 全静态，`source_url` 真实但从不访问。**LLM 影响分析与 RAG 关联检索是真的** —— 闭环缺的是前半段：真实采集 → 变更感知（Diff）→ 入库。
### 4.2 六步现状对照
| 步骤 | 愿景设计 | 现状 | 本期目标 |
|------|---------|------|---------|
| ① 法规源监控 | 定时爬国标网/MIIT/UN-ECE/EUR-Lex | ❌ 无 | ✅ 适配器+定时 |
| ② 智能变更感知 | NLP 比对新旧版本 Diff | ❌ 无 | ✅ 内容指纹+LLM Diff |
| ③ 自动解析入库 | MinerU→分块→BGE-M3→Milvus | ✅ 已有(复用设计①管线) | ✅ 接线 |
| ④ 知识图谱更新 | Neo4j 关系同步 | ❌ 无 | ⏭️ 本期不做(归 GraphRAG 专项) |
| ⑤ 差距分析&推送 | AI 比对+按角色推送 | 🟡 analyze_event 已有分析,无推送 | 🟡 分析复用,推送下期 |
| ⑥ 触发整改闭环 | 整改任务跟踪 | ❌ 无 | ⏭️ 下期 |
本期聚焦 ①②③,复用设计①异步管线与已有解析/嵌入/检索/分析能力。
### 4.3 架构(端口与适配器)
```
domain/perception/ports.py                 ← 新增
  RegulationSource (Protocol)              fetch_latest() → list[RawRegulation]
  EventStore (Protocol)                    抽象掉 MockEventStore(现有 mock 成为一个实现)
  ChangeDetector (Protocol)                diff(old, new) → ChangeSet
infrastructure/perception/
  sources/                                 ← 新增,每法规源一个适配器
    gb_openstd_source.py                   国标网 (openstd.samr.gov.cn)
    miit_source.py                         工信部
    base_html_source.py                    通用 HTML 抓取基类(httpx 已在 deps)
  postgres_event_store.py                  ← 替换 MockEventStore(真实持久化)
  content_fingerprint_detector.py          哈希指纹 + LLM 语义 Diff
application/perception/services.py         (扩展现有)
  ingest_cycle()                           ← 新增:①抓取 → ②Diff → ③入队解析(设计①的 task)
  list_events/analyze_event                (保持不变,已是真实逻辑)
infrastructure/tasks/perception_tasks.py   ← 复用设计①的 Celery
  @task perception_crawl_cycle()           Celery Beat 定时触发
```
### 4.4 关键设计决策
1. **接口契约零改动**:`PostgresEventStore` 输出与 `MockEventStore` 完全相同的 dict 结构(mock_event_store.py 的 20 字段),故 `perception.ts` 前端契约、`PerceptionPage.tsx`、`analyze_event` 全部不改。Mock 退化为种子数据/演示回退,通过 `perception_event_store=mock|postgres` 开关切换(对齐现有 `document_repository_backend` 模式)。
2. **变更感知分两层**:廉价层(内容哈希指纹判断"是否变了")+ 智能层(变了才调 LLM 做"新增/修订/废止条款"结构化 Diff,复用 `get_llm_client`,prompt 风格照搬 `compliance/pipeline.py::_extract_json`)。
3. **合规防滥用**:尊重 `robots.txt` + 限速 + `tenacity` 重试 + 抓取失败不污染已有数据;适配器隔离,单源故障不影响其它。
4. **入库复用设计①**:抓到新法规 PDF → 丢进 `process_document` task → 自动走完解析/嵌入/索引。
### 4.5 落地步骤
1. 新增 `domain/perception/ports.py`,让现有 `MockEventStore` 实现 `EventStore` 协议(纯重构,行为不变)。
2. `PostgresEventStore` + 建表(参照 `aliyun_parser/schema.sql` 风格)+ 20 条 mock 作 seed。
3. 先做 1 个真实源适配器(建议国标网,结构最稳)跑通 ①→②→③,验证端到端。
4. `content_fingerprint_detector` + LLM Diff。
5. `perception_crawl_cycle` Celery Beat 定时(每日);新事件落 PostgresEventStore + 新法规入队解析。
6. 前端 `PerceptionPage` 加"最近同步时间/本次新增 N 条"(stats 已有结构,加 2 字段)。
### 4.6 工作量与风险
- **L(5-8 天)**,依赖设计①先落地(共用 Celery)。
- 最大风险:外部源站不可控(改版/反爬)。缓解:适配器隔离 + mock 永久保留为回退 + 先攻 1 个源验证(对齐 Slide 13"选取 2-3 个场景 POC 验证")。
- YAGNI 边界:④Neo4j 图谱、⑥整改闭环、多渠道推送本期不做,各自独立子项目。
---
## 5. 三阶段实施路线图
### 5.1 核心主线
项目不缺"能力点",缺的是**让能力点从同步脚本变成可运营的系统**。主线是**异步化基建**:既是文档处理性能解药(设计①),又是感知闭环执行载体(设计②),也是未来 EHS/推送的统一底座。路线图以它为"第 0 块地基",其余能力挂载其上。
### 5.2 与 PPT 三阶段映射(Slide 10)
```
PPT 规划 代码现状 本路线图补齐
─────────────────────────────────────────────────────
一阶段 知识库+基础问答 ✅ 大体已实现 → 加固 (P0/P1)
二阶段 文档审查+API集成 🟡 审查真/API半 → 异步化+感知闭环
三阶段 EHS+个性化+图谱 ❌ 基本缺失 → 子项目 (P2)
```
### 5.3 阶段一 · 生产地基(2-3 周)— "让它扛得住生产"
| 顺序 | 事项 | 依据 | 估时 |
|------|------|------|------|
| 1 | 设计① 异步任务化 | celery/redis 已在 deps,workers/ 空 | M, 3-5d |
| 2 | 认证 + RBAC + 审计 + 收紧 CORS | 0 处 auth;Slide 12 矩阵 | M, 3-5d |
| 3 | 会话/任务持久化(内存 → Redis/PG) | InMemoryConversationStore 重启即丢 | M, 2-3d |
| 4 | 快赢:启用 Reranker | settings 默认关,管线已写好 | S, 0.5d |
### 5.4 阶段二 · 招牌能力(2-3 周)— "让它有亮点"
建议**感知闭环优先于图谱**(前者复用阶段一异步基建,ROI 更高)。
| 顺序 | 事项 | 依据 | 估时 |
|------|------|------|------|
| 5 | 设计② 法规感知闭环 ①②③ | MockEventStore → 真实采集 | L, 5-8d |
| 6 | Agentic 检索(查询改写/意图理解) | Slide 5"意图理解",代码是直检索 | M, 3-4d |
| 7 | 引文置信度评分 + 基础可观测性 | Slide 5 承诺;架构图右栏全缺 | S+M, 3-4d |
### 5.5 阶段三 · 视野扩展(按需,各为独立子项目)— "让它成体系"
每项单独 brainstorm → spec → 实施,本期不细化:
- 知识图谱 / GraphRAG(Neo4j + LightRAG v1.5,接感知闭环第④步)
- EHS 隐患识别(SIF + 四维根因,Slide 7)
- 多渠道推送 + 订阅规则引擎(Slide 8)→ 闭环整改跟踪(第⑤⑥步)
- 持续加固:MinerU 3.1 升级、前端清 mock、legacy 收口
### 5.6 决策建议
1. 强烈建议按阶段顺序:地基 → 招牌 → 扩展。跳过地基直接做招牌,会在生产暴露超时/无鉴权/数据丢失。
2. 阶段一第 4 项(Reranker)可立即做 —— 半天见效,与其它解耦,适合先尝甜头。
3. 阶段二二选一先行:要 demo 冲击力选"感知闭环";要问答质量选"Agentic 检索"。
---
## 6. 最新 AI 技术调研(支撑选型)
| 技术 | 版本/状态(2026) | 对应机会点 |
|------|------------------|-----------|
| LightRAG | v1.5.0(2026-06),EMNLP 2025,KG-RAG,原生支持 Neo4j + MinerU/Docling,含 Web UI 图谱可视化 | #7 知识图谱 |
| MinerU | v3.1.0(2026-04),协议转为 Apache 2.0 基础的开源协议,VLM 解析(MinerU2.5-Pro),109 语言 OCR | #15 本地解析兜底 |
| BGE Reranker | `bge-reranker-v2.5-gemma2-lightweight`(token 压缩 + 分层轻量化,生产推荐) | #5 Reranker 升级 |
| BGE-M3 | 100+ 语言,8192 上下文,dense+sparse+colbert 统一(现已在用) | 现有嵌入 |
| RAGFlow | 2026 支持 DeepSeek v4 / MCP / 跨语言查询,agentic RAG 参考实现 | #6 Agentic 检索参考 |
---
## 7. 验收与边界
### 7.1 本文档明确不做(YAGNI)
- 阶段三所有子项目(图谱、EHS、推送、整改闭环、企业集成)仅列方向,不在本期展开。
- 移动端适配(AGENTS.md 明确 desktop-first)。
- 感知闭环的第④⑤⑥步(图谱同步、推送、整改)。
### 7.2 架构约束(必须遵守)
- 后端遵循 `api → application → domain ports → infrastructure`(`docs/architecture/backend-project-architecture.md` 为权威)。
- 新业务逻辑不得落入 `services/*`、`workflows/*`(legacy 迁移区)。
- `shared/bootstrap.py` 为依赖装配 composition root,新依赖在此接线。
- 后端注释/docstring 全英文(AGENTS.md 规范)。
### 7.3 下一步
经用户审阅本 spec 后,对**阶段一**(异步任务化优先)调用 writing-plans 拆分为分步实施计划。

View File

@@ -0,0 +1,328 @@
# Regulatory Signals Intelligence Enhancement — Design Spec
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Replace the 20-item hardcoded MockEventStore with real regulatory data from Chinese and international sources, add LLM-driven structured extraction, impact assessment, and semantic change diff — all accessible through a manual-trigger crawl in the frontend.
**Architecture:** Crawler Service (httpx + BeautifulSoup) → PostgreSQL EventStore → LLM Pipeline (extract → assess → diff) → existing PerceptionService interface. New code follows `api → application → domain ports → infrastructure` layering; no new files in `services/*` or `workflows/*`; `shared/bootstrap.py` is the composition root.
**Tech Stack:** httpx, BeautifulSoup4, existing EmbeddingService (text-embedding-v3 via `EMBEDDING_BASE_URL`, used for the semantic diff), existing LLM factory (deepseek/qwen), existing KnowledgeRetrievalService (RAG), PostgreSQL (already available), existing SSE infrastructure.
---
## 1. Data Sources
| Source | URL | Method | Coverage |
|--------|-----|--------|----------|
| CATARC 汽车标准 | `https://www.catarc.org.cn/bzzxd/qcbz/index.html` | httpx + BeautifulSoup (static pages) | 国家/行业汽车标准列表 |
| 国标委强制性标准 | `https://openstd.samr.gov.cn/bzgk/std/std_list_type?p.p1=1&p.p2=车&p.p90=circulation_date&p.p91=desc` | httpx + JSON API parse | 强制性国家标准,按"车"过滤 |
| 国标委推荐性标准 | `https://openstd.samr.gov.cn/bzgk/std/std_list_type?p.p1=2&p.p2=车&p.p90=circulation_date&p.p91=desc` | httpx + JSON API parse | 推荐性国家标准,按"车"过滤 |
| EUR-Lex | RSS + CELLAR REST API | pyeurlex / httpx | EU AI Act, automotive directives |
| UN R155/R156 | CELLAR REST API (CELEX lookup) | httpx | UN-ECE cybersecurity/OTA regulations |
Crawl is **manual-trigger only** — no cron/Celery Beat. Admin clicks "刷新数据源" in the frontend UI.
---
## 2. Database Schema
### New table: `regulation_events`
```sql
CREATE TABLE IF NOT EXISTS regulation_events (
id TEXT PRIMARY KEY, -- sha256(source + standard_code)[:12]
source TEXT NOT NULL, -- 'CATARC' | '国标委' | 'EUR-Lex' | 'UN-ECE'
source_label TEXT, -- Human-readable source label
standard_code TEXT NOT NULL, -- e.g. "GB 18384-2025", "EU/2024/1689"
title TEXT NOT NULL,
summary TEXT, -- Crawled abstract or first paragraph
full_text_url TEXT, -- Original page URL
status TEXT, -- 'enacted' | 'draft' | 'consultation'
impact_level TEXT, -- 'high' | 'medium' | 'low' (LLM-assigned)
published_at DATE,
effective_at DATE,
category TEXT,
tags TEXT[],
-- LLM structured extraction
obligations JSONB, -- [{text, deontic, subject, object, condition}]
deadlines JSONB, -- [{date, description}]
scope TEXT, -- Applicability scope summary
penalties TEXT, -- Penalty / consequence summary
-- Change tracking
content_hash TEXT, -- SHA256 of crawled full text
previous_hash TEXT, -- Hash from prior crawl (NULL on first crawl)
change_summary TEXT, -- LLM-generated description of changes
changed_sections JSONB, -- [{old_text, new_text, change_type}] where cosine<0.85
-- Impact assessment
affected_docs JSONB, -- [{doc_id, doc_name, score, key_clauses, recommendation}]
-- Metadata
crawled_at TIMESTAMPTZ DEFAULT now(),
processed_at TIMESTAMPTZ,
raw_storage_key TEXT -- MinIO path for raw HTML/PDF (optional)
);
CREATE INDEX IF NOT EXISTS regulation_events_source_date
ON regulation_events (source, published_at DESC);
CREATE INDEX IF NOT EXISTS regulation_events_impact_date
ON regulation_events (impact_level, published_at DESC);
CREATE INDEX IF NOT EXISTS regulation_events_tags
ON regulation_events USING gin(tags);
```
---
## 3. Backend Architecture
### 3.1 File Map
**New files (infrastructure layer):**
- `backend/app/infrastructure/perception/crawlers/catarc_crawler.py` — CATARC scraper
- `backend/app/infrastructure/perception/crawlers/guobiao_crawler.py` — 国标委 JSON API crawler
- `backend/app/infrastructure/perception/crawlers/eurlex_crawler.py` — EUR-Lex RSS + CELLAR
- `backend/app/infrastructure/perception/crawlers/base.py` — Abstract base class
- `backend/app/infrastructure/perception/postgres_event_store.py` — PostgresEventStore (replaces MockEventStore)
- `backend/app/infrastructure/perception/llm_pipeline.py` — Extract / assess / diff pipeline
**New files (application layer):**
- `backend/app/application/perception/crawl_service.py` — Orchestrates crawlers + LLM pipeline, exposes `run_crawl(sources)` + progress generator
**Modified files:**
- `backend/app/api/routes/perception.py` — Add `POST /crawl`, `GET /crawl/status` (SSE), `POST /events/{id}/process`, `GET /events/{id}/diff`
- `backend/app/shared/bootstrap.py` — Wire `PostgresEventStore` + `CrawlService` + `LlmPipeline` when `DOCUMENT_REPOSITORY_BACKEND=postgres`; fallback to `MockEventStore` when `json`
- `backend/app/config/settings.py` — Add `perception_crawl_timeout_seconds`, `perception_max_events_per_source`, `perception_diff_similarity_threshold`
**Unchanged files:**
- `backend/app/application/perception/services.py` — `PerceptionService` interface unchanged; only `_store` swap
- `backend/app/infrastructure/perception/mock_event_store.py` — Kept for `json` backend mode
### 3.2 Domain Port (Abstract Interface)
```python
# backend/app/infrastructure/perception/base_event_store.py
from abc import ABC, abstractmethod
class BaseEventStore(ABC):
@abstractmethod
def all(self) -> list[dict]: ...
@abstractmethod
def get(self, event_id: str) -> dict | None: ...
@abstractmethod
def filter(self, source=None, impact_level=None, limit=50) -> list[dict]: ...
@abstractmethod
def stats(self) -> dict: ...
@abstractmethod
def upsert(self, event: dict) -> None: ... # new — needed for crawl writes
@abstractmethod
def get_by_standard_code(self, code: str) -> dict | None: ... # for change detection
```
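`MockEventStore` and `PostgresEventStore` both implement this interface. Note that `upsert` and `get_by_standard_code` are new abstract methods, so `MockEventStore` must gain them as well (simple in-memory implementations are sufficient) even though its existing read behavior stays the same.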
### 3.3 Crawler Base Contract
```python
# backend/app/infrastructure/perception/crawlers/base.py
from abc import ABC, abstractmethod
from dataclasses import dataclass
@dataclass
class RawEvent:
source: str
source_label: str
standard_code: str
title: str
summary: str
full_text_url: str
status: str # 'enacted' | 'draft' | 'consultation'
published_at: str # YYYY-MM-DD string
effective_at: str | None
category: str
tags: list[str]
raw_text: str # full crawled text for hashing + LLM
class BaseCrawler(ABC):
@abstractmethod
def fetch(self, limit: int = 50) -> list[RawEvent]: ...
```
### 3.4 LLM Pipeline
```python
# backend/app/infrastructure/perception/llm_pipeline.py
class LlmPipeline:
"""Runs three sequential LLM steps on a regulation event."""
def extract_structure(self, event: dict) -> dict:
"""Step 1: Extract obligations, deadlines, scope, penalties, impact_level.
Returns dict with keys: obligations, deadlines, scope, penalties, impact_level.
Uses JSON-mode or structured prompt; model retries once on parse failure.
"""
def assess_impact(self, event: dict, retrieval_service) -> list[dict]:
"""Step 2: RAG-based impact on existing knowledge base documents.
Query = standard_code + title + first obligation texts.
Returns list of {doc_id, doc_name, score, key_clauses, recommendation}.
"""
def compute_diff(self, old_text: str, new_text: str) -> dict:
"""Step 3: Semantic diff between old and new regulation text.
Splits both texts by paragraph. Calls existing EmbeddingService (text-embedding-v3
via EMBEDDING_BASE_URL) to embed each paragraph, then computes cosine similarity.
Changed paragraphs (cosine < 0.85) sent to LLM for change_type classification:
'tightened' | 'relaxed' | 'added' | 'removed'
Returns {changed_sections: [...], change_summary: str}.
Only called when content_hash differs from previous_hash.
"""
```
### 3.5 CrawlService
```python
# backend/app/application/perception/crawl_service.py
class CrawlService:
def __init__(self, crawlers, event_store, llm_pipeline, retrieval_service): ...
def run_crawl(self, sources: list[str] | None = None) -> Generator[dict, None, None]:
"""Manual-trigger crawl. Yields progress SSE dicts:
{event: 'progress', data: {source, fetched, new, updated, stage}}
{event: 'done', data: {total_new, total_updated, duration_ms}}
{event: 'error', data: {source, message}}
For each crawler:
1. fetch() RawEvents
2. hash check vs stored event → skip if unchanged
3. upsert raw event to DB
4. run LLM pipeline (extract → assess → diff)
5. upsert enriched event to DB
6. yield progress
"""
```
---
## 4. API Endpoints
### Existing (unchanged interface, new store backend)
- `GET /api/v1/perception/stats`
- `GET /api/v1/perception/events`
- `GET /api/v1/perception/events/{id}`
- `POST /api/v1/perception/events/{id}/analyze` (streaming)
### New endpoints
```
POST /api/v1/perception/crawl
Body: { sources?: ["CATARC", "国标委", "EUR-Lex", "UN-ECE"] }
Response: text/event-stream (SSE)
Auth: requires current_user (admin/legal role)
Streams progress events until done or error.
POST /api/v1/perception/events/{id}/process
Trigger LLM pipeline for a single already-crawled event.
Response: { status: "ok", processed_at: "..." }
Auth: requires current_user
GET /api/v1/perception/events/{id}/diff
Returns: { changed_sections: [...], change_summary: str, previous_hash: str }
Returns 404 if no diff available (first crawl or no change detected).
```
---
## 5. Frontend Changes
### 5.1 New: Crawl Control Bar (top of PerceptionPage)
Above the stats-bar, add a `<CrawlBar>` component:
- "刷新数据源" button — triggers `POST /crawl` (all sources)
- Inline progress display: shows SSE progress events as a mini status line
- e.g. "CATARC: 抓取中… | 国标委: 12 条新增 | EUR-Lex: 等待中"
- On completion: shows "更新完成 — 新增 N 条,更新 M 条"
- Disabled while crawl is in progress (prevents double-trigger)
### 5.2 Signal Card Enhancement
Existing cards get two new indicators:
- **NEW badge** — shown when `crawled_at` is within last 24h (green dot)
- **CHANGED badge** — shown when `previous_hash != content_hash` and `change_summary` exists
### 5.3 Right Panel — Structured Tab
Right detail panel adds a tab bar: **概览 | 义务条款 | 影响评估 | 变更对比**
**义务条款 tab:**
- Table: 义务描述 | 主体 | 对象 | 截止日期
- Tags for deontic type: 强制 / 禁止 / 允许
- Shows `obligations[]` + `deadlines[]` from DB
**影响评估 tab:**
- Replaces hardcoded MOCK_DOCS with real `affected_docs[]` from DB
- Each row: document name, similarity score (%), key clause excerpt, LLM recommendation
- "Run fresh assessment" button → triggers `POST /events/{id}/process`
**变更对比 tab:**
- Only visible when `change_summary` is non-null
- Top: `change_summary` text (LLM prose)
- Below: diff table with old/new paragraph pairs, change_type badge per row
- Hidden (tab disabled) on first-crawl events with no prior version
### 5.4 Existing behavior preserved
- `analyze` streaming (AI analysis) unchanged
- Search/filter (source, impact) unchanged — now hits real DB data
- Stats bar — now reflects real counts from PostgreSQL
---
## 6. Settings Additions
```python
# backend/app/config/settings.py additions
perception_crawl_timeout_seconds: int = Field(default=120, ...)
perception_max_events_per_source: int = Field(default=100, ...)
perception_diff_similarity_threshold: float = Field(default=0.85, ...)
```
```env
# .env additions
PERCEPTION_CRAWL_TIMEOUT_SECONDS=120
PERCEPTION_MAX_EVENTS_PER_SOURCE=100
PERCEPTION_DIFF_SIMILARITY_THRESHOLD=0.85
```
---
## 7. Dependencies
```
# requirements.txt additions
httpx>=0.27.0 # already likely present; confirm
beautifulsoup4>=4.12.0 # HTML parsing for CATARC
lxml>=5.0.0 # BeautifulSoup parser backend
# sentence-transformers NOT added — diff uses existing text-embedding-v3 API (EMBEDDING_BASE_URL)
```
No new infrastructure required (PostgreSQL + MinIO + Milvus already available).
---
## 8. Backward Compatibility
- `DOCUMENT_REPOSITORY_BACKEND=json` → `bootstrap.py` uses `MockEventStore` (unchanged behavior)
- `DOCUMENT_REPOSITORY_BACKEND=postgres` → uses `PostgresEventStore`
- Migration: run `CREATE TABLE` SQL on first startup (idempotent `CREATE TABLE IF NOT EXISTS`)
- Existing 20 mock events are not seeded to PostgreSQL; PostgreSQL starts empty until first crawl
---
## 9. Out of Scope (this phase)
- Automatic/scheduled crawling (Celery Beat) — manual trigger only
- Playwright-based JS-rendered pages — all target sites work with httpx
- Knowledge Graph (Neo4j / LightRAG) — future phase
- Email/Slack webhook notifications — future phase
- User-facing diff history (versioning beyond one prior snapshot) — future phase

View File

@@ -0,0 +1,459 @@
# Compliance Analysis Enhancement Design
**Date:** 2026-06-08
**Directions:** A (Analysis Quality) + B (History & Reports) + C (Deep Chat)
**Approach:** Three independent but coordinated feature sets sharing one DB schema (method one / structured tables).
---
## Goals
1. **A — Analysis Quality:** Parallel clause processing (3-5× speed), fix `highlight_terms` bug (always returns empty), add LLM retry with tenacity, reserve `PassThroughReranker` for future cross-encoder work.
2. **B — Analysis History & Reports:** Auto-save every completed analysis to PostgreSQL, history rail in UI, per-record DOCX export, delete with confirmation.
3. **C — Deep Chat:** Per-finding persistent chat threads grounded in real retrieved text, LLM-generated suggestion questions, multi-turn memory.
---
## Architecture Overview
### Layering Rules (must not be violated)
```
api/routes/ → thin HTTP handlers, SSE generators only
application/ → orchestration logic (pipeline.py)
domain/ports/ → ABCs, no implementation
infrastructure/ → DB, docx, external calls
shared/bootstrap.py → composition root, wires everything
```
New business logic goes in `application/compliance/pipeline.py` and domain ports. Never in `services/*` or `workflows/*`.
### Shared Database Schema (B + C)
Three tables, created together so C's FK references are valid from day one:
```sql
CREATE TABLE compliance_analyses (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
created_by VARCHAR(255),
doc_name VARCHAR(500),
standard_name VARCHAR(500),
risk_score INTEGER,
conclusion TEXT,
actions JSONB,
para_text TEXT,
highlight_terms JSONB
);
CREATE TABLE compliance_findings (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
analysis_id UUID NOT NULL REFERENCES compliance_analyses(id) ON DELETE CASCADE,
seq INTEGER NOT NULL,
title VARCHAR(500),
description TEXT,
status VARCHAR(50),
clause_ref VARCHAR(200)
);
CREATE TABLE finding_chat_messages (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
analysis_id UUID NOT NULL REFERENCES compliance_analyses(id) ON DELETE CASCADE,
finding_id UUID NOT NULL REFERENCES compliance_findings(id) ON DELETE CASCADE,
role VARCHAR(20) NOT NULL, -- 'user' | 'assistant'
content TEXT NOT NULL,
created_at TIMESTAMPTZ NOT NULL DEFAULT now()
);
```
---
## Direction A — Analysis Quality
### A1: Parallel Clause Processing
**Current:** Route handler has a sequential `for i, clause in enumerate(clauses)` loop. Each iteration calls `retrieve_for_clause()` then `check_clause_compliance()` synchronously via `asyncio.to_thread`.
**Change:** Extract a `process_single_clause(clause, idx, ...) -> dict` function in `pipeline.py`, then replace the loop with `asyncio.gather`:
```python
async def run_clauses_parallel(clauses, retrieval_svc, llm_client, standard_name, para_text):
tasks = [
asyncio.to_thread(process_single_clause, clause, i, retrieval_svc, llm_client, standard_name, para_text)
for i, clause in enumerate(clauses)
]
return await asyncio.gather(*tasks, return_exceptions=True)
```
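Results are yielded to the SSE stream in original order (`asyncio.gather` preserves input order). Because `return_exceptions=True`, a failing clause appears as an exception object at its index in the result list; the route handler checks each result and emits `{type: "error", clause_index: i}` for those entries rather than crashing the whole stream.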
### A2: Fix highlight_terms
**Root cause:** `synthesize_conclusion()` passes the LLM response through `json.loads()` but the LLM often wraps output in markdown fences (` ```json ... ``` `), causing a parse failure and silent fallback to `[]`.
**Fix in `pipeline.py`:**
```python
import re
def _extract_json(text: str) -> dict:
"""Strip markdown fences then parse JSON. Raises ValueError on failure."""
cleaned = re.sub(r"^```(?:json)?\s*|\s*```$", "", text.strip(), flags=re.MULTILINE)
return json.loads(cleaned)
```
Apply `_extract_json` in `synthesize_conclusion()` instead of bare `json.loads`. Wrap with `@retry` (see A3) so transient parse failures are retried (up to 3 attempts in total).
### A3: LLM Retry with tenacity
`tenacity` is already in `requirements.txt` but unused. Add to all LLM calls in `pipeline.py`:
```python
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
@retry(
stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=1, max=4),
retry=retry_if_exception_type((httpx.HTTPError, ValueError)),
reraise=True,
)
def _call_llm_with_retry(client, prompt: str) -> str:
"""Call LLM and return raw text. Retries on HTTP errors and JSON parse failures."""
...
```
On final failure, the calling function catches and emits `{type: "error", text: "LLM call failed after 3 attempts"}` to the SSE stream.
### A4: PassThroughReranker (future-ready stub)
`domain/retrieval/ports.py` already defines a `Reranker` ABC. Add the no-op implementation:
**New file:** `backend/app/infrastructure/retrieval/reranker.py`
```python
from app.domain.retrieval.ports import Reranker, RetrievedChunk
class PassThroughReranker(Reranker):
"""No-op reranker. Replace with CrossEncoderReranker when a local model is available."""
def rerank(self, query: str, chunks: list[RetrievedChunk], top_k: int) -> list[RetrievedChunk]:
return chunks[:top_k]
```
Register in `shared/bootstrap.py` as the default `Reranker` implementation.
### A — Files Changed
| File | Action |
|------|--------|
| `backend/app/application/compliance/pipeline.py` | Add `process_single_clause`, `run_clauses_parallel`, `_extract_json`, `_call_llm_with_retry` |
| `backend/app/api/routes/compliance.py` | Replace sequential loop with `await run_clauses_parallel(...)` |
| `backend/app/infrastructure/retrieval/reranker.py` | New — `PassThroughReranker` |
| `backend/app/shared/bootstrap.py` | Register `PassThroughReranker` |
---
## Direction B — History & Reports
### B1: Domain Port
**New file:** `backend/app/domain/compliance/ports.py`
```python
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from datetime import datetime
from typing import Optional
@dataclass
class FindingRecord:
id: str
analysis_id: str
seq: int
title: str
description: str
status: str
clause_ref: Optional[str] = None
@dataclass
class AnalysisRecord:
id: str
created_at: datetime
created_by: Optional[str]
doc_name: str
standard_name: str
risk_score: int
conclusion: str
actions: list
para_text: str
highlight_terms: list
findings: list[FindingRecord] = field(default_factory=list)
class ComplianceRepository(ABC):
@abstractmethod
def save_analysis(self, record: AnalysisRecord) -> str: ...
@abstractmethod
def list_analyses(self, limit: int = 50, offset: int = 0) -> list[AnalysisRecord]: ...
@abstractmethod
def get_analysis(self, analysis_id: str) -> Optional[AnalysisRecord]: ...
@abstractmethod
def delete_analysis(self, analysis_id: str) -> None: ...
@abstractmethod
def save_message(self, analysis_id: str, finding_id: str, role: str, content: str) -> str: ...
@abstractmethod
def get_messages(self, finding_id: str) -> list[dict]: ...
```
### B2: PostgresComplianceRepository
**New file:** `backend/app/infrastructure/compliance/repository.py`
Implements `ComplianceRepository` using `psycopg2` (already in requirements). Connection string from `settings.DATABASE_URL`. Key methods:
- `save_analysis`: INSERT into `compliance_analyses`, then bulk INSERT findings into `compliance_findings`, return `analysis_id` (UUID string).
- `list_analyses`: SELECT with JOIN on findings count, ORDER BY `created_at DESC`, supports limit/offset.
- `get_analysis`: SELECT analysis + all findings by `analysis_id`.
- `delete_analysis`: DELETE cascades to findings and chat messages via FK.
- `save_message` / `get_messages`: INSERT/SELECT on `finding_chat_messages`.
Uses a connection pool (simple `psycopg2.pool.ThreadedConnectionPool`, min=1, max=5).
### B3: Auto-save Hook
In the SSE generator in `compliance.py`, after the `done` event is assembled:
```python
# After yielding the done event
if repo is not None:
record = AnalysisRecord(
id="", # will be assigned by DB
created_at=datetime.utcnow(),
created_by=current_user,
doc_name=doc_name,
standard_name=standard_name,
risk_score=done_payload["risk_score"],
conclusion=done_payload["conclusion"],
actions=done_payload["actions"],
para_text=done_payload["para_text"],
highlight_terms=done_payload["highlight_terms"],
findings=[FindingRecord(...) for f in accumulated_findings],
)
analysis_id = await asyncio.to_thread(repo.save_analysis, record)
# Emit an extra SSE event so frontend receives the analysis_id
yield f"data: {json.dumps({'type': 'saved', 'analysis_id': analysis_id})}\n\n"
```
### B4: New API Endpoints
Added to `backend/app/api/routes/compliance.py`:
```
GET /api/v1/compliance/history
Query params: limit=20&offset=0
Response: [{id, created_at, doc_name, standard_name, risk_score, finding_count}]
GET /api/v1/compliance/history/{analysis_id}
Response: full AnalysisRecord including findings list
DELETE /api/v1/compliance/history/{analysis_id}
Response: 204 No Content
GET /api/v1/compliance/history/{analysis_id}/download
Response: DOCX file (application/vnd.openxmlformats-officedocument.wordprocessingml.document)
```
### B5: DOCX Export
**New file:** `backend/app/infrastructure/compliance/docx_export.py`
Uses `python-docx` (already in requirements). Generates a structured report:
- Cover: document name, standard, date, risk score badge
- Executive summary: conclusion paragraph
- Findings table: seq / title / status / clause_ref / description
- Action items: numbered list
- Footer: generated by AI Regulation Analysis System
```python
def generate_docx(record: AnalysisRecord) -> bytes:
"""Generate a DOCX compliance report and return as bytes."""
doc = Document()
# ... build document ...
buf = BytesIO()
doc.save(buf)
return buf.getvalue()
```
### B6: Frontend — History Rail
`CompliancePage.tsx` gains a left rail (same layout pattern as RagChat's `history-pane`):
```
┌──────────────┬─────────────────────────────────┐
│ History │ Main Analysis Area │
│ ────────── │ │
│ 2026-06-08 │ (current analysis or loaded │
│ doc.pdf │ read-only historical record) │
│ ⚠ 72 [↓][×]│ │
│ ────────── │ │
│ 2026-06-07 │ │
│ csms.pdf │ │
│ ✓ 15 [↓][×]│ │
└──────────────┴─────────────────────────────────┘
```
- `[↓]` triggers `GET /history/{id}/download` and saves the DOCX file
- `[×]` shows a confirmation dialog, then calls `DELETE /history/{id}`
- Clicking a row loads that analysis into the main area in read-only mode
- `PageStateContext.ComplianceState` gains `analysisId: string | null` and `isReadOnly: boolean`
On mount, the rail calls `GET /history?limit=20` to populate the list. The list re-fetches after delete or after a new analysis completes (triggered by the `saved` SSE event).
### B — Files Changed
| File | Action |
|------|--------|
| `backend/app/domain/compliance/ports.py` | New — `ComplianceRepository` ABC + data classes |
| `backend/app/infrastructure/compliance/repository.py` | New — `PostgresComplianceRepository` |
| `backend/app/infrastructure/compliance/docx_export.py` | New — `generate_docx()` |
| `backend/app/api/routes/compliance.py` | Add history endpoints + auto-save hook |
| `backend/app/shared/bootstrap.py` | Register `PostgresComplianceRepository` |
| `frontend/src/pages/Compliance/CompliancePage.tsx` | Add History Rail |
| `frontend/src/contexts/PageStateContext.tsx` | Add `analysisId`, `isReadOnly` to `ComplianceState` |
---
## Direction C — Deep Chat
### C1: New Chat Endpoints
The existing `/compliance/chat/{segment_id}` endpoint is kept for backward compatibility but marked deprecated; new clients use the finding-scoped endpoints below:
```
POST /api/v1/compliance/analyses/{analysis_id}/findings/{finding_id}/chat
Body: {query: string}
Response: SSE stream — chunk / done / error events
GET /api/v1/compliance/analyses/{analysis_id}/findings/{finding_id}/chat
Response: [{id, role, content, created_at}]
POST /api/v1/compliance/analyses/{analysis_id}/findings/{finding_id}/suggestions
Response: {questions: [string, string, string]}
```
### C2: Grounded Context Construction
New function in `pipeline.py`:
```python
def build_finding_context(finding: FindingRecord, analysis: AnalysisRecord) -> str:
"""
Build a grounded system context string for a finding chat thread.
Combines finding details with analysis metadata for LLM grounding.
"""
return (
f"Document: {analysis.doc_name}\n"
f"Standard: {analysis.standard_name}\n"
f"Finding [{finding.seq}]: {finding.title}\n"
f"Status: {finding.status}\n"
f"Clause reference: {finding.clause_ref or 'N/A'}\n"
f"Description: {finding.description}\n"
f"Overall conclusion: {analysis.conclusion}\n"
)
```
This string is prepended to the system prompt for every chat call — replacing the fragile `segment_context` approach.
### C3: Multi-turn Context
Chat handler fetches existing messages from `finding_chat_messages` via `repo.get_messages(finding_id)` and prepends them to the LLM call as `[{"role": "user"/"assistant", "content": "..."}]` message history. Max history: 10 most recent messages (5 turns) to avoid token overflow.
After each LLM response, both the user message and assistant message are saved via `repo.save_message()`.
### C4: Suggestion Generation
New function in `pipeline.py`:
```python
SUGGESTION_PROMPTS = {
"non_compliant": "Generate 3 questions focused on remediation steps and timeline.",
"partial": "Generate 3 questions focused on identifying the compliance gap.",
"compliant": "Generate 3 questions focused on maintaining and evidencing compliance.",
}
def generate_suggestions(finding: FindingRecord, analysis: AnalysisRecord, llm_client) -> list[str]:
"""
Generate 3 context-aware follow-up questions for a finding chat thread.
Returns a list of 3 question strings. Falls back to generic questions on error.
"""
focus = SUGGESTION_PROMPTS.get(finding.status, SUGGESTION_PROMPTS["partial"])
context = build_finding_context(finding, analysis)
prompt = f"{context}\n\n{focus}\nReturn JSON: {{\"questions\": [\"...\", \"...\", \"...\"]}}"
# ... call LLM, parse JSON, return list ...
# Fallback on error:
return ["What are the specific requirements?", "What is the remediation timeline?", "Which regulation clause applies?"]
```
### C5: Frontend — Finding Chat Drawer
New component: `frontend/src/pages/Compliance/FindingChatDrawer.tsx`
Drawer slides in from the right (CSS: `position: fixed; right: 0; width: 420px`), reusing existing CSS variables (`--surface`, `--border`, `--accent`).
Structure:
- Header: finding title + close button
- Suggestions section: 3 chip buttons (only shown before first user message; hidden after)
- Message list: scrollable, same bubble style as RagChat
- Composer: textarea + send button, same pattern as RagChat composer
State managed in `PageStateContext.ComplianceState`:
- `activeFindingId: string | null` — which finding's drawer is open
- Drawer open/close controlled by `activeFindingId !== null`
On open:
1. `GET /analyses/{id}/findings/{fid}/chat` → restore history
2. If history is empty: `POST /analyses/{id}/findings/{fid}/suggestions` → show chips
Each finding card in `CompliancePage.tsx` gains a `💬 Chat` button that sets `activeFindingId`.
### C — Files Changed
| File | Action |
|------|--------|
| `backend/app/api/routes/compliance.py` | Add 3 new finding-chat endpoints |
| `backend/app/application/compliance/pipeline.py` | Add `build_finding_context`, `generate_suggestions` |
| `backend/app/infrastructure/compliance/repository.py` | Add `save_message`, `get_messages` (already in port) |
| `frontend/src/pages/Compliance/FindingChatDrawer.tsx` | New component |
| `frontend/src/pages/Compliance/CompliancePage.tsx` | Add Chat button to finding cards, render drawer |
| `frontend/src/contexts/PageStateContext.tsx` | Add `activeFindingId` to `ComplianceState` |
---
## Implementation Order
Direction A must be completed first (parallel processing changes the route handler that B's auto-save hook attaches to). B must be completed before C (C's FK references require B's tables and repository).
```
A (parallel + bug fixes + reranker stub)
└→ B (schema migration + history + DOCX)
└→ C (finding chat + suggestions)
```
---
## Non-Goals
- PDF export (DOCX only; users convert via Word/WPS)
- Cross-encoder reranking (stub reserved, not implemented)
- Scheduled/automatic crawling
- User-level history isolation (all users share history — global visibility)
- Prompt version management or A/B testing
---
## Constraints
- Backend comments and docstrings: English only
- No new top-level libraries beyond those already in `requirements.txt` (`tenacity`, `python-docx`, `psycopg2-binary` are all present)
- `DOCUMENT_REPOSITORY_BACKEND=postgres` → `PostgresComplianceRepository`; any other value → raise `NotImplementedError` with a clear message (no mock fallback for compliance history)
- Git commits are made by the user, never automated

View File

@@ -0,0 +1,421 @@
# Internationalisation (i18n) Design — Frontend Chinese/English Toggle
**Date:** 2026-06-08
**Scope:** UI framework strings only (nav labels, button labels, status messages, placeholders). Mock data, API-returned content, and domain regulation text are explicitly excluded.
---
## Goals
Add a language toggle button (EN ↔ 中) in the Sidebar footer, immediately left of the existing theme-toggle button, so users can switch the UI between English and Simplified Chinese. Default language is English on every page load; preference is not persisted across sessions.
---
## Architecture
### Approach
Custom `LanguageContext` following the same pattern as the existing `ThemeContext`. No external library dependencies. Translation strings live in two TypeScript modules (`locales/en.ts` and `locales/zh.ts`) that export identical-shape objects.
### Layering
```
src/
├── contexts/
│ └── LanguageContext.tsx # type Lang, LanguageProvider, useLanguage()
└── locales/
├── en.ts # English translations (default)
└── zh.ts # Simplified Chinese translations
```
`LanguageProvider` wraps the entire app in `App.tsx` — outermost provider so every component can consume it.
### Context interface
```ts
type Lang = 'en' | 'zh';
interface LanguageContextValue {
lang: Lang;
t: Translations; // typed translation object
toggleLang: () => void;
}
```
`useState<Lang>('en')` — hardcoded default, no localStorage read on mount.
### Translation object shape (both files export `Translations`)
```ts
export interface Translations {
nav: {
groupMain: string;
groupWorkbench: string;
groupChat: string;
overview: string;
signals: string;
status: string;
documents: string;
compliance: string;
chat: string;
};
sidebar: {
toggleTheme: string;
toggleLang: string;
signOut: string;
};
overview: {
eyebrow: string;
heroTitle: string;
heroDesc: string;
openDashboard: string;
jumpToChat: string;
sectionHowItWorks: string;
sectionScreens: string;
stepUpload: string; stepUploadDesc: string;
stepProcess: string; stepProcessDesc: string;
stepMonitor: string; stepMonitorDesc: string;
stepAnalyze: string; stepAnalyzeDesc: string;
stepReview: string; stepReviewDesc: string;
stepChat: string; stepChatDesc: string;
statScreens: string;
statFlows: string;
statReviewPosture: string;
navLiveHealth: string;
navRegulatoryChanges: string;
navUploadDocs: string;
navComplianceWorkspace: string;
navChatCited: string;
navKPIs: string;
};
signals: {
topbarTitle: string;
topbarSub: string;
searchPlaceholder: string;
refreshBtn: string;
crawlingBtn: string;
statTotal: string;
statHigh: string;
statMedium: string;
statLast90: string;
badgeFinal: string;
badgeDraft: string;
badgeUrgent: string;
badgePublished: string;
emptySelectSignal: string;
runAnalysis: string;
stopBtn: string;
sourceLink: string;
tabOverview: string;
tabObligations: string;
tabImpact: string;
tabChanges: string;
cardScopeHeader: string;
cardObligationsHeader: string;
obligationsEmpty: string;
colObligationDesc: string;
colSubject: string;
colType: string;
colDeadline: string;
deadlinePending: string;
cardAffectedDocs: string;
noAffectedDocs: string;
cardAIImpact: string;
footerText: string;
statusConnecting: string;
statusNoStream: string;
statusCrawling: string;
statusProcessing: string;
statusComplete: string;
statusUpdateComplete: string;
statusError: string;
statusConnFailed: string;
};
status: {
topbarTitle: string;
searchPlaceholder: string;
exportBtn: string;
refreshBtn: string;
newUploadBtn: string;
statTotal: string;
statIndexed: string;
statFailed: string;
statChunks: string;
statCoverage: string;
cardHealth: string;
badgeOnline: string;
badgeError: string;
badgeDegraded: string;
badgeUnknown: string;
healthEndpointError: string;
serviceEnabled: string;
serviceDisabled: string;
serviceNotLoaded: string;
cardConfig: string;
labelLLMProvider: string;
labelLLMModel: string;
labelEmbeddingModel: string;
labelEmbeddingDim: string;
labelMilvusCollection: string;
labelParserBackend: string;
labelChunkBackend: string;
labelParserFailureMode: string;
configLoadError: string;
cardBreakdown: string;
breakdownIndexed: string;
breakdownProcessing: string;
breakdownFailed: string;
cardRuntime: string;
labelActiveSessions: string;
labelSessionCapacity: string;
labelReranker: string;
labelBM25: string;
statusActive: string;
statusUnavailable: string;
footerAllOk: string;
footerDegraded: string;
footerChecking: string;
};
docs: {
topbarTitle: string;
searchPlaceholder: string;
refreshBtn: string;
uploadBtn: string;
confirmDeleteTitle: string;
cancelBtn: string;
deleteBtn: string;
filterAll: string;
filterReady: string;
filterProcessing: string;
filterFailed: string;
filterPending: string;
filterAllTypes: string;
selectedCount: string; // '{n} document(s) selected' — use {n} placeholder
deleteSelected: string;
colName: string;
colStatus: string;
colUploaded: string;
colChunks: string;
colSize: string;
colType: string;
colActions: string;
loading: string;
emptyNoDocuments: string;
emptyNoMatch: string;
footerCount: string; // '{n} of {m} document(s)'
titleDownload: string;
titleRetry: string;
titleDelete: string;
confirmSingle: string; // '{name}' placeholder
confirmBatch: string; // '{n}' placeholder
};
compliance: {
topbarTitle: string;
searchPlaceholder: string;
clearBtn: string;
exportBtn: string;
exportJSON: string;
exportText: string;
newAnalysisBtn: string;
analyzeBtn: string;
statusAnalyzing: string;
statusComplete: string;
statusError: string;
emptyTitle: string;
emptyDesc: string;
colRetrieved: string; // 'Retrieved Regulations {count}'
retrievingMsg: string;
defaultRegulation: string;
matchSuffix: string;
colParagraph: string;
extractingMsg: string;
noTextExtracted: string;
stagesHeader: string;
stageExtraction: string;
stageClauseSplit: string;
stageRetrieval: string;
stageSynthesis: string;
colFindings: string; // 'Findings {count}'
gapInProgress: string;
askAIBtn: string;
chatBtn: string;
conclusionHeader: string;
riskScoreTooltip: string;
statusCovered: string;
statusGap: string;
statusCritical: string;
statusInfo: string;
sourceTypePasted: string;
sourceTypeIndexed: string;
sourceTypeUploaded: string;
chatSidebarHeader: string;
chatThinking: string;
quickQ1: string;
quickQ2: string;
quickQ3: string;
chatPlaceholder: string;
sendBtn: string;
analysisFailed: string;
exportReportHeader: string;
exportSectionParagraph: string;
exportSectionFindings: string;
exportSectionConclusion: string;
exportSectionActions: string;
historyHeader: string;
downloadReport: string;
historyEmpty: string;
historyDeleteConfirm: string;
drawerClose: string;
drawerChatEmpty: string;
drawerSuggestionsHeader: string;
};
ragchat: {
topbarTitle: string;
exportBtn: string;
quickPromptsHeader: string;
inputPlaceholder: string;
citationsHeader: string; // 'Sources {count}'
citationsEmpty: string;
jumpToSource: string; // 'Jump to source [N]'
apiError: string;
quickPrompt1: string;
quickPrompt2: string;
quickPrompt3: string;
quickPrompt4: string;
};
}
```
---
## Language Toggle Button
Location: `Sidebar.tsx` footer `<div style={{ display: 'flex', gap: 4 }}>`.
Inserted **left of** the existing theme button:
```tsx
<button className="theme-btn" onClick={toggleLang} title={t.sidebar.toggleLang}>
{lang === 'en' ? 'EN' : '中'}
</button>
```
- Reuses existing `theme-btn` CSS class — no new styles needed.
- Displays a short label for the current language: `EN` or `中`.
- `title` attribute (tooltip) translates with the rest of the UI.
---
## Translation Files (representative values)
### `locales/en.ts` (English — default)
Key values (representative; full file contains all keys above):
```ts
nav: { groupMain: 'Main', groupWorkbench: 'Workbench', groupChat: 'Chat',
overview: 'Overview', signals: 'Regulatory Signals', status: 'System Status',
documents: 'Documents', compliance: 'Compliance Analysis', chat: 'Regulation Q&A' },
sidebar: { toggleTheme: 'Toggle theme', toggleLang: 'Switch language', signOut: 'Sign out' },
signals: { refreshBtn: 'Refresh Sources', crawlingBtn: 'Crawling...', ... },
docs: { uploadBtn: 'Upload document', deleteBtn: 'Delete', cancelBtn: 'Cancel', ... },
compliance: { newAnalysisBtn: 'New analysis', analyzeBtn: 'Analyze', sendBtn: 'Send', ... },
ragchat: { exportBtn: 'Export chat', inputPlaceholder: 'Ask about your regulations…', ... },
```
### `locales/zh.ts` (Simplified Chinese)
Key values:
```ts
nav: { groupMain: '主菜单', groupWorkbench: '工作台', groupChat: '对话',
overview: '概览', signals: '法规信号', status: '系统状态',
documents: '文档管理', compliance: '合规分析', chat: '法规问答' },
sidebar: { toggleTheme: '切换主题', toggleLang: '切换语言', signOut: '退出' },
signals: { refreshBtn: '刷新数据源', crawlingBtn: '抓取中...', ... },
docs: { uploadBtn: '上传文档', deleteBtn: '删除', cancelBtn: '取消', ... },
compliance: { newAnalysisBtn: '新建分析', analyzeBtn: '开始分析', sendBtn: '发送', ... },
ragchat: { exportBtn: '导出对话', inputPlaceholder: '请输入关于法规的问题…', ... },
```
---
## App.tsx Provider Wrapping
```tsx
// Before
<ThemeProvider>
<AuthProvider>
<PageStateProvider>
<AppRouter />
</PageStateProvider>
</AuthProvider>
</ThemeProvider>
// After
<LanguageProvider>
<ThemeProvider>
<AuthProvider>
<PageStateProvider>
<AppRouter />
</PageStateProvider>
</AuthProvider>
</ThemeProvider>
</LanguageProvider>
```
`LanguageProvider` is outermost so it is available to all components including the theme toggle itself.
---
## Usage in Components
```tsx
import { useLanguage } from '../../contexts/LanguageContext';
function MyComponent() {
const { t } = useLanguage();
return <button>{t.docs.uploadBtn}</button>;
}
```
No wrapping needed — `t` is always the correct object for the current language.
---
## Files Changed
| File | Action |
|------|--------|
| `src/contexts/LanguageContext.tsx` | New — `LanguageProvider`, `useLanguage()`, `Lang` type |
| `src/locales/en.ts` | New — complete English `Translations` object |
| `src/locales/zh.ts` | New — complete Chinese `Translations` object |
| `src/App.tsx` | Add `<LanguageProvider>` wrapper |
| `src/components/layout/Sidebar.tsx` | Add language toggle button; replace nav group titles and labels with `t.nav.*` |
| `src/pages/Overview/OverviewPage.tsx` | Replace all UI strings with `t.overview.*` |
| `src/pages/Perception/PerceptionPage.tsx` | Replace all UI strings with `t.signals.*` |
| `src/pages/Status/StatusPage.tsx` | Replace all UI strings with `t.status.*` |
| `src/pages/Docs/DocsPage.tsx` | Replace all UI strings with `t.docs.*` |
| `src/pages/Compliance/CompliancePage.tsx` | Replace all UI strings with `t.compliance.*` |
| `src/pages/RagChat/RagChatPage.tsx` | Replace all UI strings with `t.ragchat.*` |
| `src/pages/Compliance/HistoryRail.tsx` | Replace UI strings with `t.compliance.*` |
| `src/pages/Compliance/FindingChatDrawer.tsx` | Replace UI strings with `t.compliance.*` |
---
## Non-Goals
- Persistence across sessions (no localStorage for language preference)
- More than two languages
- RTL layout support
- Pluralisation helpers (simple string substitution with `{n}` placeholders is sufficient — callers replace via `t.docs.selectedCount.replace('{n}', String(count))`)
- Translation of API-returned content, mock data, regulation names, or document file names
- Date/number formatting localisation
---
## Constraints
- Zero new npm dependencies
- Follow existing `ThemeContext` pattern exactly
- Backend comments/docstrings: English only (no backend changes in this feature)
- Git commits made by the user, never automated

108
docs_dump.txt Normal file
View File

@@ -0,0 +1,108 @@
===== PPTX =====
--- Slide 1 ---: AI + 法律法规 | 合规智能中枢 | 面向车企与工厂的 AI 驱动合规解决方案 | 2026年4月 | EMS & EHS Compliance Intelligence Hub | AI Compliance
Intelligence Hub | Internal | AI 合规智能中枢 | 2026.04
--- Slide 2 ---: 背景与挑战 | 车企和工厂面临的合规困境 | 法规来源复杂 | 国标GB · MIIT · UN-ECE
IATF 16949 · ISO 45001
多轨并行,难以统管 | 更新频率高 | 新能源 · 数据安全 · 碳排放
PIPL · NEV积分 · CCER
政策持续迭代 | 跨语言需求 | 中英文法规混存
跨国工厂多语言
合规场景并存 | 文档高度分散 | 分散于 Confluence
SharePoint · ERP · PLM
无法联通查询 | 隐患识别被动 | EHS 安全依赖人工
隐患发现滞后
缺乏预防性机制 | 覆盖核心法规域 | 🚗 车辆安全 GB 7258 · GB 18384 · UN-ECE R155/156 | 🔒 数据安全 PIPL · DSL · GB/T 35273 | 🏭 工厂EHS GB 6441 · AQ/T系列 · ISO 45001 | ♻️ 碳排放 NEV积分 · CCER · 欧盟碳边境税 | ✅ 质量管理 IATF 16949 · GB/T 19001 | AI 合规智能中枢 | 面向车企与工厂 | 2026.04 | 2 / 13
--- Slide 3 ---: 产品定位与整体架构 | AI 驱动的全链路合规智能平台 | AI 合规智能中枢 | 📚 知识库构建 | 内外部法规 · 历史案例
统一知识图谱 · 自动更新 | 💬 智能问答 | 混合检索 · 语义+关键词
中英双语 · 引文溯源 | 📄 合规审查 | PDF/Word上传
自动比对法规 · 风险标注 | 🔌 API集成 | 对接PLM · ERP · OA · MES | 🎯 个性化推荐 | 角色画像 · 上下文感知 | 📢 定制推送 | Email · Teams
飞书 · 钉钉
法规变更
实时通知 | 🦺 EHS 隐患识别 & 管理体系审计C-SG专项 | 事故报告 NLP | SIF潜力识别 | 四维根因分析 | ISO 45001 要素扫描 | 自动生成审计报告 | 趋势分析仪表板 | AI 合规智能中枢 | 面向车企与工厂 | 2026.04 | 3 / 13
--- Slide 4 ---: 功能一:合规知识库构建与动态更新 | 统一接入内外部法规,构建可检索的结构化知识库 | 📥 数据来源 | 内部文档 | Confluence · SharePoint
飞书 · 历史合规报告 · 审计记录 | ↓ | 外部法规 | 国标全文库 · 工标网
MIIT政策 · UN-ECE · EUR-Lex | ↓ | 历史案例 | 处罚案例库 · 整改记录
行业事故通报 | ⚙️ 处理流程 | 1 | ① 文档解析 | 版面感知OCR扫描件 · PDF表格 · 多栏 · Word/Excel | ↓ | 2 | ② 智能分块 | 章节级 / 条款级双粒度切割,保留语义完整性 | ↓ | 3 | ③ 向量化存储 | 多语言嵌入(中英双语),向量库 + 关键词索引双轨 | ↓ | 4 | ④ 知识图谱 | 法规实体 → 条款 → 义务 → 适用范围关系图谱 | ↓ | 5 | ⑤ 自动更新 | 定时监控法规变更,触发增量重索引 + 版本管理 | ✨ 核心价值 | 数据不出厂 | 私有化本地部署
满足PIPL/DSL数据主权 | 权限分级管理 | 研发/生产/采购/法务
差异化访问控制 | 实时保鲜 | 法规修订自动触发重索引
确保知识时效性 | 多格式支持 | 扫描件 · PDF · Word
Excel · 标准文件全覆盖 | AI 合规智能中枢 | 面向车企与工厂 | 2026.04 | 4 / 13
--- Slide 5 ---: 功能二:混合检索智能问答引擎 | 语义检索 + 关键词检索 + 知识图谱,生成可溯源的合规决策建议 | 用户提问 | 中 / 英 / 混合
自然语言输入 | ▶ | 意图理解 | 识别法规实体
适用场景 · 地域 | ▶ | 混合检索 | BM25关键词
+ 语义向量
本地+网络双路 | ▶ | 重排序 | Cross-Encoder
精排召回结果 | ▶ | 生成回答 | 引文锚定输出
置信度评分
页码溯源 | 典型问答场景 | 法规解读 | "我们的纯电SUV需满足哪些GB强制认证要求" | 政策查询 | "2025年NEV积分核算方式有哪些最新变化" | 合规判断 | "供应商A的REACH声明是否满足我司采购合规要求" | 多跳推理 | "ISO 45001变更管理要求对应哪些内部流程需更新" | 对比分析 | "GB 18384与欧盟ECE R100在电池安全上有哪些差异" | 📎 引文溯源 | 答案标注原文出处
页码精确定位 | 🌐 多语言支持 | 中英混合检索
无需切换语言 | ⚖️ 决策辅助 | 结合内部制度
输出综合建议 | 🔄 图谱增强 | 关联上下游条款
多跳推理支持 | AI 合规智能中枢 | 面向车企与工厂 | 2026.04 | 5 / 13
--- Slide 6 ---: 功能三:智能文档合规审查 | 上传 PDF/Word自动比对法规库标注风险并给出整改建议 | ⚙️ 审查流程 | 1 | ① 文件上传 | PDF · Word · Excel · 扫描件,支持批量 | ↓ | 2 | ② 文档解析 | 版面感知OCR段落/条款级分块 | ↓ | 3 | ③ 法规域匹配 | 根据文档类型+内容自动识别适用法规域 | ↓ | 4 | ④ 合规比对 | 条款级语义对比,缺项检测 · 风险评分 | ↓ | 5 | ⑤ 报告输出 | 非合规位置标注,整改建议 · 风险等级 | 📋 报告输出内容 | 📍 | 非合规位置标注 | 页码 + 段落高亮,一键跳转原文 | ⚠️ | 风险等级分级 | 红(高危)/ 橙(中)/ 黄(低)三级 | 📖 | 法规条款引用 | 精确关联对应法规原文条款编号 | 🔧 | 整改建议 | 基于历史合规案例,给出可执行方案 | 📂 适用文档类型 | 供应商合规声明 | REACH/RoHS · 碳足迹申报 | 新产品EHS评估 | GB安全标准覆盖完整性核查 | 工厂安全作业规程 | AQ/T符合性 · 许可条款 | 劳动合同/协议 | 劳动法 · 工时 · 竞业条款 | 数据处理协议 | PIPL/GDPR 数据主体权利 | 供应链碳申报 | CCER/CBAM 核算方法验证 | AI 合规智能中枢 | 面向车企与工厂 | 2026.04 | 6 / 13
--- Slide 7 ---: EHS 隐患识别 & 管理体系审计C-SG专项 | AI驱动的主动安全预防从被动响应到预测性干预 | 📥 数据输入 | 📝 事故/事件报告文本 · 巡检记录 · 安全观察卡 | 📊 设备运行数据 · 工伤统计 · 隐患整改台账 | 📷 现场照片(目标检测)· 视频(行为分析,可选) | 🤖 AI隐患识别引擎 | NLP文本分析 | 从叙述性文本中提取隐患实体
触发因素 · 伤害类型 · 位置信息 | SIF风险评分 | 高严重性事件潜力预测
优先处置最高风险隐患 | 四维根因分析 | 人因/设备/管理/环境
系统性根因挖掘 | 法规自动关联 | 与GB 6441/AQ系列/ISO 45001
自动映射对应条款 | 📋 体系审计功能 | ✓ ISO 45001要素覆盖度扫描PDCA完整性 | ✓ 历史案例相似度匹配与经验复用 | ✓ 整改优先级排序(风险×紧迫×可行性) | ✓ 审计报告自动生成(条款级评分) | ⚠️ 典型隐患场景 | ▸ 高处坠落 | AQ/T 3049 | ▸ 有限空间 | AQ 3028 | ▸ 化学品管理 | GB 13690 | ▸ 设备点检 | IATF §8.5 | ▸ 应急演练 | ISO 45001 §8.2 | 📤 输出成果 | 隐患清单 | 位置 · 类型
风险等级
法规依据
整改建议 | 体系审计
报告 | 条款级符合
性评分
整改优先级 | 趋势分析
仪表板 | 隐患热图
月度趋势
部门对比 | AI 合规智能中枢 | 面向车企与工厂 | 2026.04 | 7 / 13
--- Slide 8 ---: 系统集成 · 个性化推荐 · 定制推送 | 合规能力 API 化,主动触达用户,融入业务流程 | 🔌 合规审查 API 化 | POST | /compliance/check | 大文本分片合规检查 | POST | /compliance/upload | PDF/Word文件上传审查 | GET | /compliance/query | 法规知识库问答 | POST | /compliance/subscribe | 法规变更Webhook订阅 | 🔗 企业系统集成 | PLM | 新产品立项/BOM变更 | → 自动触发法规适用性检查 | ERP | 供应商准入/合同签署 | → 供应商自动合规评分 | OA | 合同/协议提交审批 | → 高风险自动抄送法务 | MES | 生产工艺变更 | → 触发EHS合规影响评估 | 🎯 个性化推荐 | 👤 角色画像EHS · 法务 · 采购 · 研发 | 💡 上下文感知:对话主题 → 关联法规推荐 | 🔔 到期提醒:认证到期 · 法规更新预警 | 📈 行为学习:历史查询 → 智能问题推荐 | 📢 定制化法规推送 | 📧 Email | HTML富文本含变更对比 | 💬 Teams | 企业Bot实时推送 | 📱 飞书/钉钉 | 企业机器人,移动端 | 🔔 站内消息 | 系统内通知中心 | ⚙️ 推送规则引擎 | ▸ 订阅维度: | 按法规域 / 业务场景 / 地域灵活订阅 | ▸ 优先级: | 🔴 强制 🟠 推荐 🔵 参考 三级分类 | ▸ 免打扰: | 工作时间推送 · 摘要合并 · 频率上限 | ▸ 内容生成: | LLM自动生成变更摘要 + 影响分析 + 行动项 | AI 合规智能中枢 | 面向车企与工厂 | 2026.04 | 8 / 13
--- Slide 9 ---: 关键挑战与应对策略 | 确保合规建议的准确性、时效性与数据安全 | LLM幻觉风险 | 问题 | 合规建议失真
可能导致法律责任 | 应对 | 引文锚定 + 输出验证
高风险强制人工审核 | 数据主权 | 问题 | 敏感文件不能
上传公有云 | 应对 | 全链路私有化部署
数据不出厂 | 法规时效性 | 问题 | 知识库滞后
导致错误建议 | 应对 | 自动更新机制
时间戳标注 + 提醒 | 跨语言质量 | 问题 | 中英混合场景
检索精度下降 | 应对 | 多语言嵌入模型
语言标签过滤策略 | 大文件性能 | 问题 | GB标准数百页
处理超时风险 | 应对 | 流式处理 + 分层索引
异步队列实时进度 | 权限管控 | 问题 | 不同角色需
不同密级访问 | 应对 | RBAC权限体系
知识库分区 + 审计日志 | AI 合规智能中枢 | 面向车企与工厂 | 2026.04 | 9 / 13
--- Slide 10 ---: 分阶段实施路线 | 从核心知识库到全链路合规智能,稳步落地 | 第一阶段 | 0 - 3 个月 | 知识库 + 基础问答 | 1 | 部署合规知识库平台,接入内部文档 | 2 | 接入 GB 标准 · AQ 系列 · IATF 16949 | 3 | 上线中英双语混合检索问答界面 | 4 | 完成权限分级与数据安全配置 | 第二阶段 | 3 - 6 个月 | 文档审查 + API 集成 | 1 | 构建文档合规审查引擎PDF/Word | 2 | 完成合规 API 封装对接PLM/ERP/OA | 3 | 上线法规变更监控与推送服务 | 4 | 接入 Teams / 飞书 Bot 推送渠道 | 第三阶段 | 6 - 12 个月 | EHS隐患识别 + 个性化 | 1 | 构建 EHS 隐患识别与体系审计模块 | 2 | 引入知识图谱,支持多跳推理 | 3 | 上线个性化推荐引擎(角色画像) | 4 | 全链路合规智能体系正式上线 | ▶ | ▶ | AI 合规智能中枢 | 面向车企与工厂 | 2026.04 | 10 / 13
--- Slide 11 ---: 三类合规闭环场景 | 从「发现问题」到「关闭归档」的完整业务闭环 | 📡 法规变更合规闭环 | 1 | ① | 法规监控 | ( | 扩展功能 | ) | 国内外法规数据库实时监控
自动检测条款变更与新法发布 | ↓ | 2 | ② 知识库更新 | 变更内容自动解析入库
版本管理 + 影响范围标注 | ↓ | 3 | ③ 精准推送 | 按角色/业务域推送变更摘要
Email · Teams · 飞书多渠道 | ↓ | 4 | ④ 差距分析 | AI对比新旧法规差异
识别企业现行制度缺口 | ↓ | 5 | ⑤ | 整改执行 | ( | 扩展功能 | ) | 生成整改任务清单
关联责任人与完成时限 | ↓ | 6 | ⑥ | 闭环归档 | ( | 扩展功能 | ) | 整改完成后验收确认 |
| 合规证据归档留存 | 归档的文档放在 | share point | 同步更新知识库 | ↺ 持续监控 → 知识库保鲜 → 合规常态化 | 📄 文档审查合规闭环 | 1 | ① 文件上传 | PDF · Word · Excel · 扫描件
支持批量上传与拖拽 | ↓ | 2 | ② AI解析 | 版面感知OCR条款级分块
自动识别文档类型与法规域 | ↓ | 3 | ③ 合规比对 | 条款级语义对比法规库
缺项检测 · 风险评分 | ↓ | 4 | ④ 风险标注 | 页码+段落精确定位
红/橙/黄三级风险可视化 | ↓ | 5 | ⑤ 整改建议 | AI生成具体整改方案
关联历史合规最佳实践 | ↓ | 6 | ⑥ 复审归档 | 整改后重新提交复核 |
| 通过后合规证明自动归档 | 归档的文档放在 | share point | 同步更新知识库 | ↺ 上传即审查 → 整改即跟踪 → 归档即留证 | 🦺 | EHS安全管理闭环 | ( | 扩展功能 | ) | 1 | ① 隐患发现 | NLP解析巡检/事故报告文本
图像识别 · 传感器数据接入 | ↓ | 2 | ② 风险评级 | SIF潜力评分 + 四维根因分析
高/中/低三级优先级排序 | ↓ | 3 | ③ 任务派发 | 自动生成整改工单
关联责任人 · 截止时间 · 法规依据 | ↓ | 4 | ④ 过程跟踪 | 整改进度实时可视化
超期自动升级提醒 | ↓ | 5 | ⑤ 验收关闭 | 整改完成后现场复查
AI辅助验收确认 | ↓ | 6 | ⑥ 体系优化 | 根因数据回流知识库
优化隐患模型与预防策略 | ↺ 发现即评级 → 整改即跟踪 → 关闭即优化 | AI 合规智能中枢 | 面向车企与工厂 | 2026.04 | 11 / 13
--- Slide 12 ---: 组织架构与 | RBAC | 权限体系 | 按角色分级授权,确保数据安全与合规责任落实到人 | 🏢 组织架构层级 | 集团 / 总部 | 合规委员会 · 法务部 · EHS总监 | ▼ | 事业部 / 工厂 | EHS部门 · 质量部 · 采购部 · 研发部 | ▼ | 业务线 / 车间 | 安全员 · 质检员 · 工艺工程师 | ▼ | 外部协作方 | 供应商 · 第三方审计 · 监管机构 | 🔐 角色权限矩阵RBAC | 知识库
查询 | 文档
审查 | EHS
审计 | 法规
推送 | 系统
管理 | 合规管理员 | ● | ● | ● | ★ | ★ | 法务专员 | ● | ● | ◑ | ◑ | ○ | EHS工程师 | ● | ◑ | ● | ◑ | ○ | 采购专员 | ◑ | ● | ○ | ◑ | ○ | 研发工程师 | ◑ | ◑ | ○ | ◑ | ○ | 工厂安全员 | ◑ | ○ | ● | ◑ | ○ | 供应商(外部) | ○ | ◑ | ○ | ○ | ○ | ● 完全权限 | ◑ 只读/有限 | ○ 无权限 | ★ 管理权限 | AI 合规智能中枢 | 面向车企与工厂 | 2026.04 | 12 / 13
--- Slide 13 ---: 总结与下一步行动 | 构建面向车企与工厂的 AI 驱动全链路合规智能体系 | 📚 | 知识统一 | 内外部法规 + 历史案例
一库统管,自动更新 | 💬 | 智能问答 | 混合检索 + 知识图谱
可溯源的决策建议 | 📄 | 合规审查 | AI自动比对标注
风险等级 + 整改建议 | 🦺 | EHS防控 | SIF预测 + 体系审计
被动响应到主动预防 | 🔌 | 无缝集成 | API化能力嵌入
PLM · ERP · OA · MES | 建议下一步行动 | 01 | 需求确认 | 与EHS · 法务 · 采购
核心用户开展访谈 | 02 | POC验证 | 选取2-3个场景快速
搭建原型验证可行性 | 03 | 数据准备 | 梳理内部文档,确认
数据分级与权限策略 | 04 | 架构评审 | 与IT安全团队确认
私有化部署与集成规范 | AI 合规智能中枢 | 面向车企与工厂 | 2026.04 | 13 / 13
===== DOCX =====

View File

@@ -1,12 +1,18 @@
import './styles/globals.css';
import { ThemeProvider } from './contexts';
import { ThemeProvider, AuthProvider, PageStateProvider, LanguageProvider } from './contexts';
import { AppRouter } from './router/AppRouter';
/**
 * Application root: nests the context providers around the router.
 *
 * Nesting order, outermost first: LanguageProvider, ThemeProvider,
 * AuthProvider, PageStateProvider. LanguageProvider is outermost so that
 * everything rendered below it can read the active translations.
 */
function App() {
return (
<LanguageProvider>
<ThemeProvider>
<AuthProvider>
<PageStateProvider>
<AppRouter />
</PageStateProvider>
</AuthProvider>
</ThemeProvider>
</LanguageProvider>
);
}

43
frontend/src/api/auth.ts Normal file
View File

@@ -0,0 +1,43 @@
const AUTH_API_BASE = '/api/v1';
/** Shape that `loginRequest` parses the `POST /auth/token` response body into. */
export interface TokenResponse {
// Credential string; callers send it back as `Authorization: Bearer <access_token>`.
access_token: string;
token_type: string;
// NOTE(review): presumably the token lifetime in seconds — confirm against the backend.
expires_in: number;
}
/** Shape that `getMeRequest` parses the `GET /auth/me` response body into. */
export interface MeResponse {
user_id: string;
username: string;
role: string;
}
/**
 * Exchange a username/password pair for an access token.
 *
 * Sends the credentials as an `application/x-www-form-urlencoded` POST to
 * `/auth/token`.
 *
 * @param username - Account name to authenticate.
 * @param password - Plain-text password for that account.
 * @returns The parsed token response.
 * @throws Error when the response is not OK. The message is the server's
 *   `detail` field when that field is a string, otherwise a generic
 *   `Login failed (<status>)` message.
 */
export async function loginRequest(
  username: string,
  password: string,
): Promise<TokenResponse> {
  const body = new URLSearchParams();
  body.set('username', username);
  body.set('password', password);

  const res = await fetch(`${AUTH_API_BASE}/auth/token`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
    body: body.toString(),
  });

  if (!res.ok) {
    // The error body is untrusted: it may be non-JSON, `null`, or carry a
    // non-string `detail` (e.g. a list of validation errors). Only surface
    // `detail` when it is really a string; otherwise use the generic message
    // instead of rendering "[object Object]" or crashing on a null payload.
    const payload: unknown = await res.json().catch(() => null);
    const detail =
      typeof payload === 'object' && payload !== null && 'detail' in payload
        ? (payload as { detail?: unknown }).detail
        : undefined;
    throw new Error(typeof detail === 'string' ? detail : `Login failed (${res.status})`);
  }

  return res.json() as Promise<TokenResponse>;
}
/**
 * Fetch the profile of the user that owns `token`.
 *
 * @param token - Access token sent as a Bearer credential.
 * @returns The parsed `/auth/me` response.
 * @throws Error with the HTTP status when the response is not OK.
 */
export async function getMeRequest(token: string): Promise<MeResponse> {
  const response = await fetch(`${AUTH_API_BASE}/auth/me`, {
    headers: { Authorization: `Bearer ${token}` },
  });

  if (response.ok) {
    return response.json() as Promise<MeResponse>;
  }
  throw new Error(`Unauthorised (${response.status})`);
}

View File

@@ -1,6 +1,12 @@
import type { DocInfo, DocListResponse, DocUploadResponse } from './index';
import { API_BASE_URL } from './index';
const TOKEN_KEY = 'auth_token';
/**
 * Build request headers, adding a Bearer `Authorization` entry when a token
 * is stored in localStorage.
 *
 * @param extra - Additional headers; spread last, so they win on key clashes.
 * @returns A fresh header record (never the `extra` object itself).
 */
function authHeaders(extra?: Record<string, string>): Record<string, string> {
  const stored = localStorage.getItem(TOKEN_KEY);
  if (!stored) {
    return { ...extra };
  }
  return { Authorization: `Bearer ${stored}`, ...extra };
}
interface BackendDocumentItem {
doc_id: string;
doc_name: string;
@@ -76,6 +82,7 @@ export async function uploadDocument(
const response = await fetch(`${API_BASE_URL}/documents/upload`, {
method: 'POST',
headers: authHeaders(),
body: formData,
});
@@ -95,7 +102,9 @@ export async function uploadDocument(
}
export async function getDocumentList(): Promise<DocListResponse> {
const response = await fetch(`${API_BASE_URL}/documents/management-list`);
const response = await fetch(`${API_BASE_URL}/documents/management-list`, {
headers: authHeaders(),
});
if (!response.ok) {
throw new Error(`List failed: ${response.status}`);
}
@@ -107,7 +116,9 @@ export async function getDocumentList(): Promise<DocListResponse> {
}
export async function getDocumentStatus(docId: string): Promise<DocUploadResponse> {
const response = await fetch(`${API_BASE_URL}/documents/status/${docId}`);
const response = await fetch(`${API_BASE_URL}/documents/status/${docId}`, {
headers: authHeaders(),
});
if (!response.ok) {
throw new Error(`Status check failed: ${response.status}`);
}
@@ -115,14 +126,20 @@ export async function getDocumentStatus(docId: string): Promise<DocUploadRespons
}
export async function deleteDocument(docId: string): Promise<void> {
const response = await fetch(`${API_BASE_URL}/documents/${docId}`, { method: 'DELETE' });
const response = await fetch(`${API_BASE_URL}/documents/${docId}`, {
method: 'DELETE',
headers: authHeaders(),
});
if (!response.ok) {
throw new Error(`Delete failed: ${response.status}`);
}
}
export async function retryDocument(docId: string): Promise<DocUploadResponse> {
const response = await fetch(`${API_BASE_URL}/documents/${docId}/retry`, { method: 'POST' });
const response = await fetch(`${API_BASE_URL}/documents/${docId}/retry`, {
method: 'POST',
headers: authHeaders(),
});
if (!response.ok) {
throw new Error(`Retry failed: ${response.status}`);
}
@@ -132,10 +149,10 @@ export async function retryDocument(docId: string): Promise<DocUploadResponse> {
export async function searchRegulations(query: string, topK: number = 8): Promise<RegulationSearchResponse> {
const response = await fetch(`${API_BASE_URL}/knowledge/retrieval`, {
method: 'POST',
headers: {
headers: authHeaders({
Accept: 'application/json',
'Content-Type': 'application/json',
},
}),
body: JSON.stringify({ query, top_k: topK }),
});

View File

@@ -1,5 +1,12 @@
const API_BASE_URL = '/api/v1';
const TOKEN_KEY = 'auth_token';
/**
 * Look up the persisted JWT directly in localStorage.
 *
 * Deliberately does not import AuthContext, which avoids a circular
 * dependency between the API layer and the context layer.
 *
 * @returns The stored token, or `null` when none is stored.
 */
function getStoredToken(): string | null {
  const stored = localStorage.getItem(TOKEN_KEY);
  return stored;
}
interface ApiErrorPayload {
detail?: string;
message?: string;
@@ -19,8 +26,24 @@ async function readErrorMessage(response: Response): Promise<string> {
}
}
/**
 * Add a Bearer `Authorization` header when a token is stored and the caller
 * has not already supplied one.
 *
 * @param headers - Header set to update; mutated in place.
 * @returns The same `headers` instance, for chaining.
 */
function withAuth(headers: Headers): Headers {
  const stored = getStoredToken();
  if (!stored) {
    return headers;
  }
  // An explicit Authorization header from the caller takes precedence.
  if (headers.has('Authorization')) {
    return headers;
  }
  headers.set('Authorization', `Bearer ${stored}`);
  return headers;
}
/**
 * React to a 401 response: drop the stored token and announce it.
 *
 * The announcement is a window-level `auth:unauthorized` CustomEvent, so that
 * AuthContext or the router can respond without this module importing them.
 */
function handle401(): void {
  localStorage.removeItem(TOKEN_KEY);
  const unauthorized = new CustomEvent('auth:unauthorized');
  window.dispatchEvent(unauthorized);
}
export async function fetchAPI<T>(endpoint: string, options?: RequestInit): Promise<T> {
const headers = new Headers(options?.headers);
const headers = withAuth(new Headers(options?.headers));
if (!headers.has('Accept')) {
headers.set('Accept', 'application/json');
}
@@ -33,6 +56,11 @@ export async function fetchAPI<T>(endpoint: string, options?: RequestInit): Prom
headers,
});
if (response.status === 401) {
handle401();
throw new Error('Session expired, please log in again.');
}
if (!response.ok) {
throw new Error(`API Error: ${await readErrorMessage(response)}`);
}
@@ -54,15 +82,25 @@ export async function streamSSE<TMessage extends SSEMessage>(
onError?: (error: Error) => void,
onComplete?: () => void
): Promise<void> {
const response = await fetch(buildUrl(endpoint), {
method: 'POST',
headers: {
const headers: Record<string, string> = {
Accept: 'text/event-stream',
'Content-Type': 'application/json',
},
};
const token = getStoredToken();
if (token) headers['Authorization'] = `Bearer ${token}`;
const response = await fetch(buildUrl(endpoint), {
method: 'POST',
headers,
body: JSON.stringify(body),
});
if (response.status === 401) {
handle401();
onError?.(new Error('Session expired, please log in again.'));
return;
}
if (!response.ok) {
onError?.(new Error(`HTTP error! status: ${await readErrorMessage(response)}`));
return;

View File

@@ -1,4 +1,9 @@
const PERCEPTION_API_BASE = '/api/v1';
const TOKEN_KEY = 'auth_token';
/**
 * Build the `Authorization` header for perception API calls.
 *
 * @returns `{ Authorization: 'Bearer <token>' }` when a token is stored in
 *   localStorage, otherwise an empty record.
 */
function authHeader(): Record<string, string> {
  const stored = localStorage.getItem(TOKEN_KEY);
  if (stored) {
    return { Authorization: `Bearer ${stored}` };
  }
  return {};
}
export type ImpactLevel = 'high' | 'medium' | 'low';
export type EventStatus = 'enacted' | 'draft' | 'consultation';
@@ -48,7 +53,7 @@ export interface AnalysisSSEMessage {
}
export async function getPerceptionStats(): Promise<PerceptionStats> {
const res = await fetch(`${PERCEPTION_API_BASE}/perception/stats`);
const res = await fetch(`${PERCEPTION_API_BASE}/perception/stats`, { headers: authHeader() });
if (!res.ok) throw new Error(`stats failed: ${res.status}`);
return res.json() as Promise<PerceptionStats>;
}
@@ -62,7 +67,7 @@ export async function listEvents(params?: {
if (params?.source) query.set('source', params.source);
if (params?.impact_level) query.set('impact_level', params.impact_level);
if (params?.limit) query.set('limit', String(params.limit));
const res = await fetch(`${PERCEPTION_API_BASE}/perception/events?${query.toString()}`);
const res = await fetch(`${PERCEPTION_API_BASE}/perception/events?${query.toString()}`, { headers: authHeader() });
if (!res.ok) throw new Error(`list events failed: ${res.status}`);
return res.json() as Promise<EventListResponse>;
}
@@ -76,7 +81,7 @@ export async function analyzeEvent(
try {
const res = await fetch(`${PERCEPTION_API_BASE}/perception/events/${eventId}/analyze`, {
method: 'POST',
headers: { Accept: 'text/event-stream' },
headers: { Accept: 'text/event-stream', ...authHeader() },
signal,
});
if (!res.ok || !res.body) throw new Error(`analyze failed: ${res.status}`);

View File

@@ -1,6 +1,8 @@
import type { QuickQuestionsResponse, SSEMessage } from './index';
const AGENT_API_BASE = '/api/v1';
const TOKEN_KEY = 'auth_token';
/** Return the JWT stored in localStorage, or `null` when none is stored. */
function getToken(): string | null {
  const stored = localStorage.getItem(TOKEN_KEY);
  return stored;
}
const _FALLBACK_QUESTIONS = [
{ id: '1', question: '请总结最新入库法规对电池安全的核心要求', category: '法规解读' },
@@ -100,6 +102,7 @@ export async function ragChat(
headers: {
'Content-Type': 'application/json',
Accept: 'text/event-stream',
...(getToken() ? { Authorization: `Bearer ${getToken()}` } : {}),
},
body: JSON.stringify({
query,

View File

@@ -1,9 +1,11 @@
import { NavLink } from 'react-router-dom';
import {
LayoutDashboard, Radio, Monitor, FileText,
Shield, MessageSquare, Sun, Moon
Shield, MessageSquare, Sun, Moon, LogOut
} from 'lucide-react';
import { useTheme } from '../../contexts/ThemeContext';
import { useAuth } from '../../contexts/AuthContext';
import { useLanguage } from '../../contexts/LanguageContext';
interface NavItem {
to: string;
@@ -12,21 +14,6 @@ interface NavItem {
badge?: number;
}
const mainNav: NavItem[] = [
{ to: '/', icon: <LayoutDashboard size={16} />, label: 'Overview' },
{ to: '/signals', icon: <Radio size={16} />, label: 'Regulatory Signals' },
{ to: '/status', icon: <Monitor size={16} />, label: 'System Status' },
];
const workbenchNav: NavItem[] = [
{ to: '/documents', icon: <FileText size={16} />, label: 'Documents' },
{ to: '/compliance', icon: <Shield size={16} />, label: 'Compliance Analysis' },
];
const chatNav: NavItem[] = [
{ to: '/chat', icon: <MessageSquare size={16} />, label: 'Regulation Q&A' },
];
function NavGroup({ title, items }: { title: string; items: NavItem[] }) {
return (
<div className="nav-group">
@@ -49,8 +36,33 @@ function NavGroup({ title, items }: { title: string; items: NavItem[] }) {
);
}
/**
 * Avatar initials derived from a username (at most 2 characters, upper-cased).
 *
 * The name is split on whitespace, underscores and hyphens. With two or more
 * non-empty parts the result is the first character of the first two parts;
 * otherwise it is the first two characters of the single part.
 *
 * Empty parts are discarded first. Without that, a leading or trailing
 * separator ("_admin", "john_") made the lookup read `undefined` and produced
 * strings such as "UNDEFINEDA".
 *
 * @param name - Username to abbreviate; may be empty or padded with spaces.
 * @returns Up to two upper-case characters, or `''` for a blank name.
 */
function initials(name: string): string {
  const parts = name
    .trim()
    .split(/[\s_-]+/)
    .filter(part => part.length > 0);

  if (parts.length >= 2) {
    return (parts[0].charAt(0) + parts[1].charAt(0)).toUpperCase();
  }
  // Zero or one usable part: use its first two characters (empty when blank).
  return (parts[0] ?? '').slice(0, 2).toUpperCase();
}
export function Sidebar() {
const { theme, toggleTheme } = useTheme();
const { user, logout } = useAuth();
const { lang, t, toggleLang } = useLanguage();
const mainNav: NavItem[] = [
{ to: '/', icon: <LayoutDashboard size={16} />, label: t.nav.overview },
{ to: '/signals', icon: <Radio size={16} />, label: t.nav.signals },
{ to: '/status', icon: <Monitor size={16} />, label: t.nav.status },
];
const workbenchNav: NavItem[] = [
{ to: '/documents', icon: <FileText size={16} />, label: t.nav.documents },
{ to: '/compliance', icon: <Shield size={16} />, label: t.nav.compliance },
];
const chatNav: NavItem[] = [
{ to: '/chat', icon: <MessageSquare size={16} />, label: t.nav.chat },
];
return (
<aside className="sidebar">
<div className="sidebar-brand">
@@ -62,22 +74,43 @@ export function Sidebar() {
</div>
<nav className="sidebar-nav">
<NavGroup title="Main" items={mainNav} />
<NavGroup title="Workbench" items={workbenchNav} />
<NavGroup title="Chat" items={chatNav} />
<NavGroup title={t.nav.groupMain} items={mainNav} />
<NavGroup title={t.nav.groupWorkbench} items={workbenchNav} />
<NavGroup title={t.nav.groupChat} items={chatNav} />
</nav>
<div className="sidebar-footer">
<div className="sidebar-user">
<div className="user-avatar">TS</div>
<div className="user-avatar">{user ? initials(user.username) : 'TS'}</div>
<div className="user-info">
<div className="user-name">Analyst</div>
<div className="user-role">T-Systems</div>
<div className="user-name">{user?.username ?? 'Analyst'}</div>
<div className="user-role">
{user ? (
<span className="user-badge">{user.role}</span>
) : (
'T-Systems'
)}
</div>
</div>
<button className="theme-btn" onClick={toggleTheme} title="Toggle theme">
</div>
<div style={{ display: 'flex', gap: 4 }}>
<button
className="theme-btn"
onClick={toggleLang}
title={t.sidebar.toggleLang}
style={{ fontSize: 12, fontWeight: 600 }}
>
{lang === 'en' ? 'EN' : '中'}
</button>
<button className="theme-btn" onClick={toggleTheme} title={t.sidebar.toggleTheme}>
{theme === 'dark' ? <Sun size={14} /> : <Moon size={14} />}
</button>
{user && (
<button className="logout-btn" onClick={logout} title={t.sidebar.signOut}>
<LogOut size={14} />
</button>
)}
</div>
</div>
</aside>
);

View File

@@ -0,0 +1,72 @@
import React, { createContext, useCallback, useContext, useEffect, useState } from 'react';
import { loginRequest, getMeRequest } from '../api/auth';
const TOKEN_KEY = 'auth_token';
/** Profile of the signed-in user as held in the auth context state. */
export interface AuthUser {
user_id: string;
username: string;
role: string;
}
/** Value exposed by the auth context to consumers of `useAuth()`. */
interface AuthContextValue {
// Current access token, or null when signed out.
token: string | null;
// Profile of the signed-in user, or null when not (yet) known.
user: AuthUser | null;
// True until the token found in localStorage at startup has been checked.
loading: boolean;
// Signs in and stores the token; rejects when either auth request fails.
login: (username: string, password: string) => Promise<void>;
// Clears the stored token and the in-memory auth state.
logout: () => void;
}
const AuthContext = createContext<AuthContextValue>({
token: null,
user: null,
loading: true,
login: async () => {},
logout: () => {},
});
/**
 * Provides authentication state (`token`, `user`, `loading`) and the `login` /
 * `logout` actions to the component tree.
 *
 * On mount, a token found in localStorage is validated with `getMeRequest`.
 * If that request fails the token is discarded so the user has to sign in
 * again. `loading` stays `true` until the validation has settled.
 *
 * @param children Subtree that may call `useAuth`.
 */
export function AuthProvider({ children }: { children: React.ReactNode }) {
  const [token, setToken] = useState<string | null>(() => localStorage.getItem(TOKEN_KEY));
  const [user, setUser] = useState<AuthUser | null>(null);
  const [loading, setLoading] = useState(true);

  // Validate the stored token on mount by calling /auth/me.
  useEffect(() => {
    if (!token) {
      setLoading(false);
      return;
    }
    const restoredToken = token;
    let unmounted = false;
    // The request can settle after the user has logged in with another token
    // or logged out. Its outcome must then neither overwrite the current user
    // nor delete the token that replaced the one being validated.
    const isStale = () => unmounted || localStorage.getItem(TOKEN_KEY) !== restoredToken;

    getMeRequest(restoredToken)
      .then(me => {
        if (!isStale()) setUser(me);
      })
      .catch(() => {
        if (isStale()) return;
        // Token is expired or invalid — force re-login.
        localStorage.removeItem(TOKEN_KEY);
        setToken(null);
      })
      .finally(() => {
        if (!unmounted) setLoading(false);
      });

    return () => {
      unmounted = true;
    };
  }, []); // eslint-disable-line react-hooks/exhaustive-deps

  // The token is persisted only after the profile request succeeded, so a
  // half-completed login never leaves a token behind.
  const login = useCallback(async (username: string, password: string) => {
    const resp = await loginRequest(username, password);
    const me = await getMeRequest(resp.access_token);
    localStorage.setItem(TOKEN_KEY, resp.access_token);
    setToken(resp.access_token);
    setUser(me);
  }, []);

  const logout = useCallback(() => {
    localStorage.removeItem(TOKEN_KEY);
    setToken(null);
    setUser(null);
  }, []);

  // Keep the context value referentially stable so consumers re-render only
  // when the authentication state actually changes.
  const value = React.useMemo<AuthContextValue>(
    () => ({ token, user, loading, login, logout }),
    [token, user, loading, login, logout],
  );

  return <AuthContext.Provider value={value}>{children}</AuthContext.Provider>;
}
/** Hook returning the current authentication context value. */
export function useAuth() {
  const auth = useContext(AuthContext);
  return auth;
}

View File

@@ -0,0 +1,36 @@
import React, { createContext, useContext, useState } from 'react';
import { en } from '../locales/en';
import type { Translations } from '../locales/en';
import { zh } from '../locales/zh';
/** Identifiers of the supported UI languages. */
export type Lang = 'en' | 'zh';
/** Shape of the value exposed by `LanguageContext` and returned from `useLanguage`. */
interface LanguageContextValue {
/** Currently active language. */
lang: Lang;
/** Translation table for the active language. */
t: Translations;
/** Switches between `en` and `zh`. */
toggleLang: () => void;
}
/**
 * Language context. The default below is what a consumer receives when no
 * `LanguageProvider` is mounted above it: English, with a no-op toggle.
 */
const LanguageContext = createContext<LanguageContextValue>({
lang: 'en',
t: en,
toggleLang: () => {},
});
/**
 * Holds the active UI language (initially English) and exposes it, the
 * matching translation table and a toggle through `LanguageContext`.
 *
 * @param children Subtree that may call `useLanguage`.
 */
export function LanguageProvider({ children }: { children: React.ReactNode }) {
  const [lang, setLang] = useState<Lang>('en');

  // Stable identity: consumers may list `toggleLang` in dependency arrays or
  // pass it to memoized children.
  const toggleLang = React.useCallback(() => {
    setLang(current => (current === 'en' ? 'zh' : 'en'));
  }, []);

  // Rebuild the context value only when the language changes, so consumers are
  // not re-rendered every time a parent of this provider re-renders.
  const value = React.useMemo<LanguageContextValue>(
    () => ({ lang, t: lang === 'en' ? en : zh, toggleLang }),
    [lang, toggleLang],
  );

  return <LanguageContext.Provider value={value}>{children}</LanguageContext.Provider>;
}
/** Hook returning the active language, its translation table and the toggle. */
export function useLanguage() {
  const language = useContext(LanguageContext);
  return language;
}

View File

@@ -0,0 +1,218 @@
/**
* PageStateContext — preserves page-level session state across route changes.
*
* When React Router unmounts a page component, all its useState values are lost.
* This context lives above the router and holds the state that must survive
* navigation so users can switch modules and return without losing their work.
*
* Covered pages:
* - RagChat: message history, citation rail, sessionId, input draft
* - Compliance: analysis result (sources, findings, conclusion, meta)
* - Perception: selected signal, filter state, AI analysis output
*/
import React, { createContext, useContext, useState, useCallback, useRef } from 'react';
// ── RagChat types ─────────────────────────────────────────────────────────────
/** A single entry of the RagChat message history. */
export interface RagMessage {
id: string;
role: 'user' | 'assistant';
text: string;
// NOTE(review): presumably refers to `RagCitation.index` values — confirm against the chat page.
citationRefs?: number[];
}
/** A cited source kept in `RagChatState.citations`. */
export interface RagCitation {
index: number;
score: number;
name: string;
clause: string;
snippet: string;
docId?: string;
}
/** RagChat page state preserved across route changes: history, citations, session id and input draft. */
export interface RagChatState {
messages: RagMessage[];
citations: RagCitation[];
sessionId: string | null;
inputDraft: string;
}
/** Initial RagChat state: one assistant greeting, no citations, no session and an empty draft. */
const RAG_INIT: RagChatState = {
messages: [
{
id: 'init',
role: 'assistant',
text: 'Hello! I can answer questions about your indexed regulations and compliance documents. Try asking about EU AI Act requirements, MIIT rules, or ISO/SAE 21434 scope.',
},
],
citations: [],
sessionId: null,
inputDraft: '',
};
// ── Compliance types ──────────────────────────────────────────────────────────
/** One entry of `ComplianceState.sources`. */
export interface ComplianceSourceEvent {
standard: string;
clause: string;
score: number;
status: string;
full_content: string;
}
/** One entry of `ComplianceState.findings`. */
export interface ComplianceFindingEvent {
title: string;
desc: string;
status: 'ok' | 'warn' | 'risk';
clause_ref?: string;
}
/** A label/value pair listed in `ComplianceDonePayload.actions`, optionally flagged with `risk`. */
export interface ComplianceActionItem {
label: string;
value: string;
risk?: boolean;
}
/** Final result of an analysis, stored in `ComplianceState.done`. */
export interface ComplianceDonePayload {
conclusion: string;
actions: ComplianceActionItem[];
risk_score: number;
highlight_terms: string[];
para_text: string;
}
/** Metadata of an analysis, stored in `ComplianceState.meta`. */
export interface ComplianceMeta {
title: string;
// NOTE(review): presumably pasted text / indexed document / uploaded file — confirm against the analysis modal.
sourceType: 'text' | 'doc' | 'upload';
startedAt: string;
}
/** Possible values of `ComplianceState.status`. */
export type ComplianceStatus = 'idle' | 'streaming' | 'done' | 'error';
/** Compliance page state preserved across route changes. */
export interface ComplianceState {
status: ComplianceStatus;
stageLabel: string;
stageKey: string;
meta: ComplianceMeta | null;
sources: ComplianceSourceEvent[];
findings: ComplianceFindingEvent[];
/** Final payload; `null` in the initial state. */
done: ComplianceDonePayload | null;
errorText: string;
// Direction B additions:
analysisId: string | null;
isReadOnly: boolean;
activeFindingId: string | null;
}
/** Idle, empty compliance state; also the value that `resetCompliance` restores. */
const COMPLIANCE_INIT: ComplianceState = {
status: 'idle',
stageLabel: '',
stageKey: '',
meta: null,
sources: [],
findings: [],
done: null,
errorText: '',
analysisId: null,
isReadOnly: false,
activeFindingId: null,
};
// ── Perception types ──────────────────────────────────────────────────────────
/** One entry of `PerceptionPageState.signals`. */
export interface PerceptionSignal {
id: string;
source: string;
standard: string;
status: 'ok' | 'warn' | 'risk' | 'info';
title: string;
summary: string;
date: string;
tags: string[];
impact: 'High' | 'Medium' | 'Low';
}
/** Perception page state preserved across route changes: signals, filters, selection and AI output. */
export interface PerceptionPageState {
signals: PerceptionSignal[];
searchQuery: string;
sourceFilter: string;
impactFilter: string;
/** Id of the selected signal, or `null` when none is selected. */
selectedId: string | null;
aiOutput: string;
detailTab: 'overview' | 'obligations' | 'assessment' | 'diff';
crawlStatus: string;
}
/** Initial Perception state: no signals, both filters on 'All', nothing selected, overview tab. */
const PERCEPTION_INIT: PerceptionPageState = {
signals: [],
searchQuery: '',
sourceFilter: 'All',
impactFilter: 'All',
selectedId: null,
aiOutput: '',
detailTab: 'overview',
crawlStatus: '',
};
// ── Context value ─────────────────────────────────────────────────────────────
/** Everything `PageStateProvider` exposes: per-page state, its setter, and the refs shared with each page. */
interface PageStateContextValue {
// RagChat
ragState: RagChatState;
setRagState: React.Dispatch<React.SetStateAction<RagChatState>>;
ragStreamingRef: React.MutableRefObject<boolean>;
ragAbortRef: React.MutableRefObject<AbortController | null>;
// Compliance
complianceState: ComplianceState;
setComplianceState: React.Dispatch<React.SetStateAction<ComplianceState>>;
complianceAbortRef: React.MutableRefObject<AbortController | null>;
/** Aborts the controller held in `complianceAbortRef` (if any) and restores the initial compliance state. */
resetCompliance: () => void;
// Perception
perceptionState: PerceptionPageState;
setPerceptionState: React.Dispatch<React.SetStateAction<PerceptionPageState>>;
perceptionAbortRef: React.MutableRefObject<AbortController | null>;
perceptionCrawlAbortRef: React.MutableRefObject<AbortController | null>;
}
/** Page-state context; the `null` default lets `usePageState` detect a missing provider. */
const PageStateContext = createContext<PageStateContextValue | null>(null);
// ── Provider ──────────────────────────────────────────────────────────────────
/**
 * Owns the RagChat, Compliance and Perception page state together with their
 * abort / streaming refs, and publishes all of it through `PageStateContext`.
 *
 * @param children Subtree that may call `usePageState`.
 */
export function PageStateProvider({ children }: { children: React.ReactNode }) {
  // RagChat
  const [ragState, setRagState] = useState<RagChatState>(RAG_INIT);
  const ragStreamingRef = useRef(false);
  const ragAbortRef = useRef<AbortController | null>(null);

  // Compliance
  const [complianceState, setComplianceState] = useState<ComplianceState>(COMPLIANCE_INIT);
  const complianceAbortRef = useRef<AbortController | null>(null);
  const resetCompliance = useCallback(() => {
    // Cancel whatever request is registered in the ref before wiping the state.
    const inFlight = complianceAbortRef.current;
    if (inFlight) {
      inFlight.abort();
    }
    setComplianceState(COMPLIANCE_INIT);
  }, []);

  // Perception
  const [perceptionState, setPerceptionState] = useState<PerceptionPageState>(PERCEPTION_INIT);
  const perceptionAbortRef = useRef<AbortController | null>(null);
  const perceptionCrawlAbortRef = useRef<AbortController | null>(null);

  const contextValue: PageStateContextValue = {
    ragState,
    setRagState,
    ragStreamingRef,
    ragAbortRef,
    complianceState,
    setComplianceState,
    complianceAbortRef,
    resetCompliance,
    perceptionState,
    setPerceptionState,
    perceptionAbortRef,
    perceptionCrawlAbortRef,
  };

  return <PageStateContext.Provider value={contextValue}>{children}</PageStateContext.Provider>;
}
// ── Hook ──────────────────────────────────────────────────────────────────────
/**
 * Hook returning the shared page state.
 *
 * @throws Error when called outside of a `PageStateProvider`.
 */
export function usePageState() {
  const value = useContext(PageStateContext);
  if (value) {
    return value;
  }
  throw new Error('usePageState must be used inside PageStateProvider');
}

View File

@@ -1 +1,20 @@
export { ThemeProvider, useTheme } from './ThemeContext';
export { AuthProvider, useAuth } from './AuthContext';
export type { AuthUser } from './AuthContext';
export { PageStateProvider, usePageState } from './PageStateContext';
export { LanguageProvider, useLanguage } from './LanguageContext';
export type { Lang } from './LanguageContext';
export type {
RagChatState,
RagMessage,
RagCitation,
ComplianceState,
ComplianceStatus,
ComplianceSourceEvent,
ComplianceFindingEvent,
ComplianceDonePayload,
ComplianceMeta,
ComplianceActionItem,
PerceptionPageState,
PerceptionSignal,
} from './PageStateContext';

460
frontend/src/locales/en.ts Normal file
View File

@@ -0,0 +1,460 @@
// English translations — default language
/**
 * Shape of a locale table. A locale object typed as `Translations` (such as
 * `en` in this file) has to provide a string for every key below; keys are
 * grouped by UI area.
 */
export interface Translations {
/** Sidebar navigation: group titles and item labels. */
nav: {
groupMain: string;
groupWorkbench: string;
groupChat: string;
overview: string;
signals: string;
status: string;
documents: string;
compliance: string;
chat: string;
};
/** Tooltips of the sidebar footer buttons. */
sidebar: {
toggleTheme: string;
toggleLang: string;
signOut: string;
};
/** Overview page. */
overview: {
eyebrow: string;
heroTitle: string;
heroDesc: string;
openDashboard: string;
jumpToChat: string;
sectionHowItWorks: string;
sectionScreens: string;
statScreens: string;
statFlows: string;
statReviewPosture: string;
stepUpload: string; stepUploadDesc: string;
stepProcess: string; stepProcessDesc: string;
stepMonitor: string; stepMonitorDesc: string;
stepAnalyze: string; stepAnalyzeDesc: string;
stepReview: string; stepReviewDesc: string;
stepChat: string; stepChatDesc: string;
screenStatus: string; screenStatusDesc: string;
screenSignals: string; screenSignalsDesc: string;
screenDocuments: string; screenDocumentsDesc: string;
screenCompliance: string; screenComplianceDesc: string;
screenChat: string; screenChatDesc: string;
screenAnalytics: string; screenAnalyticsDesc: string;
};
/** Regulatory Signals page. */
signals: {
topbarTitle: string;
topbarSub: string;
searchPlaceholder: string;
refreshBtn: string;
crawlingBtn: string;
statTotal: string;
statHigh: string;
statMedium: string;
statLast90: string;
badgeFinal: string;
badgeDraft: string;
badgeUrgent: string;
badgePublished: string;
emptySelectSignal: string;
runAnalysis: string;
stopBtn: string;
sourceLink: string;
tabOverview: string;
tabObligations: string;
tabImpact: string;
tabChanges: string;
cardScopeHeader: string;
cardObligationsHeader: string;
obligationsEmpty: string;
colObligationDesc: string;
colSubject: string;
colType: string;
colDeadline: string;
deadlinePending: string;
cardAffectedDocs: string;
noAffectedDocs: string;
cardAIImpact: string;
footerText: string;
// The `en` values of the status* keys below contain `{name}` placeholders.
// NOTE(review): presumably substituted by the consuming page — confirm.
statusConnecting: string;
statusNoStream: string;
statusCrawling: string;
statusProcessing: string;
statusComplete: string;
statusUpdateComplete: string;
statusError: string;
statusConnFailed: string;
diffOld: string;
diffNew: string;
diffCardHeader: string;
};
/** System Status page. */
status: {
topbarTitle: string;
searchPlaceholder: string;
exportBtn: string;
refreshBtn: string;
newUploadBtn: string;
statTotal: string;
statIndexed: string;
statFailed: string;
statChunks: string;
statCoverage: string;
cardHealth: string;
badgeOnline: string;
badgeError: string;
badgeDegraded: string;
badgeUnknown: string;
healthEndpointError: string;
serviceEnabled: string;
serviceDisabled: string;
serviceNotLoaded: string;
cardConfig: string;
labelLLMProvider: string;
labelLLMModel: string;
labelEmbeddingModel: string;
labelEmbeddingDim: string;
labelMilvusCollection: string;
labelParserBackend: string;
labelChunkBackend: string;
labelParserFailureMode: string;
configLoadError: string;
cardBreakdown: string;
breakdownIndexed: string;
breakdownProcessing: string;
breakdownFailed: string;
cardRuntime: string;
labelActiveSessions: string;
labelSessionCapacity: string;
labelReranker: string;
labelBM25: string;
statusActive: string;
statusUnavailable: string;
footerAllOk: string;
footerDegraded: string;
footerChecking: string;
totalChunks: string;
};
/** Document Management page. */
docs: {
topbarTitle: string;
searchPlaceholder: string;
refreshBtn: string;
uploadBtn: string;
confirmDeleteTitle: string;
cancelBtn: string;
deleteBtn: string;
filterAll: string;
filterReady: string;
filterProcessing: string;
filterFailed: string;
filterPending: string;
filterAllTypes: string;
deleteSelected: string;
colName: string;
colStatus: string;
colUploaded: string;
colChunks: string;
colSize: string;
colType: string;
colActions: string;
loading: string;
emptyNoDocuments: string;
emptyNoMatch: string;
titleDownload: string;
titleRetry: string;
titleDelete: string;
};
/** Compliance Analysis page, including export, history and finding-chat strings. */
compliance: {
topbarTitle: string;
searchPlaceholder: string;
clearBtn: string;
exportBtn: string;
exportJSON: string;
exportText: string;
newAnalysisBtn: string;
statusAnalyzing: string;
statusComplete: string;
statusError: string;
emptyTitle: string;
emptyDesc: string;
retrievingMsg: string;
defaultRegulation: string;
matchSuffix: string;
colParagraph: string;
extractingMsg: string;
noTextExtracted: string;
stagesHeader: string;
stageExtraction: string;
stageClauseSplit: string;
stageRetrieval: string;
stageSynthesis: string;
gapInProgress: string;
askAIBtn: string;
chatBtn: string;
conclusionHeader: string;
riskScoreTooltip: string;
statusCovered: string;
statusGap: string;
statusCritical: string;
statusInfo: string;
sourceTypePasted: string;
sourceTypeIndexed: string;
sourceTypeUploaded: string;
chatSidebarHeader: string;
chatThinking: string;
quickQ1: string;
quickQ2: string;
quickQ3: string;
chatPlaceholder: string;
sendBtn: string;
analysisFailed: string;
exportReportHeader: string;
exportSectionParagraph: string;
exportSectionFindings: string;
exportSectionConclusion: string;
exportSectionActions: string;
historyHeader: string;
downloadReport: string;
historyEmpty: string;
historyDeleteConfirm: string;
drawerClose: string;
drawerChatEmpty: string;
drawerSuggestionsHeader: string;
};
/** Regulation Q&A (RAG chat) page. */
ragchat: {
topbarTitle: string;
exportBtn: string;
quickPromptsHeader: string;
inputPlaceholder: string;
citationsHeader: string;
citationsEmpty: string;
apiError: string;
};
}
/**
 * English locale table.
 *
 * Several `signals.status*` values contain `{name}` placeholders such as
 * `{count}` or `{message}`.
 * NOTE(review): presumably replaced by the consuming page at render time — confirm.
 */
export const en: Translations = {
nav: {
groupMain: 'Main',
groupWorkbench: 'Workbench',
groupChat: 'Chat',
overview: 'Overview',
signals: 'Regulatory Signals',
status: 'System Status',
documents: 'Documents',
compliance: 'Compliance Analysis',
chat: 'Regulation Q&A',
},
sidebar: {
toggleTheme: 'Toggle theme',
toggleLang: 'Switch language',
signOut: 'Sign out',
},
overview: {
eyebrow: 'T-Systems · AI Regulation Hub',
heroTitle: 'AI Compliance,\nAutomated end-to-end',
heroDesc: 'Monitor global AI regulations, analyze document compliance gaps, and get cited answers — all in one platform.',
openDashboard: 'Open dashboard',
jumpToChat: 'Jump to regulation chat',
sectionHowItWorks: 'How it works',
sectionScreens: 'Screens',
statScreens: 'Screens',
statFlows: 'Backend-aware flows',
statReviewPosture: 'Review posture',
stepUpload: 'Upload', stepUploadDesc: 'Ingest regulation documents',
stepProcess: 'Process', stepProcessDesc: 'Embed and chunk via vector DB',
stepMonitor: 'Monitor', stepMonitorDesc: 'Watch regulatory signal feed',
stepAnalyze: 'Analyze', stepAnalyzeDesc: 'Run compliance gap analysis',
stepReview: 'Review', stepReviewDesc: 'Inspect findings with AI assist',
stepChat: 'Chat', stepChatDesc: 'Ask questions with cited answers',
screenStatus: 'System Status', screenStatusDesc: 'Live health and workflow queue',
screenSignals: 'Regulatory Signals', screenSignalsDesc: 'AI-detected regulatory changes',
screenDocuments: 'Document Management', screenDocumentsDesc: 'Upload and inspect documents',
screenCompliance: 'Compliance Analysis', screenComplianceDesc: 'Three-column compliance workspace',
screenChat: 'Regulation Q&A', screenChatDesc: 'Chat with cited regulation sources',
screenAnalytics: 'Analytics', screenAnalyticsDesc: 'KPIs and coverage metrics',
},
signals: {
topbarTitle: 'Regulatory Signals',
topbarSub: 'ai-powered · live feed',
searchPlaceholder: 'Search signals...',
refreshBtn: 'Refresh Sources',
crawlingBtn: 'Crawling...',
statTotal: 'Total signals',
statHigh: 'High impact',
statMedium: 'Medium impact',
statLast90: 'Last 90 days',
badgeFinal: 'Final',
badgeDraft: 'Draft',
badgeUrgent: 'Urgent',
badgePublished: 'Published',
emptySelectSignal: 'Select a signal to run impact analysis',
runAnalysis: 'Run impact analysis',
stopBtn: 'Stop',
sourceLink: 'Source',
tabOverview: 'Overview',
tabObligations: 'Obligations',
tabImpact: 'Impact Assessment',
tabChanges: 'Change Comparison',
cardScopeHeader: 'Scope & Summary',
cardObligationsHeader: 'Obligations',
obligationsEmpty: 'No structured data yet. Click "Run impact analysis" to extract.',
colObligationDesc: 'Obligation',
colSubject: 'Subject',
colType: 'Type',
colDeadline: 'Deadlines',
deadlinePending: 'Pending',
cardAffectedDocs: 'Affected documents',
noAffectedDocs: 'No affected documents found.',
cardAIImpact: 'AI Impact Analysis',
footerText: 'Live feed · Regulation Hub',
statusConnecting: 'Connecting to data sources...',
statusNoStream: 'No stream',
statusCrawling: 'Crawling...',
statusProcessing: 'Processing {count} items...',
statusComplete: 'Done +{count} items',
statusUpdateComplete: 'Update complete — {new} added, {updated} updated',
statusError: 'Error: {message}',
statusConnFailed: 'Connection failed: {message}',
diffOld: 'Previous',
diffNew: 'Current',
diffCardHeader: 'Change Comparison',
},
status: {
topbarTitle: 'System Status',
searchPlaceholder: 'Search...',
exportBtn: 'Export',
refreshBtn: 'Refresh',
newUploadBtn: 'New upload',
statTotal: 'Documents total',
statIndexed: 'Indexed',
statFailed: 'Failed',
statChunks: 'Vector chunks',
statCoverage: 'Index coverage',
cardHealth: 'System health',
badgeOnline: 'Online',
badgeError: 'Error',
badgeDegraded: 'Degraded',
badgeUnknown: 'Unknown',
healthEndpointError: 'Could not reach health endpoint',
serviceEnabled: 'Enabled',
serviceDisabled: 'Disabled',
serviceNotLoaded: 'Not loaded',
cardConfig: 'System configuration',
labelLLMProvider: 'LLM provider',
labelLLMModel: 'LLM model',
labelEmbeddingModel: 'Embedding model',
labelEmbeddingDim: 'Embedding dim',
labelMilvusCollection: 'Milvus collection',
labelParserBackend: 'Parser backend',
labelChunkBackend: 'Chunk backend',
labelParserFailureMode: 'Parser failure mode',
configLoadError: 'Could not load config',
cardBreakdown: 'Document breakdown',
breakdownIndexed: 'Indexed',
breakdownProcessing: 'Processing / Parsed',
breakdownFailed: 'Failed',
cardRuntime: 'Runtime info',
labelActiveSessions: 'Active chat sessions',
labelSessionCapacity: 'Session capacity',
labelReranker: 'Cross-encoder reranker',
labelBM25: 'BM25 hybrid retrieval',
statusActive: 'Active',
statusUnavailable: 'Unavailable',
footerAllOk: 'All systems operational',
footerDegraded: 'Degraded',
footerChecking: 'Checking…',
totalChunks: 'Total vector chunks',
},
docs: {
topbarTitle: 'Document Management',
searchPlaceholder: 'Search documents...',
refreshBtn: 'Refresh',
uploadBtn: 'Upload document',
confirmDeleteTitle: 'Confirm deletion',
cancelBtn: 'Cancel',
deleteBtn: 'Delete',
filterAll: 'All',
filterReady: 'Ready',
filterProcessing: 'Processing',
filterFailed: 'Failed',
filterPending: 'Pending',
filterAllTypes: 'All types',
deleteSelected: 'Delete selected',
colName: 'Document name',
colStatus: 'Status',
colUploaded: 'Uploaded',
colChunks: 'Chunks',
colSize: 'Size',
colType: 'Type',
colActions: 'Actions',
loading: 'Loading documents…',
emptyNoDocuments: 'No documents yet. Upload a document to get started.',
emptyNoMatch: 'No documents match the current filters.',
titleDownload: 'Download original file',
titleRetry: 'Retry processing',
titleDelete: 'Delete document',
},
compliance: {
topbarTitle: 'Compliance Analysis',
searchPlaceholder: 'Search analyses...',
clearBtn: 'Clear',
exportBtn: 'Export',
exportJSON: 'Export JSON',
exportText: 'Export Text',
newAnalysisBtn: 'New analysis',
statusAnalyzing: 'Analyzing…',
statusComplete: 'Analysis complete',
statusError: 'Error',
emptyTitle: 'No analysis running',
emptyDesc: 'Click New analysis to start a compliance gap review against your indexed regulations.',
retrievingMsg: 'Retrieving relevant regulations…',
defaultRegulation: 'Regulation',
matchSuffix: '% match',
colParagraph: 'Paragraph Under Review',
extractingMsg: 'Extracting and analyzing text…',
noTextExtracted: 'No text extracted',
stagesHeader: 'Analysis stages',
stageExtraction: 'Text extraction',
stageClauseSplit: 'Clause splitting',
stageRetrieval: 'Regulation retrieval',
stageSynthesis: 'Conclusion synthesis',
gapInProgress: 'Gap analysis in progress…',
askAIBtn: 'Ask AI',
chatBtn: 'Chat',
conclusionHeader: 'Conclusion',
riskScoreTooltip: 'Risk score (0=safe, 100=critical)',
statusCovered: 'Covered',
statusGap: 'Gap',
statusCritical: 'Critical',
statusInfo: 'Info',
sourceTypePasted: 'Pasted Text',
sourceTypeIndexed: 'Indexed Document',
sourceTypeUploaded: 'Uploaded File',
chatSidebarHeader: 'AI Compliance Q&A',
chatThinking: 'Thinking▋',
quickQ1: 'What regulation applies?',
quickQ2: 'How to remediate?',
quickQ3: 'What is the risk?',
chatPlaceholder: 'Ask about this finding…',
sendBtn: 'Send',
analysisFailed: 'Analysis failed',
exportReportHeader: 'COMPLIANCE ANALYSIS REPORT',
exportSectionParagraph: '── PARAGRAPH UNDER REVIEW ──',
exportSectionFindings: '── FINDINGS ──',
exportSectionConclusion: '── CONCLUSION ──',
exportSectionActions: '── RECOMMENDED ACTIONS ──',
historyHeader: 'History',
downloadReport: 'Download report',
historyEmpty: 'No analyses yet.',
historyDeleteConfirm: 'Delete this analysis record? This cannot be undone.',
drawerClose: 'Close',
drawerChatEmpty: 'No messages yet. Ask a question below.',
drawerSuggestionsHeader: 'Suggested questions',
},
ragchat: {
topbarTitle: 'Regulation Q&A',
exportBtn: 'Export chat',
quickPromptsHeader: 'Quick prompts',
inputPlaceholder: 'Ask about your regulations…',
citationsHeader: 'Sources',
citationsEmpty: 'Citations will appear here after a response is generated.',
apiError: 'Could not reach the RAG API. Please check the backend.',
},
};

231
frontend/src/locales/zh.ts Normal file
View File

@@ -0,0 +1,231 @@
import type { Translations } from './en';
/**
 * Chinese (zh) locale table. Mirrors the key structure of `Translations`.
 *
 * `{name}` placeholders inside values (e.g. `{count}`, `{message}`) must be
 * kept verbatim so they line up with the English table.
 */
export const zh: Translations = {
  nav: {
    groupMain: '主菜单',
    groupWorkbench: '工作台',
    groupChat: '对话',
    overview: '概览',
    signals: '法规信号',
    status: '系统状态',
    documents: '文档管理',
    compliance: '合规分析',
    chat: '法规问答',
  },
  sidebar: {
    toggleTheme: '切换主题',
    toggleLang: '切换语言',
    signOut: '退出',
  },
  overview: {
    eyebrow: 'T-Systems · AI 法规中心',
    heroTitle: 'AI 合规,\n端到端自动化',
    heroDesc: '监控全球 AI 法规,分析文档合规差距,获取有引用来源的回答——一站式平台。',
    openDashboard: '打开仪表盘',
    jumpToChat: '跳转到法规对话',
    sectionHowItWorks: '工作流程',
    sectionScreens: '功能页面',
    statScreens: '功能页面',
    statFlows: '后端感知流程',
    statReviewPosture: '审查状态',
    stepUpload: '上传', stepUploadDesc: '导入法规文档',
    stepProcess: '处理', stepProcessDesc: '向量化与分块',
    stepMonitor: '监控', stepMonitorDesc: '监控法规信号流',
    stepAnalyze: '分析', stepAnalyzeDesc: '运行合规差距分析',
    stepReview: '审查', stepReviewDesc: 'AI 辅助审查发现',
    stepChat: '对话', stepChatDesc: '带引用来源的问答',
    screenStatus: '系统状态', screenStatusDesc: '实时健康与任务队列',
    screenSignals: '法规信号', screenSignalsDesc: 'AI 检测法规变更',
    screenDocuments: '文档管理', screenDocumentsDesc: '上传与查阅文档',
    screenCompliance: '合规分析', screenComplianceDesc: '三栏合规工作台',
    screenChat: '法规问答', screenChatDesc: '带引用来源的法规对话',
    screenAnalytics: '数据分析', screenAnalyticsDesc: 'KPI 与覆盖指标',
  },
  signals: {
    topbarTitle: '法规信号',
    topbarSub: 'AI 驱动 · 实时订阅',
    searchPlaceholder: '搜索信号...',
    refreshBtn: '刷新数据源',
    crawlingBtn: '抓取中...',
    statTotal: '信号总数',
    statHigh: '高影响',
    statMedium: '中影响',
    statLast90: '近 90 天',
    badgeFinal: '已发布',
    badgeDraft: '草案',
    badgeUrgent: '紧急',
    badgePublished: '已发布',
    emptySelectSignal: '选择信号以运行影响分析',
    runAnalysis: '运行影响分析',
    stopBtn: '停止',
    sourceLink: '来源',
    tabOverview: '概览',
    tabObligations: '义务条款',
    tabImpact: '影响评估',
    tabChanges: '变更对比',
    cardScopeHeader: '范围与摘要',
    cardObligationsHeader: '义务条款',
    obligationsEmpty: '暂无结构化数据。点击"运行影响分析"触发提取。',
    colObligationDesc: '义务描述',
    colSubject: '主体',
    colType: '类型',
    colDeadline: '截止日期',
    deadlinePending: '待定',
    cardAffectedDocs: '受影响文档',
    noAffectedDocs: '未找到受影响文档。',
    cardAIImpact: 'AI 影响分析',
    footerText: '实时订阅 · 法规中心',
    statusConnecting: '正在连接数据源...',
    statusNoStream: '无数据流',
    statusCrawling: '抓取中...',
    statusProcessing: '处理 {count} 条...',
    statusComplete: '完成 +{count} 条',
    statusUpdateComplete: '更新完成 — 新增 {new} 条,更新 {updated} 条',
    statusError: '错误: {message}',
    statusConnFailed: '连接失败: {message}',
    diffOld: '旧版',
    diffNew: '新版',
    diffCardHeader: '变更对比',
  },
  status: {
    topbarTitle: '系统状态',
    searchPlaceholder: '搜索...',
    exportBtn: '导出',
    refreshBtn: '刷新',
    newUploadBtn: '上传文档',
    statTotal: '文档总数',
    statIndexed: '已索引',
    statFailed: '失败',
    statChunks: '向量分块数',
    statCoverage: '索引覆盖率',
    cardHealth: '系统健康',
    badgeOnline: '在线',
    badgeError: '错误',
    badgeDegraded: '降级',
    badgeUnknown: '未知',
    healthEndpointError: '无法访问健康检查端点',
    serviceEnabled: '已启用',
    serviceDisabled: '已禁用',
    serviceNotLoaded: '未加载',
    cardConfig: '系统配置',
    labelLLMProvider: 'LLM 提供商',
    labelLLMModel: 'LLM 模型',
    labelEmbeddingModel: '向量模型',
    labelEmbeddingDim: '向量维度',
    labelMilvusCollection: 'Milvus 集合',
    labelParserBackend: '解析后端',
    labelChunkBackend: '分块后端',
    labelParserFailureMode: '解析失败模式',
    configLoadError: '无法加载配置',
    cardBreakdown: '文档分布',
    breakdownIndexed: '已索引',
    breakdownProcessing: '处理中 / 已解析',
    breakdownFailed: '失败',
    cardRuntime: '运行时信息',
    labelActiveSessions: '活跃对话会话',
    labelSessionCapacity: '会话容量',
    labelReranker: '交叉编码器重排序',
    labelBM25: 'BM25 混合检索',
    statusActive: '活跃',
    statusUnavailable: '不可用',
    footerAllOk: '所有系统正常',
    footerDegraded: '降级运行',
    footerChecking: '检查中…',
    totalChunks: '向量分块总数',
  },
  docs: {
    topbarTitle: '文档管理',
    searchPlaceholder: '搜索文档...',
    refreshBtn: '刷新',
    uploadBtn: '上传文档',
    confirmDeleteTitle: '确认删除',
    cancelBtn: '取消',
    deleteBtn: '删除',
    filterAll: '全部',
    filterReady: '就绪',
    filterProcessing: '处理中',
    filterFailed: '失败',
    filterPending: '待处理',
    filterAllTypes: '所有类型',
    deleteSelected: '删除所选',
    colName: '文档名称',
    colStatus: '状态',
    colUploaded: '上传时间',
    colChunks: '分块数',
    colSize: '大小',
    colType: '类型',
    colActions: '操作',
    loading: '加载文档中…',
    emptyNoDocuments: '暂无文档。请上传文档以开始使用。',
    emptyNoMatch: '没有文档符合当前筛选条件。',
    titleDownload: '下载原始文件',
    titleRetry: '重试处理',
    titleDelete: '删除文档',
  },
  compliance: {
    topbarTitle: '合规分析',
    searchPlaceholder: '搜索分析记录...',
    clearBtn: '清除',
    exportBtn: '导出',
    exportJSON: '导出 JSON',
    exportText: '导出文本',
    newAnalysisBtn: '新建分析',
    statusAnalyzing: '分析中…',
    statusComplete: '分析完成',
    statusError: '错误',
    emptyTitle: '暂无分析任务',
    emptyDesc: '点击"新建分析"对已索引法规进行合规差距审查。',
    retrievingMsg: '正在检索相关法规…',
    defaultRegulation: '法规',
    matchSuffix: '% 匹配',
    colParagraph: '待审查段落',
    extractingMsg: '正在提取并分析文本…',
    noTextExtracted: '未提取到文本',
    stagesHeader: '分析阶段',
    stageExtraction: '文本提取',
    stageClauseSplit: '条款分割',
    stageRetrieval: '法规检索',
    stageSynthesis: '结论综合',
    gapInProgress: '差距分析进行中…',
    askAIBtn: '问 AI',
    chatBtn: '对话',
    conclusionHeader: '结论',
    // Was '风险评分0=安全100=严重)': the opening parenthesis and the separator
    // between the two ranges were missing, leaving an unbalanced ')'.
    riskScoreTooltip: '风险评分(0=安全,100=严重)',
    statusCovered: '已覆盖',
    statusGap: '存在差距',
    statusCritical: '严重',
    statusInfo: '信息',
    sourceTypePasted: '粘贴文本',
    sourceTypeIndexed: '已索引文档',
    sourceTypeUploaded: '上传文件',
    chatSidebarHeader: 'AI 合规问答',
    chatThinking: '思考中▋',
    quickQ1: '适用哪条法规?',
    quickQ2: '如何整改?',
    quickQ3: '风险等级如何?',
    chatPlaceholder: '针对此发现提问…',
    sendBtn: '发送',
    analysisFailed: '分析失败',
    exportReportHeader: '合规分析报告',
    exportSectionParagraph: '── 待审查段落 ──',
    exportSectionFindings: '── 发现 ──',
    exportSectionConclusion: '── 结论 ──',
    exportSectionActions: '── 建议行动 ──',
    historyHeader: '历史记录',
    downloadReport: '下载报告',
    historyEmpty: '暂无分析记录。',
    historyDeleteConfirm: '删除此分析记录?此操作不可撤销。',
    drawerClose: '关闭',
    drawerChatEmpty: '暂无消息。请在下方提问。',
    drawerSuggestionsHeader: '建议问题',
  },
  ragchat: {
    topbarTitle: '法规问答',
    exportBtn: '导出对话',
    quickPromptsHeader: '快捷问题',
    inputPlaceholder: '请输入关于法规的问题…',
    citationsHeader: '引用来源',
    citationsEmpty: '生成回答后,引用来源将显示在此处。',
    // Was '无法连接到 RAG API请检查后端服务。': the two clauses ran together.
    apiError: '无法连接到 RAG API,请检查后端服务。',
  },
};

View File

@@ -1,126 +1,635 @@
import { useState, useRef, useEffect } from 'react';
import { useLanguage } from '../../contexts/LanguageContext';
import { Search, Plus, AlertTriangle, Download, MessageSquare, ChevronDown } from 'lucide-react';
import { Topbar } from '../../components/layout/Topbar';
import { Search, Plus } from 'lucide-react';
import { NewAnalysisModal } from './NewAnalysisModal';
import { useComplianceAnalysis } from './useComplianceAnalysis';
import { usePageState } from '../../contexts';
import { HistoryRail } from './HistoryRail';
import { FindingChatDrawer } from './FindingChatDrawer';
import type { FindingEvent, SourceEvent, AnalysisMeta } from './useComplianceAnalysis';
// Hard-coded regulation source entries: standard name, helper caption, a `scores` list and a status key.
// NOTE(review): nothing in the visible part of this file reads SOURCES — confirm it is still needed.
const SOURCES = [
{ standard: 'EU AI Act', helper: 'Art. 9 — Risk management', scores: ['Art. 9.1', 'Art. 9.2'], status: 'risk' },
{ standard: 'MIIT Draft 2025-08', helper: '§3 — Training data provenance', scores: ['§3.1', '§3.4'], status: 'warn' },
{ standard: 'ISO/SAE 21434:2021', helper: 'Clause 9 — CSMS', scores: ['9.3', '9.4'], status: 'ok' },
];
const TOKEN_KEY = 'auth_token';

/**
 * Builds the `Authorization` header for API calls from the token kept in
 * localStorage.
 *
 * @returns `{ Authorization: 'Bearer <token>' }`, or an empty object when no
 *          (or an empty) token is stored.
 */
function authHeader(): Record<string, string> {
  const stored = localStorage.getItem(TOKEN_KEY);
  if (!stored) {
    return {};
  }
  return { Authorization: `Bearer ${stored}` };
}
// Hard-coded analysis stage entries: label, completion percentage and a status key.
// NOTE(review): nothing in the visible part of this file reads STAGES — confirm it is still needed.
const STAGES = [
{ label: 'Clause retrieval', pct: 100, status: 'ok' },
{ label: 'Requirement extraction', pct: 100, status: 'ok' },
{ label: 'Gap analysis', pct: 78, status: 'warn' },
{ label: 'Recommendation synthesis', pct: 30, status: 'info' },
];
/**
 * Maps a numeric risk score to a severity bucket.
 *
 * @param score Risk score.
 * @returns `'high'` for scores of 70 and above, `'med'` for 40 and above,
 *          otherwise `'low'` (which therefore also covers `NaN`).
 */
function riskClass(score: number) {
  return score >= 70 ? 'high' : score >= 40 ? 'med' : 'low';
}
// Hard-coded finding entries: title, description and a status key.
// NOTE(review): nothing in the visible part of this file reads FINDINGS — confirm it is still needed.
const FINDINGS = [
{ title: 'Missing risk management documentation', desc: 'No formal risk management system found for the described AI system scope under Art. 9.', status: 'risk' },
{ title: 'Training data lineage incomplete', desc: 'MIIT §3.1 requires traceable provenance for training datasets. Current documentation lacks data source registry.', status: 'warn' },
{ title: 'CSMS audit trail present', desc: 'ISO 21434 audit log requirements are met. Retention policy documented in Annex B.', status: 'ok' },
];
/**
 * Splits `text` into React nodes and wraps every case-insensitive occurrence
 * of any of `terms` in `<mark className="comp-highlight">`.
 *
 * @param text  Text to render.
 * @param terms Literal phrases to highlight. Regex metacharacters are escaped;
 *              empty and whitespace-only entries are ignored.
 * @returns One node per segment, in document order; `[text]` when there is
 *          nothing to highlight.
 */
function highlightText(text: string, terms: string[]): React.ReactNode[] {
  // An empty term would add an empty alternative that matches at every
  // position and shreds the text into single characters.
  const needles = terms.filter(term => term.trim() !== '');
  if (needles.length === 0) return [text];

  // Longest first: alternation picks the first alternative that matches, so
  // "risk" listed before "risk management" would otherwise cut the longer
  // phrase short.
  const escaped = [...needles]
    .sort((a, b) => b.length - a.length)
    .map(term => term.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));
  const splitter = new RegExp(`(${escaped.join('|')})`, 'gi');

  // Splitting on a single capturing group yields
  // [plain, match, plain, match, …], so odd indices are exactly the matches.
  return text.split(splitter).map((part, i) =>
    i % 2 === 1
      ? <mark key={i} className="comp-highlight">{part}</mark>
      : <span key={i}>{part}</span>
  );
}
// Hard-coded sample paragraph text.
// NOTE(review): nothing in the visible part of this file reads PARA — confirm it is still needed.
const PARA = `The AI system described in Section 4.2.1 of the Vehicle AI Safety Manual performs real-time classification of driving scenarios to support Level 3 automated driving decisions. The system ingests sensor fusion data from cameras, LIDAR, and radar arrays, processes it through a deep neural network trained on 2.4M annotated driving scenarios, and outputs driving mode recommendations with associated confidence scores. The model was trained using data collected between 2022 and 2024 across European and Chinese road environments.`;
/**
 * Formats a timestamp string for display (short month, day, hour and minute)
 * in the runtime's default locale.
 *
 * @param iso Timestamp string accepted by the `Date` constructor.
 * @returns The formatted timestamp, or `iso` unchanged when it cannot be
 *          parsed or formatted.
 */
function formatTs(iso: string) {
  const parsed = new Date(iso);
  // An unparseable string yields an Invalid Date. toLocaleString() does not
  // throw for it but returns the text "Invalid Date", so the fallback has to
  // be checked explicitly.
  if (Number.isNaN(parsed.getTime())) return iso;
  try {
    return parsed.toLocaleString(undefined, { month: 'short', day: 'numeric', hour: '2-digit', minute: '2-digit' });
  } catch {
    return iso;
  }
}
// Display label for each finding status key (`ok` / `warn` / `risk`).
const STATUS_LABEL: Record<string, string> = { ok: 'Covered', warn: 'Gap', risk: 'Critical' };
// ── Chat state for a single finding ─────────────────────────────────────────
/** One message of the finding chat; `id` is what `useFindingChat` uses to locate the message while a reply streams in. */
interface ChatMsg { id: number; role: 'user' | 'assistant'; content: string }
/**
 * State and actions for the chat panel attached to a single finding.
 *
 * - `openFor(idx, finding)` opens the panel for a finding and seeds it with an
 *   assistant message summarising that finding.
 * - `send(segmentContext)` posts the current input to
 *   `/api/v1/compliance/chat/<findingIdx>` and appends the streamed
 *   `data: {"type":"chunk","text":…}` events to the assistant reply.
 * - `close()` hides the panel and aborts a reply that is still streaming.
 *
 * @returns `{ open, findingIdx, messages, input, setInput, loading, openFor, close, send }`
 */
function useFindingChat() {
  const { t } = useLanguage();
  const [open, setOpen] = useState(false);
  const [findingIdx, setFindingIdx] = useState<number | null>(null);
  const [messages, setMessages] = useState<ChatMsg[]>([]);
  const [input, setInput] = useState('');
  const [loading, setLoading] = useState(false);
  const abortRef = useRef<AbortController | null>(null);

  // Cancel a reply that is still streaming when the owning component unmounts.
  useEffect(() => () => abortRef.current?.abort(), []);

  function openFor(idx: number, finding: FindingEvent) {
    // A reply still streaming for the previously opened finding is obsolete.
    abortRef.current?.abort();
    setFindingIdx(idx);
    setOpen(true);
    setMessages([{
      id: 0,
      role: 'assistant',
      content: `I'm reviewing finding: **${finding.title}**\n\n${finding.desc}${finding.clause_ref ? `\n\nRef: ${finding.clause_ref}` : ''}\n\nHow can I help?`,
    }]);
    setInput('');
  }

  function close() { setOpen(false); abortRef.current?.abort(); }

  async function send(segmentContext: string) {
    const q = input.trim();
    if (!q || loading) return;
    setInput('');

    const now = Date.now();
    const userMsg: ChatMsg = { id: now, role: 'user', content: q };
    const assistantId = now + 1;
    setMessages(m => [...m, userMsg, { id: assistantId, role: 'assistant', content: '' }]);
    setLoading(true);
    const ctrl = new AbortController();
    abortRef.current = ctrl;

    // Appends streamed text to the assistant placeholder created above.
    const appendText = (text: string) =>
      setMessages(m => m.map(msg => (msg.id === assistantId ? { ...msg, content: msg.content + text } : msg)));

    // Handles one SSE block: finds its `data: ` line and appends `chunk` text.
    const consumeBlock = (block: string) => {
      const dl = block.split('\n').find(l => l.startsWith('data: '));
      if (!dl) return;
      try {
        const j = JSON.parse(dl.slice(6)) as { type?: unknown; text?: unknown };
        if (j.type === 'chunk' && typeof j.text === 'string' && j.text) appendText(j.text);
      } catch { /* skip malformed event */ }
    };

    try {
      // NOTE(review): falls back to finding 0 when no finding has been opened
      // (kept from the original) — confirm this is intended.
      const res = await fetch(`/api/v1/compliance/chat/${findingIdx ?? 0}`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json', ...authHeader() },
        body: JSON.stringify({ query: q, segment_context: segmentContext }),
        signal: ctrl.signal,
      });
      // An error response (401, 500, …) carries no event stream; treat it as a
      // failure instead of silently parsing nothing.
      if (!res.ok || !res.body) {
        throw new Error(`Finding chat request failed (HTTP ${res.status})`);
      }
      const reader = res.body.getReader();
      const dec = new TextDecoder();
      let buf = '';
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        buf += dec.decode(value, { stream: true });
        const blocks = buf.split('\n\n');
        buf = blocks.pop() ?? '';
        blocks.forEach(consumeBlock);
      }
      // Flush the decoder and handle a final block that was not terminated by
      // a blank line.
      buf += dec.decode();
      if (buf.trim()) consumeBlock(buf);
    } catch (e: unknown) {
      if (e instanceof Error && e.name === 'AbortError') return;
      console.error('Finding chat request failed', e);
      // Replace the still-empty placeholder with an error text so the user
      // does not stare at a blank reply; partial replies are left untouched.
      setMessages(m => m.map(msg => (
        msg.id === assistantId && msg.content === '' ? { ...msg, content: t.ragchat.apiError } : msg
      )));
    } finally {
      if (abortRef.current === ctrl) abortRef.current = null;
      setLoading(false);
    }
  }

  return { open, findingIdx, messages, input, setInput, loading, openFor, close, send };
}
/**
 * Resolves the persisted id of a finding and then renders `FindingChatDrawer` for it.
 *
 * The drawer needs a finding id, while the page only knows the finding's position in the
 * list. This wrapper fetches `/api/v1/compliance/history/{analysisId}` and picks the
 * finding whose `seq` equals `findingIndex`. Nothing is rendered until an id is found.
 * NOTE(review): assumes `seq` uses the same base as the list index passed in — confirm against the API.
 */
function _FindingChatDrawerWrapper({
  analysisId,
  findingIndex,
  finding,
  onClose,
}: {
  analysisId: string;
  findingIndex: number;
  finding: { title: string; desc: string; status: string; clause_ref?: string };
  onClose: () => void;
}) {
  const [findingId, setFindingId] = useState<string | null>(null);

  useEffect(() => {
    const ctrl = new AbortController();
    // Forget the id resolved for a previous finding so the drawer never opens on the wrong thread.
    setFindingId(null);
    fetch(`/api/v1/compliance/history/${analysisId}`, {
      headers: authHeader(),
      signal: ctrl.signal,
    })
      .then(r => {
        if (!r.ok) throw new Error(`HTTP ${r.status}`);
        return r.json();
      })
      .then((data: { findings?: Array<{ seq: number; id: string }> }) => {
        const match = (data.findings ?? []).find(f => f.seq === findingIndex);
        if (match?.id) setFindingId(match.id);
      })
      // Best effort: on failure (or abort) the drawer simply stays hidden.
      .catch(() => {});
    // Cancel the lookup when the target changes or the wrapper unmounts.
    return () => ctrl.abort();
  }, [analysisId, findingIndex]);

  if (!findingId) return null;
  return (
    <FindingChatDrawer
      analysisId={analysisId}
      findingId={findingId}
      finding={finding}
      onClose={onClose}
    />
  );
}
export function CompliancePage() {
const [showModal, setShowModal] = useState(false);
const [showExportMenu, setShowExportMenu] = useState(false);
const { state, run, reset } = useComplianceAnalysis();
const chat = useFindingChat();
const [drawerFindingIdx, setDrawerFindingIdx] = useState<number | null>(null);
const { setComplianceState } = usePageState();
const { t } = useLanguage();
const STATUS_LABEL: Record<string, string> = { ok: t.compliance.statusCovered, warn: t.compliance.statusGap, risk: t.compliance.statusCritical, info: t.compliance.statusInfo };
const SOURCE_TYPE_LABEL: Record<string, string> = { text: t.compliance.sourceTypePasted, doc: t.compliance.sourceTypeIndexed, upload: t.compliance.sourceTypeUploaded };
const [historyRefresh, setHistoryRefresh] = useState(0);
const prevAnalysisIdRef = useRef<string | null>(null);
useEffect(() => {
if (state.analysisId && state.analysisId !== prevAnalysisIdRef.current) {
prevAnalysisIdRef.current = state.analysisId;
setHistoryRefresh(n => n + 1);
}
}, [state.analysisId]);
/**
 * Loads a stored analysis by id and puts it into the page state as a finished, read-only result.
 * A non-OK response or a network/parse failure leaves the current state untouched.
 */
async function handleSelectHistory(id: string) {
  try {
    const res = await fetch(`/api/v1/compliance/history/${id}`, { headers: authHeader() });
    if (!res.ok) return;
    const data = await res.json();
    // Guard against a missing or malformed `findings` field before mapping.
    const rawFindings: Array<Record<string, unknown>> = Array.isArray(data.findings) ? data.findings : [];
    setComplianceState({
      status: 'done',
      stageLabel: 'Complete',
      stageKey: 'concluding',
      meta: { title: data.doc_name, sourceType: 'doc', startedAt: data.created_at },
      sources: [],
      // The stored record uses `description`; the page state uses `desc`.
      findings: rawFindings.map((f: Record<string, unknown>) => ({
        title: String(f.title ?? ''),
        desc: String(f.description ?? ''),
        status: String(f.status ?? 'ok'),
        clause_ref: f.clause_ref ? String(f.clause_ref) : undefined,
      })),
      done: {
        conclusion: String(data.conclusion ?? ''),
        actions: data.actions ?? [],
        risk_score: Number(data.risk_score ?? 0),
        highlight_terms: data.highlight_terms ?? [],
        para_text: String(data.para_text ?? ''),
      },
      errorText: '',
      analysisId: data.id,
      isReadOnly: true,
      activeFindingId: null,
    });
  } catch (err: unknown) {
    // This runs from a click handler, so an escaped rejection would be unhandled.
    console.error('Failed to load compliance analysis', id, err);
  }
}
const isIdle = state.status === 'idle';
const isStreaming = state.status === 'streaming';
const isDone = state.status === 'done';
const isError = state.status === 'error';
// ── Export helpers ────────────────────────────────────────────────────────
/** Downloads the current analysis (meta, sources, findings, conclusion data) as a pretty-printed JSON file. */
function exportJSON() {
  const { meta, done } = state;
  const payload = {
    title: meta?.title,
    sourceType: meta?.sourceType,
    startedAt: meta?.startedAt,
    sources: state.sources,
    findings: state.findings,
    conclusion: done?.conclusion,
    actions: done?.actions,
    risk_score: done?.risk_score,
    highlight_terms: done?.highlight_terms,
    para_text: done?.para_text,
  };
  const serialized = JSON.stringify(payload, null, 2);
  const objectUrl = URL.createObjectURL(new Blob([serialized], { type: 'application/json' }));
  // Trigger the download through a detached anchor element.
  const anchor = document.createElement('a');
  anchor.href = objectUrl;
  anchor.download = `compliance-report-${Date.now()}.json`;
  anchor.click();
  URL.revokeObjectURL(objectUrl);
  setShowExportMenu(false);
}
/** Downloads the current analysis as a plain-text report: header, paragraph, findings, conclusion, actions. */
function exportText() {
  const meta = state.meta;
  const done = state.done;

  // One entry per finding; the clause reference line is added only when present.
  const findingLines = state.findings.map((f, i) => {
    const refLine = f.clause_ref ? `\n Ref: ${f.clause_ref}` : '';
    return `[${i + 1}] [${f.status.toUpperCase()}] ${f.title}\n ${f.desc}${refLine}`;
  });
  const actionLines = (done?.actions ?? []).map(a => `${a.label}: ${a.value}`);

  const lines: string[] = [
    t.compliance.exportReportHeader,
    `Title: ${meta?.title ?? 'Untitled'}`,
    `Date: ${meta?.startedAt ? formatTs(meta.startedAt) : ''}`,
    `Source: ${SOURCE_TYPE_LABEL[meta?.sourceType ?? 'text']}`,
    `Risk Score: ${done?.risk_score ?? 'N/A'} / 100`,
    '',
    t.compliance.exportSectionParagraph,
    done?.para_text ?? '',
    '',
    t.compliance.exportSectionFindings,
    ...findingLines,
    '',
    t.compliance.exportSectionConclusion,
    done?.conclusion ?? '',
    '',
    t.compliance.exportSectionActions,
    ...actionLines,
  ];

  const objectUrl = URL.createObjectURL(new Blob([lines.join('\n')], { type: 'text/plain' }));
  // Trigger the download through a detached anchor element.
  const anchor = document.createElement('a');
  anchor.href = objectUrl;
  anchor.download = `compliance-report-${Date.now()}.txt`;
  anchor.click();
  URL.revokeObjectURL(objectUrl);
  setShowExportMenu(false);
}
// ── Chat context (finding desc + clause_ref as segment context) ──────────
const activeFinding = chat.findingIdx !== null ? state.findings[chat.findingIdx] : null;
const chatContext = activeFinding
? `Finding: ${activeFinding.title}\n${activeFinding.desc}${activeFinding.clause_ref ? `\nRef: ${activeFinding.clause_ref}` : ''}`
: '';
return (
<div className="compliance-page">
<div className="compliance-page" style={{ position: 'relative' }}>
<Topbar
title="Compliance Analysis"
title={t.compliance.topbarTitle}
actions={
<>
<div className="search-box">
<Search size={13} />
<input placeholder="Search analyses..." />
<input placeholder={t.compliance.searchPlaceholder} />
</div>
<button className="btn sm primary"><Plus size={13} />New analysis</button>
{isStreaming || isDone || isError ? (
<button className="btn sm" onClick={reset}>{t.compliance.clearBtn}</button>
) : null}
{isDone && (
<div style={{ position: 'relative' }}>
<button
className="btn sm"
onClick={() => setShowExportMenu(v => !v)}
>
<Download size={13} />{t.compliance.exportBtn}<ChevronDown size={11} />
</button>
{showExportMenu && (
<div style={{
position: 'absolute', right: 0, top: '100%', marginTop: 4,
background: 'var(--surface)', border: '1px solid var(--border)',
borderRadius: 8, boxShadow: '0 6px 20px rgba(0,0,0,.15)',
zIndex: 50, minWidth: 140, overflow: 'hidden',
}}>
<button onClick={exportJSON} style={{ display: 'block', width: '100%', padding: '9px 14px', textAlign: 'left', fontSize: 13, background: 'none', border: 'none', cursor: 'pointer', color: 'var(--fg)' }}
onMouseEnter={e => (e.currentTarget.style.background = 'var(--bg)')}
onMouseLeave={e => (e.currentTarget.style.background = 'none')}
>{t.compliance.exportJSON}</button>
<button onClick={exportText} style={{ display: 'block', width: '100%', padding: '9px 14px', textAlign: 'left', fontSize: 13, background: 'none', border: 'none', cursor: 'pointer', color: 'var(--fg)' }}
onMouseEnter={e => (e.currentTarget.style.background = 'var(--bg)')}
onMouseLeave={e => (e.currentTarget.style.background = 'none')}
>{t.compliance.exportText}</button>
</div>
)}
</div>
)}
<button className="btn sm primary" onClick={() => setShowModal(true)}>
<Plus size={13} />{t.compliance.newAnalysisBtn}
</button>
</>
}
/>
<div className="compliance-hero">
<p className="hero-eyebrow">Compliance Workspace</p>
<h2 className="compliance-title">Document Paragraph Review</h2>
<p className="compliance-desc">
Three-column AI-assisted compliance gap analysis with regulation retrieval, paragraph review, and findings synthesis.
</p>
</div>
{showModal && (
<NewAnalysisModal
onClose={() => setShowModal(false)}
onSubmit={(fd, meta) => run(fd, meta)}
/>
)}
<div className="compliance-workspace">
{/* Status bar */}
{(isStreaming || isDone || isError) && (
<div style={{ padding: '0 24px' }}>
<div className={`compliance-status-bar ${state.status}`}>
<div className="status-dot" />
<span className="status-bar-label">
{isStreaming ? t.compliance.statusAnalyzing : isDone ? t.compliance.statusComplete : t.compliance.statusError}
</span>
<span className="status-bar-sub">{state.stageLabel}</span>
</div>
</div>
)}
{/* Empty state */}
{isIdle && (
<div className="analysis-empty">
<div className="analysis-empty-icon"><Plus size={24} /></div>
<h3>{t.compliance.emptyTitle}</h3>
<p>{t.compliance.emptyDesc}</p>
</div>
)}
{/* Workspace */}
{!isIdle && (
<>
{/* Analysis Header */}
{state.meta && (
<div style={{
display: 'flex', alignItems: 'center', gap: 14,
padding: '10px 24px', borderBottom: '1px solid var(--border)',
fontSize: 13,
}}>
<span style={{ fontWeight: 600, color: 'var(--fg)' }}>{state.meta.title}</span>
<span style={{ color: 'var(--muted)', fontSize: 11 }}>·</span>
<span style={{ color: 'var(--muted)', fontSize: 11 }}>{SOURCE_TYPE_LABEL[state.meta.sourceType]}</span>
<span style={{ color: 'var(--muted)', fontSize: 11 }}>·</span>
<span style={{ color: 'var(--muted)', fontSize: 11 }}>{formatTs(state.meta.startedAt)}</span>
{isDone && state.done && (
<>
<span style={{ color: 'var(--muted)', fontSize: 11 }}>·</span>
<span className={`risk-score-badge ${riskClass(state.done.risk_score)}`}
style={{ width: 28, height: 28, fontSize: 11 }}
title="Risk score">
{state.done.risk_score}
</span>
</>
)}
</div>
)}
<div style={{ display: 'flex', flex: 1, overflow: 'hidden' }}>
<HistoryRail
refreshTrigger={historyRefresh}
onSelect={handleSelectHistory}
selectedId={state.analysisId}
/>
<div style={{ flex: 1, overflow: 'hidden', display: 'flex', flexDirection: 'column' }}>
<div className="compliance-workspace" style={{ position: 'relative' }}>
{/* Column 1: Retrieved Regulations */}
<div className="comp-col source-col">
<div className="col-header">Retrieved Regulations</div>
{SOURCES.map(s => (
<div key={s.standard} className="source-item card">
<div className="col-header">
{t.compliance.stageRetrieval} {state.sources.length > 0 && `(${state.sources.length})`}
</div>
{state.sources.length === 0 && isStreaming && (
<div style={{ padding: '20px 16px', color: 'var(--muted)', fontSize: 12 }}>
{t.compliance.retrievingMsg}
</div>
)}
{state.sources.map((s: SourceEvent, i: number) => (
<div key={i} className="source-item card">
<div className="source-top">
<span className="source-std">{s.standard}</span>
<span className={`status ${s.status}`}>{STATUS_LABEL[s.status]}</span>
<span className="source-std">{s.standard || t.compliance.defaultRegulation}</span>
<span className={`status ${s.status === 'retrieved' ? 'ok' : s.status}`}>
{STATUS_LABEL[s.status] ?? 'Retrieved'}
</span>
</div>
<div className="source-helper">{s.helper}</div>
{s.clause && <div className="source-helper">{s.clause}</div>}
{s.score > 0 && (
<div className="source-scores">
{s.scores.map(sc => <span key={sc} className="score-pill">{sc}</span>)}
<span className="score-pill">
{s.score <= 1 ? Math.round(s.score * 100) : Math.round(s.score)}{t.compliance.matchSuffix}
</span>
</div>
)}
{s.full_content && (
<div style={{ fontSize: 11, color: 'var(--muted)', marginTop: 6, lineHeight: 1.5 }}>
{s.full_content.slice(0, 120)}
</div>
)}
</div>
))}
</div>
{/* Column 2: Paragraph Under Review + Stages */}
<div className="comp-col review-col">
<div className="col-header">Paragraph Under Review</div>
<div className="col-header">{t.compliance.colParagraph}</div>
<div className="card para-card">
{isDone && state.done?.para_text ? (
<p className="para-text">
{PARA.split(/(AI system)/g).map((part, i) =>
part === 'AI system'
? <mark key={i}>{part}</mark>
: <span key={i}>{part}</span>
)}
{highlightText(state.done.para_text, state.done.highlight_terms ?? [])}
</p>
) : (
<p className="para-text" style={{ color: 'var(--muted)' }}>
{isStreaming ? t.compliance.extractingMsg : t.compliance.noTextExtracted}
</p>
)}
</div>
<div className="card stages-card">
<div className="card-header">Analysis stages</div>
{STAGES.map(st => (
<div key={st.label} className="stage-row">
<div className="card-header">{t.compliance.stagesHeader}</div>
{(() => {
const STAGE_KEYS = ['extracting', 'splitting', 'analyzing', 'concluding'];
const STAGE_LABELS = [t.compliance.stageExtraction, t.compliance.stageClauseSplit, t.compliance.stageRetrieval, t.compliance.stageSynthesis];
const curIdx = STAGE_KEYS.indexOf(state.stageKey);
return STAGE_KEYS.map((key, idx) => {
const pct = isDone ? 100 : idx < curIdx ? 100 : idx === curIdx ? 60 : 0;
const stStatus = pct === 100 ? 'ok' : pct > 0 ? 'running' : 'info';
return (
<div key={key} className={`stage-row${stStatus === 'running' ? ' stage-running' : ''}`}>
<div className="stage-label-row">
<span className="stage-label">{st.label}</span>
<span className="stage-pct">{st.pct}%</span>
<span className="stage-label">{STAGE_LABELS[idx]}</span>
<span className="stage-pct">{pct}%</span>
</div>
<div className="stage-bar">
<div className={`stage-fill stage-${st.status}`} style={{ width: `${st.pct}%` }} />
<div className={`stage-fill stage-${stStatus}`} style={{ width: `${pct}%` }} />
</div>
</div>
))}
);
});
})()}
</div>
</div>
{/* Column 3: Findings + Conclusion */}
<div className="comp-col findings-col">
<div className="col-header">Findings</div>
{FINDINGS.map(f => (
<div key={f.title} className="finding-item card">
<div className="col-header">
Findings {state.findings.length > 0 && `(${state.findings.length})`}
</div>
{state.findings.length === 0 && isStreaming && (
<div style={{ padding: '20px 16px', color: 'var(--muted)', fontSize: 12 }}>
{t.compliance.gapInProgress}
</div>
)}
{state.findings.map((f: FindingEvent, i: number) => (
<div key={i} className="finding-item card">
<div className="finding-top">
<span className="finding-title">{f.title}</span>
<span className={`status ${f.status}`}>{STATUS_LABEL[f.status] ?? f.status}</span>
</div>
<p className="finding-desc">{f.desc}</p>
<div style={{ display: 'flex', alignItems: 'center', justifyContent: 'space-between', marginTop: 6 }}>
{f.clause_ref && (
<div style={{ fontSize: 11, color: 'var(--muted)' }}>Ref: {f.clause_ref}</div>
)}
<button
className="btn sm"
style={{ marginLeft: 'auto', fontSize: 11, padding: '3px 8px', gap: 4 }}
onClick={() => chat.openFor(i, f)}
>
<MessageSquare size={11} />{t.compliance.askAIBtn}
</button>
{state.analysisId && (
<button
className="btn sm"
onClick={() => setDrawerFindingIdx(i)}
style={{ marginTop: 6 }}
>
💬 {t.compliance.chatBtn}
</button>
)}
</div>
</div>
))}
{/* Conclusion */}
{isDone && state.done && (
<div className="card conclusion-box">
<div className="card-header">Conclusion</div>
<p className="conclusion-text">
The document requires a formal risk management section documenting the AI system classification, risk identification methodology, and mitigation measures per EU AI Act Art. 9 before compliance can be certified.
</p>
<div className="card-header" style={{ display: 'flex', alignItems: 'center', gap: 10 }}>
<span>{t.compliance.conclusionHeader}</span>
<div
className={`risk-score-badge ${riskClass(state.done.risk_score)}`}
title={t.compliance.riskScoreTooltip}
>
{state.done.risk_score}
</div>
</div>
<div className="risk-meter">
<span style={{ fontSize: 11, color: 'var(--muted)', width: 24 }}>0</span>
<div className="risk-bar-track">
<div className="risk-bar-fill" style={{ width: `${state.done.risk_score}%` }} />
</div>
<span style={{ fontSize: 11, color: 'var(--muted)', width: 24, textAlign: 'right' }}>100</span>
</div>
<p className="conclusion-text">{state.done.conclusion}</p>
<div className="action-items">
<div className="action-item">
<span className="action-label">Next action</span>
<span className="action-value">Draft risk management annex</span>
{state.done.actions.map((a, i) => (
<div key={i} className="action-item">
<span className="action-label">{a.label}</span>
<span className={`action-value${a.risk ? ' risk-text' : ''}`}>{a.value}</span>
</div>
<div className="action-item">
<span className="action-label">Escalation</span>
<span className="action-value risk-text">Legal review required</span>
))}
</div>
</div>
)}
{isError && (
<div className="card" style={{ borderColor: 'var(--danger)', padding: '14px 16px' }}>
<div style={{ display: 'flex', alignItems: 'center', gap: 8, color: 'var(--danger)', fontSize: 13, fontWeight: 600 }}>
<AlertTriangle size={14} /> {t.compliance.analysisFailed}
</div>
<p style={{ fontSize: 12, color: 'var(--muted)', marginTop: 6 }}>{state.errorText}</p>
</div>
)}
</div>
</div>
</div>
</div>
{/* ── Finding Chat Side Panel ────────────────────────────────── */}
{chat.open && (
<div style={{
position: 'fixed', right: 0, top: 0, bottom: 0, width: 400,
background: 'var(--surface)', borderLeft: '1px solid var(--border)',
display: 'flex', flexDirection: 'column', zIndex: 200,
boxShadow: '-8px 0 32px rgba(0,0,0,.12)',
}}>
{/* Header */}
<div style={{ padding: '16px 20px', borderBottom: '1px solid var(--border)', display: 'flex', alignItems: 'center', justifyContent: 'space-between' }}>
<div>
<div style={{ fontSize: 13, fontWeight: 600 }}>{t.compliance.chatSidebarHeader}</div>
<div style={{ fontSize: 11, color: 'var(--muted)', marginTop: 2 }}>
Finding #{(chat.findingIdx ?? 0) + 1} · {activeFinding?.title}
</div>
</div>
<button
onClick={chat.close}
style={{ background: 'none', border: 'none', cursor: 'pointer', color: 'var(--muted)', padding: 4 }}
></button>
</div>
{/* Messages */}
<div style={{ flex: 1, overflowY: 'auto', padding: '16px 20px', display: 'flex', flexDirection: 'column', gap: 12 }}>
{chat.messages.map(msg => (
<div key={msg.id} style={{ display: 'flex', gap: 10, flexDirection: msg.role === 'user' ? 'row-reverse' : 'row' }}>
{msg.role === 'assistant' && (
<div style={{ width: 28, height: 28, borderRadius: 8, background: 'var(--accent)', display: 'flex', alignItems: 'center', justifyContent: 'center', flexShrink: 0, fontSize: 11, color: '#fff', fontWeight: 700 }}>AI</div>
)}
<div style={{
maxWidth: '82%', padding: '10px 14px', borderRadius: 10, fontSize: 13, lineHeight: 1.6, whiteSpace: 'pre-wrap',
background: msg.role === 'user' ? 'var(--accent)' : 'var(--bg)',
color: msg.role === 'user' ? '#fff' : 'var(--fg)',
border: msg.role === 'assistant' ? '1px solid var(--border)' : 'none',
}}>{msg.content}</div>
</div>
))}
{chat.loading && (
<div style={{ display: 'flex', gap: 10 }}>
<div style={{ width: 28, height: 28, borderRadius: 8, background: 'var(--accent)', display: 'flex', alignItems: 'center', justifyContent: 'center', flexShrink: 0, fontSize: 11, color: '#fff', fontWeight: 700 }}>AI</div>
<div style={{ padding: '10px 14px', borderRadius: 10, border: '1px solid var(--border)', background: 'var(--bg)', fontSize: 13, color: 'var(--muted)' }}>
{t.compliance.chatThinking}
</div>
</div>
)}
</div>
{/* Quick questions */}
<div style={{ padding: '8px 20px', display: 'flex', flexWrap: 'wrap', gap: 6 }}>
{[t.compliance.quickQ1, t.compliance.quickQ2, t.compliance.quickQ3].map(q => (
<button key={q} onClick={() => chat.setInput(q)}
style={{ padding: '4px 10px', fontSize: 11, background: 'var(--bg)', border: '1px solid var(--border)', borderRadius: 6, cursor: 'pointer', color: 'var(--muted)' }}>
{q}
</button>
))}
</div>
{/* Input */}
<div style={{ padding: '12px 20px', borderTop: '1px solid var(--border)', display: 'flex', gap: 8 }}>
<input
value={chat.input}
onChange={e => chat.setInput(e.target.value)}
onKeyDown={e => { if (e.key === 'Enter' && !e.shiftKey) { e.preventDefault(); chat.send(chatContext); } }}
placeholder={t.compliance.chatPlaceholder}
style={{ flex: 1, padding: '9px 12px', fontSize: 13, background: 'var(--bg)', border: '1px solid var(--border)', borderRadius: 8, color: 'var(--fg)', outline: 'none' }}
/>
<button
className="btn primary"
onClick={() => chat.send(chatContext)}
disabled={!chat.input.trim() || chat.loading}
style={{ padding: '9px 14px' }}
>{t.compliance.sendBtn}</button>
</div>
</div>
)}
{drawerFindingIdx !== null && state.analysisId && (
<_FindingChatDrawerWrapper
analysisId={state.analysisId}
findingIndex={drawerFindingIdx}
finding={{
title: state.findings[drawerFindingIdx]?.title ?? '',
desc: state.findings[drawerFindingIdx]?.desc ?? '',
status: state.findings[drawerFindingIdx]?.status ?? 'ok',
clause_ref: state.findings[drawerFindingIdx]?.clause_ref,
}}
onClose={() => setDrawerFindingIdx(null)}
/>
)}
</>
)}
</div>
);
}

View File

@@ -0,0 +1,237 @@
// frontend/src/pages/Compliance/FindingChatDrawer.tsx
import { useEffect, useRef, useState } from 'react';
import { X, Send } from 'lucide-react';
import { useLanguage } from '../../contexts/LanguageContext';
// localStorage key under which the auth token is stored.
const TOKEN_KEY = 'auth_token';

/**
 * Builds the request headers carrying the stored bearer token.
 * @returns `{ Authorization: 'Bearer <token>' }`, or an empty object when no (or an empty) token is stored.
 */
function authHeader(): Record<string, string> {
  const headers: Record<string, string> = {};
  const token = localStorage.getItem(TOKEN_KEY);
  if (token) {
    headers.Authorization = `Bearer ${token}`;
  }
  return headers;
}
/** One chat message shown in the finding drawer. */
interface Message {
  // Taken from the server for loaded history; `usr-<ts>` / `ast-<ts>` for messages created locally by `send`.
  id: string;
  role: 'user' | 'assistant';
  // Message text; for a streaming assistant reply it grows as chunks arrive.
  content: string;
}
/** Summary of the finding displayed in the drawer header. */
interface FindingInfo {
  title: string;
  // Description; the header shows at most the first 100 characters.
  desc: string;
  // Status code; `risk`, `warn` and `ok` have dedicated colours, any other value uses the muted colour.
  status: string;
  // Optional clause reference shown next to the status.
  clause_ref?: string;
}
/** Props of `FindingChatDrawer`. */
interface Props {
  // Analysis and finding ids; both are interpolated into the chat and suggestions endpoint URLs.
  analysisId: string;
  findingId: string;
  // Finding summary rendered in the header.
  finding: FindingInfo;
  // Invoked when the header close button is clicked.
  onClose: () => void;
}
/**
 * Right-hand drawer hosting a persisted chat thread about one compliance finding.
 *
 * On open it loads the stored messages from
 * `/api/v1/compliance/analyses/{analysisId}/findings/{findingId}/chat`. When the thread is
 * empty it also requests suggested questions from the sibling `/suggestions` endpoint.
 * Sending a message POSTs to the same `/chat` URL and streams `data: {"type":"chunk","text":…}`
 * events into an assistant placeholder message.
 */
export function FindingChatDrawer({ analysisId, findingId, finding, onClose }: Props) {
  const [messages, setMessages] = useState<Message[]>([]);
  const [suggestions, setSuggestions] = useState<string[]>([]);
  const [input, setInput] = useState('');
  const [loading, setLoading] = useState(false);
  const [loadingHistory, setLoadingHistory] = useState(true);
  // Controller of the chat request currently streaming.
  const abortRef = useRef<AbortController | null>(null);
  const bottomRef = useRef<HTMLDivElement>(null);
  const { t } = useLanguage();

  // Load history + suggestions on open
  useEffect(() => {
    const loadCtrl = new AbortController();
    const base = `/api/v1/compliance/analyses/${analysisId}/findings/${findingId}`;
    // Start from a clean thread so a previous finding's messages never linger.
    setMessages([]);
    setSuggestions([]);
    setLoadingHistory(true);
    fetch(`${base}/chat`, { headers: authHeader(), signal: loadCtrl.signal })
      .then(r => {
        if (!r.ok) throw new Error(`HTTP ${r.status}`);
        return r.json();
      })
      .then((data: unknown) => {
        const history: Message[] = Array.isArray(data)
          ? data.map((m: Message) => ({ id: m.id, role: m.role, content: m.content }))
          : [];
        setMessages(history);
        setLoadingHistory(false);
        // Only offer suggested questions for a thread with no messages yet.
        if (history.length === 0) {
          fetch(`${base}/suggestions`, { method: 'POST', headers: authHeader(), signal: loadCtrl.signal })
            .then(r => r.json())
            .then(d => { if (Array.isArray(d?.questions)) setSuggestions(d.questions); })
            .catch(() => {});
        }
      })
      .catch((e: unknown) => {
        // An abort means this effect was cleaned up; a newer run owns the state now.
        if (e instanceof Error && e.name === 'AbortError') return;
        setLoadingHistory(false);
      });
    return () => {
      loadCtrl.abort();
      abortRef.current?.abort();
    };
  }, [analysisId, findingId]);

  // Auto-scroll to bottom
  useEffect(() => {
    bottomRef.current?.scrollIntoView({ behavior: 'smooth' });
  }, [messages]);

  /**
   * Sends `text` (or the composer content when omitted) and streams the assistant reply.
   * Does nothing for blank input or while another request is running.
   */
  async function send(text?: string) {
    const q = (text ?? input).trim();
    if (!q || loading) return;
    setInput('');
    setSuggestions([]); // hide chips after first message

    const assistantId = `ast-${Date.now()}`;
    setMessages(prev => [
      ...prev,
      { id: `usr-${Date.now()}`, role: 'user', content: q },
      { id: assistantId, role: 'assistant', content: '' },
    ]);
    setLoading(true);
    const ctrl = new AbortController();
    abortRef.current = ctrl;

    // Parses one SSE block and appends its chunk text to the assistant placeholder.
    const applyBlock = (block: string) => {
      const dl = block.split('\n').find(l => l.startsWith('data: '));
      if (!dl) return;
      try {
        const j = JSON.parse(dl.slice(6));
        if (j.type === 'chunk' && j.text) {
          setMessages(prev =>
            prev.map(m => m.id === assistantId ? { ...m, content: m.content + (j.text as string) } : m)
          );
        }
      } catch { /* skip malformed event */ }
    };

    try {
      const res = await fetch(
        `/api/v1/compliance/analyses/${analysisId}/findings/${findingId}/chat`,
        {
          method: 'POST',
          headers: { 'Content-Type': 'application/json', ...authHeader() },
          body: JSON.stringify({ query: q }),
          signal: ctrl.signal,
        }
      );
      // An HTTP error or a body-less response is reported instead of leaving an empty bubble.
      if (!res.ok || !res.body) throw new Error(`Chat request failed (HTTP ${res.status})`);
      const reader = res.body.getReader();
      const dec = new TextDecoder();
      let buf = '';
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        buf += dec.decode(value, { stream: true });
        const blocks = buf.split('\n\n');
        // The last piece may be an incomplete event; keep it for the next read.
        buf = blocks.pop() ?? '';
        for (const block of blocks) applyBlock(block);
      }
      // Flush the decoder and handle a final event that was not followed by a blank line.
      buf += dec.decode();
      if (buf.trim()) applyBlock(buf);
    } catch (e: unknown) {
      if (e instanceof Error && e.name !== 'AbortError') {
        setMessages(prev =>
          prev.map(m => m.id === assistantId ? { ...m, content: 'Error reaching server.' } : m)
        );
      }
    } finally {
      setLoading(false);
    }
  }

  // Header colour per finding status; unknown statuses fall back to the muted colour below.
  const STATUS_COLOR: Record<string, string> = {
    risk: 'var(--danger, #dc143c)',
    warn: 'var(--warning, #ff8c00)',
    ok: 'var(--success, #228b22)',
  };

  return (
    <div
      style={{
        position: 'fixed', right: 0, top: 0, bottom: 0, width: 420,
        background: 'var(--surface)', borderLeft: '1px solid var(--border)',
        display: 'flex', flexDirection: 'column', zIndex: 200,
        boxShadow: '-4px 0 16px rgba(0,0,0,0.12)',
      }}
    >
      {/* Header */}
      <div style={{
        padding: '14px 16px', borderBottom: '1px solid var(--border)',
        display: 'flex', alignItems: 'flex-start', gap: 10,
      }}>
        <div style={{ flex: 1, minWidth: 0 }}>
          <div style={{ fontSize: 11, color: STATUS_COLOR[finding.status] ?? 'var(--muted)', fontWeight: 600, marginBottom: 2 }}>
            {finding.status.toUpperCase()}
            {finding.clause_ref && (
              <span style={{ fontWeight: 400, marginLeft: 6, color: 'var(--muted)' }}>
                {finding.clause_ref}
              </span>
            )}
          </div>
          <div style={{ fontSize: 13, fontWeight: 600, overflow: 'hidden', textOverflow: 'ellipsis', whiteSpace: 'nowrap' }}>
            {finding.title}
          </div>
          <div style={{ fontSize: 11, color: 'var(--muted)', marginTop: 2, lineHeight: 1.4 }}>
            {finding.desc.length > 100 ? finding.desc.slice(0, 100) + '…' : finding.desc}
          </div>
        </div>
        <button className="btn icon-btn" onClick={onClose} style={{ flexShrink: 0 }} title={t.compliance.drawerClose}>
          <X size={14} />
        </button>
      </div>
      {/* Suggestion chips */}
      {suggestions.length > 0 && (
        <div style={{ padding: '10px 16px', borderBottom: '1px solid var(--border)' }}>
          <div style={{ fontSize: 11, color: 'var(--muted)', marginBottom: 6 }}>{t.compliance.drawerSuggestionsHeader}</div>
          <div style={{ display: 'flex', flexDirection: 'column', gap: 6 }}>
            {suggestions.map((q, i) => (
              <button
                key={i}
                className="chip"
                style={{ textAlign: 'left', whiteSpace: 'normal', height: 'auto', padding: '6px 10px' }}
                onClick={() => send(q)}
              >
                {q}
              </button>
            ))}
          </div>
        </div>
      )}
      {/* Messages */}
      <div style={{ flex: 1, overflowY: 'auto', padding: '12px 16px', display: 'flex', flexDirection: 'column', gap: 10 }}>
        {loadingHistory && (
          <p style={{ fontSize: 12, color: 'var(--muted)', textAlign: 'center' }}>Loading history</p>
        )}
        {!loadingHistory && messages.length === 0 && (
          <p style={{ fontSize: 12, color: 'var(--muted)', textAlign: 'center' }}>{t.compliance.drawerChatEmpty}</p>
        )}
        {messages.map(msg => (
          <div key={msg.id} className={`message msg-${msg.role}`} style={{ maxWidth: '100%' }}>
            {msg.role === 'assistant' && <div className="msg-avatar">AI</div>}
            <div className="msg-bubble" style={{ fontSize: 13, whiteSpace: 'pre-wrap' }}>
              {msg.content || (loading ? '…' : '')}
            </div>
            {msg.role === 'user' && <div className="msg-avatar user-av">You</div>}
          </div>
        ))}
        <div ref={bottomRef} />
      </div>
      {/* Composer */}
      <div style={{ padding: '10px 16px', borderTop: '1px solid var(--border)', display: 'flex', gap: 8 }}>
        <textarea
          className="composer-input"
          placeholder={t.compliance.chatPlaceholder}
          value={input}
          rows={2}
          style={{ flex: 1, fontSize: 13 }}
          onChange={e => setInput(e.target.value)}
          onKeyDown={e => { if (e.key === 'Enter' && !e.shiftKey) { e.preventDefault(); void send(); } }}
        />
        <button
          className="btn primary"
          disabled={!input.trim() || loading}
          onClick={() => void send()}
          style={{ alignSelf: 'flex-end' }}
          title={t.compliance.sendBtn}
        >
          <Send size={14} />
        </button>
      </div>
    </div>
  );
}

View File

@@ -0,0 +1,142 @@
// frontend/src/pages/Compliance/HistoryRail.tsx
import { useEffect, useState, useCallback } from 'react';
import { Download, Trash2 } from 'lucide-react';
import { useLanguage } from '../../contexts/LanguageContext';
// localStorage key under which the auth token is stored.
const TOKEN_KEY = 'auth_token';

/**
 * Returns the `Authorization` header for API calls.
 * Yields an empty object when localStorage holds no token (or an empty one).
 */
function authHeader(): Record<string, string> {
  const stored = localStorage.getItem(TOKEN_KEY);
  if (!stored) {
    return {};
  }
  return { Authorization: `Bearer ${stored}` };
}
/** One row of the analysis history list; items are stored as received from `GET /api/v1/compliance/history`. */
interface HistoryItem {
  // Used as the React key and in the detail, download and delete URLs.
  id: string;
  // Timestamp string; rendered as a short month/day label.
  created_at: string;
  // Shown as the row title ("Untitled" when empty); its first 30 characters go into the download filename.
  doc_name: string;
  standard_name: string;
  // Shown in the badge and bucketed into a CSS class by `riskClass`.
  risk_score: number;
  // Rendered as "<n> finding(s)".
  finding_count: number;
}
/** Props of `HistoryRail`. */
interface Props {
  // The list is re-fetched whenever this value changes.
  refreshTrigger: number;
  // Called with the item id when a row is clicked.
  onSelect: (id: string) => void;
  // Id of the row to mark with the `active` class, or null for none.
  selectedId: string | null;
}
/**
 * Maps a risk score to its badge CSS class:
 * 70 and above → `risk-high`, 40 up to (not including) 70 → `risk-medium`, everything else → `risk-low`.
 */
function riskClass(score: number): string {
  return score >= 70
    ? 'risk-high'
    : score >= 40
      ? 'risk-medium'
      : 'risk-low';
}
/**
 * Side rail listing stored compliance analyses (up to 30, from `/api/v1/compliance/history`).
 *
 * Each row shows date, document name, risk badge and finding count, and offers a report
 * download and a delete action. Clicking a row calls `onSelect` with the item id. The list
 * is reloaded on mount and whenever `refreshTrigger` changes.
 */
export function HistoryRail({ refreshTrigger, onSelect, selectedId }: Props) {
  const [items, setItems] = useState<HistoryItem[]>([]);
  // Id of the row whose delete request is in flight (its delete button is disabled meanwhile).
  const [deletingId, setDeletingId] = useState<string | null>(null);
  const { t } = useLanguage();

  const fetchHistory = useCallback(() => {
    fetch('/api/v1/compliance/history?limit=30', { headers: authHeader() })
      .then(r => r.json())
      .then(data => {
        // Anything that is not an array (e.g. an error object) is ignored.
        if (Array.isArray(data)) setItems(data);
      })
      .catch(() => {/* backend may not have postgres configured */});
  }, []);

  useEffect(() => { fetchHistory(); }, [fetchHistory, refreshTrigger]);

  /** Fetches the report for `item` and saves it as a `.docx` file. */
  function handleDownload(e: React.MouseEvent, item: HistoryItem) {
    e.stopPropagation(); // do not also select the row
    fetch(`/api/v1/compliance/history/${item.id}/download`, { headers: authHeader() })
      .then(r => {
        // Without this check an error response body would be saved as a broken .docx.
        if (!r.ok) throw new Error(`Download failed (HTTP ${r.status})`);
        return r.blob();
      })
      .then(blob => {
        const blobUrl = URL.createObjectURL(blob);
        const link = document.createElement('a');
        link.href = blobUrl;
        link.download = `compliance-${item.doc_name.slice(0, 30)}.docx`;
        link.click();
        URL.revokeObjectURL(blobUrl);
      })
      .catch((err: unknown) => {
        console.error('Report download failed', err);
      });
  }

  /** Asks for confirmation, deletes `item` on the server and removes its row on success. */
  function handleDelete(e: React.MouseEvent, item: HistoryItem) {
    e.stopPropagation(); // do not also select the row
    if (!window.confirm(t.compliance.historyDeleteConfirm)) return;
    setDeletingId(item.id);
    fetch(`/api/v1/compliance/history/${item.id}`, {
      method: 'DELETE',
      headers: authHeader(),
    })
      .then(r => {
        // Drop the row only when the server confirms the delete (or reports it is already gone);
        // on any other status the record still exists, so keep showing it.
        if (r.ok || r.status === 404) {
          setItems(prev => prev.filter(i => i.id !== item.id));
        }
      })
      .catch(() => {/* network failure: keep the row */})
      .finally(() => setDeletingId(null));
  }

  /** Formats a timestamp as "short month + day"; falls back to the first 10 characters of the input. */
  function formatDate(iso: string): string {
    const parsed = new Date(iso);
    // An unparseable string does not throw; it would render as the text "Invalid Date".
    if (Number.isNaN(parsed.getTime())) return iso.slice(0, 10);
    try {
      return parsed.toLocaleDateString(undefined, { month: 'short', day: 'numeric' });
    } catch {
      return iso.slice(0, 10);
    }
  }

  if (items.length === 0) {
    return (
      <div className="history-pane" style={{ minWidth: 200, maxWidth: 220 }}>
        <div className="history-header">{t.compliance.historyHeader}</div>
        <p style={{ padding: '12px 16px', fontSize: 12, color: 'var(--muted)', lineHeight: 1.5 }}>
          {t.compliance.historyEmpty}
        </p>
      </div>
    );
  }

  return (
    <div className="history-pane" style={{ minWidth: 200, maxWidth: 220, overflowY: 'auto' }}>
      <div className="history-header">{t.compliance.historyHeader}</div>
      {items.map(item => (
        <div
          key={item.id}
          className={`quick-item${selectedId === item.id ? ' active' : ''}`}
          onClick={() => onSelect(item.id)}
          style={{ cursor: 'pointer' }}
        >
          <div style={{ fontSize: 11, color: 'var(--muted)', marginBottom: 2 }}>
            {formatDate(item.created_at)}
          </div>
          <div style={{ fontSize: 12, fontWeight: 500, marginBottom: 4, overflow: 'hidden', textOverflow: 'ellipsis', whiteSpace: 'nowrap' }}>
            {item.doc_name || 'Untitled'}
          </div>
          <div style={{ display: 'flex', alignItems: 'center', gap: 6 }}>
            <span className={`risk-badge ${riskClass(item.risk_score)}`} style={{ fontSize: 10 }}>
              {item.risk_score}
            </span>
            <span style={{ fontSize: 10, color: 'var(--muted)', flex: 1 }}>
              {item.finding_count} finding{item.finding_count !== 1 ? 's' : ''}
            </span>
            <button
              className="btn icon-btn"
              title={t.compliance.downloadReport}
              onClick={e => handleDownload(e, item)}
              style={{ padding: '2px 4px' }}
            >
              <Download size={11} />
            </button>
            <button
              className="btn icon-btn danger"
              title="Delete"
              disabled={deletingId === item.id}
              onClick={e => handleDelete(e, item)}
              style={{ padding: '2px 4px' }}
            >
              <Trash2 size={11} />
            </button>
          </div>
        </div>
      ))}
    </div>
  );
}

View File

@@ -0,0 +1,247 @@
import { useState, useRef, useEffect } from 'react';
import { X, Upload, FileText, Database } from 'lucide-react';
/** localStorage key the bearer token is read from. */
const TOKEN_KEY = 'auth_token';

/**
 * Builds the request headers carrying the stored bearer token.
 *
 * @returns `{ Authorization: 'Bearer <token>' }` when a non-empty token is
 *   present in localStorage, otherwise an empty object.
 */
function authHeader(): Record<string, string> {
  const headers: Record<string, string> = {};
  const token = localStorage.getItem(TOKEN_KEY);
  if (token) {
    headers.Authorization = `Bearer ${token}`;
  }
  return headers;
}
/** One selectable entry in the "From Document" list of the analysis modal. */
interface DocOption {
/** Document identifier; sent as the `doc_id` form field when this entry is submitted. */
id: string;
/** Display name shown for the entry. */
name: string;
/** Optional secondary label rendered under the name when non-empty. */
type?: string;
}
import type { AnalysisMeta } from './useComplianceAnalysis';
/** Props accepted by the new-analysis modal. */
interface Props {
/** Dismisses the modal (overlay click, close/cancel buttons, and after a submit). */
onClose: () => void;
/** Receives the assembled form payload and its metadata when the user starts an analysis. */
onSubmit: (formData: FormData, meta: AnalysisMeta) => void;
}
const DOMAINS = ['EU AI Act', 'MIIT', 'ISO 21434', 'GDPR', 'NIST AI RMF', 'GB/T'];
/** Extensions the "Upload File" tab accepts; also used to build the file input's `accept` attribute. */
const ACCEPTED_EXTENSIONS = ['.pdf', '.docx', '.txt', '.md'];

/** Returns true when `fileName` ends with one of the accepted extensions (case-insensitive). */
function hasAcceptedExtension(fileName: string): boolean {
  const lowered = fileName.toLowerCase();
  return ACCEPTED_EXTENSIONS.some(ext => lowered.endsWith(ext));
}

/**
 * Modal for starting a new compliance analysis from pasted text, an indexed
 * document, or an uploaded file.
 *
 * On submit the chosen input is packed into a `FormData` (`title`, `domains`,
 * and exactly one of `text` / `doc_id` / `file`), handed to `onSubmit` together
 * with an `AnalysisMeta`, and the modal is closed.
 *
 * @param onClose - Dismisses the modal.
 * @param onSubmit - Receives the form payload and metadata for the analysis.
 */
export function NewAnalysisModal({ onClose, onSubmit }: Props) {
  const [tab, setTab] = useState<'text' | 'doc' | 'upload'>('text');
  const [text, setText] = useState('');
  const [title, setTitle] = useState('');
  const [selectedDomains, setSelectedDomains] = useState<string[]>([]);
  const [selectedDocId, setSelectedDocId] = useState<string | null>(null);
  const [docs, setDocs] = useState<DocOption[]>([]);
  const [file, setFile] = useState<File | null>(null);
  const [fileError, setFileError] = useState('');
  const [dragOver, setDragOver] = useState(false);
  const fileInputRef = useRef<HTMLInputElement>(null);
  const overlayRef = useRef<HTMLDivElement>(null);

  // Fetch indexed docs for "From Document" tab
  useEffect(() => {
    // Abort the request on unmount so a late response never touches state of a closed modal.
    const ctrl = new AbortController();
    fetch('/api/v1/documents/management-list', { headers: authHeader(), signal: ctrl.signal })
      .then(r => {
        if (!r.ok) throw new Error(`HTTP ${r.status}`);
        return r.json();
      })
      .then((d: unknown) => {
        // The payload is accepted either as `{ documents: [...] }` or as a bare array.
        const raw: unknown = Array.isArray(d) ? d : (d as { documents?: unknown } | null)?.documents;
        const rows = Array.isArray(raw) ? (raw as Record<string, unknown>[]) : [];
        setDocs(rows.map(item => ({
          id: String(item.doc_id ?? item.id ?? ''),
          name: String(item.doc_name ?? item.name ?? ''),
          type: String(item.regulation_type ?? item.type ?? ''),
        })));
      })
      .catch(() => {
        if (ctrl.signal.aborted) return;
        setDocs([]);
      });
    return () => ctrl.abort();
  }, []);

  /** Adds `d` to the selected domains, or removes it when already selected. */
  function toggleDomain(d: string) {
    setSelectedDomains(prev =>
      prev.includes(d) ? prev.filter(x => x !== d) : [...prev, d]
    );
  }

  /**
   * Stores the chosen file and, when no title has been typed yet, pre-fills the
   * title with the file name minus its extension.
   *
   * The input's `accept` attribute is only a picker hint and does not apply to
   * drag-and-drop at all, so the extension is validated here for both paths.
   */
  function handleFileChange(f: File | null) {
    if (!f) return;
    if (!hasAcceptedExtension(f.name)) {
      setFileError(`Unsupported file type: ${f.name}`);
      return;
    }
    setFileError('');
    setFile(f);
    if (!title) setTitle(f.name.replace(/\.[^.]+$/, ''));
  }

  /** Validates the active tab's input, builds the payload, submits it and closes the modal. */
  function handleSubmit() {
    const fd = new FormData();
    // Trim so a whitespace-only title is treated as "no title".
    const trimmedTitle = title.trim();
    if (trimmedTitle) fd.append('title', trimmedTitle);
    if (selectedDomains.length) fd.append('domains', selectedDomains.join(','));

    if (tab === 'text') {
      if (!text.trim()) return;
      fd.append('text', text.trim());
    } else if (tab === 'doc') {
      if (!selectedDocId) return;
      fd.append('doc_id', selectedDocId);
    } else {
      if (!file) return;
      fd.append('file', file);
    }

    const meta: AnalysisMeta = {
      title: trimmedTitle || (tab === 'upload' && file ? file.name.replace(/\.[^.]+$/, '') : 'Untitled Analysis'),
      sourceType: tab,
      startedAt: new Date().toISOString(),
    };
    onSubmit(fd, meta);
    onClose();
  }

  // The submit button is enabled only when the active tab has usable input.
  const canSubmit =
    (tab === 'text' && text.trim().length > 0) ||
    (tab === 'doc' && selectedDocId !== null) ||
    (tab === 'upload' && file !== null);

  return (
    <div
      className="modal-overlay"
      ref={overlayRef}
      onClick={e => { if (e.target === overlayRef.current) onClose(); }}
    >
      <div className="modal-dialog" style={{ maxWidth: 720, gridTemplateColumns: '1fr' }}>
        <div className="modal-panel">
          {/* Header */}
          <div className="modal-header">
            <span className="modal-title">New Compliance Analysis</span>
            <button className="modal-close" onClick={onClose}><X size={16} /></button>
          </div>
          {/* Title field */}
          <div className="upload-field" style={{ marginBottom: 16 }}>
            <label>Analysis title (optional)</label>
            <input
              value={title}
              onChange={e => setTitle(e.target.value)}
              placeholder="e.g. Section 4.2.1 AI System Review"
            />
          </div>
          {/* Tabs */}
          <div className="modal-tabs">
            <button className={`modal-tab${tab === 'text' ? ' active' : ''}`} onClick={() => setTab('text')}>
              <FileText size={12} style={{ marginRight: 5, display: 'inline' }} />Paste Text
            </button>
            <button className={`modal-tab${tab === 'doc' ? ' active' : ''}`} onClick={() => setTab('doc')}>
              <Database size={12} style={{ marginRight: 5, display: 'inline' }} />From Document
            </button>
            <button className={`modal-tab${tab === 'upload' ? ' active' : ''}`} onClick={() => setTab('upload')}>
              <Upload size={12} style={{ marginRight: 5, display: 'inline' }} />Upload File
            </button>
          </div>
          {/* Tab content */}
          {tab === 'text' && (
            <div className="upload-field full-width">
              <label>Document text to analyze</label>
              <textarea
                style={{ minHeight: 240 }}
                placeholder="Paste the document paragraph or clause text here…"
                value={text}
                onChange={e => setText(e.target.value)}
              />
              <span style={{ fontSize: 11, color: 'var(--muted)' }}>{text.length} characters</span>
            </div>
          )}
          {tab === 'doc' && (
            <div>
              <div style={{ fontSize: 12, color: 'var(--muted)', marginBottom: 6 }}>
                Select an indexed document to analyze:
              </div>
              <div className="doc-select-list" style={{ maxHeight: 340 }}>
                {docs.length === 0 && (
                  <div style={{ padding: '20px', textAlign: 'center', color: 'var(--muted)', fontSize: 13 }}>
                    No indexed documents found
                  </div>
                )}
                {docs.map(doc => (
                  <div
                    key={doc.id}
                    className={`doc-select-item${selectedDocId === doc.id ? ' selected' : ''}`}
                    onClick={() => setSelectedDocId(doc.id)}
                  >
                    <div className="doc-select-check">
                      {selectedDocId === doc.id && (
                        <svg width="8" height="8" viewBox="0 0 8 8" fill="white">
                          <path d="M1 4l2 2 4-4" stroke="white" strokeWidth="1.5" fill="none" strokeLinecap="round" />
                        </svg>
                      )}
                    </div>
                    <div style={{ flex: 1, minWidth: 0 }}>
                      <div className="doc-select-name">{doc.name}</div>
                      {doc.type && <div className="doc-select-meta">{doc.type}</div>}
                    </div>
                  </div>
                ))}
              </div>
            </div>
          )}
          {tab === 'upload' && (
            <div
              className={`dropzone${dragOver ? ' drag-over' : ''}`}
              onClick={() => fileInputRef.current?.click()}
              onDragOver={e => { e.preventDefault(); setDragOver(true); }}
              onDragLeave={() => setDragOver(false)}
              onDrop={e => {
                e.preventDefault();
                setDragOver(false);
                handleFileChange(e.dataTransfer.files[0] ?? null);
              }}
            >
              <input
                ref={fileInputRef}
                type="file"
                style={{ display: 'none' }}
                accept={ACCEPTED_EXTENSIONS.join(',')}
                onChange={e => handleFileChange(e.target.files?.[0] ?? null)}
              />
              <div className="drop-icon">PDF</div>
              {file ? (
                <div>
                  <div className="drop-label">{file.name}</div>
                  <div className="drop-hint">{(file.size / 1024).toFixed(0)} KB click to replace</div>
                </div>
              ) : (
                <div>
                  <div className="drop-label">Drop file here or click to browse</div>
                  <div className="drop-hint">PDF, DOCX, TXT, MD max 20 MB</div>
                </div>
              )}
              {fileError && (
                <div className="drop-hint" style={{ color: 'var(--danger)' }}>{fileError}</div>
              )}
            </div>
          )}
          {/* Domain filter */}
          <div style={{ marginTop: 18 }}>
            <div style={{ fontSize: 12, color: 'var(--muted)', marginBottom: 6 }}>
              Filter by regulation domain (optional):
            </div>
            <div className="domain-chips">
              {DOMAINS.map(d => (
                <button
                  key={d}
                  className={`domain-chip${selectedDomains.includes(d) ? ' selected' : ''}`}
                  onClick={() => toggleDomain(d)}
                >
                  {d}
                </button>
              ))}
            </div>
          </div>
          {/* Actions */}
          <div className="modal-actions" style={{ marginTop: 24 }}>
            <button className="btn" onClick={onClose}>Cancel</button>
            <button
              className="btn primary"
              disabled={!canSubmit}
              onClick={handleSubmit}
            >
              Start Analysis
            </button>
          </div>
        </div>
      </div>
    </div>
  );
}

View File

@@ -0,0 +1,139 @@
/**
* useComplianceAnalysis — compliance analysis state wired to PageStateContext.
*
* State is stored in the global context so it persists when the user navigates
* to another module and returns. The `run` and `reset` actions are identical
* to the previous hook API so CompliancePage needs no structural changes.
*/
import { useCallback } from 'react';
import { usePageState } from '../../contexts';
import type {
ComplianceMeta,
ComplianceState,
ComplianceSourceEvent,
ComplianceFindingEvent,
ComplianceDonePayload,
} from '../../contexts';
export type { ComplianceMeta, ComplianceState, ComplianceSourceEvent as SourceEvent, ComplianceFindingEvent as FindingEvent, ComplianceDonePayload as DonePayload };
export type { ComplianceActionItem as ActionItem } from '../../contexts';
export type AnalysisStatus = import('../../contexts').ComplianceStatus;
export type AnalysisMeta = ComplianceMeta;
/** localStorage key the bearer token is read from. */
const TOKEN_KEY = 'auth_token';

/**
 * Builds the request headers carrying the stored bearer token.
 *
 * @returns `{ Authorization: 'Bearer <token>' }` when a non-empty token is
 *   present in localStorage, otherwise an empty object.
 */
function authHeader(): Record<string, string> {
  const headers: Record<string, string> = {};
  const token = localStorage.getItem(TOKEN_KEY);
  if (token) {
    headers.Authorization = `Bearer ${token}`;
  }
  return headers;
}
/**
 * Baseline compliance state. `run` spreads this object and overrides
 * `status`, `stageLabel` and `meta` whenever a new analysis starts, so every
 * other field is reset to the value listed here.
 */
const INITIAL_STATE: ComplianceState = {
status: 'idle',
// Progress label / key, updated from `stage` stream events.
stageLabel: '',
stageKey: '',
meta: null,
// Accumulated from `source` and `finding` stream events respectively.
sources: [],
findings: [],
// Payload of the `done` stream event, once received.
done: null,
errorText: '',
// Set from the `saved` stream event's `analysis_id`.
analysisId: null,
isReadOnly: false,
};
/**
 * Hook exposing the compliance-analysis state from the page-state context plus
 * the `run` / `reset` actions.
 *
 * `run` aborts any in-flight analysis, POSTs the form data to the streaming
 * endpoint and folds each event of the response stream into the shared state.
 *
 * @returns `{ state, run, reset }`.
 */
export function useComplianceAnalysis() {
  const { complianceState: state, setComplianceState: setState, complianceAbortRef, resetCompliance: reset } = usePageState();

  const run = useCallback(async (formData: FormData, meta: ComplianceMeta) => {
    // Only one analysis may stream at a time: cancel the previous request first.
    complianceAbortRef.current?.abort();
    const ctrl = new AbortController();
    complianceAbortRef.current = ctrl;
    setState({ ...INITIAL_STATE, status: 'streaming', stageLabel: 'Starting…', meta });

    /** Parses one event block (text between blank lines) and applies it to the state. */
    const handleBlock = (block: string): void => {
      // The space after "data:" is optional in an event stream, so match on the bare prefix.
      const dataLine = block.split('\n').find(l => l.startsWith('data:'));
      if (!dataLine) return;
      const raw = dataLine.slice(5).trim();
      if (!raw) return;
      try {
        const j = JSON.parse(raw);
        if (j.type === 'stage') {
          setState(s => ({ ...s, stageLabel: j.label ?? '', stageKey: j.stage ?? '' }));
        } else if (j.type === 'source') {
          const src: ComplianceSourceEvent = {
            standard: j.standard ?? '',
            clause: j.clause ?? '',
            score: j.score ?? 0,
            status: j.status ?? 'retrieved',
            full_content: j.full_content ?? '',
          };
          setState(s => ({ ...s, sources: [...s.sources, src] }));
        } else if (j.type === 'finding') {
          const finding: ComplianceFindingEvent = {
            title: j.title ?? '',
            desc: j.desc ?? '',
            status: j.status ?? 'info',
            clause_ref: j.clause_ref,
          };
          setState(s => ({ ...s, findings: [...s.findings, finding] }));
        } else if (j.type === 'done') {
          const payload: ComplianceDonePayload = {
            conclusion: j.conclusion ?? '',
            actions: j.actions ?? [],
            risk_score: j.risk_score ?? 0,
            highlight_terms: j.highlight_terms ?? [],
            para_text: j.para_text ?? '',
          };
          setState(s => ({ ...s, status: 'done', done: payload, stageKey: 'concluding', stageLabel: 'Complete' }));
        } else if (j.type === 'saved') {
          setState(s => ({ ...s, analysisId: j.analysis_id ?? null }));
        } else if (j.type === 'error') {
          setState(s => ({ ...s, status: 'error', errorText: j.text ?? 'Unknown error' }));
        }
      } catch { /* skip malformed */ }
    };

    try {
      const res = await fetch('/api/v1/compliance/analyze-stream', {
        method: 'POST',
        headers: authHeader(),
        body: formData,
        signal: ctrl.signal,
      });
      if (!res.ok) {
        const txt = await res.text();
        setState(s => ({ ...s, status: 'error', errorText: `HTTP ${res.status}: ${txt}` }));
        return;
      }
      if (!res.body) {
        setState(s => ({ ...s, status: 'error', errorText: 'No response stream' }));
        return;
      }

      const reader = res.body.getReader();
      const dec = new TextDecoder();
      let buffer = '';
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        // Normalise CRLF on the whole buffer so a "\r\n" split across two chunks is still caught.
        buffer = (buffer + dec.decode(value, { stream: true })).replace(/\r\n/g, '\n');
        const blocks = buffer.split('\n\n');
        // The last piece may be an incomplete event; keep it for the next chunk.
        buffer = blocks.pop() ?? '';
        blocks.forEach(handleBlock);
      }

      // Flush the decoder and process whatever is left: a final event that was
      // not followed by a blank line would otherwise be dropped.
      const tail = (buffer + dec.decode()).replace(/\r\n/g, '\n');
      tail.split('\n\n').forEach(handleBlock);

      // Mark done if stream ended without explicit done event
      setState(s => s.status === 'streaming' ? { ...s, status: 'done', stageKey: 'concluding', stageLabel: 'Complete' } : s);
    } catch (e: unknown) {
      // An abort is a deliberate cancellation, not an error to surface.
      if (e instanceof Error && e.name === 'AbortError') return;
      setState(s => ({ ...s, status: 'error', errorText: String(e) }));
    }
  }, [setState, complianceAbortRef]);

  return { state, run, reset };
}

View File

@@ -1,7 +1,14 @@
import { useState, useEffect } from 'react';
import { useState, useEffect, useCallback } from 'react';
import { Topbar } from '../../components/layout/Topbar';
import { Upload, Search } from 'lucide-react';
import { Upload, Search, Download, Trash2, RefreshCw, AlertTriangle } from 'lucide-react';
import { UploadModal } from './UploadModal';
import { useLanguage } from '../../contexts/LanguageContext';
/** localStorage key the bearer token is read from. */
const TOKEN_KEY = 'auth_token';

/**
 * Builds the request headers carrying the stored bearer token.
 *
 * @returns `{ Authorization: 'Bearer <token>' }` when a non-empty token is
 *   present in localStorage, otherwise an empty object.
 */
function authHeader(): Record<string, string> {
  const headers: Record<string, string> = {};
  const token = localStorage.getItem(TOKEN_KEY);
  if (token) {
    headers.Authorization = `Bearer ${token}`;
  }
  return headers;
}
interface Doc {
id: string;
@@ -10,45 +17,81 @@ interface Doc {
uploadedAt: string;
chunks: number;
type: string;
sizeBytes: number;
summary?: string;
version?: string;
}
const STATUS_FILTERS = ['All', 'Ready', 'Embedding', 'Failed', 'Pending'];
const TYPE_OPTS = ['All types', 'EU Regulation', 'ISO Standard', 'National Draft', 'Internal Policy'];
const MOCK_DOCS: Doc[] = [
{ id: '1', name: 'EU AI Act — Full text (EN)', status: 'ok', uploadedAt: '2025-11-10', chunks: 842, type: 'EU Regulation' },
{ id: '2', name: 'MIIT Draft 2025-08 (ZH)', status: 'ok', uploadedAt: '2025-11-01', chunks: 320, type: 'National Draft' },
{ id: '3', name: 'ISO/SAE 21434:2021', status: 'ok', uploadedAt: '2025-10-15', chunks: 614, type: 'ISO Standard' },
{ id: '4', name: 'Vehicle AI Safety Manual v3.2', status: 'ok', uploadedAt: '2025-10-08', chunks: 198, type: 'Internal Policy' },
{ id: '5', name: 'ADAS System Requirements', status: 'warn', uploadedAt: '2025-09-22', chunks: 0, type: 'Internal Policy' },
{ id: '6', name: 'UNECE R155 Corrigendum', status: 'info', uploadedAt: '2025-09-12', chunks: 87, type: 'EU Regulation' },
{ id: '7', name: 'GB/T 42118-2022', status: 'risk', uploadedAt: '2025-08-30', chunks: 0, type: 'National Draft' },
];
const STATUS_FILTERS = ['All', 'Ready', 'Processing', 'Failed', 'Pending'];
const STATUS_LABEL: Record<string, string> = { ok: 'Ready', warn: 'Processing', risk: 'Failed', info: 'Pending' };
const STATUS_MAP: Record<string, string> = { All: 'All', Ready: 'ok', Embedding: 'warn', Failed: 'risk', Pending: 'info' };
const STATUS_MAP: Record<string, string> = { All: 'All', Ready: 'ok', Processing: 'warn', Failed: 'risk', Pending: 'info' };
/**
 * Maps a backend document status value to the frontend display status.
 *
 * @param s - Status string reported by the backend.
 * @returns `'ok'` for `indexed`, `'risk'` for `failed`, `'warn'` for `parsed`,
 *   and `'info'` for anything else.
 */
function backendStatus(s: string): Doc['status'] {
  switch (s) {
    case 'indexed':
      return 'ok';
    case 'failed':
      return 'risk';
    case 'parsed':
      return 'warn'; // chunked, awaiting embedding
    default:
      return 'info'; // pending / stored
  }
}
/**
 * Renders a byte count as a short human-readable size.
 *
 * @param bytes - Size in bytes.
 * @returns `'—'` for a falsy value (0, NaN), `"<n> B"` below 1024, otherwise
 *   the value in KB or MB with one decimal place.
 */
function formatSize(bytes: number): string {
  const KB = 1024;
  if (!bytes) {
    return '—';
  }
  if (bytes < KB) {
    return `${bytes} B`;
  }
  const fitsInKb = bytes < KB * KB;
  const scaled = fitsInKb ? bytes / KB : bytes / KB / KB;
  const unit = fitsInKb ? 'KB' : 'MB';
  return `${scaled.toFixed(1)} ${unit}`;
}
// ── Confirm dialog ─────────────────────────────────────────────────────────
/**
 * Confirmation dialog for a delete action.
 *
 * Clicking the overlay or the cancel button calls `onCancel`; clicks inside the
 * card are stopped from reaching the overlay. The confirm button calls `onConfirm`.
 *
 * @param message - Body text shown under the title.
 * @param onConfirm - Invoked when the delete button is pressed.
 * @param onCancel - Invoked when the dialog is dismissed.
 */
function ConfirmDialog({ message, onConfirm, onCancel }: {
  message: string;
  onConfirm: () => void;
  onCancel: () => void;
}) {
  const { t } = useLanguage();

  const cardStyle = {
    background: 'var(--surface)',
    border: '1px solid var(--border)',
    borderRadius: 14,
    padding: '28px 32px',
    maxWidth: 400,
    width: '100%',
    boxShadow: '0 12px 40px rgba(0,0,0,.2)',
  };
  const titleRowStyle = { display: 'flex', alignItems: 'center', gap: 10, marginBottom: 14 };
  const messageStyle = { fontSize: 13, color: 'var(--muted)', lineHeight: 1.6, marginBottom: 20 };
  const buttonRowStyle = { display: 'flex', gap: 10, justifyContent: 'flex-end' };
  const dangerButtonStyle = { background: 'var(--danger)', color: '#fff', borderColor: 'var(--danger)' };

  return (
    <div className="modal-overlay" onClick={onCancel}>
      {/* Keep clicks inside the card from bubbling up to the dismissing overlay. */}
      <div style={cardStyle} onClick={evt => evt.stopPropagation()}>
        <div style={titleRowStyle}>
          <AlertTriangle size={18} color="var(--danger)" />
          <span style={{ fontWeight: 600, fontSize: 15 }}>{t.docs.confirmDeleteTitle}</span>
        </div>
        <p style={messageStyle}>{message}</p>
        <div style={buttonRowStyle}>
          <button className="btn sm" onClick={onCancel}>{t.docs.cancelBtn}</button>
          <button className="btn sm" style={dangerButtonStyle} onClick={onConfirm}>
            {t.docs.deleteBtn}
          </button>
        </div>
      </div>
    </div>
  );
}
export function DocsPage() {
const { t } = useLanguage();
const [search, setSearch] = useState('');
const [statusF, setStatusF] = useState('All');
const [typeF, setTypeF] = useState('All types');
const [selected, setSelected] = useState<Set<string>>(new Set());
const [docs, setDocs] = useState<Doc[]>(MOCK_DOCS);
const [docs, setDocs] = useState<Doc[]>([]);
const [loading, setLoading] = useState(true);
const [showUpload, setShowUpload] = useState(false);
const [refreshKey, setRefreshKey] = useState(0);
const [retrying, setRetrying] = useState<Set<string>>(new Set());
const [deleting, setDeleting] = useState<Set<string>>(new Set());
const [confirmDelete, setConfirmDelete] = useState<{ ids: string[]; names: string[] } | null>(null);
useEffect(() => {
fetch('/api/v1/documents/management-list')
// Dynamic type options derived from actual docs
const typeOpts = ['All types', ...Array.from(new Set(docs.map(d => d.type).filter(t => t && t !== '—')))];
const fetchDocs = useCallback(() => {
setLoading(true);
fetch('/api/v1/documents/management-list', { headers: authHeader() })
.then(r => r.json())
.then(d => {
if (!Array.isArray(d?.documents)) return;
if (!Array.isArray(d?.documents)) { setLoading(false); return; }
setDocs(d.documents.map((item: Record<string, unknown>) => ({
id: item.doc_id as string,
name: item.doc_name as string,
@@ -56,11 +99,18 @@ export function DocsPage() {
uploadedAt: ((item.updated_at as string) ?? '').slice(0, 10),
chunks: (item.chunk_count as number) ?? 0,
type: (item.regulation_type as string) || '—',
sizeBytes: (item.size_bytes as number) ?? 0,
summary: item.summary as string | undefined,
version: item.version as string | undefined,
})));
setLoading(false);
})
.catch(() => {});
.catch(() => setLoading(false));
}, []);
useEffect(() => { fetchDocs(); }, [fetchDocs, refreshKey]);
// ── Filtering ────────────────────────────────────────────────────────────
const filtered = docs.filter(d => {
const matchSearch = !search || d.name.toLowerCase().includes(search.toLowerCase());
const matchStatus = statusF === 'All' || d.status === STATUS_MAP[statusF];
@@ -68,37 +118,84 @@ export function DocsPage() {
return matchSearch && matchStatus && matchType;
});
// ── Selection helpers ────────────────────────────────────────────────────
/**
 * Header-checkbox handler: clears the selection when its size equals the
 * number of currently filtered rows, otherwise selects every filtered row.
 */
function toggleAll() {
  // NOTE(review): compares sizes only — presumably assumes the selection is a subset of `filtered`; confirm.
  const everythingSelected = selected.size === filtered.length;
  const next = everythingSelected
    ? new Set<string>()
    : new Set(filtered.map(doc => doc.id));
  setSelected(next);
}
/** Flips the selection state of a single row: removes `id` if selected, adds it otherwise. */
function toggleOne(id: string) {
  // Work on a copy so the state update receives a new Set instance.
  const next = new Set(selected);
  if (next.has(id)) {
    next.delete(id);
  } else {
    next.add(id);
  }
  setSelected(next);
}
// ── Download ─────────────────────────────────────────────────────────────
/**
 * Downloads a document and saves it under `name`.
 *
 * A plain `<a href>` navigation cannot carry the `Authorization` header that
 * the other requests in this file send, so the file is fetched with
 * `authHeader()` and handed to the browser through a temporary object URL.
 *
 * @param id - Identifier of the document to download.
 * @param name - File name suggested to the browser.
 */
async function downloadDoc(id: string, name: string): Promise<void> {
  try {
    const res = await fetch(`/api/v1/documents/download/${encodeURIComponent(id)}`, {
      headers: authHeader(),
    });
    if (!res.ok) throw new Error(`HTTP ${res.status}`);
    const objectUrl = URL.createObjectURL(await res.blob());
    const a = document.createElement('a');
    a.href = objectUrl;
    a.download = name;
    a.click();
    // Release the blob once the browser has had a chance to start the download.
    setTimeout(() => URL.revokeObjectURL(objectUrl), 1000);
  } catch (err) {
    console.error(`Download failed for document ${id}`, err);
  }
}
// ── Retry (re-process failed doc) ────────────────────────────────────────
/**
 * Asks the backend to re-process a document.
 *
 * The id is flagged as retrying while the request runs. When the request
 * resolves, the flag is cleared and the list refreshed after 1.5 s; when the
 * request rejects, the flag is cleared immediately.
 *
 * @param id - Identifier of the document to retry.
 */
async function retryDoc(id: string) {
  const clearRetrying = () =>
    setRetrying(prev => {
      const next = new Set(prev);
      next.delete(id);
      return next;
    });

  setRetrying(prev => new Set([...prev, id]));
  try {
    await fetch(`/api/v1/documents/${id}/retry`, { method: 'POST', headers: authHeader() });
  } catch {
    clearRetrying();
    return;
  }
  setTimeout(() => {
    clearRetrying();
    setRefreshKey(k => k + 1);
  }, 1500);
}
// ── Delete (single or batch) ─────────────────────────────────────────────
/**
 * Opens the delete confirmation for the given documents.
 *
 * @param ids - Identifiers of the documents to delete; each is paired with its
 *   display name, falling back to the id itself when no name is found.
 */
function askDelete(ids: string[]) {
  const names: string[] = [];
  for (const id of ids) {
    const match = docs.find(doc => doc.id === id);
    names.push(match?.name ?? id);
  }
  setConfirmDelete({ ids, names });
}
/**
 * Executes the pending delete: closes the dialog, issues one DELETE request per
 * document in parallel, then clears the deleting flags, drops the ids from the
 * selection and refreshes the list. Individual request failures are not inspected.
 */
async function confirmDeleteDocs() {
  if (!confirmDelete) return;
  const targetIds = confirmDelete.ids;

  setConfirmDelete(null);
  setDeleting(new Set(targetIds));

  const requests = targetIds.map(docId =>
    fetch(`/api/v1/documents/${docId}`, { method: 'DELETE', headers: authHeader() })
  );
  await Promise.allSettled(requests);

  setDeleting(new Set());
  setSelected(current => {
    const remaining = new Set(current);
    for (const docId of targetIds) {
      remaining.delete(docId);
    }
    return remaining;
  });
  setRefreshKey(k => k + 1);
}
return (
<div className="docs-page">
<Topbar
title="Document Management"
title={t.docs.topbarTitle}
actions={
<>
<div className="search-box">
<Search size={13} />
<input
placeholder="Search documents..."
placeholder={t.docs.searchPlaceholder}
value={search}
onChange={e => setSearch(e.target.value)}
/>
</div>
<button className="btn sm" onClick={() => setRefreshKey(k => k + 1)}>
<RefreshCw size={13} />{t.docs.refreshBtn}
</button>
<button className="btn sm primary" onClick={() => setShowUpload(true)}>
<Upload size={13} />Upload document
<Upload size={13} />{t.docs.uploadBtn}
</button>
</>
}
/>
<div className="page-content">
<div className="docs-controls">
<div className="chip-group">
@@ -107,22 +204,42 @@ export function DocsPage() {
key={f}
className={`chip${statusF === f ? ' active' : ''}`}
onClick={() => setStatusF(f)}
>{f}</button>
>
{f === 'All' ? t.docs.filterAll
: f === 'Ready' ? t.docs.filterReady
: f === 'Processing' ? t.docs.filterProcessing
: f === 'Failed' ? t.docs.filterFailed
: t.docs.filterPending}
</button>
))}
</div>
<select className="select-input" value={typeF} onChange={e => setTypeF(e.target.value)}>
{TYPE_OPTS.map(o => <option key={o}>{o}</option>)}
{typeOpts.map(o => (
<option key={o} value={o}>{o === 'All types' ? t.docs.filterAllTypes : o}</option>
))}
</select>
</div>
{/* Batch action bar */}
{selected.size > 0 && (
<div className="batch-bar">
<span>{selected.size} document{selected.size > 1 ? 's' : ''} selected</span>
<button className="btn sm">Analyze selected</button>
<button className="btn sm risk-btn">Delete selected</button>
<span>
{selected.size}{' '}
{t.docs.colName === 'Document name'
? `document${selected.size > 1 ? 's' : ''} selected`
: '份文档已选择'}
</span>
<button
className="btn sm"
style={{ color: 'var(--danger)', borderColor: 'rgba(239,68,68,.4)' }}
onClick={() => askDelete([...selected])}
>
<Trash2 size={12} />{t.docs.deleteSelected}
</button>
</div>
)}
{/* Table */}
<div className="docs-table">
<div className="table-header">
<input
@@ -130,36 +247,114 @@ export function DocsPage() {
checked={selected.size === filtered.length && filtered.length > 0}
onChange={toggleAll}
/>
<span>Document name</span>
<span>Status</span>
<span>Uploaded</span>
<span>Chunks</span>
<span>Type</span>
<span>Actions</span>
<span>{t.docs.colName}</span>
<span>{t.docs.colStatus}</span>
<span>{t.docs.colUploaded}</span>
<span>{t.docs.colChunks}</span>
<span>{t.docs.colSize}</span>
<span>{t.docs.colType}</span>
<span>{t.docs.colActions}</span>
</div>
{filtered.map(d => (
<div key={d.id} className={`table-row${selected.has(d.id) ? ' row-selected' : ''}`}>
{loading ? (
<div style={{ padding: '32px 16px', color: 'var(--muted)', fontSize: 13, textAlign: 'center' }}>
{t.docs.loading}
</div>
) : filtered.length === 0 ? (
<div style={{ padding: '40px 16px', color: 'var(--muted)', fontSize: 13, textAlign: 'center' }}>
{docs.length === 0 ? t.docs.emptyNoDocuments : t.docs.emptyNoMatch}
</div>
) : (
filtered.map(d => {
const isDeleting = deleting.has(d.id);
const isRetrying = retrying.has(d.id);
return (
<div
key={d.id}
className={`table-row${selected.has(d.id) ? ' row-selected' : ''}${isDeleting ? ' row-deleting' : ''}`}
>
<input
type="checkbox"
checked={selected.has(d.id)}
onChange={() => toggleOne(d.id)}
disabled={isDeleting}
/>
<span className="doc-name-cell">{d.name}</span>
<span className="doc-name-cell" title={d.summary || d.name}>
{d.name}
{d.version && <span style={{ fontSize: 10, color: 'var(--muted)', marginLeft: 6 }}>v{d.version}</span>}
</span>
<span><span className={`status ${d.status}`}>{STATUS_LABEL[d.status]}</span></span>
<span className="cell-mono">{d.uploadedAt}</span>
<span className="cell-mono">{d.chunks || '—'}</span>
<span className="cell-mono">{formatSize(d.sizeBytes)}</span>
<span className="cell-muted">{d.type}</span>
<span className="row-actions">
<button className="text-link">Inspect</button>
<button className="text-link">Analyze</button>
{d.status === 'risk' && <button className="text-link danger-link">Resolve</button>}
{/* Download */}
<button
className="text-link"
title={t.docs.titleDownload}
onClick={() => downloadDoc(d.id, d.name)}
>
<Download size={12} />
</button>
{/* Retry for failed */}
{d.status === 'risk' && (
<button
className="text-link"
title={t.docs.titleRetry}
disabled={isRetrying}
onClick={() => retryDoc(d.id)}
style={{ color: 'var(--warn)' }}
>
<RefreshCw size={12} style={{ animation: isRetrying ? 'spin 1s linear infinite' : 'none' }} />
</button>
)}
{/* Delete */}
<button
className="text-link danger-link"
title={t.docs.titleDelete}
disabled={isDeleting}
onClick={() => askDelete([d.id])}
>
<Trash2 size={12} />
</button>
</span>
</div>
))}
</div>
);
})
)}
</div>
{showUpload && <UploadModal onClose={() => setShowUpload(false)} />}
{/* Footer count */}
{!loading && (
<div style={{ padding: '10px 0', fontSize: 12, color: 'var(--muted)' }}>
{filtered.length} of {docs.length} document{docs.length !== 1 ? 's' : ''}
{selected.size > 0 && ` · ${selected.size} selected`}
</div>
)}
</div>
{/* Confirm delete dialog */}
{confirmDelete && (
<ConfirmDialog
message={
confirmDelete.ids.length === 1
? `Delete "${confirmDelete.names[0]}"? This will remove the document, all its chunks, and embeddings from the vector store. This action cannot be undone.`
: `Delete ${confirmDelete.ids.length} documents? This will remove them and all their chunks from the vector store. This action cannot be undone.`
}
onConfirm={confirmDeleteDocs}
onCancel={() => setConfirmDelete(null)}
/>
)}
{showUpload && (
<UploadModal
onClose={() => setShowUpload(false)}
onComplete={() => setRefreshKey(k => k + 1)}
/>
)}
</div>
);
}

View File

@@ -1,8 +1,15 @@
import { useState, useRef, useCallback } from 'react';
import { X, Upload } from 'lucide-react';
/** localStorage key the bearer token is read from. */
const TOKEN_KEY = 'auth_token';

/**
 * Builds the request headers carrying the stored bearer token.
 *
 * @returns `{ Authorization: 'Bearer <token>' }` when a non-empty token is
 *   present in localStorage, otherwise an empty object.
 */
function authHeader(): Record<string, string> {
  const headers: Record<string, string> = {};
  const token = localStorage.getItem(TOKEN_KEY);
  if (token) {
    headers.Authorization = `Bearer ${token}`;
  }
  return headers;
}
interface Props {
onClose: () => void;
onComplete?: () => void; // called when all uploads finish (indexed)
}
const REG_TYPES = ['EU Regulation', 'ISO Standard', 'National Draft', 'Internal Policy'];
@@ -43,12 +50,7 @@ function docStatusToStages(status: DocStatus): StageState[] {
}
}
// Generate a short unique ID client-side (matches backend's 8-char uuid prefix pattern)
function genDocId(): string {
return Math.random().toString(36).slice(2, 10);
}
export function UploadModal({ onClose }: Props) {
export function UploadModal({ onClose, onComplete }: Props) {
const [files, setFiles] = useState<File[]>([]);
const [regType, setRegType] = useState(REG_TYPES[0]);
const [version, setVersion] = useState('');
@@ -92,13 +94,14 @@ export function UploadModal({ onClose }: Props) {
const pollStatus = useCallback((docId: string, resolve: () => void, reject: (msg: string) => void) => {
let attempts = 0;
const MAX_ATTEMPTS = 120; // 4 minutes at 2s interval
const MAX_ATTEMPTS = 450; // 15 minutes at 2s interval — Aliyun DocMind can take several minutes
stopPolling();
pollTimer.current = setInterval(async () => {
attempts++;
try {
const res = await fetch(`/api/v1/documents/status/${docId}`);
const res = await fetch(`/api/v1/documents/status/${docId}`, { headers: authHeader() });
if (!res.ok) {
// Transient HTTP error (e.g. 502 during restart) — keep polling until timeout.
if (attempts > MAX_ATTEMPTS) { stopPolling(); reject('Polling timeout'); }
return;
}
@@ -113,7 +116,7 @@ export function UploadModal({ onClose }: Props) {
reject(data.message ?? 'Processing failed');
} else if (attempts > MAX_ATTEMPTS) {
stopPolling();
reject('Processing timeout — check Document Management for status');
reject('Processing timeout (15 min) — check Document Management for status');
}
} catch {
// network hiccup — keep polling
@@ -126,37 +129,42 @@ export function UploadModal({ onClose }: Props) {
setCurrentFileIdx(idx);
setDocStatus('idle');
const docId = genDocId();
const form = new FormData();
form.append('file', file);
form.append('doc_id', docId);
form.append('doc_name', file.name);
form.append('regulation_type', regType);
if (version) form.append('version', version);
form.append('generate_summary', 'false');
// Fire upload this is a long-running synchronous call on the backend.
// We start polling immediately so the UI updates as the backend writes status transitions.
const uploadPromise = fetch('/api/v1/documents/upload', { method: 'POST', body: form });
// Start polling after a short delay so the backend has time to create the document record
await new Promise<void>((res, rej) => {
const reject = (msg: string) => rej(new Error(msg));
// Begin polling immediately — backend creates the record synchronously before processing
setTimeout(() => pollStatus(docId, res, reject), 800);
// Also handle the upload response (in case processing finishes before poll catches it)
uploadPromise.then(async httpRes => {
if (!httpRes.ok) {
const detail = await httpRes.text().catch(() => httpRes.statusText);
stopPolling();
reject(`${file.name}: ${httpRes.status} ${detail}`);
}
// Upload succeeded — polling will catch the final status
}).catch(err => {
stopPolling();
reject(err instanceof Error ? err.message : 'Upload error');
// Upload the file first — response contains the authoritative doc_id.
// Without waiting here we risk polling an ID the server has not yet created.
let docId: string;
const uploadRes = await fetch('/api/v1/documents/upload', {
method: 'POST',
headers: authHeader(),
body: form,
});
if (!uploadRes.ok) {
const detail = await uploadRes.text().catch(() => uploadRes.statusText);
throw new Error(`${file.name}: ${uploadRes.status} ${detail}`);
}
const uploadData = await uploadRes.json() as { doc_id: string; status: string };
docId = uploadData.doc_id;
// If backend processed synchronously (sync=true or status already 'indexed'), resolve immediately.
if (uploadData.status === 'indexed') {
setDocStatus('indexed');
return;
}
if (uploadData.status === 'failed') {
setDocStatus('failed');
throw new Error(`${file.name}: Processing failed on server`);
}
// Otherwise start polling the authoritative doc_id returned by the server.
setDocStatus(uploadData.status as DocStatus);
await new Promise<void>((res, rej) => {
pollStatus(docId, res, (msg: string) => rej(new Error(msg)));
});
}
@@ -182,6 +190,7 @@ export function UploadModal({ onClose }: Props) {
setAllDone(true);
setDocStatus('idle');
setCurrentFileIdx(-1);
onComplete?.(); // notify parent to refresh doc list
}
// Compute queue stage display for the currently-processing file

View File

@@ -0,0 +1,80 @@
import React, { FormEvent, useState } from 'react';
import { useAuth } from '../../contexts';
/**
 * Sign-in screen: collects a username and password and passes them to the
 * `login` function from the auth context. While the call is pending the inputs
 * and submit button are disabled; a rejected login shows its message below the form.
 */
export function LoginPage() {
  const { login } = useAuth();
  const [username, setUsername] = useState('');
  const [password, setPassword] = useState('');
  const [error, setError] = useState('');
  const [loading, setLoading] = useState(false);

  const handleSubmit = async (event: FormEvent) => {
    event.preventDefault();
    const name = username.trim();
    // Both fields must contain something other than whitespace.
    const hasCredentials = name.length > 0 && password.trim().length > 0;
    if (!hasCredentials) return;

    setError('');
    setLoading(true);
    try {
      // The username is sent trimmed; the password is sent exactly as typed.
      await login(name, password);
    } catch (err) {
      const message = err instanceof Error ? err.message : 'Login failed';
      setError(message);
    } finally {
      setLoading(false);
    }
  };

  const submitLabel = loading ? 'Signing in…' : 'Sign in';
  const errorBanner = error ? <p className="login-error">{error}</p> : null;

  return (
    <div className="login-page">
      <div className="login-card">
        <div className="login-brand">
          <img src="/company-logo.ico" alt="T-Systems" className="login-logo" />
          <div className="login-brand-text">
            <div className="login-brand-name">T-Systems</div>
            <div className="login-brand-sub">AI Regulation Hub</div>
          </div>
        </div>
        <h2 className="login-title">Sign in</h2>
        <form onSubmit={handleSubmit} className="login-form">
          <div className="login-field">
            <label className="login-label" htmlFor="username">Username</label>
            <input
              id="username"
              type="text"
              className="login-input"
              value={username}
              onChange={evt => setUsername(evt.target.value)}
              autoFocus
              autoComplete="username"
              disabled={loading}
              placeholder="e.g. admin"
            />
          </div>
          <div className="login-field">
            <label className="login-label" htmlFor="password">Password</label>
            <input
              id="password"
              type="password"
              className="login-input"
              value={password}
              onChange={evt => setPassword(evt.target.value)}
              autoComplete="current-password"
              disabled={loading}
            />
          </div>
          {errorBanner}
          <button type="submit" className="login-btn" disabled={loading}>
            {submitLabel}
          </button>
        </form>
        <p className="login-hint">
          Demo accounts: <code>admin</code> / <code>legal</code> / <code>ehs</code> / <code>readonly</code>
        </p>
      </div>
    </div>
  );
}

View File

@@ -1,41 +1,46 @@
import { useNavigate } from 'react-router-dom';
import { ArrowRight, BarChart2, Eye, FileText, Shield, MessageSquare, Monitor } from 'lucide-react';
const SCREENS = [
{ id: 'status', label: 'System Status', icon: <Monitor size={20} />, to: '/status', desc: 'Live health and workflow queue' },
{ id: 'signals', label: 'Regulatory Signals', icon: <Eye size={20} />, to: '/signals', desc: 'AI-detected regulatory changes' },
{ id: 'documents', label: 'Document Management', icon: <FileText size={20} />, to: '/documents', desc: 'Upload and inspect documents' },
{ id: 'compliance', label: 'Compliance Analysis', icon: <Shield size={20} />, to: '/compliance', desc: 'Three-column compliance workspace' },
{ id: 'chat', label: 'Regulation Q&A', icon: <MessageSquare size={20} />, to: '/chat', desc: 'Chat with cited regulation sources' },
{ id: 'analytics', label: 'Analytics', icon: <BarChart2 size={20} />, to: '/status', desc: 'KPIs and coverage metrics' },
];
const STEPS = [
{ num: '01', label: 'Upload', desc: 'Ingest regulation documents' },
{ num: '02', label: 'Process', desc: 'Embed and chunk via vector DB' },
{ num: '03', label: 'Monitor', desc: 'Watch regulatory signal feed' },
{ num: '04', label: 'Analyze', desc: 'Run compliance gap analysis' },
{ num: '05', label: 'Review', desc: 'Inspect findings with AI assist' },
{ num: '06', label: 'Chat', desc: 'Ask questions with cited answers' },
];
import { useLanguage } from '../../contexts/LanguageContext';
export function OverviewPage() {
const navigate = useNavigate();
const { t } = useLanguage();
const SCREENS = [
{ id: 'status', label: t.overview.screenStatus, icon: <Monitor size={20} />, to: '/status', desc: t.overview.screenStatusDesc },
{ id: 'signals', label: t.overview.screenSignals, icon: <Eye size={20} />, to: '/signals', desc: t.overview.screenSignalsDesc },
{ id: 'documents', label: t.overview.screenDocuments, icon: <FileText size={20} />, to: '/documents', desc: t.overview.screenDocumentsDesc },
{ id: 'compliance', label: t.overview.screenCompliance, icon: <Shield size={20} />, to: '/compliance', desc: t.overview.screenComplianceDesc },
{ id: 'chat', label: t.overview.screenChat, icon: <MessageSquare size={20} />, to: '/chat', desc: t.overview.screenChatDesc },
{ id: 'analytics', label: t.overview.screenAnalytics, icon: <BarChart2 size={20} />, to: '/status', desc: t.overview.screenAnalyticsDesc },
];
const STEPS = [
{ num: '01', label: t.overview.stepUpload, desc: t.overview.stepUploadDesc },
{ num: '02', label: t.overview.stepProcess, desc: t.overview.stepProcessDesc },
{ num: '03', label: t.overview.stepMonitor, desc: t.overview.stepMonitorDesc },
{ num: '04', label: t.overview.stepAnalyze, desc: t.overview.stepAnalyzeDesc },
{ num: '05', label: t.overview.stepReview, desc: t.overview.stepReviewDesc },
{ num: '06', label: t.overview.stepChat, desc: t.overview.stepChatDesc },
];
return (
<div className="overview-scroll-wrapper">
<div className="overview-page">
<section className="overview-hero">
<p className="hero-eyebrow">T-Systems · AI Regulation Hub</p>
<h1 className="hero-title">AI Compliance,<br />Automated end-to-end</h1>
<p className="hero-desc">
Monitor global AI regulations, analyze document compliance gaps,
and get cited answers all in one platform.
</p>
<p className="hero-eyebrow">{t.overview.eyebrow}</p>
<h1 className="hero-title">
{t.overview.heroTitle.split('\n').map((line, i) => (
<span key={i}>{line}{i === 0 && <br />}</span>
))}
</h1>
<p className="hero-desc">{t.overview.heroDesc}</p>
<div className="hero-actions">
<button className="btn primary" onClick={() => navigate('/status')}>
Open dashboard <ArrowRight size={14} />
{t.overview.openDashboard} <ArrowRight size={14} />
</button>
<button className="btn" onClick={() => navigate('/chat')}>
Jump to regulation chat
{t.overview.jumpToChat}
</button>
</div>
</section>
@@ -43,22 +48,22 @@ export function OverviewPage() {
<div className="overview-summary card">
<div className="summary-item">
<span className="summary-num">6</span>
<span className="summary-label">Screens</span>
<span className="summary-label">{t.overview.statScreens}</span>
</div>
<div className="summary-divider" />
<div className="summary-item">
<span className="summary-num">5</span>
<span className="summary-label">Backend-aware flows</span>
<span className="summary-label">{t.overview.statFlows}</span>
</div>
<div className="summary-divider" />
<div className="summary-item">
<span className="summary-num">AI</span>
<span className="summary-label">Review posture</span>
<span className="summary-label">{t.overview.statReviewPosture}</span>
</div>
</div>
<section className="overview-workflow">
<h2 className="section-title">How it works</h2>
<h2 className="section-title">{t.overview.sectionHowItWorks}</h2>
<div className="workflow-steps">
{STEPS.map(s => (
<div key={s.num} className="workflow-step">
@@ -71,7 +76,7 @@ export function OverviewPage() {
</section>
<section className="overview-screens">
<h2 className="section-title">Screens</h2>
<h2 className="section-title">{t.overview.sectionScreens}</h2>
<div className="screen-grid">
{SCREENS.map(s => (
<button key={s.id} className="screen-card card" onClick={() => navigate(s.to)}>
@@ -83,5 +88,6 @@ export function OverviewPage() {
</div>
</section>
</div>
</div>
);
}

View File

@@ -1,17 +1,14 @@
import { useState, useEffect, useRef } from 'react';
import { Topbar } from '../../components/layout/Topbar';
import { RefreshCw, Play, Square, ExternalLink } from 'lucide-react';
import { usePageState } from '../../contexts';
import type { PerceptionSignal } from '../../contexts';
import { useLanguage } from '../../contexts/LanguageContext';
interface Signal {
id: string;
source: string;
standard: string;
status: 'ok' | 'warn' | 'risk' | 'info';
title: string;
summary: string;
date: string;
tags: string[];
impact: 'High' | 'Medium' | 'Low';
const TOKEN_KEY = 'auth_token';

/**
 * Builds the Authorization header for API requests.
 *
 * @returns `{ Authorization: 'Bearer <token>' }` when a non-empty token is
 *   stored in localStorage under `TOKEN_KEY`, otherwise an empty object so the
 *   result can always be spread into a `headers` literal.
 */
function authHeader(): Record<string, string> {
  const token = localStorage.getItem(TOKEN_KEY);
  if (!token) {
    return {};
  }
  return { Authorization: `Bearer ${token}` };
}
interface Stats {
@@ -21,29 +18,17 @@ interface Stats {
last_90_days: number;
}
interface DocResult {
score: number;
name: string;
clause: string;
snippet: string;
}
const SOURCES = ['All', 'MIIT', 'UN-ECE', 'ISO', 'GB Comm.', 'EUR-Lex', 'IATF'];
const IMPACTS = ['All', 'High', 'Medium', 'Low'];
// Backend /api/v1/perception/stats returns:
// { total, high_impact, medium_impact, last_90_days } — field names match, ✓
// Backend /api/v1/perception/events returns:
// { events: [{ id, title, summary, source, standard, impact_level, published_at, tags, status }] }
// Map backend event fields → frontend Signal shape
function mapEvent(e: Record<string, unknown>): Signal {
// Backend event → Signal
function mapEvent(e: Record<string, unknown>): PerceptionSignal {
const impact = String(e.impact_level ?? '').toLowerCase();
const backendStatus = String(e.status ?? '').toLowerCase();
return {
id: String(e.id ?? e.event_id ?? ''),
source: String(e.source ?? ''),
standard: String(e.standard ?? e.regulation_id ?? ''),
standard: String(e.standard ?? e.standard_code ?? e.regulation_id ?? ''),
status: backendStatus === 'high' || backendStatus === 'urgent' ? 'risk'
: backendStatus === 'medium' || backendStatus === 'draft' ? 'warn'
: backendStatus === 'low' || backendStatus === 'final' ? 'ok'
@@ -56,67 +41,78 @@ function mapEvent(e: Record<string, unknown>): Signal {
};
}
const MOCK_SIGNALS: Signal[] = [
const MOCK_SIGNALS: PerceptionSignal[] = [
{
id: '1', source: 'EUR-Lex', standard: 'EU/2024/1689', status: 'risk',
title: 'EU AI Act — High-risk AI in vehicles',
summary: 'Article 9 mandates risk management systems for automotive AI classifying as high-risk under Annex III point 3.',
date: '2025-11-18', tags: ['automotive', 'GDPR', 'certification'], impact: 'High'
date: '2025-11-18', tags: ['automotive', 'GDPR', 'certification'], impact: 'High',
},
{
id: '2', source: 'MIIT', standard: 'Draft-2025-08', status: 'warn',
title: 'MIIT Draft — in-vehicle AI training data',
summary: 'Draft regulation requires OEM data provenance documentation and OTA audit trails for AI systems.',
date: '2025-10-30', tags: ['OTA', 'data-governance', 'China'], impact: 'High'
date: '2025-10-30', tags: ['OTA', 'data-governance', 'China'], impact: 'High',
},
{
id: '3', source: 'ISO', standard: 'ISO/SAE 21434:2021/Amd1', status: 'info',
title: 'ISO/SAE 21434 Amendment 1',
summary: 'Amendment clarifies CSMS scope for software-only updates and vulnerability disclosure timelines.',
date: '2025-10-05', tags: ['cybersecurity', 'CSMS', 'ISO'], impact: 'Medium'
date: '2025-10-05', tags: ['cybersecurity', 'CSMS', 'ISO'], impact: 'Medium',
},
{
id: '4', source: 'UN-ECE', standard: 'UNECE WP.29 R155', status: 'ok',
title: 'UNECE R155 Corrigendum',
summary: 'Editorial corrections to cybersecurity management system requirements. No substantive changes.',
date: '2025-09-12', tags: ['type-approval', 'UNECE'], impact: 'Low'
},
];
const MOCK_DOCS: DocResult[] = [
{ score: 94, name: 'Vehicle AI Safety Manual v3.2', clause: '§4.2.1', snippet: 'The risk management process shall identify and evaluate risks arising from AI system decisions in safety-critical scenarios...' },
{ score: 87, name: 'ADAS System Requirements', clause: '§7.1', snippet: 'Automated driving functions must document training data lineage and model performance envelopes prior to deployment.' },
{ score: 71, name: 'Type Approval Documentation', clause: 'Annex B', snippet: 'Cybersecurity management system certification requires third-party audit of AI decision audit logs retention policy.' },
];
export function PerceptionPage() {
const [stats, setStats] = useState<Stats | null>(null);
const [signals, setSignals] = useState<Signal[]>(MOCK_SIGNALS);
const [searchQuery, setSearchQuery] = useState('');
const [sourceFilter, setSourceFilter] = useState('All');
const [impactFilter, setImpactFilter] = useState('All');
const [selected, setSelected] = useState<Signal | null>(null);
const [streaming, setStreaming] = useState(false);
const [aiOutput, setAiOutput] = useState('');
const abortRef = useRef<AbortController | null>(null);
const { t } = useLanguage();
// Persistent state lives in PageStateContext — survives route changes
const { perceptionState, setPerceptionState, perceptionAbortRef, perceptionCrawlAbortRef } = usePageState();
const { signals, searchQuery, sourceFilter, impactFilter, selectedId, aiOutput, detailTab, crawlStatus } = perceptionState;
// Stats and selectedFull are lightweight to re-fetch on mount
const [stats, setStats] = useState<Stats | null>(null);
const [streaming, setStreaming] = useState(false);
const [crawling, setCrawling] = useState(false);
// Full event detail — re-fetched when selected changes or page mounts with a selection
const [selectedFull, setSelectedFull] = useState<Record<string, unknown> | null>(null);
// Re-fetch stats every time the page mounts
useEffect(() => {
fetch('/api/v1/perception/stats')
fetch('/api/v1/perception/stats', { headers: authHeader() })
.then(r => r.json())
.then(setStats)
.catch(() => setStats({ total: 47, high_impact: 7, medium_impact: 18, last_90_days: 14 }));
}, []);
// Fetch signal list on first mount only (if empty), otherwise preserve context state
useEffect(() => {
fetch('/api/v1/perception/events?limit=100')
if (signals.length > 0) return; // already loaded
fetch('/api/v1/perception/events?limit=100', { headers: authHeader() })
.then(r => r.json())
.then(d => {
if (Array.isArray(d?.events) && d.events.length > 0) {
setSignals(d.events.map(mapEvent));
setPerceptionState(s => ({ ...s, signals: d.events.map(mapEvent) }));
} else {
setPerceptionState(s => ({ ...s, signals: MOCK_SIGNALS }));
}
})
.catch(() => { /* keep mock data on error */ });
}, []);
.catch(() => {
setPerceptionState(s => ({ ...s, signals: s.signals.length > 0 ? s.signals : MOCK_SIGNALS }));
});
}, []); // eslint-disable-line react-hooks/exhaustive-deps
// Re-fetch full event detail when navigating back with a selected signal
useEffect(() => {
if (selectedId) {
fetch(`/api/v1/perception/events/${selectedId}`, { headers: authHeader() })
.then(r => r.ok ? r.json() : null)
.then(d => { if (d) setSelectedFull(d); })
.catch(() => {});
} else {
setSelectedFull(null);
}
}, [selectedId]);
const selected = signals.find(s => s.id === selectedId) ?? null;
const filtered = signals.filter(s => {
if (sourceFilter !== 'All' && s.source !== sourceFilter) return false;
@@ -131,13 +127,20 @@ export function PerceptionPage() {
function runAnalysis() {
if (!selected) return;
setStreaming(true);
setAiOutput('');
setPerceptionState(s => ({ ...s, aiOutput: '' }));
const ctrl = new AbortController();
abortRef.current = ctrl;
// Backend: POST /api/v1/perception/events/{id}/analyze → SSE stream
fetch(`/api/v1/perception/events/${selected.id}/analyze`, { method: 'POST', signal: ctrl.signal })
perceptionAbortRef.current = ctrl;
fetch(`/api/v1/perception/events/${selected.id}/analyze`, {
method: 'POST',
headers: authHeader(),
signal: ctrl.signal,
})
.then(async res => {
if (!res.body) { setAiOutput('No stream available.'); setStreaming(false); return; }
if (!res.body) {
setPerceptionState(s => ({ ...s, aiOutput: 'No stream available.' }));
setStreaming(false);
return;
}
const reader = res.body.getReader();
const dec = new TextDecoder();
let buf = '';
@@ -154,47 +157,124 @@ export function PerceptionPage() {
if (!raw || raw === '[DONE]') continue;
try {
const j = JSON.parse(raw);
if (j.text) setAiOutput(p => p + j.text);
else if (typeof j === 'string') setAiOutput(p => p + j);
if (j.text) setPerceptionState(s => ({ ...s, aiOutput: s.aiOutput + j.text }));
else if (typeof j === 'string') setPerceptionState(s => ({ ...s, aiOutput: s.aiOutput + j }));
} catch {
setAiOutput(p => p + raw);
setPerceptionState(s => ({ ...s, aiOutput: s.aiOutput + raw }));
}
}
}
setStreaming(false);
})
.catch(e => {
if (e.name !== 'AbortError') setAiOutput('Analysis failed. Check API connection.');
if (e.name !== 'AbortError') setPerceptionState(s => ({ ...s, aiOutput: 'Analysis failed. Check API connection.' }));
setStreaming(false);
});
}
function stopAnalysis() {
abortRef.current?.abort();
perceptionAbortRef.current?.abort();
setStreaming(false);
}
function selectSignal(sig: Signal) {
setSelected(sig);
setAiOutput('');
/**
 * Starts a regulation crawl on the backend and mirrors its progress into
 * `perceptionState.crawlStatus`.
 *
 * POSTs to `/api/v1/perception/crawl` and reads the response body as a
 * server-sent-event stream. Blocks are separated by a blank line and carry an
 * `event: ` name plus a JSON `data: ` payload:
 *  - `progress` updates the status text for the reported source/stage,
 *  - `done` shows the summary and reloads the signal list,
 *  - `error` shows the reported message.
 *
 * A non-2xx response or a transport failure is reported through the
 * "connection failed" status. `crawling` is always reset when the function
 * settles.
 */
async function runCrawl() {
  setCrawling(true);
  setPerceptionState(s => ({ ...s, crawlStatus: t.signals.statusConnecting }));
  try {
    const res = await fetch('/api/v1/perception/crawl', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json', ...authHeader() },
      body: JSON.stringify({}),
    });
    // An error response (e.g. 401) carries no SSE blocks; without this check
    // the status would stay on "connecting" forever.
    if (!res.ok) {
      throw new Error(`HTTP ${res.status}`);
    }
    if (!res.body) {
      setPerceptionState(s => ({ ...s, crawlStatus: 'No stream' }));
      return;
    }
    const reader = res.body.getReader();
    const dec = new TextDecoder();
    let buf = '';
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      // stream: true keeps multi-byte UTF-8 sequences that straddle two
      // network chunks intact instead of decoding them to U+FFFD.
      buf += dec.decode(value, { stream: true });
      const parts = buf.split('\n\n');
      // The last element is a (possibly empty) partial block; keep it for the next read.
      buf = parts.pop() ?? '';
      for (const block of parts) {
        const lines = block.split('\n');
        const eventLine = lines.find(l => l.startsWith('event: '));
        const dataLine = lines.find(l => l.startsWith('data: '));
        const evtName = eventLine?.slice(7).trim();
        const raw = dataLine?.slice(6).trim();
        if (!raw) continue;
        try {
          const d = JSON.parse(raw);
          if (evtName === 'progress') {
            let stageText: string;
            if (d.stage === 'fetching') {
              stageText = t.signals.statusCrawling;
            } else if (d.stage === 'processing') {
              stageText = t.signals.statusProcessing.replace('{count}', String(d.fetched));
            } else {
              stageText = t.signals.statusComplete.replace('{count}', String(d.new));
            }
            setPerceptionState(s => ({ ...s, crawlStatus: `${d.source}: ${stageText}` }));
          } else if (evtName === 'done') {
            setPerceptionState(s => ({
              ...s,
              crawlStatus: t.signals.statusUpdateComplete
                .replace('{new}', String(d.total_new))
                .replace('{updated}', String(d.total_updated)),
            }));
            // Best-effort reload of the list; on failure the current signals
            // stay on screen and the "update complete" status remains.
            fetch('/api/v1/perception/events?limit=100', { headers: authHeader() })
              .then(r => r.json())
              .then(d2 => {
                if (Array.isArray(d2?.events)) {
                  setPerceptionState(s => ({ ...s, signals: d2.events.map(mapEvent) }));
                }
              })
              .catch(() => { /* keep the existing list */ });
          } else if (evtName === 'error') {
            setPerceptionState(s => ({
              ...s,
              crawlStatus: t.signals.statusError.replace('{message}', typeof d === 'string' ? d : String(d.message)),
            }));
          }
        } catch { /* ignore malformed event payloads */ }
      }
    }
  } catch (e: unknown) {
    setPerceptionState(s => ({
      ...s,
      crawlStatus: t.signals.statusConnFailed.replace('{message}', e instanceof Error ? e.message : String(e)),
    }));
  } finally {
    setCrawling(false);
  }
}
/**
 * Makes `sig` the active signal in the detail pane.
 *
 * Stores its id in the shared perception state, clears the previous AI
 * output, switches the detail view back to the overview tab, drops the cached
 * full-event record, and cancels any analysis request still in flight.
 */
function selectSignal(sig: PerceptionSignal) {
  setPerceptionState(prev => ({
    ...prev,
    selectedId: sig.id,
    aiOutput: '',
    detailTab: 'overview',
  }));

  // Local, per-mount state tied to the previously selected signal.
  setSelectedFull(null);
  setStreaming(false);

  const inFlight = perceptionAbortRef.current;
  if (inFlight) {
    inFlight.abort();
  }
}
return (
<div className="perception-page">
<Topbar
title="Regulatory Signals"
subtitle="ai-powered · live feed"
title={t.signals.topbarTitle}
subtitle={t.signals.topbarSub}
actions={
<>
<div className="search-box">
<input
placeholder="Search signals..."
placeholder={t.signals.searchPlaceholder}
value={searchQuery}
onChange={e => setSearchQuery(e.target.value)}
onChange={e => setPerceptionState(s => ({ ...s, searchQuery: e.target.value }))}
/>
</div>
<button className="btn sm"><RefreshCw size={13} />Refresh</button>
<button className="btn sm primary" onClick={runCrawl} disabled={crawling}>
<RefreshCw size={13} className={crawling ? 'spin' : ''} />
{crawling ? t.signals.crawlingBtn : t.signals.refreshBtn}
</button>
{crawlStatus && (
<span style={{ fontSize: 12, color: 'var(--text-secondary)', marginLeft: 8 }}>
{crawlStatus}
</span>
)}
</>
}
/>
@@ -202,32 +282,44 @@ export function PerceptionPage() {
<div className="stats-bar">
<div className="sbar-cell">
<span className="sbar-val">{stats?.total ?? '—'}</span>
<span className="sbar-lbl">Total signals</span>
<span className="sbar-lbl">{t.signals.statTotal}</span>
</div>
<div className="sbar-cell danger">
<span className="sbar-val">{stats?.high_impact ?? '—'}</span>
<span className="sbar-lbl">High impact</span>
<span className="sbar-lbl">{t.signals.statHigh}</span>
</div>
<div className="sbar-cell warn">
<span className="sbar-val">{stats?.medium_impact ?? '—'}</span>
<span className="sbar-lbl">Medium impact</span>
<span className="sbar-lbl">{t.signals.statMedium}</span>
</div>
<div className="sbar-cell accent">
<span className="sbar-val">{stats?.last_90_days ?? '—'}</span>
<span className="sbar-lbl">Last 90 days</span>
<span className="sbar-lbl">{t.signals.statLast90}</span>
</div>
</div>
<div className="filter-bar">
<div className="chip-group">
{SOURCES.map(s => (
<button key={s} className={`chip${sourceFilter === s ? ' active' : ''}`} onClick={() => setSourceFilter(s)}>{s}</button>
<button
key={s}
className={`chip${sourceFilter === s ? ' active' : ''}`}
onClick={() => setPerceptionState(st => ({ ...st, sourceFilter: s }))}
>
{s}
</button>
))}
</div>
<div className="filter-sep" />
<div className="chip-group">
{IMPACTS.map(i => (
<button key={i} className={`chip${impactFilter === i ? ' active' : ''}`} onClick={() => setImpactFilter(i)}>{i}</button>
<button
key={i}
className={`chip${impactFilter === i ? ' active' : ''}`}
onClick={() => setPerceptionState(st => ({ ...st, impactFilter: i }))}
>
{i}
</button>
))}
</div>
</div>
@@ -237,21 +329,21 @@ export function PerceptionPage() {
{filtered.map(sig => (
<div
key={sig.id}
className={`ev-card${selected?.id === sig.id ? ' selected' : ''}`}
className={`ev-card${selectedId === sig.id ? ' selected' : ''}`}
onClick={() => selectSignal(sig)}
>
<div className="ev-top">
<span className="source-tag">{sig.source}</span>
<span className="ev-std">{sig.standard}</span>
<span className={`status ${sig.status}`}>
{sig.status === 'ok' ? 'Final' : sig.status === 'warn' ? 'Draft' : sig.status === 'risk' ? 'Urgent' : 'Published'}
{sig.status === 'ok' ? t.signals.badgeFinal : sig.status === 'warn' ? t.signals.badgeDraft : sig.status === 'risk' ? t.signals.badgeUrgent : t.signals.badgePublished}
</span>
</div>
<div className="ev-title">{sig.title}</div>
<div className="ev-summary">{sig.summary}</div>
<div className="ev-bottom">
<span className="ev-date">{sig.date}</span>
<div className="ev-tags">{sig.tags.map(t => <span key={t} className="ev-tag">{t}</span>)}</div>
<div className="ev-tags">{sig.tags.map(tag => <span key={tag} className="ev-tag">{tag}</span>)}</div>
<span className={`impact-dot impact-${sig.impact.toLowerCase()}`}>{sig.impact}</span>
</div>
</div>
@@ -262,7 +354,7 @@ export function PerceptionPage() {
{!selected ? (
<div className="analysis-empty">
<div className="empty-ring" />
<p>Select a signal to run impact analysis</p>
<p>{t.signals.emptySelectSignal}</p>
</div>
) : (
<>
@@ -271,36 +363,176 @@ export function PerceptionPage() {
<span className="source-tag">{selected.source}</span>
<span className="ev-std">{selected.standard}</span>
<span className={`status ${selected.status}`}>
{selected.status === 'risk' ? 'Urgent' : 'Published'}
{selected.status === 'risk' ? t.signals.badgeUrgent : selected.status === 'warn' ? t.signals.badgeDraft : t.signals.badgePublished}
</span>
{selectedFull?.change_summary && (
<span className="status warn" style={{ marginLeft: 'auto' }}>CHANGED</span>
)}
</div>
<div className="detail-title">{selected.title}</div>
<p className="detail-summary">{selected.summary}</p>
<div className="detail-actions">
{!streaming
? <button className="btn sm primary" onClick={runAnalysis}><Play size={12} />Run impact analysis</button>
: <button className="btn sm" onClick={stopAnalysis}><Square size={12} />Stop</button>
? <button className="btn sm primary" onClick={runAnalysis}><Play size={12} />{t.signals.runAnalysis}</button>
: <button className="btn sm" onClick={stopAnalysis}><Square size={12} />{t.signals.stopBtn}</button>
}
<button className="btn sm"><ExternalLink size={12} />Source</button>
{selected && (
<a
href={(selectedFull?.full_text_url as string) || '#'}
target="_blank"
rel="noopener noreferrer"
className="btn sm"
>
<ExternalLink size={12} />{t.signals.sourceLink}
</a>
)}
</div>
</div>
<div className="card docs-card">
<div className="card-header">Affected documents</div>
{MOCK_DOCS.map(d => (
<div key={d.name} className="doc-row">
<span className="doc-score">{d.score}%</span>
<div>
<div className="doc-name">{d.name} <span className="doc-clause">{d.clause}</span></div>
<div className="doc-snippet">{d.snippet}</div>
</div>
</div>
<div className="detail-tabs">
{(['overview', 'obligations', 'assessment', 'diff'] as const).map(tab => (
<button
key={tab}
className={`detail-tab${detailTab === tab ? ' active' : ''}${tab === 'diff' && !selectedFull?.change_summary ? ' disabled' : ''}`}
onClick={() => {
if (tab !== 'diff' || selectedFull?.change_summary) {
setPerceptionState(s => ({ ...s, detailTab: tab }));
}
}}
>
{tab === 'overview' ? t.signals.tabOverview : tab === 'obligations' ? t.signals.tabObligations : tab === 'assessment' ? t.signals.tabImpact : t.signals.tabChanges}
</button>
))}
</div>
{detailTab === 'overview' && (
<div className="card">
<div className="card-header">{t.signals.cardScopeHeader}</div>
<p className="detail-summary" style={{ marginTop: 8 }}>
{(selectedFull?.scope as string) || selected.summary}
</p>
{selectedFull?.penalties && (
<p style={{ fontSize: 13, color: 'var(--danger)', marginTop: 6 }}>
{selectedFull.penalties as string}
</p>
)}
</div>
)}
{detailTab === 'obligations' && (
<div className="card">
<div className="card-header">{t.signals.cardObligationsHeader}</div>
{(() => {
const obs = (selectedFull?.obligations as Array<Record<string, string>>) || [];
const deadlines = (selectedFull?.deadlines as Array<Record<string, string>>) || [];
return obs.length === 0 && deadlines.length === 0 ? (
<p className="detail-summary" style={{ marginTop: 8 }}>{t.signals.obligationsEmpty}</p>
) : (
<>
{obs.length > 0 && (
<table style={{ width: '100%', fontSize: 13, borderCollapse: 'collapse', marginTop: 8 }}>
<thead>
<tr style={{ borderBottom: '1px solid var(--border)' }}>
<th style={{ textAlign: 'left', padding: '4px 8px' }}>{t.signals.colObligationDesc}</th>
<th style={{ textAlign: 'left', padding: '4px 8px', width: 80 }}>{t.signals.colSubject}</th>
<th style={{ textAlign: 'left', padding: '4px 8px', width: 60 }}>{t.signals.colType}</th>
</tr>
</thead>
<tbody>
{obs.map((ob, i) => (
<tr key={i} style={{ borderBottom: '1px solid var(--border-faint)' }}>
<td style={{ padding: '6px 8px' }}>{ob.text}</td>
<td style={{ padding: '6px 8px', color: 'var(--text-secondary)' }}>{ob.subject}</td>
<td style={{ padding: '6px 8px' }}>
<span className={`status ${ob.deontic === 'must' || ob.deontic === 'shall' ? 'risk' : ob.deontic === 'prohibited' ? 'risk' : 'info'}`}>
{ob.deontic}
</span>
</td>
</tr>
))}
</tbody>
</table>
)}
{deadlines.length > 0 && (
<div style={{ marginTop: 12 }}>
<div className="card-header">{t.signals.colDeadline}</div>
{deadlines.map((d, i) => (
<div key={i} style={{ fontSize: 13, padding: '4px 0', display: 'flex', gap: 12 }}>
<span style={{ fontWeight: 600, color: 'var(--danger)' }}>{d.date || t.signals.deadlinePending}</span>
<span style={{ color: 'var(--text-secondary)' }}>{d.description}</span>
</div>
))}
</div>
)}
</>
);
})()}
</div>
)}
{detailTab === 'assessment' && (
<div className="card docs-card">
<div className="card-header">{t.signals.cardAffectedDocs}</div>
{(() => {
const docs = (selectedFull?.affected_docs as Array<Record<string, unknown>>);
const displayDocs = docs && docs.length > 0 ? docs : [];
return displayDocs.length === 0
? <p className="detail-summary" style={{ marginTop: 8 }}>{t.signals.noAffectedDocs}</p>
: displayDocs.map((d, i) => (
<div key={i} className="doc-row">
<span className="doc-score">{Math.round(Number(d.score ?? 0) * 100)}%</span>
<div>
<div className="doc-name">
{String(d.doc_name || '')}
<span className="doc-clause">{String(d.key_clauses || d.clause || '')}</span>
</div>
{d.snippet && <div className="doc-snippet">{String(d.snippet)}</div>}
{d.recommendation && (
<div style={{ fontSize: 12, color: 'var(--accent)', marginTop: 2 }}> {String(d.recommendation)}</div>
)}
</div>
</div>
));
})()}
</div>
)}
{detailTab === 'diff' && selectedFull?.change_summary && (
<div className="card">
<div className="card-header">{t.signals.diffCardHeader}</div>
<p style={{ fontSize: 13, color: 'var(--text-secondary)', marginTop: 8 }}>
{selectedFull.change_summary as string}
</p>
{(() => {
const sections = (selectedFull.changed_sections as Array<Record<string, unknown>>) || [];
return sections.map((s, i) => (
<div key={i} style={{ marginTop: 12, borderTop: '1px solid var(--border)', paddingTop: 10 }}>
<div style={{ display: 'flex', gap: 8, marginBottom: 6 }}>
<span className={`status ${s.change_type === 'tightened' || s.change_type === 'added' ? 'risk' : s.change_type === 'removed' ? 'warn' : 'info'}`}>
{String(s.change_type)}
</span>
<span style={{ fontSize: 12, color: 'var(--text-secondary)' }}>cosine: {String(s.similarity)}</span>
</div>
<div style={{ display: 'grid', gridTemplateColumns: '1fr 1fr', gap: 8, fontSize: 12 }}>
<div style={{ background: 'var(--danger-bg)', padding: 8, borderRadius: 4 }}>
<div style={{ fontWeight: 600, marginBottom: 4 }}>{t.signals.diffOld}</div>
{String(s.old_text || '')}
</div>
<div style={{ background: 'var(--success-bg)', padding: 8, borderRadius: 4 }}>
<div style={{ fontWeight: 600, marginBottom: 4 }}>{t.signals.diffNew}</div>
{String(s.new_text || '')}
</div>
</div>
{s.summary && <p style={{ fontSize: 12, marginTop: 6, color: 'var(--text-secondary)' }}>{String(s.summary)}</p>}
</div>
));
})()}
</div>
)}
{(aiOutput || streaming) && (
<div className="card ai-card">
<div className="card-header">AI Impact Analysis</div>
<div className="card-header">{t.signals.cardAIImpact}</div>
<div className="ai-output">
{aiOutput}
{streaming && <span className="blink-cursor"></span>}
@@ -314,7 +546,7 @@ export function PerceptionPage() {
<footer className="page-footer">
<div className="live-dot" />
<span>Live feed · Regulation Hub</span>
<span>{t.signals.footerText}</span>
</footer>
</div>
);

View File

@@ -1,27 +1,18 @@
import { useState, useRef, useEffect, useCallback } from 'react';
import { useRef, useEffect, useCallback, useState } from 'react';
import { Topbar } from '../../components/layout/Topbar';
import { Send, Download } from 'lucide-react';
import { usePageState } from '../../contexts';
import type { RagCitation } from '../../contexts';
import { useLanguage } from '../../contexts/LanguageContext';
interface Message {
id: string;
role: 'user' | 'assistant';
text: string;
// citation indices mentioned in this assistant message (1-based, matching citations array)
citationRefs?: number[];
}
interface Citation {
index: number; // 1-based, matches [N] markers in text
score: number; // 0100 display percentage
name: string; // doc_name
clause: string; // section_title or clause
snippet: string; // preview text
docId?: string;
const TOKEN_KEY = 'auth_token';

/**
 * Builds the Authorization header for API requests.
 *
 * @returns `{ Authorization: 'Bearer <token>' }` when a non-empty token is
 *   stored in localStorage under `TOKEN_KEY`, otherwise an empty object so the
 *   result can always be spread into a `headers` literal.
 */
function authHeader(): Record<string, string> {
  const token = localStorage.getItem(TOKEN_KEY);
  if (!token) {
    return {};
  }
  return { Authorization: `Bearer ${token}` };
}
// Map a raw source doc from the backend "retrieved" event to our Citation shape.
// Backend fields: { id, score(0-1), preview, doc_name, clause, doc_id }
function mapSource(s: Record<string, unknown>, idx: number): Citation {
function mapSource(s: Record<string, unknown>, idx: number): RagCitation {
const rawScore = typeof s.score === 'number' ? s.score : 0;
const displayScore = rawScore <= 1 ? Math.round(rawScore * 100) : Math.round(rawScore);
return {
@@ -67,27 +58,24 @@ const MOCK_QUICK = [
];
export function RagChatPage() {
const [messages, setMessages] = useState<Message[]>([
{
id: 'init', role: 'assistant',
text: 'Hello! I can answer questions about your indexed regulations and compliance documents. Try asking about EU AI Act requirements, MIIT rules, or ISO/SAE 21434 scope.',
}
]);
const [quickPrompts, setQuickPrompts] = useState<string[]>(MOCK_QUICK);
const [input, setInput] = useState('');
const [streaming, setStreaming] = useState(false);
const [citations, setCitations] = useState<Citation[]>([]);
// All persistent state lives in PageStateContext — survives route changes
const { ragState, setRagState, ragStreamingRef, ragAbortRef } = usePageState();
const { t } = useLanguage();
const { messages, citations, sessionId, inputDraft } = ragState;
// Local-only UI state: highlighted citation and streaming indicator
// These are fine to reset on navigation since they're transient UI feedback
const [highlightedCit, setHighlightedCit] = useState<number | null>(null);
const [sessionId, setSessionId] = useState<string | null>(null);
const [streaming, setStreaming] = useState(ragStreamingRef.current);
const [quickPrompts, setQuickPrompts] = useState<string[]>(MOCK_QUICK);
const bottomRef = useRef<HTMLDivElement>(null);
const citRailRef = useRef<HTMLDivElement>(null);
const citItemRefs = useRef<Record<number, HTMLDivElement | null>>({});
const abortRef = useRef<AbortController | null>(null);
// Fetch quick questions from backend on mount
// Fetch quick questions from backend on mount (only once per session)
useEffect(() => {
fetch('/api/v1/rag/quick-questions')
fetch('/api/v1/rag/quick-questions', { headers: authHeader() })
.then(r => r.json())
.then(d => {
if (Array.isArray(d?.questions) && d.questions.length > 0) {
@@ -109,26 +97,33 @@ export function RagChatPage() {
if (el) {
el.scrollIntoView({ behavior: 'smooth', block: 'nearest' });
}
// Clear highlight after 3s
setTimeout(() => setHighlightedCit(h => h === n ? null : h), 3000);
}, []);
async function send(text?: string) {
const q = (text ?? input).trim();
if (!q || streaming) return;
setInput('');
const userMsg: Message = { id: Date.now().toString(), role: 'user', text: q };
setMessages(m => [...m, userMsg]);
const q = (text ?? inputDraft).trim();
if (!q || ragStreamingRef.current) return;
setRagState(s => ({ ...s, inputDraft: '' }));
const userMsgId = Date.now().toString();
const assistantId = (Date.now() + 1).toString();
setMessages(m => [...m, { id: assistantId, role: 'assistant', text: '' }]);
setRagState(s => ({
...s,
messages: [
...s.messages,
{ id: userMsgId, role: 'user', text: q },
{ id: assistantId, role: 'assistant', text: '' },
],
citations: [],
}));
ragStreamingRef.current = true;
setStreaming(true);
setCitations([]);
setHighlightedCit(null);
const ctrl = new AbortController();
abortRef.current = ctrl;
ragAbortRef.current = ctrl;
try {
const body: Record<string, unknown> = { query: q, top_k: 5 };
@@ -136,7 +131,7 @@ export function RagChatPage() {
const res = await fetch('/api/v1/rag/chat', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
headers: { 'Content-Type': 'application/json', ...authHeader() },
body: JSON.stringify(body),
signal: ctrl.signal,
});
@@ -145,14 +140,13 @@ export function RagChatPage() {
const reader = res.body.getReader();
const dec = new TextDecoder();
let buffer = '';
const newCitations: Citation[] = [];
const newCitations: RagCitation[] = [];
while (true) {
const { done, value } = await reader.read();
if (done) break;
buffer += dec.decode(value, { stream: true });
// SSE blocks separated by double newline
const blocks = buffer.split('\n\n');
buffer = blocks.pop() ?? '';
@@ -165,56 +159,62 @@ export function RagChatPage() {
const j = JSON.parse(raw);
if (j.type === 'session') {
// Backend assigned a session_id — persist for next request
if (j.session_id) setSessionId(j.session_id);
if (j.session_id) setRagState(s => ({ ...s, sessionId: j.session_id }));
} else if (j.type === 'retrieved' && Array.isArray(j.docs)) {
// Sources arrive before the answer starts
const mapped = j.docs.map((d: Record<string, unknown>, i: number) => mapSource(d, i + 1));
newCitations.push(...mapped);
setCitations([...mapped]);
setRagState(s => ({ ...s, citations: [...mapped] }));
} else if (j.type === 'chunk' && j.text) {
setMessages(m => m.map(msg =>
setRagState(s => ({
...s,
messages: s.messages.map(msg =>
msg.id === assistantId
? { ...msg, text: msg.text + (j.text as string) }
: msg
));
} else if (j.type === 'status') {
// Status message (e.g. "找到N条相关法规…") — could show in UI if desired
// For now we ignore it to keep the bubble clean
),
}));
} else if (j.type === 'done') {
// Extract which citation numbers appear in the final answer
setMessages(m => m.map(msg => {
setRagState(s => ({
...s,
messages: s.messages.map(msg => {
if (msg.id !== assistantId) return msg;
const refs = [...new Set(
[...msg.text.matchAll(/\[(\d+)\]/g)].map(r => parseInt(r[1], 10))
)].filter(n => n >= 1 && n <= newCitations.length);
return { ...msg, citationRefs: refs };
}),
}));
break;
} else if (j.type === 'error') {
setMessages(m => m.map(msg =>
setRagState(s => ({
...s,
messages: s.messages.map(msg =>
msg.id === assistantId
? { ...msg, text: `Error: ${j.text ?? 'Unknown error'}` }
: msg
));
),
}));
}
} catch { /* malformed JSON chunk, skip */ }
}
}
} catch (e: unknown) {
if (e instanceof Error && e.name !== 'AbortError') {
setMessages(m => m.map(msg =>
setRagState(s => ({
...s,
messages: s.messages.map(msg =>
msg.id === assistantId
? { ...msg, text: 'Could not reach the RAG API. Please check the backend.' }
? { ...msg, text: t.ragchat.apiError }
: msg
));
),
}));
}
} finally {
ragStreamingRef.current = false;
setStreaming(false);
}
}
@@ -224,7 +224,7 @@ export function RagChatPage() {
return (
<div className="chat-page">
<Topbar
title="Regulation Q&A"
title={t.ragchat.topbarTitle}
actions={
<button
className="btn sm"
@@ -236,7 +236,7 @@ export function RagChatPage() {
URL.revokeObjectURL(url);
}}
>
<Download size={13} />Export chat
<Download size={13} />{t.ragchat.exportBtn}
</button>
}
/>
@@ -244,7 +244,7 @@ export function RagChatPage() {
<div className="chat-body">
{/* ── History pane ── */}
<div className="history-pane">
<div className="history-header">Quick prompts</div>
<div className="history-header">{t.ragchat.quickPromptsHeader}</div>
{quickPrompts.map(q => (
<button key={q} className="quick-item" onClick={() => send(q)}>
{q}
@@ -284,16 +284,16 @@ export function RagChatPage() {
<div className="composer-row">
<textarea
className="composer-input"
placeholder="Ask about your regulations…"
value={input}
onChange={e => setInput(e.target.value)}
placeholder={t.ragchat.inputPlaceholder}
value={inputDraft}
onChange={e => setRagState(s => ({ ...s, inputDraft: e.target.value }))}
onKeyDown={e => { if (e.key === 'Enter' && !e.shiftKey) { e.preventDefault(); send(); } }}
rows={2}
/>
<button
className="btn primary"
onClick={() => send()}
disabled={!input.trim() || streaming}
disabled={!inputDraft.trim() || streaming}
>
<Send size={14} />
</button>
@@ -304,11 +304,11 @@ export function RagChatPage() {
{/* ── Citation rail ── */}
<div className="citation-rail" ref={citRailRef}>
<div className="citation-header">
Sources {citations.length > 0 && `(${citations.length})`}
{t.ragchat.citationsHeader}{citations.length > 0 && ` (${citations.length})`}
</div>
{citations.length === 0 && (
<p style={{ padding: '12px 16px', fontSize: 12, color: 'var(--muted)', lineHeight: 1.5 }}>
Citations will appear here after a response is generated.
{t.ragchat.citationsEmpty}
</p>
)}
{citations.map(c => (

View File

@@ -1,180 +1,362 @@
import { useState, useEffect } from 'react';
import { Topbar } from '../../components/layout/Topbar';
import { Search, Upload, Download, RefreshCw } from 'lucide-react';
import { Search, Upload, Download, RefreshCw, CheckCircle, XCircle, AlertTriangle, Info } from 'lucide-react';
import { UploadModal } from '../Docs/UploadModal';
import { useLanguage } from '../../contexts/LanguageContext';
// Backend /api/v1/status/stats returns:
// { documents_total, documents_indexed, documents_failed, chunks_total }
interface Stats { documents_total: number; documents_indexed: number; documents_failed: number; chunks_total: number; }
const TOKEN_KEY = 'auth_token';

/**
 * Builds the Authorization header for API requests.
 *
 * @returns `{ Authorization: 'Bearer <token>' }` when localStorage holds a
 *   non-empty value under `TOKEN_KEY`; otherwise an empty object, so the
 *   result can always be spread into a `headers` literal.
 */
function authHeader(): Record<string, string> {
  const storedToken = localStorage.getItem(TOKEN_KEY);
  if (!storedToken) {
    return {};
  }
  return { Authorization: `Bearer ${storedToken}` };
}
// Hard-coded rows for the "Workflow queue" card. `progress` is used as the
// progress-bar fill width in percent, `status` selects both the badge CSS
// class and its STATUS_LABEL text, and `cta` is the row's button caption.
const TASKS = [
{ name: 'EU AI Act — Article 13 check', status: 'ok', progress: 88, cta: 'View report' },
{ name: 'GB/T 42118 compliance scan', status: 'warn', progress: 54, cta: 'Continue' },
{ name: 'MIIT Draft — automotive AI embedding', status: 'info', progress: 12, cta: 'Start' },
];
// ── API types ──────────────────────────────────────────────────────────────
/**
 * Document/chunk counters kept in the status page's `stats` state.
 * Filled from the JSON body of `/api/v1/status/stats`; the page falls back to
 * all zeros when that request fails.
 */
interface Stats {
documents_total: number;
documents_indexed: number;
documents_failed: number;
chunks_total: number;
}
// Hard-coded rows for the "Active compliance programs" card. `status` is used
// as the badge CSS class and `coverage` is printed followed by a percent sign.
const PROGRAMS = [
{ name: 'EU AI Act Readiness', status: 'ok', coverage: 88 },
{ name: 'China MIIT Compliance', status: 'warn', coverage: 54 },
{ name: 'ISO/SAE 21434 Audit', status: 'info', coverage: 32 },
];
/**
 * Per-service health snapshot kept in the status page's `health` state.
 * Filled from the JSON body of `/api/v1/status/health`.
 *
 * How the page reads it: `milvus.status === 'ok'` counts as healthy,
 * `minio.connected` / `bm25.available` / `reranker.enabled` are shown as
 * on/off flags, and `sessions.active` is compared against `sessions.max`.
 */
interface Health {
milvus: { status: string; connected?: boolean; collection_name?: string; num_entities?: number; error?: string };
minio: { status: string; connected: boolean };
bm25: { available: boolean };
reranker: { enabled: boolean; model: string | null };
sessions: { active: number; max: number };
}
// Hard-coded entries for the KPI strip. `value` sets the bar fill width in
// percent and is printed together with `unit`.
const KPIS = [
{ label: 'Retrieval hit rate', value: 94, unit: '%' },
{ label: 'Evidence coverage', value: 78, unit: '%' },
{ label: 'Reviewer SLA', value: 91, unit: '%' },
];
/**
 * Backend configuration values kept in the status page's `config` state.
 * Filled from the JSON body of `/api/v1/status/config` and listed as
 * label/value rows in the collapsible configuration card.
 */
interface Config {
embedding_model: string;
embedding_dim: number;
embedding_base_url: string;
milvus_collection: string;
parser_backend: string;
chunk_backend: string;
llm_provider: string;
llm_model: string;
parser_failure_mode: string;
artifact_prefix?: string;
document_metadata_path?: string;
}
// Hard-coded rows for the "System health" card. A `status` of 'ok' is shown
// as "Online"; any other value is shown as "Degraded".
const SERVICES = [
{ name: 'Vector store (Chroma)', status: 'ok' },
{ name: 'LLM gateway (Claude)', status: 'ok' },
{ name: 'Document parser', status: 'ok' },
{ name: 'SSE stream endpoint', status: 'ok' },
{ name: 'Regulation feed sync', status: 'warn' },
];
// ── Small helpers ──────────────────────────────────────────────────────────
/**
 * Renders the 14px icon that matches a service status.
 * Any status other than `ok`, `error` or `warn` gets the muted info icon.
 */
function StatusIcon({ status }: { status: 'ok' | 'error' | 'warn' | 'info' }) {
  switch (status) {
    case 'ok':
      return <CheckCircle size={14} color="var(--ok)" />;
    case 'error':
      return <XCircle size={14} color="var(--danger)" />;
    case 'warn':
      return <AlertTriangle size={14} color="var(--warn)" />;
    default:
      return <Info size={14} color="var(--muted)" />;
  }
}
// Hard-coded entries for the "Regulatory watch" card; each row shows the
// date, title and summary as-is.
const EVENTS = [
{ date: '2025-11-18', title: 'EU AI Act — Delegated acts published', summary: 'European Commission releases implementing rules for high-risk AI classification under Annex III.' },
{ date: '2025-10-30', title: 'MIIT Draft — automotive AI', summary: 'New draft regulation covers in-vehicle AI training data provenance and OTA update governance.' },
{ date: '2025-10-05', title: 'ISO/SAE 21434 amendment', summary: 'Amendment 1 clarifies cybersecurity management system scope for software-only updates.' },
];
/**
 * One line of the system-health list: status icon, service name, an optional
 * muted detail text, and a right-aligned localized status badge.
 */
function ServiceRow({ name, status, detail }: { name: string; status: 'ok' | 'error' | 'warn' | 'info'; detail?: string }) {
  const { t } = useLanguage();

  // Resolve the localized badge caption; anything that is not ok/error/warn
  // falls through to the "unknown" caption.
  const badgeText = () => {
    switch (status) {
      case 'ok':
        return t.status.badgeOnline;
      case 'error':
        return t.status.badgeError;
      case 'warn':
        return t.status.badgeDegraded;
      default:
        return t.status.badgeUnknown;
    }
  };

  return (
    <div className="service-row">
      <StatusIcon status={status} />
      <span className="service-name" style={{ marginLeft: 8 }}>{name}</span>
      {detail && <span style={{ fontSize: 11, color: 'var(--muted)', marginLeft: 6 }}>{detail}</span>}
      <span className={`status ${status}`} style={{ marginLeft: 'auto' }}>
        {badgeText()}
      </span>
    </div>
  );
}
// Badge caption for each task status key; looked up by the workflow-queue rows.
const STATUS_LABEL: Record<string, string> = { ok: 'Complete', warn: 'In progress', info: 'Pending' };
/**
 * Label/value line used in the configuration card.
 * A `null` or `undefined` value is rendered as an em dash.
 */
function ConfigRow({ label, value }: { label: string; value: string | number | null | undefined }) {
  const displayValue = value ?? '—';

  return (
    <div
      style={{
        display: 'flex',
        justifyContent: 'space-between',
        alignItems: 'center',
        padding: '7px 0',
        borderBottom: '1px solid var(--border)',
        fontSize: 12,
      }}
    >
      <span style={{ color: 'var(--muted)' }}>{label}</span>
      <span
        style={{
          fontFamily: 'var(--font-mono)',
          color: 'var(--fg)',
          fontSize: 11,
          maxWidth: '60%',
          textAlign: 'right',
          wordBreak: 'break-all',
        }}
      >
        {displayValue}
      </span>
    </div>
  );
}
// ── Main component ─────────────────────────────────────────────────────────
export function StatusPage() {
const { t } = useLanguage();
const [stats, setStats] = useState<Stats | null>(null);
const [health, setHealth] = useState<Health | null>(null);
const [config, setConfig] = useState<Config | null>(null);
const [loading, setLoading] = useState(true);
const [healthLoading, setHealthLoading] = useState(true);
const [configOpen, setConfigOpen] = useState(false);
const [refreshKey, setRefreshKey] = useState(0);
const [showUpload, setShowUpload] = useState(false);
const [lastRefresh, setLastRefresh] = useState<Date | null>(null);
useEffect(() => {
setLoading(true);
fetch('/api/v1/status/stats')
.then(r => r.json())
.then(d => { setStats(d); setLoading(false); })
.catch(() => {
setStats({ documents_total: 42, documents_indexed: 38, documents_failed: 1, chunks_total: 3841 });
setHealthLoading(true);
// Fetch all three endpoints in parallel
Promise.allSettled([
fetch('/api/v1/status/stats', { headers: authHeader() }).then(r => r.json()),
fetch('/api/v1/status/health', { headers: authHeader() }).then(r => r.json()),
fetch('/api/v1/status/config', { headers: authHeader() }).then(r => r.json()),
]).then(([statsRes, healthRes, configRes]) => {
if (statsRes.status === 'fulfilled') setStats(statsRes.value);
else setStats({ documents_total: 0, documents_indexed: 0, documents_failed: 0, chunks_total: 0 });
if (healthRes.status === 'fulfilled') setHealth(healthRes.value);
if (configRes.status === 'fulfilled') setConfig(configRes.value);
setLoading(false);
setHealthLoading(false);
setLastRefresh(new Date());
});
}, [refreshKey]);
// ── Derived values ───────────────────────────────────────────────────────
const indexedPct = stats && stats.documents_total > 0
? Math.round((stats.documents_indexed / stats.documents_total) * 100)
: 0;
// Status for the Milvus row: 'info' while no health data is loaded, 'ok' only
// when the reported status string is exactly 'ok', otherwise 'error'.
function milvusStatus(): 'ok' | 'error' | 'warn' | 'info' {
  if (health && health.milvus.status === 'ok') return 'ok';
  return health ? 'error' : 'info';
}
// Detail text for the Milvus row: the first 60 characters of the error when
// one is reported, else "<collection> · <n> entities" built from whichever
// fields are present, or undefined when there is nothing to show.
function milvusDetail() {
  if (!health) return undefined;

  const { error, collection_name: collection, num_entities: entityCount } = health.milvus;
  if (error) return error.slice(0, 60);

  const summary = [
    collection ? collection : null,
    entityCount !== undefined ? `${entityCount.toLocaleString()} entities` : null,
  ]
    .filter((part): part is string => part !== null)
    .join(' · ');

  return summary || undefined;
}
// ── Export ───────────────────────────────────────────────────────────────
// Downloads the current stats/health/config state as a pretty-printed JSON
// file by clicking a temporary anchor that points at a Blob object URL.
function handleExport() {
  const snapshot = { stats, health, config, exportedAt: new Date().toISOString() };
  const payload = new Blob([JSON.stringify(snapshot, null, 2)], { type: 'application/json' });
  const objectUrl = URL.createObjectURL(payload);

  const link = document.createElement('a');
  link.href = objectUrl;
  link.download = `regulation-hub-status-${Date.now()}.json`;
  link.click();

  URL.revokeObjectURL(objectUrl);
}
return (
<div className="status-page">
<Topbar
title="System Status"
title={t.status.topbarTitle}
actions={
<>
<div className="search-box">
<Search size={13} />
<input placeholder="Search..." />
<input placeholder={t.status.searchPlaceholder} />
</div>
<button
className="btn sm"
onClick={() => {
const blob = new Blob([JSON.stringify(stats, null, 2)], { type: 'application/json' });
const url = URL.createObjectURL(blob);
const a = document.createElement('a');
a.href = url; a.download = 'regulation-hub-status.json'; a.click();
URL.revokeObjectURL(url);
}}
>
<Download size={13} />Export status
<button className="btn sm" onClick={handleExport}>
<Download size={13} />{t.status.exportBtn}
</button>
<button className="btn sm" onClick={() => setRefreshKey(k => k + 1)}>
<RefreshCw size={13} />Refresh
<RefreshCw size={13} />{t.status.refreshBtn}
</button>
<button className="btn sm primary" onClick={() => setShowUpload(true)}>
<Upload size={13} />New upload
<Upload size={13} />{t.status.newUploadBtn}
</button>
</>
}
/>
<div className="page-content">
{/* ── Stats grid ────────────────────────────────────────────────── */}
<div className="stats-grid">
<div className="stat-cell">
{loading ? <span className="loading-shimmer stat-value-loading" /> : <div className="stat-value">{stats?.documents_total ?? '—'}</div>}
<div className="stat-label">Documents total</div>
<div className="stat-label">{t.status.statTotal}</div>
</div>
<div className="stat-cell">
{loading ? <span className="loading-shimmer stat-value-loading" /> : <div className="stat-value">{stats?.documents_indexed ?? '—'}</div>}
<div className="stat-label">Indexed</div>
<div className="stat-label">{t.status.statIndexed}</div>
</div>
<div className="stat-cell danger">
{loading ? <span className="loading-shimmer stat-value-loading" /> : <div className="stat-value">{stats?.documents_failed ?? '—'}</div>}
<div className="stat-label">Failed</div>
<div className="stat-label">{t.status.statFailed}</div>
</div>
<div className="stat-cell">
{loading ? <span className="loading-shimmer stat-value-loading" /> : <div className="stat-value">{stats?.chunks_total?.toLocaleString() ?? '—'}</div>}
<div className="stat-label">Vector chunks</div>
<div className="stat-label">{t.status.statChunks}</div>
</div>
</div>
{/* Indexed progress bar */}
{!loading && stats && stats.documents_total > 0 && (
<div style={{ padding: '0 0 20px', display: 'flex', alignItems: 'center', gap: 12 }}>
<span style={{ fontSize: 12, color: 'var(--muted)', whiteSpace: 'nowrap' }}>{t.status.statCoverage}</span>
<div style={{ flex: 1, height: 6, background: 'var(--border)', borderRadius: 3, overflow: 'hidden' }}>
<div style={{
height: '100%', borderRadius: 3,
width: `${indexedPct}%`,
background: indexedPct === 100 ? 'var(--ok)' : indexedPct > 60 ? 'var(--accent)' : 'var(--warn)',
transition: 'width 0.6s ease',
}} />
</div>
<span style={{ fontSize: 12, fontFamily: 'var(--font-mono)', color: 'var(--fg)', whiteSpace: 'nowrap' }}>
{indexedPct}% ({stats.documents_indexed}/{stats.documents_total})
</span>
</div>
)}
{/* ── Main panel grid ───────────────────────────────────────────── */}
<div className="panel-grid">
<div className="panel-left">
{/* System health */}
<div className="card">
<div className="card-header">Workflow queue</div>
{TASKS.map(t => (
<div key={t.name} className="task-row">
<div className="task-info">
<div className="task-name">{t.name}</div>
<div className="task-progress-bar">
<div className="task-progress-fill" style={{ width: `${t.progress}%` }} />
</div>
</div>
<span className={`status ${t.status}`}>{STATUS_LABEL[t.status]}</span>
<button className="btn sm">{t.cta}</button>
</div>
))}
<div className="card-header" style={{ display: 'flex', alignItems: 'center', justifyContent: 'space-between' }}>
<span>{t.status.cardHealth}</span>
{lastRefresh && (
<span style={{ fontSize: 11, color: 'var(--muted)', fontWeight: 400 }}>
Updated {lastRefresh.toLocaleTimeString(undefined, { hour: '2-digit', minute: '2-digit', second: '2-digit' })}
</span>
)}
</div>
{healthLoading ? (
<div style={{ padding: '12px 0', display: 'flex', flexDirection: 'column', gap: 10 }}>
{[1, 2, 3, 4, 5].map(i => (
<div key={i} className="loading-shimmer" style={{ height: 28, borderRadius: 6 }} />
))}
</div>
) : health ? (
<>
<ServiceRow
name="Milvus vector store"
status={milvusStatus()}
detail={milvusDetail()}
/>
<ServiceRow
name="MinIO object storage"
status={health.minio.connected ? 'ok' : 'error'}
/>
<ServiceRow
name="BM25 keyword retriever"
status={health.bm25.available ? 'ok' : 'warn'}
detail={health.bm25.available ? undefined : t.status.serviceNotLoaded}
/>
<ServiceRow
name={`Reranker${health.reranker.model ? ` (${health.reranker.model})` : ''}`}
status={health.reranker.enabled ? 'ok' : 'info'}
detail={health.reranker.enabled ? t.status.serviceEnabled : t.status.serviceDisabled}
/>
<ServiceRow
name="Active sessions"
status={health.sessions.active < health.sessions.max ? 'ok' : 'warn'}
detail={`${health.sessions.active} / ${health.sessions.max} max`}
/>
</>
) : (
<div style={{ padding: '12px 0', color: 'var(--muted)', fontSize: 13 }}>
{t.status.healthEndpointError}
</div>
)}
</div>
{/* System config (collapsible) */}
<div className="card">
<div className="card-header">Active compliance programs</div>
{PROGRAMS.map(p => (
<div key={p.name} className="program-row">
<span className={`status ${p.status}`} style={{ marginRight: 'auto' }}>{p.name}</span>
<span className="program-pct">{p.coverage}%</span>
</div>
))}
<div className="kpi-strip">
{KPIS.map(k => (
<div key={k.label} className="kpi-item">
<div className="kpi-label">{k.label}</div>
<div className="kpi-bar"><div className="kpi-fill" style={{ width: `${k.value}%` }} /></div>
<div className="kpi-value">{k.value}{k.unit}</div>
</div>
))}
<button
onClick={() => setConfigOpen(v => !v)}
style={{ width: '100%', display: 'flex', alignItems: 'center', justifyContent: 'space-between', background: 'none', border: 'none', cursor: 'pointer', padding: 0 }}
>
<div className="card-header" style={{ margin: 0, padding: 0, flex: 1, textAlign: 'left' }}>{t.status.cardConfig}</div>
<span style={{ fontSize: 11, color: 'var(--muted)', transform: configOpen ? 'rotate(180deg)' : 'none', transition: 'transform 0.2s' }}></span>
</button>
{configOpen && (
<div style={{ marginTop: 12 }}>
{config ? (
<>
<ConfigRow label={t.status.labelLLMProvider} value={config.llm_provider} />
<ConfigRow label={t.status.labelLLMModel} value={config.llm_model} />
<ConfigRow label={t.status.labelEmbeddingModel} value={config.embedding_model} />
<ConfigRow label={t.status.labelEmbeddingDim} value={config.embedding_dim} />
<ConfigRow label={t.status.labelMilvusCollection} value={config.milvus_collection} />
<ConfigRow label={t.status.labelParserBackend} value={config.parser_backend} />
<ConfigRow label={t.status.labelChunkBackend} value={config.chunk_backend} />
<ConfigRow label={t.status.labelParserFailureMode} value={config.parser_failure_mode} />
</>
) : (
<div style={{ color: 'var(--muted)', fontSize: 13 }}>{t.status.configLoadError}</div>
)}
</div>
)}
</div>
</div>
<div className="panel-right">
{/* Document breakdown */}
<div className="card">
<div className="card-header">System health</div>
{SERVICES.map(s => (
<div key={s.name} className="service-row">
<span className="service-name">{s.name}</span>
<span className={`status ${s.status}`}>{s.status === 'ok' ? 'Online' : 'Degraded'}</span>
<div className="card-header">{t.status.cardBreakdown}</div>
{loading ? (
<div style={{ display: 'flex', flexDirection: 'column', gap: 8 }}>
{[1, 2, 3].map(i => <div key={i} className="loading-shimmer" style={{ height: 24, borderRadius: 4 }} />)}
</div>
))}
) : stats ? (
<>
{[
{ label: t.status.breakdownIndexed, value: stats.documents_indexed, total: stats.documents_total, color: 'var(--ok)' },
{ label: t.status.breakdownProcessing, value: stats.documents_total - stats.documents_indexed - stats.documents_failed, total: stats.documents_total, color: 'var(--warn)' },
{ label: t.status.breakdownFailed, value: stats.documents_failed, total: stats.documents_total, color: 'var(--danger)' },
].map(row => {
const pct = stats.documents_total > 0 ? Math.round((Math.max(0, row.value) / stats.documents_total) * 100) : 0;
return (
<div key={row.label} style={{ marginBottom: 10 }}>
<div style={{ display: 'flex', justifyContent: 'space-between', fontSize: 12, marginBottom: 4 }}>
<span style={{ color: 'var(--muted)' }}>{row.label}</span>
<span style={{ fontFamily: 'var(--font-mono)', color: 'var(--fg)' }}>{Math.max(0, row.value)} ({pct}%)</span>
</div>
<div style={{ height: 5, background: 'var(--border)', borderRadius: 2, overflow: 'hidden' }}>
<div style={{ height: '100%', width: `${pct}%`, background: row.color, borderRadius: 2, transition: 'width 0.5s' }} />
</div>
</div>
);
})}
<div style={{ marginTop: 8, paddingTop: 8, borderTop: '1px solid var(--border)', display: 'flex', justifyContent: 'space-between', fontSize: 12 }}>
<span style={{ color: 'var(--muted)' }}>{t.status.totalChunks}</span>
<span style={{ fontFamily: 'var(--font-mono)', fontWeight: 600 }}>{stats.chunks_total.toLocaleString()}</span>
</div>
</>
) : null}
</div>
{/* Sessions & reranker quick facts */}
{health && (
<div className="card">
<div className="card-header">Regulatory watch</div>
{EVENTS.map(e => (
<div key={e.title} className="event-row">
<div className="event-date">{e.date}</div>
<div className="event-title">{e.title}</div>
<div className="event-summary">{e.summary}</div>
<div className="card-header">{t.status.cardRuntime}</div>
<div style={{ display: 'flex', flexDirection: 'column', gap: 6 }}>
<div style={{ display: 'flex', justifyContent: 'space-between', fontSize: 12, padding: '4px 0' }}>
<span style={{ color: 'var(--muted)' }}>{t.status.labelActiveSessions}</span>
<span style={{ fontFamily: 'var(--font-mono)' }}>{health.sessions.active}</span>
</div>
))}
<div style={{ display: 'flex', justifyContent: 'space-between', fontSize: 12, padding: '4px 0' }}>
<span style={{ color: 'var(--muted)' }}>{t.status.labelSessionCapacity}</span>
<span style={{ fontFamily: 'var(--font-mono)' }}>{health.sessions.max}</span>
</div>
<div style={{ display: 'flex', justifyContent: 'space-between', fontSize: 12, padding: '4px 0' }}>
<span style={{ color: 'var(--muted)' }}>{t.status.labelReranker}</span>
<span style={{ fontFamily: 'var(--font-mono)', color: health.reranker.enabled ? 'var(--ok)' : 'var(--muted)' }}>
{health.reranker.enabled ? (health.reranker.model ?? t.status.serviceEnabled) : t.status.serviceDisabled}
</span>
</div>
<div style={{ display: 'flex', justifyContent: 'space-between', fontSize: 12, padding: '4px 0' }}>
<span style={{ color: 'var(--muted)' }}>{t.status.labelBM25}</span>
<span style={{ fontFamily: 'var(--font-mono)', color: health.bm25.available ? 'var(--ok)' : 'var(--muted)' }}>
{health.bm25.available ? t.status.statusActive : t.status.statusUnavailable}
</span>
</div>
</div>
</div>
)}
</div>
</div>
</div>
<footer className="page-footer">
<div className="live-dot" />
<span>Regulation Hub · T-Systems AI · Online</span>
<span>Regulation Hub · T-Systems AI · {health ? (health.milvus.status === 'ok' && health.minio.connected ? t.status.footerAllOk : t.status.footerDegraded) : t.status.footerChecking}</span>
</footer>
{showUpload && <UploadModal onClose={() => setShowUpload(false)} />}

View File

@@ -1,5 +1,8 @@
import { BrowserRouter, Routes, Route } from 'react-router-dom';
import { useEffect } from 'react';
import { BrowserRouter, Navigate, Routes, Route } from 'react-router-dom';
import { useAuth } from '../contexts';
import { AppShell } from '../components/layout/AppShell';
import { LoginPage } from '../pages/Login/LoginPage';
import { OverviewPage } from '../pages/Overview/OverviewPage';
import { StatusPage } from '../pages/Status/StatusPage';
import { PerceptionPage } from '../pages/Perception/PerceptionPage';
@@ -7,11 +10,47 @@ import { DocsPage } from '../pages/Docs/DocsPage';
import { CompliancePage } from '../pages/Compliance/CompliancePage';
import { RagChatPage } from '../pages/RagChat/RagChatPage';
/**
 * Route guard for protected pages.
 * Renders nothing while the auth context is still loading, redirects to
 * `/login` when there is no token, and renders `children` otherwise.
 */
function RequireAuth({ children }: { children: React.ReactNode }) {
  const auth = useAuth();

  // Hold rendering until the stored token has been validated.
  if (auth.loading) return null;

  return auth.token ? <>{children}</> : <Navigate to="/login" replace />;
}
/**
 * Route guard for guest-only pages such as the login screen.
 * Renders nothing while the auth context is still loading, redirects to `/`
 * when a token is present, and renders `children` otherwise.
 */
function GuestOnly({ children }: { children: React.ReactNode }) {
  const auth = useAuth();

  if (auth.loading) return null;

  return auth.token ? <Navigate to="/" replace /> : <>{children}</>;
}
export function AppRouter() {
const { logout } = useAuth();
// Listen for global 401 events emitted by the API layer.
useEffect(() => {
function onUnauthorized() { logout(); }
window.addEventListener('auth:unauthorized', onUnauthorized);
return () => window.removeEventListener('auth:unauthorized', onUnauthorized);
}, [logout]);
return (
<BrowserRouter>
<Routes>
<Route path="/" element={<AppShell />}>
{/* Public route */}
<Route path="/login" element={<GuestOnly><LoginPage /></GuestOnly>} />
{/* Protected routes */}
<Route
path="/"
element={
<RequireAuth>
<AppShell />
</RequireAuth>
}
>
<Route index element={<OverviewPage />} />
<Route path="status" element={<StatusPage />} />
<Route path="signals" element={<PerceptionPage />} />
@@ -19,6 +58,9 @@ export function AppRouter() {
<Route path="compliance" element={<CompliancePage />} />
<Route path="chat" element={<RagChatPage />} />
</Route>
{/* Catch-all */}
<Route path="*" element={<Navigate to="/" replace />} />
</Routes>
</BrowserRouter>
);

Some files were not shown because too many files have changed in this diff Show More