update for 1. 优化 2.中英切换

2026-06-10 11:10:36 +08:00
parent e7963b267e
commit 9212747e1b
42 changed files with 7866 additions and 278 deletions
--- a/backend/app/application/compliance/pipeline.py
+++ b/backend/app/application/compliance/pipeline.py
@@ -5,6 +5,7 @@ All functions are synchronous — call them via asyncio.to_thread() in async SSE

 from __future__ import annotations

+import asyncio
 import json
 import os
 import re
@@ -12,10 +13,20 @@ import tempfile
 from typing import TYPE_CHECKING

 from loguru import logger
+from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
+
+# Shared retry policy for LLM calls: at most 3 attempts, exponential back-off clamped to 1–4 s.
+_llm_retry = retry(
+    stop=stop_after_attempt(3),
+    wait=wait_exponential(multiplier=1, min=1, max=4),
+    retry=retry_if_exception_type((ValueError, TimeoutError, ConnectionError)),  # ValueError is what the wrapped calls raise on a non-success reply
+    reraise=True,  # re-raise the last underlying exception instead of tenacity's RetryError
+)

 if TYPE_CHECKING:
    from app.application.knowledge import KnowledgeRetrievalService
    from app.domain.retrieval import RetrievedChunk
+    from app.domain.compliance.ports import AnalysisRecord, FindingRecord
    from app.services.llm.base_client import BaseLLMClient


@@ -109,17 +120,67 @@ def retrieve_for_clause(
    return retrieval_service.retrieve(query=clause, top_k=top_k, filters=domains)


+def process_single_clause(
+    clause: str,
+    index: int,
+    retrieval_service: "KnowledgeRetrievalService",
+    client: "BaseLLMClient",
+    top_k: int = 5,
+    domains: str | None = None,
+) -> dict:
+    """Retrieve regulations for a single clause, then run the compliance check on it.
+
+    Returns a dict with keys: index, chunks, finding (None e.g. when the LLM call fails).
+    Synchronous on purpose so it can be dispatched through asyncio.to_thread().
+    """
+    retrieved = retrieve_for_clause(clause, retrieval_service, top_k, domains)
+    verdict = check_clause_compliance(clause, retrieved, client)
+    return {"index": index, "chunks": retrieved, "finding": verdict}
+
+
+async def run_clauses_parallel(
+    clauses: list[str],
+    retrieval_service: "KnowledgeRetrievalService",
+    client: "BaseLLMClient",
+    top_k: int = 5,
+    domains: str | None = None,
+) -> list[dict]:
+    """Run all clauses through retrieve+gap-check in parallel.
+
+    Results are returned in the original clause order even though processing
+    is concurrent. An Exception in an individual clause is logged and replaced
+    by a dict with finding=None so the remaining clauses are still reported; a
+    non-Exception BaseException (e.g. CancelledError) is re-raised instead.
+
+    Both retrieval_service and client must be thread-safe — they are shared
+    across all asyncio.to_thread() calls without locking.
+    """
+    tasks = [
+        asyncio.to_thread(process_single_clause, clause, i, retrieval_service, client, top_k, domains)
+        for i, clause in enumerate(clauses)
+    ]
+    raw = await asyncio.gather(*tasks, return_exceptions=True)
+    results = []
+    for i, r in enumerate(raw):
+        if isinstance(r, Exception):
+            logger.warning("Clause {} processing failed: {}", i, r)
+            results.append({"index": i, "chunks": [], "finding": None})
+        elif isinstance(r, BaseException):
+            raise r  # gather() also hands back CancelledError etc.; never treat those as results
+        else:
+            results.append(r)
+    return results
+
+
 def check_clause_compliance(
    clause: str,
    chunks: list["RetrievedChunk"],
    client: "BaseLLMClient",
 ) -> dict | None:
-    if not chunks:
-        return None
    reg_context = "\n".join(
        f"[{i+1}] {c.doc_title} {c.section_title or ''}: {c.text[:300]}"
        for i, c in enumerate(chunks[:5])
-    )
+    ) if chunks else "(no regulatory context retrieved)"
    prompt = (
        "You are a compliance expert. Judge whether the following business clause "
        "complies with the retrieved regulations.\n\n"
@@ -135,9 +196,19 @@ def check_clause_compliance(
        "status: ok=compliant, warn=gap exists, risk=critical/missing\n"
        "Return ONLY the JSON object."
    )
-    response = client.chat([{"role": "user", "content": prompt}], max_tokens=500)
-    if not response.is_success:
+
+    def _do_check():
+        resp = client.chat([{"role": "user", "content": prompt}], max_tokens=500)
+        if not resp.is_success:
+            raise ValueError("LLM returned non-success for gap check")
+        return resp
+
+    try:
+        response = _llm_retry(_do_check)()
+    except Exception as exc:
+        logger.warning("check_clause_compliance LLM call failed after retries: {}", exc)
        return None
+
    try:
        result = _extract_json(response.content)
        if isinstance(result, dict) and "status" in result:
@@ -182,12 +253,11 @@ def synthesize_conclusion(
        '    {"label": "Priority", "value": "High/Medium/Low", "risk": true}\n'
        '  ],\n'
        '  "risk_score": 0-100 (integer, higher=riskier),\n'
-        '  "highlight_terms": ["Key terms to highlight, max 10 terms"],\n'
+        '  "highlight_terms": ["term1", "term2"],  // up to 10 key technical/legal terms actually present in the text\n'
        '  "para_text": "Original text or summary (max 600 chars)"\n'
        "}\n"
        "Return ONLY the JSON object."
    )
-    response = client.chat([{"role": "user", "content": prompt}], max_tokens=1200)
    fallback = {
        "conclusion": "Compliance analysis complete. Review findings and create remediation plan.",
        "actions": [
@@ -198,8 +268,19 @@ def synthesize_conclusion(
        "highlight_terms": [],
        "para_text": para_text[:800],
    }
-    if not response.is_success:
+
+    def _do_synthesize():
+        resp = client.chat([{"role": "user", "content": prompt}], max_tokens=1200)
+        if not resp.is_success:
+            raise ValueError("LLM returned non-success for synthesis")
+        return resp
+
+    try:
+        response = _llm_retry(_do_synthesize)()
+    except Exception as exc:
+        logger.warning("synthesize_conclusion LLM call failed after retries: {}", exc)
        return fallback
+
    try:
        result = _extract_json(response.content)
        if isinstance(result, dict):
@@ -212,4 +293,78 @@ def synthesize_conclusion(
            }
    except (ValueError, TypeError) as exc:
        logger.warning("Conclusion synthesis JSON parse failed: {}", exc)
-    return fallback
+    return fallback
+
+
+_SUGGESTION_FOCUS = {  # finding status -> "Task:" hint interpolated into the suggestion prompt
+    "risk": "Focus on remediation steps, required certifications, and timeline to resolve.",
+    "warn": "Focus on identifying the specific compliance gap and how to close it.",  # also the default for unrecognised statuses
+    "ok": "Focus on maintaining compliance evidence and monitoring future changes.",
+}
+
+_SUGGESTION_FALLBACK = {  # finding status -> 3 static questions returned when the LLM reply is unusable
+    "risk": [
+        "What specific certifications or documents are required to remediate this finding?",
+        "What is the typical remediation timeline for this type of non-compliance?",
+        "Which regulation clause defines the exact requirement?",
+    ],
+    "warn": [  # also the default for unrecognised statuses
+        "What is the exact gap between the current state and the requirement?",
+        "What evidence would demonstrate partial compliance?",
+        "Which regulation clause applies to this warning?",
+    ],
+    "ok": [
+        "What documentation should be maintained to evidence this compliance?",
+        "How should this area be monitored as regulations evolve?",
+        "Are there related clauses that may affect this compliant area?",
+    ],
+}
+
+
+def build_finding_context(finding: "FindingRecord", analysis: "AnalysisRecord") -> str:
+    """Render a finding and its parent analysis as "label: value" lines for chat grounding.
+
+    Lets the LLM get full context without the frontend passing segment_context.
+    """
+    rows = (
+        ("Document", analysis.doc_name),
+        ("Standard", analysis.standard_name),
+        (f"Finding [{finding.seq + 1}]", finding.title),
+        ("Status", finding.status),
+        ("Clause reference", finding.clause_ref or "N/A"),
+        ("Description", finding.description),
+        ("Overall conclusion", analysis.conclusion),
+    )
+    return "".join(f"{label}: {value}\n" for label, value in rows)
+
+
+def generate_suggestions(
+    finding: "FindingRecord",
+    analysis: "AnalysisRecord",
+    client: "BaseLLMClient",
+) -> list[str]:
+    """Generate 3 context-aware follow-up questions for a finding chat thread.
+
+    Returns exactly 3 question strings. Falls back to a copy of the static
+    templates if the LLM call raises, is unsuccessful, or returns unusable JSON.
+    """
+    fallback = list(_SUGGESTION_FALLBACK.get(finding.status, _SUGGESTION_FALLBACK["warn"]))
+    context = build_finding_context(finding, analysis)
+    focus = _SUGGESTION_FOCUS.get(finding.status, _SUGGESTION_FOCUS["warn"])
+    prompt = (
+        f"{context}\n\n"
+        f"Task: {focus}\n"
+        "Generate exactly 3 concise follow-up questions a compliance analyst would ask.\n"
+        'Return JSON: {"questions": ["question 1", "question 2", "question 3"]}\n'
+        "Return ONLY the JSON object."
+    )
+    try:
+        response = client.chat([{"role": "user", "content": prompt}], max_tokens=300)
+        if response.is_success:
+            result = _extract_json(response.content)
+            questions = result.get("questions") if isinstance(result, dict) else None  # JSON may be a non-object
+            if isinstance(questions, list) and len(questions) >= 3:
+                return [str(q) for q in questions[:3]]
+    except Exception as exc:  # best-effort: client and parse errors both use the fallback
+        logger.warning("generate_suggestions failed, using static fallback: {}", exc)
+    return fallback