Fix SSE route dependency and align architecture docs

2026-05-18 16:32:42 +08:00
parent 86b9ac806a
commit 3f69cad404
149 changed files with 4786 additions and 5957 deletions
--- a/backend/app/services/llm/document_summarizer.py
+++ b/backend/app/services/llm/document_summarizer.py
@@ -1,17 +1,20 @@
-"""文档摘要生成服务 - LLM生成法规文档摘要"""
+"""Document summarization service: use an LLM to summarize regulatory documents."""

 from typing import Dict, Optional
 from dataclasses import dataclass
 from loguru import logger

-from app.services.llm import get_llm_client, BaseLLMClient
+from app.services.llm.base_client import BaseLLMClient
+from app.services.llm.llm_factory import get_llm_client
 from app.services.rag.prompt_templates import get_prompt_template
 from app.config.settings import settings
+# NOTE: BaseLLMClient and get_llm_client are imported directly from their submodules rather than the package root.
+


@dataclass
 class DocumentSummary:
-    """文档摘要结果"""
+    """Result of a document summarization."""
    doc_name: str
    summary: str
    applicable_scope: str
@@ -24,24 +27,12 @@ class DocumentSummary:

    @property
    def is_success(self) -> bool:
+        """Return True when no error was recorded (``error`` is None)."""
        return self.error is None


 class DocumentSummarizer:
-    """
-    文档摘要生成器
-
-    功能：
-    - 生成法规文档的核心要点摘要
-    - 提取适用范围
-    - 突出关键条款
-    - 列出合规要点
-
-    使用示例：
-        summarizer = DocumentSummarizer()
-        result = summarizer.summarize("GB 7258-2017", markdown_content)
-        print(result.summary)
-    """
+    """Generate regulatory-document summaries: core points, applicable scope, key clauses, compliance points."""

    def __init__(
        self,
@@ -49,25 +40,18 @@ class DocumentSummarizer:
        model: str = None,
        max_tokens: int = None
    ):
-        """
-        初始化摘要生成器
-
-        Args:
-            provider: LLM提供商
-            model: LLM模型名称
-            max_tokens: 最大输出token数
-        """
+        """Store provider, model and max_tokens, falling back to ``settings`` for any left unset."""
        self.provider = provider or settings.llm_provider
        self.model = model or settings.llm_model
        self.max_tokens = max_tokens or settings.rag_summary_max_tokens

-        # LLM客户端（延迟加载）
+        # LLM client is created lazily on first use (see _init_llm).
        self.llm: Optional[BaseLLMClient] = None

        logger.info(f"摘要生成器初始化: provider={self.provider}, model={self.model}")

    def _init_llm(self):
-        """延迟初始化LLM"""
+        """Lazily create the LLM client on first use."""
        if self.llm is None:
            self.llm = get_llm_client(
                provider=self.provider,
@@ -81,18 +65,7 @@ class DocumentSummarizer:
        regulation_type: str = "",
        max_tokens: Optional[int] = None
    ) -> DocumentSummary:
-        """
-        生成文档摘要
-
-        Args:
-            doc_name: 文档名称
-            content: 文档内容（Markdown格式）
-            regulation_type: 法规类型
-            max_tokens: 最大输出token数
-
-        Returns:
-            DocumentSummary: 摘要结果
-        """
+        """Generate a summary of one document (Markdown content) and return a DocumentSummary."""
        import time
        start_time = time.time()

@@ -101,23 +74,23 @@ class DocumentSummarizer:
        try:
            self._init_llm()

-            # 使用摘要模板
+            # Fetch the document-summary prompt template.
            template = get_prompt_template("document_summary")

-            # 构建用户消息
+            # Build the user message.
            user_content = template.user_template.format(
                doc_name=doc_name,
-                content=content[:8000]  # 截取前8000字符（避免超出token限制）
+                content=content[:8000]  # Keep only the first 8000 characters to avoid exceeding the token limit.
            )

-            # 调用LLM
+            # Call the LLM.
            response = self.llm.chat(
                messages=[
                    {"role": "system", "content": template.system_prompt},
                    {"role": "user", "content": user_content}
                ],
                max_tokens=max_tokens or self.max_tokens,
-                temperature=0.3  # 低温度保证摘要准确性
+                temperature=0.3  # Low temperature for more accurate summaries.
            )

            latency_ms = int((time.time() - start_time) * 1000)
@@ -135,7 +108,7 @@ class DocumentSummarizer:
                    error=response.error
                )

-            # 解析摘要结构
+            # Parse the response into structured summary fields.
            summary_data = self._parse_summary(response.content)

            logger.success(f"摘要生成完成: {doc_name}, {latency_ms}ms")
@@ -166,7 +139,7 @@ class DocumentSummarizer:
            )

    def _parse_summary(self, content: str) -> Dict:
-        """解析摘要内容（提取结构化信息）"""
+        """Parse the summary text and extract structured fields."""
        result = {
            "summary": content,
            "applicable_scope": "",
@@ -175,26 +148,26 @@ class DocumentSummarizer:
            "compliance_points": []
        }

-        # 简单解析（提取关键信息）
+        # Simple line-based parsing to extract key information.
        lines = content.split("\n")

        for line in lines:
            line = line.strip()

-            # 提取适用范围
+            # Extract the applicable scope.
            if "适用范围" in line or "适用对象" in line:
                result["applicable_scope"] = line.split("：")[-1].strip() if "：" in line else line.split(":")[-1].strip()

-            # 提取关键条款
+            # Extract key clauses.
            if line.startswith("- 【条款") or line.startswith("【条款"):
                result["key_clauses"].append(line)

-            # 提取关键术语
+            # Extract key terms.
            if "关键术语" in line or "术语定义" in line:
-                # 继续读取后续几行
+                # TODO: read the following lines to collect the terms (not implemented).
                pass

-            # 提取合规要点
+            # Extract compliance points (not yet implemented).
            if "合规要点" in line or "必须满足" in line:
                pass

@@ -204,15 +177,7 @@ class DocumentSummarizer:
        self,
        documents: list
    ) -> list:
-        """
-        批量生成摘要
-
-        Args:
-            documents: 文档列表 [{"doc_name": str, "content": str}, ...]
-
-        Returns:
-            list: 摘要结果列表
-        """
+        """Summarize each document in ``documents`` (dicts with "doc_name" and "content") and return a list of results."""
        results = []
        for doc in documents:
            result = self.summarize(doc["doc_name"], doc["content"])
@@ -225,6 +190,6 @@ def summarize_document(
    content: str,
    **kwargs
 ) -> DocumentSummary:
-    """便捷函数：生成文档摘要"""
+    """Convenience wrapper: build a DocumentSummarizer from ``kwargs`` and summarize one document."""
    summarizer = DocumentSummarizer(**kwargs)
    return summarizer.summarize(doc_name, content)