Fix SSE route dependency and align architecture docs

2026-05-18 16:32:42 +08:00
parent 86b9ac806a
commit 3f69cad404
149 changed files with 4786 additions and 5957 deletions
--- a/backend/app/services/rag/context_builder.py
+++ b/backend/app/services/rag/context_builder.py
@@ -1,4 +1,4 @@
-"""RAG上下文构建服务 - 构建LLM输入上下文"""
+"""RAG context-building service: assembles the input context passed to the LLM."""

 from typing import List, Dict, Optional
 from dataclasses import dataclass
@@ -6,11 +6,13 @@ from loguru import logger

 from .retriever import RetrievedDocument
 from app.config.settings import settings
+# Keep service responsibilities explicit so downstream behavior stays predictable.
+


@dataclass
 class RAGContext:
-    """RAG构建的上下文"""
+    """Context assembled by the RAG pipeline for a single query."""
    system_prompt: str
    context_text: str
    user_query: str
@@ -20,14 +22,7 @@ class RAGContext:


 class ContextBuilder:
-    """
-    RAG上下文构建器
-
-    功能：
-    - 格式化检索结果为上下文文本
-    - 控制上下文长度（token限制）
-    - 构建完整的LLM输入格式
-    """
+    """Format retrieved documents into token-limited context text and build the LLM input."""

    def __init__(
        self,
@@ -35,14 +30,7 @@ class ContextBuilder:
        include_metadata: bool = True,
        citation_format: str = "【条款{clause}】"
    ):
-        """
-        初始化上下文构建器
-
-        Args:
-            max_context_tokens: 最大上下文token数
-            include_metadata: 是否包含元数据（文档名、条款号等）
-            citation_format: 引用格式模板
-        """
+        """Initialize the builder with its token limit, metadata flag, and citation format."""
        self.max_context_tokens = max_context_tokens or settings.rag_max_context_tokens
        self.include_metadata = include_metadata
        self.citation_format = citation_format
@@ -56,30 +44,19 @@ class ContextBuilder:
        system_prompt: Optional[str] = None,
        max_tokens: Optional[int] = None
    ) -> RAGContext:
-        """
-        构建RAG上下文
-
-        Args:
-            query: 用户查询
-            documents: 检索到的文档列表
-            system_prompt: 系统提示词（可选）
-            max_tokens: 最大token数（可选，覆盖默认值）
-
-        Returns:
-            RAGContext: 构建的上下文对象
-        """
+        """Build the RAG context for a user query from the retrieved documents."""
        max_tokens = max_tokens or self.max_context_tokens

-        # 格式化文档内容
+        # Format the retrieved documents into context text.
        context_text, sources, truncated = self._format_documents(
            documents,
            max_tokens
        )

-        # 构建系统提示词
+        # Use the caller's system prompt, falling back to the default one.
        system_prompt = system_prompt or self._default_system_prompt()

-        # 估算总token数
+        # Estimate the total token count of prompt, context, and query.
        total_tokens = self._estimate_tokens(system_prompt + context_text + query)

        logger.info(f"上下文构建完成: {len(documents)}条文档, {total_tokens}tokens, truncated={truncated}")
@@ -98,29 +75,20 @@ class ContextBuilder:
        documents: List[RetrievedDocument],
        max_tokens: int
    ) -> tuple:
-        """
-        格式化文档内容
-
-        Args:
-            documents: 文档列表
-            max_tokens: 最大token数
-
-        Returns:
-            (context_text, sources, truncated)
-        """
+        """Format documents into context text; return (context_text, sources, truncated)."""
        context_parts = []
        sources = []
        current_tokens = 0
        truncated = False

        for i, doc in enumerate(documents):
-            # 格式化单个文档
+            # Format this single document.
            formatted = self._format_single_doc(doc, i + 1)

-            # 估算token数
+            # Estimate the token count of the formatted document.
            doc_tokens = self._estimate_tokens(formatted)

-            # 检查是否超出限制
+            # Check whether adding this document would exceed the token limit.
            if current_tokens + doc_tokens > max_tokens:
                truncated = True
                logger.warning(f"上下文截断: 已达到{max_tokens}tokens限制")
@@ -129,7 +97,7 @@ class ContextBuilder:
            context_parts.append(formatted)
            current_tokens += doc_tokens

-            # 记录来源
+            # Record the source of this document.
            sources.append({
                "index": i + 1,
                "doc_id": doc.doc_id,
@@ -148,13 +116,13 @@ class ContextBuilder:
        doc: RetrievedDocument,
        index: int
    ) -> str:
-        """格式化单个文档"""
+        """Format a single retrieved document as an indexed text block."""
        parts = []

-        # 索引编号
+        # Index marker, e.g. "[1]".
        parts.append(f"[{index}]")

-        # 元数据（可选）
+        # Optional metadata line, added only when include_metadata is set.
        if self.include_metadata:
            meta_parts = []

@@ -171,13 +139,13 @@ class ContextBuilder:
            if meta_parts:
                parts.append(" | ".join(meta_parts))

-        # 内容
+        # Document content.
        parts.append(doc.content)

        return "\n".join(parts)

    def _default_system_prompt(self) -> str:
-        """默认系统提示词"""
+        """Return the default system prompt."""
        return """你是合规专家助手，基于提供的法规条款回答问题。

 回答要求：
@@ -192,8 +160,8 @@ class ContextBuilder:
 - 最后给出合规建议"""

    def _estimate_tokens(self, text: str) -> int:
-        """估算文本token数"""
-        # 中文字符约1.5 token，英文约0.25 token
+        """Estimate the number of tokens in a text."""
+        # Heuristic: each Chinese character counts as ~1.5 tokens, any other character as ~0.25.
        chinese_chars = sum(1 for c in text if '一' <= c <= '鿿')
        other_chars = len(text) - chinese_chars
        return int(chinese_chars * 1.5 + other_chars * 0.25)
@@ -202,15 +170,7 @@ class ContextBuilder:
        self,
        context: RAGContext
    ) -> List[Dict[str, str]]:
-        """
-        构建LLM消息格式
-
-        Args:
-            context: RAG上下文对象
-
-        Returns:
-            List[Dict]: [{"role": "system/user/assistant", "content": "..."}]
-        """
+        """Build the LLM message list (role/content dicts) from a RAG context."""
        messages = [
            {"role": "system", "content": context.system_prompt},
            {"role": "user", "content": f"参考以下法规条款回答问题。\n\n{context.context_text}\n\n问题：{context.user_query}"}
@@ -224,6 +184,6 @@ def build_rag_context(
    documents: List[RetrievedDocument],
    **kwargs
 ) -> RAGContext:
-    """便捷函数：构建RAG上下文"""
+    """Convenience function: build a RAG context with a default ContextBuilder."""
    builder = ContextBuilder()
    return builder.build(query, documents, **kwargs)