Fix SSE route dependency and align architecture docs

2026-05-18 16:32:42 +08:00
parent 86b9ac806a
commit 3f69cad404
149 changed files with 4786 additions and 5957 deletions
--- a/backend/app/services/llm/qwen_client.py
+++ b/backend/app/services/llm/qwen_client.py
@@ -1,4 +1,4 @@
-"""Qwen LLM客户端 - 支持OpenAI兼容API格式"""
+"""Qwen LLM client supporting the OpenAI-compatible API format."""

 import time
 import json
@@ -7,21 +7,12 @@ from loguru import logger
 import httpx

 from .base_client import BaseLLMClient, LLMResponse, LLMConfig, LLMProvider
+# Keep provider-specific behavior explicit so debugging stays straightforward.
+


 class QwenClient(BaseLLMClient):
-    """
-    Qwen API客户端（OpenAI兼容格式）
-
-    支持通过new-api等代理服务调用：
-    - qwen-turbo
-    - qwen-plus
-    - qwen-max
-    - qwen3.5-flash (推荐：快速响应)
-    - qwen3.5-plus
-    - qwen-long
-    - qwen2.5系列
-    """
+    """Qwen API client (OpenAI-compatible format), usable through proxy services such as new-api."""

    SUPPORTED_MODELS = [
        "qwen-turbo",
@@ -39,14 +30,15 @@ class QwenClient(BaseLLMClient):
    ]

    def __init__(self, config: LLMConfig):
+        """Initialize the Qwen Client instance."""
        if config.provider not in [LLMProvider.QWEN, LLMProvider.QWEN_VL]:
            raise ValueError(f"配置provider应为Qwen，实际为{config.provider}")
        super().__init__(config)
        self._init_client()

    def _init_client(self):
-        """初始化HTTP客户端"""
-        # OpenAI兼容API格式
+        """Initialize the HTTP client."""
+        # OpenAI-compatible API format.
        self._client = httpx.Client(
            base_url=self.config.base_url,
            headers={
@@ -64,11 +56,11 @@ class QwenClient(BaseLLMClient):
        temperature: Optional[float] = None,
        **kwargs
    ) -> LLMResponse:
-        """对话补全（OpenAI兼容格式）"""
+        """Run a chat completion (OpenAI-compatible format)."""
        start_time = time.time()

        try:
-            # OpenAI兼容格式的请求体
+            # Request body in the OpenAI-compatible format.
            payload = {
                "model": self.config.model,
                "messages": messages,
@@ -78,7 +70,7 @@ class QwenClient(BaseLLMClient):
                "stream": False
            }

-            # OpenAI兼容接口路径
+            # OpenAI-compatible endpoint path.
            response = self._client.post("/chat/completions", json=payload)
            response.raise_for_status()

@@ -86,7 +78,7 @@ class QwenClient(BaseLLMClient):

            latency_ms = int((time.time() - start_time) * 1000)

-            # OpenAI兼容格式的响应解析
+            # Parse the response in the OpenAI-compatible format.
            choices = data.get("choices", [{}])
            message = choices[0].get("message", {})

@@ -121,42 +113,33 @@ class QwenClient(BaseLLMClient):
        temperature: Optional[float] = None,
        **kwargs
    ) -> Generator[str, None, None]:
-        """
-        流式对话补全（SSE格式）
-
-        Yields:
-            str: 每次返回一个文本片段
-
-        使用示例:
-            for chunk in client.stream_chat(messages):
-                print(chunk, end="", flush=True)
-        """
+        """Stream a chat completion (SSE format), yielding one text fragment at a time."""
        try:
-            # OpenAI兼容格式的请求体，启用流式输出
+            # Request body in the OpenAI-compatible format, with streaming enabled.
            payload = {
                "model": self.config.model,
                "messages": messages,
                "max_tokens": max_tokens or self.config.max_tokens,
                "temperature": temperature or self.config.temperature,
                "top_p": kwargs.get("top_p", self.config.top_p),
-                "stream": True  # 启用流式输出
+                "stream": True  # Enable streaming output.
            }

-            # 使用stream模式发送请求
+            # Send the request in streaming mode.
            with self._client.stream("POST", "/chat/completions", json=payload) as response:
                for line in response.iter_lines():
                    if line:
                        line = line.strip()
-                        # SSE格式: data: {...}
+                        # SSE format: data: {...}
                        if line.startswith("data: "):
-                            data_str = line[6:]  # 移除 "data: " 前缀
+                            data_str = line[6:]  # Strip the "data: " prefix.
                            if data_str == "[DONE]":
                                break
                            try:
                                data = json.loads(data_str)
                                choices = data.get("choices", [])
                                if not choices:
-                                    continue  # 跳过空的choices
+                                    continue  # Skip chunks whose choices list is empty.
                                delta = choices[0].get("delta", {})
                                content = delta.get("content", "")
                                if content:
@@ -179,41 +162,27 @@ class QwenClient(BaseLLMClient):
        temperature: Optional[float] = None,
        **kwargs
    ) -> AsyncGenerator[str, None]:
-        """
-        异步流式对话补全（用于FastAPI SSE响应）
-
-        Yields:
-            str: 每次返回一个文本片段
-        """
+        """Asynchronously stream a chat completion (for FastAPI SSE responses), yielding text fragments."""
        import asyncio

-        # 使用同步流式方法，包装为异步
+        # Wrap the synchronous streaming method as an async generator.
        for chunk in self.stream_chat(messages, max_tokens, temperature, **kwargs):
            yield chunk
-            # 给async循环一个小延迟，让其他任务有机会执行
+            # Yield control to the event loop so other tasks get a chance to run.
            await asyncio.sleep(0)

    def get_available_models(self) -> List[str]:
-        """获取可用模型列表"""
+        """Return available models for the Qwen Client instance."""
        return self.SUPPORTED_MODELS

    def close(self):
-        """关闭客户端"""
+        """Close the HTTP client."""
        if self._client:
            self._client.close()


 class QwenVLClient(BaseLLMClient):
-    """
-    Qwen VL多模态客户端（OpenAI兼容格式）
-
-    支持模型：
-    - qwen-vl-plus
-    - qwen-vl-max
-    - qwen3-vl-plus
-    - qwen2-vl-7b-instruct
-    - qwen2-vl-72b-instruct
-    """
+    """Qwen VL multimodal client (OpenAI-compatible format)."""

    SUPPORTED_MODELS = [
        "qwen-vl-plus",
@@ -224,13 +193,14 @@ class QwenVLClient(BaseLLMClient):
    ]

    def __init__(self, config: LLMConfig):
+        """Initialize the Qwen VL client instance."""
        if config.provider != LLMProvider.QWEN_VL:
            raise ValueError(f"配置provider应为QWEN_VL，实际为{config.provider}")
        super().__init__(config)
        self._init_client()

    def _init_client(self):
-        """初始化HTTP客户端"""
+        """Initialize the HTTP client."""
        self._client = httpx.Client(
            base_url=self.config.base_url,
            headers={
@@ -248,21 +218,11 @@ class QwenVLClient(BaseLLMClient):
        temperature: Optional[float] = None,
        **kwargs
    ) -> LLMResponse:
-        """多模态对话补全（OpenAI兼容格式）
-
-        支持图片输入，消息格式：
-        {
-            "role": "user",
-            "content": [
-                {"type": "image_url", "image_url": {"url": "https://example.com/image.jpg"}},
-                {"type": "text", "text": "描述这张图片"}
-            ]
-        }
-        """
+        """Run a multimodal chat completion (OpenAI-compatible format); message content may include image_url parts."""
        start_time = time.time()

        try:
-            # OpenAI兼容格式的请求体
+            # Request body in the OpenAI-compatible format.
            payload = {
                "model": self.config.model,
                "messages": messages,
@@ -312,7 +272,7 @@ class QwenVLClient(BaseLLMClient):
        temperature: Optional[float] = None,
        **kwargs
    ) -> Generator[str, None, None]:
-        """流式多模态对话补全"""
+        """Stream a multimodal chat completion."""
        try:
            payload = {
                "model": self.config.model,
@@ -335,7 +295,7 @@ class QwenVLClient(BaseLLMClient):
                                data = json.loads(data_str)
                                choices = data.get("choices", [])
                                if not choices:
-                                    continue  # 跳过空的choices
+                                    continue  # Skip chunks whose choices list is empty.
                                delta = choices[0].get("delta", {})
                                content = delta.get("content", "")
                                if content:
@@ -348,11 +308,11 @@ class QwenVLClient(BaseLLMClient):
            yield f"[ERROR: {str(e)}]"

    def get_available_models(self) -> List[str]:
-        """获取可用模型列表"""
+        """Return the list of available models."""
        return self.SUPPORTED_MODELS

    def close(self):
-        """关闭客户端"""
+        """Close the HTTP client."""
        if self._client:
            self._client.close()

@@ -363,7 +323,7 @@ def create_qwen_client(
    base_url: str = "http://6.86.80.4:30080/v1",
    **kwargs
 ) -> QwenClient:
-    """便捷函数：创建Qwen客户端"""
+    """Convenience function: create a Qwen client."""
    config = LLMConfig(
        provider=LLMProvider.QWEN,
        model=model,
@@ -380,7 +340,7 @@ def create_qwen_vl_client(
    base_url: str = "http://6.86.80.4:30080/v1",
    **kwargs
 ) -> QwenVLClient:
-    """便捷函数：创建QwenVL客户端"""
+    """Convenience function: create a Qwen VL client."""
    config = LLMConfig(
        provider=LLMProvider.QWEN_VL,
        model=model,