Fix SSE route dependency and align architecture docs

This commit is contained in:
ash66
2026-05-18 16:32:42 +08:00
parent 86b9ac806a
commit 3f69cad404
149 changed files with 4786 additions and 5957 deletions

View File

@@ -1,4 +1,4 @@
"""Qwen LLM客户端 - 支持OpenAI兼容API格式"""
"""Provide service-layer logic for qwen client."""
import time
import json
@@ -7,21 +7,12 @@ from loguru import logger
import httpx
from .base_client import BaseLLMClient, LLMResponse, LLMConfig, LLMProvider
# Keep provider-specific behavior explicit so debugging stays straightforward.
class QwenClient(BaseLLMClient):
"""
Qwen API客户端OpenAI兼容格式
支持通过new-api等代理服务调用
- qwen-turbo
- qwen-plus
- qwen-max
- qwen3.5-flash (推荐:快速响应)
- qwen3.5-plus
- qwen-long
- qwen2.5系列
"""
"""Represent the Qwen Client type."""
SUPPORTED_MODELS = [
"qwen-turbo",
@@ -39,14 +30,15 @@ class QwenClient(BaseLLMClient):
]
def __init__(self, config: LLMConfig):
    """Create a Qwen client from ``config``.

    Args:
        config: LLM settings. ``config.provider`` must be either
            ``LLMProvider.QWEN`` or ``LLMProvider.QWEN_VL``.

    Raises:
        ValueError: If ``config.provider`` is any other provider.
    """
    accepted_providers = [LLMProvider.QWEN, LLMProvider.QWEN_VL]
    provider_ok = config.provider in accepted_providers
    if not provider_ok:
        raise ValueError(f"配置provider应为Qwen实际为{config.provider}")

    # Base-class initialisation runs first, then the HTTP client is built.
    super().__init__(config)
    self._init_client()
def _init_client(self):
"""初始化HTTP客户端"""
# OpenAI兼容API格式
"""Handle init client for this module for the Qwen Client instance."""
# Keep provider-specific behavior explicit so debugging stays straightforward.
self._client = httpx.Client(
base_url=self.config.base_url,
headers={
@@ -64,11 +56,11 @@ class QwenClient(BaseLLMClient):
temperature: Optional[float] = None,
**kwargs
) -> LLMResponse:
"""对话补全OpenAI兼容格式"""
"""Handle chat for the Qwen Client instance."""
start_time = time.time()
try:
# OpenAI兼容格式的请求体
# Keep provider-specific behavior explicit so debugging stays straightforward.
payload = {
"model": self.config.model,
"messages": messages,
@@ -78,7 +70,7 @@ class QwenClient(BaseLLMClient):
"stream": False
}
# OpenAI兼容接口路径
# Keep provider-specific behavior explicit so debugging stays straightforward.
response = self._client.post("/chat/completions", json=payload)
response.raise_for_status()
@@ -86,7 +78,7 @@ class QwenClient(BaseLLMClient):
latency_ms = int((time.time() - start_time) * 1000)
# OpenAI兼容格式的响应解析
# Keep provider-specific behavior explicit so debugging stays straightforward.
choices = data.get("choices", [{}])
message = choices[0].get("message", {})
@@ -121,42 +113,33 @@ class QwenClient(BaseLLMClient):
temperature: Optional[float] = None,
**kwargs
) -> Generator[str, None, None]:
"""
流式对话补全SSE格式
Yields:
str: 每次返回一个文本片段
使用示例:
for chunk in client.stream_chat(messages):
print(chunk, end="", flush=True)
"""
"""Stream chat for the Qwen Client instance."""
try:
# OpenAI兼容格式的请求体启用流式输出
# Keep provider-specific behavior explicit so debugging stays straightforward.
payload = {
"model": self.config.model,
"messages": messages,
"max_tokens": max_tokens or self.config.max_tokens,
"temperature": temperature or self.config.temperature,
"top_p": kwargs.get("top_p", self.config.top_p),
"stream": True # 启用流式输出
"stream": True # Keep provider-specific behavior explicit so debugging stays straightforward.
}
# 使用stream模式发送请求
# Keep provider-specific behavior explicit so debugging stays straightforward.
with self._client.stream("POST", "/chat/completions", json=payload) as response:
for line in response.iter_lines():
if line:
line = line.strip()
# SSE格式: data: {...}
# Keep provider-specific behavior explicit so debugging stays straightforward.
if line.startswith("data: "):
data_str = line[6:] # 移除 "data: " 前缀
data_str = line[6:] # Keep provider-specific behavior explicit so debugging stays straightforward.
if data_str == "[DONE]":
break
try:
data = json.loads(data_str)
choices = data.get("choices", [])
if not choices:
continue # 跳过空的choices
continue # Keep provider-specific behavior explicit so debugging stays straightforward.
delta = choices[0].get("delta", {})
content = delta.get("content", "")
if content:
@@ -179,41 +162,27 @@ class QwenClient(BaseLLMClient):
temperature: Optional[float] = None,
**kwargs
) -> AsyncGenerator[str, None]:
"""
异步流式对话补全用于FastAPI SSE响应
Yields:
str: 每次返回一个文本片段
"""
"""Handle async stream chat for the Qwen Client instance."""
import asyncio
# 使用同步流式方法,包装为异步
# Keep provider-specific behavior explicit so debugging stays straightforward.
for chunk in self.stream_chat(messages, max_tokens, temperature, **kwargs):
yield chunk
# 给async循环一个小延迟让其他任务有机会执行
# Keep provider-specific behavior explicit so debugging stays straightforward.
await asyncio.sleep(0)
def get_available_models(self) -> List[str]:
    """Return the model names this client declares as supported.

    Returns:
        A new list holding the entries of ``SUPPORTED_MODELS``. A copy is
        returned so that callers who mutate the result cannot alter the
        list shared by every instance of the class.
    """
    return list(self.SUPPORTED_MODELS)
def close(self):
    """Close the underlying HTTP client, if one is set.

    Does nothing when ``self._client`` is falsy (for example ``None``).
    """
    if not self._client:
        return
    self._client.close()
class QwenVLClient(BaseLLMClient):
"""
Qwen VL多模态客户端OpenAI兼容格式
支持模型:
- qwen-vl-plus
- qwen-vl-max
- qwen3-vl-plus
- qwen2-vl-7b-instruct
- qwen2-vl-72b-instruct
"""
"""Represent the Qwen V L Client type."""
SUPPORTED_MODELS = [
"qwen-vl-plus",
@@ -224,13 +193,14 @@ class QwenVLClient(BaseLLMClient):
]
def __init__(self, config: LLMConfig):
    """Create a Qwen VL client from ``config``.

    Args:
        config: LLM settings. ``config.provider`` must be
            ``LLMProvider.QWEN_VL``.

    Raises:
        ValueError: If ``config.provider`` is not ``LLMProvider.QWEN_VL``.
    """
    provider_mismatch = config.provider != LLMProvider.QWEN_VL
    if provider_mismatch:
        raise ValueError(f"配置provider应为QWEN_VL实际为{config.provider}")

    # Base-class initialisation runs first, then the HTTP client is built.
    super().__init__(config)
    self._init_client()
def _init_client(self):
"""初始化HTTP客户端"""
"""Handle init client for this module for the Qwen V L Client instance."""
self._client = httpx.Client(
base_url=self.config.base_url,
headers={
@@ -248,21 +218,11 @@ class QwenVLClient(BaseLLMClient):
temperature: Optional[float] = None,
**kwargs
) -> LLMResponse:
"""多模态对话补全OpenAI兼容格式
支持图片输入,消息格式:
{
"role": "user",
"content": [
{"type": "image_url", "image_url": {"url": "https://example.com/image.jpg"}},
{"type": "text", "text": "描述这张图片"}
]
}
"""
"""Handle chat for the Qwen V L Client instance."""
start_time = time.time()
try:
# OpenAI兼容格式的请求体
# Keep provider-specific behavior explicit so debugging stays straightforward.
payload = {
"model": self.config.model,
"messages": messages,
@@ -312,7 +272,7 @@ class QwenVLClient(BaseLLMClient):
temperature: Optional[float] = None,
**kwargs
) -> Generator[str, None, None]:
"""流式多模态对话补全"""
"""Stream chat for the Qwen V L Client instance."""
try:
payload = {
"model": self.config.model,
@@ -335,7 +295,7 @@ class QwenVLClient(BaseLLMClient):
data = json.loads(data_str)
choices = data.get("choices", [])
if not choices:
continue # 跳过空的choices
continue # Keep provider-specific behavior explicit so debugging stays straightforward.
delta = choices[0].get("delta", {})
content = delta.get("content", "")
if content:
@@ -348,11 +308,11 @@ class QwenVLClient(BaseLLMClient):
yield f"[ERROR: {str(e)}]"
def get_available_models(self) -> List[str]:
    """Return the model names this client declares as supported.

    Returns:
        A new list holding the entries of ``SUPPORTED_MODELS``. A copy is
        returned so that callers who mutate the result cannot alter the
        list shared by every instance of the class.
    """
    return list(self.SUPPORTED_MODELS)
def close(self):
    """Close the underlying HTTP client, if one is set.

    Does nothing when ``self._client`` is falsy (for example ``None``).
    """
    if not self._client:
        return
    self._client.close()
@@ -363,7 +323,7 @@ def create_qwen_client(
base_url: str = "http://6.86.80.4:30080/v1",
**kwargs
) -> QwenClient:
"""便捷函数创建Qwen客户端"""
"""Create qwen client."""
config = LLMConfig(
provider=LLMProvider.QWEN,
model=model,
@@ -380,7 +340,7 @@ def create_qwen_vl_client(
base_url: str = "http://6.86.80.4:30080/v1",
**kwargs
) -> QwenVLClient:
"""便捷函数创建QwenVL客户端"""
"""Create qwen vl client."""
config = LLMConfig(
provider=LLMProvider.QWEN_VL,
model=model,