Fix SSE route dependency and align architecture docs

This commit is contained in:
ash66
2026-05-18 16:32:42 +08:00
parent 86b9ac806a
commit 3f69cad404
149 changed files with 4786 additions and 5957 deletions

View File

@@ -1,14 +1,36 @@
"""LLM服务模块"""
"""Initialize the app.services.llm package."""
from .llm_factory import LLMFactory, get_llm_client
from .base_client import BaseLLMClient, LLMResponse, LLMConfig, LLMProvider
from .base_client import BaseLLMClient, LLMConfig, LLMProvider, LLMResponse
from .deepseek_client import DeepSeekClient
from .llm_factory import LLMFactory, get_llm_client
from .qwen_client import QwenClient, QwenVLClient
from .document_summarizer import DocumentSummarizer, summarize_document, DocumentSummary
# Keep package boundaries explicit so backend imports stay predictable.
__all__ = [
"LLMFactory", "get_llm_client",
"BaseLLMClient", "LLMResponse", "LLMConfig", "LLMProvider",
"DeepSeekClient", "QwenClient", "QwenVLClient",
"DocumentSummarizer", "summarize_document", "DocumentSummary"
"LLMFactory",
"get_llm_client",
"BaseLLMClient",
"LLMResponse",
"LLMConfig",
"LLMProvider",
"DeepSeekClient",
"QwenClient",
"QwenVLClient",
"DocumentSummarizer",
"summarize_document",
"DocumentSummary",
]
def __getattr__(name: str):
    """Resolve the document-summarizer exports lazily (PEP 562 module hook).

    ``DocumentSummarizer``, ``summarize_document`` and ``DocumentSummary`` are
    imported from ``.document_summarizer`` only when one of them is first
    requested from this package.
    NOTE(review): presumably this defers the import to avoid an import
    cycle with ``document_summarizer`` -- confirm.

    Args:
        name: Attribute name that was not found in the module namespace.

    Returns:
        The requested object from ``.document_summarizer``.

    Raises:
        AttributeError: If ``name`` is not one of the lazily exported names.
    """
    if name in {"DocumentSummarizer", "summarize_document", "DocumentSummary"}:
        from .document_summarizer import DocumentSummarizer, DocumentSummary, summarize_document

        value = {
            "DocumentSummarizer": DocumentSummarizer,
            "summarize_document": summarize_document,
            "DocumentSummary": DocumentSummary,
        }[name]
        # Cache in the module namespace so later lookups bypass this hook.
        globals()[name] = value
        return value
    # Use the interpreter's standard wording so tracebacks name the module.
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

View File

@@ -1,13 +1,15 @@
"""LLM客户端基类 - 统一接口定义"""
"""Provide service-layer logic for base client."""
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import List, Dict, Optional, Any
from enum import Enum
# Keep provider-specific behavior explicit so debugging stays straightforward.
class LLMProvider(Enum):
    """Supported LLM providers; each value is the provider's string identifier."""
    DEEPSEEK = "deepseek"
    QWEN = "qwen"
    # Qwen-VL, the multimodal variant.
    QWEN_VL = "qwen_vl"
@@ -15,7 +17,7 @@ class LLMProvider(Enum):
@dataclass
class LLMResponse:
"""LLM响应结果"""
"""Represent the L L M Response type."""
content: str
model: str
usage: Dict[str, int] = field(default_factory=dict)
@@ -25,12 +27,13 @@ class LLMResponse:
@property
def is_success(self) -> bool:
    """Return True when no error was recorded on this response."""
    return self.error is None
@dataclass
class LLMConfig:
"""LLM配置"""
"""Define configuration for l l m config."""
provider: LLMProvider
model: str
api_key: str
@@ -38,19 +41,20 @@ class LLMConfig:
max_tokens: int = 4096
temperature: float = 0.7
top_p: float = 0.9
timeout: int = 300 # 默认超时300秒摘要/Skills生成可能需要较长时间
timeout: int = 300 # Default timeout of 300 s; summary/Skills generation can take a long time.
class BaseLLMClient(ABC):
"""LLM客户端基类"""
"""Represent the Base L L M Client type."""
def __init__(self, config: LLMConfig):
    """Store the configuration; the transport client starts out unset.

    Args:
        config: Provider, model and request settings for this client.
    """
    self.config = config
    # Concrete subclasses assign the real client in ``_init_client``.
    self._client = None
@abstractmethod
def _init_client(self):
    """Initialize the underlying client; every concrete subclass must implement this."""
    pass
@abstractmethod
@@ -61,18 +65,7 @@ class BaseLLMClient(ABC):
temperature: Optional[float] = None,
**kwargs
) -> LLMResponse:
"""
对话补全
Args:
messages: 对话消息列表 [{"role": "user/assistant/system", "content": "..."}]
max_tokens: 最大输出token数
temperature: 温度参数
**kwargs: 其他参数
Returns:
LLMResponse: 响应结果
"""
"""Handle chat for the Base L L M Client instance."""
pass
def complete(
@@ -83,18 +76,7 @@ class BaseLLMClient(ABC):
temperature: Optional[float] = None,
**kwargs
) -> LLMResponse:
"""
单轮补全(便捷方法)
Args:
prompt: 用户输入
system_prompt: 系统提示词
max_tokens: 最大输出token数
temperature: 温度参数
Returns:
LLMResponse: 响应结果
"""
"""Handle complete for the Base L L M Client instance."""
messages = []
if system_prompt:
messages.append({"role": "system", "content": system_prompt})
@@ -104,12 +86,12 @@ class BaseLLMClient(ABC):
@abstractmethod
def get_available_models(self) -> List[str]:
    """Return the names of the models this client can use (implemented by subclasses)."""
    pass
def estimate_tokens(self, text: str) -> int:
    """Roughly estimate how many tokens ``text`` will consume.

    Heuristic: each CJK Unified Ideograph (U+4E00..U+9FFF) counts as about
    1.5 tokens and every other character as about 0.25 tokens.

    Args:
        text: Text to measure.

    Returns:
        The estimated token count, truncated to an integer.
    """
    # Both bounds are written as escapes so neither can be lost to an
    # encoding mishap: an empty-string lower bound compares below every
    # character and would count ASCII text as Chinese.
    chinese_chars = sum(1 for c in text if "\u4e00" <= c <= "\u9fff")
    other_chars = len(text) - chinese_chars
    return int(chinese_chars * 1.5 + other_chars * 0.25)

View File

@@ -1,4 +1,4 @@
"""DeepSeek LLM客户端 - OpenAI兼容API"""
"""Provide service-layer logic for deepseek client."""
import time
from typing import List, Dict, Optional
@@ -6,20 +6,12 @@ from loguru import logger
import httpx
from .base_client import BaseLLMClient, LLMResponse, LLMConfig, LLMProvider
# Keep provider-specific behavior explicit so debugging stays straightforward.
class DeepSeekClient(BaseLLMClient):
"""
DeepSeek API客户端OpenAI兼容格式
支持模型:
- deepseek-chat
- deepseek-coder
- deepseek-reasoner
- deepseek-v3
- deepseek-v3.2
- deepseek-v4-flash
"""
"""Represent the Deep Seek Client type."""
SUPPORTED_MODELS = [
"deepseek-chat",
@@ -31,13 +23,14 @@ class DeepSeekClient(BaseLLMClient):
]
def __init__(self, config: LLMConfig):
    """Validate that the config targets DeepSeek, then set up the client.

    Args:
        config: Client configuration; its provider must be ``LLMProvider.DEEPSEEK``.

    Raises:
        ValueError: If ``config.provider`` is not ``LLMProvider.DEEPSEEK``.
    """
    if config.provider != LLMProvider.DEEPSEEK:
        raise ValueError(f"配置provider应为DEEPSEEK实际为{config.provider}")
    super().__init__(config)
    self._init_client()
def _init_client(self):
"""初始化HTTP客户端"""
"""Handle init client for this module for the Deep Seek Client instance."""
self._client = httpx.Client(
base_url=self.config.base_url,
headers={
@@ -55,7 +48,7 @@ class DeepSeekClient(BaseLLMClient):
temperature: Optional[float] = None,
**kwargs
) -> LLMResponse:
"""对话补全"""
"""Handle chat for the Deep Seek Client instance."""
start_time = time.time()
try:
@@ -103,11 +96,11 @@ class DeepSeekClient(BaseLLMClient):
)
def get_available_models(self) -> List[str]:
    """Return the model names supported by this client.

    Returns:
        A new list copied from ``SUPPORTED_MODELS``, so a caller that mutates
        the result cannot alter the shared class-level list.
    """
    return list(self.SUPPORTED_MODELS)
def close(self):
    """Close the underlying HTTP client, if one is set."""
    http_client = self._client
    if not http_client:
        return
    http_client.close()
@@ -118,7 +111,7 @@ def create_deepseek_client(
base_url: str = "http://6.86.80.4:30080/v1",
**kwargs
) -> DeepSeekClient:
"""便捷函数创建DeepSeek客户端"""
"""Create deepseek client."""
config = LLMConfig(
provider=LLMProvider.DEEPSEEK,
model=model,

View File

@@ -1,17 +1,20 @@
"""文档摘要生成服务 - LLM生成法规文档摘要"""
"""Provide service-layer logic for document summarizer."""
from typing import Dict, Optional
from dataclasses import dataclass
from loguru import logger
from app.services.llm import get_llm_client, BaseLLMClient
from app.services.llm.base_client import BaseLLMClient
from app.services.llm.llm_factory import get_llm_client
from app.services.rag.prompt_templates import get_prompt_template
from app.config.settings import settings
# Keep provider-specific behavior explicit so debugging stays straightforward.
@dataclass
class DocumentSummary:
"""文档摘要结果"""
"""Represent the Document Summary type."""
doc_name: str
summary: str
applicable_scope: str
@@ -24,24 +27,12 @@ class DocumentSummary:
@property
def is_success(self) -> bool:
    """Return True when no error was recorded on this summary."""
    return self.error is None
class DocumentSummarizer:
"""
文档摘要生成器
功能:
- 生成法规文档的核心要点摘要
- 提取适用范围
- 突出关键条款
- 列出合规要点
使用示例:
summarizer = DocumentSummarizer()
result = summarizer.summarize("GB 7258-2017", markdown_content)
print(result.summary)
"""
"""Represent the Document Summarizer type."""
def __init__(
self,
@@ -49,25 +40,18 @@ class DocumentSummarizer:
model: str = None,
max_tokens: int = None
):
"""
初始化摘要生成器
Args:
provider: LLM提供商
model: LLM模型名称
max_tokens: 最大输出token数
"""
"""Initialize the Document Summarizer instance."""
self.provider = provider or settings.llm_provider
self.model = model or settings.llm_model
self.max_tokens = max_tokens or settings.rag_summary_max_tokens
# LLM客户端延迟加载
# The LLM client is created lazily, on first use.
self.llm: Optional[BaseLLMClient] = None
logger.info(f"摘要生成器初始化: provider={self.provider}, model={self.model}")
def _init_llm(self):
"""延迟初始化LLM"""
"""Handle init llm for this module for the Document Summarizer instance."""
if self.llm is None:
self.llm = get_llm_client(
provider=self.provider,
@@ -81,18 +65,7 @@ class DocumentSummarizer:
regulation_type: str = "",
max_tokens: Optional[int] = None
) -> DocumentSummary:
"""
生成文档摘要
Args:
doc_name: 文档名称
content: 文档内容Markdown格式
regulation_type: 法规类型
max_tokens: 最大输出token数
Returns:
DocumentSummary: 摘要结果
"""
"""Handle summarize for the Document Summarizer instance."""
import time
start_time = time.time()
@@ -101,23 +74,23 @@ class DocumentSummarizer:
try:
self._init_llm()
# 使用摘要模板
# Keep provider-specific behavior explicit so debugging stays straightforward.
template = get_prompt_template("document_summary")
# 构建用户消息
# Keep provider-specific behavior explicit so debugging stays straightforward.
user_content = template.user_template.format(
doc_name=doc_name,
content=content[:8000] # 截取前8000字符避免超出token限制
content=content[:8000] # Truncate to the first 8000 characters to stay within the token limit.
)
# 调用LLM
# Keep provider-specific behavior explicit so debugging stays straightforward.
response = self.llm.chat(
messages=[
{"role": "system", "content": template.system_prompt},
{"role": "user", "content": user_content}
],
max_tokens=max_tokens or self.max_tokens,
temperature=0.3 # 低温度保证摘要准确性
temperature=0.3 # Low temperature to keep the summary accurate.
)
latency_ms = int((time.time() - start_time) * 1000)
@@ -135,7 +108,7 @@ class DocumentSummarizer:
error=response.error
)
# 解析摘要结构
# Keep provider-specific behavior explicit so debugging stays straightforward.
summary_data = self._parse_summary(response.content)
logger.success(f"摘要生成完成: {doc_name}, {latency_ms}ms")
@@ -166,7 +139,7 @@ class DocumentSummarizer:
)
def _parse_summary(self, content: str) -> Dict:
"""解析摘要内容(提取结构化信息)"""
"""Handle parse summary for this module for the Document Summarizer instance."""
result = {
"summary": content,
"applicable_scope": "",
@@ -175,26 +148,26 @@ class DocumentSummarizer:
"compliance_points": []
}
# 简单解析(提取关键信息)
# Keep provider-specific behavior explicit so debugging stays straightforward.
lines = content.split("\n")
for line in lines:
line = line.strip()
# 提取适用范围
# Keep provider-specific behavior explicit so debugging stays straightforward.
if "适用范围" in line or "适用对象" in line:
result["applicable_scope"] = line.split("")[-1].strip() if "" in line else line.split(":")[-1].strip()
# 提取关键条款
# Keep provider-specific behavior explicit so debugging stays straightforward.
if line.startswith("- 【条款") or line.startswith("【条款"):
result["key_clauses"].append(line)
# 提取关键术语
# Keep provider-specific behavior explicit so debugging stays straightforward.
if "关键术语" in line or "术语定义" in line:
# 继续读取后续几行
# Continue reading the following lines (not implemented yet; this branch is a no-op).
pass
# 提取合规要点
# Keep provider-specific behavior explicit so debugging stays straightforward.
if "合规要点" in line or "必须满足" in line:
pass
@@ -204,15 +177,7 @@ class DocumentSummarizer:
self,
documents: list
) -> list:
"""
批量生成摘要
Args:
documents: 文档列表 [{"doc_name": str, "content": str}, ...]
Returns:
list: 摘要结果列表
"""
"""Handle batch summarize for the Document Summarizer instance."""
results = []
for doc in documents:
result = self.summarize(doc["doc_name"], doc["content"])
@@ -225,6 +190,6 @@ def summarize_document(
content: str,
**kwargs
) -> DocumentSummary:
"""便捷函数:生成文档摘要"""
"""Handle summarize document."""
summarizer = DocumentSummarizer(**kwargs)
return summarizer.summarize(doc_name, content)

View File

@@ -1,4 +1,4 @@
"""LLM工厂 - 统一创建和管理LLM客户端"""
"""Provide service-layer logic for llm factory."""
from typing import Optional, Dict, Any
from loguru import logger
@@ -7,16 +7,18 @@ from functools import lru_cache
from .base_client import BaseLLMClient, LLMConfig, LLMProvider, LLMResponse
from .deepseek_client import DeepSeekClient
from .qwen_client import QwenClient, QwenVLClient
# Keep provider-specific behavior explicit so debugging stays straightforward.
# 默认模型映射
# Default model for each provider.
DEFAULT_MODELS = {
LLMProvider.DEEPSEEK: "deepseek-v4-flash",
LLMProvider.QWEN: "qwen3.5-flash",
LLMProvider.QWEN_VL: "qwen3-vl-plus"
}
# API基础URL使用统一代理服务
# Default API base URL per provider (routed through the unified proxy service).
DEFAULT_BASE_URLS = {
LLMProvider.DEEPSEEK: "http://6.86.80.4:30080/v1",
LLMProvider.QWEN: "http://6.86.80.4:30080/v1",
@@ -25,31 +27,13 @@ DEFAULT_BASE_URLS = {
class LLMFactory:
"""
LLM客户端工厂支持全局缓存
"""Represent the L L M Factory type."""
支持的提供商和模型:
- DeepSeek: deepseek-chat (DeepSeek-V3), deepseek-coder
- Qwen: qwen-turbo, qwen-plus, qwen-max, qwen-long
- QwenVL: qwen-vl-plus, qwen-vl-max (多模态)
使用示例:
factory = LLMFactory()
# 使用默认配置
client = factory.create("deepseek")
# 自定义配置
client = factory.create("qwen", model="qwen-max", temperature=0.5)
# 调用LLM
response = client.complete("你好,介绍一下自己")
"""
# 全局客户端缓存(类级别,跨实例共享)
# Global client cache (class-level, shared across instances).
_global_instances: Dict[str, BaseLLMClient] = {}
def __init__(self):
    """Create a factory with an empty per-instance configuration cache."""
    # NOTE(review): no reads or writes of this cache are visible in this view -- confirm it is still used.
    self._config_cache: Dict[str, Any] = {}
def create(
@@ -62,24 +46,10 @@ class LLMFactory:
temperature: float = 0.7,
**kwargs
) -> BaseLLMClient:
"""
创建LLM客户端
Args:
provider: 提供商名称 ("deepseek", "qwen", "qwen_vl")
api_key: API密钥如未提供从环境变量获取
model: 模型名称(如未提供,使用默认模型)
base_url: API基础URL
max_tokens: 最大输出token数
temperature: 温度参数
**kwargs: 其他配置参数
Returns:
BaseLLMClient: LLM客户端实例
"""
"""Handle create for the L L M Factory instance."""
provider_enum = self._parse_provider(provider)
# 获取配置
# Keep provider-specific behavior explicit so debugging stays straightforward.
api_key = api_key or self._get_api_key(provider_enum)
model = model or DEFAULT_MODELS.get(provider_enum)
base_url = base_url or DEFAULT_BASE_URLS.get(provider_enum)
@@ -87,7 +57,7 @@ class LLMFactory:
if not api_key:
raise ValueError(f"缺少API密钥请设置环境变量或传入api_key参数")
# 检查全局缓存
# Keep provider-specific behavior explicit so debugging stays straightforward.
cache_key = f"{provider}_{model}"
if cache_key in LLMFactory._global_instances:
logger.debug(f"使用缓存的LLM客户端: {cache_key}")
@@ -103,17 +73,17 @@ class LLMFactory:
**kwargs
)
# 创建客户端
# Keep provider-specific behavior explicit so debugging stays straightforward.
client = self._create_client(config)
# 缓存到全局实例
# Keep provider-specific behavior explicit so debugging stays straightforward.
LLMFactory._global_instances[cache_key] = client
logger.info(f"LLM客户端创建成功并缓存: {provider} - {model}")
return client
def _parse_provider(self, provider: str) -> LLMProvider:
"""解析提供商名称"""
"""Handle parse provider for this module for the L L M Factory instance."""
provider_map = {
"deepseek": LLMProvider.DEEPSEEK,
"deepseek-v3": LLMProvider.DEEPSEEK,
@@ -137,7 +107,7 @@ class LLMFactory:
return provider_map[provider_lower]
def _get_api_key(self, provider: LLMProvider) -> Optional[str]:
"""从环境变量获取API密钥"""
"""Handle get api key for this module for the L L M Factory instance."""
import os
key_map = {
@@ -154,7 +124,7 @@ class LLMFactory:
return None
def _create_client(self, config: LLMConfig) -> BaseLLMClient:
"""创建具体客户端"""
"""Handle create client for this module for the L L M Factory instance."""
client_map = {
LLMProvider.DEEPSEEK: DeepSeekClient,
LLMProvider.QWEN: QwenClient,
@@ -168,14 +138,14 @@ class LLMFactory:
return client_class(config)
def get_cached(self, provider: str, model: Optional[str] = None) -> Optional[BaseLLMClient]:
    """Look up an already-created client in the class-level cache.

    The cache key is ``"{provider}_{model}"``, built from the provider string
    exactly as passed, the same format ``create`` uses when it stores a client.

    Args:
        provider: Provider name; parsed by ``_parse_provider`` before the
            lookup, so any error it raises propagates to the caller.
        model: Model name; falls back to the provider's entry in
            ``DEFAULT_MODELS`` when omitted or empty.

    Returns:
        The cached client, or ``None`` when nothing is cached under that key.
    """
    provider_enum = self._parse_provider(provider)
    if not model:
        model = DEFAULT_MODELS.get(provider_enum)
    return LLMFactory._global_instances.get(f"{provider}_{model}")
def list_available_providers(self) -> Dict[str, list]:
"""列出可用的提供商和模型"""
"""List available providers for the L L M Factory instance."""
return {
"deepseek": DeepSeekClient.SUPPORTED_MODELS,
"qwen": QwenClient.SUPPORTED_MODELS,
@@ -184,12 +154,7 @@ class LLMFactory:
@classmethod
def preload_clients(cls, providers: list = None):
"""
预加载LLM客户端应用启动时调用
Args:
providers: 要预加载的提供商列表默认加载qwen和deepseek
"""
"""Handle preload clients for the L L M Factory instance."""
if providers is None:
providers = ["qwen", "deepseek"]
@@ -203,9 +168,9 @@ class LLMFactory:
@classmethod
def get_global_client(cls, provider: str, model: Optional[str] = None) -> Optional[BaseLLMClient]:
"""获取全局缓存的客户端"""
"""Return global client for the L L M Factory instance."""
provider_lower = provider.lower()
# 处理模型名作为provider的情况(如 qwen3.5-flash
# Handle a model name being passed as the provider (e.g. qwen3.5-flash).
if provider_lower.startswith("qwen"):
provider_lower = "qwen"
model = model or DEFAULT_MODELS.get(LLMProvider.QWEN if provider_lower == "qwen" else LLMProvider.DEEPSEEK)
@@ -214,7 +179,7 @@ class LLMFactory:
@classmethod
def cleanup(cls):
"""清理所有缓存的客户端"""
"""Handle cleanup for the L L M Factory instance."""
for cache_key, client in cls._global_instances.items():
try:
client.close()
@@ -227,7 +192,7 @@ class LLMFactory:
@lru_cache
def get_llm_factory() -> LLMFactory:
    """Return the shared ``LLMFactory`` instance.

    ``lru_cache`` memoizes this zero-argument call, so the factory is created
    on the first call and the same object is returned afterwards.
    """
    return LLMFactory()
@@ -236,20 +201,10 @@ def get_llm_client(
model: Optional[str] = None,
**kwargs
) -> BaseLLMClient:
"""
便捷函数获取LLM客户端优先使用缓存
Args:
provider: 提供商名称
model: 模型名称
**kwargs: 其他配置
Returns:
BaseLLMClient: LLM客户端实例
"""
"""Return llm client."""
factory = get_llm_factory()
# 先尝试获取缓存的实例
# Keep provider-specific behavior explicit so debugging stays straightforward.
cached = factory.get_cached(provider, model)
if cached:
return cached

View File

@@ -1,4 +1,4 @@
"""Qwen LLM客户端 - 支持OpenAI兼容API格式"""
"""Provide service-layer logic for qwen client."""
import time
import json
@@ -7,21 +7,12 @@ from loguru import logger
import httpx
from .base_client import BaseLLMClient, LLMResponse, LLMConfig, LLMProvider
# Keep provider-specific behavior explicit so debugging stays straightforward.
class QwenClient(BaseLLMClient):
"""
Qwen API客户端OpenAI兼容格式
支持通过new-api等代理服务调用
- qwen-turbo
- qwen-plus
- qwen-max
- qwen3.5-flash (推荐:快速响应)
- qwen3.5-plus
- qwen-long
- qwen2.5系列
"""
"""Represent the Qwen Client type."""
SUPPORTED_MODELS = [
"qwen-turbo",
@@ -39,14 +30,15 @@ class QwenClient(BaseLLMClient):
]
def __init__(self, config: LLMConfig):
    """Validate that the config targets a Qwen provider, then set up the client.

    Args:
        config: Client configuration; its provider must be
            ``LLMProvider.QWEN`` or ``LLMProvider.QWEN_VL``.

    Raises:
        ValueError: If ``config.provider`` is neither of the two Qwen providers.
    """
    if config.provider not in [LLMProvider.QWEN, LLMProvider.QWEN_VL]:
        raise ValueError(f"配置provider应为Qwen实际为{config.provider}")
    super().__init__(config)
    self._init_client()
def _init_client(self):
"""初始化HTTP客户端"""
# OpenAI兼容API格式
"""Handle init client for this module for the Qwen Client instance."""
# Keep provider-specific behavior explicit so debugging stays straightforward.
self._client = httpx.Client(
base_url=self.config.base_url,
headers={
@@ -64,11 +56,11 @@ class QwenClient(BaseLLMClient):
temperature: Optional[float] = None,
**kwargs
) -> LLMResponse:
"""对话补全OpenAI兼容格式"""
"""Handle chat for the Qwen Client instance."""
start_time = time.time()
try:
# OpenAI兼容格式的请求体
# Keep provider-specific behavior explicit so debugging stays straightforward.
payload = {
"model": self.config.model,
"messages": messages,
@@ -78,7 +70,7 @@ class QwenClient(BaseLLMClient):
"stream": False
}
# OpenAI兼容接口路径
# Keep provider-specific behavior explicit so debugging stays straightforward.
response = self._client.post("/chat/completions", json=payload)
response.raise_for_status()
@@ -86,7 +78,7 @@ class QwenClient(BaseLLMClient):
latency_ms = int((time.time() - start_time) * 1000)
# OpenAI兼容格式的响应解析
# Keep provider-specific behavior explicit so debugging stays straightforward.
choices = data.get("choices", [{}])
message = choices[0].get("message", {})
@@ -121,42 +113,33 @@ class QwenClient(BaseLLMClient):
temperature: Optional[float] = None,
**kwargs
) -> Generator[str, None, None]:
"""
流式对话补全SSE格式
Yields:
str: 每次返回一个文本片段
使用示例:
for chunk in client.stream_chat(messages):
print(chunk, end="", flush=True)
"""
"""Stream chat for the Qwen Client instance."""
try:
# OpenAI兼容格式的请求体启用流式输出
# Keep provider-specific behavior explicit so debugging stays straightforward.
payload = {
"model": self.config.model,
"messages": messages,
"max_tokens": max_tokens or self.config.max_tokens,
"temperature": temperature or self.config.temperature,
"top_p": kwargs.get("top_p", self.config.top_p),
"stream": True # 启用流式输出
"stream": True # Keep provider-specific behavior explicit so debugging stays straightforward.
}
# 使用stream模式发送请求
# Keep provider-specific behavior explicit so debugging stays straightforward.
with self._client.stream("POST", "/chat/completions", json=payload) as response:
for line in response.iter_lines():
if line:
line = line.strip()
# SSE格式: data: {...}
# Keep provider-specific behavior explicit so debugging stays straightforward.
if line.startswith("data: "):
data_str = line[6:] # 移除 "data: " 前缀
data_str = line[6:] # Strip the "data: " prefix.
if data_str == "[DONE]":
break
try:
data = json.loads(data_str)
choices = data.get("choices", [])
if not choices:
continue # 跳过空的choices
continue # Keep provider-specific behavior explicit so debugging stays straightforward.
delta = choices[0].get("delta", {})
content = delta.get("content", "")
if content:
@@ -179,41 +162,27 @@ class QwenClient(BaseLLMClient):
temperature: Optional[float] = None,
**kwargs
) -> AsyncGenerator[str, None]:
"""
异步流式对话补全用于FastAPI SSE响应
Yields:
str: 每次返回一个文本片段
"""
"""Handle async stream chat for the Qwen Client instance."""
import asyncio
# 使用同步流式方法,包装为异步
# Wrap the synchronous streaming method as an async generator.
for chunk in self.stream_chat(messages, max_tokens, temperature, **kwargs):
yield chunk
# 给async循环一个小延迟让其他任务有机会执行
# Yield to the event loop so other tasks get a chance to run.
await asyncio.sleep(0)
def get_available_models(self) -> List[str]:
    """Return the model names supported by this client.

    Returns:
        A new list copied from ``SUPPORTED_MODELS``, so a caller that mutates
        the result cannot alter the shared class-level list.
    """
    return list(self.SUPPORTED_MODELS)
def close(self):
    """Close the underlying HTTP client, if one is set."""
    http_client = self._client
    if not http_client:
        return
    http_client.close()
class QwenVLClient(BaseLLMClient):
"""
Qwen VL多模态客户端OpenAI兼容格式
支持模型:
- qwen-vl-plus
- qwen-vl-max
- qwen3-vl-plus
- qwen2-vl-7b-instruct
- qwen2-vl-72b-instruct
"""
"""Represent the Qwen V L Client type."""
SUPPORTED_MODELS = [
"qwen-vl-plus",
@@ -224,13 +193,14 @@ class QwenVLClient(BaseLLMClient):
]
def __init__(self, config: LLMConfig):
    """Validate that the config targets Qwen-VL, then set up the client.

    Args:
        config: Client configuration; its provider must be ``LLMProvider.QWEN_VL``.

    Raises:
        ValueError: If ``config.provider`` is not ``LLMProvider.QWEN_VL``.
    """
    if config.provider != LLMProvider.QWEN_VL:
        raise ValueError(f"配置provider应为QWEN_VL实际为{config.provider}")
    super().__init__(config)
    self._init_client()
def _init_client(self):
"""初始化HTTP客户端"""
"""Handle init client for this module for the Qwen V L Client instance."""
self._client = httpx.Client(
base_url=self.config.base_url,
headers={
@@ -248,21 +218,11 @@ class QwenVLClient(BaseLLMClient):
temperature: Optional[float] = None,
**kwargs
) -> LLMResponse:
"""多模态对话补全OpenAI兼容格式
支持图片输入,消息格式:
{
"role": "user",
"content": [
{"type": "image_url", "image_url": {"url": "https://example.com/image.jpg"}},
{"type": "text", "text": "描述这张图片"}
]
}
"""
"""Handle chat for the Qwen V L Client instance."""
start_time = time.time()
try:
# OpenAI兼容格式的请求体
# Keep provider-specific behavior explicit so debugging stays straightforward.
payload = {
"model": self.config.model,
"messages": messages,
@@ -312,7 +272,7 @@ class QwenVLClient(BaseLLMClient):
temperature: Optional[float] = None,
**kwargs
) -> Generator[str, None, None]:
"""流式多模态对话补全"""
"""Stream chat for the Qwen V L Client instance."""
try:
payload = {
"model": self.config.model,
@@ -335,7 +295,7 @@ class QwenVLClient(BaseLLMClient):
data = json.loads(data_str)
choices = data.get("choices", [])
if not choices:
continue # 跳过空的choices
continue # Keep provider-specific behavior explicit so debugging stays straightforward.
delta = choices[0].get("delta", {})
content = delta.get("content", "")
if content:
@@ -348,11 +308,11 @@ class QwenVLClient(BaseLLMClient):
yield f"[ERROR: {str(e)}]"
def get_available_models(self) -> List[str]:
    """Return the model names supported by this client.

    Returns:
        A new list copied from ``SUPPORTED_MODELS``, so a caller that mutates
        the result cannot alter the shared class-level list.
    """
    return list(self.SUPPORTED_MODELS)
def close(self):
    """Close the underlying HTTP client, if one is set."""
    http_client = self._client
    if not http_client:
        return
    http_client.close()
@@ -363,7 +323,7 @@ def create_qwen_client(
base_url: str = "http://6.86.80.4:30080/v1",
**kwargs
) -> QwenClient:
"""便捷函数创建Qwen客户端"""
"""Create qwen client."""
config = LLMConfig(
provider=LLMProvider.QWEN,
model=model,
@@ -380,7 +340,7 @@ def create_qwen_vl_client(
base_url: str = "http://6.86.80.4:30080/v1",
**kwargs
) -> QwenVLClient:
"""便捷函数创建QwenVL客户端"""
"""Create qwen vl client."""
config = LLMConfig(
provider=LLMProvider.QWEN_VL,
model=model,