Fix SSE route dependency and align architecture docs
This commit is contained in:
@@ -1,14 +1,36 @@
|
||||
"""LLM服务模块"""
|
||||
"""Initialize the app.services.llm package."""
|
||||
|
||||
from .llm_factory import LLMFactory, get_llm_client
|
||||
from .base_client import BaseLLMClient, LLMResponse, LLMConfig, LLMProvider
|
||||
from .base_client import BaseLLMClient, LLMConfig, LLMProvider, LLMResponse
|
||||
from .deepseek_client import DeepSeekClient
|
||||
from .llm_factory import LLMFactory, get_llm_client
|
||||
from .qwen_client import QwenClient, QwenVLClient
|
||||
from .document_summarizer import DocumentSummarizer, summarize_document, DocumentSummary
|
||||
# Keep package boundaries explicit so backend imports stay predictable.
|
||||
|
||||
|
||||
# Public API of the ``app.services.llm`` package.
# One name per line: the previous multi-name layout ended with an entry that
# had no trailing comma, which silently fuses two adjacent string literals.
__all__ = [
    "LLMFactory",
    "get_llm_client",
    "BaseLLMClient",
    "LLMResponse",
    "LLMConfig",
    "LLMProvider",
    "DeepSeekClient",
    "QwenClient",
    "QwenVLClient",
    # The three summarizer names are not imported at module load; they are
    # resolved on first access by the module-level ``__getattr__``.
    "DocumentSummarizer",
    "summarize_document",
    "DocumentSummary",
]
|
||||
|
||||
|
||||
def __getattr__(name: str):
    """Resolve the document-summarizer exports lazily on first attribute access.

    Python calls a module-level ``__getattr__`` only when normal attribute
    lookup on the module fails (PEP 562), so the names imported eagerly at the
    top of this package never reach this function.

    Args:
        name: Attribute requested on the package.

    Returns:
        ``DocumentSummarizer``, ``summarize_document`` or ``DocumentSummary``
        from the ``document_summarizer`` submodule.

    Raises:
        AttributeError: If ``name`` is not one of the lazily exported names.
    """
    if name in {"DocumentSummarizer", "summarize_document", "DocumentSummary"}:
        # Deferred import: document_summarizer itself imports from this
        # package's submodules, so loading it here rather than at package
        # import time presumably avoids a circular import.
        from .document_summarizer import DocumentSummarizer, DocumentSummary, summarize_document

        lazy_exports = {
            "DocumentSummarizer": DocumentSummarizer,
            "summarize_document": summarize_document,
            "DocumentSummary": DocumentSummary,
        }
        return lazy_exports[name]
    # Same wording as the interpreter's own error for a missing module attribute.
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
||||
|
||||
@@ -1,13 +1,15 @@
|
||||
"""LLM客户端基类 - 统一接口定义"""
|
||||
"""Provide service-layer logic for base client."""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass, field
|
||||
from typing import List, Dict, Optional, Any
|
||||
from enum import Enum
|
||||
# Keep provider-specific behavior explicit so debugging stays straightforward.
|
||||
|
||||
|
||||
|
||||
class LLMProvider(Enum):
|
||||
"""LLM提供商"""
|
||||
"""Define the L L M Provider enumeration."""
|
||||
DEEPSEEK = "deepseek"
|
||||
QWEN = "qwen"
|
||||
QWEN_VL = "qwen_vl"
|
||||
@@ -15,7 +17,7 @@ class LLMProvider(Enum):
|
||||
|
||||
@dataclass
|
||||
class LLMResponse:
|
||||
"""LLM响应结果"""
|
||||
"""Represent the L L M Response type."""
|
||||
content: str
|
||||
model: str
|
||||
usage: Dict[str, int] = field(default_factory=dict)
|
||||
@@ -25,12 +27,13 @@ class LLMResponse:
|
||||
|
||||
@property
def is_success(self) -> bool:
    """Report whether the LLM call completed without an error.

    Returns:
        ``True`` when ``self.error`` is ``None``, ``False`` otherwise.
    """
    return self.error is None
|
||||
|
||||
|
||||
@dataclass
|
||||
class LLMConfig:
|
||||
"""LLM配置"""
|
||||
"""Define configuration for l l m config."""
|
||||
provider: LLMProvider
|
||||
model: str
|
||||
api_key: str
|
||||
@@ -38,19 +41,20 @@ class LLMConfig:
|
||||
max_tokens: int = 4096
|
||||
temperature: float = 0.7
|
||||
top_p: float = 0.9
|
||||
timeout: int = 300 # 默认超时300秒(摘要/Skills生成可能需要较长时间)
|
||||
timeout: int = 300 # Keep provider-specific behavior explicit so debugging stays straightforward.
|
||||
|
||||
|
||||
class BaseLLMClient(ABC):
|
||||
"""LLM客户端基类"""
|
||||
"""Represent the Base L L M Client type."""
|
||||
|
||||
def __init__(self, config: LLMConfig):
    """Store the configuration shared by every provider client.

    Args:
        config: Provider, model, credentials and sampling defaults.
    """
    self.config = config
    # Placeholder only: the concrete clients in this package assign the real
    # transport object in their ``_init_client`` implementation.
    self._client = None
|
||||
|
||||
@abstractmethod
def _init_client(self):
    """Create the provider-specific client used for API calls.

    Subclass hook. The implementations visible in this package build an
    ``httpx.Client`` and store it on ``self._client``.
    """
|
||||
|
||||
@abstractmethod
|
||||
@@ -61,18 +65,7 @@ class BaseLLMClient(ABC):
|
||||
temperature: Optional[float] = None,
|
||||
**kwargs
|
||||
) -> LLMResponse:
|
||||
"""
|
||||
对话补全
|
||||
|
||||
Args:
|
||||
messages: 对话消息列表 [{"role": "user/assistant/system", "content": "..."}]
|
||||
max_tokens: 最大输出token数
|
||||
temperature: 温度参数
|
||||
**kwargs: 其他参数
|
||||
|
||||
Returns:
|
||||
LLMResponse: 响应结果
|
||||
"""
|
||||
"""Handle chat for the Base L L M Client instance."""
|
||||
pass
|
||||
|
||||
def complete(
|
||||
@@ -83,18 +76,7 @@ class BaseLLMClient(ABC):
|
||||
temperature: Optional[float] = None,
|
||||
**kwargs
|
||||
) -> LLMResponse:
|
||||
"""
|
||||
单轮补全(便捷方法)
|
||||
|
||||
Args:
|
||||
prompt: 用户输入
|
||||
system_prompt: 系统提示词
|
||||
max_tokens: 最大输出token数
|
||||
temperature: 温度参数
|
||||
|
||||
Returns:
|
||||
LLMResponse: 响应结果
|
||||
"""
|
||||
"""Handle complete for the Base L L M Client instance."""
|
||||
messages = []
|
||||
if system_prompt:
|
||||
messages.append({"role": "system", "content": system_prompt})
|
||||
@@ -104,12 +86,12 @@ class BaseLLMClient(ABC):
|
||||
|
||||
@abstractmethod
def get_available_models(self) -> List[str]:
    """Return the model names this client can be configured with.

    Subclass hook. The concrete clients in this package return their
    ``SUPPORTED_MODELS`` list.

    Returns:
        Model identifier strings.
    """
|
||||
|
||||
def estimate_tokens(self, text: str) -> int:
    """Roughly estimate how many tokens ``text`` will consume.

    Heuristic only, no tokenizer is consulted: each CJK Unified Ideograph
    (U+4E00 through U+9FFF) is counted as about 1.5 tokens and every other
    character as about 0.25 tokens.

    Args:
        text: Text to measure.

    Returns:
        The estimated token count, truncated toward zero.
    """
    # Escapes keep the range bounds unambiguous regardless of file encoding.
    cjk_count = sum(1 for ch in text if "\u4e00" <= ch <= "\u9fff")
    other_count = len(text) - cjk_count
    return int(cjk_count * 1.5 + other_count * 0.25)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""DeepSeek LLM客户端 - OpenAI兼容API"""
|
||||
"""Provide service-layer logic for deepseek client."""
|
||||
|
||||
import time
|
||||
from typing import List, Dict, Optional
|
||||
@@ -6,20 +6,12 @@ from loguru import logger
|
||||
import httpx
|
||||
|
||||
from .base_client import BaseLLMClient, LLMResponse, LLMConfig, LLMProvider
|
||||
# Keep provider-specific behavior explicit so debugging stays straightforward.
|
||||
|
||||
|
||||
|
||||
class DeepSeekClient(BaseLLMClient):
|
||||
"""
|
||||
DeepSeek API客户端(OpenAI兼容格式)
|
||||
|
||||
支持模型:
|
||||
- deepseek-chat
|
||||
- deepseek-coder
|
||||
- deepseek-reasoner
|
||||
- deepseek-v3
|
||||
- deepseek-v3.2
|
||||
- deepseek-v4-flash
|
||||
"""
|
||||
"""Represent the Deep Seek Client type."""
|
||||
|
||||
SUPPORTED_MODELS = [
|
||||
"deepseek-chat",
|
||||
@@ -31,13 +23,14 @@ class DeepSeekClient(BaseLLMClient):
|
||||
]
|
||||
|
||||
def __init__(self, config: LLMConfig):
    """Validate the configuration and build the HTTP client.

    Args:
        config: Client configuration; its ``provider`` must be
            ``LLMProvider.DEEPSEEK``.

    Raises:
        ValueError: If ``config.provider`` is any other provider.
    """
    # Reject a mismatched configuration before any client state is created.
    if config.provider != LLMProvider.DEEPSEEK:
        raise ValueError(f"配置provider应为DEEPSEEK,实际为{config.provider}")
    super().__init__(config)
    self._init_client()
|
||||
|
||||
def _init_client(self):
|
||||
"""初始化HTTP客户端"""
|
||||
"""Handle init client for this module for the Deep Seek Client instance."""
|
||||
self._client = httpx.Client(
|
||||
base_url=self.config.base_url,
|
||||
headers={
|
||||
@@ -55,7 +48,7 @@ class DeepSeekClient(BaseLLMClient):
|
||||
temperature: Optional[float] = None,
|
||||
**kwargs
|
||||
) -> LLMResponse:
|
||||
"""对话补全"""
|
||||
"""Handle chat for the Deep Seek Client instance."""
|
||||
start_time = time.time()
|
||||
|
||||
try:
|
||||
@@ -103,11 +96,11 @@ class DeepSeekClient(BaseLLMClient):
|
||||
)
|
||||
|
||||
def get_available_models(self) -> List[str]:
    """Return the model names supported by this client.

    Returns:
        A new list with the entries of ``SUPPORTED_MODELS``. A copy is
        returned so callers cannot mutate the shared class-level list.
    """
    return list(self.SUPPORTED_MODELS)
|
||||
|
||||
def close(self):
    """Close the underlying HTTP client, if one was created."""
    # ``_client`` starts as None in the base class, so guard before closing.
    if self._client:
        self._client.close()
|
||||
|
||||
@@ -118,7 +111,7 @@ def create_deepseek_client(
|
||||
base_url: str = "http://6.86.80.4:30080/v1",
|
||||
**kwargs
|
||||
) -> DeepSeekClient:
|
||||
"""便捷函数:创建DeepSeek客户端"""
|
||||
"""Create deepseek client."""
|
||||
config = LLMConfig(
|
||||
provider=LLMProvider.DEEPSEEK,
|
||||
model=model,
|
||||
|
||||
@@ -1,17 +1,20 @@
|
||||
"""文档摘要生成服务 - LLM生成法规文档摘要"""
|
||||
"""Provide service-layer logic for document summarizer."""
|
||||
|
||||
from typing import Dict, Optional
|
||||
from dataclasses import dataclass
|
||||
from loguru import logger
|
||||
|
||||
from app.services.llm import get_llm_client, BaseLLMClient
|
||||
from app.services.llm.base_client import BaseLLMClient
|
||||
from app.services.llm.llm_factory import get_llm_client
|
||||
from app.services.rag.prompt_templates import get_prompt_template
|
||||
from app.config.settings import settings
|
||||
# Keep provider-specific behavior explicit so debugging stays straightforward.
|
||||
|
||||
|
||||
|
||||
@dataclass
|
||||
class DocumentSummary:
|
||||
"""文档摘要结果"""
|
||||
"""Represent the Document Summary type."""
|
||||
doc_name: str
|
||||
summary: str
|
||||
applicable_scope: str
|
||||
@@ -24,24 +27,12 @@ class DocumentSummary:
|
||||
|
||||
@property
def is_success(self) -> bool:
    """Report whether the summary was produced without an error.

    Returns:
        ``True`` when ``self.error`` is ``None``, ``False`` otherwise.
    """
    return self.error is None
|
||||
|
||||
|
||||
class DocumentSummarizer:
|
||||
"""
|
||||
文档摘要生成器
|
||||
|
||||
功能:
|
||||
- 生成法规文档的核心要点摘要
|
||||
- 提取适用范围
|
||||
- 突出关键条款
|
||||
- 列出合规要点
|
||||
|
||||
使用示例:
|
||||
summarizer = DocumentSummarizer()
|
||||
result = summarizer.summarize("GB 7258-2017", markdown_content)
|
||||
print(result.summary)
|
||||
"""
|
||||
"""Represent the Document Summarizer type."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
@@ -49,25 +40,18 @@ class DocumentSummarizer:
|
||||
model: str = None,
|
||||
max_tokens: int = None
|
||||
):
|
||||
"""
|
||||
初始化摘要生成器
|
||||
|
||||
Args:
|
||||
provider: LLM提供商
|
||||
model: LLM模型名称
|
||||
max_tokens: 最大输出token数
|
||||
"""
|
||||
"""Initialize the Document Summarizer instance."""
|
||||
self.provider = provider or settings.llm_provider
|
||||
self.model = model or settings.llm_model
|
||||
self.max_tokens = max_tokens or settings.rag_summary_max_tokens
|
||||
|
||||
# LLM客户端(延迟加载)
|
||||
# Keep provider-specific behavior explicit so debugging stays straightforward.
|
||||
self.llm: Optional[BaseLLMClient] = None
|
||||
|
||||
logger.info(f"摘要生成器初始化: provider={self.provider}, model={self.model}")
|
||||
|
||||
def _init_llm(self):
|
||||
"""延迟初始化LLM"""
|
||||
"""Handle init llm for this module for the Document Summarizer instance."""
|
||||
if self.llm is None:
|
||||
self.llm = get_llm_client(
|
||||
provider=self.provider,
|
||||
@@ -81,18 +65,7 @@ class DocumentSummarizer:
|
||||
regulation_type: str = "",
|
||||
max_tokens: Optional[int] = None
|
||||
) -> DocumentSummary:
|
||||
"""
|
||||
生成文档摘要
|
||||
|
||||
Args:
|
||||
doc_name: 文档名称
|
||||
content: 文档内容(Markdown格式)
|
||||
regulation_type: 法规类型
|
||||
max_tokens: 最大输出token数
|
||||
|
||||
Returns:
|
||||
DocumentSummary: 摘要结果
|
||||
"""
|
||||
"""Handle summarize for the Document Summarizer instance."""
|
||||
import time
|
||||
start_time = time.time()
|
||||
|
||||
@@ -101,23 +74,23 @@ class DocumentSummarizer:
|
||||
try:
|
||||
self._init_llm()
|
||||
|
||||
# 使用摘要模板
|
||||
# Keep provider-specific behavior explicit so debugging stays straightforward.
|
||||
template = get_prompt_template("document_summary")
|
||||
|
||||
# 构建用户消息
|
||||
# Keep provider-specific behavior explicit so debugging stays straightforward.
|
||||
user_content = template.user_template.format(
|
||||
doc_name=doc_name,
|
||||
content=content[:8000] # 截取前8000字符(避免超出token限制)
|
||||
content=content[:8000] # Keep provider-specific behavior explicit so debugging stays straightforward.
|
||||
)
|
||||
|
||||
# 调用LLM
|
||||
# Keep provider-specific behavior explicit so debugging stays straightforward.
|
||||
response = self.llm.chat(
|
||||
messages=[
|
||||
{"role": "system", "content": template.system_prompt},
|
||||
{"role": "user", "content": user_content}
|
||||
],
|
||||
max_tokens=max_tokens or self.max_tokens,
|
||||
temperature=0.3 # 低温度保证摘要准确性
|
||||
temperature=0.3 # Keep provider-specific behavior explicit so debugging stays straightforward.
|
||||
)
|
||||
|
||||
latency_ms = int((time.time() - start_time) * 1000)
|
||||
@@ -135,7 +108,7 @@ class DocumentSummarizer:
|
||||
error=response.error
|
||||
)
|
||||
|
||||
# 解析摘要结构
|
||||
# Keep provider-specific behavior explicit so debugging stays straightforward.
|
||||
summary_data = self._parse_summary(response.content)
|
||||
|
||||
logger.success(f"摘要生成完成: {doc_name}, {latency_ms}ms")
|
||||
@@ -166,7 +139,7 @@ class DocumentSummarizer:
|
||||
)
|
||||
|
||||
def _parse_summary(self, content: str) -> Dict:
|
||||
"""解析摘要内容(提取结构化信息)"""
|
||||
"""Handle parse summary for this module for the Document Summarizer instance."""
|
||||
result = {
|
||||
"summary": content,
|
||||
"applicable_scope": "",
|
||||
@@ -175,26 +148,26 @@ class DocumentSummarizer:
|
||||
"compliance_points": []
|
||||
}
|
||||
|
||||
# 简单解析(提取关键信息)
|
||||
# Keep provider-specific behavior explicit so debugging stays straightforward.
|
||||
lines = content.split("\n")
|
||||
|
||||
for line in lines:
|
||||
line = line.strip()
|
||||
|
||||
# 提取适用范围
|
||||
# Keep provider-specific behavior explicit so debugging stays straightforward.
|
||||
if "适用范围" in line or "适用对象" in line:
|
||||
result["applicable_scope"] = line.split(":")[-1].strip() if ":" in line else line.split(":")[-1].strip()
|
||||
|
||||
# 提取关键条款
|
||||
# Keep provider-specific behavior explicit so debugging stays straightforward.
|
||||
if line.startswith("- 【条款") or line.startswith("【条款"):
|
||||
result["key_clauses"].append(line)
|
||||
|
||||
# 提取关键术语
|
||||
# Keep provider-specific behavior explicit so debugging stays straightforward.
|
||||
if "关键术语" in line or "术语定义" in line:
|
||||
# 继续读取后续几行
|
||||
# Keep provider-specific behavior explicit so debugging stays straightforward.
|
||||
pass
|
||||
|
||||
# 提取合规要点
|
||||
# Keep provider-specific behavior explicit so debugging stays straightforward.
|
||||
if "合规要点" in line or "必须满足" in line:
|
||||
pass
|
||||
|
||||
@@ -204,15 +177,7 @@ class DocumentSummarizer:
|
||||
self,
|
||||
documents: list
|
||||
) -> list:
|
||||
"""
|
||||
批量生成摘要
|
||||
|
||||
Args:
|
||||
documents: 文档列表 [{"doc_name": str, "content": str}, ...]
|
||||
|
||||
Returns:
|
||||
list: 摘要结果列表
|
||||
"""
|
||||
"""Handle batch summarize for the Document Summarizer instance."""
|
||||
results = []
|
||||
for doc in documents:
|
||||
result = self.summarize(doc["doc_name"], doc["content"])
|
||||
@@ -225,6 +190,6 @@ def summarize_document(
|
||||
content: str,
|
||||
**kwargs
|
||||
) -> DocumentSummary:
|
||||
"""便捷函数:生成文档摘要"""
|
||||
"""Handle summarize document."""
|
||||
summarizer = DocumentSummarizer(**kwargs)
|
||||
return summarizer.summarize(doc_name, content)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""LLM工厂 - 统一创建和管理LLM客户端"""
|
||||
"""Provide service-layer logic for llm factory."""
|
||||
|
||||
from typing import Optional, Dict, Any
|
||||
from loguru import logger
|
||||
@@ -7,16 +7,18 @@ from functools import lru_cache
|
||||
from .base_client import BaseLLMClient, LLMConfig, LLMProvider, LLMResponse
|
||||
from .deepseek_client import DeepSeekClient
|
||||
from .qwen_client import QwenClient, QwenVLClient
|
||||
# Keep provider-specific behavior explicit so debugging stays straightforward.
|
||||
|
||||
|
||||
# Default model per provider, used when the caller does not name a model.
DEFAULT_MODELS = {
    LLMProvider.DEEPSEEK: "deepseek-v4-flash",
    LLMProvider.QWEN: "qwen3.5-flash",
    LLMProvider.QWEN_VL: "qwen3-vl-plus",
}
|
||||
|
||||
# API基础URL(使用统一代理服务)
|
||||
# Keep provider-specific behavior explicit so debugging stays straightforward.
|
||||
DEFAULT_BASE_URLS = {
|
||||
LLMProvider.DEEPSEEK: "http://6.86.80.4:30080/v1",
|
||||
LLMProvider.QWEN: "http://6.86.80.4:30080/v1",
|
||||
@@ -25,31 +27,13 @@ DEFAULT_BASE_URLS = {
|
||||
|
||||
|
||||
class LLMFactory:
|
||||
"""
|
||||
LLM客户端工厂(支持全局缓存)
|
||||
"""Represent the L L M Factory type."""
|
||||
|
||||
支持的提供商和模型:
|
||||
- DeepSeek: deepseek-chat (DeepSeek-V3), deepseek-coder
|
||||
- Qwen: qwen-turbo, qwen-plus, qwen-max, qwen-long
|
||||
- QwenVL: qwen-vl-plus, qwen-vl-max (多模态)
|
||||
|
||||
使用示例:
|
||||
factory = LLMFactory()
|
||||
|
||||
# 使用默认配置
|
||||
client = factory.create("deepseek")
|
||||
|
||||
# 自定义配置
|
||||
client = factory.create("qwen", model="qwen-max", temperature=0.5)
|
||||
|
||||
# 调用LLM
|
||||
response = client.complete("你好,介绍一下自己")
|
||||
"""
|
||||
|
||||
# 全局客户端缓存(类级别,跨实例共享)
|
||||
# Keep provider-specific behavior explicit so debugging stays straightforward.
|
||||
_global_instances: Dict[str, BaseLLMClient] = {}
|
||||
|
||||
def __init__(self):
    """Create a factory with an empty per-instance configuration cache.

    Created clients are not stored here; they live in the class-level
    ``_global_instances`` dictionary, which all factory instances share.
    """
    self._config_cache: Dict[str, Any] = {}
|
||||
|
||||
def create(
|
||||
@@ -62,24 +46,10 @@ class LLMFactory:
|
||||
temperature: float = 0.7,
|
||||
**kwargs
|
||||
) -> BaseLLMClient:
|
||||
"""
|
||||
创建LLM客户端
|
||||
|
||||
Args:
|
||||
provider: 提供商名称 ("deepseek", "qwen", "qwen_vl")
|
||||
api_key: API密钥(如未提供,从环境变量获取)
|
||||
model: 模型名称(如未提供,使用默认模型)
|
||||
base_url: API基础URL
|
||||
max_tokens: 最大输出token数
|
||||
temperature: 温度参数
|
||||
**kwargs: 其他配置参数
|
||||
|
||||
Returns:
|
||||
BaseLLMClient: LLM客户端实例
|
||||
"""
|
||||
"""Handle create for the L L M Factory instance."""
|
||||
provider_enum = self._parse_provider(provider)
|
||||
|
||||
# 获取配置
|
||||
# Keep provider-specific behavior explicit so debugging stays straightforward.
|
||||
api_key = api_key or self._get_api_key(provider_enum)
|
||||
model = model or DEFAULT_MODELS.get(provider_enum)
|
||||
base_url = base_url or DEFAULT_BASE_URLS.get(provider_enum)
|
||||
@@ -87,7 +57,7 @@ class LLMFactory:
|
||||
if not api_key:
|
||||
raise ValueError(f"缺少API密钥,请设置环境变量或传入api_key参数")
|
||||
|
||||
# 检查全局缓存
|
||||
# Keep provider-specific behavior explicit so debugging stays straightforward.
|
||||
cache_key = f"{provider}_{model}"
|
||||
if cache_key in LLMFactory._global_instances:
|
||||
logger.debug(f"使用缓存的LLM客户端: {cache_key}")
|
||||
@@ -103,17 +73,17 @@ class LLMFactory:
|
||||
**kwargs
|
||||
)
|
||||
|
||||
# 创建客户端
|
||||
# Keep provider-specific behavior explicit so debugging stays straightforward.
|
||||
client = self._create_client(config)
|
||||
|
||||
# 缓存到全局实例
|
||||
# Keep provider-specific behavior explicit so debugging stays straightforward.
|
||||
LLMFactory._global_instances[cache_key] = client
|
||||
|
||||
logger.info(f"LLM客户端创建成功并缓存: {provider} - {model}")
|
||||
return client
|
||||
|
||||
def _parse_provider(self, provider: str) -> LLMProvider:
|
||||
"""解析提供商名称"""
|
||||
"""Handle parse provider for this module for the L L M Factory instance."""
|
||||
provider_map = {
|
||||
"deepseek": LLMProvider.DEEPSEEK,
|
||||
"deepseek-v3": LLMProvider.DEEPSEEK,
|
||||
@@ -137,7 +107,7 @@ class LLMFactory:
|
||||
return provider_map[provider_lower]
|
||||
|
||||
def _get_api_key(self, provider: LLMProvider) -> Optional[str]:
|
||||
"""从环境变量获取API密钥"""
|
||||
"""Handle get api key for this module for the L L M Factory instance."""
|
||||
import os
|
||||
|
||||
key_map = {
|
||||
@@ -154,7 +124,7 @@ class LLMFactory:
|
||||
return None
|
||||
|
||||
def _create_client(self, config: LLMConfig) -> BaseLLMClient:
|
||||
"""创建具体客户端"""
|
||||
"""Handle create client for this module for the L L M Factory instance."""
|
||||
client_map = {
|
||||
LLMProvider.DEEPSEEK: DeepSeekClient,
|
||||
LLMProvider.QWEN: QwenClient,
|
||||
@@ -168,14 +138,14 @@ class LLMFactory:
|
||||
return client_class(config)
|
||||
|
||||
def get_cached(self, provider: str, model: Optional[str] = None) -> Optional[BaseLLMClient]:
    """Look up an already-created client in the class-wide cache.

    Args:
        provider: Provider name; it is passed through ``_parse_provider``
            to pick the default model.
        model: Model name. When omitted, the provider's entry in
            ``DEFAULT_MODELS`` is used.

    Returns:
        The cached client, or ``None`` when no client is stored under the
        resulting key.
    """
    provider_enum = self._parse_provider(provider)
    resolved_model = model or DEFAULT_MODELS.get(provider_enum)
    # The key embeds the provider string exactly as given (not the parsed
    # enum), which is the same format ``create`` uses when storing a client.
    return LLMFactory._global_instances.get(f"{provider}_{resolved_model}")
|
||||
|
||||
def list_available_providers(self) -> Dict[str, list]:
|
||||
"""列出可用的提供商和模型"""
|
||||
"""List available providers for the L L M Factory instance."""
|
||||
return {
|
||||
"deepseek": DeepSeekClient.SUPPORTED_MODELS,
|
||||
"qwen": QwenClient.SUPPORTED_MODELS,
|
||||
@@ -184,12 +154,7 @@ class LLMFactory:
|
||||
|
||||
@classmethod
|
||||
def preload_clients(cls, providers: list = None):
|
||||
"""
|
||||
预加载LLM客户端(应用启动时调用)
|
||||
|
||||
Args:
|
||||
providers: 要预加载的提供商列表,默认加载qwen和deepseek
|
||||
"""
|
||||
"""Handle preload clients for the L L M Factory instance."""
|
||||
if providers is None:
|
||||
providers = ["qwen", "deepseek"]
|
||||
|
||||
@@ -203,9 +168,9 @@ class LLMFactory:
|
||||
|
||||
@classmethod
|
||||
def get_global_client(cls, provider: str, model: Optional[str] = None) -> Optional[BaseLLMClient]:
|
||||
"""获取全局缓存的客户端"""
|
||||
"""Return global client for the L L M Factory instance."""
|
||||
provider_lower = provider.lower()
|
||||
# 处理模型名作为provider的情况(如 qwen3.5-flash)
|
||||
# Keep provider-specific behavior explicit so debugging stays straightforward.
|
||||
if provider_lower.startswith("qwen"):
|
||||
provider_lower = "qwen"
|
||||
model = model or DEFAULT_MODELS.get(LLMProvider.QWEN if provider_lower == "qwen" else LLMProvider.DEEPSEEK)
|
||||
@@ -214,7 +179,7 @@ class LLMFactory:
|
||||
|
||||
@classmethod
|
||||
def cleanup(cls):
|
||||
"""清理所有缓存的客户端"""
|
||||
"""Handle cleanup for the L L M Factory instance."""
|
||||
for cache_key, client in cls._global_instances.items():
|
||||
try:
|
||||
client.close()
|
||||
@@ -227,7 +192,7 @@ class LLMFactory:
|
||||
|
||||
@lru_cache(maxsize=1)
def get_llm_factory() -> LLMFactory:
    """Return the shared ``LLMFactory`` instance.

    The function takes no arguments, so ``lru_cache`` stores a single entry
    and every call after the first returns the same factory object.
    """
    return LLMFactory()
|
||||
|
||||
|
||||
@@ -236,20 +201,10 @@ def get_llm_client(
|
||||
model: Optional[str] = None,
|
||||
**kwargs
|
||||
) -> BaseLLMClient:
|
||||
"""
|
||||
便捷函数:获取LLM客户端(优先使用缓存)
|
||||
|
||||
Args:
|
||||
provider: 提供商名称
|
||||
model: 模型名称
|
||||
**kwargs: 其他配置
|
||||
|
||||
Returns:
|
||||
BaseLLMClient: LLM客户端实例
|
||||
"""
|
||||
"""Return llm client."""
|
||||
factory = get_llm_factory()
|
||||
|
||||
# 先尝试获取缓存的实例
|
||||
# Keep provider-specific behavior explicit so debugging stays straightforward.
|
||||
cached = factory.get_cached(provider, model)
|
||||
if cached:
|
||||
return cached
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""Qwen LLM客户端 - 支持OpenAI兼容API格式"""
|
||||
"""Provide service-layer logic for qwen client."""
|
||||
|
||||
import time
|
||||
import json
|
||||
@@ -7,21 +7,12 @@ from loguru import logger
|
||||
import httpx
|
||||
|
||||
from .base_client import BaseLLMClient, LLMResponse, LLMConfig, LLMProvider
|
||||
# Keep provider-specific behavior explicit so debugging stays straightforward.
|
||||
|
||||
|
||||
|
||||
class QwenClient(BaseLLMClient):
|
||||
"""
|
||||
Qwen API客户端(OpenAI兼容格式)
|
||||
|
||||
支持通过new-api等代理服务调用:
|
||||
- qwen-turbo
|
||||
- qwen-plus
|
||||
- qwen-max
|
||||
- qwen3.5-flash (推荐:快速响应)
|
||||
- qwen3.5-plus
|
||||
- qwen-long
|
||||
- qwen2.5系列
|
||||
"""
|
||||
"""Represent the Qwen Client type."""
|
||||
|
||||
SUPPORTED_MODELS = [
|
||||
"qwen-turbo",
|
||||
@@ -39,14 +30,15 @@ class QwenClient(BaseLLMClient):
|
||||
]
|
||||
|
||||
def __init__(self, config: LLMConfig):
    """Validate the configuration and build the HTTP client.

    Args:
        config: Client configuration; its ``provider`` must be
            ``LLMProvider.QWEN`` or ``LLMProvider.QWEN_VL``.

    Raises:
        ValueError: If ``config.provider`` is any other provider.
    """
    # Reject a mismatched configuration before any client state is created.
    if config.provider not in (LLMProvider.QWEN, LLMProvider.QWEN_VL):
        raise ValueError(f"配置provider应为Qwen,实际为{config.provider}")
    super().__init__(config)
    self._init_client()
|
||||
|
||||
def _init_client(self):
|
||||
"""初始化HTTP客户端"""
|
||||
# OpenAI兼容API格式
|
||||
"""Handle init client for this module for the Qwen Client instance."""
|
||||
# Keep provider-specific behavior explicit so debugging stays straightforward.
|
||||
self._client = httpx.Client(
|
||||
base_url=self.config.base_url,
|
||||
headers={
|
||||
@@ -64,11 +56,11 @@ class QwenClient(BaseLLMClient):
|
||||
temperature: Optional[float] = None,
|
||||
**kwargs
|
||||
) -> LLMResponse:
|
||||
"""对话补全(OpenAI兼容格式)"""
|
||||
"""Handle chat for the Qwen Client instance."""
|
||||
start_time = time.time()
|
||||
|
||||
try:
|
||||
# OpenAI兼容格式的请求体
|
||||
# Keep provider-specific behavior explicit so debugging stays straightforward.
|
||||
payload = {
|
||||
"model": self.config.model,
|
||||
"messages": messages,
|
||||
@@ -78,7 +70,7 @@ class QwenClient(BaseLLMClient):
|
||||
"stream": False
|
||||
}
|
||||
|
||||
# OpenAI兼容接口路径
|
||||
# Keep provider-specific behavior explicit so debugging stays straightforward.
|
||||
response = self._client.post("/chat/completions", json=payload)
|
||||
response.raise_for_status()
|
||||
|
||||
@@ -86,7 +78,7 @@ class QwenClient(BaseLLMClient):
|
||||
|
||||
latency_ms = int((time.time() - start_time) * 1000)
|
||||
|
||||
# OpenAI兼容格式的响应解析
|
||||
# Keep provider-specific behavior explicit so debugging stays straightforward.
|
||||
choices = data.get("choices", [{}])
|
||||
message = choices[0].get("message", {})
|
||||
|
||||
@@ -121,42 +113,33 @@ class QwenClient(BaseLLMClient):
|
||||
temperature: Optional[float] = None,
|
||||
**kwargs
|
||||
) -> Generator[str, None, None]:
|
||||
"""
|
||||
流式对话补全(SSE格式)
|
||||
|
||||
Yields:
|
||||
str: 每次返回一个文本片段
|
||||
|
||||
使用示例:
|
||||
for chunk in client.stream_chat(messages):
|
||||
print(chunk, end="", flush=True)
|
||||
"""
|
||||
"""Stream chat for the Qwen Client instance."""
|
||||
try:
|
||||
# OpenAI兼容格式的请求体,启用流式输出
|
||||
# Keep provider-specific behavior explicit so debugging stays straightforward.
|
||||
payload = {
|
||||
"model": self.config.model,
|
||||
"messages": messages,
|
||||
"max_tokens": max_tokens or self.config.max_tokens,
|
||||
"temperature": temperature or self.config.temperature,
|
||||
"top_p": kwargs.get("top_p", self.config.top_p),
|
||||
"stream": True # 启用流式输出
|
||||
"stream": True # Keep provider-specific behavior explicit so debugging stays straightforward.
|
||||
}
|
||||
|
||||
# 使用stream模式发送请求
|
||||
# Keep provider-specific behavior explicit so debugging stays straightforward.
|
||||
with self._client.stream("POST", "/chat/completions", json=payload) as response:
|
||||
for line in response.iter_lines():
|
||||
if line:
|
||||
line = line.strip()
|
||||
# SSE格式: data: {...}
|
||||
# Keep provider-specific behavior explicit so debugging stays straightforward.
|
||||
if line.startswith("data: "):
|
||||
data_str = line[6:] # 移除 "data: " 前缀
|
||||
data_str = line[6:] # Keep provider-specific behavior explicit so debugging stays straightforward.
|
||||
if data_str == "[DONE]":
|
||||
break
|
||||
try:
|
||||
data = json.loads(data_str)
|
||||
choices = data.get("choices", [])
|
||||
if not choices:
|
||||
continue # 跳过空的choices
|
||||
continue # Keep provider-specific behavior explicit so debugging stays straightforward.
|
||||
delta = choices[0].get("delta", {})
|
||||
content = delta.get("content", "")
|
||||
if content:
|
||||
@@ -179,41 +162,27 @@ class QwenClient(BaseLLMClient):
|
||||
temperature: Optional[float] = None,
|
||||
**kwargs
|
||||
) -> AsyncGenerator[str, None]:
|
||||
"""
|
||||
异步流式对话补全(用于FastAPI SSE响应)
|
||||
|
||||
Yields:
|
||||
str: 每次返回一个文本片段
|
||||
"""
|
||||
"""Handle async stream chat for the Qwen Client instance."""
|
||||
import asyncio
|
||||
|
||||
# 使用同步流式方法,包装为异步
|
||||
# Keep provider-specific behavior explicit so debugging stays straightforward.
|
||||
for chunk in self.stream_chat(messages, max_tokens, temperature, **kwargs):
|
||||
yield chunk
|
||||
# 给async循环一个小延迟,让其他任务有机会执行
|
||||
# Keep provider-specific behavior explicit so debugging stays straightforward.
|
||||
await asyncio.sleep(0)
|
||||
|
||||
def get_available_models(self) -> List[str]:
    """Return the model names supported by this client.

    Returns:
        A new list with the entries of ``SUPPORTED_MODELS``. A copy is
        returned so callers cannot mutate the shared class-level list.
    """
    return list(self.SUPPORTED_MODELS)
|
||||
|
||||
def close(self):
    """Close the underlying HTTP client, if one was created."""
    # ``_client`` starts as None in the base class, so guard before closing.
    if self._client:
        self._client.close()
|
||||
|
||||
|
||||
class QwenVLClient(BaseLLMClient):
|
||||
"""
|
||||
Qwen VL多模态客户端(OpenAI兼容格式)
|
||||
|
||||
支持模型:
|
||||
- qwen-vl-plus
|
||||
- qwen-vl-max
|
||||
- qwen3-vl-plus
|
||||
- qwen2-vl-7b-instruct
|
||||
- qwen2-vl-72b-instruct
|
||||
"""
|
||||
"""Represent the Qwen V L Client type."""
|
||||
|
||||
SUPPORTED_MODELS = [
|
||||
"qwen-vl-plus",
|
||||
@@ -224,13 +193,14 @@ class QwenVLClient(BaseLLMClient):
|
||||
]
|
||||
|
||||
def __init__(self, config: LLMConfig):
    """Validate the configuration and build the HTTP client.

    Args:
        config: Client configuration; its ``provider`` must be
            ``LLMProvider.QWEN_VL``.

    Raises:
        ValueError: If ``config.provider`` is any other provider.
    """
    # Reject a mismatched configuration before any client state is created.
    if config.provider != LLMProvider.QWEN_VL:
        raise ValueError(f"配置provider应为QWEN_VL,实际为{config.provider}")
    super().__init__(config)
    self._init_client()
|
||||
|
||||
def _init_client(self):
|
||||
"""初始化HTTP客户端"""
|
||||
"""Handle init client for this module for the Qwen V L Client instance."""
|
||||
self._client = httpx.Client(
|
||||
base_url=self.config.base_url,
|
||||
headers={
|
||||
@@ -248,21 +218,11 @@ class QwenVLClient(BaseLLMClient):
|
||||
temperature: Optional[float] = None,
|
||||
**kwargs
|
||||
) -> LLMResponse:
|
||||
"""多模态对话补全(OpenAI兼容格式)
|
||||
|
||||
支持图片输入,消息格式:
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "image_url", "image_url": {"url": "https://example.com/image.jpg"}},
|
||||
{"type": "text", "text": "描述这张图片"}
|
||||
]
|
||||
}
|
||||
"""
|
||||
"""Handle chat for the Qwen V L Client instance."""
|
||||
start_time = time.time()
|
||||
|
||||
try:
|
||||
# OpenAI兼容格式的请求体
|
||||
# Keep provider-specific behavior explicit so debugging stays straightforward.
|
||||
payload = {
|
||||
"model": self.config.model,
|
||||
"messages": messages,
|
||||
@@ -312,7 +272,7 @@ class QwenVLClient(BaseLLMClient):
|
||||
temperature: Optional[float] = None,
|
||||
**kwargs
|
||||
) -> Generator[str, None, None]:
|
||||
"""流式多模态对话补全"""
|
||||
"""Stream chat for the Qwen V L Client instance."""
|
||||
try:
|
||||
payload = {
|
||||
"model": self.config.model,
|
||||
@@ -335,7 +295,7 @@ class QwenVLClient(BaseLLMClient):
|
||||
data = json.loads(data_str)
|
||||
choices = data.get("choices", [])
|
||||
if not choices:
|
||||
continue # 跳过空的choices
|
||||
continue # Keep provider-specific behavior explicit so debugging stays straightforward.
|
||||
delta = choices[0].get("delta", {})
|
||||
content = delta.get("content", "")
|
||||
if content:
|
||||
@@ -348,11 +308,11 @@ class QwenVLClient(BaseLLMClient):
|
||||
yield f"[ERROR: {str(e)}]"
|
||||
|
||||
def get_available_models(self) -> List[str]:
    """Return the model names supported by this client.

    Returns:
        A new list with the entries of ``SUPPORTED_MODELS``. A copy is
        returned so callers cannot mutate the shared class-level list.
    """
    return list(self.SUPPORTED_MODELS)
|
||||
|
||||
def close(self):
    """Close the underlying HTTP client, if one was created."""
    # ``_client`` starts as None in the base class, so guard before closing.
    if self._client:
        self._client.close()
|
||||
|
||||
@@ -363,7 +323,7 @@ def create_qwen_client(
|
||||
base_url: str = "http://6.86.80.4:30080/v1",
|
||||
**kwargs
|
||||
) -> QwenClient:
|
||||
"""便捷函数:创建Qwen客户端"""
|
||||
"""Create qwen client."""
|
||||
config = LLMConfig(
|
||||
provider=LLMProvider.QWEN,
|
||||
model=model,
|
||||
@@ -380,7 +340,7 @@ def create_qwen_vl_client(
|
||||
base_url: str = "http://6.86.80.4:30080/v1",
|
||||
**kwargs
|
||||
) -> QwenVLClient:
|
||||
"""便捷函数:创建QwenVL客户端"""
|
||||
"""Create qwen vl client."""
|
||||
config = LLMConfig(
|
||||
provider=LLMProvider.QWEN_VL,
|
||||
model=model,
|
||||
|
||||
Reference in New Issue
Block a user