Fix SSE route dependency and align architecture docs
This commit is contained in:
@@ -1,17 +1,20 @@
|
||||
"""文档摘要生成服务 - LLM生成法规文档摘要"""
|
||||
"""Provide service-layer logic for document summarizer."""
|
||||
|
||||
from typing import Dict, Optional
|
||||
from dataclasses import dataclass
|
||||
from loguru import logger
|
||||
|
||||
from app.services.llm import get_llm_client, BaseLLMClient
|
||||
from app.services.llm.base_client import BaseLLMClient
|
||||
from app.services.llm.llm_factory import get_llm_client
|
||||
from app.services.rag.prompt_templates import get_prompt_template
|
||||
from app.config.settings import settings
|
||||
# Keep provider-specific behavior explicit so debugging stays straightforward.
|
||||
|
||||
|
||||
|
||||
@dataclass
|
||||
class DocumentSummary:
|
||||
"""文档摘要结果"""
|
||||
"""Represent the Document Summary type."""
|
||||
doc_name: str
|
||||
summary: str
|
||||
applicable_scope: str
|
||||
@@ -24,24 +27,12 @@ class DocumentSummary:
|
||||
|
||||
@property
|
||||
def is_success(self) -> bool:
|
||||
"""Return whether success for the Document Summary instance."""
|
||||
return self.error is None
|
||||
|
||||
|
||||
class DocumentSummarizer:
|
||||
"""
|
||||
文档摘要生成器
|
||||
|
||||
功能:
|
||||
- 生成法规文档的核心要点摘要
|
||||
- 提取适用范围
|
||||
- 突出关键条款
|
||||
- 列出合规要点
|
||||
|
||||
使用示例:
|
||||
summarizer = DocumentSummarizer()
|
||||
result = summarizer.summarize("GB 7258-2017", markdown_content)
|
||||
print(result.summary)
|
||||
"""
|
||||
"""Represent the Document Summarizer type."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
@@ -49,25 +40,18 @@ class DocumentSummarizer:
|
||||
model: str = None,
|
||||
max_tokens: int = None
|
||||
):
|
||||
"""
|
||||
初始化摘要生成器
|
||||
|
||||
Args:
|
||||
provider: LLM提供商
|
||||
model: LLM模型名称
|
||||
max_tokens: 最大输出token数
|
||||
"""
|
||||
"""Initialize the Document Summarizer instance."""
|
||||
self.provider = provider or settings.llm_provider
|
||||
self.model = model or settings.llm_model
|
||||
self.max_tokens = max_tokens or settings.rag_summary_max_tokens
|
||||
|
||||
# LLM客户端(延迟加载)
|
||||
# LLM client (lazily initialized).
|
||||
self.llm: Optional[BaseLLMClient] = None
|
||||
|
||||
logger.info(f"摘要生成器初始化: provider={self.provider}, model={self.model}")
|
||||
|
||||
def _init_llm(self):
|
||||
"""延迟初始化LLM"""
|
||||
"""Handle init llm for this module for the Document Summarizer instance."""
|
||||
if self.llm is None:
|
||||
self.llm = get_llm_client(
|
||||
provider=self.provider,
|
||||
@@ -81,18 +65,7 @@ class DocumentSummarizer:
|
||||
regulation_type: str = "",
|
||||
max_tokens: Optional[int] = None
|
||||
) -> DocumentSummary:
|
||||
"""
|
||||
生成文档摘要
|
||||
|
||||
Args:
|
||||
doc_name: 文档名称
|
||||
content: 文档内容(Markdown格式)
|
||||
regulation_type: 法规类型
|
||||
max_tokens: 最大输出token数
|
||||
|
||||
Returns:
|
||||
DocumentSummary: 摘要结果
|
||||
"""
|
||||
"""Handle summarize for the Document Summarizer instance."""
|
||||
import time
|
||||
start_time = time.time()
|
||||
|
||||
@@ -101,23 +74,23 @@ class DocumentSummarizer:
|
||||
try:
|
||||
self._init_llm()
|
||||
|
||||
# 使用摘要模板
|
||||
# Use the document-summary prompt template.
|
||||
template = get_prompt_template("document_summary")
|
||||
|
||||
# 构建用户消息
|
||||
# Build the user message.
|
||||
user_content = template.user_template.format(
|
||||
doc_name=doc_name,
|
||||
content=content[:8000] # 截取前8000字符(避免超出token限制)
|
||||
content=content[:8000] # Use only the first 8000 characters to avoid exceeding the token limit.
|
||||
)
|
||||
|
||||
# 调用LLM
|
||||
# Call the LLM.
|
||||
response = self.llm.chat(
|
||||
messages=[
|
||||
{"role": "system", "content": template.system_prompt},
|
||||
{"role": "user", "content": user_content}
|
||||
],
|
||||
max_tokens=max_tokens or self.max_tokens,
|
||||
temperature=0.3 # 低温度保证摘要准确性
|
||||
temperature=0.3 # Low temperature to keep the summary accurate.
|
||||
)
|
||||
|
||||
latency_ms = int((time.time() - start_time) * 1000)
|
||||
@@ -135,7 +108,7 @@ class DocumentSummarizer:
|
||||
error=response.error
|
||||
)
|
||||
|
||||
# 解析摘要结构
|
||||
# Parse the summary into structured fields.
|
||||
summary_data = self._parse_summary(response.content)
|
||||
|
||||
logger.success(f"摘要生成完成: {doc_name}, {latency_ms}ms")
|
||||
@@ -166,7 +139,7 @@ class DocumentSummarizer:
|
||||
)
|
||||
|
||||
def _parse_summary(self, content: str) -> Dict:
|
||||
"""解析摘要内容(提取结构化信息)"""
|
||||
"""Handle parse summary for this module for the Document Summarizer instance."""
|
||||
result = {
|
||||
"summary": content,
|
||||
"applicable_scope": "",
|
||||
@@ -175,26 +148,26 @@ class DocumentSummarizer:
|
||||
"compliance_points": []
|
||||
}
|
||||
|
||||
# 简单解析(提取关键信息)
|
||||
# Simple line-based parsing to extract the key information.
|
||||
lines = content.split("\n")
|
||||
|
||||
for line in lines:
|
||||
line = line.strip()
|
||||
|
||||
# 提取适用范围
|
||||
# Extract the applicable scope.
|
||||
if "适用范围" in line or "适用对象" in line:
|
||||
result["applicable_scope"] = line.split(":")[-1].strip() if ":" in line else line.split(":")[-1].strip()
|
||||
|
||||
# 提取关键条款
|
||||
# Extract key clauses.
|
||||
if line.startswith("- 【条款") or line.startswith("【条款"):
|
||||
result["key_clauses"].append(line)
|
||||
|
||||
# 提取关键术语
|
||||
# Extract key terms.
|
||||
if "关键术语" in line or "术语定义" in line:
|
||||
# 继续读取后续几行
|
||||
# TODO: continue reading the following lines to collect the terms (not implemented yet).
|
||||
pass
|
||||
|
||||
# 提取合规要点
|
||||
# Extract compliance points.
|
||||
if "合规要点" in line or "必须满足" in line:
|
||||
pass
|
||||
|
||||
@@ -204,15 +177,7 @@ class DocumentSummarizer:
|
||||
self,
|
||||
documents: list
|
||||
) -> list:
|
||||
"""
|
||||
批量生成摘要
|
||||
|
||||
Args:
|
||||
documents: 文档列表 [{"doc_name": str, "content": str}, ...]
|
||||
|
||||
Returns:
|
||||
list: 摘要结果列表
|
||||
"""
|
||||
"""Handle batch summarize for the Document Summarizer instance."""
|
||||
results = []
|
||||
for doc in documents:
|
||||
result = self.summarize(doc["doc_name"], doc["content"])
|
||||
@@ -225,6 +190,6 @@ def summarize_document(
|
||||
content: str,
|
||||
**kwargs
|
||||
) -> DocumentSummary:
|
||||
"""便捷函数:生成文档摘要"""
|
||||
"""Handle summarize document."""
|
||||
summarizer = DocumentSummarizer(**kwargs)
|
||||
return summarizer.summarize(doc_name, content)
|
||||
|
||||
Reference in New Issue
Block a user