"""Provide service-layer logic for document summarizer.""" from typing import Dict, Optional from dataclasses import dataclass from loguru import logger from app.services.llm.base_client import BaseLLMClient from app.services.llm.llm_factory import get_llm_client from app.services.rag.prompt_templates import get_prompt_template from app.config.settings import settings # Keep provider-specific behavior explicit so debugging stays straightforward. @dataclass class DocumentSummary: """Represent the Document Summary type.""" doc_name: str summary: str applicable_scope: str key_clauses: list key_terms: list compliance_points: list model: str latency_ms: int error: Optional[str] = None @property def is_success(self) -> bool: """Return whether success for the Document Summary instance.""" return self.error is None class DocumentSummarizer: """Represent the Document Summarizer type.""" def __init__( self, provider: str = None, model: str = None, max_tokens: int = None ): """Initialize the Document Summarizer instance.""" self.provider = provider or settings.llm_provider self.model = model or settings.llm_model self.max_tokens = max_tokens or settings.rag_summary_max_tokens # Keep provider-specific behavior explicit so debugging stays straightforward. self.llm: Optional[BaseLLMClient] = None logger.info(f"摘要生成器初始化: provider={self.provider}, model={self.model}") def _init_llm(self): """Handle init llm for this module for the Document Summarizer instance.""" if self.llm is None: self.llm = get_llm_client( provider=self.provider, model=self.model ) def summarize( self, doc_name: str, content: str, regulation_type: str = "", max_tokens: Optional[int] = None ) -> DocumentSummary: """Handle summarize for the Document Summarizer instance.""" import time start_time = time.time() logger.info(f"生成文档摘要: {doc_name}") try: self._init_llm() # Keep provider-specific behavior explicit so debugging stays straightforward. template = get_prompt_template("document_summary") # Keep provider-specific behavior explicit so debugging stays straightforward. user_content = template.user_template.format( doc_name=doc_name, content=content[:8000] # Keep provider-specific behavior explicit so debugging stays straightforward. ) # Keep provider-specific behavior explicit so debugging stays straightforward. response = self.llm.chat( messages=[ {"role": "system", "content": template.system_prompt}, {"role": "user", "content": user_content} ], max_tokens=max_tokens or self.max_tokens, temperature=0.3 # Keep provider-specific behavior explicit so debugging stays straightforward. ) latency_ms = int((time.time() - start_time) * 1000) if not response.is_success: return DocumentSummary( doc_name=doc_name, summary="", applicable_scope="", key_clauses=[], key_terms=[], compliance_points=[], model=self.model, latency_ms=latency_ms, error=response.error ) # Keep provider-specific behavior explicit so debugging stays straightforward. summary_data = self._parse_summary(response.content) logger.success(f"摘要生成完成: {doc_name}, {latency_ms}ms") return DocumentSummary( doc_name=doc_name, summary=summary_data.get("summary", response.content), applicable_scope=summary_data.get("applicable_scope", ""), key_clauses=summary_data.get("key_clauses", []), key_terms=summary_data.get("key_terms", []), compliance_points=summary_data.get("compliance_points", []), model=response.model, latency_ms=latency_ms ) except Exception as e: logger.error(f"摘要生成失败: {e}") return DocumentSummary( doc_name=doc_name, summary="", applicable_scope="", key_clauses=[], key_terms=[], compliance_points=[], model=self.model, latency_ms=0, error=str(e) ) def _parse_summary(self, content: str) -> Dict: """Handle parse summary for this module for the Document Summarizer instance.""" result = { "summary": content, "applicable_scope": "", "key_clauses": [], "key_terms": [], "compliance_points": [] } # Keep provider-specific behavior explicit so debugging stays straightforward. lines = content.split("\n") for line in lines: line = line.strip() # Keep provider-specific behavior explicit so debugging stays straightforward. if "适用范围" in line or "适用对象" in line: result["applicable_scope"] = line.split(":")[-1].strip() if ":" in line else line.split(":")[-1].strip() # Keep provider-specific behavior explicit so debugging stays straightforward. if line.startswith("- 【条款") or line.startswith("【条款"): result["key_clauses"].append(line) # Keep provider-specific behavior explicit so debugging stays straightforward. if "关键术语" in line or "术语定义" in line: # Keep provider-specific behavior explicit so debugging stays straightforward. pass # Keep provider-specific behavior explicit so debugging stays straightforward. if "合规要点" in line or "必须满足" in line: pass return result def batch_summarize( self, documents: list ) -> list: """Handle batch summarize for the Document Summarizer instance.""" results = [] for doc in documents: result = self.summarize(doc["doc_name"], doc["content"]) results.append(result) return results def summarize_document( doc_name: str, content: str, **kwargs ) -> DocumentSummary: """Handle summarize document.""" summarizer = DocumentSummarizer(**kwargs) return summarizer.summarize(doc_name, content)