diff --git a/.env.example b/.env.example index d78134e..de3949c 100644 --- a/.env.example +++ b/.env.example @@ -5,15 +5,15 @@ OPENAI_API_KEY=your-api-key OPENAI_BASE_URL=http://6.86.80.4:30080/v1 -OPENAI_TIMEOUT_SECONDS=30 +OPENAI_TIMEOUT_SECONDS=180 # 默认评测模型(可在场景 YAML 或 Web 控制台 LLM 配置中覆盖) RAGAS_JUDGE_MODEL=deepseek-v4-flash RAGAS_EMBEDDING_MODEL=text-embedding-v3 -# 评估并发控制 +# 评估并发控制(启用 7 个指标时建议 RAGAS_METRIC_TIMEOUT_SECONDS=300) BATCH_SIZE=8 -RAGAS_METRIC_TIMEOUT_SECONDS=45 +RAGAS_METRIC_TIMEOUT_SECONDS=300 # ===== 阿里云文档解析(dataset build 功能需要) ===== diff --git a/rag_eval/advisor/llm_analyzer.py b/rag_eval/advisor/llm_analyzer.py index df9140c..97ffc9f 100644 --- a/rag_eval/advisor/llm_analyzer.py +++ b/rag_eval/advisor/llm_analyzer.py @@ -87,8 +87,9 @@ async def analyze( try: logger.info("[advisor] calling LLM for optimization analysis scenario=%s", scenario_name) from langchain_core.messages import HumanMessage - result = await llm.agenerate(texts=[[HumanMessage(content=prompt)]]) - text = result.generations[0][0].text.strip() + # Use the underlying langchain chat model directly (RAGAS LangchainLLMWrapper wraps BaseChatModel) + response = await llm.langchain_llm.ainvoke([HumanMessage(content=prompt)]) + text = response.content.strip() logger.info("[advisor] LLM analysis complete chars=%d", len(text)) return text except Exception as exc: diff --git a/webapp/models.py b/webapp/models.py index 908b297..bcf00e2 100644 --- a/webapp/models.py +++ b/webapp/models.py @@ -73,6 +73,7 @@ class ReportData(BaseModel): groupings: dict[str, list[GroupStat]] = Field(default_factory=dict) lowest_samples: list[SampleScore] = Field(default_factory=list) summary_markdown: str = "" + advice_markdown: str = "" # optimization_advice.md content (empty if not generated) class RunDetail(BaseModel): diff --git a/webapp/services/report_builder.py b/webapp/services/report_builder.py index 1320578..5596315 100644 --- a/webapp/services/report_builder.py +++ b/webapp/services/report_builder.py 
@@ -164,12 +164,14 @@ def build_report(run_dir: Path, metrics: list[str]) -> ReportData:
     """Build the full aggregated report payload for one run directory."""
     frame = run_reader.read_scores_frame(run_dir)
     summary_markdown = run_reader.read_summary_markdown(run_dir)
+    advice_markdown = run_reader.read_advice_markdown(run_dir)
 
     if frame.empty or not metrics:
         return ReportData(
             metrics=metrics,
             metric_means={metric: None for metric in metrics},
             summary_markdown=summary_markdown,
+            advice_markdown=advice_markdown,
         )
 
     distributions = {
@@ -185,4 +187,5 @@ def build_report(run_dir: Path, metrics: list[str]) -> ReportData:
         groupings=_groupings(frame, metrics),
         lowest_samples=_lowest_samples(frame, metrics),
         summary_markdown=summary_markdown,
+        advice_markdown=advice_markdown,
     )
diff --git a/webapp/services/run_reader.py b/webapp/services/run_reader.py
index 0d9eb27..ecec126 100644
--- a/webapp/services/run_reader.py
+++ b/webapp/services/run_reader.py
@@ -220,3 +220,20 @@ def read_summary_markdown(run_dir: Path) -> str:
         return summary_path.read_text(encoding="utf-8")
     except OSError:
         return ""
+
+
+def read_advice_markdown(run_dir: Path) -> str:
+    """Return the optimization_advice.md content for a run.
+
+    The read is best-effort: if the file is missing, unreadable, or not valid
+    UTF-8, an empty string is returned instead of raising.
+    """
+    advice_path = run_dir / "optimization_advice.md"
+    if not advice_path.is_file():
+        return ""
+    try:
+        return advice_path.read_text(encoding="utf-8")
+    except (OSError, UnicodeDecodeError):
+        # UnicodeDecodeError derives from ValueError, not OSError, so it must
+        # be listed explicitly or a corrupt/half-written file would raise.
+        return ""