feat(advisor): add optimization advisor module
- rag_eval/advisor/: new package with rules engine, LLM analyzer, writer
- rules.py: 7-metric diagnostic rules (warning/critical thresholds, top-3 low samples)
- llm_analyzer.py: Chinese optimization report via judge_model, graceful fallback
- writer.py: writes optimization_advice.md + log summary
- __init__.py: run_advisor() entry point (no-op when optimization_advisor=False)
- Scenario.optimization_advisor: new bool field (default False)
- ScenarioModel: same field added; loader.py passes it through
- RunArtifactPaths.advice_md: new path field
- factory.py: build_models() now public; build_metric_pipeline() accepts pre-built llm/embeddings
- runner.py: lifts llm, passes to pipeline and advisor; calls run_advisor() at end
- siemens online YAML: optimization_advisor: true enabled
- tests: 9 rules tests + 6 writer tests, all pass
- docs: advisor section added to engine-flow.md and architecture.md

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
99
rag_eval/advisor/llm_analyzer.py
Normal file
99
rag_eval/advisor/llm_analyzer.py
Normal file
@@ -0,0 +1,99 @@
|
||||
"""LLM-powered analysis of rule diagnostics and low-score samples."""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from .rules import Diagnosis
|
||||
|
||||
# Logger registered under the "rag_eval.advisor" name; every message in this
# module is prefixed with "[advisor]".
logger = logging.getLogger("rag_eval.advisor")

# Prompt sent to the judge LLM. It is rendered with str.format() in analyze(),
# which fills the two placeholders {diagnosis_summary} and {low_sample_text};
# any literal brace added to this text later must therefore be doubled.
# The backslash after the opening quotes keeps the prompt from starting with
# an empty line.
_PROMPT_TEMPLATE = """\
你是一个 RAG 系统优化专家,正在分析西门子医疗 CT 文档问答系统的评测结果。
请用中文撰写一份优化建议报告,格式为 Markdown。

## 评测诊断摘要

{diagnosis_summary}

## 低分样本示例

{low_sample_text}

## 报告要求

1. 按指标分节(## 指标名 [severity]),先解释"为什么低"(结合低分样本具体分析),再给出"具体怎么改"
2. "具体怎么改"要结合低分样本的实际内容,而不只是泛泛建议
3. 最后写一节 **## 优先优化次序**,按性价比排序(不增加 LLM 调用次数的优化优先)
4. 语言简洁,面向工程师,不要废话,不要重复列表内容

只输出 Markdown 报告正文,不要任何前置说明。
"""
|
||||
|
||||
|
||||
def _build_diagnosis_summary(diagnoses: list[Diagnosis]) -> str:
    """Render the rule diagnoses as a Markdown bullet list for the prompt.

    Every diagnosis contributes three lines: a headline bullet carrying the
    metric name, its mean score (four decimals), the threshold and the
    severity, then one bullet with the root causes and one with the suggested
    actions, each joined by "; ".

    Args:
        diagnoses: Diagnoses to summarise; an empty list yields "".

    Returns:
        All bullet lines joined with newlines.
    """
    rendered: list[str] = []
    for diag in diagnoses:
        # noise_sensitivity is the only metric tagged as lower-is-better.
        hint = "(越低越好)" if diag.metric == "noise_sensitivity" else ""
        causes = "; ".join(diag.root_causes)
        actions = "; ".join(diag.suggested_actions)
        rendered.extend(
            (
                f"- **{diag.metric}** {hint} 均值={diag.mean_score:.4f},"
                f"阈值={diag.threshold},严重程度={diag.severity}",
                f" - 可能原因:{causes}",
                f" - 建议动作:{actions}",
            )
        )
    return "\n".join(rendered)
|
||||
|
||||
|
||||
def _as_text(value: object) -> str:
    """Coerce a sample field to ``str``, mapping ``None`` to an empty string."""
    return "" if value is None else str(value)


def _build_low_sample_text(diagnoses: list[Diagnosis]) -> str:
    """Render each diagnosis' low-score samples as Markdown for the prompt.

    Diagnoses without low samples are skipped. For every remaining diagnosis a
    ``###`` heading is emitted, followed by one entry per sample showing its
    score (looked up under the metric name, "N/A" when absent), the question,
    the answer truncated to 300 characters and the ground truth truncated to
    200 characters.

    Sample fields are converted to text before truncation, so a field that is
    present but ``None`` or not a string (for example a float) no longer
    raises ``TypeError`` on slicing; ``None`` renders as an empty string.

    Args:
        diagnoses: Diagnoses whose ``low_samples`` are mappings with optional
            ``question``, ``answer``, ``ground_truth`` and per-metric score keys.

    Returns:
        The rendered lines joined with newlines, or "" when there is nothing
        to show.
    """
    lines: list[str] = []
    for d in diagnoses:
        if not d.low_samples:
            continue
        lines.append(f"### {d.metric} 低分样本(最多 3 条)")
        for i, s in enumerate(d.low_samples, 1):
            score = s.get(d.metric, "N/A")
            question = _as_text(s.get("question"))
            # Truncate long free text so the prompt stays bounded.
            answer = _as_text(s.get("answer"))[:300]
            ground_truth = _as_text(s.get("ground_truth"))[:200]
            # The leading "\n" leaves a blank line before each sample entry.
            lines.append(f"\n**样本 {i}**(分数={score})")
            lines.append(f"- 问题:{question}")
            lines.append(f"- 回答:{answer}")
            lines.append(f"- 标准答案:{ground_truth}")
    return "\n".join(lines)
|
||||
|
||||
|
||||
async def analyze(
    diagnoses: list[Diagnosis],
    llm: Any,
    scenario_name: str,
) -> str:
    """Call the judge LLM to generate a Chinese optimization report.

    The prompt is built from the diagnosis summary and the low-score samples,
    sent to ``llm`` as a single human message, and the first generation's text
    is returned stripped of surrounding whitespace.

    Any ``Exception`` raised while building the prompt, importing
    ``langchain_core``, calling the model or reading its result is logged as a
    warning and turned into an empty string, so a malformed sample cannot
    abort the caller.

    Args:
        diagnoses: List of Diagnosis from rules.diagnose(); when empty the
            LLM is not called.
        llm: LLM wrapper exposing an awaitable ``agenerate()`` whose result
            has ``generations[0][0].text``.
        scenario_name: Used only for logging.

    Returns:
        LLM-generated Markdown string, or "" when ``diagnoses`` is empty or on
        failure (triggers writer fallback).
    """
    if not diagnoses:
        return ""

    try:
        # Prompt assembly sits inside the guard as well: unexpected sample
        # data must lead to the fallback, not to an exception in the caller.
        prompt = _PROMPT_TEMPLATE.format(
            diagnosis_summary=_build_diagnosis_summary(diagnoses),
            low_sample_text=_build_low_sample_text(diagnoses),
        )

        logger.info("[advisor] calling LLM for optimization analysis scenario=%s", scenario_name)
        # Imported inside the guard, so an ImportError also results in the
        # fallback instead of propagating.
        from langchain_core.messages import HumanMessage

        # NOTE(review): confirm that the wrapper's agenerate() accepts a
        # `texts=` keyword. If it does not, this call raises and every run
        # silently takes the fallback path below.
        result = await llm.agenerate(texts=[[HumanMessage(content=prompt)]])
        text = result.generations[0][0].text.strip()
    except Exception as exc:
        logger.warning(
            "[advisor] LLM analysis failed (%s: %s) — falling back to rule report",
            type(exc).__name__, exc,
        )
        return ""

    logger.info("[advisor] LLM analysis complete chars=%d", len(text))
    return text
|
||||
Reference in New Issue
Block a user