- Add advisory_threshold=0.85 field to MetricRule (higher-is-better metrics) - diagnose() now emits severity='low' for scores in (warning_threshold, 0.85) - noise_sensitivity (lower-is-better) keeps its existing two-tier thresholds - writer.py: severity labels mapped to Chinese (严重/警告/待优化) - llm_analyzer.py: prompt explains low/warning/critical tiers in Chinese - Tests: 5 new cases for 'low' severity, updated log summary assertions Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
94 lines
3.0 KiB
Python
94 lines
3.0 KiB
Python
"""Write optimization advice to markdown file and emit log summary."""
|
||
from __future__ import annotations
|
||
|
||
import logging
|
||
from pathlib import Path
|
||
|
||
from .rules import Diagnosis
|
||
|
||
# Module-level logger registered under the fixed name "rag_eval.advisor".
logger = logging.getLogger("rag_eval.advisor")

# Chinese display label for each severity tier. The functions in this module
# look labels up with ``.get(severity, severity)``, so a severity that is not
# listed here is displayed verbatim.
_SEVERITY_LABEL: dict[str, str] = dict(
    critical="严重",
    warning="警告",
    low="待优化",
)
|
||
|
||
|
||
def _format_log_summary(diagnoses: list[Diagnosis], advice_path: Path) -> str:
    """Build the one-line advisor summary that is written to the log.

    Args:
        diagnoses: Triggered diagnoses; may be empty.
        advice_path: Path of the advice file, appended to the summary.

    Returns:
        A fixed "all metrics normal" message when ``diagnoses`` is empty.
        Otherwise the number of diagnoses, one ``metric(score,label)`` entry
        per diagnosis (score formatted with two decimals, entries separated
        by a space) and ``advice_path``.
    """
    if diagnoses:
        entries = []
        for diag in diagnoses:
            # A severity without a configured label is shown as-is.
            tier = _SEVERITY_LABEL.get(diag.severity, diag.severity)
            entries.append(f"{diag.metric}({diag.mean_score:.2f},{tier})")
        joined = " ".join(entries)
        return f"[advisor] 触发诊断 {len(diagnoses)} 项: {joined} → {advice_path}"
    return "[advisor] 所有指标正常,无需优化建议。"
|
||
|
||
|
||
def _build_fallback_report(diagnoses: list[Diagnosis]) -> str:
    """Render the diagnoses as Markdown without any LLM-written analysis.

    Args:
        diagnoses: Diagnoses to render; may be empty.

    Returns:
        An empty string when ``diagnoses`` is empty. Otherwise a Markdown
        document made of a top-level heading followed, for each diagnosis, by
        a sub-heading (metric, severity label, mean score with four
        decimals), a bullet list of root causes, a bullet list of suggested
        actions and a trailing blank line.
    """
    if not diagnoses:
        return ""

    out = ["## 规则诊断(LLM 分析不可用)\n"]
    for diag in diagnoses:
        # A severity without a configured label is shown as-is.
        tier = _SEVERITY_LABEL.get(diag.severity, diag.severity)
        out += [
            f"### {diag.metric} [{tier}] 均值={diag.mean_score:.4f}",
            "\n**可能原因:**",
            *(f"- {cause}" for cause in diag.root_causes),
            "\n**建议动作:**",
            *(f"- {action}" for action in diag.suggested_actions),
            "",  # blank line separating consecutive diagnoses
        ]
    return "\n".join(out)
|
||
|
||
|
||
def write_advice(
    diagnoses: list[Diagnosis],
    llm_markdown: str,
    advice_path: Path,
    scenario_name: str,
    run_id: str,
    judge_model: str,
) -> None:
    """Write optimization_advice.md and emit a log summary line.

    The report consists of a header (scenario, run id, generation time, judge
    model) followed by a body chosen as follows:

    * no diagnoses: a fixed "no anomalies found" section;
    * diagnoses and ``llm_markdown`` has non-whitespace content: the LLM text,
      written unchanged;
    * otherwise: the rules-only fallback report.

    Args:
        diagnoses: List of Diagnosis from rules.diagnose().
        llm_markdown: LLM-generated Markdown body. An empty or
            whitespace-only string triggers the fallback.
        advice_path: Full path to write the .md file. Missing parent
            directories are created.
        scenario_name: Human-readable scenario identifier for the report header.
        run_id: Run identifier string.
        judge_model: Model used for LLM analysis (shown in header).
    """
    advice_path.parent.mkdir(parents=True, exist_ok=True)

    # NOTE(review): function-scope import kept from the original code;
    # presumably it avoids an import cycle -- confirm before hoisting it.
    from rag_eval.shared.utils import utc_now_iso

    header_lines = [
        f"# 优化建议报告 — {scenario_name}",
        "",
        f"- run_id: `{run_id}`",
        f"- 生成时间: `{utc_now_iso()}`",
        f"- judge_model: `{judge_model}`",
        "",
        "---",
        "",
    ]

    if not diagnoses:
        body = "## ✅ 未发现明显指标异常\n\n所有指标均在正常范围内,当前 RAG 链路表现良好。\n"
    elif llm_markdown and llm_markdown.strip():
        body = llm_markdown
    else:
        # Blank LLM output (empty or only whitespace) would leave a report
        # with a header and no findings, so use the rules-only report instead.
        body = _build_fallback_report(diagnoses)

    # The trailing "" in header_lines makes the joined header end with a
    # newline, so the body starts on its own line right after the rule.
    content = "\n".join(header_lines) + body
    advice_path.write_text(content, encoding="utf-8")

    summary = _format_log_summary(diagnoses, advice_path)
    logger.info(summary)
    logger.info("[advisor] 优化建议已写出: %s", advice_path)
|