feat(advisor): add 0.85 advisory threshold triggering LLM suggestions
- Add advisory_threshold=0.85 field to MetricRule (higher-is-better metrics)
- diagnose() now emits severity='low' for scores in (warning_threshold, 0.85)
- noise_sensitivity (lower-is-better) keeps its existing two-tier thresholds
- writer.py: severity labels mapped to Chinese (严重/警告/待优化)
- llm_analyzer.py: prompt explains low/warning/critical tiers in Chinese
- Tests: 5 new cases for 'low' severity, updated log summary assertions

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -8,12 +8,22 @@ from .rules import Diagnosis
|
||||
|
||||
logger = logging.getLogger("rag_eval.advisor")
|
||||
|
||||
# Chinese display labels for each severity tier.
|
||||
_SEVERITY_LABEL: dict[str, str] = {
|
||||
"critical": "严重",
|
||||
"warning": "警告",
|
||||
"low": "待优化",
|
||||
}
|
||||
|
||||
|
||||
def _format_log_summary(diagnoses: list[Diagnosis], advice_path: Path) -> str:
|
||||
"""Return a single-line log summary of triggered diagnoses."""
|
||||
if not diagnoses:
|
||||
return "[advisor] 所有指标正常,无需优化建议。"
|
||||
parts = [f"{d.metric}({d.mean_score:.2f}, {d.severity})" for d in diagnoses]
|
||||
parts = [
|
||||
f"{d.metric}({d.mean_score:.2f},{_SEVERITY_LABEL.get(d.severity, d.severity)})"
|
||||
for d in diagnoses
|
||||
]
|
||||
triggered = " ".join(parts)
|
||||
return f"[advisor] 触发诊断 {len(diagnoses)} 项: {triggered} → {advice_path}"
|
||||
|
||||
@@ -24,7 +34,8 @@ def _build_fallback_report(diagnoses: list[Diagnosis]) -> str:
|
||||
return ""
|
||||
lines = ["## 规则诊断(LLM 分析不可用)\n"]
|
||||
for d in diagnoses:
|
||||
lines.append(f"### {d.metric} [{d.severity}] 均值={d.mean_score:.4f}")
|
||||
label = _SEVERITY_LABEL.get(d.severity, d.severity)
|
||||
lines.append(f"### {d.metric} [{label}] 均值={d.mean_score:.4f}")
|
||||
lines.append("\n**可能原因:**")
|
||||
for cause in d.root_causes:
|
||||
lines.append(f"- {cause}")
|
||||
|
||||
Reference in New Issue
Block a user