Files
siemens_ragas/scripts/smoke_advisor.py
wangwei f5c2dce64a feat(advisor): add optimization advisor module
- rag_eval/advisor/: new package with rules engine, LLM analyzer, writer
  - rules.py: 7-metric diagnostic rules (warning/critical thresholds, top-3 low samples)
  - llm_analyzer.py: Chinese optimization report via judge_model, graceful fallback
  - writer.py: writes optimization_advice.md + log summary
  - __init__.py: run_advisor() entry point (no-op when optimization_advisor=False)
- Scenario.optimization_advisor: new bool field (default False)
- ScenarioModel: same field added; loader.py passes it through
- RunArtifactPaths.advice_md: new path field
- factory.py: build_models() now public; build_metric_pipeline() accepts pre-built llm/embeddings
- runner.py: lifts llm, passes to pipeline and advisor; calls run_advisor() at end
- siemens online YAML: enabled `optimization_advisor: true`
- tests: 9 rules tests + 6 writer tests, all pass
- docs: advisor section added to engine-flow.md and architecture.md

Co-Authored-By: Claude <noreply@anthropic.com>
2026-06-16 17:06:19 +08:00

60 lines
2.4 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Offline smoke-check for the advisor module wiring (no network required)."""
import math
import sys
import tempfile
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))
from rag_eval.advisor.rules import diagnose
from rag_eval.advisor.writer import write_advice, _format_log_summary
def _require(condition: object, message: str) -> None:
    """Raise ``AssertionError(message)`` unless *condition* is truthy.

    Used instead of the ``assert`` statement so that the smoke checks still
    run when the interpreter strips asserts (``python -O`` or
    ``PYTHONOPTIMIZE``); otherwise the script would report success without
    having verified anything.
    """
    if not condition:
        raise AssertionError(message)


# Simulate score_rows with low faithfulness and high noise_sensitivity
rows = [
    {
        "sample_id": f"s{i}",
        "question": f"问题{i}西门子CT扫描的Flash技术原理是什么",
        "answer": f"答案{i}Flash技术采用双源CT扫描",
        "ground_truth": f"标准答案{i}Flash扫描利用双源CT和大螺距实现超低辐射剂量扫描",
        "faithfulness": 0.3 + i * 0.05,
        "noise_sensitivity": 0.4 + i * 0.02,
        "context_recall": 0.75,
        "semantic_similarity": 0.65,
    }
    for i in range(5)
]

# Step 1: run the rules engine over the synthetic rows.
diags = diagnose(
    rows,
    metrics=["faithfulness", "noise_sensitivity", "context_recall", "semantic_similarity"],
)
print(f"Diagnosed {len(diags)} metric(s):")
for d in diags:
    print(f" {d.metric}: mean={d.mean_score}, severity={d.severity}, low_samples={len(d.low_samples)}")

_require(len(diags) >= 2, f"Expected at least 2 diagnoses, got {len(diags)}")
metrics_hit = {d.metric for d in diags}
_require("faithfulness" in metrics_hit, "faithfulness should be triggered")
_require("noise_sensitivity" in metrics_hit, "noise_sensitivity should be triggered")

# Step 2: write the advice file into a throwaway directory and inspect it.
with tempfile.TemporaryDirectory() as tmp:
    path = Path(tmp) / "optimization_advice.md"
    write_advice(
        diagnoses=diags,
        llm_markdown="",  # fallback mode (no LLM)
        advice_path=path,
        scenario_name="smoke-test-siemens",
        run_id="2026-06-16T00-00-00",
        judge_model="deepseek-v4-flash",
    )
    # Read while the temporary directory still exists; it is removed on exit.
    content = path.read_text(encoding="utf-8")
    _require("smoke-test-siemens" in content, "scenario name missing from report")
    _require("faithfulness" in content, "faithfulness missing from report")
    _require("noise_sensitivity" in content, "noise_sensitivity missing from report")
    print(f"\nAdvice file ({len(content)} chars) — assertions OK")

# Verify log summary format
summary = _format_log_summary(diags, Path("optimization_advice.md"))
print(f"\nLog summary length: {len(summary)} chars, faithfulness present: {'faithfulness' in summary}")
_require("触发诊断" in summary, "'触发诊断' missing from log summary")
_require("faithfulness" in summary, "faithfulness missing from log summary")

print("\nSmoke check PASSED")