60 lines
2.4 KiB
Python
60 lines
2.4 KiB
Python
|
|
"""Offline smoke-check for the advisor module wiring (no network required)."""
|
|||
|
|
import math
import sys
import tempfile
from pathlib import Path

# Put the directory two levels above this file on sys.path so that the
# ``rag_eval`` package imports below resolve when the script is run directly.
# NOTE(review): assumes ``rag_eval`` lives in that directory — confirm layout.
sys.path.insert(0, str(Path(__file__).parent.parent))

# These imports must follow the sys.path tweak above, hence the E402 waivers.
from rag_eval.advisor.rules import diagnose  # noqa: E402
from rag_eval.advisor.writer import write_advice, _format_log_summary  # noqa: E402
|
|||
|
|
|
|||
|
|
# Simulate score_rows with low faithfulness and high noise_sensitivity.
# Five synthetic samples (i = 0..4): faithfulness climbs from 0.30 to 0.50 and
# noise_sensitivity from 0.40 to 0.48, while context_recall and
# semantic_similarity stay constant. Whether these values cross the advisor's
# thresholds is decided by the rules module, not here.
rows = [
    {
        "sample_id": f"s{i}",
        "question": f"问题{i}:西门子CT扫描的Flash技术原理是什么?",
        "answer": f"答案{i}:Flash技术采用双源CT扫描",
        "ground_truth": f"标准答案{i}:Flash扫描利用双源CT和大螺距实现超低辐射剂量扫描",
        "faithfulness": 0.3 + i * 0.05,
        "noise_sensitivity": 0.4 + i * 0.02,
        "context_recall": 0.75,
        "semantic_similarity": 0.65,
    }
    for i in range(5)
]
|
|||
|
|
|
|||
|
|
# Run the advisor rules over the simulated rows, restricted to the four metric
# columns that the rows actually carry.
diags = diagnose(rows, metrics=["faithfulness", "noise_sensitivity", "context_recall", "semantic_similarity"])
print(f"Diagnosed {len(diags)} metric(s):")
for d in diags:
    print(f" {d.metric}: mean={d.mean_score}, severity={d.severity}, low_samples={len(d.low_samples)}")

# The smoke check expects at least the two deliberately skewed metrics to be
# reported by the rules.
assert len(diags) >= 2, f"Expected at least 2 diagnoses, got {len(diags)}"
metrics_hit = {d.metric for d in diags}
assert "faithfulness" in metrics_hit, "faithfulness should be triggered"
assert "noise_sensitivity" in metrics_hit, "noise_sensitivity should be triggered"
|
|||
|
|
|
|||
|
|
# Write the advice report into a throwaway directory and verify its contents.
# Everything that touches the file stays inside the ``with`` block because the
# directory (and the report in it) is removed as soon as the block exits.
with tempfile.TemporaryDirectory() as tmp:
    path = Path(tmp) / "optimization_advice.md"
    write_advice(
        diagnoses=diags,
        llm_markdown="",  # fallback mode (no LLM)
        advice_path=path,
        scenario_name="smoke-test-siemens",
        run_id="2026-06-16T00-00-00",
        judge_model="deepseek-v4-flash",
    )
    content = path.read_text(encoding="utf-8")
    assert "smoke-test-siemens" in content, "scenario name missing from report"
    assert "faithfulness" in content, "faithfulness missing from report"
    assert "noise_sensitivity" in content, "noise_sensitivity missing from report"
    print(f"\nAdvice file ({len(content)} chars) — assertions OK")
|
|||
|
|
|
|||
|
|
# Verify log summary format: the summary built from the diagnoses must contain
# the "触发诊断" marker and mention the faithfulness metric.
summary = _format_log_summary(diags, Path("optimization_advice.md"))
print(f"\nLog summary length: {len(summary)} chars, faithfulness present: {'faithfulness' in summary}")
assert "触发诊断" in summary, "'触发诊断' marker missing from log summary"
assert "faithfulness" in summary, "faithfulness missing from log summary"

print("\nSmoke check PASSED")
|