update
This commit is contained in:
@@ -88,3 +88,30 @@ def test_infer_metrics_excludes_weight_columns_without_snapshot(tmp_path: Path)
|
||||
)
|
||||
|
||||
assert _infer_metrics_from_scores(run_dir) == ["faithfulness"]
|
||||
|
||||
|
||||
def test_build_report_ranks_noise_sensitivity_with_lower_values_as_better(tmp_path: Path) -> None:
    """Samples with higher noise sensitivity must come first in ``lowest_samples``.

    Writes a three-row ``scores.csv`` whose ``noise_sensitivity`` values are
    0.10, 0.30 and 0.90, then checks that the report lists the 0.90 sample
    first and the 0.10 sample last.
    """
    # Rows are ordered best-to-worst on purpose, so the expected ranking is
    # the reverse of the file order.
    csv_rows = (
        "sample_id,question,noise_sensitivity",
        "s-good,q1,0.10",
        "s-warn,q2,0.30",
        "s-bad,q3,0.90",
    )

    run_dir = tmp_path / "run"
    run_dir.mkdir(parents=True, exist_ok=True)

    scores_path = run_dir / "scores.csv"
    scores_path.write_text("\n".join(csv_rows), encoding="utf-8")

    # Companion files written next to the scores; presumably read by
    # build_report -- confirm against its implementation.
    (run_dir / "summary.md").write_text("summary", encoding="utf-8")
    (run_dir / "optimization_advice.md").write_text("", encoding="utf-8")

    report = build_report(run_dir, ["noise_sensitivity"])

    ranked_ids = [entry.sample_id for entry in report.lowest_samples[:3]]
    assert ranked_ids == ["s-bad", "s-warn", "s-good"]
|
||||
|
||||
Reference in New Issue
Block a user