"""Regression tests for weighted webapp report aggregation.""" from __future__ import annotations from pathlib import Path import pytest from webapp.services.report_builder import build_report from webapp.services.run_reader import _infer_metrics_from_scores, _read_weights_from_snapshot def _write_run_artifacts(run_dir: Path) -> None: """Create a minimal run directory with weighted scores and a snapshot.""" run_dir.mkdir(parents=True, exist_ok=True) (run_dir / "scores.csv").write_text( "\n".join( [ "sample_id,doc_name,faithfulness,context_recall,weighted_score,sample_weight", "s1,a.pdf,1.0,0.5,0.8333,3.0", "s2,b.pdf,0.0,0.5,0.1667,1.0", ] ), encoding="utf-8", ) (run_dir / "summary.md").write_text("summary", encoding="utf-8") (run_dir / "optimization_advice.md").write_text("advice", encoding="utf-8") (run_dir / "scenario.snapshot.yaml").write_text( "\n".join( [ "metrics:", " - faithfulness", " - context_recall", "metric_weights:", " faithfulness: 2.0", " context_recall: 1.0", "doc_weights:", " a.pdf: 3.0", " b.pdf: 1.0", ] ), encoding="utf-8", ) def test_read_weights_from_snapshot_returns_metric_and_doc_weights(tmp_path: Path) -> None: """Snapshot weight reader returns both weight maps as plain float dicts.""" run_dir = tmp_path / "run" _write_run_artifacts(run_dir) metric_weights, doc_weights = _read_weights_from_snapshot(run_dir) assert metric_weights == {"faithfulness": 2.0, "context_recall": 1.0} assert doc_weights == {"a.pdf": 3.0, "b.pdf": 1.0} def test_build_report_uses_weighted_means_and_exposes_snapshot_weights(tmp_path: Path) -> None: """Report aggregation uses weighted means and surfaces snapshot weights.""" run_dir = tmp_path / "run" _write_run_artifacts(run_dir) report = build_report(run_dir, ["faithfulness", "context_recall"]) assert report.metric_means == { "faithfulness": pytest.approx(0.75, rel=1e-4), "context_recall": pytest.approx(0.5, rel=1e-4), } # 综合加权得分已暂时禁用 assert report.weighted_score_mean is None assert report.metric_weights == {"faithfulness": 2.0, "context_recall": 1.0} assert report.doc_weights == {"a.pdf": 3.0, "b.pdf": 1.0} assert report.summary_markdown == "summary" assert report.advice_markdown == "advice" def test_infer_metrics_excludes_weight_columns_without_snapshot(tmp_path: Path) -> None: """Metric inference excludes weighted helper columns from scores.csv.""" run_dir = tmp_path / "run" run_dir.mkdir(parents=True, exist_ok=True) (run_dir / "scores.csv").write_text( "\n".join( [ "sample_id,doc_name,faithfulness,weighted_score,sample_weight", "s1,a.pdf,0.8,0.8,2.0", ] ), encoding="utf-8", ) assert _infer_metrics_from_scores(run_dir) == ["faithfulness"]