feat: report_builder uses weighted means; ReportData gains weighted_score_mean

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
2026-06-18 17:16:09 +08:00
parent 835614189e
commit 36e5506e2a
3 changed files with 134 additions and 12 deletions

View File

@@ -13,6 +13,11 @@ from pathlib import Path
import pandas as pd
from rag_eval.metrics.weights import (
compute_overall_weighted_score_mean,
weighted_metric_means as _weighted_metric_means,
)
from webapp.services.run_reader import _read_weights_from_snapshot
from webapp.services.text_utils import parse_contexts
from webapp.models import (
DistributionBin,
@@ -42,17 +47,6 @@ def _round_or_none(value: float | None) -> float | None:
return round(float(value), 4)
def _metric_means(frame: pd.DataFrame, metrics: list[str]) -> dict[str, float | None]:
    """Return the rounded per-metric average over the rows of ``frame``.

    Every name in ``metrics`` becomes a key of the result. A metric that has
    a matching column maps to that column's mean passed through
    ``_round_or_none``; a metric with no matching column maps to ``None``.
    """
    return {
        name: (
            _round_or_none(frame[name].mean(numeric_only=True))
            if name in frame.columns
            else None
        )
        for name in metrics
    }
def _distribution(frame: pd.DataFrame, metric: str) -> list[DistributionBin]:
"""Bucket one metric's scores into fixed-width [0,1] histogram bins."""
bins: list[DistributionBin] = []
@@ -165,6 +159,7 @@ def build_report(run_dir: Path, metrics: list[str]) -> ReportData:
frame = run_reader.read_scores_frame(run_dir)
summary_markdown = run_reader.read_summary_markdown(run_dir)
advice_markdown = run_reader.read_advice_markdown(run_dir)
metric_weights, doc_weights = _read_weights_from_snapshot(run_dir)
if frame.empty or not metrics:
return ReportData(
@@ -172,8 +167,20 @@ def build_report(run_dir: Path, metrics: list[str]) -> ReportData:
metric_means={metric: None for metric in metrics},
summary_markdown=summary_markdown,
advice_markdown=advice_markdown,
metric_weights=metric_weights,
doc_weights=doc_weights,
)
score_rows_list = frame.to_dict(orient="records")
# Use weighted metric means (degrades to arithmetic mean when weights are empty).
w_means = _weighted_metric_means(score_rows_list, metrics, doc_weights)
rounded_means = {metric: _round_or_none(value) for metric, value in w_means.items()}
overall_ws = compute_overall_weighted_score_mean(
score_rows_list, metric_weights, doc_weights
)
distributions = {
metric: _distribution(frame, metric)
for metric in metrics
@@ -182,10 +189,13 @@ def build_report(run_dir: Path, metrics: list[str]) -> ReportData:
return ReportData(
metrics=metrics,
metric_means=_metric_means(frame, metrics),
metric_means=rounded_means,
distributions=distributions,
groupings=_groupings(frame, metrics),
lowest_samples=_lowest_samples(frame, metrics),
summary_markdown=summary_markdown,
advice_markdown=advice_markdown,
weighted_score_mean=_round_or_none(overall_ws),
metric_weights=metric_weights,
doc_weights=doc_weights,
)