feat: add weighted_score and sample_weight columns to score rows

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
2026-06-18 16:53:45 +08:00
parent 8617eaa5aa
commit d371ef7d24
2 changed files with 43 additions and 1 deletions

View File

@@ -12,6 +12,7 @@ from rag_eval.datasets.loader import load_dataset_records
from rag_eval.datasets.normalizers import normalize_records
from rag_eval.execution.concurrency import gather_with_limit
from rag_eval.metrics.pipeline import MetricPipeline
from rag_eval.metrics.weights import compute_weighted_score, resolve_weight
from rag_eval.shared.models import EvaluationResult, InvalidSample, NormalizedSample, Scenario
from rag_eval.shared.utils import utc_now_iso
@@ -171,7 +172,7 @@ class Evaluator:
return valid, invalid
def _merge_score(self, sample: NormalizedSample, score: Any) -> dict[str, Any]:
"""Combine sample data, metric results, and run metadata into one output row."""
"""Combine sample data, metric results, run metadata, and weight columns."""
record = sample.to_record()
record["contexts"] = sample.contexts
record.update(score.metrics)
@@ -179,4 +180,12 @@ class Evaluator:
record["judge_model"] = self.scenario.judge_model
record["embedding_model"] = self.scenario.embedding_model
record["run_id"] = self.scenario.scenario_name
# Weight columns (weighted_score, sample_weight) — enable post-hoc weighted aggregation in reporting.
record["weighted_score"] = compute_weighted_score(
score.metrics, self.scenario.metric_weights
)
doc_name = str(sample.metadata.get("doc_name", "") or "")
record["sample_weight"] = resolve_weight(
self.scenario.doc_weights, doc_name, default=1.0
)
return record