feat: add weighted_score and sample_weight columns to score rows
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -183,6 +183,39 @@ class ScenarioAndDatasetTests(unittest.TestCase):
|
||||
|
||||
|
||||
class EvaluatorAndReportingTests(unittest.TestCase):
|
||||
def test_merge_score_includes_weighted_score_and_sample_weight(self) -> None:
    """_merge_score adds weighted_score and sample_weight columns.

    Builds an offline scenario with metric weights of 3.0 (faithfulness)
    and 1.0 (context_recall) plus a doc weight of 2.0 for ``doc.pdf``,
    merges a single scored sample, and checks the two derived columns on
    the returned row.
    """
    # Imports are kept local to the test, as in the original.
    from unittest.mock import MagicMock

    from rag_eval.execution.evaluator import Evaluator
    from rag_eval.shared.models import (
        DatasetConfig,
        MetricScore,
        NormalizedSample,
        Scenario,
    )

    scenario = Scenario(
        scenario_name="w-test", mode="offline",
        dataset=DatasetConfig(path=Path("d.csv")),
        judge_model="m", embedding_model="e",
        metrics=["faithfulness", "context_recall"],
        output_dir=Path("out"),
        metric_weights={"faithfulness": 3.0, "context_recall": 1.0},
        doc_weights={"doc.pdf": 2.0},
    )
    # The pipeline is a stand-in: this test never inspects it and calls
    # _merge_score directly with a pre-built score.
    evaluator = Evaluator(
        scenario=scenario,
        metric_pipeline=MagicMock(),
        app_adapter=None,
    )
    # NOTE(review): presumably sample_weight is looked up in doc_weights via
    # metadata["doc_name"] -- confirm against Evaluator._merge_score.
    sample = NormalizedSample(
        sample_id="s1", question="q", contexts=["ctx"],
        answer="a", ground_truth="gt",
        metadata={"doc_name": "doc.pdf"},
    )
    score = MetricScore(metrics={"faithfulness": 1.0, "context_recall": 0.0})

    row = evaluator._merge_score(sample, score)

    # (3*1.0 + 1*0.0) / (3+1) = 0.75
    assert abs(row["weighted_score"] - 0.75) < 1e-4
    assert row["sample_weight"] == 2.0
|
||||
|
||||
def test_metric_pipeline_scores_sample(self) -> None:
|
||||
pipeline = MetricPipeline(
|
||||
metrics={
|
||||
|
||||
Reference in New Issue
Block a user