# siemens_ragas/tests/test_weights.py

"""Unit tests for rag_eval/metrics/weights.py"""
import math

import pytest

from rag_eval.metrics.weights import (
    compute_overall_weighted_score_mean,
    compute_weighted_score,
    resolve_weight,
    weighted_metric_means,
)


class TestResolveWeight:
    """Tests for ``resolve_weight``: stored weights and fallback defaults."""

    def test_returns_value_when_key_present(self):
        """A key that exists in the mapping yields its stored weight."""
        assert resolve_weight({"faith": 0.5}, "faith") == 0.5

    def test_returns_default_when_key_missing(self):
        """A populated mapping that lacks the key is expected to yield 1.0."""
        # A non-empty mapping keeps this case distinct from
        # test_empty_dict_returns_default, which covers the empty mapping.
        assert resolve_weight({"context_recall": 0.5}, "faith") == 1.0

    def test_returns_custom_default_when_key_missing(self):
        """An explicit ``default`` is returned when the key is absent."""
        assert resolve_weight({}, "faith", default=2.0) == 2.0

    def test_empty_dict_returns_default(self):
        """An empty mapping is expected to yield 1.0 for any key."""
        assert resolve_weight({}, "anything") == 1.0


class TestComputeWeightedScore:
    """Tests for ``compute_weighted_score`` on a single metric->score mapping."""

    def test_equal_weights_is_simple_mean(self):
        """With no weights supplied, the expected score is (0.8 + 0.6) / 2."""
        metric_scores = {"faithfulness": 0.8, "context_recall": 0.6}
        assert compute_weighted_score(metric_scores, {}) == pytest.approx(
            0.7, rel=1e-4
        )

    def test_explicit_weights(self):
        """Weights 3:1 on scores 1.0 and 0.0 are expected to give 0.75."""
        metric_scores = {"faithfulness": 1.0, "context_recall": 0.0}
        metric_weights = {"faithfulness": 3.0, "context_recall": 1.0}
        combined = compute_weighted_score(metric_scores, metric_weights)
        assert combined == pytest.approx(0.75, rel=1e-4)

    def test_nan_values_excluded(self):
        """A NaN score is expected to be left out, leaving only 0.8."""
        metric_scores = {"faithfulness": float("nan"), "context_recall": 0.8}
        assert compute_weighted_score(metric_scores, {}) == pytest.approx(
            0.8, rel=1e-4
        )

    def test_none_values_excluded(self):
        """A ``None`` score is expected to be left out, leaving only 0.6."""
        metric_scores = {"faithfulness": None, "context_recall": 0.6}
        assert compute_weighted_score(metric_scores, {}) == pytest.approx(
            0.6, rel=1e-4
        )

    def test_all_nan_returns_none(self):
        """When every score is NaN the expected result is ``None``."""
        metric_scores = {"faithfulness": float("nan"), "context_recall": float("nan")}
        combined = compute_weighted_score(metric_scores, {})
        assert combined is None

    def test_empty_scores_returns_none(self):
        """An empty score mapping is expected to produce ``None``."""
        combined = compute_weighted_score({}, {})
        assert combined is None

    def test_missing_metric_in_weights_uses_default_1(self):
        """A metric without an explicit weight is expected to count as 1.

        Expected value: (2 * 0.8 + 1 * 0.4) / (2 + 1) = 2 / 3.
        """
        metric_scores = {"faithfulness": 0.8, "context_recall": 0.4}
        metric_weights = {"faithfulness": 2.0}
        combined = compute_weighted_score(metric_scores, metric_weights)
        assert combined == pytest.approx(2.0 / 3, rel=1e-4)


class TestWeightedMetricMeans:
    """Tests for ``weighted_metric_means`` across rows keyed by ``doc_name``."""

    def _rows(self):
        """Return a fresh two-document fixture on every call."""
        first = {"doc_name": "a.pdf", "faithfulness": 1.0, "context_recall": 0.5}
        second = {"doc_name": "b.pdf", "faithfulness": 0.6, "context_recall": 0.8}
        return [first, second]

    def test_equal_weights_gives_arithmetic_mean(self):
        """Without document weights each metric is expected to be a plain mean."""
        means = weighted_metric_means(
            self._rows(), ["faithfulness", "context_recall"], {}
        )
        assert means["faithfulness"] == pytest.approx(0.8, rel=1e-4)
        assert means["context_recall"] == pytest.approx(0.65, rel=1e-4)

    def test_doc_weight_amplifies_contribution(self):
        """Document weights 3:1 are expected to give (3 * 1.0 + 1 * 0.6) / 4."""
        per_doc_weights = {"a.pdf": 3.0, "b.pdf": 1.0}
        means = weighted_metric_means(self._rows(), ["faithfulness"], per_doc_weights)
        assert means["faithfulness"] == pytest.approx(0.9, rel=1e-4)

    def test_nan_rows_skipped_per_metric(self):
        """A NaN in one metric is expected to drop that row for that metric only."""
        nan_row = {
            "doc_name": "a.pdf",
            "faithfulness": float("nan"),
            "context_recall": 0.5,
        }
        clean_row = {"doc_name": "b.pdf", "faithfulness": 0.8, "context_recall": 0.9}
        means = weighted_metric_means(
            [nan_row, clean_row], ["faithfulness", "context_recall"], {}
        )
        assert means["faithfulness"] == pytest.approx(0.8, rel=1e-4)
        assert means["context_recall"] == pytest.approx(0.7, rel=1e-4)

    def test_missing_metric_column_returns_none(self):
        """A requested metric absent from every row is expected to map to ``None``."""
        only_row = {"doc_name": "a.pdf", "faithfulness": 0.8}
        means = weighted_metric_means(
            [only_row], ["faithfulness", "unknown_metric"], {}
        )
        assert means["faithfulness"] == pytest.approx(0.8, rel=1e-4)
        assert means["unknown_metric"] is None

    def test_empty_rows_returns_none_for_all(self):
        """With no rows, the requested metric is expected to map to ``None``."""
        means = weighted_metric_means([], ["faithfulness"], {})
        assert means["faithfulness"] is None


class TestComputeOverallWeightedScoreMean:
    """Tests for ``compute_overall_weighted_score_mean`` over multiple rows."""

    def test_basic_weighted_mean_of_weighted_scores(self):
        """Equal metric weights and no document weights are expected to give 0.5."""
        row_a = {"doc_name": "a.pdf", "faithfulness": 1.0, "context_recall": 0.0}
        row_b = {"doc_name": "b.pdf", "faithfulness": 0.5, "context_recall": 0.5}
        per_metric_weights = {"faithfulness": 1.0, "context_recall": 1.0}
        overall = compute_overall_weighted_score_mean(
            [row_a, row_b], per_metric_weights, {}
        )
        assert overall == pytest.approx(0.5, rel=1e-4)

    def test_doc_weight_amplifies_sample(self):
        """Document weights 9:1 on scores 1.0 and 0.0 are expected to give 0.9."""
        samples = [
            {"doc_name": "important.pdf", "faithfulness": 1.0},
            {"doc_name": "other.pdf", "faithfulness": 0.0},
        ]
        per_doc_weights = {"important.pdf": 9.0, "other.pdf": 1.0}
        overall = compute_overall_weighted_score_mean(samples, {}, per_doc_weights)
        assert overall == pytest.approx(0.9, rel=1e-4)

    def test_all_nan_returns_none(self):
        """A single row whose only score is NaN is expected to produce ``None``."""
        nan_only = {"doc_name": "a.pdf", "faithfulness": float("nan")}
        overall = compute_overall_weighted_score_mean([nan_only], {}, {})
        assert overall is None
# Commit: feat: add metric/doc weight computation module (weights.py)
# Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
# Date: 2026-06-18 16:47:47 +08:00
			`"""Unit tests for rag_eval/metrics/weights.py"""`
			`import math`

			`import pytest`

			`from rag_eval.metrics.weights import (`
			`compute_overall_weighted_score_mean,`
			`compute_weighted_score,`
			`resolve_weight,`
			`weighted_metric_means,`
			`)`


			`class TestResolveWeight:`
			`def test_returns_value_when_key_present(self):`
			`assert resolve_weight({"faith": 0.5}, "faith") == 0.5`

			`def test_returns_default_when_key_missing(self):`
			`assert resolve_weight({}, "faith") == 1.0`

			`def test_returns_custom_default_when_key_missing(self):`
			`assert resolve_weight({}, "faith", default=2.0) == 2.0`

			`def test_empty_dict_returns_default(self):`
			`assert resolve_weight({}, "anything") == 1.0`


			`class TestComputeWeightedScore:`
			`def test_equal_weights_is_simple_mean(self):`
			`scores = {"faithfulness": 0.8, "context_recall": 0.6}`
			`result = compute_weighted_score(scores, {})`
			`assert result == pytest.approx(0.7, rel=1e-4)`

			`def test_explicit_weights(self):`
			`scores = {"faithfulness": 1.0, "context_recall": 0.0}`
			`weights = {"faithfulness": 3.0, "context_recall": 1.0}`
			`result = compute_weighted_score(scores, weights)`
			`assert result == pytest.approx(0.75, rel=1e-4)`

			`def test_nan_values_excluded(self):`
			`scores = {"faithfulness": float("nan"), "context_recall": 0.8}`
			`result = compute_weighted_score(scores, {})`
			`assert result == pytest.approx(0.8, rel=1e-4)`

			`def test_none_values_excluded(self):`
			`scores = {"faithfulness": None, "context_recall": 0.6}`
			`result = compute_weighted_score(scores, {})`
			`assert result == pytest.approx(0.6, rel=1e-4)`

			`def test_all_nan_returns_none(self):`
			`scores = {"faithfulness": float("nan"), "context_recall": float("nan")}`
			`assert compute_weighted_score(scores, {}) is None`

			`def test_empty_scores_returns_none(self):`
			`assert compute_weighted_score({}, {}) is None`

			`def test_missing_metric_in_weights_uses_default_1(self):`
			`scores = {"faithfulness": 0.8, "context_recall": 0.4}`
			`weights = {"faithfulness": 2.0}`
			`result = compute_weighted_score(scores, weights)`
			`assert result == pytest.approx(2.0 / 3, rel=1e-4)`


			`class TestWeightedMetricMeans:`
			`def _rows(self):`
			`return [`
			`{"doc_name": "a.pdf", "faithfulness": 1.0, "context_recall": 0.5},`
			`{"doc_name": "b.pdf", "faithfulness": 0.6, "context_recall": 0.8},`
			`]`

			`def test_equal_weights_gives_arithmetic_mean(self):`
			`rows = self._rows()`
			`result = weighted_metric_means(rows, ["faithfulness", "context_recall"], {})`
			`assert result["faithfulness"] == pytest.approx(0.8, rel=1e-4)`
			`assert result["context_recall"] == pytest.approx(0.65, rel=1e-4)`

			`def test_doc_weight_amplifies_contribution(self):`
			`rows = self._rows()`
			`doc_weights = {"a.pdf": 3.0, "b.pdf": 1.0}`
			`result = weighted_metric_means(rows, ["faithfulness"], doc_weights)`
			`assert result["faithfulness"] == pytest.approx(0.9, rel=1e-4)`

			`def test_nan_rows_skipped_per_metric(self):`
			`rows = [`
			`{"doc_name": "a.pdf", "faithfulness": float("nan"), "context_recall": 0.5},`
			`{"doc_name": "b.pdf", "faithfulness": 0.8, "context_recall": 0.9},`
			`]`
			`result = weighted_metric_means(rows, ["faithfulness", "context_recall"], {})`
			`assert result["faithfulness"] == pytest.approx(0.8, rel=1e-4)`
			`assert result["context_recall"] == pytest.approx(0.7, rel=1e-4)`

			`def test_missing_metric_column_returns_none(self):`
			`rows = [{"doc_name": "a.pdf", "faithfulness": 0.8}]`
			`result = weighted_metric_means(rows, ["faithfulness", "unknown_metric"], {})`
			`assert result["faithfulness"] == pytest.approx(0.8, rel=1e-4)`
			`assert result["unknown_metric"] is None`

			`def test_empty_rows_returns_none_for_all(self):`
			`result = weighted_metric_means([], ["faithfulness"], {})`
			`assert result["faithfulness"] is None`


			`class TestComputeOverallWeightedScoreMean:`
			`def test_basic_weighted_mean_of_weighted_scores(self):`
			`rows = [`
			`{"doc_name": "a.pdf", "faithfulness": 1.0, "context_recall": 0.0},`
			`{"doc_name": "b.pdf", "faithfulness": 0.5, "context_recall": 0.5},`
			`]`
			`metric_weights = {"faithfulness": 1.0, "context_recall": 1.0}`
			`result = compute_overall_weighted_score_mean(rows, metric_weights, {})`
			`assert result == pytest.approx(0.5, rel=1e-4)`

			`def test_doc_weight_amplifies_sample(self):`
			`rows = [`
			`{"doc_name": "important.pdf", "faithfulness": 1.0},`
			`{"doc_name": "other.pdf", "faithfulness": 0.0},`
			`]`
			`doc_weights = {"important.pdf": 9.0, "other.pdf": 1.0}`
			`result = compute_overall_weighted_score_mean(rows, {}, doc_weights)`
			`assert result == pytest.approx(0.9, rel=1e-4)`

			`def test_all_nan_returns_none(self):`
			`rows = [{"doc_name": "a.pdf", "faithfulness": float("nan")}]`
			`assert compute_overall_weighted_score_mean(rows, {}, {}) is None`