# siemens_ragas/tests/webapp/test_score_api.py  (file-viewer header: 129 lines, 4.2 KiB, Python)

"""Tests for POST /api/score endpoint."""
from __future__ import annotations
import pytest
from pydantic import ValidationError
from webapp.models import ScoreRequest, ScoreResponse
class TestScoreRequest:
    """Tests for ScoreRequest: defaults, validation and helper methods.

    Covers construction with only the required fields, splitting of the
    ``contexts`` string via ``contexts_as_list()``, rejection of missing
    fields and unknown metric names, and the filtering performed by
    ``effective_metrics()``.
    """

    def test_minimal_valid_request(self) -> None:
        """Only required fields — question, answer, contexts."""
        req = ScoreRequest(
            question="What is CT?",
            answer="CT is imaging.",
            contexts="CT uses X-rays.",
        )
        # Required fields are stored as given.
        assert req.question == "What is CT?"
        assert req.answer == "CT is imaging."
        assert req.contexts == "CT uses X-rays."
        # Optional fields fall back to their defaults.
        assert req.ground_truth is None
        assert req.context_separator == " |||| "
        assert req.metrics == [
            "faithfulness",
            "answer_relevancy",
            "context_recall",
            "context_precision",
        ]

    def test_contexts_split_by_separator(self) -> None:
        """contexts_as_list() splits on context_separator."""
        req = ScoreRequest(
            question="q",
            answer="a",
            contexts="ctx1 |||| ctx2 |||| ctx3",
            context_separator=" |||| ",
        )
        assert req.contexts_as_list() == ["ctx1", "ctx2", "ctx3"]

    def test_contexts_split_custom_separator(self) -> None:
        """contexts_as_list() honours a caller-supplied separator."""
        req = ScoreRequest(
            question="q",
            answer="a",
            contexts="a---b---c",
            context_separator="---",
        )
        assert req.contexts_as_list() == ["a", "b", "c"]

    def test_contexts_split_single_item(self) -> None:
        """A contexts string without the separator yields a one-item list."""
        req = ScoreRequest(question="q", answer="a", contexts="only one")
        assert req.contexts_as_list() == ["only one"]

    def test_missing_question_raises(self) -> None:
        """Omitting ``question`` is rejected with ValidationError."""
        with pytest.raises(ValidationError):
            ScoreRequest(answer="a", contexts="c")  # type: ignore[call-arg]

    def test_missing_answer_raises(self) -> None:
        """Omitting ``answer`` is rejected with ValidationError."""
        with pytest.raises(ValidationError):
            ScoreRequest(question="q", contexts="c")  # type: ignore[call-arg]

    def test_missing_contexts_raises(self) -> None:
        """Omitting ``contexts`` is rejected with ValidationError."""
        with pytest.raises(ValidationError):
            ScoreRequest(question="q", answer="a")  # type: ignore[call-arg]

    def test_custom_metrics_accepted(self) -> None:
        """An explicit ``metrics`` list replaces the default selection."""
        req = ScoreRequest(
            question="q",
            answer="a",
            contexts="c",
            metrics=["faithfulness"],
        )
        assert req.metrics == ["faithfulness"]

    def test_invalid_metric_name_raises(self) -> None:
        """An unrecognised metric name is rejected with ValidationError."""
        with pytest.raises(ValidationError):
            ScoreRequest(
                question="q",
                answer="a",
                contexts="c",
                metrics=["not_a_metric"],
            )

    def test_effective_metrics_drops_ground_truth_dependent_when_missing(self) -> None:
        """Without ground_truth, GT-dependent metrics are excluded."""
        req = ScoreRequest(
            question="q",
            answer="a",
            contexts="c",
            metrics=[
                "faithfulness",
                "context_recall",
                "factual_correctness",
                "semantic_similarity",
                "noise_sensitivity",
            ],
        )
        effective = req.effective_metrics()
        # Only faithfulness is expected to survive the filtering.
        assert "faithfulness" in effective
        assert "context_recall" not in effective
        assert "factual_correctness" not in effective
        assert "semantic_similarity" not in effective
        assert "noise_sensitivity" not in effective

    def test_effective_metrics_keeps_all_when_ground_truth_present(self) -> None:
        """With ground_truth set, the requested metrics come back unchanged."""
        req = ScoreRequest(
            question="q",
            answer="a",
            contexts="c",
            ground_truth="gt",
            metrics=["faithfulness", "context_recall", "factual_correctness"],
        )
        effective = req.effective_metrics()
        assert effective == [
            "faithfulness",
            "context_recall",
            "factual_correctness",
        ]
class TestScoreResponse:
    """Construction tests for the ScoreResponse model."""

    def test_score_response_structure(self) -> None:
        """Values passed to the constructor are readable back unchanged.

        The ``scores`` mapping mixes a numeric value with ``None`` to check
        that both are accepted.
        """
        resp = ScoreResponse(
            scores={"faithfulness": 0.85, "answer_relevancy": None},
            weighted_score=0.85,
            latency_ms=1200,
        )
        assert resp.scores["faithfulness"] == 0.85
        assert resp.scores["answer_relevancy"] is None
        # weighted_score was supplied above; verify it rather than leaving
        # the field unchecked.
        assert resp.weighted_score == 0.85
        assert resp.latency_ms == 1200