feat: add ScoreRequest/ScoreResponse models and SCORE_API_TOKEN setting
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
128
tests/webapp/test_score_api.py
Normal file
128
tests/webapp/test_score_api.py
Normal file
@@ -0,0 +1,128 @@
|
||||
"""Tests for POST /api/score endpoint."""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
from pydantic import ValidationError
|
||||
|
||||
from webapp.models import ScoreRequest, ScoreResponse
|
||||
|
||||
|
||||
class TestScoreRequest:
    """Validation and helper-method tests for the ``ScoreRequest`` model."""

    def test_minimal_valid_request(self):
        """Only required fields — question, answer, contexts."""
        req = ScoreRequest(
            question="What is CT?",
            answer="CT is imaging.",
            contexts="CT uses X-rays.",
        )
        # Supplied values are stored unchanged.
        assert req.question == "What is CT?"
        assert req.answer == "CT is imaging."
        assert req.contexts == "CT uses X-rays."
        # Omitted optional fields fall back to their defaults.
        assert req.ground_truth is None
        assert req.context_separator == " |||| "
        assert req.metrics == [
            "faithfulness",
            "answer_relevancy",
            "context_recall",
            "context_precision",
        ]

    def test_contexts_split_by_separator(self):
        """contexts_as_list() splits on context_separator."""
        req = ScoreRequest(
            question="q",
            answer="a",
            contexts="ctx1 |||| ctx2 |||| ctx3",
            context_separator=" |||| ",
        )
        assert req.contexts_as_list() == ["ctx1", "ctx2", "ctx3"]

    def test_contexts_split_custom_separator(self):
        """contexts_as_list() honours a caller-supplied separator."""
        req = ScoreRequest(
            question="q",
            answer="a",
            contexts="a---b---c",
            context_separator="---",
        )
        assert req.contexts_as_list() == ["a", "b", "c"]

    def test_contexts_split_single_item(self):
        """A contexts string without the separator yields a one-item list."""
        req = ScoreRequest(question="q", answer="a", contexts="only one")
        assert req.contexts_as_list() == ["only one"]

    def test_missing_question_raises(self):
        """Omitting ``question`` fails validation."""
        with pytest.raises(ValidationError):
            ScoreRequest(answer="a", contexts="c")  # type: ignore[call-arg]

    def test_missing_answer_raises(self):
        """Omitting ``answer`` fails validation."""
        with pytest.raises(ValidationError):
            ScoreRequest(question="q", contexts="c")  # type: ignore[call-arg]

    def test_missing_contexts_raises(self):
        """Omitting ``contexts`` fails validation."""
        with pytest.raises(ValidationError):
            ScoreRequest(question="q", answer="a")  # type: ignore[call-arg]

    def test_custom_metrics_accepted(self):
        """An explicit metrics list replaces the default one."""
        req = ScoreRequest(
            question="q",
            answer="a",
            contexts="c",
            metrics=["faithfulness"],
        )
        assert req.metrics == ["faithfulness"]

    def test_invalid_metric_name_raises(self):
        """An unrecognised metric name fails validation."""
        with pytest.raises(ValidationError):
            ScoreRequest(
                question="q",
                answer="a",
                contexts="c",
                metrics=["not_a_metric"],
            )

    def test_effective_metrics_drops_ground_truth_dependent_when_missing(self):
        """Without ground_truth, GT-dependent metrics are excluded."""
        req = ScoreRequest(
            question="q",
            answer="a",
            contexts="c",
            metrics=[
                "faithfulness",
                "context_recall",
                "factual_correctness",
                "semantic_similarity",
                "noise_sensitivity",
            ],
        )
        effective = req.effective_metrics()
        assert "faithfulness" in effective
        assert "context_recall" not in effective
        assert "factual_correctness" not in effective
        assert "semantic_similarity" not in effective
        assert "noise_sensitivity" not in effective

    def test_effective_metrics_keeps_all_when_ground_truth_present(self):
        """With ground_truth set, the requested metrics come back unchanged."""
        req = ScoreRequest(
            question="q",
            answer="a",
            contexts="c",
            ground_truth="gt",
            metrics=["faithfulness", "context_recall", "factual_correctness"],
        )
        effective = req.effective_metrics()
        assert effective == [
            "faithfulness",
            "context_recall",
            "factual_correctness",
        ]


class TestScoreResponse:
    """Construction tests for the ``ScoreResponse`` model."""

    def test_score_response_structure(self):
        """Values passed to the constructor are readable back unchanged."""
        resp = ScoreResponse(
            scores={"faithfulness": 0.85, "answer_relevancy": None},
            weighted_score=0.85,
            latency_ms=1200,
        )
        assert resp.scores["faithfulness"] == 0.85
        # A metric may carry no score; None must survive validation.
        assert resp.scores["answer_relevancy"] is None
        # NOTE(review): assumes weighted_score is a declared model field — confirm
        # against webapp.models.
        assert resp.weighted_score == 0.85
        assert resp.latency_ms == 1200
Reference in New Issue
Block a user