feat: add InlineScorer service with LLM client caching
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -126,3 +126,62 @@ class TestScoreResponse:
|
||||
assert resp.scores["faithfulness"] == 0.85
|
||||
assert resp.scores["answer_relevancy"] is None
|
||||
assert resp.latency_ms == 1200
|
||||
|
||||
|
||||
class TestInlineScorer:
    """Tests for ``InlineScorer.score`` with its collaborators mocked out.

    Each test patches ``build_models``, ``MetricPipeline`` and
    ``_build_metric_instances`` in ``webapp.services.inline_scorer`` so that
    the only behaviour under test is how ``score`` turns the pipeline's
    result into the returned dict.
    """

    def test_score_returns_dict_with_requested_metrics(self):
        """InlineScorer.score returns a dict keyed by the requested metrics."""
        from unittest.mock import AsyncMock, MagicMock, patch

        from webapp.services.inline_scorer import InlineScorer
        from rag_eval.settings import EvaluationSettings

        # Canned pipeline result: both requested metrics scored, no error.
        mock_score = MagicMock()
        mock_score.metrics = {"faithfulness": 0.9, "answer_relevancy": 0.8}
        mock_score.error = ""

        # score_sample is an AsyncMock, so calling it yields an awaitable
        # (presumably awaited inside InlineScorer.score -- confirm there).
        mock_pipeline = MagicMock()
        mock_pipeline.score_sample = AsyncMock(return_value=mock_score)

        # One flat ``with`` instead of three nested ones; backslash
        # continuations keep it valid on interpreters that predate
        # parenthesized context managers.
        with patch(
            "webapp.services.inline_scorer.build_models",
            return_value=(MagicMock(), MagicMock()),
        ), patch(
            "webapp.services.inline_scorer.MetricPipeline",
            return_value=mock_pipeline,
        ), patch(
            "webapp.services.inline_scorer._build_metric_instances",
            return_value={},
        ):
            scorer = InlineScorer()
            result = scorer.score(
                question="q",
                answer="a",
                contexts=["ctx1"],
                ground_truth=None,
                metrics=["faithfulness", "answer_relevancy"],
                judge_model="test-model",
                embedding_model="test-embed",
                settings=EvaluationSettings(_env_file=None),
            )

        assert "faithfulness" in result
        assert "answer_relevancy" in result
        assert result["faithfulness"] == pytest.approx(0.9)

    def test_score_converts_nan_to_none(self):
        """NaN scores are converted to None in the returned dict."""
        from unittest.mock import AsyncMock, MagicMock, patch

        from webapp.services.inline_scorer import InlineScorer
        from rag_eval.settings import EvaluationSettings

        # Canned pipeline result whose only metric is NaN.
        mock_score = MagicMock()
        mock_score.metrics = {"faithfulness": float("nan")}
        mock_score.error = ""

        mock_pipeline = MagicMock()
        mock_pipeline.score_sample = AsyncMock(return_value=mock_score)

        with patch(
            "webapp.services.inline_scorer.build_models",
            return_value=(MagicMock(), MagicMock()),
        ), patch(
            "webapp.services.inline_scorer.MetricPipeline",
            return_value=mock_pipeline,
        ), patch(
            "webapp.services.inline_scorer._build_metric_instances",
            return_value={},
        ):
            scorer = InlineScorer()
            result = scorer.score(
                question="q",
                answer="a",
                contexts=["c"],
                ground_truth=None,
                metrics=["faithfulness"],
                judge_model="m",
                embedding_model="e",
                settings=EvaluationSettings(_env_file=None),
            )

        assert result["faithfulness"] is None
|
||||
|
||||
Reference in New Issue
Block a user