feat: add InlineScorer service with LLM client caching

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
2026-06-22 15:03:43 +08:00
parent 761faf9c42
commit e4d4e4968b
2 changed files with 168 additions and 0 deletions

View File

@@ -126,3 +126,62 @@ class TestScoreResponse:
assert resp.scores["faithfulness"] == 0.85
assert resp.scores["answer_relevancy"] is None
assert resp.latency_ms == 1200
class TestInlineScorer:
    """Tests for ``InlineScorer.score`` with its collaborators patched out.

    ``build_models``, ``MetricPipeline`` and ``_build_metric_instances`` are
    replaced inside ``webapp.services.inline_scorer`` so that no real model
    or pipeline is constructed while the tests run.
    """

    @staticmethod
    def _score_with_stubbed_pipeline(pipeline_metrics, **score_kwargs):
        """Run ``InlineScorer().score`` against a stubbed metric pipeline.

        Args:
            pipeline_metrics: Mapping exposed as the ``metrics`` attribute of
                the object that the stubbed ``score_sample`` coroutine returns.
            **score_kwargs: Keyword arguments forwarded unchanged to
                ``InlineScorer.score``. ``ground_truth=None`` and a fresh
                ``EvaluationSettings(_env_file=None)`` are supplied here.

        Returns:
            Whatever ``InlineScorer.score`` returns.
        """
        # Imports stay local, as in the rest of this test module, so that
        # collecting the file does not import the application packages.
        from unittest.mock import AsyncMock, MagicMock, patch
        from webapp.services.inline_scorer import InlineScorer
        from rag_eval.settings import EvaluationSettings

        sample_score = MagicMock()
        sample_score.metrics = pipeline_metrics
        sample_score.error = ""

        pipeline = MagicMock()
        # AsyncMock makes ``score_sample`` awaitable.
        pipeline.score_sample = AsyncMock(return_value=sample_score)

        module = "webapp.services.inline_scorer"
        models_patch = patch(
            f"{module}.build_models",
            return_value=(MagicMock(), MagicMock()),
        )
        pipeline_patch = patch(f"{module}.MetricPipeline", return_value=pipeline)
        metrics_patch = patch(f"{module}._build_metric_instances", return_value={})

        # Entered in the same order as the original nested ``with`` blocks.
        with models_patch, pipeline_patch, metrics_patch:
            return InlineScorer().score(
                ground_truth=None,
                settings=EvaluationSettings(_env_file=None),
                **score_kwargs,
            )

    def test_score_returns_dict_with_requested_metrics(self):
        """InlineScorer.score returns a dict keyed by the requested metrics."""
        result = self._score_with_stubbed_pipeline(
            {"faithfulness": 0.9, "answer_relevancy": 0.8},
            question="q",
            answer="a",
            contexts=["ctx1"],
            metrics=["faithfulness", "answer_relevancy"],
            judge_model="test-model",
            embedding_model="test-embed",
        )

        assert "faithfulness" in result
        assert "answer_relevancy" in result
        assert result["faithfulness"] == pytest.approx(0.9)

    def test_score_converts_nan_to_none(self):
        """NaN scores are converted to None in the returned dict."""
        result = self._score_with_stubbed_pipeline(
            {"faithfulness": float("nan")},
            question="q",
            answer="a",
            contexts=["c"],
            metrics=["faithfulness"],
            judge_model="m",
            embedding_model="e",
        )

        assert result["faithfulness"] is None