feat: add POST /api/score endpoint for Dify real-time scoring

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-06-22 15:14:19 +08:00
parent e4d4e4968b
commit a03a24be4e
3 changed files with 321 additions and 4 deletions
--- a/tests/webapp/test_score_api.py
+++ b/tests/webapp/test_score_api.py
@@ -185,3 +185,143 @@ class TestInlineScorer:
                        settings=EvaluationSettings(_env_file=None),
                    )
        assert result["faithfulness"] is None
+
+
+# ── Endpoint integration tests ────────────────────────────────────────────────
+
+@pytest.fixture()
+def client(monkeypatch):
+    """Build a TestClient whose app sees a stubbed ``inline_scorer``.
+
+    ``webapp.api.score.inline_scorer`` is swapped for a MagicMock whose
+    ``score()`` returns fixed faithfulness / answer_relevancy values.
+    """
+    from unittest.mock import MagicMock
+    import webapp.api.score as score_mod
+
+    canned = {"faithfulness": 0.85, "answer_relevancy": 0.90}
+    stub = MagicMock(**{"score.return_value": canned})
+    monkeypatch.setattr(score_mod, "inline_scorer", stub)
+    from webapp.server import create_app
+    return TestClient(create_app())
+
+
+from fastapi.testclient import TestClient
+
+
+class TestScoreEndpoint:
+    """Integration tests for POST /api/score against a stubbed scorer."""
+
+    @staticmethod
+    def _make_client(monkeypatch, scorer, token=None):
+        """Build a TestClient whose app uses *scorer* as ``inline_scorer``.
+
+        If *token* is set, ``_get_settings`` yields it as ``score_api_token``.
+        """
+        import webapp.api.score as score_mod
+        from fastapi.testclient import TestClient
+
+        if token is not None:
+            from rag_eval.settings import EvaluationSettings
+            settings = EvaluationSettings(_env_file=None)
+            # NOTE(review): plain assignment is presumably rejected - confirm.
+            object.__setattr__(settings, "score_api_token", token)
+            monkeypatch.setattr(score_mod, "_get_settings", lambda: settings)
+        monkeypatch.setattr(score_mod, "inline_scorer", scorer)
+
+        from webapp.server import create_app
+        return TestClient(create_app())
+
+    def test_post_score_returns_200(self, client):
+        """A valid request returns scores and a latency figure."""
+        resp = client.post("/api/score", json={
+            "question": "What is CT?",
+            "answer": "CT is imaging.",
+            "contexts": "CT uses X-rays.",
+        })
+        assert resp.status_code == 200
+        data = resp.json()
+        assert "scores" in data
+        assert "latency_ms" in data
+        assert data["scores"]["faithfulness"] == pytest.approx(0.85)
+
+    def test_weighted_score_computed(self, client):
+        """The response carries a non-null ``weighted_score``."""
+        resp = client.post("/api/score", json={
+            "question": "q", "answer": "a", "contexts": "c",
+        })
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["weighted_score"] is not None
+
+    def test_missing_required_fields_returns_422(self, client):
+        """A body holding only ``question`` is rejected with 422."""
+        resp = client.post("/api/score", json={"question": "q"})
+        assert resp.status_code == 422
+
+    def test_invalid_metric_name_returns_422(self, client):
+        """An unrecognised metric name is rejected with 422."""
+        resp = client.post("/api/score", json={
+            "question": "q", "answer": "a", "contexts": "c",
+            "metrics": ["not_a_metric"],
+        })
+        assert resp.status_code == 422
+
+    def test_skipped_metrics_returned_when_no_ground_truth(self, client):
+        """``context_recall`` is reported as skipped without ground truth."""
+        resp = client.post("/api/score", json={
+            "question": "q", "answer": "a", "contexts": "c",
+            "metrics": ["faithfulness", "context_recall"],
+        })
+        assert resp.status_code == 200
+        data = resp.json()
+        assert "context_recall" in data["skipped_metrics"]
+
+    def test_contexts_split_on_separator(self, monkeypatch):
+        """contexts string is split before passing to scorer."""
+        from unittest.mock import MagicMock
+        calls = []
+
+        def capture(**kwargs):
+            calls.append(kwargs.get("contexts", []))
+            return {"faithfulness": 0.9}
+
+        scorer = MagicMock(**{"score.side_effect": capture})
+        tc = self._make_client(monkeypatch, scorer)
+        resp = tc.post("/api/score", json={
+            "question": "q", "answer": "a",
+            "contexts": "ctx1 |||| ctx2",
+            "context_separator": " |||| ",
+        })
+        # Check the status first so a rejected request is reported as such.
+        assert resp.status_code == 200
+        assert calls == [["ctx1", "ctx2"]]
+
+    def test_bearer_token_auth_required_when_configured(self, monkeypatch):
+        """When SCORE_API_TOKEN is set, requests without token get 401."""
+        from unittest.mock import MagicMock
+        scorer = MagicMock(**{"score.return_value": {"faithfulness": 0.9}})
+        tc = self._make_client(monkeypatch, scorer, token="secret-token")
+        body = {"question": "q", "answer": "a", "contexts": "c"}
+
+        # No auth header -> 401
+        resp = tc.post("/api/score", json=body)
+        assert resp.status_code == 401
+
+        # Correct token -> 200
+        resp = tc.post(
+            "/api/score", json=body,
+            headers={"Authorization": "Bearer secret-token"},
+        )
+        assert resp.status_code == 200
+
+    def test_wrong_bearer_token_returns_401(self, monkeypatch):
+        """A request carrying a non-matching bearer token gets 401."""
+        from unittest.mock import MagicMock
+        scorer = MagicMock(**{"score.return_value": {}})
+        tc = self._make_client(monkeypatch, scorer, token="correct-token")
+        resp = tc.post("/api/score",
+            json={"question": "q", "answer": "a", "contexts": "c"},
+            headers={"Authorization": "Bearer wrong-token"},
+        )
+        assert resp.status_code == 401