"""Route for real-time single-sample RAGAS scoring (Dify external Tool endpoint).""" from __future__ import annotations import time from typing import Annotated from fastapi import APIRouter, Header, HTTPException from rag_eval.metrics.weights import compute_weighted_score from rag_eval.settings import EvaluationSettings from webapp.models import ScoreRequest, ScoreResponse from webapp.services.inline_scorer import inline_scorer router = APIRouter(prefix="/api/score", tags=["score"]) def _get_settings() -> EvaluationSettings: """Return a fresh EvaluationSettings instance (overridable in tests).""" return EvaluationSettings() def _check_auth(authorization: str | None, token: str) -> None: """Raise 401 if Bearer token does not match the configured token.""" if authorization is None: raise HTTPException(status_code=401, detail="Missing Authorization header.") parts = authorization.split(" ", 1) if len(parts) != 2 or parts[0].lower() != "bearer" or parts[1] != token: raise HTTPException(status_code=401, detail="Invalid Bearer token.") @router.post( "", response_model=ScoreResponse, summary="单题实时评分(Dify 外部 Tool)", responses={ 200: {"description": "各指标得分和加权综合得分。"}, 401: {"description": "配置了 SCORE_API_TOKEN 但未提供有效 Bearer token。"}, 422: {"description": "请求参数校验失败。"}, }, ) def score_sample( request: ScoreRequest, authorization: Annotated[str | None, Header()] = None, ) -> ScoreResponse: """Accept one QA sample, run RAGAS metrics synchronously, and return scores.""" settings = _get_settings() # Require Bearer auth only when the deployment configured a shared token. if settings.score_api_token: _check_auth(authorization, settings.score_api_token) judge_model = request.judge_model or settings.ragas_judge_model embedding_model = request.embedding_model or settings.ragas_embedding_model effective = request.effective_metrics() requested = set(request.metrics) skipped = sorted(requested - set(effective)) if not effective: return ScoreResponse( scores={metric_name: None for metric_name in request.metrics}, weighted_score=None, latency_ms=0, skipped_metrics=skipped, ) t0 = time.monotonic() try: raw_scores = inline_scorer.score( question=request.question, answer=request.answer, contexts=request.contexts_as_list(), ground_truth=request.ground_truth, metrics=effective, judge_model=judge_model, embedding_model=embedding_model, settings=settings, ) except Exception as exc: # noqa: BLE001 latency_ms = int((time.monotonic() - t0) * 1000) return ScoreResponse( scores={}, weighted_score=None, latency_ms=latency_ms, skipped_metrics=skipped, error=f"{type(exc).__name__}: {exc}", ) latency_ms = int((time.monotonic() - t0) * 1000) # Keep skipped metrics visible to callers by emitting them as null scores. all_scores: dict[str, float | None] = {metric_name: None for metric_name in request.metrics} all_scores.update(raw_scores) weighted = compute_weighted_score( {key: value for key, value in raw_scores.items() if value is not None}, {}, ) return ScoreResponse( scores=all_scores, weighted_score=round(weighted, 4) if weighted is not None else None, latency_ms=latency_ms, skipped_metrics=skipped, )