106 lines
3.6 KiB
Python
106 lines
3.6 KiB
Python
"""Route for real-time single-sample RAGAS scoring (Dify external Tool endpoint)."""
|
||
|
||
from __future__ import annotations
|
||
|
||
import time
|
||
from typing import Annotated
|
||
|
||
from fastapi import APIRouter, Header, HTTPException
|
||
|
||
from rag_eval.metrics.weights import compute_weighted_score
|
||
from rag_eval.settings import EvaluationSettings
|
||
from webapp.models import ScoreRequest, ScoreResponse
|
||
from webapp.services.inline_scorer import inline_scorer
|
||
|
||
router = APIRouter(prefix="/api/score", tags=["score"])
|
||
|
||
|
||
def _get_settings() -> EvaluationSettings:
    """Build the evaluation settings used to serve one request.

    A new ``EvaluationSettings`` object is constructed on every call; nothing
    is cached here. The indirection exists so tests can swap this function out.
    """
    settings = EvaluationSettings()
    return settings
|
||
|
||
|
||
def _check_auth(authorization: str | None, token: str) -> None:
|
||
"""Raise 401 if Bearer token does not match the configured token."""
|
||
if authorization is None:
|
||
raise HTTPException(status_code=401, detail="Missing Authorization header.")
|
||
parts = authorization.split(" ", 1)
|
||
if len(parts) != 2 or parts[0].lower() != "bearer" or parts[1] != token:
|
||
raise HTTPException(status_code=401, detail="Invalid Bearer token.")
|
||
|
||
|
||
@router.post(
    "",
    response_model=ScoreResponse,
    summary="单题实时评分(Dify 外部 Tool)",
    responses={
        200: {"description": "各指标得分和加权综合得分。"},
        401: {"description": "配置了 SCORE_API_TOKEN 但未提供有效 Bearer token。"},
        422: {"description": "请求参数校验失败。"},
    },
)
def score_sample(
    request: ScoreRequest,
    authorization: Annotated[str | None, Header()] = None,
) -> ScoreResponse:
    """Accept one QA sample, run RAGAS metrics synchronously, and return scores.

    Args:
        request: The sample to score, plus optional judge/embedding model
            overrides and the list of requested metrics.
        authorization: Value of the ``Authorization`` request header, if any.
            Only inspected when ``settings.score_api_token`` is set.

    Returns:
        A ``ScoreResponse``. Every requested metric appears in ``scores``
        (``None`` when it produced no value or was skipped), except when the
        scorer raises: then ``scores`` is empty and ``error`` carries the
        exception type and message.

    Raises:
        HTTPException: 401 from ``_check_auth`` when a token is configured and
            the header is missing or wrong.
    """
    # Function-scope import so this block does not depend on a file-level import.
    import logging

    settings = _get_settings()

    # Require Bearer auth only when the deployment configured a shared token.
    if settings.score_api_token:
        _check_auth(authorization, settings.score_api_token)

    # Per-request model overrides win; fall back to the configured defaults.
    judge_model = request.judge_model or settings.ragas_judge_model
    embedding_model = request.embedding_model or settings.ragas_embedding_model

    effective = request.effective_metrics()
    # Requested metrics that effective_metrics() dropped, reported back sorted.
    skipped = sorted(set(request.metrics) - set(effective))

    # Keep skipped metrics visible to callers by emitting them as null scores.
    all_scores: dict[str, float | None] = {
        metric_name: None for metric_name in request.metrics
    }

    if not effective:
        # Nothing to run: answer immediately without touching the scorer.
        return ScoreResponse(
            scores=all_scores,
            weighted_score=None,
            latency_ms=0,
            skipped_metrics=skipped,
        )

    t0 = time.monotonic()
    try:
        raw_scores = inline_scorer.score(
            question=request.question,
            answer=request.answer,
            contexts=request.contexts_as_list(),
            ground_truth=request.ground_truth,
            metrics=effective,
            judge_model=judge_model,
            embedding_model=embedding_model,
            settings=settings,
        )
    except Exception as exc:  # noqa: BLE001
        latency_ms = int((time.monotonic() - t0) * 1000)
        # The failure is reported to the caller in the response body rather
        # than as an HTTP error, so record the traceback server-side too.
        logging.getLogger(__name__).exception(
            "Inline scoring failed after %d ms", latency_ms
        )
        return ScoreResponse(
            scores={},
            weighted_score=None,
            latency_ms=latency_ms,
            skipped_metrics=skipped,
            error=f"{type(exc).__name__}: {exc}",
        )

    latency_ms = int((time.monotonic() - t0) * 1000)

    all_scores.update(raw_scores)

    # Only metrics that produced a value take part in the weighted score.
    # NOTE(review): the second argument is an empty mapping, presumably
    # meaning "no weight overrides"; confirm against compute_weighted_score.
    weighted = compute_weighted_score(
        {key: value for key, value in raw_scores.items() if value is not None},
        {},
    )

    return ScoreResponse(
        scores=all_scores,
        weighted_score=round(weighted, 4) if weighted is not None else None,
        latency_ms=latency_ms,
        skipped_metrics=skipped,
    )
|