"""Routes for session-grouped async RAGAS scoring (Dify multi-call integration). Use case: Dify evaluates multiple Q&A pairs in a session. Each pair gets its own `POST /api/score/session_async` call with a shared `session_id`. All results are accumulated into one report, visible in 「运行列表」→「报告详情」. Key behaviour: - Deterministic run_id: derived from session_id — same session always maps to the same report directory (outputs/score-session/session-/). - Append semantics: each call adds a new sample row. Previous rows are preserved. - Advisor regeneration: optimization_advice.md is regenerated after every call using the full set of accumulated rows. - Each call returns its own `job_id` for individual status polling, plus the shared `run_id` and `session_id`. Endpoints: POST /api/score/session_async Submit one call (returns job_id + run_id) GET /api/score/sessions List all sessions GET /api/score/sessions/{session_id} Session aggregate (call_count, metric_means, jobs) GET /api/score/session/jobs/{job_id} Status of one individual call """ from __future__ import annotations import logging from fastapi import APIRouter, HTTPException from webapp.models import ( AsyncScoreJobStatus, ScoreRequest, SessionScoreJobResponse, SessionScoreRequest, SessionStatus, ) from webapp.services.session_score_manager import session_score_manager router = APIRouter(prefix="/api/score", tags=["score"]) logger = logging.getLogger("webapp.api.session_score_jobs") @router.post( "/session_async", status_code=202, response_model=SessionScoreJobResponse, summary="提交 Session 异步评分(多样本批量聚合)", responses={ 202: { "description": ( "调用已排队,立即返回 job_id + run_id(202 Accepted)。\n\n" "相同 `session_id` 的多次调用合并为同一报告,每次调用新增一个样本行。\n" "评分完成后,`summary.md` 和 `optimization_advice.md` 增量更新。\n" "通过 `GET /api/score/sessions/{session_id}` 查看 session 聚合状态," "通过 `GET /api/score/session/jobs/{job_id}` 查询单次调用状态," "在「运行列表」中查看完整报告(run_id 即 `session-` 形式)。" ), "content": { "application/json": { "example": { "job_id": "abc123def456", "session_id": "dify-session-001", "run_id": "session-dify-session-001", "status": "queued", "call_count": 1, } } }, }, }, ) def submit_session_async_score(request: SessionScoreRequest) -> SessionScoreJobResponse: """提交 Session 异步 RAGAS 评分,立即返回 job_id。 相同 `session_id` 的多次调用合并到同一评估报告中,每次调用: 1. 新增一个样本行到 `scores.csv` 2. 重写 `summary.md`(包含所有累积样本的指标均值) 3. 重新生成 `optimization_advice.md`(基于全量样本的 LLM 优化建议) **适合 Dify 工作流**:在循环节点中批量调用,所有轮次共用同一 `session_id`, 最终在 RAGAS 平台「运行列表」中查看完整的批量评估报告。 """ logger.info( "[session_async] submit session_id=%s metrics=%s has_ctx=%s has_gt=%s", request.session_id, request.metrics, bool(request.contexts), bool(request.ground_truth), ) # Strip session_id to build a plain ScoreRequest for the manager score_request = ScoreRequest( question=request.question, answer=request.answer, contexts=request.contexts, ground_truth=request.ground_truth, context_separator=request.context_separator, metrics=request.metrics, judge_model=request.judge_model, embedding_model=request.embedding_model, ) status, run_id = session_score_manager.submit(request.session_id, score_request) # Compute call_count from current session state session_status = session_score_manager.get_session(request.session_id) call_count = session_status.call_count if session_status else 1 logger.info( "[session_async] queued job_id=%s session_id=%s run_id=%s call=%d", status.job_id, request.session_id, run_id, call_count, ) return SessionScoreJobResponse( job_id=status.job_id, session_id=request.session_id, run_id=run_id, status=status.status, call_count=call_count, ) @router.get( "/sessions", response_model=dict, summary="列出所有 Session 聚合状态", ) def list_sessions() -> dict: """返回所有 session 的聚合状态,按最近完成时间倒序排列。""" sessions = session_score_manager.list_sessions() logger.info("[session_score] list_sessions count=%d", len(sessions)) return {"sessions": [s.model_dump() for s in sessions]} @router.get( "/sessions/{session_id}", response_model=SessionStatus, summary="查询 Session 聚合状态(指标均值 + 所有调用记录)", responses={404: {"description": "指定 session_id 不存在。"}}, ) def get_session(session_id: str) -> SessionStatus: """查询 session 的聚合评分状态。 返回内容: - `run_id`:在「运行列表」中查看完整报告 - `call_count`:本 session 累计调用次数 - `metric_means`:所有已累积样本的各指标均值(实时读取 scores.csv) - `jobs`:本 session 所有调用记录列表 """ status = session_score_manager.get_session(session_id) if status is None: raise HTTPException(status_code=404, detail=f"Session not found: {session_id}") return status @router.get( "/session/jobs/{job_id}", response_model=AsyncScoreJobStatus, summary="查询 Session 单次调用状态", responses={404: {"description": "指定 job_id 不存在。"}}, ) def get_session_job(job_id: str) -> AsyncScoreJobStatus: """查询 session 评分中某次调用的状态和评分结果。 `status` 为 `completed` 时,`run_id` 即所属 session 的报告目录, `scores` 包含本次调用的各指标得分。 """ status = session_score_manager.get_job(job_id) if status is None: raise HTTPException( status_code=404, detail=f"Session score job not found: {job_id}" ) return status