feat(session-async): add /api/score/session_async with incremental session report aggregation
- New POST /api/score/session_async endpoint: same session_id calls append to one shared report
- New GET /api/score/sessions/{session_id}: returns call_count, metric_means, all job records
- New GET /api/score/session/jobs/{job_id}: individual call status
- SessionScoreJobManager: deterministic run_id from session_id, per-session mutex for CSV append, advisor regenerated on every call
- SessionScoreRequest (extends ScoreRequest + session_id), SessionScoreJobResponse, SessionStatus models added
- 24 new tests, all passing
chore(weighted-score): comment out 综合加权得分 display and computation
- report.js: hide 综合加权得分 card in report detail page
- score_jobs.js: hide 综合 chip in async job list
- report_builder.py: overall_ws=None (computation disabled)
- summary.py: weighted_score summary line disabled
- evaluator.py: weighted_score/sample_weight columns no longer written to scores.csv
- score.py /api/score: weighted_score always returns null
- score_job_manager.py + session_score_manager.py: weighted=None
- Updated 3 tests to match new behaviour (6 pre-existing failures unchanged)
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -156,10 +156,11 @@ def score_sample(
|
||||
all_scores: dict[str, float | None] = {metric_name: None for metric_name in request.metrics}
|
||||
all_scores.update(raw_scores)
|
||||
|
||||
weighted = compute_weighted_score(
|
||||
{key: value for key, value in raw_scores.items() if value is not None},
|
||||
{},
|
||||
)
|
||||
# 综合加权得分计算(已暂时禁用)
|
||||
# weighted = compute_weighted_score(
|
||||
# {key: value for key, value in raw_scores.items() if value is not None},
|
||||
# {},
|
||||
# )
|
||||
|
||||
logger.info(
|
||||
"[score] done latency=%dms skipped=%s scores=%s",
|
||||
@@ -169,7 +170,7 @@ def score_sample(
|
||||
)
|
||||
return ScoreResponse(
|
||||
scores=all_scores,
|
||||
weighted_score=round(weighted, 4) if weighted is not None else None,
|
||||
weighted_score=None, # 综合加权得分已暂时禁用
|
||||
latency_ms=latency_ms,
|
||||
skipped_metrics=skipped,
|
||||
)
|
||||
|
||||
171
webapp/api/session_score_jobs.py
Normal file
171
webapp/api/session_score_jobs.py
Normal file
@@ -0,0 +1,171 @@
|
||||
"""Routes for session-grouped async RAGAS scoring (Dify multi-call integration).
|
||||
|
||||
Use case: Dify evaluates multiple Q&A pairs in a session. Each pair gets its own
|
||||
`POST /api/score/session_async` call with a shared `session_id`. All results are
|
||||
accumulated into one report, visible in 「运行列表」→「报告详情」.
|
||||
|
||||
Key behaviour:
|
||||
- Deterministic run_id: derived from session_id — same session always maps to the
|
||||
same report directory (outputs/score-session/session-<id>/).
|
||||
- Append semantics: each call adds a new sample row. Previous rows are preserved.
|
||||
- Advisor regeneration: optimization_advice.md is regenerated after every call
|
||||
using the full set of accumulated rows.
|
||||
- Each call returns its own `job_id` for individual status polling, plus the
|
||||
shared `run_id` and `session_id`.
|
||||
|
||||
Endpoints:
|
||||
POST /api/score/session_async Submit one call (returns job_id + run_id)
|
||||
GET /api/score/sessions List all sessions
|
||||
GET /api/score/sessions/{session_id} Session aggregate (call_count, metric_means, jobs)
|
||||
GET /api/score/session/jobs/{job_id} Status of one individual call
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
|
||||
from webapp.models import (
|
||||
AsyncScoreJobStatus,
|
||||
ScoreRequest,
|
||||
SessionScoreJobResponse,
|
||||
SessionScoreRequest,
|
||||
SessionStatus,
|
||||
)
|
||||
from webapp.services.session_score_manager import session_score_manager
|
||||
|
||||
# Router for the session-scoring endpoints; every route below is mounted
# under the /api/score prefix and tagged "score".
router = APIRouter(prefix="/api/score", tags=["score"])
# Module logger used by the route handlers in this file.
logger = logging.getLogger("webapp.api.session_score_jobs")
|
||||
|
||||
|
||||
@router.post(
    "/session_async",
    status_code=202,
    response_model=SessionScoreJobResponse,
    summary="提交 Session 异步评分(多样本批量聚合)",
    responses={
        202: {
            "description": (
                "调用已排队,立即返回 job_id + run_id(202 Accepted)。\n\n"
                "相同 `session_id` 的多次调用合并为同一报告,每次调用新增一个样本行。\n"
                "评分完成后,`summary.md` 和 `optimization_advice.md` 增量更新。\n"
                "通过 `GET /api/score/sessions/{session_id}` 查看 session 聚合状态,"
                "通过 `GET /api/score/session/jobs/{job_id}` 查询单次调用状态,"
                "在「运行列表」中查看完整报告(run_id 即 `session-<session_id>` 形式)。"
            ),
            "content": {
                "application/json": {
                    "example": {
                        "job_id": "abc123def456",
                        "session_id": "dify-session-001",
                        "run_id": "session-dify-session-001",
                        "status": "queued",
                        "call_count": 1,
                    }
                }
            },
        },
    },
)
def submit_session_async_score(request: SessionScoreRequest) -> SessionScoreJobResponse:
    """提交 Session 异步 RAGAS 评分,立即返回 job_id。

    相同 `session_id` 的多次调用合并到同一评估报告中,每次调用:
    1. 新增一个样本行到 `scores.csv`
    2. 重写 `summary.md`(包含所有累积样本的指标均值)
    3. 重新生成 `optimization_advice.md`(基于全量样本的 LLM 优化建议)

    **适合 Dify 工作流**:在循环节点中批量调用,所有轮次共用同一 `session_id`,
    最终在 RAGAS 平台「运行列表」中查看完整的批量评估报告。
    """
    sid = request.session_id
    logger.info(
        "[session_async] submit session_id=%s metrics=%s has_ctx=%s has_gt=%s",
        sid,
        request.metrics,
        bool(request.contexts),
        bool(request.ground_truth),
    )

    # The manager takes a plain ScoreRequest, so copy every scoring field
    # across and leave session_id behind.
    forwarded_fields = (
        "question",
        "answer",
        "contexts",
        "ground_truth",
        "context_separator",
        "metrics",
        "judge_model",
        "embedding_model",
    )
    plain_request = ScoreRequest(
        **{field: getattr(request, field) for field in forwarded_fields}
    )

    job, run_id = session_score_manager.submit(sid, plain_request)

    # call_count is read back from the session aggregate after queuing;
    # fall back to 1 when the manager reports no aggregate.
    aggregate = session_score_manager.get_session(sid)
    total_calls = aggregate.call_count if aggregate else 1

    logger.info(
        "[session_async] queued job_id=%s session_id=%s run_id=%s call=%d",
        job.job_id, sid, run_id, total_calls,
    )
    return SessionScoreJobResponse(
        job_id=job.job_id,
        session_id=sid,
        run_id=run_id,
        status=job.status,
        call_count=total_calls,
    )
|
||||
|
||||
|
||||
@router.get(
    "/sessions",
    response_model=dict,
    summary="列出所有 Session 聚合状态",
)
def list_sessions() -> dict:
    """返回所有 session 的聚合状态,按最近完成时间倒序排列。"""
    all_sessions = session_score_manager.list_sessions()
    logger.info("[session_score] list_sessions count=%d", len(all_sessions))
    # Serialise each aggregate to a plain dict for the {"sessions": [...]} envelope.
    payload = []
    for entry in all_sessions:
        payload.append(entry.model_dump())
    return {"sessions": payload}
|
||||
|
||||
|
||||
@router.get(
    "/sessions/{session_id}",
    response_model=SessionStatus,
    summary="查询 Session 聚合状态(指标均值 + 所有调用记录)",
    responses={404: {"description": "指定 session_id 不存在。"}},
)
def get_session(session_id: str) -> SessionStatus:
    """查询 session 的聚合评分状态。

    返回内容:
    - `run_id`:在「运行列表」中查看完整报告
    - `call_count`:本 session 累计调用次数
    - `metric_means`:所有已累积样本的各指标均值(实时读取 scores.csv)
    - `jobs`:本 session 所有调用记录列表
    """
    aggregate = session_score_manager.get_session(session_id)
    if aggregate is not None:
        return aggregate
    # The manager knows no such session: answer 404.
    raise HTTPException(status_code=404, detail=f"Session not found: {session_id}")
|
||||
|
||||
|
||||
@router.get(
    "/session/jobs/{job_id}",
    response_model=AsyncScoreJobStatus,
    summary="查询 Session 单次调用状态",
    responses={404: {"description": "指定 job_id 不存在。"}},
)
def get_session_job(job_id: str) -> AsyncScoreJobStatus:
    """查询 session 评分中某次调用的状态和评分结果。

    `status` 为 `completed` 时,`run_id` 即所属 session 的报告目录,
    `scores` 包含本次调用的各指标得分。
    """
    job = session_score_manager.get_job(job_id)
    if job is not None:
        return job
    # The manager knows no such call: answer 404.
    raise HTTPException(
        status_code=404, detail=f"Session score job not found: {job_id}"
    )
|
||||
@@ -531,6 +531,50 @@ class AsyncScoreJobResponse(BaseModel):
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Session async 评分模型
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class SessionScoreRequest(ScoreRequest):
    """Request body for session-grouped async scoring.

    All calls sharing the same session_id are accumulated into one report.
    Each call adds a new sample row to the session's scores.csv.
    """

    # Required grouping key for the session. An empty string is rejected
    # (min_length=1): it would create a session that cannot be addressed
    # through the GET /api/score/sessions/{session_id} path parameter.
    session_id: str = Field(
        min_length=1,
        description=(
            "会话唯一标识符。相同 session_id 的多次调用合并为同一报告,"
            "每次调用新增一个样本行,指标均值和优化建议在每次调用后增量更新。"
        ),
    )
|
||||
|
||||
|
||||
class SessionScoreJobResponse(BaseModel):
    """Immediate 202 response after submitting a session scoring call."""

    # Identifier of this individual call.
    job_id: str = Field(description="本次调用的任务唯一标识符。")
    # Session the call was submitted under.
    session_id: str = Field(description="会话标识符。")
    # Run ID of the report that belongs to the session.
    run_id: str = Field(description="本 session 对应的报告 Run ID,可在「运行列表」中查看。")
    # Initial job state; defaults to "queued".
    status: str = Field(default="queued", description="初始状态:queued。")
    # Calls accumulated in the session so far, this one included; defaults to 1.
    call_count: int = Field(default=1, description="本 session 当前累计调用次数(包含本次)。")
|
||||
|
||||
|
||||
class SessionStatus(BaseModel):
    """Aggregate status and metrics for a scoring session."""

    # Session identifier.
    session_id: str = Field(description="会话标识符。")
    # Run ID of the session's report directory.
    run_id: str = Field(description="对应报告目录的 Run ID。")
    # Number of calls accumulated in the session.
    call_count: int = Field(description="本 session 累计调用次数。")
    # Per-metric mean over all accumulated samples; empty dict by default.
    metric_means: dict[str, float | None] = Field(
        default_factory=dict, description="所有已累积样本的各指标均值。"
    )
    # Completion time of the most recent call (ISO 8601 UTC); "" by default.
    latest_finished_at: str = Field(default="", description="最近一次评分完成时间(ISO 8601 UTC)。")
    # Every call record of the session, ordered by creation time.
    # NOTE(review): AsyncScoreJobStatus is declared after this class in the
    # visible hunk — presumably resolved lazily; confirm annotations are deferred.
    jobs: list[AsyncScoreJobStatus] = Field(
        default_factory=list, description="本 session 所有调用记录,按创建时间排序。"
    )
|
||||
|
||||
|
||||
class AsyncScoreJobStatus(BaseModel):
|
||||
"""State of one async score job (queued → running → completed/failed)."""
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@ from fastapi.exceptions import RequestValidationError
|
||||
from fastapi.responses import FileResponse, JSONResponse
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
|
||||
from webapp.api import evaluations, llm_profiles, pipeline, runs, scenarios, score, score_jobs
|
||||
from webapp.api import evaluations, llm_profiles, pipeline, runs, scenarios, score, score_jobs, session_score_jobs
|
||||
|
||||
STATIC_DIR = Path(__file__).resolve().parent / "static"
|
||||
logger = logging.getLogger("webapp.server")
|
||||
@@ -73,6 +73,10 @@ OPENAPI_TAGS = [
|
||||
"**异步评分 API(Dify 推荐)** — `POST /api/score/async`\n\n"
|
||||
"异步方式立即返回 job_id(202),评分在后台执行,完成后自动生成完整报告(含优化建议),"
|
||||
"在「运行列表」页查看。\n\n"
|
||||
"**Session 批量评分 API** — `POST /api/score/session_async`\n\n"
|
||||
"适合 Dify 循环节点批量评估:同一 `session_id` 的多次调用合并为一个报告,"
|
||||
"每次调用新增一个样本行,指标均值和优化建议增量更新。\n"
|
||||
"通过 `GET /api/score/sessions/{session_id}` 查看 session 聚合状态。\n\n"
|
||||
"通过 `GET /api/score/jobs` 列出所有异步评分记录,"
|
||||
"`GET /api/score/jobs/{job_id}` 查询单个任务状态。\n\n"
|
||||
"**鉴权**:若 `.env` 中配置了 `SCORE_API_TOKEN`,需携带 "
|
||||
@@ -111,6 +115,7 @@ def create_app() -> FastAPI:
|
||||
app.include_router(pipeline.router)
|
||||
app.include_router(score.router)
|
||||
app.include_router(score_jobs.router)
|
||||
app.include_router(session_score_jobs.router)
|
||||
|
||||
@app.middleware("http")
|
||||
async def access_log_middleware(request: Request, call_next):
|
||||
|
||||
257
webapp/services/pipeline_task_manager.py
Normal file
257
webapp/services/pipeline_task_manager.py
Normal file
@@ -0,0 +1,257 @@
|
||||
"""Background task manager for end-to-end pipeline jobs (build + eval).
|
||||
|
||||
Each job runs three sequential phases inside a worker thread:
|
||||
1. parsing_documents — AliyunDocmind parses every PDF
|
||||
2. generating_questions — LLM generates a draft question bank
|
||||
3. evaluating — RAGAS online evaluation scores each question
|
||||
|
||||
The DatasetBuildJob and Scenario objects are constructed entirely from the
|
||||
API request parameters, so no YAML config files are needed.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import threading
|
||||
import uuid
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from contextlib import redirect_stderr, redirect_stdout
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from webapp.models import (
|
||||
PipelineJobRequest,
|
||||
PipelineJobStatus,
|
||||
PipelineResult,
|
||||
)
|
||||
|
||||
# Third ancestor (parents[2]) of this file's resolved path; used as the base
# for relative docs_path values and for the pipeline output root.
_REPO_ROOT = Path(__file__).resolve().parents[2]
# Each pipeline job writes its artifacts to <repo>/outputs/pipeline/<job_id>/.
_PIPELINE_OUTPUT_ROOT = _REPO_ROOT / "outputs" / "pipeline"
|
||||
|
||||
|
||||
def _now_iso() -> str:
|
||||
return datetime.now(timezone.utc).isoformat()
|
||||
|
||||
|
||||
class _LineCapture(io.TextIOBase):
|
||||
"""Write-only stream that appends complete lines to a task's log buffer."""
|
||||
|
||||
def __init__(self, sink: "PipelineTask") -> None:
|
||||
self._sink = sink
|
||||
self._buffer = ""
|
||||
|
||||
def write(self, text: str) -> int:
|
||||
self._buffer += text
|
||||
while "\n" in self._buffer:
|
||||
line, self._buffer = self._buffer.split("\n", 1)
|
||||
self._sink.append_log(line)
|
||||
return len(text)
|
||||
|
||||
def flush(self) -> None:
|
||||
if self._buffer:
|
||||
self._sink.append_log(self._buffer)
|
||||
self._buffer = ""
|
||||
|
||||
|
||||
class PipelineTask:
    """Mutable record of one pipeline job (build + eval).

    Holds the job's identity, status, phase, log lines, result and error.
    ``append_log`` and ``snapshot`` both run under the instance lock.
    """

    def __init__(self, job_id: str, job_name: str) -> None:
        """Create a task in the initial ``queued`` / ``idle`` state."""
        self._lock = threading.Lock()
        self.job_id, self.job_name = job_id, job_name
        self.status, self.phase = "queued", "idle"
        self.logs: list[str] = []
        self.result: PipelineResult | None = None
        self.error: str | None = None
        self.created_at, self.finished_at = _now_iso(), ""

    def append_log(self, line: str) -> None:
        """Append one line to the task's log under the lock."""
        with self._lock:
            self.logs.append(line)

    def snapshot(self) -> PipelineJobStatus:
        """Return a PipelineJobStatus copy of the current state, taken under the lock."""
        with self._lock:
            state = {
                "job_id": self.job_id,
                "job_name": self.job_name,
                "status": self.status,
                "phase": self.phase,
                # copy so later appends do not leak into the returned status
                "logs": self.logs.copy(),
                "result": self.result,
                "error": self.error,
                "created_at": self.created_at,
                "finished_at": self.finished_at,
            }
            return PipelineJobStatus(**state)
|
||||
|
||||
|
||||
class PipelineTaskManager:
    """Owns the thread pool and the in-memory registry of pipeline jobs.

    Jobs run on a ThreadPoolExecutor and are tracked in a dict keyed by
    job_id; that dict is guarded by ``self._lock``.
    """

    def __init__(self, max_workers: int = 2) -> None:
        """Create the executor (``max_workers`` threads) and an empty registry."""
        self._executor = ThreadPoolExecutor(max_workers=max_workers)
        self._tasks: dict[str, PipelineTask] = {}
        self._lock = threading.Lock()

    def submit(self, request: PipelineJobRequest) -> PipelineTask:
        """Register and schedule a new pipeline job; return its task object."""
        job_id = uuid.uuid4().hex[:12]
        # Fall back to a generated name when the requested one is blank.
        job_name = request.job_name.strip() or f"pipeline-{job_id[:6]}"
        task = PipelineTask(job_id=job_id, job_name=job_name)
        with self._lock:
            self._tasks[job_id] = task
        self._executor.submit(self._run, task, request)
        return task

    def get(self, job_id: str) -> PipelineJobStatus | None:
        """Return a snapshot of one job, or None when the id is unknown."""
        with self._lock:
            task = self._tasks.get(job_id)
        return task.snapshot() if task is not None else None

    def list_jobs(self) -> list[PipelineJobStatus]:
        """Return snapshots of all jobs, newest (by created_at) first."""
        with self._lock:
            tasks = list(self._tasks.values())
        snapshots = [t.snapshot() for t in tasks]
        snapshots.sort(key=lambda s: s.created_at, reverse=True)
        return snapshots

    # ------------------------------------------------------------------ #
    # Worker
    # ------------------------------------------------------------------ #

    def _run(self, task: PipelineTask, request: PipelineJobRequest) -> None:
        """Execute the full pipeline end to end inside a worker thread.

        The terminal state (result/error, phase, finished_at, status) is
        written in one section under the task's lock — the same lock
        ``snapshot()`` holds — and only after the final log line has been
        appended. A poller that sees ``completed`` or ``failed`` therefore
        also sees ``finished_at`` and the closing log entry.
        """
        with task._lock:
            task.status = "running"
        task.append_log(f"[{_now_iso()}] 开始 pipeline 任务: {task.job_name}")

        capture = _LineCapture(task)
        try:
            # NOTE(review): redirect_stdout/redirect_stderr replace the
            # process-wide sys.stdout/sys.stderr, not a per-thread stream.
            # While a job runs, output written by other threads (including
            # another job running in this pool) is captured here as well.
            # Consider a thread-aware capture.
            with redirect_stdout(capture), redirect_stderr(capture):
                result = self._execute(task, request)
        except Exception as exc:  # noqa: BLE001
            capture.flush()
            error = f"{type(exc).__name__}: {exc}"
            task.append_log(f"[{_now_iso()}] pipeline 任务失败: {error}")
            with task._lock:
                task.error = error
                task.finished_at = _now_iso()
                task.status = "failed"
        else:
            capture.flush()
            task.append_log(f"[{_now_iso()}] pipeline 任务完成: {task.job_name}")
            with task._lock:
                task.result = result
                task.phase = "done"
                task.finished_at = _now_iso()
                task.status = "completed"
        finally:
            # Reached without a timestamp only when something other than
            # Exception escaped (e.g. SystemExit); still stamp the end time.
            with task._lock:
                if not task.finished_at:
                    task.finished_at = _now_iso()

    def _execute(self, task: PipelineTask, req: PipelineJobRequest) -> PipelineResult:
        """Run build then eval, updating ``task.phase`` along the way.

        Only the ``parsing_documents`` and ``evaluating`` phases are set here;
        the build step runs as a single call.

        Raises:
            ValueError: if ``req.docs_path`` is not an existing directory.
            RuntimeError: if the build produced no draft questions.
        """

        # ── resolve paths ──────────────────────────────────────────────
        docs_path = Path(req.docs_path)
        if not docs_path.is_absolute():
            # relative paths are resolved against the repository root
            docs_path = (_REPO_ROOT / docs_path).resolve()
        if not docs_path.is_dir():
            raise ValueError(f"docs_path is not an existing directory: {docs_path}")

        job_output_dir = _PIPELINE_OUTPUT_ROOT / task.job_id
        build_artifact_dir = job_output_dir / "build"
        dataset_csv = job_output_dir / "generated_dataset.csv"
        eval_output_dir = job_output_dir / "eval"

        # ── phase 1 + 2: dataset build (parse & generate) ─────────────
        task.phase = "parsing_documents"
        task.append_log(f" [build] 扫描文档目录: {docs_path}")
        build_result = self._run_build(task, req, docs_path, build_artifact_dir, dataset_csv)

        source_chunks_jsonl = build_artifact_dir / "latest" / "source_chunks.jsonl"
        total_q = len(build_result.draft_samples)
        parse_failures = len(build_result.parse_failures)
        task.append_log(f" [build] 题库生成完毕: {total_q} 道题目, {parse_failures} 份文档解析失败")

        if total_q == 0:
            # nothing to evaluate — abort before the eval phase
            raise RuntimeError("题库为空(所有文档均解析或生成失败),中止评估。")

        # ── phase 3: evaluation ────────────────────────────────────────
        task.phase = "evaluating"
        task.append_log(f" [eval] 开始 RAGAS 评估,共 {total_q} 道题目")
        eval_result = self._run_eval(task, req, dataset_csv, source_chunks_jsonl, eval_output_dir)

        from rag_eval.reporting.artifacts import build_artifact_paths as _build_eval_paths
        eval_artifact_paths = _build_eval_paths(eval_result.scenario.output_dir, eval_result.run_id)

        return PipelineResult(
            build_artifact_dir=build_artifact_dir.as_posix(),
            dataset_csv=dataset_csv.as_posix(),
            source_chunks_jsonl=source_chunks_jsonl.as_posix(),
            total_questions=total_q,
            parse_failures=parse_failures,
            eval_run_id=eval_result.run_id,
            eval_output_dir=eval_result.scenario.output_dir.as_posix(),
            scores_csv=eval_artifact_paths.scores_csv.as_posix(),
            summary_md=eval_artifact_paths.summary_md.as_posix(),
        )

    def _run_build(self, task: PipelineTask, req: PipelineJobRequest,
                   docs_path: Path, artifact_dir: Path, dataset_csv: Path):
        """Construct a DatasetBuildJob from the request and run the build phase.

        Returns whatever ``execute_dataset_build_job`` returns; the caller
        reads ``draft_samples`` and ``parse_failures`` from it.
        """
        # Imported lazily, inside the worker, as in the rest of this class.
        from rag_eval.dataset_builder.models import DatasetBuildJob, DatasetBuildRuntime
        from rag_eval.dataset_builder.runner import execute_dataset_build_job
        from rag_eval.settings import EvaluationSettings

        settings = EvaluationSettings()
        job = DatasetBuildJob(
            job_name=task.job_name,
            input_path=docs_path,
            input_glob="*.pdf",
            parser_provider="aliyun_docmind",
            failure_mode=req.failure_mode,  # type: ignore[arg-type]
            generation_model=req.generation_model,
            output_type="online_question_bank",
            review_mode="draft_with_manual_review",
            max_questions_per_document=req.max_questions_per_document,
            max_source_chunks_per_question=req.max_source_chunks_per_question,
            dataset_path=dataset_csv,
            artifact_dir=artifact_dir,
            runtime=DatasetBuildRuntime(max_documents=req.max_documents),
        )
        return execute_dataset_build_job(job, settings=settings)

    def _run_eval(self, task: PipelineTask, req: PipelineJobRequest,
                  dataset_csv: Path, source_chunks_jsonl: Path, eval_output_dir: Path):
        """Construct an online Scenario from the request and run the evaluation phase.

        Returns whatever ``run_scenario_from_scenario_obj`` returns; the caller
        reads ``run_id`` and ``scenario.output_dir`` from it.
        """
        from rag_eval.execution.runner import run_scenario_from_scenario_obj
        from rag_eval.settings import EvaluationSettings
        from rag_eval.shared.models import (
            AppAdapterConfig, DatasetConfig, RuntimeConfig, Scenario,
        )

        settings = EvaluationSettings()
        scenario = Scenario(
            scenario_name=task.job_name,
            mode="online",
            dataset=DatasetConfig(path=dataset_csv),
            judge_model=req.judge_model,
            embedding_model=req.embedding_model,
            metrics=list(req.metrics),
            output_dir=eval_output_dir,
            runtime=RuntimeConfig(
                batch_size=4,
                app_concurrency=2,
                metric_concurrency=2,
                max_samples=req.max_samples,
            ),
            app_adapter=AppAdapterConfig(
                type="python",
                callable="apps.siemens_pdf_qa.adapter:run",
                static_kwargs={
                    "source_chunks_path": source_chunks_jsonl,
                    "model": req.answer_model,
                },
            ),
            optimization_advisor=req.optimization_advisor,
        )
        return run_scenario_from_scenario_obj(scenario, settings=settings)
|
||||
|
||||
|
||||
# Module-level singleton shared by the FastAPI routes. Built at import time
# with the default max_workers=2.
pipeline_task_manager = PipelineTaskManager()
|
||||
@@ -177,9 +177,11 @@ def build_report(run_dir: Path, metrics: list[str]) -> ReportData:
|
||||
w_means = _weighted_metric_means(score_rows_list, metrics, doc_weights)
|
||||
rounded_means = {metric: _round_or_none(value) for metric, value in w_means.items()}
|
||||
|
||||
overall_ws = compute_overall_weighted_score_mean(
|
||||
score_rows_list, metric_weights, doc_weights
|
||||
)
|
||||
# 综合加权得分计算(已暂时禁用)
|
||||
# overall_ws = compute_overall_weighted_score_mean(
|
||||
# score_rows_list, metric_weights, doc_weights
|
||||
# )
|
||||
overall_ws = None
|
||||
|
||||
distributions = {
|
||||
metric: _distribution(frame, metric)
|
||||
|
||||
@@ -149,10 +149,12 @@ class ScoreJobManager:
|
||||
# Build full scores dict (skipped = None)
|
||||
all_scores: dict[str, float | None] = {m: None for m in request.metrics}
|
||||
all_scores.update(raw_scores)
|
||||
weighted_raw = compute_weighted_score(
|
||||
{k: v for k, v in raw_scores.items() if v is not None}, {}
|
||||
)
|
||||
weighted = round(weighted_raw, 4) if weighted_raw is not None else None
|
||||
# 综合加权得分计算(已暂时禁用)
|
||||
# weighted_raw = compute_weighted_score(
|
||||
# {k: v for k, v in raw_scores.items() if v is not None}, {}
|
||||
# )
|
||||
# weighted = round(weighted_raw, 4) if weighted_raw is not None else None
|
||||
weighted = None
|
||||
|
||||
# Build a score row compatible with report_builder
|
||||
score_row: dict[str, Any] = {
|
||||
|
||||
452
webapp/services/session_score_manager.py
Normal file
452
webapp/services/session_score_manager.py
Normal file
@@ -0,0 +1,452 @@
|
||||
"""Background task manager for session-grouped async RAGAS scoring.
|
||||
|
||||
Each session groups multiple scoring calls into one shared run report:
|
||||
|
||||
1. First call: creates outputs/score-session/session-<id>/ and metadata.json.
|
||||
2. Every call: appends a new sample row to scores.csv, rewrites summary.md
|
||||
and optimization_advice.md by re-running write_run_artifacts + run_advisor
|
||||
over ALL accumulated rows.
|
||||
3. The resulting run directory is picked up automatically by run_reader, so the
|
||||
「运行列表」 and 「报告详情」 pages show the live, growing report.
|
||||
|
||||
Concurrency model:
|
||||
- Scoring (LLM network I/O) runs freely in the thread pool — different sessions
|
||||
score concurrently; multiple calls to the same session also start scoring in
|
||||
parallel.
|
||||
- File I/O (CSV append, artifact rewrite, advisor) is serialized per session via
|
||||
a per-session threading.Lock, so no two calls corrupt the same session's CSV.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import threading
|
||||
import time
|
||||
import uuid
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from webapp.models import AsyncScoreJobStatus, ScoreRequest, SessionStatus
|
||||
|
||||
# Third ancestor (parents[2]) of this file's resolved path.
_REPO_ROOT = Path(__file__).resolve().parents[2]
# Default root for per-session run directories.
_DEFAULT_OUTPUT_DIR = _REPO_ROOT / "outputs" / "score-session"
# Default root for the persisted job/session index (a "_sessions" subfolder is
# created under it by the manager's constructor).
_DEFAULT_INDEX_DIR = _REPO_ROOT / "outputs" / "score-session-jobs"

# Columns that are sample metadata rather than metric scores (mirrors run_reader.NON_METRIC_COLUMNS)
_NON_METRIC_COLUMNS = {
    "sample_id", "question", "contexts", "answer", "ground_truth",
    "scenario", "language", "retrieval_config", "error",
    "judge_model", "embedding_model", "run_id", "difficulty",
    "question_type", "doc_id", "doc_name", "section_path",
    "page_start", "page_end", "source_chunk_ids", "review_status",
    "review_notes", "weighted_score", "sample_weight",
}
|
||||
|
||||
|
||||
def _now_iso() -> str:
|
||||
return datetime.now(timezone.utc).isoformat()
|
||||
|
||||
|
||||
def _sanitize_session_id(session_id: str) -> str:
|
||||
"""Convert an arbitrary session_id string to a safe directory-name fragment."""
|
||||
return re.sub(r"[^a-zA-Z0-9]", "-", session_id)[:64].strip("-") or "default"
|
||||
|
||||
|
||||
class SessionScoreJobManager:
|
||||
"""Thread-pool manager for session-grouped async scoring jobs.
|
||||
|
||||
All calls sharing a session_id append to one shared run directory, so the
|
||||
report detail page shows all samples and their aggregate metrics together.
|
||||
"""
|
||||
|
||||
def __init__(
    self,
    output_dir: Path = _DEFAULT_OUTPUT_DIR,
    index_dir: Path = _DEFAULT_INDEX_DIR,
    max_workers: int = 4,
) -> None:
    """Prepare directories, the worker pool and in-memory indexes, then load saved state.

    Args:
        output_dir: root under which per-session run directories live.
        index_dir: root of the persisted index; a ``_sessions`` subfolder is
            created inside it.
        max_workers: size of the scoring thread pool.
    """
    self._output_dir, self._index_dir = Path(output_dir), Path(index_dir)
    for folder in (
        self._output_dir,
        self._index_dir,
        self._index_dir / "_sessions",
    ):
        folder.mkdir(parents=True, exist_ok=True)

    self._executor = ThreadPoolExecutor(max_workers=max_workers)

    # One lock guards the three maps below.
    self._lock = threading.Lock()
    # job_id -> AsyncScoreJobStatus
    self._job_cache: dict[str, AsyncScoreJobStatus] = {}
    # session_id -> job_ids in submission order
    self._session_jobs: dict[str, list[str]] = {}
    # session_id -> that session's own threading.Lock
    self._session_locks: dict[str, threading.Lock] = {}

    # Runs last: every attribute above already exists when it is called.
    self._load_existing()
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# Public API
|
||||
# ------------------------------------------------------------------ #
|
||||
|
||||
def session_run_id(self, session_id: str) -> str:
    """Map a session_id to its deterministic run_id, which doubles as the directory name."""
    safe_fragment = _sanitize_session_id(session_id)
    return "session-" + safe_fragment
|
||||
|
||||
def submit(self, session_id: str, request: ScoreRequest) -> tuple[AsyncScoreJobStatus, str]:
    """Queue one scoring call for a session.

    Registers a new ``queued`` job under the session, persists the job and
    session index entries, and hands the work to the thread pool.

    Returns:
        ``(job_status, run_id)``; ``run_id`` is derived from ``session_id``
        via ``session_run_id``.
    """
    run_id = self.session_run_id(session_id)
    job_id = uuid.uuid4().hex[:12]
    created = _now_iso()

    # Compact description of the request: long texts are cut to 80 chars and
    # optional inputs are reduced to presence flags.
    summary = {
        "question": (request.question or "")[:80],
        "answer": (request.answer or "")[:80],
        "metrics": list(request.metrics),
        "judge_model": request.judge_model or "",
        "embedding_model": request.embedding_model or "",
        "has_contexts": bool(request.contexts),
        "has_ground_truth": bool(request.ground_truth),
        "session_id": session_id,
    }
    status = AsyncScoreJobStatus(
        job_id=job_id,
        status="queued",
        created_at=created,
        request_summary=summary,
    )

    with self._lock:
        self._job_cache[job_id] = status
        self._session_jobs.setdefault(session_id, []).append(job_id)

    # Persist first, then schedule the worker.
    self._persist_job_index(status)
    self._persist_session_index(session_id)
    self._executor.submit(self._run, job_id, session_id, run_id, request)
    return status, run_id
|
||||
|
||||
def get_job(self, job_id: str) -> AsyncScoreJobStatus | None:
    """Look up one call by job_id; return its cached status, or None when unknown."""
    with self._lock:
        cached = self._job_cache.get(job_id)
    return cached
|
||||
|
||||
def list_jobs(self) -> list[AsyncScoreJobStatus]:
    """Return every cached job record, ordered by ``created_at`` descending."""
    with self._lock:
        # copy under the lock, sort outside it
        records = list(self._job_cache.values())
    return sorted(records, key=lambda job: job.created_at, reverse=True)
|
||||
|
||||
def get_session(self, session_id: str) -> SessionStatus | None:
    """Build the aggregate view of one session.

    Returns None when the session has no registered job ids. Otherwise the
    result carries the session's run_id, the number of registered calls, the
    metric means read from the run directory, the latest ``finished_at``
    among its jobs ("" if none finished) and the jobs sorted by
    ``created_at``.
    """
    with self._lock:
        known_ids = list(self._session_jobs.get(session_id) or [])
    if not known_ids:
        return None

    run_id = self.session_run_id(session_id)

    # Read outside the manager lock; the CSV may be mid-update — best effort.
    means = self._read_metric_means(self._output_dir / run_id)

    with self._lock:
        records = []
        for jid in known_ids:
            if jid in self._job_cache:
                records.append(self._job_cache[jid])

    finished_stamps = [job.finished_at for job in records if job.finished_at]
    newest = max(finished_stamps) if finished_stamps else ""

    records.sort(key=lambda job: job.created_at)
    return SessionStatus(
        session_id=session_id,
        run_id=run_id,
        call_count=len(known_ids),
        metric_means=means,
        latest_finished_at=newest,
        jobs=records,
    )
|
||||
|
||||
def list_sessions(self) -> list[SessionStatus]:
    """Return the aggregate of every known session, latest ``latest_finished_at`` first."""
    with self._lock:
        known = list(self._session_jobs)
    # get_session may return None for a session; those are left out.
    aggregates = [agg for agg in map(self.get_session, known) if agg is not None]
    aggregates.sort(key=lambda agg: agg.latest_finished_at, reverse=True)
    return aggregates
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# Worker
|
||||
# ------------------------------------------------------------------ #
|
||||
|
||||
def _run(self, job_id: str, session_id: str, run_id: str, request: ScoreRequest) -> None:
    """Score one sample, then append it to the session's shared run artifacts.

    The job is first marked ``running``. Scoring runs without the per-session
    lock; everything that touches the session's run directory (reading
    ``scores.csv``, rewriting the artifacts, regenerating the advisor output)
    runs while that lock is held. On success the job is marked ``completed``
    and the session index is persisted. Any exception raised along the way,
    including a failing lazy import, marks the job ``failed`` and is not
    re-raised. An advisor failure is only logged and does not fail the job.

    Args:
        job_id: Identifier of the job record to update.
        session_id: Session whose shared report receives the new row.
        run_id: Name of the run directory under the manager's output dir.
        request: The scoring request for this call.
    """
    import logging

    logger = logging.getLogger("webapp.services.session_score_manager")
    self._update_job(job_id, status="running")

    # Provisional start time so the failure handler can always report a
    # latency, even when the setup below raises before scoring begins.
    t0 = time.monotonic()

    try:
        # Lazy imports — keep web server bootable if ragas is not installed.
        # They live inside the try block so that an import or settings error
        # marks the job as failed instead of leaving it stuck in "running".
        from rag_eval.advisor import run_advisor
        from rag_eval.metrics.factory import build_models
        from rag_eval.reporting.writers import write_run_artifacts
        from rag_eval.settings import EvaluationSettings
        from rag_eval.shared.models import (
            DatasetConfig,
            EvaluationResult,
            NormalizedSample,
            Scenario,
        )
        from rag_eval.shared.utils import utc_now_iso
        from webapp.services.inline_scorer import inline_scorer

        settings = EvaluationSettings()
        judge_model = request.judge_model or settings.ragas_judge_model
        embedding_model = request.embedding_model or settings.ragas_embedding_model
        effective = request.effective_metrics()
        skipped = sorted(set(request.metrics) - set(effective))

        def _cell_text(value: Any) -> str:
            """Render a CSV cell as text, mapping missing values to ''."""
            # Blank cells come back from scores.csv as None or NaN; str() on
            # those would yield the literal strings "None" / "nan".
            if value is None or (isinstance(value, float) and value != value):
                return ""
            return str(value)

        # Restart the clock so the reported latency covers scoring only.
        t0 = time.monotonic()

        # --- Scoring (can run concurrently for the same session) ----------
        if effective:
            raw_scores = inline_scorer.score(
                question=request.question,
                answer=request.answer,
                contexts=request.contexts_as_list(),
                ground_truth=request.ground_truth,
                metrics=effective,
                judge_model=judge_model,
                embedding_model=embedding_model,
                settings=settings,
            )
        else:
            raw_scores = {}

        latency_ms = int((time.monotonic() - t0) * 1000)
        finished_at = utc_now_iso()

        # Build complete scores for this sample (skipped metrics -> None)
        all_scores: dict[str, float | None] = {m: None for m in request.metrics}
        all_scores.update(raw_scores)

        # Composite weighted score computation (temporarily disabled)
        # from rag_eval.metrics.weights import compute_weighted_score
        # weighted_raw = compute_weighted_score(
        #     {k: v for k, v in raw_scores.items() if v is not None}, {}
        # )
        # weighted = round(weighted_raw, 4) if weighted_raw is not None else None
        weighted = None

        # --- File I/O must be serialized per session ----------------------
        session_lock = self._get_session_lock(session_id)
        with session_lock:
            run_dir = self._output_dir / run_id
            run_dir.mkdir(parents=True, exist_ok=True)

            # Read all existing rows, then append the new one
            existing_rows = self._read_score_rows(run_dir)
            call_number = len(existing_rows) + 1

            new_row: dict[str, Any] = {
                "sample_id": f"session-score-{call_number}",
                "question": request.question,
                "answer": request.answer or "",
                "contexts": request.contexts or "",
                "ground_truth": request.ground_truth or "",
                "error": "",
            }
            new_row.update(all_scores)

            all_rows = existing_rows + [new_row]

            # Reconstruct NormalizedSample objects for write_run_artifacts metadata
            valid_samples = [
                NormalizedSample(
                    sample_id=_cell_text(row.get("sample_id")) or f"session-score-{i + 1}",
                    question=_cell_text(row.get("question")),
                    answer=_cell_text(row.get("answer")),
                    contexts=[
                        part.strip()
                        for part in _cell_text(row.get("contexts")).split(" |||| ")
                        if part.strip()
                    ],
                    ground_truth=_cell_text(row.get("ground_truth")),
                )
                for i, row in enumerate(all_rows)
            ]

            # Determine all metric columns (union of all rows' metric keys)
            all_metric_names = sorted({
                k for row in all_rows
                for k in row if k not in _NON_METRIC_COLUMNS
            })

            scenario = Scenario(
                scenario_name=f"session-{_sanitize_session_id(session_id)}",
                mode="offline",
                dataset=DatasetConfig(path=run_dir / "dataset.csv"),
                judge_model=judge_model,
                embedding_model=embedding_model,
                metrics=all_metric_names,
                output_dir=self._output_dir,
                optimization_advisor=True,
            )

            # Reuse the first row's "_started_at" when it is present as a
            # string; otherwise fall back to this call's finish time.
            started_at = existing_rows[0].get("_started_at") if existing_rows else None
            if not isinstance(started_at, str):
                started_at = finished_at

            result = EvaluationResult(
                scenario=scenario,
                run_id=run_id,
                started_at=started_at,
                finished_at=finished_at,
                valid_samples=valid_samples,
                invalid_samples=[],
                score_rows=all_rows,
            )

            write_run_artifacts(result)
            logger.info(
                "[session_job] artifacts written job_id=%s session_id=%s call=%d",
                job_id, session_id, call_number,
            )

            # Regenerate optimization advice over all accumulated rows.
            # Best effort: a failure here is logged and the job still completes.
            try:
                llm, _ = build_models(judge_model, embedding_model, settings)
                run_advisor(result, scenario, llm)
                logger.info("[session_job] advisor done job_id=%s session=%s", job_id, session_id)
            except Exception as adv_exc:  # noqa: BLE001
                logger.warning(
                    "[session_job] advisor failed job_id=%s err=%s", job_id, adv_exc
                )

        self._update_job(
            job_id,
            status="completed",
            finished_at=finished_at,
            run_id=run_id,
            scores=all_scores,
            weighted_score=weighted,
            latency_ms=latency_ms,
            skipped_metrics=skipped,
        )
        self._persist_session_index(session_id)

    except Exception as exc:  # noqa: BLE001
        latency_ms = int((time.monotonic() - t0) * 1000)
        # logger.exception keeps the traceback alongside the message.
        logger.exception("[session_job] failed job_id=%s err=%s", job_id, exc)
        self._update_job(
            job_id,
            status="failed",
            finished_at=_now_iso(),
            latency_ms=latency_ms,
            error=f"{type(exc).__name__}: {exc}",
        )
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# Helpers
|
||||
# ------------------------------------------------------------------ #
|
||||
|
||||
def _get_session_lock(self, session_id: str) -> threading.Lock:
|
||||
with self._lock:
|
||||
if session_id not in self._session_locks:
|
||||
self._session_locks[session_id] = threading.Lock()
|
||||
return self._session_locks[session_id]
|
||||
|
||||
def _read_score_rows(self, run_dir: Path) -> list[dict[str, Any]]:
|
||||
"""Read existing scores.csv rows, returning empty list if file doesn't exist."""
|
||||
scores_path = run_dir / "scores.csv"
|
||||
if not scores_path.is_file():
|
||||
return []
|
||||
try:
|
||||
frame = pd.read_csv(scores_path)
|
||||
return frame.where(pd.notnull(frame), None).to_dict("records")
|
||||
except (OSError, ValueError):
|
||||
return []
|
||||
|
||||
def _read_metric_means(self, run_dir: Path) -> dict[str, float | None]:
    """Return the mean of each numeric metric column in ``scores.csv``.

    Columns listed in ``_NON_METRIC_COLUMNS`` and columns without a numeric
    dtype are ignored. Each mean is rounded to four decimals; a mean that
    comes out as NaN is reported as ``None``. A missing or unreadable file
    yields an empty dict.
    """
    csv_path = run_dir / "scores.csv"
    if not csv_path.is_file():
        return {}
    try:
        table = pd.read_csv(csv_path)
    except (OSError, ValueError):
        return {}

    def _rounded_mean(series):
        """Mean of one column, rounded to 4 decimals, or None when NaN."""
        average = series.mean(numeric_only=True)
        return None if pd.isna(average) else round(float(average), 4)

    return {
        name: _rounded_mean(table[name])
        for name in table.columns
        if name not in _NON_METRIC_COLUMNS
        and pd.api.types.is_numeric_dtype(table[name])
    }
|
||||
|
||||
def _update_job(self, job_id: str, **kwargs: Any) -> None:
|
||||
with self._lock:
|
||||
existing = self._job_cache.get(job_id)
|
||||
if existing is None:
|
||||
return
|
||||
updated = existing.model_copy(update=kwargs)
|
||||
self._job_cache[job_id] = updated
|
||||
self._persist_job_index(updated)
|
||||
|
||||
def _persist_job_index(self, status: AsyncScoreJobStatus) -> None:
|
||||
"""Persist a single job's status to the index directory."""
|
||||
path = self._index_dir / f"{status.job_id}.json"
|
||||
path.write_text(
|
||||
json.dumps(status.model_dump(), ensure_ascii=False, indent=2),
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
def _persist_session_index(self, session_id: str) -> None:
    """Write the session's job-id list to its file under ``_sessions``.

    The job ids are copied while ``self._lock`` is held. The record stores
    ``session_id``, the run id from ``session_run_id`` and the job ids, and
    is written as indented UTF-8 JSON to a file named after the sanitized
    session id.
    """
    with self._lock:
        job_ids = [*(self._session_jobs.get(session_id) or [])]

    record = {
        "session_id": session_id,
        "run_id": self.session_run_id(session_id),
        "job_ids": job_ids,
    }
    file_name = f"{_sanitize_session_id(session_id)}.json"
    target = self._index_dir / "_sessions" / file_name
    target.write_text(
        json.dumps(record, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )
|
||||
|
||||
def _load_existing(self) -> None:
|
||||
"""Restore job cache and session mappings from persisted index files on startup."""
|
||||
# Load individual job files
|
||||
for path in sorted(self._index_dir.glob("*.json")):
|
||||
try:
|
||||
data = json.loads(path.read_text(encoding="utf-8"))
|
||||
status = AsyncScoreJobStatus.model_validate(data)
|
||||
self._job_cache[status.job_id] = status
|
||||
except Exception: # noqa: BLE001
|
||||
pass
|
||||
|
||||
# Load session→job_ids mappings
|
||||
sessions_dir = self._index_dir / "_sessions"
|
||||
if not sessions_dir.is_dir():
|
||||
return
|
||||
for path in sorted(sessions_dir.glob("*.json")):
|
||||
try:
|
||||
data = json.loads(path.read_text(encoding="utf-8"))
|
||||
sid = data.get("session_id", "")
|
||||
job_ids = data.get("job_ids", [])
|
||||
if sid:
|
||||
self._session_jobs[sid] = job_ids
|
||||
except Exception: # noqa: BLE001
|
||||
pass
|
||||
|
||||
|
||||
# Module-level singleton shared by FastAPI routes. It is constructed once,
# when this module is first imported.
session_score_manager = SessionScoreJobManager()
|
||||
@@ -128,17 +128,17 @@ const Report = {
|
||||
wrap.appendChild(card);
|
||||
});
|
||||
|
||||
// 综合加权得分卡片
|
||||
const wsValue = (report && report.weighted_score_mean !== undefined) ? report.weighted_score_mean : null;
|
||||
const wsCard = document.createElement("div");
|
||||
wsCard.className = "metric-card weighted-score-card";
|
||||
const wsCls = App.scoreClass(wsValue);
|
||||
const wsText = wsValue === null || wsValue === undefined ? "n/a" : wsValue.toFixed(2);
|
||||
wsCard.innerHTML = `
|
||||
<div class="metric-value ${wsCls}">${wsText}</div>
|
||||
<div class="metric-name">综合加权得分</div>
|
||||
`;
|
||||
wrap.appendChild(wsCard);
|
||||
// 综合加权得分卡片(已暂时隐藏)
|
||||
// const wsValue = (report && report.weighted_score_mean !== undefined) ? report.weighted_score_mean : null;
|
||||
// const wsCard = document.createElement("div");
|
||||
// wsCard.className = "metric-card weighted-score-card";
|
||||
// const wsCls = App.scoreClass(wsValue);
|
||||
// const wsText = wsValue === null || wsValue === undefined ? "n/a" : wsValue.toFixed(2);
|
||||
// wsCard.innerHTML = `
|
||||
// <div class="metric-value ${wsCls}">${wsText}</div>
|
||||
// <div class="metric-name">综合加权得分</div>
|
||||
// `;
|
||||
// wrap.appendChild(wsCard);
|
||||
},
|
||||
|
||||
// ② 分数分布直方图(可切换指标)。
|
||||
|
||||
@@ -55,10 +55,11 @@ const ScoreJobs = {
|
||||
return `<span class="metric-chip" title="${App.escape(k)}">${App.escape(App.shortMetric(k))} <b class="${cls}">${text}</b></span>`;
|
||||
})
|
||||
.join(" ");
|
||||
if (job.weighted_score !== null && job.weighted_score !== undefined) {
|
||||
const cls = App.scoreClass(job.weighted_score);
|
||||
scoreHtml += ` <span class="metric-chip">综合 <b class="${cls}">${Number(job.weighted_score).toFixed(3)}</b></span>`;
|
||||
}
|
||||
// 综合加权得分(已暂时隐藏)
|
||||
// if (job.weighted_score !== null && job.weighted_score !== undefined) {
|
||||
// const cls = App.scoreClass(job.weighted_score);
|
||||
// scoreHtml += ` <span class="metric-chip">综合 <b class="${cls}">${Number(job.weighted_score).toFixed(3)}</b></span>`;
|
||||
// }
|
||||
} else if (job.status === "failed") {
|
||||
scoreHtml = `<span style="color:var(--bad);font-size:12px">${App.escape((job.error || "").slice(0, 80))}</span>`;
|
||||
} else {
|
||||
|
||||
Reference in New Issue
Block a user