Add RAGAS evaluation web console (FastAPI + vanilla JS)

- webapp/: FastAPI backend with runs/scenarios/evaluations API routers;
  services for run_reader, report_builder, scenario_scanner, task_manager
  (lazy ragas import — server boots even without ragas); Pydantic models
- webapp/static/: single-page console (layout A: left-nav + main area);
  report detail with metric cards, Chart.js distribution histogram,
  grouping table, lowest-score sample review; trigger evaluation + log
  polling
- webmain.py: uvicorn entry point (alongside existing main.py CLI)
- start.bat: Windows one-click launcher with env checks and auto-browser open
- rag_eval/datasets/: implement missing loader + normalizer modules
  (load_dataset_records, normalize_records) required by evaluator
- scripts/seed_sample_run.py: generate realistic demo run artifacts
- .gitignore: exclude datasets/ data files but keep rag_eval/datasets/ source

Co-Authored-By: Claude Sonnet 4 <noreply@anthropic.com>
2026-06-15 15:53:57 +08:00
parent 9cbdc1d95d
commit e89695e490
26 changed files with 2496 additions and 2 deletions
--- a/webapp/api/__init__.py
+++ b/webapp/api/__init__.py
@@ -0,0 +1 @@
+"""API router package for the evaluation console."""
--- a/webapp/api/evaluations.py
+++ b/webapp/api/evaluations.py
@@ -0,0 +1,44 @@
+"""Routes for triggering evaluations and polling background task status."""
+
+from __future__ import annotations
+
+from fastapi import APIRouter, HTTPException
+
+from webapp.models import (
+    TaskStatus,
+    TriggerEvaluationRequest,
+    TriggerEvaluationResponse,
+)
+from webapp.services import scenario_scanner
+from webapp.services.task_manager import task_manager
+
+router = APIRouter(prefix="/api/evaluations", tags=["evaluations"])  # shared by all routes below
+
+
+@router.post("", response_model=TriggerEvaluationResponse)
+def trigger_evaluation(request: TriggerEvaluationRequest) -> TriggerEvaluationResponse:
+    """Queue a background evaluation for the requested scenario.
+
+    Responds with HTTP 400 when the scenario path does not resolve.
+    """
+    scenario_path = request.scenario_path
+    if scenario_scanner.resolve_scenario_path(scenario_path) is None:
+        raise HTTPException(status_code=400, detail=f"无效或不允许的场景路径: {scenario_path}")
+    # NOTE(review): the raw request path is submitted, not the resolved one.
+    task_id = task_manager.submit(scenario_path)
+    return TriggerEvaluationResponse(task_id=task_id)
+
+
+@router.get("/{task_id}", response_model=TaskStatus)
+def get_task_status(task_id: str) -> TaskStatus:
+    """Look up one evaluation task by id; respond with HTTP 404 if unknown."""
+    task = task_manager.get(task_id)
+    if task is not None:
+        return task
+    raise HTTPException(status_code=404, detail=f"未找到任务: {task_id}")
+
+
+@router.get("", response_model=dict)
+def list_tasks() -> dict[str, list]:
+    """Dump every task held by the task manager under a single ``tasks`` key."""
+    return {"tasks": [entry.model_dump() for entry in task_manager.list_tasks()]}
--- a/webapp/api/runs.py
+++ b/webapp/api/runs.py
@@ -0,0 +1,32 @@
+"""Routes for listing evaluation runs and fetching a single run's report."""
+
+from __future__ import annotations
+
+from fastapi import APIRouter, HTTPException
+
+from webapp.models import RunDetail
+from webapp.services import report_builder, run_reader
+
+router = APIRouter(prefix="/api/runs", tags=["runs"])  # shared by all routes below
+
+
+@router.get("")
+def get_runs() -> dict[str, list]:
+    """List the ``model_dump()`` of each run summary under a ``runs`` key."""
+    payload = [item.model_dump() for item in run_reader.list_run_summaries()]
+    return {"runs": payload}
+
+
+@router.get("/{run_id}")
+def get_run_detail(run_id: str) -> RunDetail:
+    """Build the detail view for one run; HTTP 404 if the run cannot be loaded."""
+    location = run_reader.find_run_dir(run_id)
+    if location is None:
+        raise HTTPException(status_code=404, detail=f"未找到运行: {run_id}")
+    overview = run_reader.build_run_summary(location)
+    if overview is None:
+        raise HTTPException(status_code=404, detail=f"运行元数据缺失: {run_id}")
+
+    # The metrics recorded on the summary are handed to the report builder.
+    built = report_builder.build_report(location, overview.metrics)
+    return RunDetail(summary=overview, report=built)
--- a/webapp/api/scenarios.py
+++ b/webapp/api/scenarios.py
@@ -0,0 +1,16 @@
+"""Route for discovering scenario YAML files that can be evaluated."""
+
+from __future__ import annotations
+
+from fastapi import APIRouter
+
+from webapp.services import scenario_scanner
+
+router = APIRouter(prefix="/api/scenarios", tags=["scenarios"])  # shared by all routes below
+
+
+@router.get("")
+def get_scenarios() -> dict[str, list]:
+    """List what ``scenario_scanner`` reports, dumped under a ``scenarios`` key."""
+    dumped = [entry.model_dump() for entry in scenario_scanner.list_scenarios()]
+    return {"scenarios": dumped}
				`@@ -0,0 +1 @@`
				`"""API router package for the evaluation console."""`