Files
wangwei e89695e490 Add RAGAS evaluation web console (FastAPI + vanilla JS)
- webapp/: FastAPI backend with runs/scenarios/evaluations API routers;
  services for run_reader, report_builder, scenario_scanner, task_manager
  (lazy ragas import — server boots even without ragas); Pydantic models
- webapp/static/: single-page console (layout A: left-nav + main area);
  report detail with metric cards, Chart.js distribution histogram,
  grouping table, lowest-score sample review; trigger evaluation + log polling
- webmain.py: uvicorn entry point (alongside existing main.py CLI)
- start.bat: Windows one-click launcher with env checks and auto-browser open
- rag_eval/datasets/: implement missing loader + normalizer modules
  (load_dataset_records, normalize_records) required by evaluator
- scripts/seed_sample_run.py: generate realistic demo run artifacts
- .gitignore: exclude datasets/ data files but keep rag_eval/datasets/ source

Co-Authored-By: Claude Sonnet 4 <noreply@anthropic.com>
2026-06-15 15:53:57 +08:00

57 lines
1.9 KiB
Python

"""Load raw evaluation dataset records from disk.
Supports CSV and JSONL (``.jsonl`` / ``.ndjson``) formats; files with any
other extension are read as CSV. Returns a list of plain dicts —
normalization into NormalizedSample is handled by normalizers.py.
"""
from __future__ import annotations
import csv
import json
from pathlib import Path
from typing import Any
def load_dataset_records(path: Path | str) -> list[dict[str, Any]]:
    """Read every raw record from the dataset file at *path*.

    The reader is picked from the lower-cased file suffix: ``.jsonl`` and
    ``.ndjson`` files go through the JSONL reader, while ``.csv`` and every
    other suffix go through the CSV reader. Each record is returned as a
    plain dict; CSV cell values (including lists stored as JSON strings) are
    returned as-is, without being parsed here.

    Args:
        path: Location of the dataset file, as a ``Path`` or a string.

    Returns:
        One dict per record, in file order.

    Raises:
        FileNotFoundError: If *path* does not point to an existing regular
            file.
    """
    dataset = Path(path)
    if not dataset.is_file():
        raise FileNotFoundError(f"Dataset file not found: {dataset}")

    # CSV is both the explicit ".csv" case and the fallback for unknown
    # extensions, so only the JSONL suffixes need to be singled out.
    is_jsonl = dataset.suffix.lower() in {".jsonl", ".ndjson"}
    reader = _load_jsonl if is_jsonl else _load_csv
    return reader(dataset)
def _load_csv(path: Path) -> list[dict[str, Any]]:
"""Read a CSV file into a list of row dicts."""
with path.open(encoding="utf-8", newline="") as fh:
reader = csv.DictReader(fh)
return [dict(row) for row in reader]
def _load_jsonl(path: Path) -> list[dict[str, Any]]:
"""Read a JSONL file into a list of record dicts."""
records: list[dict[str, Any]] = []
with path.open(encoding="utf-8") as fh:
for lineno, line in enumerate(fh, 1):
line = line.strip()
if not line:
continue
try:
obj = json.loads(line)
except json.JSONDecodeError as exc:
raise ValueError(f"Invalid JSON on line {lineno} of {path}: {exc}") from exc
if not isinstance(obj, dict):
raise ValueError(f"Expected JSON object on line {lineno} of {path}, got {type(obj).__name__}")
records.append(obj)
return records