Add RAGAS evaluation web console (FastAPI + vanilla JS)
- webapp/: FastAPI backend with runs/scenarios/evaluations API routers; services for run_reader, report_builder, scenario_scanner, task_manager (lazy ragas import — server boots even without ragas); Pydantic models
- webapp/static/: single-page console (layout A: left-nav + main area); report detail with metric cards, Chart.js distribution histogram, grouping table, lowest-score sample review; trigger evaluation + log polling
- webmain.py: uvicorn entry point (alongside existing main.py CLI)
- start.bat: Windows one-click launcher with env checks and auto-browser open
- rag_eval/datasets/: implement missing loader + normalizer modules (load_dataset_records, normalize_records) required by evaluator
- scripts/seed_sample_run.py: generate realistic demo run artifacts
- .gitignore: exclude datasets/ data files but keep rag_eval/datasets/ source

Co-Authored-By: Claude Sonnet 4 <noreply@anthropic.com>
This commit is contained in:
1
rag_eval/datasets/__init__.py
Normal file
1
rag_eval/datasets/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Dataset loading and normalization for the RAG evaluation platform."""
|
||||
56
rag_eval/datasets/loader.py
Normal file
56
rag_eval/datasets/loader.py
Normal file
@@ -0,0 +1,56 @@
|
||||
"""Load raw evaluation dataset records from disk.
|
||||
|
||||
Supports CSV and JSONL formats. Returns a list of plain dicts — normalization
|
||||
into NormalizedSample is handled by normalizers.py.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
|
||||
def load_dataset_records(path: Path | str) -> list[dict[str, Any]]:
    """Read an evaluation dataset from disk as a list of raw row dicts.

    The loader is chosen from the file extension (case-insensitive):
    ``.jsonl`` and ``.ndjson`` are read as JSON Lines; ``.csv`` and every
    other extension are read as CSV. Values are returned untouched, so a
    list stored as a JSON string inside a CSV cell stays a string here and
    is parsed later by the normalizers.

    Raises:
        FileNotFoundError: If ``path`` does not point to an existing file.
    """
    source = Path(path)
    if not source.is_file():
        raise FileNotFoundError(f"Dataset file not found: {source}")

    extension = source.suffix.lower()
    is_json_lines = extension == ".jsonl" or extension == ".ndjson"

    # CSV is both the explicit ".csv" handler and the fallback for any
    # extension that is not recognised.
    read_records = _load_jsonl if is_json_lines else _load_csv
    return read_records(source)
|
||||
|
||||
|
||||
def _load_csv(path: Path) -> list[dict[str, Any]]:
    """Read a CSV file into a list of row dicts.

    The first row supplies the column names and each following row becomes
    one plain ``dict`` keyed by those names.

    The file is decoded as ``utf-8-sig`` so that a leading byte-order mark
    (as written by spreadsheet "CSV UTF-8" exports) is discarded instead of
    being glued onto the first column name, which would otherwise turn a
    ``question`` header into ``"\\ufeffquestion"``. Files without a BOM are
    read exactly as plain UTF-8.
    """
    # newline="" leaves line-ending handling to the csv module, so quoted
    # fields that contain embedded newlines survive intact.
    with path.open(encoding="utf-8-sig", newline="") as fh:
        return [dict(row) for row in csv.DictReader(fh)]
|
||||
|
||||
|
||||
def _load_jsonl(path: Path) -> list[dict[str, Any]]:
    """Read a JSON Lines file into a list of record dicts.

    Each non-blank line must hold exactly one JSON object; blank and
    whitespace-only lines are skipped. The file is decoded as ``utf-8-sig``
    so that a leading byte-order mark does not make the first record
    unparseable; files without a BOM are read exactly as plain UTF-8.

    Raises:
        ValueError: If a line is not valid JSON, or parses to something
            other than a JSON object. The message carries the 1-based line
            number and the file path.
    """
    records: list[dict[str, Any]] = []
    with path.open(encoding="utf-8-sig") as fh:
        for lineno, line in enumerate(fh, 1):
            line = line.strip()
            if not line:
                continue
            try:
                obj = json.loads(line)
            except json.JSONDecodeError as exc:
                raise ValueError(f"Invalid JSON on line {lineno} of {path}: {exc}") from exc
            # Scalars and arrays are valid JSON but cannot serve as records.
            if not isinstance(obj, dict):
                raise ValueError(f"Expected JSON object on line {lineno} of {path}, got {type(obj).__name__}")
            records.append(obj)
    return records
|
||||
105
rag_eval/datasets/normalizers.py
Normal file
105
rag_eval/datasets/normalizers.py
Normal file
@@ -0,0 +1,105 @@
|
||||
"""Normalize raw dataset records into NormalizedSample and InvalidSample objects.
|
||||
|
||||
Handles both offline mode (records already contain answer + contexts) and online
|
||||
mode (records only contain question + ground_truth; adapter fills the rest).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from typing import Any
|
||||
|
||||
from rag_eval.shared.models import InvalidSample, NormalizedSample
|
||||
from rag_eval.shared.utils import parse_contexts
|
||||
|
||||
# Record keys that normalize_records() passes to NormalizedSample as dedicated
# fields. They are left out when the remaining columns of a raw record are
# gathered into the sample's ``metadata`` dict.
_CORE_FIELDS = {
    "sample_id", "question", "contexts", "answer",
    "ground_truth", "scenario", "language", "retrieval_config",
}
|
||||
|
||||
|
||||
def _get_str(record: dict[str, Any], key: str, default: str = "") -> str:
    """Fetch ``record[key]`` as a whitespace-stripped string.

    ``default`` is returned when the key is absent, when its value is
    ``None``, or when the stripped text equals ``"nan"`` in any letter case
    (which is how a float NaN stringifies). Any other value is passed
    through ``str()`` and stripped; a value that strips down to the empty
    string is returned as ``""`` rather than replaced by ``default``.
    """
    raw_value = record.get(key)
    if raw_value is not None:
        cleaned = str(raw_value).strip()
        if cleaned.lower() != "nan":
            return cleaned
    return default
|
||||
|
||||
|
||||
def normalize_records(
    records: list[dict[str, Any]],
    mode: str = "offline",
    max_samples: int | None = None,
) -> tuple[list[NormalizedSample], list[InvalidSample]]:
    """Split raw record dicts into valid and invalid sample collections.

    Args:
        records: Raw rows, e.g. as produced by the dataset loader.
        mode: ``"offline"`` requires every record to already carry a
            ground truth, an answer and non-empty contexts. Any other value
            skips those checks, leaving the fields to be filled in later
            (the online/adapter flow).
        max_samples: When not ``None``, only ``records[:max_samples]`` is
            processed.

    Returns:
        A ``(valid, invalid)`` tuple. A record without a question is always
        invalid. Records lacking a ``sample_id`` receive a random
        12-character hex id.
    """
    selected = records if max_samples is None else records[:max_samples]
    enforce_offline_fields = mode == "offline"

    accepted: list[NormalizedSample] = []
    rejected: list[InvalidSample] = []

    for record in selected:
        identifier = _get_str(record, "sample_id")
        if not identifier:
            identifier = uuid.uuid4().hex[:12]

        question_text = _get_str(record, "question")
        if not question_text:
            rejected.append(
                InvalidSample(
                    sample_id=identifier,
                    error="missing required field: question",
                    raw=record,
                )
            )
            continue

        reference = _get_str(record, "ground_truth")
        context_list = parse_contexts(record.get("contexts"))
        answer_text = _get_str(record, "answer")

        if enforce_offline_fields:
            # (value, complaint) pairs, in the order the complaints are reported.
            requirements = (
                (reference, "missing ground_truth"),
                (answer_text, "missing answer"),
                (context_list, "missing or empty contexts"),
            )
            problems = [complaint for present, complaint in requirements if not present]
            if problems:
                rejected.append(
                    InvalidSample(
                        sample_id=identifier,
                        error="; ".join(problems),
                        raw=record,
                    )
                )
                continue

        # Every column that is not a core field travels along as opaque
        # metadata for adapters and reporting.
        extras: dict[str, Any] = {}
        for column, cell in record.items():
            if column not in _CORE_FIELDS:
                extras[column] = cell

        accepted.append(
            NormalizedSample(
                sample_id=identifier,
                question=question_text,
                contexts=context_list,
                answer=answer_text,
                ground_truth=reference,
                scenario=_get_str(record, "scenario"),
                language=_get_str(record, "language"),
                retrieval_config=_get_str(record, "retrieval_config"),
                metadata=extras,
                raw=record,
            )
        )

    return accepted, rejected
|
||||
Reference in New Issue
Block a user