# siemens_ragas/rag_eval/datasets/normalizers.py

"""Normalize raw dataset records into NormalizedSample and InvalidSample objects.

Handles both offline mode (records already contain answer + contexts) and online
mode (records only contain question + ground_truth; adapter fills the rest).
"""

from __future__ import annotations

import uuid
from typing import Any

from rag_eval.shared.models import InvalidSample, NormalizedSample
from rag_eval.shared.utils import parse_contexts

# Fields we always strip from the raw record before storing it in metadata.
_CORE_FIELDS = {
    "sample_id",
    "question",
    "contexts",
    "answer",
    "ground_truth",
    "scenario",
    "language",
    "retrieval_config",
}


def _get_str(record: dict[str, Any], key: str, default: str = "") -> str:
    """Return a string field from the record, coercing None/NaN to the default."""
    value = record.get(key)
    if value is None:
        return default
    text = str(value).strip()
    return default if text.lower() == "nan" else text


def normalize_records(
    records: list[dict[str, Any]],
    mode: str = "offline",
    max_samples: int | None = None,
) -> tuple[list[NormalizedSample], list[InvalidSample]]:
    """Split raw record dicts into normalized samples and rejected samples.

    Args:
        records: Raw records, one dict per sample.
        mode: When equal to ``"offline"``, a record must carry a non-empty
            ``ground_truth``, ``answer`` and ``contexts`` to be accepted.
            For any other value only ``question`` is required; the remaining
            fields are stored as-is (possibly empty) and, per the module
            docstring, are expected to be filled in later by an adapter.
        max_samples: If not ``None``, only ``records[:max_samples]`` is
            processed.

    Returns:
        A ``(valid, invalid)`` tuple. Records appear in input order within
        each list. A record without a usable ``sample_id`` is assigned a
        random 12-character hex id.
    """
    selected = records if max_samples is None else records[:max_samples]
    offline = mode == "offline"

    accepted: list[NormalizedSample] = []
    rejected: list[InvalidSample] = []

    for record in selected:
        # Fall back to a generated id so rejected rows can still be referenced.
        ident = _get_str(record, "sample_id")
        if not ident:
            ident = uuid.uuid4().hex[:12]

        # The question is mandatory regardless of mode.
        question_text = _get_str(record, "question")
        if question_text == "":
            rejected.append(
                InvalidSample(
                    sample_id=ident,
                    error="missing required field: question",
                    raw=record,
                )
            )
            continue

        reference = _get_str(record, "ground_truth")
        context_list = parse_contexts(record.get("contexts"))
        answer_text = _get_str(record, "answer")

        if offline:
            # Report every missing field at once, in a fixed order.
            checks = (
                (reference, "missing ground_truth"),
                (answer_text, "missing answer"),
                (context_list, "missing or empty contexts"),
            )
            problems = [message for field, message in checks if not field]
            if problems:
                rejected.append(
                    InvalidSample(
                        sample_id=ident,
                        error="; ".join(problems),
                        raw=record,
                    )
                )
                continue

        # Everything that is not a core field travels along as opaque metadata.
        extras = {
            name: payload
            for name, payload in record.items()
            if name not in _CORE_FIELDS
        }

        sample = NormalizedSample(
            sample_id=ident,
            question=question_text,
            contexts=context_list,
            answer=answer_text,
            ground_truth=reference,
            scenario=_get_str(record, "scenario"),
            language=_get_str(record, "language"),
            retrieval_config=_get_str(record, "retrieval_config"),
            metadata=extras,
            raw=record,
        )
        accepted.append(sample)

    return accepted, rejected
Add RAGAS evaluation web console (FastAPI + vanilla JS)

- webapp/: FastAPI backend with runs/scenarios/evaluations API routers; services for run_reader, report_builder, scenario_scanner, task_manager (lazy ragas import — server boots even without ragas); Pydantic models
- webapp/static/: single-page console (layout A: left-nav + main area); report detail with metric cards, Chart.js distribution histogram, grouping table, lowest-score sample review; trigger evaluation + log polling
- webmain.py: uvicorn entry point (alongside existing main.py CLI)
- start.bat: Windows one-click launcher with env checks and auto-browser open
- rag_eval/datasets/: implement missing loader + normalizer modules (load_dataset_records, normalize_records) required by evaluator
- scripts/seed_sample_run.py: generate realistic demo run artifacts
- .gitignore: exclude datasets/ data files but keep rag_eval/datasets/ source

Co-Authored-By: Claude Sonnet 4 <noreply@anthropic.com>
2026-06-15 15:53:57 +08:00
			`"""Normalize raw dataset records into NormalizedSample and InvalidSample objects.`

			`Handles both offline mode (records already contain answer + contexts) and online`
			`mode (records only contain question + ground_truth; adapter fills the rest).`
			`"""`

			`from __future__ import annotations`

			`import uuid`
			`from typing import Any`

			`from rag_eval.shared.models import InvalidSample, NormalizedSample`
			`from rag_eval.shared.utils import parse_contexts`

			`# Fields we always strip from the raw record before storing it in metadata.`
			`_CORE_FIELDS = {`
			`"sample_id",`
			`"question",`
			`"contexts",`
			`"answer",`
			`"ground_truth",`
			`"scenario",`
			`"language",`
			`"retrieval_config",`
			`}`


			`def _get_str(record: dict[str, Any], key: str, default: str = "") -> str:`
			`"""Return a string field from the record, coercing None/NaN to the default."""`
			`value = record.get(key)`
			`if value is None:`
			`return default`
			`text = str(value).strip()`
			`return default if text.lower() == "nan" else text`


			`def normalize_records(`
			`records: list[dict[str, Any]],`
			`mode: str = "offline",`
			`max_samples: int \| None = None,`
			`) -> tuple[list[NormalizedSample], list[InvalidSample]]:`
			`"""Convert raw dicts into NormalizedSample / InvalidSample collections.`

			`In offline mode every record must already contain answer and contexts.`
			`In online mode those fields may be absent; they will be filled by the adapter.`
			`"""`
			`if max_samples is not None:`
			`records = records[:max_samples]`

			`valid: list[NormalizedSample] = []`
			`invalid: list[InvalidSample] = []`

			`for raw in records:`
			`sample_id = _get_str(raw, "sample_id") or uuid.uuid4().hex[:12]`

			`question = _get_str(raw, "question")`
			`if not question:`
			`invalid.append(InvalidSample(`
			`sample_id=sample_id,`
			`error="missing required field: question",`
			`raw=raw,`
			`))`
			`continue`

			`ground_truth = _get_str(raw, "ground_truth")`
			`contexts = parse_contexts(raw.get("contexts"))`
			`answer = _get_str(raw, "answer")`

			`if mode == "offline":`
			`errors: list[str] = []`
			`if not ground_truth:`
			`errors.append("missing ground_truth")`
			`if not answer:`
			`errors.append("missing answer")`
			`if not contexts:`
			`errors.append("missing or empty contexts")`
			`if errors:`
			`invalid.append(InvalidSample(`
			`sample_id=sample_id,`
			`error="; ".join(errors),`
			`raw=raw,`
			`))`
			`continue`

			`# Collect any extra columns as opaque metadata for adapters and reporting.`
			`metadata = {`
			`key: value`
			`for key, value in raw.items()`
			`if key not in _CORE_FIELDS`
			`}`

			`valid.append(NormalizedSample(`
			`sample_id=sample_id,`
			`question=question,`
			`contexts=contexts,`
			`answer=answer,`
			`ground_truth=ground_truth,`
			`scenario=_get_str(raw, "scenario"),`
			`language=_get_str(raw, "language"),`
			`retrieval_config=_get_str(raw, "retrieval_config"),`
			`metadata=metadata,`
			`raw=raw,`
			`))`

			`return valid, invalid`