"""Pydantic response models for the evaluation console HTTP API.""" from __future__ import annotations from datetime import datetime, timezone from typing import Any from pydantic import BaseModel, Field def _utcnow_iso() -> str: return datetime.now(timezone.utc).isoformat() class RunSummary(BaseModel): """Compact description of a single evaluation run for list views.""" run_id: str scenario_name: str mode: str = "" judge_model: str = "" embedding_model: str = "" started_at: str = "" finished_at: str = "" dataset: str = "" total_samples: int = 0 valid_samples: int = 0 invalid_samples: int = 0 metrics: list[str] = Field(default_factory=list) metric_means: dict[str, float | None] = Field(default_factory=dict) output_path: str = "" class GroupStat(BaseModel): """Mean metric values for one slice of samples grouped by a metadata field.""" key: str count: int means: dict[str, float | None] = Field(default_factory=dict) class DistributionBin(BaseModel): """One histogram bucket of sample counts for a single metric.""" label: str lower: float upper: float count: int class SampleScore(BaseModel): """Per-sample row used for the lowest-score review table.""" sample_id: str question: str = "" contexts: list[str] = Field(default_factory=list) answer: str = "" ground_truth: str = "" language: str = "" difficulty: str = "" question_type: str = "" metrics: dict[str, float | None] = Field(default_factory=dict) mean_score: float | None = None error: str = "" class ReportData(BaseModel): """Aggregated report payload rendered by the report detail page.""" metrics: list[str] = Field(default_factory=list) metric_means: dict[str, float | None] = Field(default_factory=dict) distributions: dict[str, list[DistributionBin]] = Field(default_factory=dict) groupings: dict[str, list[GroupStat]] = Field(default_factory=dict) lowest_samples: list[SampleScore] = Field(default_factory=list) summary_markdown: str = "" advice_markdown: str = "" # optimization_advice.md content (empty if not generated) 
class RunDetail(BaseModel):
    """Full payload for a single run: summary metadata plus the report."""

    summary: RunSummary
    report: ReportData


class ScenarioInfo(BaseModel):
    """One discoverable scenario YAML file that can be evaluated from the UI."""

    path: str  # the only required field
    scenario_name: str = ""
    mode: str = ""
    dataset: str = ""
    judge_model: str = ""
    metrics: list[str] = Field(default_factory=list)
    error: str = ""


class TaskStatus(BaseModel):
    """State of a background evaluation task tracked by the task manager."""

    task_id: str
    scenario_path: str
    # Free-form string; the set of allowed values is not constrained here.
    status: str
    logs: list[str] = Field(default_factory=list)
    # None until a value is assigned.
    run_id: str | None = None
    error: str | None = None
    # Timestamps are plain strings with an empty default; no format enforced.
    created_at: str = ""
    finished_at: str = ""


class TriggerEvaluationRequest(BaseModel):
    """Request body for launching an evaluation run from the UI."""

    scenario_path: str


class TriggerEvaluationResponse(BaseModel):
    """Response returned immediately after queuing an evaluation task."""

    task_id: str


class LLMProfile(BaseModel):
    """A named LLM connection configuration that can be reused across tasks."""

    profile_id: str
    name: str
    model: str
    base_url: str
    # NOTE(review): held as a plain str, so it is included whenever the model
    # is serialized unless the caller excludes it — confirm this is intended.
    api_key: str
    timeout_seconds: int = 30
    # Both default to the current UTC time as an ISO-8601 string. Each default
    # comes from its own _utcnow_iso() call, so the two values can differ
    # slightly on a freshly created profile.
    created_at: str = Field(default_factory=_utcnow_iso)
    updated_at: str = Field(default_factory=_utcnow_iso)


class CreateProfileRequest(BaseModel):
    """Request body for creating or updating an LLM profile."""

    # Same connection fields as LLMProfile, without the id and timestamps.
    name: str
    model: str
    base_url: str
    api_key: str
    timeout_seconds: int = 30


class ProfileApplyRequest(BaseModel):
    """Request body to patch LLM profile selections into a scenario YAML."""

    scenario_path: str
    # Each profile id is optional and defaults to None.
    judge_profile_id: str | None = None
    answer_profile_id: str | None = None
    dataset_profile_id: str | None = None


class ProfileApplyResponse(BaseModel):
    """Response after patching a scenario YAML with profile settings."""

    scenario_path: str
    patched_fields: list[str] = Field(default_factory=list)


def jsonable(value: Any) -> Any:
    """Convert NaN/inf floats into None so the payload stays valid JSON.

    The value is walked recursively: floats that are NaN or +/-infinity
    become ``None``; dict values, list items and tuple items are processed
    the same way. Dict keys are left untouched, and any other type is
    returned unchanged.

    Args:
        value: Arbitrary data, typically nested dicts/lists of scalars.

    Returns:
        A structure of the same shape with non-finite floats replaced by
        ``None``. New dict/list/tuple containers are built; the input is not
        mutated. Namedtuples are rebuilt with their own type; other tuple
        subclasses come back as plain tuples.
    """
    import math  # kept function-local so the block does not rely on file-level imports

    if isinstance(value, float):
        return value if math.isfinite(value) else None
    if isinstance(value, dict):
        return {key: jsonable(item) for key, item in value.items()}
    if isinstance(value, list):
        return [jsonable(item) for item in value]
    if isinstance(value, tuple):
        # Tuples serialize as JSON arrays too, so a NaN hidden inside one
        # would otherwise slip through unsanitized.
        cleaned = [jsonable(item) for item in value]
        make = getattr(type(value), "_make", None)  # present on namedtuples
        return make(cleaned) if make is not None else tuple(cleaned)
    return value