feat(scripts): add run_eval.bat / run_eval.ps1 evaluation launcher scripts

Both scripts support:
  - Shortcut args: online (default), offline, or any custom .yaml path
  - Second arg: log level (DEBUG/INFO/WARNING/ERROR), default INFO
  - Auto-timestamped log file saved to logs\eval_<date>_<time>.log
  - Sets PYTHONIOENCODING=utf-8 and PYTHONPATH=. automatically
  - Friendly error/success banners with log file path

Usage:
  run_eval.bat                    # online eval
  run_eval.bat offline DEBUG      # offline eval with DEBUG logs
  .\run_eval.ps1 online DEBUG     # PowerShell equivalent

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2026-06-16 11:16:53 +08:00
parent 629304aa6d
commit 4173a40d93
2 changed files with 203 additions and 0 deletions

107
run_eval.bat Normal file
View File

@@ -0,0 +1,107 @@
@echo off
setlocal EnableExtensions DisableDelayedExpansion
:: ============================================================
:: run_eval.bat - Run a RAGAS evaluation scenario with logs
::
:: Usage:
::   run_eval.bat                        (uses default online scenario)
::   run_eval.bat offline                (runs offline smoke scenario)
::   run_eval.bat path\to\scenario.yaml  (any custom scenario)
::   run_eval.bat offline DEBUG          (second arg = log level)
::
:: Arguments:
::   arg1  "online", "offline", or a path to a scenario .yaml file
::         (default: online). Relative paths are resolved against the
::         directory that contains this script.
::   arg2  DEBUG, INFO, WARNING or ERROR, case-insensitive (default: INFO).
::
:: Exit code: 0 on success, 1 on any failure.
::
:: Implementation notes:
::   - Delayed expansion stays disabled so that "!" inside a scenario
::     path is preserved.
::   - Branching uses goto instead of parenthesized blocks wherever user
::     supplied text is echoed: inside a block an unescaped ")" coming
::     from a message or an expanded path closes the block early.
:: ============================================================

:: Work from the script's own directory so relative paths resolve.
cd /d "%~dp0" || goto :error

echo.
echo ============================================================
echo Siemens RAGAS - Evaluation Runner
echo ============================================================
echo.

:: ----------------------------------------------------------------
:: 1. Resolve scenario path (arg1)
:: ----------------------------------------------------------------
set "SCENARIO=%~1"
if not defined SCENARIO set "SCENARIO=online"
if /i "%SCENARIO%"=="online" set "SCENARIO=scenarios\online\siemens-pdf-question-bank-online.yaml"
if /i "%SCENARIO%"=="offline" set "SCENARIO=scenarios\offline\siemens-pdf-offline-smoke.yaml"

if exist "%SCENARIO%" goto :scenario_ok
echo [ERROR] Scenario file not found: %SCENARIO%
echo.
echo Usage examples:
echo run_eval.bat - online eval (default)
echo run_eval.bat offline - offline smoke
echo run_eval.bat path\to\file.yaml - custom scenario
goto :error

:scenario_ok
echo [OK] Scenario : %SCENARIO%

:: ----------------------------------------------------------------
:: 2. Resolve log level (arg2, default INFO)
::    Unknown values fall back to INFO; accepted values are passed on
::    in upper case.
:: ----------------------------------------------------------------
set "LOG_LEVEL=%~2"
if not defined LOG_LEVEL set "LOG_LEVEL=INFO"
set "LEVEL_OK="
for %%L in (DEBUG INFO WARNING ERROR) do if /i "%LOG_LEVEL%"=="%%L" set "LEVEL_OK=%%L"
if not defined LEVEL_OK echo [WARN] Unknown log level '%LOG_LEVEL%', defaulting to INFO
if not defined LEVEL_OK set "LEVEL_OK=INFO"
set "LOG_LEVEL=%LEVEL_OK%"
echo [OK] Log level: %LOG_LEVEL%

:: ----------------------------------------------------------------
:: 3. Create logs dir and build timestamped log filename
::    The DATE and TIME variables are formatted according to the
::    regional settings, so the timestamp is taken from PowerShell
::    with an explicit format instead of being parsed out of them.
:: ----------------------------------------------------------------
if not exist "logs" mkdir "logs"
set "STAMP="
for /f "delims=" %%t in ('powershell -NoProfile -Command "Get-Date -Format yyyy-MM-dd_HHmmss" 2^>nul') do set "STAMP=%%t"
:: Fallback keeps the run going if PowerShell could not be started.
if not defined STAMP set "STAMP=run_%RANDOM%"
set "LOG_FILE=logs\eval_%STAMP%.log"
echo [OK] Log file : %LOG_FILE%
echo.
echo ============================================================
echo Starting evaluation...
echo (Logs also written to %LOG_FILE%)
echo Press Ctrl+C to abort
echo ============================================================
echo.

:: ----------------------------------------------------------------
:: 4. Run evaluation with UTF-8 and logging
:: ----------------------------------------------------------------
set "PYTHONIOENCODING=utf-8"
set "PYTHONPATH=."
python main.py ^
 --scenario "%SCENARIO%" ^
 --log-file "%LOG_FILE%" ^
 --log-level %LOG_LEVEL%
:: Compare against 0 so that negative exit codes also count as failure.
if %ERRORLEVEL% equ 0 goto :success
echo.
echo [ERROR] Evaluation failed. Check log: %LOG_FILE%
goto :error

:success
echo.
echo ============================================================
echo Evaluation complete!
echo Log saved to: %LOG_FILE%
echo Open the web console to view results: start.bat
echo ============================================================
echo.
pause
exit /b 0

:error
echo.
echo ============================================================
echo Evaluation failed. See error above or check log file.
echo ============================================================
pause
exit /b 1

96
run_eval.ps1 Normal file
View File

@@ -0,0 +1,96 @@
# run_eval.ps1 - Siemens RAGAS Evaluation Runner
#
# Resolves a scenario file, picks a log level, creates a timestamped log
# file name under .\logs and launches "python main.py" with those values.
#
# Usage:
#   .\run_eval.ps1                          # online eval (default)
#   .\run_eval.ps1 offline                  # offline smoke
#   .\run_eval.ps1 path\to\scenario.yaml    # custom scenario
#   .\run_eval.ps1 online DEBUG             # second arg = log level (DEBUG/INFO/WARNING/ERROR)
#   Or: powershell -ExecutionPolicy Bypass -File run_eval.ps1 [scenario] [log-level]
#
# Parameters:
#   -Scenario  "online", "offline", or a path to a scenario .yaml file.
#              Relative paths are resolved against the script directory.
#   -LogLevel  DEBUG, INFO, WARNING or ERROR (case-insensitive); any other
#              value falls back to INFO with a warning.
#
# Exit code: 1 when the scenario is missing, python is not available or
# python returns a non-zero exit code.
param(
    [string]$Scenario = "online",
    [string]$LogLevel = "INFO"
)

$ErrorActionPreference = "Stop"

# Work from the script's own directory so relative paths resolve.
Set-Location -LiteralPath $PSScriptRoot

$banner = "============================================================"

Write-Host ""
Write-Host $banner -ForegroundColor Cyan
Write-Host " Siemens RAGAS - Evaluation Runner" -ForegroundColor Cyan
Write-Host $banner -ForegroundColor Cyan
Write-Host ""

# ----------------------------------------------------------------
# 1. Resolve scenario path
# ----------------------------------------------------------------
$scenarioMap = @{
    "online"  = "scenarios\online\siemens-pdf-question-bank-online.yaml"
    "offline" = "scenarios\offline\siemens-pdf-offline-smoke.yaml"
}
$scenarioKey = $Scenario.ToLower()
if ($scenarioMap.ContainsKey($scenarioKey)) {
    $Scenario = $scenarioMap[$scenarioKey]
}

# -LiteralPath: do not treat [ ] * ? in the path as wildcards.
# -PathType Leaf: a directory is not a valid scenario file.
if (-not (Test-Path -LiteralPath $Scenario -PathType Leaf)) {
    Write-Host "[ERROR] Scenario file not found: $Scenario" -ForegroundColor Red
    Write-Host ""
    Write-Host "Usage examples:"
    Write-Host " .\run_eval.ps1 - online eval (default)"
    Write-Host " .\run_eval.ps1 offline - offline smoke"
    Write-Host " .\run_eval.ps1 path\to\file.yaml - custom scenario"
    Read-Host "Press Enter to exit"
    exit 1
}
Write-Host "[OK] Scenario : $Scenario" -ForegroundColor Green

# ----------------------------------------------------------------
# 2. Validate log level
# ----------------------------------------------------------------
$validLevels = @("DEBUG", "INFO", "WARNING", "ERROR")
if ($validLevels -notcontains $LogLevel.ToUpper()) {
    Write-Host "[WARN] Unknown log level '$LogLevel', defaulting to INFO" -ForegroundColor Yellow
    $LogLevel = "INFO"
}
# Normalise once so the value shown is exactly the value passed to python.
$LogLevel = $LogLevel.ToUpper()
Write-Host "[OK] Log level: $LogLevel" -ForegroundColor Green

# ----------------------------------------------------------------
# 3. Create logs dir with timestamped filename
# ----------------------------------------------------------------
if (-not (Test-Path -LiteralPath "logs" -PathType Container)) {
    New-Item -ItemType Directory -Path "logs" | Out-Null
}
$timestamp = Get-Date -Format "yyyy-MM-dd_HHmmss"
$logFile = "logs\eval_$timestamp.log"
Write-Host "[OK] Log file : $logFile" -ForegroundColor Green

# ----------------------------------------------------------------
# 4. Run evaluation
# ----------------------------------------------------------------
# With ErrorActionPreference = Stop a missing interpreter would abort the
# script with a raw exception; report it like the other failures instead.
if (-not (Get-Command python -ErrorAction SilentlyContinue)) {
    Write-Host "[ERROR] 'python' was not found on PATH." -ForegroundColor Red
    Read-Host "Press Enter to exit"
    exit 1
}

Write-Host ""
Write-Host $banner -ForegroundColor Cyan
Write-Host " Starting evaluation..." -ForegroundColor Cyan
Write-Host " Logs also written to: $logFile" -ForegroundColor Cyan
Write-Host " Press Ctrl+C to abort" -ForegroundColor Yellow
Write-Host $banner -ForegroundColor Cyan
Write-Host ""

$env:PYTHONIOENCODING = "utf-8"
$env:PYTHONPATH = "."

$pythonArgs = @(
    "main.py",
    "--scenario", $Scenario,
    "--log-file", $logFile,
    "--log-level", $LogLevel
)
& python @pythonArgs

if ($LASTEXITCODE -ne 0) {
    Write-Host ""
    Write-Host "[ERROR] Evaluation failed. Check log: $logFile" -ForegroundColor Red
    Read-Host "Press Enter to exit"
    exit 1
}

Write-Host ""
Write-Host $banner -ForegroundColor Green
Write-Host " Evaluation complete!" -ForegroundColor Green
Write-Host " Log saved to: $logFile" -ForegroundColor Green
Write-Host " Open the web console to view results: start.bat" -ForegroundColor Cyan
Write-Host $banner -ForegroundColor Green
Write-Host ""
Read-Host "Press Enter to exit"