feat(scripts): add run_eval.bat / run_eval.ps1 evaluation launcher scripts
Both scripts support:
- Shortcut args: online (default), offline, or any custom .yaml path
- Second arg: log level (DEBUG/INFO/WARNING/ERROR), default INFO
- Auto-timestamped log file saved to logs\eval_<date>_<time>.log
- Automatically sets PYTHONIOENCODING=utf-8 and PYTHONPATH=.
- Friendly error/success banners with log file path

Usage:
  run_eval.bat                    # online eval
  run_eval.bat offline DEBUG      # offline eval with DEBUG logs
  .\run_eval.ps1 online DEBUG     # PowerShell equivalent

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
107
run_eval.bat
Normal file
107
run_eval.bat
Normal file
@@ -0,0 +1,107 @@
|
|||||||
|
@echo off
:: Delayed expansion is intentionally left off: no !var! reference appears in
:: this script, and enabling it would strip "!" characters from arguments and
:: paths (for example a scenario path that contains an exclamation mark).
setlocal

:: ============================================================
:: run_eval.bat - Run a RAGAS evaluation scenario with logs
::
:: Usage:
::   run_eval.bat                          (uses default online scenario)
::   run_eval.bat offline                  (runs offline smoke scenario)
::   run_eval.bat path\to\scenario.yaml    (any custom scenario)
::   run_eval.bat offline DEBUG            (second arg = log level)
:: ============================================================

:: Work from the directory that contains this script so the relative
:: scenario and log paths used further down resolve. If the directory cannot
:: be entered there is no point continuing, so jump to the failure exit.
cd /d "%~dp0" || goto :error

echo.
echo ============================================================
echo   Siemens RAGAS - Evaluation Runner
echo ============================================================
echo.
|
||||||
|
|
||||||
|
:: ----------------------------------------------------------------
:: 1. Resolve scenario path (arg1)
::    "online" (the default when arg1 is empty) and "offline" are
::    shortcuts for the bundled scenario files; any other value is
::    used as a path exactly as given.
:: ----------------------------------------------------------------
set "SCENARIO=%~1"
if not defined SCENARIO set "SCENARIO=online"

if /i "%SCENARIO%"=="online" set "SCENARIO=scenarios\online\siemens-pdf-question-bank-online.yaml"
if /i "%SCENARIO%"=="offline" set "SCENARIO=scenarios\offline\siemens-pdf-offline-smoke.yaml"

:: The not-found report is deliberately NOT wrapped in an "if ( ... )" block.
:: Inside such a block the first unescaped ")" in the echoed text - here the
:: one in "(default)" - closes the block early, which made the remaining
:: usage lines and the "goto :error" execute on every run. A scenario path
:: containing ")" would have had the same effect.
if exist "%SCENARIO%" goto :scenario_ok
echo [ERROR] Scenario file not found: %SCENARIO%
echo.
echo Usage examples:
echo   run_eval.bat                    - online eval (default)
echo   run_eval.bat offline            - offline smoke
echo   run_eval.bat path\to\file.yaml  - custom scenario
goto :error

:scenario_ok
echo [OK] Scenario : %SCENARIO%
|
||||||
|
|
||||||
|
:: ----------------------------------------------------------------
:: 2. Resolve log level (arg2, default INFO)
::    Accepts DEBUG / INFO / WARNING / ERROR in any letter case and
::    normalizes to upper case. Anything else falls back to INFO with
::    a warning, matching the behaviour of run_eval.ps1.
:: ----------------------------------------------------------------
set "LOG_LEVEL=%~2"
if not defined LOG_LEVEL set "LOG_LEVEL=INFO"

:: The loop variable carries the canonical upper-case spelling, so a
:: case-insensitive match also performs the normalization.
set "LOG_LEVEL_CANON="
for %%L in (DEBUG INFO WARNING ERROR) do if /i "%LOG_LEVEL%"=="%%L" set "LOG_LEVEL_CANON=%%L"
if defined LOG_LEVEL_CANON goto :log_level_ok

:: The rejected value is echoed inside double quotes so that characters such
:: as & or ^| in it are not interpreted by the command processor.
echo [WARN] Unknown log level "%LOG_LEVEL%", defaulting to INFO
set "LOG_LEVEL_CANON=INFO"

:log_level_ok
set "LOG_LEVEL=%LOG_LEVEL_CANON%"
echo [OK] Log level: %LOG_LEVEL%
|
||||||
|
|
||||||
|
:: ----------------------------------------------------------------
:: 3. Create logs dir and build timestamped log filename
::    Result: LOG_FILE=logs\eval_<yyyy-MM-dd>_<HHmmss>.log
:: ----------------------------------------------------------------
if not exist "logs" mkdir "logs"

:: %DATE% and %TIME% are rendered according to the user's regional settings
:: (optional weekday prefix, "/" "-" or "." date separators, "," or "." as
:: the decimal mark), so splitting them on fixed delimiters produces
:: malformed names such as "2024-Tue 01-02" on many machines. PowerShell is
:: asked for a fixed-format timestamp instead.
set "STAMP="
for /f "delims=" %%t in ('powershell -NoProfile -Command "Get-Date -Format yyyy-MM-dd_HHmmss" 2^>nul') do set "STAMP=%%t"

:: Fallback if PowerShell could not be started: still produce a usable,
:: reasonably unique file name rather than an empty one.
if not defined STAMP set "STAMP=run-%RANDOM%"

set "LOG_FILE=logs\eval_%STAMP%.log"
echo [OK] Log file : %LOG_FILE%
|
||||||
|
|
||||||
|
echo.
echo ============================================================
echo   Starting evaluation...
echo   (Logs also written to %LOG_FILE%)
echo   Press Ctrl+C to abort
echo ============================================================
echo.

:: ----------------------------------------------------------------
:: 4. Run evaluation with UTF-8 and logging
:: ----------------------------------------------------------------
:: Quoted "set" form so stray trailing whitespace can never become part of
:: the value.
set "PYTHONIOENCODING=utf-8"
set "PYTHONPATH=."

:: Every value is quoted so spaces or shell metacharacters in it reach
:: main.py as a single argument.
python main.py ^
    --scenario "%SCENARIO%" ^
    --log-file "%LOG_FILE%" ^
    --log-level "%LOG_LEVEL%"

:: "if errorlevel 1" is true only for exit codes >= 1, so on its own it
:: treats a negative exit code as success. "if not errorlevel 0" covers the
:: negative range. Plain gotos are used instead of an "if ( ... )" block so
:: that a ")" in the log path cannot terminate the block early.
if errorlevel 1 goto :eval_failed
if not errorlevel 0 goto :eval_failed
goto :eval_ok

:eval_failed
echo.
echo [ERROR] Evaluation failed. Check log: %LOG_FILE%
goto :error

:eval_ok
|
||||||
|
|
||||||
|
:: ----------------------------------------------------------------
:: Success exit: closing banner, wait for a key press, return 0.
:: ----------------------------------------------------------------
set "RULE============================================================="
echo.
echo %RULE%
echo   Evaluation complete!
echo   Log saved to: %LOG_FILE%
echo   Open the web console to view results: start.bat
echo %RULE%
echo.
pause
exit /b 0

:: ----------------------------------------------------------------
:: Failure exit: shared target of every "goto :error" in the script.
:: Prints the failure banner, waits for a key press, returns 1.
:: ----------------------------------------------------------------
:error
:: Defined again here because this label can be reached before the
:: success path above has ever run.
set "RULE============================================================="
echo.
echo %RULE%
echo   Evaluation failed. See error above or check log file.
echo %RULE%
pause
exit /b 1
|
||||||
96
run_eval.ps1
Normal file
96
run_eval.ps1
Normal file
@@ -0,0 +1,96 @@
|
|||||||
|
# run_eval.ps1 - Siemens RAGAS Evaluation Runner
# Usage:
#   .\run_eval.ps1                          # online eval (default)
#   .\run_eval.ps1 offline                  # offline smoke
#   .\run_eval.ps1 path\to\scenario.yaml    # custom scenario
#   .\run_eval.ps1 online DEBUG             # second arg = log level (DEBUG/INFO/WARNING/ERROR)
#   Or: powershell -ExecutionPolicy Bypass -File run_eval.ps1 [scenario] [log-level]

param(
    # Scenario shortcut ("online" / "offline") or a path to a scenario .yaml file.
    [string]$Scenario = "online",
    # Log level that is forwarded to main.py through --log-level.
    [string]$LogLevel = "INFO"
)

$ErrorActionPreference = "Stop"

# Run from the script's own directory so the relative scenario and log paths
# resolve. -LiteralPath is required: the positional -Path form interprets
# characters such as [ and ] in the directory name as wildcards.
Set-Location -LiteralPath $PSScriptRoot

Write-Host ""
Write-Host "============================================================" -ForegroundColor Cyan
Write-Host "  Siemens RAGAS - Evaluation Runner" -ForegroundColor Cyan
Write-Host "============================================================" -ForegroundColor Cyan
Write-Host ""
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------
# 1. Resolve scenario path
#    "online" / "offline" (any letter case) map to the bundled
#    scenario files; any other value is used as a path as given.
# ----------------------------------------------------------------
$scenarioMap = @{
    "online"  = "scenarios\online\siemens-pdf-question-bank-online.yaml"
    "offline" = "scenarios\offline\siemens-pdf-offline-smoke.yaml"
}

# An explicitly empty argument falls back to the default shortcut instead of
# making Test-Path throw on an empty path (run_eval.bat behaves the same way).
if ([string]::IsNullOrWhiteSpace($Scenario)) {
    $Scenario = "online"
}

# Invariant casing: the culture-sensitive ToLower() turns "ONLINE" into
# "onlıne" (dotless i) under Turkish casing rules, which would miss the key.
$shortcutKey = $Scenario.ToLowerInvariant()
if ($scenarioMap.ContainsKey($shortcutKey)) {
    $Scenario = $scenarioMap[$shortcutKey]
}

# -LiteralPath stops [ ] * ? in the path from being treated as wildcards, and
# -PathType Leaf rejects directories, which would otherwise pass the check.
if (-not (Test-Path -LiteralPath $Scenario -PathType Leaf)) {
    Write-Host "[ERROR] Scenario file not found: $Scenario" -ForegroundColor Red
    Write-Host ""
    Write-Host "Usage examples:"
    Write-Host "  .\run_eval.ps1                    - online eval (default)"
    Write-Host "  .\run_eval.ps1 offline            - offline smoke"
    Write-Host "  .\run_eval.ps1 path\to\file.yaml  - custom scenario"
    Read-Host "Press Enter to exit"
    exit 1
}
Write-Host "[OK] Scenario : $Scenario" -ForegroundColor Green
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------
# 2. Validate log level
#    Accepts DEBUG / INFO / WARNING / ERROR in any letter case and
#    normalizes $LogLevel to upper case; anything else becomes INFO.
# ----------------------------------------------------------------
$validLevels = @("DEBUG", "INFO", "WARNING", "ERROR")

# Invariant casing: the culture-sensitive ToUpper() maps "info" to "İNFO"
# (dotted capital I) under Turkish casing rules, which would be rejected.
$normalizedLevel = $LogLevel.ToUpperInvariant()
if ($validLevels -notcontains $normalizedLevel) {
    Write-Host "[WARN] Unknown log level '$LogLevel', defaulting to INFO" -ForegroundColor Yellow
    $normalizedLevel = "INFO"
}

# Store the normalized spelling so the value shown here is the value that is
# later passed on the command line.
$LogLevel = $normalizedLevel
Write-Host "[OK] Log level: $LogLevel" -ForegroundColor Green
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------
# 3. Create logs dir with timestamped filename
#    Produces $logFile = logs\eval_<yyyy-MM-dd>_<HHmmss>.log
# ----------------------------------------------------------------
$logsPresent = Test-Path "logs"
if (-not $logsPresent) {
    # Discard the DirectoryInfo object New-Item returns so it is not printed.
    $null = New-Item -ItemType Directory "logs"
}

$timestamp = Get-Date -Format "yyyy-MM-dd_HHmmss"
$logFile = "logs\eval_{0}.log" -f $timestamp
Write-Host ("[OK] Log file : {0}" -f $logFile) -ForegroundColor Green

# Start banner.
$startRule = "============================================================"
Write-Host ""
Write-Host $startRule -ForegroundColor Cyan
Write-Host "  Starting evaluation..." -ForegroundColor Cyan
Write-Host ("  Logs also written to: {0}" -f $logFile) -ForegroundColor Cyan
Write-Host "  Press Ctrl+C to abort" -ForegroundColor Yellow
Write-Host $startRule -ForegroundColor Cyan
Write-Host ""
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------
# 4. Run evaluation
#    Launches "python main.py" with the resolved scenario, log file
#    and log level; exits with code 1 after a pause if it fails.
# ----------------------------------------------------------------
$env:PYTHONIOENCODING = "utf-8"
$env:PYTHONPATH = "."

# Arguments are collected in an array and splatted, so each value reaches
# python as exactly one argument and no backtick line continuations (which
# break silently on trailing whitespace) are needed.
$pythonArgs = @(
    "main.py",
    "--scenario", $Scenario,
    "--log-file", $logFile,
    "--log-level", $LogLevel.ToUpperInvariant()
)

# Under $ErrorActionPreference = "Stop" a missing python executable raises a
# terminating error. Catch it so the user still gets the failure message and
# the pause instead of a bare exception.
$exitCode = 1
try {
    & python @pythonArgs
    $exitCode = $LASTEXITCODE
}
catch {
    Write-Host "[ERROR] Could not start python: $($_.Exception.Message)" -ForegroundColor Red
}

if ($exitCode -ne 0) {
    Write-Host ""
    Write-Host "[ERROR] Evaluation failed. Check log: $logFile" -ForegroundColor Red
    Read-Host "Press Enter to exit"
    exit 1
}
|
||||||
|
|
||||||
|
# Success banner: confirm completion, repeat the log location, then wait for
# Enter before the script ends.
$doneRule = "============================================================"
Write-Host ""
Write-Host $doneRule -ForegroundColor Green
Write-Host "  Evaluation complete!" -ForegroundColor Green
Write-Host ("  Log saved to: {0}" -f $logFile) -ForegroundColor Green
Write-Host "  Open the web console to view results: start.bat" -ForegroundColor Cyan
Write-Host $doneRule -ForegroundColor Green
Write-Host ""
Read-Host "Press Enter to exit"
|
||||||
Reference in New Issue
Block a user