feat(scripts): add run_eval.bat / run_eval.ps1 evaluation launcher scripts

Both scripts support:
- Shortcut args: online (default), offline, or any custom .yaml path
- Second arg: log level (DEBUG/INFO/WARNING/ERROR), default INFO
- Auto-timestamped log file saved to logs\eval_<date>_<time>.log
- Sets PYTHONIOENCODING=utf-8 and PYTHONPATH=. automatically
- Friendly error/success banners with log file path

Usage:
  run_eval.bat                  # online eval
  run_eval.bat offline DEBUG    # offline eval with DEBUG logs
  .\run_eval.ps1 online DEBUG   # PowerShell equivalent

Co-Authored-By: Claude <noreply@anthropic.com>
2026-06-16 11:16:53 +08:00
parent 629304aa6d
commit 4173a40d93
2 changed files with 203 additions and 0 deletions
--- a/run_eval.bat
+++ b/run_eval.bat
@@ -0,0 +1,107 @@
@echo off
 :: Delayed expansion is deliberately left off: nothing in this script uses
 :: !var! syntax, and with it enabled any "!" in a path argument is stripped.
 setlocal EnableExtensions DisableDelayedExpansion
 :: ============================================================
 ::  run_eval.bat  -  Run a RAGAS evaluation scenario with logs
 ::
 ::  Usage:
 ::    run_eval.bat                          (uses default online scenario)
 ::    run_eval.bat offline                  (runs offline smoke scenario)
 ::    run_eval.bat path\to\scenario.yaml    (any custom scenario)
 ::    run_eval.bat offline DEBUG            (second arg = log level)
 ::
 ::  Exit code: 0 on success, 1 on any failure.
 :: ============================================================
 :: Work from the script's own directory so the relative scenarios\, logs\
 :: and main.py paths resolve no matter where the script was launched from.
 :: If the directory change fails (cd prints its own message) stop instead of
 :: carrying on in the wrong directory.
 cd /d "%~dp0" || goto :error
 echo.
 echo ============================================================
 echo   Siemens RAGAS  -  Evaluation Runner
 echo ============================================================
 echo.
 :: ----------------------------------------------------------------
 :: 1. Resolve scenario path  (arg1)
 ::    "online" and "offline" (case-insensitive) are shortcuts for the two
 ::    bundled scenario files; any other value is used as a path as-is.
 ::    A missing or empty argument means "online".
 :: ----------------------------------------------------------------
 set "SCENARIO=%~1"
 if not defined SCENARIO set "SCENARIO=online"
 if /i "%SCENARIO%"=="online"  set "SCENARIO=scenarios\online\siemens-pdf-question-bank-online.yaml"
 if /i "%SCENARIO%"=="offline" set "SCENARIO=scenarios\offline\siemens-pdf-offline-smoke.yaml"
 :: Branch with GOTO rather than a parenthesised IF block: the usage text
 :: below contains ")" characters, and inside a block an unescaped ")" closes
 :: the block early, which would make the remaining lines (including the jump
 :: to the error exit) run on every invocation.
 if not exist "%SCENARIO%" goto :scenario_missing
 :: A trailing \* only matches when the path is a directory, which is not a
 :: usable scenario file.
 if exist "%SCENARIO%\*" goto :scenario_missing
 goto :scenario_ok
 :scenario_missing
 echo [ERROR] Scenario file not found: %SCENARIO%
 echo.
 echo Usage examples:
 echo   run_eval.bat                    - online eval (default)
 echo   run_eval.bat offline            - offline smoke
 echo   run_eval.bat path\to\file.yaml  - custom scenario
 goto :error
 :scenario_ok
 echo [OK] Scenario : %SCENARIO%
 :: ----------------------------------------------------------------
 :: 2. Resolve log level  (arg2, default INFO)
 ::    Accepted values: DEBUG, INFO, WARNING, ERROR (any letter case).
 ::    The value is normalised to upper case; anything else falls back to
 ::    INFO with a warning, matching the PowerShell launcher.
 :: ----------------------------------------------------------------
 set "LOG_LEVEL=%~2"
 if not defined LOG_LEVEL set "LOG_LEVEL=INFO"
 :: LEVEL_OK receives the canonical upper-case spelling when the argument
 :: matches one of the known levels, and stays undefined otherwise.
 set "LEVEL_OK="
 for %%L in (DEBUG INFO WARNING ERROR) do if /i "%LOG_LEVEL%"=="%%L" set "LEVEL_OK=%%L"
 if not defined LEVEL_OK echo [WARN] Unknown log level '%LOG_LEVEL%', defaulting to INFO
 if not defined LEVEL_OK set "LEVEL_OK=INFO"
 set "LOG_LEVEL=%LEVEL_OK%"
 echo [OK] Log level: %LOG_LEVEL%
 :: ----------------------------------------------------------------
 :: 3. Create logs dir and build timestamped log filename
 ::    Result: logs\eval_yyyy-MM-dd_HHmmss.log
 :: ----------------------------------------------------------------
 if not exist "logs" mkdir "logs"
 :: The built-in DATE and TIME variables are formatted according to the
 :: user's regional settings (field order, weekday text, decimal separator),
 :: so splitting them on fixed delimiters gives scrambled names on many
 :: systems. Ask PowerShell for an explicitly formatted timestamp instead.
 set "STAMP="
 for /f "delims=" %%t in ('powershell -NoProfile -Command "Get-Date -Format yyyy-MM-dd_HHmmss"') do set "STAMP=%%t"
 :: Fallback if PowerShell could not be run: still produce a usable file name.
 if not defined STAMP set "STAMP=run_%RANDOM%"
 set "LOG_FILE=logs\eval_%STAMP%.log"
 echo [OK] Log file : %LOG_FILE%
 echo.
 echo ============================================================
 echo   Starting evaluation...
 echo   (Logs also written to %LOG_FILE%)
 echo   Press Ctrl+C to abort
 echo ============================================================
 echo.
 :: ----------------------------------------------------------------
 :: 4. Run evaluation with UTF-8 and logging
 ::    Exits 0 after a successful run and 1 on any failure.
 :: ----------------------------------------------------------------
 set "PYTHONIOENCODING=utf-8"
 set "PYTHONPATH=."
 python main.py --scenario "%SCENARIO%" --log-file "%LOG_FILE%" --log-level "%LOG_LEVEL%"
 :: Compare against 0 explicitly. "if errorlevel 1" only matches codes that
 :: are 1 or greater, so a negative exit code (for example an interpreter
 :: crash) would be reported as success.
 if %ERRORLEVEL% neq 0 goto :eval_failed
 echo.
 echo ============================================================
 echo   Evaluation complete!
 echo   Log saved to: %LOG_FILE%
 echo   Open the web console to view results: start.bat
 echo ============================================================
 echo.
 pause
 exit /b 0
 :eval_failed
 echo.
 echo [ERROR] Evaluation failed. Check log: %LOG_FILE%
 :: Falls through into the shared failure exit below.
 :error
 :: Shared failure exit: print the banner, wait for a key press so the window
 :: stays open, then return exit code 1.
 echo.
 echo ============================================================
 echo   Evaluation failed. See error above or check log file.
 echo ============================================================
 pause
 exit /b 1
--- a/run_eval.ps1
+++ b/run_eval.ps1
@@ -0,0 +1,96 @@
 # run_eval.ps1 - Siemens RAGAS Evaluation Runner
 #
 # Runs main.py for one evaluation scenario and writes a timestamped log file
 # under logs\. Exits with code 1 on failure.
 #
 # Usage:
 #   .\run_eval.ps1                         # online eval (default)
 #   .\run_eval.ps1 offline                 # offline smoke
 #   .\run_eval.ps1 path\to\scenario.yaml   # custom scenario
 #   .\run_eval.ps1 online DEBUG            # second arg = log level (DEBUG/INFO/WARNING/ERROR)
 # Or: powershell -ExecutionPolicy Bypass -File run_eval.ps1 [scenario] [log-level]
 #
 # Parameters:
 #   -Scenario  "online", "offline", or a path to a scenario .yaml file.
 #   -LogLevel  Log level passed to main.py; defaults to INFO.
 param(
    [string]$Scenario = "online",
    [string]$LogLevel = "INFO"
 )
 $ErrorActionPreference = "Stop"
 # Work from the script's own directory so the relative paths used below
 # resolve. -LiteralPath keeps characters such as [ and ] in the directory
 # name from being interpreted as wildcard patterns.
 Set-Location -LiteralPath $PSScriptRoot
 Write-Host ""
 Write-Host "============================================================" -ForegroundColor Cyan
 Write-Host "  Siemens RAGAS  -  Evaluation Runner" -ForegroundColor Cyan
 Write-Host "============================================================" -ForegroundColor Cyan
 Write-Host ""
 # ----------------------------------------------------------------
 # 1. Resolve scenario path
 #    "online" / "offline" (any letter case) map to the bundled scenario
 #    files; any other value is treated as a path to a scenario file.
 # ----------------------------------------------------------------
 $scenarioMap = @{
    "online"  = "scenarios\online\siemens-pdf-question-bank-online.yaml"
    "offline" = "scenarios\offline\siemens-pdf-offline-smoke.yaml"
 }
 # An explicitly empty argument means "use the default"; passing an empty
 # string on to Test-Path would raise a parameter-binding error instead of
 # showing the usage text.
 if ([string]::IsNullOrEmpty($Scenario)) {
    $Scenario = "online"
 }
 # Invariant lower-casing: culture-sensitive ToLower() maps "I" to a dotless
 # "i" under Turkish cultures, so "ONLINE" would not match the "online" key.
 $shortcut = $Scenario.ToLowerInvariant()
 if ($scenarioMap.ContainsKey($shortcut)) {
    $Scenario = $scenarioMap[$shortcut]
 }
 # -LiteralPath: do not treat [ ] * ? in the path as wildcards.
 # -PathType Leaf: a directory is not a usable scenario file.
 if (-not (Test-Path -LiteralPath $Scenario -PathType Leaf)) {
    Write-Host "[ERROR] Scenario file not found: $Scenario" -ForegroundColor Red
    Write-Host ""
    Write-Host "Usage examples:"
    Write-Host "  .\run_eval.ps1                    - online eval (default)"
    Write-Host "  .\run_eval.ps1 offline            - offline smoke"
    Write-Host "  .\run_eval.ps1 path\to\file.yaml  - custom scenario"
    Read-Host "Press Enter to exit"
    exit 1
 }
 Write-Host "[OK] Scenario : $Scenario" -ForegroundColor Green
 # ----------------------------------------------------------------
 # 2. Validate log level
 #    Accepts DEBUG / INFO / WARNING / ERROR in any letter case and stores
 #    the upper-case form; anything else falls back to INFO with a warning.
 # ----------------------------------------------------------------
 $validLevels = @("DEBUG", "INFO", "WARNING", "ERROR")
 # Invariant upper-casing: culture-sensitive ToUpper() turns "i" into a dotted
 # capital under Turkish cultures, which would reject "info" and "warning".
 $normalizedLevel = $LogLevel.ToUpperInvariant()
 if ($validLevels -contains $normalizedLevel) {
    # Keep the normalised spelling so the level shown below is exactly the
    # one that gets used.
    $LogLevel = $normalizedLevel
 }
 else {
    Write-Host "[WARN] Unknown log level '$LogLevel', defaulting to INFO" -ForegroundColor Yellow
    $LogLevel = "INFO"
 }
 Write-Host "[OK] Log level: $LogLevel" -ForegroundColor Green
 # ----------------------------------------------------------------
 # 3. Prepare the logs directory and pick a timestamped log file name
 #    (logs\eval_<yyyy-MM-dd>_<HHmmss>.log)
 # ----------------------------------------------------------------
 $logsDirMissing = -not (Test-Path "logs")
 if ($logsDirMissing) {
    # Discard the DirectoryInfo object New-Item returns so it is not printed.
    $null = New-Item -ItemType Directory "logs"
 }
 $stamp = Get-Date -Format "yyyy-MM-dd_HHmmss"
 $logFile = "logs\eval_" + $stamp + ".log"
 Write-Host "[OK] Log file : $logFile" -ForegroundColor Green
 # Start-of-run banner.
 $bannerRule = "============================================================"
 Write-Host ""
 Write-Host $bannerRule -ForegroundColor Cyan
 Write-Host "  Starting evaluation..." -ForegroundColor Cyan
 Write-Host "  Logs also written to: $logFile" -ForegroundColor Cyan
 Write-Host "  Press Ctrl+C to abort" -ForegroundColor Yellow
 Write-Host $bannerRule -ForegroundColor Cyan
 Write-Host ""
 # ----------------------------------------------------------------
 # 4. Run evaluation
 #    Invokes "python main.py" with the resolved scenario, log file and log
 #    level. Exits 1 if python is unavailable or returns a non-zero code.
 # ----------------------------------------------------------------
 $env:PYTHONIOENCODING = "utf-8"
 $env:PYTHONPATH = "."
 # With $ErrorActionPreference = "Stop", invoking a command that does not
 # exist raises a terminating error and the script would end without the
 # friendly message and pause, so check for python up front.
 if (-not (Get-Command python -ErrorAction SilentlyContinue)) {
    Write-Host ""
    Write-Host "[ERROR] 'python' was not found on PATH." -ForegroundColor Red
    Read-Host "Press Enter to exit"
    exit 1
 }
 # Build the argument list as an array so each value reaches python as a
 # single argument. ToUpperInvariant avoids culture-specific case mapping.
 $pythonArgs = @(
    "main.py",
    "--scenario", $Scenario,
    "--log-file", $logFile,
    "--log-level", $LogLevel.ToUpperInvariant()
 )
 & python @pythonArgs
 if ($LASTEXITCODE -ne 0) {
    Write-Host ""
    Write-Host "[ERROR] Evaluation failed. Check log: $logFile" -ForegroundColor Red
    Read-Host "Press Enter to exit"
    exit 1
 }
 Write-Host ""
 Write-Host "============================================================" -ForegroundColor Green
 Write-Host "  Evaluation complete!" -ForegroundColor Green
 Write-Host "  Log saved to: $logFile" -ForegroundColor Green
 Write-Host "  Open the web console to view results: start.bat" -ForegroundColor Cyan
 Write-Host "============================================================" -ForegroundColor Green
 Write-Host ""
 Read-Host "Press Enter to exit"