feat(scripts): add run_eval.bat / run_eval.ps1 evaluation launcher scripts

Both scripts support: - Shortcut args: online (default), offline, or any custom .yaml path - Second arg: log level (DEBUG/INFO/WARNING/ERROR), default INFO - Auto-timestamped log file saved to logs\eval_<date>_<time>.log - Sets PYTHONIOENCODING=utf-8 and PYTHONPATH=. automatically - Friendly error/success banners with log file path Usage: run_eval.bat # online eval run_eval.bat offline DEBUG # offline eval with DEBUG logs .\run_eval.ps1 online DEBUG # PowerShell equivalent Co-Authored-By: Claude <noreply@anthropic.com>
2026-06-16 11:16:53 +08:00
parent 629304aa6d
commit 4173a40d93
2 changed files with 203 additions and 0 deletions
--- a/run_eval.ps1
+++ b/run_eval.ps1
@@ -0,0 +1,96 @@
+# run_eval.ps1 - Siemens RAGAS Evaluation Runner
+# Usage:
+#   .\run_eval.ps1                         # online eval (default)
+#   .\run_eval.ps1 offline                 # offline smoke
+#   .\run_eval.ps1 path\to\scenario.yaml   # custom scenario
+#   .\run_eval.ps1 online DEBUG            # second arg = log level (DEBUG/INFO/WARNING/ERROR)
+# Or: powershell -ExecutionPolicy Bypass -File run_eval.ps1 [scenario] [log-level]
+
+# Parameters:
+#   -Scenario  Shortcut name ("online" / "offline") or a path to a scenario file.
+#   -LogLevel  Log level forwarded to main.py (DEBUG/INFO/WARNING/ERROR).
+param(
+    [string]$Scenario = "online",
+    [string]$LogLevel = "INFO"
+)
+
+# Turn non-terminating cmdlet errors into terminating ones for the whole script.
+$ErrorActionPreference = "Stop"
+
+# A custom scenario path given relative to the caller's directory has to be
+# anchored BEFORE the location changes below; otherwise it would be looked up
+# under the script directory and reported as "not found".
+# - Shortcut names are skipped so the shortcut lookup still applies to them.
+# - Nothing is rewritten when the caller already is in the script directory,
+#   so that invocation style behaves exactly as before.
+$callerDir = (Get-Location).ProviderPath
+if (($callerDir -ne $PSScriptRoot) -and
+    (-not [string]::IsNullOrWhiteSpace($Scenario)) -and
+    (@("online", "offline") -notcontains $Scenario) -and
+    (Test-Path -LiteralPath $Scenario -PathType Leaf)) {
+    # ProviderPath yields a plain filesystem path that a native program can open.
+    $Scenario = (Resolve-Path -LiteralPath $Scenario).ProviderPath
+}
+
+# Every relative path used from here on (scenarios\, logs\, main.py) is
+# relative to the directory that contains this script.
+Set-Location -LiteralPath $PSScriptRoot
+
+# Opening banner: blank line, three cyan lines, blank line.
+$titleRule = "============================================================"
+Write-Host ""
+foreach ($bannerLine in @($titleRule, "  Siemens RAGAS  -  Evaluation Runner", $titleRule)) {
+    Write-Host $bannerLine -ForegroundColor Cyan
+}
+Write-Host ""
+
+# ----------------------------------------------------------------
+# 1. Resolve scenario path
+#    A shortcut name is replaced by its bundled scenario file; any
+#    other value is used as the path to a scenario file.
+# ----------------------------------------------------------------
+$scenarioMap = @{
+    "online"  = "scenarios\online\siemens-pdf-question-bank-online.yaml"
+    "offline" = "scenarios\offline\siemens-pdf-offline-smoke.yaml"
+}
+# Invariant lower-casing: a culture-sensitive ToLower() maps "I" to a dotless
+# "ı" under Turkish culture settings, which would make "ONLINE" miss the key.
+$scenarioKey = $Scenario.ToLowerInvariant()
+if ($scenarioMap.ContainsKey($scenarioKey)) {
+    $Scenario = $scenarioMap[$scenarioKey]
+}
+# - The blank check keeps an empty -Scenario from raising a parameter-binding
+#   exception inside Test-Path instead of showing the usage help.
+# - -LiteralPath stops "[" / "]" / "*" in a file name from being read as a pattern.
+# - -PathType Leaf rejects directories, which are not scenario files.
+if ([string]::IsNullOrWhiteSpace($Scenario) -or
+    (-not (Test-Path -LiteralPath $Scenario -PathType Leaf))) {
+    Write-Host "[ERROR] Scenario file not found: $Scenario" -ForegroundColor Red
+    Write-Host ""
+    Write-Host "Usage examples:"
+    Write-Host "  .\run_eval.ps1                    - online eval (default)"
+    Write-Host "  .\run_eval.ps1 offline            - offline smoke"
+    Write-Host "  .\run_eval.ps1 path\to\file.yaml  - custom scenario"
+    # Pause so the message stays readable before the script exits.
+    Read-Host "Press Enter to exit"
+    exit 1
+}
+Write-Host "[OK] Scenario : $Scenario" -ForegroundColor Green
+
+# ----------------------------------------------------------------
+# 2. Validate log level
+#    Unknown values are not fatal: a warning is printed and INFO is used.
+# ----------------------------------------------------------------
+$validLevels = @("DEBUG", "INFO", "WARNING", "ERROR")
+# Invariant upper-casing: a culture-sensitive ToUpper() maps "i" to a dotted
+# "İ" under Turkish culture settings, so "warning" / "info" would fail the
+# check below and be silently replaced by INFO.
+$normalizedLevel = $LogLevel.ToUpperInvariant()
+if ($validLevels -notcontains $normalizedLevel) {
+    # Report the value exactly as the user typed it.
+    Write-Host "[WARN] Unknown log level '$LogLevel', defaulting to INFO" -ForegroundColor Yellow
+    $normalizedLevel = "INFO"
+}
+# Keep the canonical spelling so the value shown here matches the value that
+# is passed on to the evaluation.
+$LogLevel = $normalizedLevel
+Write-Host "[OK] Log level: $LogLevel" -ForegroundColor Green
+
+# ----------------------------------------------------------------
+# 3. Create logs dir with timestamped filename
+# ----------------------------------------------------------------
+# Create logs\ when it is missing; the object New-Item returns is discarded
+# so it is not echoed to the console.
+if (-not (Test-Path "logs")) {
+    $null = New-Item -ItemType Directory "logs"
+}
+# The file name carries the start time of this run: logs\eval_<date>_<time>.log
+$timestamp = Get-Date -Format "yyyy-MM-dd_HHmmss"
+$logFile = "logs\eval_{0}.log" -f $timestamp
+Write-Host ("[OK] Log file : {0}" -f $logFile) -ForegroundColor Green
+
+# "Starting" banner: each entry is one console line with its colour.
+$startRule = "============================================================"
+$startBanner = @(
+    @{ Text = $startRule;                            Color = "Cyan" },
+    @{ Text = "  Starting evaluation...";            Color = "Cyan" },
+    @{ Text = "  Logs also written to: $logFile";    Color = "Cyan" },
+    @{ Text = "  Press Ctrl+C to abort";             Color = "Yellow" },
+    @{ Text = $startRule;                            Color = "Cyan" }
+)
+Write-Host ""
+foreach ($entry in $startBanner) {
+    Write-Host $entry.Text -ForegroundColor $entry.Color
+}
+Write-Host ""
+
+# ----------------------------------------------------------------
+# 4. Run evaluation
+#    Launches main.py with the resolved scenario, log file and log
+#    level, then reports failure based on the process exit code.
+# ----------------------------------------------------------------
+# Environment for the Python process: UTF-8 standard streams, and the current
+# directory on the module search path.
+$env:PYTHONIOENCODING = "utf-8"
+$env:PYTHONPATH = "."
+
+# Without this check a missing interpreter surfaces as a raw
+# CommandNotFoundException (ErrorActionPreference is "Stop"), with no banner
+# and no pause before the window closes.
+if (-not (Get-Command python -ErrorAction SilentlyContinue)) {
+    Write-Host ""
+    Write-Host "[ERROR] 'python' was not found on PATH." -ForegroundColor Red
+    Read-Host "Press Enter to exit"
+    exit 1
+}
+
+# Arguments are collected in an array and splatted, which avoids backtick line
+# continuations (those break on any trailing whitespace).
+$pythonArgs = @(
+    "main.py",
+    "--scenario", $Scenario,
+    "--log-file", $logFile,
+    "--log-level", $LogLevel.ToUpperInvariant()
+)
+& python @pythonArgs
+
+# Capture the exit code right away so it can be handed back to the caller.
+$evalExitCode = $LASTEXITCODE
+
+if ($evalExitCode -ne 0) {
+    Write-Host ""
+    Write-Host "[ERROR] Evaluation failed. Check log: $logFile" -ForegroundColor Red
+    Read-Host "Press Enter to exit"
+    # Propagate Python's own exit code (still non-zero) instead of flattening
+    # every failure to 1, so callers can tell failure kinds apart.
+    exit $evalExitCode
+}
+
+# Success banner: each entry is one console line with its colour.
+$doneRule = "============================================================"
+$doneBanner = @(
+    @{ Text = $doneRule;                                                 Color = "Green" },
+    @{ Text = "  Evaluation complete!";                                  Color = "Green" },
+    @{ Text = "  Log saved to: $logFile";                                Color = "Green" },
+    @{ Text = "  Open the web console to view results: start.bat";       Color = "Cyan" },
+    @{ Text = $doneRule;                                                 Color = "Green" }
+)
+Write-Host ""
+foreach ($entry in $doneBanner) {
+    Write-Host $entry.Text -ForegroundColor $entry.Color
+}
+Write-Host ""
+# Keep the window open until the user acknowledges the result.
+Read-Host "Press Enter to exit"