Files
siemens_ragas/run_eval.sh

148 lines
5.1 KiB
Bash
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env bash
# run_eval.sh — Siemens RAGAS evaluation runner (Linux).
# Counterpart of run_eval.ps1 on Windows.
#
# Usage:
#   bash run_eval.sh                        # online evaluation (default)
#   bash run_eval.sh offline                # offline smoke test
#   bash run_eval.sh scenarios/xxx.yaml     # custom scenario file
#   bash run_eval.sh online DEBUG           # choose the log level
#   bash run_eval.sh build scenarios/siemens_build/siemens-pdf-build.yaml
#                                           # question-bank (dataset) generation
set -euo pipefail

# Resolve the directory that contains this script and make it the working
# directory, so the relative paths used further down (.venv, scenarios/,
# logs/, main.py) do not depend on where the caller invoked the script from.
# CDPATH is cleared for the lookup: with an inherited CDPATH, `cd` on a
# relative directory may search CDPATH and print the resolved path, which
# would end up inside SCRIPT_DIR. `--` keeps a path starting with '-' from
# being parsed as an option.
SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
cd -- "$SCRIPT_DIR"
# ── Colored output ────────────────────────────────────────────────
# ANSI color escapes are enabled only when stdout is a terminal; otherwise
# the variables are empty so redirected output stays free of escape codes.
if [[ -t 1 ]]; then
  GREEN='\033[0;32m'
  YELLOW='\033[1;33m'
  RED='\033[0;31m'
  CYAN='\033[0;36m'
  NC='\033[0m'
else
  GREEN=''
  YELLOW=''
  RED=''
  CYAN=''
  NC=''
fi

# Log helpers: print a colored "[TAG]" prefix followed by all arguments
# joined into one message. The color variables hold backslash escapes and
# are therefore rendered with %b; the message itself is printed with %s so
# that backslashes in it (e.g. in a file path) are emitted literally instead
# of being interpreted as escape sequences.
#   ok / warn / info  -> stdout
#   err               -> stderr
ok() { printf '%b[OK]%b %s\n' "$GREEN" "$NC" "$*"; }
warn() { printf '%b[WARN]%b %s\n' "$YELLOW" "$NC" "$*"; }
err() { printf '%b[ERROR]%b %s\n' "$RED" "$NC" "$*" >&2; }
info() { printf '%b[INFO]%b %s\n' "$CYAN" "$NC" "$*"; }
# ── Argument parsing ──────────────────────────────────────────────
# $1  scenario alias ("online" / "offline"), a scenario YAML path, or the
#     literal "build" to select dataset-build mode. Defaults to "online".
# $2  log level (default INFO). In build mode $2 is the build config path
#     instead, and the log level moves to $3.
SCENARIO="${1:-online}"
LOG_LEVEL="${2:-INFO}"

# Scenario alias -> scenario file.
declare -A SCENARIO_MAP=(
  ["online"]="scenarios/online/siemens-pdf-question-bank-online.yaml"
  ["offline"]="scenarios/offline/siemens-pdf-offline-smoke.yaml"
)

# Succeeds when $1 is a key of SCENARIO_MAP.
# Uses the ${array[key]+word} expansion rather than `[ -v "array[key]" ]`:
# the latter needs bash >= 4.3 and passes the user-supplied text through a
# second subscript evaluation, so an argument containing `$(...)` would not
# be treated as a plain string. An empty key is rejected up front because an
# empty associative-array subscript is an error.
is_scenario_alias() {
  [[ -n "${1:-}" && -n "${SCENARIO_MAP[$1]+set}" ]]
}

# Detect dataset-build mode; otherwise expand a known alias to its file and
# leave any other value untouched (it is treated as a scenario path).
BUILD_MODE=false
BUILD_CONFIG=""
if [[ "$SCENARIO" == "build" ]]; then
  BUILD_MODE=true
  BUILD_CONFIG="${2:-scenarios/siemens_build/siemens-pdf-build.yaml}"
  LOG_LEVEL="${3:-INFO}"
elif is_scenario_alias "$SCENARIO"; then
  SCENARIO="${SCENARIO_MAP[$SCENARIO]}"
fi
# ── Validation ────────────────────────────────────────────────────
# Opening banner: blank line, three colored lines, blank line.
printf '\n'
for banner_line in \
  '============================================================' \
  ' Siemens RAGAS — 评估运行' \
  '============================================================'; do
  printf '%b\n' "${CYAN}${banner_line}${NC}"
done
printf '\n'

# The interpreter is taken from the project-local virtualenv; abort with an
# error when it is not present.
PYTHON=".venv/bin/python"
if [[ ! -f "$PYTHON" ]]; then
  err "未找到 .venv请先执行部署bash deploy.sh"
  exit 1
fi
# Check that the input file for the selected mode exists. On failure print
# an error plus a hint (usage examples, or the YAML files found under
# scenarios/) and exit with status 1.
if [[ "$BUILD_MODE" != true ]]; then
  # Evaluation mode: the scenario file must exist.
  if [[ ! -f "$SCENARIO" ]]; then
    err "场景文件不存在:$SCENARIO"
    printf '%s\n' \
      "" \
      "用法示例:" \
      " bash run_eval.sh # online 评估" \
      " bash run_eval.sh offline # offline 冒烟" \
      " bash run_eval.sh scenarios/xxx.yaml # 自定义场景" \
      " bash run_eval.sh build [config.yaml] # 题库生成"
    exit 1
  fi
  ok "场景文件 : $SCENARIO"
else
  # Dataset-build mode: the build config must exist.
  if [[ ! -f "$BUILD_CONFIG" ]]; then
    err "题库生成配置文件不存在:$BUILD_CONFIG"
    printf '\n%s\n' "可用配置:"
    # List at most 20 YAML files below scenarios/, each indented by a space.
    find scenarios/ -name "*.yaml" 2>/dev/null \
      | head -20 \
      | sed 's/^/ /'
    exit 1
  fi
  ok "模式 : 题库生成 (dataset build)"
  ok "配置文件 : $BUILD_CONFIG"
fi
# Normalize the requested log level to upper case and accept only
# DEBUG / INFO / WARNING / ERROR; any other value triggers a warning and
# falls back to INFO.
LOG_LEVEL_UPPER="${LOG_LEVEL^^}"
if [[ ! "$LOG_LEVEL_UPPER" =~ ^(DEBUG|INFO|WARNING|ERROR)$ ]]; then
  warn "未知日志级别 '$LOG_LEVEL',使用默认值 INFO"
  LOG_LEVEL_UPPER="INFO"
fi
ok "日志级别 : $LOG_LEVEL_UPPER"
# Make sure logs/ exists and derive a timestamped log file name
# (logs/eval_YYYY-MM-DD_HHMMSS.log).
mkdir -p logs
TIMESTAMP="$(date +%Y-%m-%d_%H%M%S)"
LOG_FILE="logs/eval_${TIMESTAMP}.log"
ok "日志文件 : $LOG_FILE"

# "Starting" banner: blank line, three colored lines, blank line.
printf '\n'
for start_banner_line in \
  '============================================================' \
  ' 开始运行,按 Ctrl+C 中止' \
  '============================================================'; do
  printf '%b\n' "${CYAN}${start_banner_line}${NC}"
done
printf '\n'
# ── Run ───────────────────────────────────────────────────────────
export PYTHONIOENCODING="utf-8"
export PYTHONPATH="."

# Run main.py and record its exit status.
# The status is captured with `|| EXIT_CODE=$?` because the script runs under
# `set -e`: a plain failing command would terminate the script on the spot,
# before the status could be read, and the failure report below would never
# be printed.
# Build mode is invoked with --dataset-build-config only (no --log-file /
# --log-level), which is why the log-file hints below are limited to
# evaluation mode.
EXIT_CODE=0
if [[ "$BUILD_MODE" == true ]]; then
  "$PYTHON" main.py \
    --dataset-build-config "$BUILD_CONFIG" || EXIT_CODE=$?
else
  "$PYTHON" main.py \
    --scenario "$SCENARIO" \
    --log-file "$LOG_FILE" \
    --log-level "$LOG_LEVEL_UPPER" || EXIT_CODE=$?
fi

# Report the outcome; on failure propagate main.py's exit status.
echo ""
if [[ "$EXIT_CODE" -eq 0 ]]; then
  echo -e "${GREEN}============================================================${NC}"
  echo -e "${GREEN} 运行完成!${NC}"
  if [[ "$BUILD_MODE" == false ]]; then
    echo -e "${GREEN} 日志已保存到:$LOG_FILE${NC}"
  fi
  echo -e "${CYAN} 在 Web 控制台查看报告bash start.sh${NC}"
  echo -e "${GREEN}============================================================${NC}"
else
  err "运行失败exit code=$EXIT_CODE"
  if [[ "$BUILD_MODE" == false ]]; then
    err "查看日志cat $LOG_FILE"
  fi
  exit "$EXIT_CODE"
fi
echo ""