#!/usr/bin/env bash
# run_eval.sh — Siemens RAGAS evaluation runner (Linux)
# Linux counterpart of the Windows script run_eval.ps1
#
# Usage:
#   bash run_eval.sh                       # online evaluation (default)
#   bash run_eval.sh offline               # offline smoke test
#   bash run_eval.sh scenarios/xxx.yaml    # custom scenario
#   bash run_eval.sh online DEBUG          # explicit log level
#   bash run_eval.sh build scenarios/siemens_build/siemens-pdf-build.yaml
#                                          # question-bank generation
# Strict mode: abort on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -euo pipefail

# Resolve the directory that contains this script and make it the working
# directory, so the relative paths used by the script do not depend on where
# the caller invoked it from.
# CDPATH is cleared for the lookup: with CDPATH set, `cd` on a relative path
# can resolve through it and print the destination, which would end up in the
# captured value. `--` protects against paths that start with a dash.
SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")" >/dev/null && pwd)"
cd -- "$SCRIPT_DIR"
# ── Colored output ────────────────────────────────────────────────
# Use ANSI colors only when stdout is a terminal; otherwise leave the color
# variables empty so redirected output stays free of escape sequences.
# The values are backslash-escape strings (not raw ESC bytes), so they must be
# printed through `echo -e` or printf's %b.
if [[ -t 1 ]]; then
  GREEN='\033[0;32m'
  YELLOW='\033[1;33m'
  RED='\033[0;31m'
  CYAN='\033[0;36m'
  NC='\033[0m'
else
  GREEN=''
  YELLOW=''
  RED=''
  CYAN=''
  NC=''
fi

# Log helpers: print "[TAG] message" with a colored tag.
# Arguments: $* - message text (joined with spaces)
# Outputs:   ok/warn/info write to stdout; err writes to stderr
# The color variables go through %b so their escapes are interpreted, while the
# message goes through %s so backslashes in user-supplied text (paths,
# arguments) are printed literally instead of being treated as escapes.
ok() { printf '%b[OK]%b %s\n' "$GREEN" "$NC" "$*"; }
warn() { printf '%b[WARN]%b %s\n' "$YELLOW" "$NC" "$*"; }
err() { printf '%b[ERROR]%b %s\n' "$RED" "$NC" "$*" >&2; }
info() { printf '%b[INFO]%b %s\n' "$CYAN" "$NC" "$*"; }
# ── Argument parsing ──────────────────────────────────────────────
# Scenario aliases: short name -> scenario file path.
declare -A SCENARIO_MAP=(
  ["online"]="scenarios/online/siemens-pdf-question-bank-online.yaml"
  ["offline"]="scenarios/offline/siemens-pdf-offline-smoke.yaml"
)

#######################################
# Interpret the positional arguments of the script.
# Globals:
#   SCENARIO_MAP (read)
#   SCENARIO, LOG_LEVEL, BUILD_MODE, BUILD_CONFIG (written)
# Arguments:
#   $1 - "build", a scenario alias, or a scenario file path (default: online)
#   $2 - log level (default: INFO); in build mode, the build config path
#        (default: scenarios/siemens_build/siemens-pdf-build.yaml)
#   $3 - log level in build mode (default: INFO)
#######################################
parse_args() {
  SCENARIO="${1:-online}"
  LOG_LEVEL="${2:-INFO}"

  # Dataset-build mode is selected by the literal first argument "build".
  BUILD_MODE=false
  BUILD_CONFIG=""

  if [[ "$SCENARIO" == "build" ]]; then
    BUILD_MODE=true
    BUILD_CONFIG="${2:-scenarios/siemens_build/siemens-pdf-build.yaml}"
    LOG_LEVEL="${3:-INFO}"
  # Alias lookup through ${map[key]+set}: the key is expanded exactly once.
  # Passing "SCENARIO_MAP[$SCENARIO]" to `test -v` would let older bash
  # versions expand the user-supplied text a second time (running any $(...)
  # it contains, or failing on stray quotes).
  elif [[ -n "${SCENARIO_MAP[$SCENARIO]+set}" ]]; then
    SCENARIO="${SCENARIO_MAP[$SCENARIO]}"
  fi
}

parse_args "$@"
# ── Validation ────────────────────────────────────────────────────
echo ""
echo -e "${CYAN}============================================================${NC}"
echo -e "${CYAN} Siemens RAGAS — 评估运行${NC}"
echo -e "${CYAN}============================================================${NC}"
echo ""

# Interpreter of the project's virtual environment, relative to the current
# working directory.
PYTHON=".venv/bin/python"

# Require a regular, executable interpreter file: a file that exists but
# cannot be executed would otherwise only fail later, at launch time.
if [[ ! -f "$PYTHON" || ! -x "$PYTHON" ]]; then
  err "未找到 .venv,请先执行部署:bash deploy.sh"
  exit 1
fi
# Input file validation: the build config in build mode, the scenario file
# otherwise. A missing file is reported and the script exits with status 1.
if [[ "$BUILD_MODE" == true ]]; then
  if [[ ! -f "$BUILD_CONFIG" ]]; then
    err "题库生成配置文件不存在:$BUILD_CONFIG"
    echo ""
    echo "可用配置:"
    # Best-effort listing. `|| true` keeps pipefail/errexit from aborting here
    # (find failing, or head closing the pipe early) so the script always
    # reaches the explicit `exit 1` below.
    find scenarios/ -name "*.yaml" 2>/dev/null | head -20 | sed 's/^/ /' || true
    exit 1
  fi
  ok "模式 : 题库生成 (dataset build)"
  ok "配置文件 : $BUILD_CONFIG"
else
  # Scenario file check, followed by a short usage reminder on failure.
  if [[ ! -f "$SCENARIO" ]]; then
    err "场景文件不存在:$SCENARIO"
    echo ""
    echo "用法示例:"
    echo " bash run_eval.sh # online 评估"
    echo " bash run_eval.sh offline # offline 冒烟"
    echo " bash run_eval.sh scenarios/xxx.yaml # 自定义场景"
    echo " bash run_eval.sh build [config.yaml] # 题库生成"
    exit 1
  fi
  ok "场景文件 : $SCENARIO"
fi
# Log level validation: compare case-insensitively and fall back to INFO
# (with a warning) for anything outside the accepted set.
LOG_LEVEL_UPPER="${LOG_LEVEL^^}"
case "$LOG_LEVEL_UPPER" in
  DEBUG | INFO | WARNING | ERROR) ;;
  *)
    warn "未知日志级别 '$LOG_LEVEL',使用默认值 INFO"
    LOG_LEVEL_UPPER="INFO"
    ;;
esac
ok "日志级别 : $LOG_LEVEL_UPPER"

# Create the log directory and derive a per-run log file name from the
# current time (second resolution).
mkdir -p logs
TIMESTAMP=$(date +%Y-%m-%d_%H%M%S)
LOG_FILE="logs/eval_${TIMESTAMP}.log"

# Announce the log file only for evaluation runs: in build mode this script
# does not hand a log file to main.py, so naming one would be misleading.
if [[ "$BUILD_MODE" != true ]]; then
  ok "日志文件 : $LOG_FILE"
fi
echo ""
echo -e "${CYAN}============================================================${NC}"
echo -e "${CYAN} 开始运行,按 Ctrl+C 中止${NC}"
echo -e "${CYAN}============================================================${NC}"
echo ""

# ── Run ───────────────────────────────────────────────────────────
export PYTHONIOENCODING="utf-8"
export PYTHONPATH="."

# The exit status is captured with `|| EXIT_CODE=$?`. The script runs under
# `set -e`, so a bare failing command would terminate it immediately and the
# failure report below would never be printed.
EXIT_CODE=0
if [[ "$BUILD_MODE" == true ]]; then
  "$PYTHON" main.py \
    --dataset-build-config "$BUILD_CONFIG" || EXIT_CODE=$?
else
  "$PYTHON" main.py \
    --scenario "$SCENARIO" \
    --log-file "$LOG_FILE" \
    --log-level "$LOG_LEVEL_UPPER" || EXIT_CODE=$?
fi

# Result summary. The log file is mentioned only for evaluation runs, since
# build mode is not given one above. On failure the script exits with the
# status returned by main.py.
echo ""
if [[ "$EXIT_CODE" -eq 0 ]]; then
  echo -e "${GREEN}============================================================${NC}"
  echo -e "${GREEN} 运行完成!${NC}"
  if [[ "$BUILD_MODE" == false ]]; then
    echo -e "${GREEN} 日志已保存到:$LOG_FILE${NC}"
  fi
  echo -e "${CYAN} 在 Web 控制台查看报告:bash start.sh${NC}"
  echo -e "${GREEN}============================================================${NC}"
else
  err "运行失败(exit code=$EXIT_CODE)"
  if [[ "$BUILD_MODE" == false ]]; then
    err "查看日志:cat $LOG_FILE"
  fi
  exit "$EXIT_CODE"
fi
echo ""