Files
AIRegulation-Deployment/scripts/download_models.sh

92 lines
3.6 KiB
Bash
Raw Permalink Normal View History

2026-04-23 09:58:47 +08:00
#!/usr/bin/env bash
# ══════════════════════════════════════════════════
# download_models.sh
# 预下载 AI 模型到 ./models 目录(加速容器启动)
# 支持 HuggingFace 镜像加速(国内网络)
# 用法: bash scripts/download_models.sh
# ══════════════════════════════════════════════════
# Strict mode: stop on command failure, on use of an unset variable, and on a
# failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Resolve the directory containing this script, take its parent as the
# project root, and make the project root the working directory.
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
PROJECT_DIR=$(dirname "$SCRIPT_DIR")
cd "$PROJECT_DIR"

# ANSI colour sequences, stored as backslash escapes that are expanded when
# they are printed.
BLUE='\033[0;34m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'
# Print an informational message on stdout, prefixed with a blue "[INFO]" tag.
# Globals:   BLUE, NC (read) - colour escape sequences
# Arguments: message words; they are joined with spaces
# Only the colour codes are escape-expanded (%b). The message is printed
# verbatim (%s) so backslashes inside paths or model names are not mangled.
info() { printf '%b[INFO]%b %s\n' "$BLUE" "$NC" "$*"; }
# Print a success message on stdout, prefixed with a green "[OK]" tag.
# Globals:   GREEN, NC (read) - colour escape sequences
# Arguments: message words; they are joined with spaces
# Only the colour codes are escape-expanded (%b). The message is printed
# verbatim (%s) so backslashes inside paths or model names are not mangled.
ok() { printf '%b[OK]%b %s\n' "$GREEN" "$NC" "$*"; }
# Print a warning message on stdout, prefixed with a yellow "[WARN]" tag.
# Globals:   YELLOW, NC (read) - colour escape sequences
# Arguments: message words; they are joined with spaces
# Only the colour codes are escape-expanded (%b). The message is printed
# verbatim (%s) so backslashes inside paths or model names are not mangled.
warn() { printf '%b[WARN]%b %s\n' "$YELLOW" "$NC" "$*"; }
# Directory that receives the downloaded model files; created if missing.
MODELS_DIR="${PROJECT_DIR}/models"
mkdir -p "$MODELS_DIR"

# Mirror acceleration: keep a caller-supplied, non-empty HF_ENDPOINT,
# otherwise fall back to the hf-mirror.com mirror. HF_HOME is pointed at the
# models directory.
: "${HF_ENDPOINT:=https://hf-mirror.com}"
export HF_ENDPOINT
export HF_HOME="$MODELS_DIR"

# Report the effective settings, followed by a blank separator line.
info "HuggingFace 镜像:$HF_ENDPOINT"
info "模型保存路径:$MODELS_DIR"
echo
# ── Method 1: download via huggingface_hub ────────────
#######################################
# Download a model snapshot with huggingface_hub into MODELS_DIR and fall
# back to download_modelscope when that fails.
# Globals:   HF_ENDPOINT, MODELS_DIR (read)
# Arguments: $1 - HuggingFace repo id (e.g. "BAAI/bge-m3")
#            $2 - model id handed to the ModelScope fallback (defaults to $1)
# Outputs:   progress messages plus the Python output (stderr merged into
#            stdout)
# Returns:   status of `ok` on success, otherwise the status of
#            download_modelscope
#######################################
download_hf() {
  local repo=$1
  local local_name=${2:-$1}

  info "下载 $repo..."
  # The repo id and target directory travel as argv, and the endpoint/home as
  # environment variables, instead of being spliced into the Python source.
  # A quote or backslash in any of them therefore cannot break or alter the
  # Python code. The Python text stays at column 0 because `python3 -c`
  # rejects an indented first statement.
  if HF_ENDPOINT="$HF_ENDPOINT" HF_HOME="$MODELS_DIR" python3 -c '
import sys
from huggingface_hub import snapshot_download
snapshot_download(repo_id=sys.argv[1], cache_dir=sys.argv[2])
print("下载完成")
' "$repo" "$MODELS_DIR" 2>&1; then
    ok "$repo 下载成功"
  else
    warn "$repo HuggingFace 下载失败,尝试 ModelScope..."
    download_modelscope "$repo" "$local_name"
  fi
}
# ── Method 2: download via ModelScope (fallback) ──────────
#######################################
# Download a model snapshot with the modelscope package into
# "$MODELS_DIR/modelscope". Best effort: any failure is reported as a warning
# and the function still returns the status of `warn`.
# Globals:   MODELS_DIR (read)
# Arguments: $1 - HuggingFace repo id (only used as the default for $2)
#            $2 - ModelScope model id (defaults to $1)
# Outputs:   the Python output (stderr merged into stdout) and, on failure,
#            a warning
#######################################
download_modelscope() {
  local ms_name=${2:-$1}

  # The model id and cache directory are passed as argv rather than being
  # interpolated into the Python source. Both except branches exit non-zero
  # so that the `|| warn` below actually fires when nothing was downloaded.
  python3 -c '
import sys
try:
    from modelscope import snapshot_download
    snapshot_download(model_id=sys.argv[1], cache_dir=sys.argv[2])
    print("ModelScope 下载完成")
except ImportError:
    print("ModelScope 未安装,跳过")
    sys.exit(1)
except Exception as e:
    print(f"ModelScope 下载失败: {e}")
    sys.exit(1)
' "$ms_name" "${MODELS_DIR}/modelscope" 2>&1 \
    || warn "ModelScope 下载也失败,模型将在容器启动时自动下载"
}
# ── Check the Python environment ────────────────────────────
# If huggingface_hub cannot be imported by python3, try to install it together
# with modelscope. The install runs through `python3 -m pip` so the packages
# land in the interpreter that was just probed and that later performs the
# downloads; a bare `pip3` may belong to a different Python installation.
if ! python3 -c "import huggingface_hub" 2>/dev/null; then
  warn "未安装 huggingface_hub尝试安装..."
  # Best effort: a failed install only produces a warning and the script
  # carries on.
  python3 -m pip install -q huggingface_hub modelscope 2>/dev/null \
    || warn "安装失败,模型将在容器首次启动时下载"
fi
# ── Model download list ────────────────────────────────
# Each entry is "<banner text>|<repo id>". The repo id is passed to
# download_hf as both of its arguments.
model_entries=(
  "=== 下载 BGE-M3 嵌入模型(约 2.5GB===|BAAI/bge-m3"
  "=== 下载 BGE-Reranker 精排模型(约 1.1GB===|BAAI/bge-reranker-v2-m3"
)
for entry in "${model_entries[@]}"; do
  info "${entry%%|*}"
  download_hf "${entry##*|}" "${entry##*|}"
  echo
done

# MinerU models are downloaded by a script inside the container (depends on
# the magic-pdf configuration), so only a notice is printed here.
info "=== MinerU 模型说明 ==="
warn "MinerU 模型(约 2GB将在 mcp-server 容器首次启动时自动下载"
warn "如需预下载,请在 mcp-server 容器内运行mineru-models-download"
echo

# Closing banner: a green message framed by two identical rules.
rule="${GREEN}══════════════════════════════════════════${NC}"
echo -e "$rule"
echo -e "${GREEN} 模型下载完成!${NC}"
echo -e "$rule"
echo

# Report the target directory; the size summary is best effort and never
# fails the script.
echo "已下载到:$MODELS_DIR"
du -sh "$MODELS_DIR" 2>/dev/null || true