#!/usr/bin/env bash
# ══════════════════════════════════════════════════
# download_models.sh
# Pre-download the AI models into ./models so containers start faster.
# Uses a HuggingFace mirror endpoint by default (override with HF_ENDPOINT).
# Usage: bash scripts/download_models.sh
#
# Environment:
#   HF_ENDPOINT  HuggingFace endpoint to use (default: https://hf-mirror.com)
#
# The script is best-effort: a failed download is reported but the exit
# status stays 0.
# ══════════════════════════════════════════════════
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
cd "$PROJECT_DIR"

# Colour codes hold the real escape bytes so they can be printed with '%s';
# messages are never passed through escape interpretation.
BLUE=$'\033[0;34m'
GREEN=$'\033[0;32m'
YELLOW=$'\033[1;33m'
NC=$'\033[0m'

# Logging helpers: print a coloured tag followed by the message, verbatim.
info() { printf '%s[INFO]%s %s\n' "$BLUE" "$NC" "$*"; }
ok()   { printf '%s[OK]%s %s\n' "$GREEN" "$NC" "$*"; }
warn() { printf '%s[WARN]%s %s\n' "$YELLOW" "$NC" "$*"; }

MODELS_DIR="$PROJECT_DIR/models"
mkdir -p "$MODELS_DIR"

# Configure the mirror endpoint and point the HuggingFace cache at ./models.
export HF_ENDPOINT="${HF_ENDPOINT:-https://hf-mirror.com}"
export HF_HOME="$MODELS_DIR"

info "HuggingFace 镜像:$HF_ENDPOINT"
info "模型保存路径:$MODELS_DIR"
echo ""

#######################################
# Method 1: download a repository snapshot through huggingface_hub,
# falling back to ModelScope when that fails.
# Globals:   HF_ENDPOINT, MODELS_DIR (read)
# Arguments: $1 - HuggingFace repo id
#            $2 - model id to use for the ModelScope fallback
# Outputs:   progress messages on stdout (Python stderr is merged in)
# Returns:   0 if either source succeeded, non-zero if both failed
#######################################
download_hf() {
  local repo=$1
  local local_name=$2

  info "下载 $repo..."
  # Values are handed to Python through argv and the environment instead of
  # being spliced into the source text, so quotes or backslashes in a path
  # cannot break or alter the Python program.
  if HF_ENDPOINT="$HF_ENDPOINT" HF_HOME="$MODELS_DIR" python3 -c '
import sys
from huggingface_hub import snapshot_download

repo_id, cache_dir = sys.argv[1:3]
snapshot_download(repo_id=repo_id, cache_dir=cache_dir)
print("下载完成")
' "$repo" "$MODELS_DIR" 2>&1; then
    ok "$repo 下载成功"
    return 0
  fi

  warn "$repo HuggingFace 下载失败,尝试 ModelScope..."
  download_modelscope "$repo" "$local_name"
}

#######################################
# Method 2 (fallback): download a model snapshot through ModelScope into
# $MODELS_DIR/modelscope.
# Globals:   MODELS_DIR (read)
# Arguments: $1 - HuggingFace repo id (used as the model id when $2 is absent)
#            $2 - ModelScope model id (optional)
# Outputs:   progress messages on stdout (Python stderr is merged in)
# Returns:   0 on success, 1 if ModelScope is missing or the download failed
#######################################
download_modelscope() {
  local ms_name=${2:-$1}

  # The Python snippet exits non-zero on any failure so the shell can
  # actually observe it and emit the warning below.
  if python3 -c '
import sys

try:
    from modelscope import snapshot_download
except ImportError:
    print("ModelScope 未安装,跳过")
    sys.exit(1)

model_id, cache_dir = sys.argv[1:3]
try:
    snapshot_download(model_id=model_id, cache_dir=cache_dir)
except Exception as e:
    print(f"ModelScope 下载失败: {e}")
    sys.exit(1)
print("ModelScope 下载完成")
' "$ms_name" "${MODELS_DIR}/modelscope" 2>&1; then
    return 0
  fi

  warn "ModelScope 下载也失败,模型将在容器启动时自动下载"
  return 1
}

# ── Check the Python environment ────────────────
# Install through "python3 -m pip" so the packages land in the same
# interpreter that the download functions invoke. pip's stderr is left
# visible so the reason for a failed install can be seen.
if ! python3 -c "import huggingface_hub" 2>/dev/null; then
  warn "未安装 huggingface_hub,尝试安装..."
  python3 -m pip install -q huggingface_hub modelscope \
    || warn "安装失败,模型将在容器首次启动时下载"
fi

# ── Download the model list ─────────────────────
# Models for which both download sources failed; used for the final summary.
failed_models=()

info "=== 下载 BGE-M3 嵌入模型(约 2.5GB)==="
download_hf "BAAI/bge-m3" "BAAI/bge-m3" || failed_models+=("BAAI/bge-m3")
echo ""

info "=== 下载 BGE-Reranker 精排模型(约 1.1GB)==="
download_hf "BAAI/bge-reranker-v2-m3" "BAAI/bge-reranker-v2-m3" \
  || failed_models+=("BAAI/bge-reranker-v2-m3")
echo ""

# MinerU models are not fetched here; the messages below tell the user how
# they are obtained.
info "=== MinerU 模型说明 ==="
warn "MinerU 模型(约 2GB)将在 mcp-server 容器首次启动时自动下载"
warn "如需预下载,请在 mcp-server 容器内运行:mineru-models-download"

echo ""
if (( ${#failed_models[@]} == 0 )); then
  printf '%s\n' "${GREEN}══════════════════════════════════════════${NC}"
  printf '%s\n' "${GREEN} 模型下载完成!${NC}"
  printf '%s\n' "${GREEN}══════════════════════════════════════════${NC}"
else
  # Do not claim success when something failed; the exit status stays 0
  # because pre-downloading is only an optimisation.
  warn "以下模型未能预下载: ${failed_models[*]}"
  warn "这些模型将在容器启动时自动下载"
fi
echo ""
echo "已下载到:$MODELS_DIR"
# Size report is informational only; ignore du failures.
du -sh "$MODELS_DIR" 2>/dev/null || true