# TERES_fastapi_backend/ocr/config.py

#
#  Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#
"""
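Configuration for the OCR module.

Defines PARALLEL_DEVICES (number of GPUs, 0 for CPU mode) and MODEL_DIR
(path of the OCR model directory, or None when no local directory is set).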
"""
import os
import logging

# Number of parallel devices: the GPU count reported by torch, or 0 for CPU.
PARALLEL_DEVICES = 0
try:
    import torch.cuda
    PARALLEL_DEVICES = torch.cuda.device_count()
except Exception:
    # Any failure while importing or querying torch keeps the CPU default (0).
    logging.info("can't import package 'torch', using CPU mode")
else:
    # Reached only when both the import and the device query succeeded.
    logging.info(f"found {PARALLEL_DEVICES} gpus")

# Model directory.
# Taken from the OCR_MODEL_DIR environment variable when it holds a non-empty
# value; otherwise the default path below is used.
# An empty value (e.g. a blank entry in a .env file) is treated as unset so
# that it cannot shadow the default with a meaningless "" path.
MODEL_DIR = os.getenv("OCR_MODEL_DIR") or None
if MODEL_DIR is None:
    # Default model directory: models/deepdoc under the parent of the
    # directory that contains this file (the project root).
    # Per the original note, if it is missing the OCR class is expected to
    # try downloading the models from HuggingFace during initialization.
    _base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    MODEL_DIR = os.path.join(_base_dir, "models", "deepdoc")
    # If the default directory does not exist, fall back to None and leave
    # the download logic to the OCR class.
    if not os.path.exists(MODEL_DIR):
        MODEL_DIR = None