diff --git a/Dockerfile b/Dockerfile index 4279875..5961286 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,143 +1,12 @@ -# base stage -FROM ubuntu:22.04 AS base -USER root -SHELL ["/bin/bash", "-c"] +# Use base image built from Dockerfile.base +# Build base image first: docker build -f Dockerfile.base -t ragflow-base:latest . +ARG BASE_IMAGE=ragflow-base:latest +FROM ${BASE_IMAGE} AS base ARG NEED_MIRROR=0 ARG LIGHTEN=0 ENV LIGHTEN=${LIGHTEN} -WORKDIR /ragflow - -# Copy models downloaded via download_deps.py -RUN mkdir -p /ragflow/rag/res/deepdoc /root/.ragflow -RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \ - cp /huggingface.co/InfiniFlow/huqie/huqie.txt.trie /ragflow/rag/res/ && \ - tar --exclude='.*' -cf - \ - /huggingface.co/InfiniFlow/text_concat_xgb_v1.0 \ - /huggingface.co/InfiniFlow/deepdoc \ - | tar -xf - --strip-components=3 -C /ragflow/rag/res/deepdoc -RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \ - if [ "$LIGHTEN" != "1" ]; then \ - (tar -cf - \ - /huggingface.co/BAAI/bge-large-zh-v1.5 \ - /huggingface.co/maidalun1020/bce-embedding-base_v1 \ - | tar -xf - --strip-components=2 -C /root/.ragflow) \ - fi - -# https://github.com/chrismattmann/tika-python -# This is the only way to run python-tika without internet access. Without this set, the default is to check the tika version and pull latest every time from Apache. -RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \ - cp -r /deps/nltk_data /root/ && \ - cp /deps/tika-server-standard-3.0.0.jar /deps/tika-server-standard-3.0.0.jar.md5 /ragflow/ && \ - cp /deps/cl100k_base.tiktoken /ragflow/9b5ad71b2ce5302211f9c61530b329a4922fc6a4 - -ENV TIKA_SERVER_JAR="file:///ragflow/tika-server-standard-3.0.0.jar" -ENV DEBIAN_FRONTEND=noninteractive - -# Setup apt -# Python package and implicit dependencies: -# opencv-python: libglib2.0-0 libglx-mesa0 libgl1 -# aspose-slides: pkg-config libicu-dev libgdiplus libssl1.1_1.1.1f-1ubuntu2_amd64.deb -# python-pptx: default-jdk tika-server-standard-3.0.0.jar -# selenium: libatk-bridge2.0-0 chrome-linux64-121-0-6167-85 -# Building C extensions: libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev -RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \ - if [ "$NEED_MIRROR" == "1" ]; then \ - sed -i 's|http://ports.ubuntu.com|http://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list; \ - sed -i 's|http://archive.ubuntu.com|http://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list; \ - fi; \ - rm -f /etc/apt/apt.conf.d/docker-clean && \ - echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache && \ - chmod 1777 /tmp && \ - apt update && \ - apt --no-install-recommends install -y ca-certificates && \ - apt update && \ - apt install -y libglib2.0-0 libglx-mesa0 libgl1 && \ - apt install -y pkg-config libicu-dev libgdiplus && \ - apt install -y default-jdk && \ - apt install -y libatk-bridge2.0-0 && \ - apt install -y libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev && \ - apt install -y libjemalloc-dev && \ - apt install -y python3-pip pipx nginx unzip curl wget git vim less && \ - apt install -y ghostscript - -RUN if [ "$NEED_MIRROR" == "1" ]; then \ - pip3 config set global.index-url https://mirrors.aliyun.com/pypi/simple && \ - pip3 config set global.trusted-host mirrors.aliyun.com; \ - mkdir -p /etc/uv && \ - echo "[[index]]" > /etc/uv/uv.toml && \ - echo 'url = "https://mirrors.aliyun.com/pypi/simple"' >> /etc/uv/uv.toml && \ - echo "default = true" >> /etc/uv/uv.toml; \ - fi; \ - pipx install uv - -ENV PYTHONDONTWRITEBYTECODE=1 DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1 -ENV PATH=/root/.local/bin:$PATH - -# nodejs 12.22 on Ubuntu 22.04 is too old -RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \ - curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \ - apt purge -y nodejs npm cargo && \ - apt autoremove -y && \ - apt update && \ - apt install -y nodejs - -# A modern version of cargo is needed for the latest version of the Rust compiler. -RUN apt update && apt install -y curl build-essential \ - && if [ "$NEED_MIRROR" == "1" ]; then \ - # Use TUNA mirrors for rustup/rust dist files - export RUSTUP_DIST_SERVER="https://mirrors.tuna.tsinghua.edu.cn/rustup"; \ - export RUSTUP_UPDATE_ROOT="https://mirrors.tuna.tsinghua.edu.cn/rustup/rustup"; \ - echo "Using TUNA mirrors for Rustup."; \ - fi; \ - # Force curl to use HTTP/1.1 - curl --proto '=https' --tlsv1.2 --http1.1 -sSf https://sh.rustup.rs | bash -s -- -y --profile minimal \ - && echo 'export PATH="/root/.cargo/bin:${PATH}"' >> /root/.bashrc - -ENV PATH="/root/.cargo/bin:${PATH}" - -RUN cargo --version && rustc --version - -# Add msssql ODBC driver -# macOS ARM64 environment, install msodbcsql18. -# general x86_64 environment, install msodbcsql17. -RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \ - curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - && \ - curl https://packages.microsoft.com/config/ubuntu/22.04/prod.list > /etc/apt/sources.list.d/mssql-release.list && \ - apt update && \ - arch="$(uname -m)"; \ - if [ "$arch" = "arm64" ] || [ "$arch" = "aarch64" ]; then \ - # ARM64 (macOS/Apple Silicon or Linux aarch64) - ACCEPT_EULA=Y apt install -y unixodbc-dev msodbcsql18; \ - else \ - # x86_64 or others - ACCEPT_EULA=Y apt install -y unixodbc-dev msodbcsql17; \ - fi || \ - { echo "Failed to install ODBC driver"; exit 1; } - - - -# Add dependencies of selenium -RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chrome-linux64-121-0-6167-85,target=/chrome-linux64.zip \ - unzip /chrome-linux64.zip && \ - mv chrome-linux64 /opt/chrome && \ - ln -s /opt/chrome/chrome /usr/local/bin/ -RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chromedriver-linux64-121-0-6167-85,target=/chromedriver-linux64.zip \ - unzip -j /chromedriver-linux64.zip chromedriver-linux64/chromedriver && \ - mv chromedriver /usr/local/bin/ && \ - rm -f /usr/bin/google-chrome - -# https://forum.aspose.com/t/aspose-slides-for-net-no-usable-version-of-libssl-found-with-linux-server/271344/13 -# aspose-slides on linux/arm64 is unavailable -RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \ - if [ "$(uname -m)" = "x86_64" ]; then \ - dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_amd64.deb; \ - elif [ "$(uname -m)" = "aarch64" ]; then \ - dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_arm64.deb; \ - fi - - # builder stage FROM base AS builder USER root diff --git a/Dockerfile.base b/Dockerfile.base new file mode 100644 index 0000000..819d96c --- /dev/null +++ b/Dockerfile.base @@ -0,0 +1,139 @@ +# base stage +FROM ubuntu:22.04 AS base +USER root +SHELL ["/bin/bash", "-c"] + +ARG NEED_MIRROR=0 +ARG LIGHTEN=0 +ENV LIGHTEN=${LIGHTEN} + +WORKDIR /ragflow + +# Copy models downloaded via download_deps.py +RUN mkdir -p /ragflow/rag/res/deepdoc /root/.ragflow +RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \ + cp /huggingface.co/InfiniFlow/huqie/huqie.txt.trie /ragflow/rag/res/ && \ + tar --exclude='.*' -cf - \ + /huggingface.co/InfiniFlow/text_concat_xgb_v1.0 \ + /huggingface.co/InfiniFlow/deepdoc \ + | tar -xf - --strip-components=3 -C /ragflow/rag/res/deepdoc +RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \ + if [ "$LIGHTEN" != "1" ]; then \ + (tar -cf - \ + /huggingface.co/BAAI/bge-large-zh-v1.5 \ + /huggingface.co/maidalun1020/bce-embedding-base_v1 \ + | tar -xf - --strip-components=2 -C /root/.ragflow) \ + fi + +# https://github.com/chrismattmann/tika-python +# This is the only way to run python-tika without internet access. Without this set, the default is to check the tika version and pull latest every time from Apache. +RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \ + cp -r /deps/nltk_data /root/ && \ + cp /deps/tika-server-standard-3.0.0.jar /deps/tika-server-standard-3.0.0.jar.md5 /ragflow/ && \ + cp /deps/cl100k_base.tiktoken /ragflow/9b5ad71b2ce5302211f9c61530b329a4922fc6a4 + +ENV TIKA_SERVER_JAR="file:///ragflow/tika-server-standard-3.0.0.jar" +ENV DEBIAN_FRONTEND=noninteractive + +# Setup apt +# Python package and implicit dependencies: +# opencv-python: libglib2.0-0 libglx-mesa0 libgl1 +# aspose-slides: pkg-config libicu-dev libgdiplus libssl1.1_1.1.1f-1ubuntu2_amd64.deb +# python-pptx: default-jdk tika-server-standard-3.0.0.jar +# selenium: libatk-bridge2.0-0 chrome-linux64-121-0-6167-85 +# Building C extensions: libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev +RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \ + if [ "$NEED_MIRROR" == "1" ]; then \ + sed -i 's|http://ports.ubuntu.com|http://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list; \ + sed -i 's|http://archive.ubuntu.com|http://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list; \ + fi; \ + rm -f /etc/apt/apt.conf.d/docker-clean && \ + echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache && \ + chmod 1777 /tmp && \ + apt update && \ + apt --no-install-recommends install -y ca-certificates && \ + apt update && \ + apt install -y libglib2.0-0 libglx-mesa0 libgl1 && \ + apt install -y pkg-config libicu-dev libgdiplus && \ + apt install -y default-jdk && \ + apt install -y libatk-bridge2.0-0 && \ + apt install -y libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev && \ + apt install -y libjemalloc-dev && \ + apt install -y python3-pip pipx nginx unzip curl wget git vim less && \ + apt install -y ghostscript + +RUN if [ "$NEED_MIRROR" == "1" ]; then \ + pip3 config set global.index-url https://mirrors.aliyun.com/pypi/simple && \ + pip3 config set global.trusted-host mirrors.aliyun.com; \ + mkdir -p /etc/uv && \ + echo "[[index]]" > /etc/uv/uv.toml && \ + echo 'url = "https://mirrors.aliyun.com/pypi/simple"' >> /etc/uv/uv.toml && \ + echo "default = true" >> /etc/uv/uv.toml; \ + fi; \ + pipx install uv + +ENV PYTHONDONTWRITEBYTECODE=1 DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1 +ENV PATH=/root/.local/bin:$PATH + +# nodejs 12.22 on Ubuntu 22.04 is too old +RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \ + curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \ + apt purge -y nodejs npm cargo && \ + apt autoremove -y && \ + apt update && \ + apt install -y nodejs + +# A modern version of cargo is needed for the latest version of the Rust compiler. +RUN apt update && apt install -y curl build-essential \ + && if [ "$NEED_MIRROR" == "1" ]; then \ + # Use TUNA mirrors for rustup/rust dist files + export RUSTUP_DIST_SERVER="https://mirrors.tuna.tsinghua.edu.cn/rustup"; \ + export RUSTUP_UPDATE_ROOT="https://mirrors.tuna.tsinghua.edu.cn/rustup/rustup"; \ + echo "Using TUNA mirrors for Rustup."; \ + fi; \ + # Force curl to use HTTP/1.1 + curl --proto '=https' --tlsv1.2 --http1.1 -sSf https://sh.rustup.rs | bash -s -- -y --profile minimal \ + && echo 'export PATH="/root/.cargo/bin:${PATH}"' >> /root/.bashrc + +ENV PATH="/root/.cargo/bin:${PATH}" + +RUN cargo --version && rustc --version + +# Add msssql ODBC driver +# macOS ARM64 environment, install msodbcsql18. +# general x86_64 environment, install msodbcsql17. +RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \ + curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - && \ + curl https://packages.microsoft.com/config/ubuntu/22.04/prod.list > /etc/apt/sources.list.d/mssql-release.list && \ + apt update && \ + arch="$(uname -m)"; \ + if [ "$arch" = "arm64" ] || [ "$arch" = "aarch64" ]; then \ + # ARM64 (macOS/Apple Silicon or Linux aarch64) + ACCEPT_EULA=Y apt install -y unixodbc-dev msodbcsql18; \ + else \ + # x86_64 or others + ACCEPT_EULA=Y apt install -y unixodbc-dev msodbcsql17; \ + fi || \ + { echo "Failed to install ODBC driver"; exit 1; } + + + +# Add dependencies of selenium +RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chrome-linux64-121-0-6167-85,target=/chrome-linux64.zip \ + unzip /chrome-linux64.zip && \ + mv chrome-linux64 /opt/chrome && \ + ln -s /opt/chrome/chrome /usr/local/bin/ +RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chromedriver-linux64-121-0-6167-85,target=/chromedriver-linux64.zip \ + unzip -j /chromedriver-linux64.zip chromedriver-linux64/chromedriver && \ + mv chromedriver /usr/local/bin/ && \ + rm -f /usr/bin/google-chrome + +# https://forum.aspose.com/t/aspose-slides-for-net-no-usable-version-of-libssl-found-with-linux-server/271344/13 +# aspose-slides on linux/arm64 is unavailable +RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \ + if [ "$(uname -m)" = "x86_64" ]; then \ + dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_amd64.deb; \ + elif [ "$(uname -m)" = "aarch64" ]; then \ + dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_arm64.deb; \ + fi + diff --git a/docker/docker-compose-base.yml b/docker/docker-compose-base.yml index 1703257..d7aa601 100644 --- a/docker/docker-compose-base.yml +++ b/docker/docker-compose-base.yml @@ -1,39 +1,4 @@ services: - es01: - container_name: ragflow-es-01 - profiles: - - elasticsearch - image: elasticsearch:${STACK_VERSION} - volumes: - - esdata01:/usr/share/elasticsearch/data - ports: - - ${ES_PORT}:9200 - env_file: .env - environment: - - node.name=es01 - - ELASTIC_PASSWORD=${ELASTIC_PASSWORD} - - bootstrap.memory_lock=false - - discovery.type=single-node - - xpack.security.enabled=true - - xpack.security.http.ssl.enabled=false - - xpack.security.transport.ssl.enabled=false - - cluster.routing.allocation.disk.watermark.low=5gb - - cluster.routing.allocation.disk.watermark.high=3gb - - cluster.routing.allocation.disk.watermark.flood_stage=2gb - - TZ=${TIMEZONE} - mem_limit: ${MEM_LIMIT} - ulimits: - memlock: - soft: -1 - hard: -1 - healthcheck: - test: ["CMD-SHELL", "curl http://localhost:9200"] - interval: 10s - timeout: 10s - retries: 120 - networks: - - ragflow - restart: on-failure opensearch01: container_name: ragflow-opensearch-01 @@ -73,34 +38,27 @@ services: - ragflow restart: on-failure - infinity: - container_name: ragflow-infinity - profiles: - - infinity - image: infiniflow/infinity:v0.6.1 - volumes: - - infinity_data:/var/infinity - - ./infinity_conf.toml:/infinity_conf.toml - command: ["-f", "/infinity_conf.toml"] - ports: - - ${INFINITY_THRIFT_PORT}:23817 - - ${INFINITY_HTTP_PORT}:23820 - - ${INFINITY_PSQL_PORT}:5432 + + postgres: + image: postgres:15 + container_name: ragflow-postgres env_file: .env environment: + - POSTGRES_DB=${POSTGRES_DBNAME} + - POSTGRES_USER=${POSTGRES_USER} + - POSTGRES_PASSWORD=${POSTGRES_PASSWORD} - TZ=${TIMEZONE} - mem_limit: ${MEM_LIMIT} - ulimits: - nofile: - soft: 500000 - hard: 500000 + ports: + - ${POSTGRES_PORT-5440}:5432 + volumes: + - postgres_data:/var/lib/postgresql/data networks: - ragflow healthcheck: - test: ["CMD", "curl", "http://localhost:23820/admin/node/current"] + test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER} -d ${POSTGRES_DBNAME}"] interval: 10s - timeout: 10s - retries: 120 + timeout: 5s + retries: 5 restart: on-failure sandbox-executor-manager: @@ -133,36 +91,6 @@ services: retries: 5 restart: on-failure - mysql: - # mysql:5.7 linux/arm64 image is unavailable. - image: mysql:8.0.39 - container_name: ragflow-mysql - env_file: .env - environment: - - MYSQL_ROOT_PASSWORD=${MYSQL_PASSWORD} - - TZ=${TIMEZONE} - command: - --max_connections=1000 - --character-set-server=utf8mb4 - --collation-server=utf8mb4_unicode_ci - --default-authentication-plugin=mysql_native_password - --tls_version="TLSv1.2,TLSv1.3" - --init-file /data/application/init.sql - --binlog_expire_logs_seconds=604800 - ports: - - ${MYSQL_PORT}:3306 - volumes: - - mysql_data:/var/lib/mysql - - ./init.sql:/data/application/init.sql - networks: - - ragflow - healthcheck: - test: ["CMD", "mysqladmin" ,"ping", "-uroot", "-p${MYSQL_PASSWORD}"] - interval: 10s - timeout: 10s - retries: 3 - restart: on-failure - minio: image: quay.io/minio/minio:RELEASE.2025-06-13T11-33-47Z container_name: ragflow-minio @@ -207,47 +135,24 @@ services: start_period: 10s - kibana: - container_name: ragflow-kibana - profiles: - - kibana - image: kibana:${STACK_VERSION} - ports: - - ${KIBANA_PORT-5601}:5601 - env_file: .env - environment: - - TZ=${TIMEZONE} - volumes: - - kibana_data:/usr/share/kibana/data - depends_on: - es01: - condition: service_started - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:5601/api/status"] - interval: 10s - timeout: 10s - retries: 120 - networks: - - ragflow - restart: on-failure - volumes: esdata01: - driver: local + name: ragflow_esdata01 osdata01: - driver: local + name: ragflow_osdata01 infinity_data: - driver: local + name: ragflow_infinity_data mysql_data: - driver: local + name: ragflow_mysql_data minio_data: - driver: local + name: ragflow_minio_data redis_data: - driver: local - kibana_data: - driver: local + name: ragflow_redis_data + postgres_data: + name: ragflow_postgres_data networks: ragflow: + name: ragflow driver: bridge diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 960f850..b323563 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -4,7 +4,7 @@ include: services: ragflow: depends_on: - mysql: + postgres: condition: service_healthy image: ${RAGFLOW_IMAGE} # Example configuration to set up an MCP server: diff --git a/docker/service_conf.yaml.template b/docker/service_conf.yaml.template index b5121d6..54dc60c 100644 --- a/docker/service_conf.yaml.template +++ b/docker/service_conf.yaml.template @@ -32,14 +32,14 @@ redis: db: 1 password: '${REDIS_PASSWORD:-infini_rag_flow}' host: '${REDIS_HOST:-redis}:6379' -# postgres: -# name: '${POSTGRES_DBNAME:-rag_flow}' -# user: '${POSTGRES_USER:-rag_flow}' -# password: '${POSTGRES_PASSWORD:-infini_rag_flow}' -# host: '${POSTGRES_HOST:-postgres}' -# port: 5432 -# max_connections: 100 -# stale_timeout: 30 +postgres: + name: '${POSTGRES_DBNAME:-rag_flow}' + user: '${POSTGRES_USER:-rag_flow}' + password: '${POSTGRES_PASSWORD:-infini_rag_flow}' + host: '${POSTGRES_HOST:-postgres}' + port: 5432 + max_connections: 100 + stale_timeout: 30 # s3: # access_key: 'access_key' # secret_key: 'secret_key'