From d57b5d76ae3e375b4134034b6263625133e774b1 Mon Sep 17 00:00:00 2001 From: dangzerong <429714019@qq.com> Date: Tue, 4 Nov 2025 16:06:36 +0800 Subject: [PATCH] v0.21.1-fastapi --- .gitignore | 2 +- Dockerfile | 204 +- Dockerfile.base | 189 - README.md | 14 +- README_id.md | 14 +- README_ja.md | 14 +- README_ko.md | 14 +- README_pt_br.md | 14 +- README_tzh.md | 14 +- README_zh.md | 16 +- admin/admin_client.py | 590 - admin/auth.py | 57 - admin/build_cli_release.sh | 47 + admin/{ => client}/README.md | 63 +- admin/client/admin_client.py | 931 + admin/client/pyproject.toml | 24 + admin/models.py | 0 admin/responses.py | 15 - admin/routes.py | 190 - admin/{ => server}/admin_server.py | 28 + admin/server/auth.py | 193 + admin/{ => server}/config.py | 31 +- admin/{ => server}/exceptions.py | 0 .../__init__.py => admin/server/models.py | 2 +- .../server/responses.py | 58 +- admin/server/roles.py | 76 + admin/server/routes.py | 371 + admin/{ => server}/services.py | 47 +- agent/canvas.py | 3 - agent/component/agent_with_tools.py | 8 +- .../advanced_ingestion_pipeline.json | 726 + agent/templates/chunk_summary.json | 493 + agent/templates/image_lingo.json | 515 +- agent/templates/stock_research_report.json | 1172 + agent/templates/title_chunker.json | 369 + agent/tools/exesql.py | 44 +- agent/tools/pubmed.py | 53 +- agent/tools/retrieval.py | 33 +- api/apps/__init___fastapi.py | 50 +- api/apps/api_app.py | 8 +- api/apps/canvas_app.py | 458 +- api/apps/chunk_app.py | 78 +- api/apps/document_app.py | 473 +- api/apps/file2document_app.py | 109 +- api/apps/file_app.py | 384 +- api/apps/kb_app.py | 399 +- api/apps/llm_app.py | 128 +- api/apps/mcp_server_app.py | 290 +- api/apps/models/auth_dependencies.py | 53 + api/apps/models/canvas_models.py | 129 + api/apps/models/chunk_models.py | 80 + api/apps/models/document_models.py | 204 + api/apps/models/kb_models.py | 159 + api/apps/models/llm_models.py | 101 + api/apps/models/mcp_models.py | 99 + api/apps/plugin_app.py | 18 + api/apps/sdk/agent.py | 5 +- api/apps/sdk/dataset.py | 41 +- api/apps/sdk/dify_retrieval.py | 109 +- api/apps/sdk/doc.py | 109 +- api/apps/sdk/files.py | 84 +- api/apps/sdk/session.py | 60 +- api/apps/system_app.py | 6 +- api/apps/tenant_app.py | 61 +- api/apps/user_app.py | 187 +- api/apps/user_app_fastapi.py | 61 +- api/common/exceptions.py | 24 +- api/db/db_models.py | 85 +- api/db/services/canvas_service.py | 9 +- api/db/services/dialog_service.py | 14 +- api/db/services/document_service.py | 30 +- api/db/services/file_service.py | 46 +- api/db/services/knowledgebase_service.py | 5 +- api/db/services/llm_service.py | 31 +- api/db/services/mcp_server_service.py | 3 +- api/db/services/search_service.py | 3 +- api/db/services/task_service.py | 21 +- api/db/services/tenant_llm_service.py | 41 +- api/db/services/user_canvas_version.py | 26 +- api/models/chunk_models.py | 88 - api/models/kb_models.py | 98 - api/models/llm_models.py | 84 - api/models/tenant_models.py | 30 - api/settings.py | 10 +- api/utils/api_utils.py | 83 +- api/utils/email_templates.py | 25 + api/utils/file_utils.py | 136 +- api/utils/health_utils.py | 34 +- api/utils/web_utils.py | 70 +- chat_demo/index.html | 19 + chat_demo/widget_demo.html | 154 + conf/infinity_mapping.json | 5 +- conf/llm_factories.json | 67 +- conf/os_mapping.json | 55 + deepdoc/README.md | 122 - deepdoc/README_zh.md | 116 - deepdoc/__init__.py | 18 - deepdoc/parser/__init__.py | 40 - deepdoc/parser/docx_parser.py | 139 - deepdoc/parser/excel_parser.py | 189 - deepdoc/parser/figure_parser.py | 105 - deepdoc/parser/html_parser.py | 214 - deepdoc/parser/json_parser.py | 179 - deepdoc/parser/markdown_parser.py | 273 - deepdoc/parser/ocr_http_client.py | 175 - deepdoc/parser/ppt_parser.py | 99 - deepdoc/parser/resume/__init__.py | 109 - .../parser/resume/entities/corporations.py | 128 - deepdoc/parser/resume/entities/degrees.py | 44 - deepdoc/parser/resume/entities/industries.py | 712 - deepdoc/parser/resume/entities/regions.py | 789 - .../resume/entities/res/corp.tks.freq.json | 65 - .../resume/entities/res/corp_baike_len.csv | 31480 ---------------- .../parser/resume/entities/res/corp_tag.json | 14939 -------- .../parser/resume/entities/res/good_corp.json | 911 - .../parser/resume/entities/res/good_sch.json | 595 - .../resume/entities/res/school.rank.csv | 1627 - .../parser/resume/entities/res/schools.csv | 5713 --- deepdoc/parser/resume/entities/schools.py | 91 - deepdoc/parser/resume/step_one.py | 189 - deepdoc/parser/resume/step_two.py | 696 - deepdoc/parser/txt_parser.py | 64 - deepdoc/parser/utils.py | 32 - deepdoc/vision/__init__.py | 90 - deepdoc/vision/layout_recognizer.py | 456 - deepdoc/vision/ocr.py | 746 - deepdoc/vision/operators.py | 725 - deepdoc/vision/postprocess.py | 370 - deepdoc/vision/recognizer.py | 442 - deepdoc/vision/seeit.py | 87 - deepdoc/vision/t_ocr.py | 93 - deepdoc/vision/t_recognizer.py | 186 - deepdoc/vision/table_structure_recognizer.py | 612 - docker/.env | 22 +- docker/README.md | 308 +- docker/docker-compose-base.yml | 172 +- docker/docker-compose.yml | 48 +- docker/entrypoint.sh | 17 +- docker/infinity_conf.toml | 2 +- docker/ragflow.sh | 63 - docker/service_conf.yaml.template | 16 +- docker/start-ragflow.sh | 27 - docker/stop-ragflow.sh | 12 - download_deps.py | 2 +- example/http/dataset_example.sh | 52 + example/sdk/dataset_example.py | 53 + graphrag/entity_resolution.py | 6 +- graphrag/general/extractor.py | 36 +- graphrag/general/index.py | 4 +- graphrag/general/smoke.py | 2 +- graphrag/light/smoke.py | 2 +- graphrag/utils.py | 19 +- helm/.helmignore | 23 + helm/Chart.yaml | 24 + helm/templates/_helpers.tpl | 62 + helm/templates/elasticsearch-config.yaml | 17 + helm/templates/elasticsearch.yaml | 131 + helm/templates/env.yaml | 53 + helm/templates/infinity.yaml | 122 + helm/templates/ingress.yaml | 43 + helm/templates/minio.yaml | 105 + helm/templates/mysql-config.yaml | 9 + helm/templates/mysql.yaml | 110 + helm/templates/opensearch-config.yaml | 18 + helm/templates/opensearch.yaml | 135 + helm/templates/ragflow.yaml | 119 + helm/templates/ragflow_config.yaml | 89 + helm/templates/redis.yaml | 133 + helm/templates/tests/test-connection.yaml | 17 + helm/values.yaml | 234 + main-ocr.py | 191 - main.py | 16 - ocr/__init__.py | 41 - ocr/api.py | 525 - ocr/client.py | 239 - ocr/config.py | 42 - ocr/ocr.py | 785 - ocr/operators.py | 726 - ocr/pdf_parser.py | 1319 - ocr/postprocess.py | 371 - ocr/requirements.txt | 25 - ocr/service.py | 290 - ocr/utils.py | 40 - pyproject.toml | 20 +- rag/app/book.py | 14 +- rag/app/manual.py | 4 +- rag/app/naive.py | 135 +- rag/app/one.py | 17 +- rag/app/paper.py | 5 +- rag/app/picture.py | 72 +- rag/app/tag.py | 4 +- rag/benchmark.py | 2 +- .../hierarchical_merger.py | 23 +- rag/flow/parser/parser.py | 97 +- rag/flow/splitter/schema.py | 2 +- rag/flow/splitter/splitter.py | 24 +- rag/flow/tokenizer/tokenizer.py | 4 +- rag/llm/chat_model.py | 239 +- rag/llm/cv_model.py | 186 +- rag/llm/sequence2txt_model.py | 48 +- rag/nlp/__init__.py | 90 +- rag/nlp/search.py | 100 +- rag/prompts/assign_toc_levels.md | 32 +- rag/prompts/generator.py | 123 +- rag/prompts/toc_from_text_system.md | 62 +- rag/prompts/toc_relevance_system.md | 118 + rag/prompts/toc_relevance_user.md | 17 + rag/raptor.py | 2 +- rag/svr/task_executor.py | 123 +- rag/utils/infinity_conn.py | 4 +- rag/utils/minio_conn.py | 44 +- rag/utils/oss_conn.py | 10 +- sdk/python/pyproject.toml | 2 +- sdk/python/ragflow_sdk/modules/dataset.py | 41 +- sdk/python/ragflow_sdk/modules/session.py | 49 +- sdk/python/uv.lock | 2 +- startup.md | 6 - uv.lock | 7785 ++++ 218 files changed, 19617 insertions(+), 72339 deletions(-) delete mode 100644 Dockerfile.base delete mode 100644 admin/admin_client.py delete mode 100644 admin/auth.py create mode 100644 admin/build_cli_release.sh rename admin/{ => client}/README.md (79%) create mode 100644 admin/client/admin_client.py create mode 100644 admin/client/pyproject.toml delete mode 100644 admin/models.py delete mode 100644 admin/responses.py delete mode 100644 admin/routes.py rename admin/{ => server}/admin_server.py (57%) create mode 100644 admin/server/auth.py rename admin/{ => server}/config.py (89%) rename admin/{ => server}/exceptions.py (100%) rename deepdoc/parser/resume/entities/__init__.py => admin/server/models.py (99%) rename deepdoc/parser/pdf_parser.py => admin/server/responses.py (63%) create mode 100644 admin/server/roles.py create mode 100644 admin/server/routes.py rename admin/{ => server}/services.py (82%) create mode 100644 agent/templates/advanced_ingestion_pipeline.json create mode 100644 agent/templates/chunk_summary.json create mode 100644 agent/templates/stock_research_report.json create mode 100644 agent/templates/title_chunker.json create mode 100644 api/apps/models/auth_dependencies.py create mode 100644 api/apps/models/canvas_models.py create mode 100644 api/apps/models/chunk_models.py create mode 100644 api/apps/models/document_models.py create mode 100644 api/apps/models/kb_models.py create mode 100644 api/apps/models/llm_models.py create mode 100644 api/apps/models/mcp_models.py delete mode 100644 api/models/chunk_models.py delete mode 100644 api/models/kb_models.py delete mode 100644 api/models/llm_models.py delete mode 100644 api/models/tenant_models.py create mode 100644 api/utils/email_templates.py create mode 100644 chat_demo/index.html create mode 100644 chat_demo/widget_demo.html delete mode 100644 deepdoc/README.md delete mode 100644 deepdoc/README_zh.md delete mode 100644 deepdoc/__init__.py delete mode 100644 deepdoc/parser/__init__.py delete mode 100644 deepdoc/parser/docx_parser.py delete mode 100644 deepdoc/parser/excel_parser.py delete mode 100644 deepdoc/parser/figure_parser.py delete mode 100644 deepdoc/parser/html_parser.py delete mode 100644 deepdoc/parser/json_parser.py delete mode 100644 deepdoc/parser/markdown_parser.py delete mode 100644 deepdoc/parser/ocr_http_client.py delete mode 100644 deepdoc/parser/ppt_parser.py delete mode 100644 deepdoc/parser/resume/__init__.py delete mode 100644 deepdoc/parser/resume/entities/corporations.py delete mode 100644 deepdoc/parser/resume/entities/degrees.py delete mode 100644 deepdoc/parser/resume/entities/industries.py delete mode 100644 deepdoc/parser/resume/entities/regions.py delete mode 100644 deepdoc/parser/resume/entities/res/corp.tks.freq.json delete mode 100644 deepdoc/parser/resume/entities/res/corp_baike_len.csv delete mode 100644 deepdoc/parser/resume/entities/res/corp_tag.json delete mode 100644 deepdoc/parser/resume/entities/res/good_corp.json delete mode 100644 deepdoc/parser/resume/entities/res/good_sch.json delete mode 100644 deepdoc/parser/resume/entities/res/school.rank.csv delete mode 100644 deepdoc/parser/resume/entities/res/schools.csv delete mode 100644 deepdoc/parser/resume/entities/schools.py delete mode 100644 deepdoc/parser/resume/step_one.py delete mode 100644 deepdoc/parser/resume/step_two.py delete mode 100644 deepdoc/parser/txt_parser.py delete mode 100644 deepdoc/parser/utils.py delete mode 100644 deepdoc/vision/__init__.py delete mode 100644 deepdoc/vision/layout_recognizer.py delete mode 100644 deepdoc/vision/ocr.py delete mode 100644 deepdoc/vision/operators.py delete mode 100644 deepdoc/vision/postprocess.py delete mode 100644 deepdoc/vision/recognizer.py delete mode 100644 deepdoc/vision/seeit.py delete mode 100644 deepdoc/vision/t_ocr.py delete mode 100644 deepdoc/vision/t_recognizer.py delete mode 100644 deepdoc/vision/table_structure_recognizer.py delete mode 100644 docker/ragflow.sh delete mode 100644 docker/start-ragflow.sh delete mode 100644 docker/stop-ragflow.sh create mode 100644 example/http/dataset_example.sh create mode 100644 example/sdk/dataset_example.py create mode 100644 helm/.helmignore create mode 100644 helm/Chart.yaml create mode 100644 helm/templates/_helpers.tpl create mode 100644 helm/templates/elasticsearch-config.yaml create mode 100644 helm/templates/elasticsearch.yaml create mode 100644 helm/templates/env.yaml create mode 100644 helm/templates/infinity.yaml create mode 100644 helm/templates/ingress.yaml create mode 100644 helm/templates/minio.yaml create mode 100644 helm/templates/mysql-config.yaml create mode 100644 helm/templates/mysql.yaml create mode 100644 helm/templates/opensearch-config.yaml create mode 100644 helm/templates/opensearch.yaml create mode 100644 helm/templates/ragflow.yaml create mode 100644 helm/templates/ragflow_config.yaml create mode 100644 helm/templates/redis.yaml create mode 100644 helm/templates/tests/test-connection.yaml create mode 100644 helm/values.yaml delete mode 100644 main-ocr.py delete mode 100644 main.py delete mode 100644 ocr/__init__.py delete mode 100644 ocr/api.py delete mode 100644 ocr/client.py delete mode 100644 ocr/config.py delete mode 100644 ocr/ocr.py delete mode 100644 ocr/operators.py delete mode 100644 ocr/pdf_parser.py delete mode 100644 ocr/postprocess.py delete mode 100644 ocr/requirements.txt delete mode 100644 ocr/service.py delete mode 100644 ocr/utils.py create mode 100644 rag/prompts/toc_relevance_system.md create mode 100644 rag/prompts/toc_relevance_user.md delete mode 100644 startup.md create mode 100644 uv.lock diff --git a/.gitignore b/.gitignore index 956cd63..fbf80b3 100644 --- a/.gitignore +++ b/.gitignore @@ -149,7 +149,7 @@ out # Nuxt.js build / generate output .nuxt dist - +ragflow_cli.egg-info # Gatsby files .cache/ # Comment in the public line in if your project uses Gatsby and not Next.js diff --git a/Dockerfile b/Dockerfile index 4524d59..4279875 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,11 +1,197 @@ -# Application stage - builds on top of the base image -# First build the base image using: docker build -f Dockerfile.base -t ragflow-base:latest . -FROM ragflow-base:latest AS production +# base stage +FROM ubuntu:22.04 AS base +USER root +SHELL ["/bin/bash", "-c"] + +ARG NEED_MIRROR=0 +ARG LIGHTEN=0 +ENV LIGHTEN=${LIGHTEN} + +WORKDIR /ragflow + +# Copy models downloaded via download_deps.py +RUN mkdir -p /ragflow/rag/res/deepdoc /root/.ragflow +RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \ + cp /huggingface.co/InfiniFlow/huqie/huqie.txt.trie /ragflow/rag/res/ && \ + tar --exclude='.*' -cf - \ + /huggingface.co/InfiniFlow/text_concat_xgb_v1.0 \ + /huggingface.co/InfiniFlow/deepdoc \ + | tar -xf - --strip-components=3 -C /ragflow/rag/res/deepdoc +RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \ + if [ "$LIGHTEN" != "1" ]; then \ + (tar -cf - \ + /huggingface.co/BAAI/bge-large-zh-v1.5 \ + /huggingface.co/maidalun1020/bce-embedding-base_v1 \ + | tar -xf - --strip-components=2 -C /root/.ragflow) \ + fi + +# https://github.com/chrismattmann/tika-python +# This is the only way to run python-tika without internet access. Without this set, the default is to check the tika version and pull latest every time from Apache. +RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \ + cp -r /deps/nltk_data /root/ && \ + cp /deps/tika-server-standard-3.0.0.jar /deps/tika-server-standard-3.0.0.jar.md5 /ragflow/ && \ + cp /deps/cl100k_base.tiktoken /ragflow/9b5ad71b2ce5302211f9c61530b329a4922fc6a4 + +ENV TIKA_SERVER_JAR="file:///ragflow/tika-server-standard-3.0.0.jar" +ENV DEBIAN_FRONTEND=noninteractive + +# Setup apt +# Python package and implicit dependencies: +# opencv-python: libglib2.0-0 libglx-mesa0 libgl1 +# aspose-slides: pkg-config libicu-dev libgdiplus libssl1.1_1.1.1f-1ubuntu2_amd64.deb +# python-pptx: default-jdk tika-server-standard-3.0.0.jar +# selenium: libatk-bridge2.0-0 chrome-linux64-121-0-6167-85 +# Building C extensions: libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev +RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \ + if [ "$NEED_MIRROR" == "1" ]; then \ + sed -i 's|http://ports.ubuntu.com|http://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list; \ + sed -i 's|http://archive.ubuntu.com|http://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list; \ + fi; \ + rm -f /etc/apt/apt.conf.d/docker-clean && \ + echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache && \ + chmod 1777 /tmp && \ + apt update && \ + apt --no-install-recommends install -y ca-certificates && \ + apt update && \ + apt install -y libglib2.0-0 libglx-mesa0 libgl1 && \ + apt install -y pkg-config libicu-dev libgdiplus && \ + apt install -y default-jdk && \ + apt install -y libatk-bridge2.0-0 && \ + apt install -y libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev && \ + apt install -y libjemalloc-dev && \ + apt install -y python3-pip pipx nginx unzip curl wget git vim less && \ + apt install -y ghostscript + +RUN if [ "$NEED_MIRROR" == "1" ]; then \ + pip3 config set global.index-url https://mirrors.aliyun.com/pypi/simple && \ + pip3 config set global.trusted-host mirrors.aliyun.com; \ + mkdir -p /etc/uv && \ + echo "[[index]]" > /etc/uv/uv.toml && \ + echo 'url = "https://mirrors.aliyun.com/pypi/simple"' >> /etc/uv/uv.toml && \ + echo "default = true" >> /etc/uv/uv.toml; \ + fi; \ + pipx install uv + +ENV PYTHONDONTWRITEBYTECODE=1 DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1 +ENV PATH=/root/.local/bin:$PATH + +# nodejs 12.22 on Ubuntu 22.04 is too old +RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \ + curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \ + apt purge -y nodejs npm cargo && \ + apt autoremove -y && \ + apt update && \ + apt install -y nodejs + +# A modern version of cargo is needed for the latest version of the Rust compiler. +RUN apt update && apt install -y curl build-essential \ + && if [ "$NEED_MIRROR" == "1" ]; then \ + # Use TUNA mirrors for rustup/rust dist files + export RUSTUP_DIST_SERVER="https://mirrors.tuna.tsinghua.edu.cn/rustup"; \ + export RUSTUP_UPDATE_ROOT="https://mirrors.tuna.tsinghua.edu.cn/rustup/rustup"; \ + echo "Using TUNA mirrors for Rustup."; \ + fi; \ + # Force curl to use HTTP/1.1 + curl --proto '=https' --tlsv1.2 --http1.1 -sSf https://sh.rustup.rs | bash -s -- -y --profile minimal \ + && echo 'export PATH="/root/.cargo/bin:${PATH}"' >> /root/.bashrc + +ENV PATH="/root/.cargo/bin:${PATH}" + +RUN cargo --version && rustc --version + +# Add msssql ODBC driver +# macOS ARM64 environment, install msodbcsql18. +# general x86_64 environment, install msodbcsql17. +RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \ + curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - && \ + curl https://packages.microsoft.com/config/ubuntu/22.04/prod.list > /etc/apt/sources.list.d/mssql-release.list && \ + apt update && \ + arch="$(uname -m)"; \ + if [ "$arch" = "arm64" ] || [ "$arch" = "aarch64" ]; then \ + # ARM64 (macOS/Apple Silicon or Linux aarch64) + ACCEPT_EULA=Y apt install -y unixodbc-dev msodbcsql18; \ + else \ + # x86_64 or others + ACCEPT_EULA=Y apt install -y unixodbc-dev msodbcsql17; \ + fi || \ + { echo "Failed to install ODBC driver"; exit 1; } + + + +# Add dependencies of selenium +RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chrome-linux64-121-0-6167-85,target=/chrome-linux64.zip \ + unzip /chrome-linux64.zip && \ + mv chrome-linux64 /opt/chrome && \ + ln -s /opt/chrome/chrome /usr/local/bin/ +RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chromedriver-linux64-121-0-6167-85,target=/chromedriver-linux64.zip \ + unzip -j /chromedriver-linux64.zip chromedriver-linux64/chromedriver && \ + mv chromedriver /usr/local/bin/ && \ + rm -f /usr/bin/google-chrome + +# https://forum.aspose.com/t/aspose-slides-for-net-no-usable-version-of-libssl-found-with-linux-server/271344/13 +# aspose-slides on linux/arm64 is unavailable +RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \ + if [ "$(uname -m)" = "x86_64" ]; then \ + dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_amd64.deb; \ + elif [ "$(uname -m)" = "aarch64" ]; then \ + dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_arm64.deb; \ + fi + + +# builder stage +FROM base AS builder USER root WORKDIR /ragflow -# Copy application source code (these files change frequently) +# install dependencies from uv.lock file +COPY pyproject.toml uv.lock ./ + +# https://github.com/astral-sh/uv/issues/10462 +# uv records index url into uv.lock but doesn't failover among multiple indexes +RUN --mount=type=cache,id=ragflow_uv,target=/root/.cache/uv,sharing=locked \ + if [ "$NEED_MIRROR" == "1" ]; then \ + sed -i 's|pypi.org|mirrors.aliyun.com/pypi|g' uv.lock; \ + else \ + sed -i 's|mirrors.aliyun.com/pypi|pypi.org|g' uv.lock; \ + fi; \ + if [ "$LIGHTEN" == "1" ]; then \ + uv sync --python 3.10 --frozen; \ + else \ + uv sync --python 3.10 --frozen --all-extras; \ + fi + +COPY web web +COPY docs docs +RUN --mount=type=cache,id=ragflow_npm,target=/root/.npm,sharing=locked \ + cd web && npm install && npm run build + +COPY .git /ragflow/.git + +RUN version_info=$(git describe --tags --match=v* --first-parent --always); \ + if [ "$LIGHTEN" == "1" ]; then \ + version_info="$version_info slim"; \ + else \ + version_info="$version_info full"; \ + fi; \ + echo "RAGFlow version: $version_info"; \ + echo $version_info > /ragflow/VERSION + +# production stage +FROM base AS production +USER root + +WORKDIR /ragflow + +# Copy Python environment and packages +ENV VIRTUAL_ENV=/ragflow/.venv +COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV} +ENV PATH="${VIRTUAL_ENV}/bin:${PATH}" + +ENV PYTHONPATH=/ragflow/ + +COPY web web +COPY admin admin COPY api api COPY conf conf COPY deepdoc deepdoc @@ -13,14 +199,16 @@ COPY rag rag COPY agent agent COPY graphrag graphrag COPY agentic_reasoning agentic_reasoning -COPY pyproject.toml ./ +COPY pyproject.toml uv.lock ./ COPY mcp mcp COPY plugin plugin -# Copy configuration templates and entrypoint COPY docker/service_conf.yaml.template ./conf/service_conf.yaml.template COPY docker/entrypoint.sh ./ RUN chmod +x ./entrypoint*.sh -# Set the entrypoint -ENTRYPOINT ["./entrypoint.sh"] \ No newline at end of file +# Copy compiled web pages +COPY --from=builder /ragflow/web/dist /ragflow/web/dist + +COPY --from=builder /ragflow/VERSION /ragflow/VERSION +ENTRYPOINT ["./entrypoint.sh"] diff --git a/Dockerfile.base b/Dockerfile.base deleted file mode 100644 index dded3f7..0000000 --- a/Dockerfile.base +++ /dev/null @@ -1,189 +0,0 @@ -# base stage -FROM ubuntu:22.04 AS base -USER root -SHELL ["/bin/bash", "-c"] - -ARG NEED_MIRROR=0 -ARG LIGHTEN=0 -ENV LIGHTEN=${LIGHTEN} - -WORKDIR /ragflow - -# Copy models downloaded via download_deps.py -RUN mkdir -p /ragflow/rag/res/deepdoc /root/.ragflow -RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \ - cp /huggingface.co/InfiniFlow/huqie/huqie.txt.trie /ragflow/rag/res/ && \ - tar --exclude='.*' -cf - \ - /huggingface.co/InfiniFlow/text_concat_xgb_v1.0 \ - /huggingface.co/InfiniFlow/deepdoc \ - | tar -xf - --strip-components=3 -C /ragflow/rag/res/deepdoc -RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \ - if [ "$LIGHTEN" != "1" ]; then \ - (tar -cf - \ - /huggingface.co/BAAI/bge-large-zh-v1.5 \ - /huggingface.co/maidalun1020/bce-embedding-base_v1 \ - | tar -xf - --strip-components=2 -C /root/.ragflow) \ - fi - -# https://github.com/chrismattmann/tika-python -# This is the only way to run python-tika without internet access. Without this set, the default is to check the tika version and pull latest every time from Apache. -RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \ - cp -r /deps/nltk_data /root/ && \ - cp /deps/tika-server-standard-3.0.0.jar /deps/tika-server-standard-3.0.0.jar.md5 /ragflow/ && \ - cp /deps/cl100k_base.tiktoken /ragflow/9b5ad71b2ce5302211f9c61530b329a4922fc6a4 - -ENV TIKA_SERVER_JAR="file:///ragflow/tika-server-standard-3.0.0.jar" -ENV DEBIAN_FRONTEND=noninteractive - -# Setup apt -# Python package and implicit dependencies: -# opencv-python: libglib2.0-0 libglx-mesa0 libgl1 -# aspose-slides: pkg-config libicu-dev libgdiplus libssl1.1_1.1.1f-1ubuntu2_amd64.deb -# python-pptx: default-jdk tika-server-standard-3.0.0.jar -# selenium: libatk-bridge2.0-0 chrome-linux64-121-0-6167-85 -# Building C extensions: libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev -RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \ - if [ "$NEED_MIRROR" == "1" ]; then \ - sed -i 's|http://ports.ubuntu.com|http://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list; \ - sed -i 's|http://archive.ubuntu.com|http://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list; \ - fi; \ - rm -f /etc/apt/apt.conf.d/docker-clean && \ - echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache && \ - chmod 1777 /tmp && \ - apt update && \ - apt --no-install-recommends install -y ca-certificates && \ - apt update && \ - apt install -y libglib2.0-0 libglx-mesa0 libgl1 && \ - apt install -y pkg-config libicu-dev libgdiplus && \ - apt install -y default-jdk && \ - apt install -y libatk-bridge2.0-0 && \ - apt install -y libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev && \ - apt install -y libjemalloc-dev && \ - apt install -y python3-pip pipx nginx unzip curl wget git vim less && \ - apt install -y ghostscript - -RUN if [ "$NEED_MIRROR" == "1" ]; then \ - pip3 config set global.index-url https://mirrors.aliyun.com/pypi/simple && \ - pip3 config set global.trusted-host mirrors.aliyun.com; \ - mkdir -p /etc/uv && \ - echo "[[index]]" > /etc/uv/uv.toml && \ - echo 'url = "https://mirrors.aliyun.com/pypi/simple"' >> /etc/uv/uv.toml && \ - echo "default = true" >> /etc/uv/uv.toml; \ - fi; \ - pipx install uv - -ENV PYTHONDONTWRITEBYTECODE=1 DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1 -ENV PATH=/root/.local/bin:$PATH - -# nodejs 12.22 on Ubuntu 22.04 is too old -RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \ - curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \ - apt purge -y nodejs npm cargo && \ - apt autoremove -y && \ - apt update && \ - apt install -y nodejs - -# A modern version of cargo is needed for the latest version of the Rust compiler. -RUN apt update && apt install -y curl build-essential \ - && if [ "$NEED_MIRROR" == "1" ]; then \ - # Use TUNA mirrors for rustup/rust dist files - export RUSTUP_DIST_SERVER="https://mirrors.tuna.tsinghua.edu.cn/rustup"; \ - export RUSTUP_UPDATE_ROOT="https://mirrors.tuna.tsinghua.edu.cn/rustup/rustup"; \ - echo "Using TUNA mirrors for Rustup."; \ - fi; \ - # Force curl to use HTTP/1.1 - curl --proto '=https' --tlsv1.2 --http1.1 -sSf https://sh.rustup.rs | bash -s -- -y --profile minimal \ - && echo 'export PATH="/root/.cargo/bin:${PATH}"' >> /root/.bashrc - -ENV PATH="/root/.cargo/bin:${PATH}" - -RUN cargo --version && rustc --version - -# Add msssql ODBC driver -# macOS ARM64 environment, install msodbcsql18. -# general x86_64 environment, install msodbcsql17. -RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \ - curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - && \ - curl https://packages.microsoft.com/config/ubuntu/22.04/prod.list > /etc/apt/sources.list.d/mssql-release.list && \ - apt update && \ - arch="$(uname -m)"; \ - if [ "$arch" = "arm64" ] || [ "$arch" = "aarch64" ]; then \ - # ARM64 (macOS/Apple Silicon or Linux aarch64) - ACCEPT_EULA=Y apt install -y unixodbc-dev msodbcsql18; \ - else \ - # x86_64 or others - ACCEPT_EULA=Y apt install -y unixodbc-dev msodbcsql17; \ - fi || \ - { echo "Failed to install ODBC driver"; exit 1; } - -# Add dependencies of selenium -RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chrome-linux64-121-0-6167-85,target=/chrome-linux64.zip \ - unzip /chrome-linux64.zip && \ - mv chrome-linux64 /opt/chrome && \ - ln -s /opt/chrome/chrome /usr/local/bin/ -RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chromedriver-linux64-121-0-6167-85,target=/chromedriver-linux64.zip \ - unzip -j /chromedriver-linux64.zip chromedriver-linux64/chromedriver && \ - mv chromedriver /usr/local/bin/ && \ - rm -f /usr/bin/google-chrome - -# https://forum.aspose.com/t/aspose-slides-for-net-no-usable-version-of-libssl-found-with-linux-server/271344/13 -# aspose-slides on linux/arm64 is unavailable -RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \ - if [ "$(uname -m)" = "x86_64" ]; then \ - dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_amd64.deb; \ - elif [ "$(uname -m)" = "aarch64" ]; then \ - dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_arm64.deb; \ - fi - -# builder stage -FROM base AS builder -USER root - -WORKDIR /ragflow - -# install dependencies from uv.lock file -COPY pyproject.toml ./ -RUN uv lock --python 3.10 - -# https://github.com/astral-sh/uv/issues/10462 -# uv records index url into uv.lock but doesn't failover among multiple indexes -RUN --mount=type=cache,id=ragflow_uv,target=/root/.cache/uv,sharing=locked \ - if [ "$NEED_MIRROR" == "1" ]; then \ - sed -i 's|pypi.org|mirrors.aliyun.com/pypi|g' uv.lock; \ - else \ - sed -i 's|mirrors.aliyun.com/pypi|pypi.org|g' uv.lock; \ - fi; \ - if [ "$LIGHTEN" == "1" ]; then \ - uv sync --python 3.10 --frozen; \ - else \ - uv sync --python 3.10 --frozen --all-extras; \ - fi - -RUN --mount=type=cache,id=ragflow_npm,target=/root/.npm,sharing=locked - -COPY .git /ragflow/.git - -RUN version_info=$(git describe --tags --match=v* --first-parent --always); \ - if [ "$LIGHTEN" == "1" ]; then \ - version_info="$version_info slim"; \ - else \ - version_info="$version_info full"; \ - fi; \ - echo "RAGFlow version: $version_info"; \ - echo $version_info > /ragflow/VERSION - -# Final base image with Python environment -FROM base AS ragflow-base -USER root - -WORKDIR /ragflow - -# Copy Python environment and packages from builder -ENV VIRTUAL_ENV=/ragflow/.venv -COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV} -ENV PATH="${VIRTUAL_ENV}/bin:${PATH}" - -ENV PYTHONPATH=/ragflow/ - -# Copy version info -COPY --from=builder /ragflow/VERSION /ragflow/VERSION diff --git a/README.md b/README.md index 915000b..2730ff1 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@
@@ -22,7 +22,7 @@