v0.21.1-fastapi

.gitignore (vendored) — 2 lines changed
@@ -149,7 +149,7 @@ out
 # Nuxt.js build / generate output
 .nuxt
 dist
+ragflow_cli.egg-info
 # Gatsby files
 .cache/
 # Comment in the public line in if your project uses Gatsby and not Next.js

Dockerfile — 204 lines changed

@@ -1,11 +1,197 @@
-# Application stage - builds on top of the base image
-# First build the base image using: docker build -f Dockerfile.base -t ragflow-base:latest .
-FROM ragflow-base:latest AS production
+# base stage
+FROM ubuntu:22.04 AS base
+USER root
+SHELL ["/bin/bash", "-c"]
+
+ARG NEED_MIRROR=0
+ARG LIGHTEN=0
+ENV LIGHTEN=${LIGHTEN}
+
+WORKDIR /ragflow
+
+# Copy models downloaded via download_deps.py
+RUN mkdir -p /ragflow/rag/res/deepdoc /root/.ragflow
+RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \
+    cp /huggingface.co/InfiniFlow/huqie/huqie.txt.trie /ragflow/rag/res/ && \
+    tar --exclude='.*' -cf - \
+        /huggingface.co/InfiniFlow/text_concat_xgb_v1.0 \
+        /huggingface.co/InfiniFlow/deepdoc \
+        | tar -xf - --strip-components=3 -C /ragflow/rag/res/deepdoc
+RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \
+    if [ "$LIGHTEN" != "1" ]; then \
+        (tar -cf - \
+            /huggingface.co/BAAI/bge-large-zh-v1.5 \
+            /huggingface.co/maidalun1020/bce-embedding-base_v1 \
+            | tar -xf - --strip-components=2 -C /root/.ragflow) \
+    fi
+
+# https://github.com/chrismattmann/tika-python
+# This is the only way to run python-tika without internet access. Without this set, the default is to check the tika version and pull the latest from Apache every time.
+RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \
+    cp -r /deps/nltk_data /root/ && \
+    cp /deps/tika-server-standard-3.0.0.jar /deps/tika-server-standard-3.0.0.jar.md5 /ragflow/ && \
+    cp /deps/cl100k_base.tiktoken /ragflow/9b5ad71b2ce5302211f9c61530b329a4922fc6a4
+
+ENV TIKA_SERVER_JAR="file:///ragflow/tika-server-standard-3.0.0.jar"
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Setup apt
+# Python package and implicit dependencies:
+# opencv-python: libglib2.0-0 libglx-mesa0 libgl1
+# aspose-slides: pkg-config libicu-dev libgdiplus libssl1.1_1.1.1f-1ubuntu2_amd64.deb
+# python-pptx: default-jdk tika-server-standard-3.0.0.jar
+# selenium: libatk-bridge2.0-0 chrome-linux64-121-0-6167-85
+# Building C extensions: libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev
+RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
+    if [ "$NEED_MIRROR" == "1" ]; then \
+        sed -i 's|http://ports.ubuntu.com|http://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list; \
+        sed -i 's|http://archive.ubuntu.com|http://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list; \
+    fi; \
+    rm -f /etc/apt/apt.conf.d/docker-clean && \
+    echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache && \
+    chmod 1777 /tmp && \
+    apt update && \
+    apt --no-install-recommends install -y ca-certificates && \
+    apt update && \
+    apt install -y libglib2.0-0 libglx-mesa0 libgl1 && \
+    apt install -y pkg-config libicu-dev libgdiplus && \
+    apt install -y default-jdk && \
+    apt install -y libatk-bridge2.0-0 && \
+    apt install -y libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev && \
+    apt install -y libjemalloc-dev && \
+    apt install -y python3-pip pipx nginx unzip curl wget git vim less && \
+    apt install -y ghostscript
+
+RUN if [ "$NEED_MIRROR" == "1" ]; then \
+        pip3 config set global.index-url https://mirrors.aliyun.com/pypi/simple && \
+        pip3 config set global.trusted-host mirrors.aliyun.com; \
+        mkdir -p /etc/uv && \
+        echo "[[index]]" > /etc/uv/uv.toml && \
+        echo 'url = "https://mirrors.aliyun.com/pypi/simple"' >> /etc/uv/uv.toml && \
+        echo "default = true" >> /etc/uv/uv.toml; \
+    fi; \
+    pipx install uv
+
+ENV PYTHONDONTWRITEBYTECODE=1 DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1
+ENV PATH=/root/.local/bin:$PATH
+
+# nodejs 12.22 on Ubuntu 22.04 is too old
+RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
+    curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
+    apt purge -y nodejs npm cargo && \
+    apt autoremove -y && \
+    apt update && \
+    apt install -y nodejs
+
+# A modern version of cargo is needed for the latest version of the Rust compiler.
+RUN apt update && apt install -y curl build-essential \
+    && if [ "$NEED_MIRROR" == "1" ]; then \
+        # Use TUNA mirrors for rustup/rust dist files
+        export RUSTUP_DIST_SERVER="https://mirrors.tuna.tsinghua.edu.cn/rustup"; \
+        export RUSTUP_UPDATE_ROOT="https://mirrors.tuna.tsinghua.edu.cn/rustup/rustup"; \
+        echo "Using TUNA mirrors for Rustup."; \
+    fi; \
+    # Force curl to use HTTP/1.1
+    curl --proto '=https' --tlsv1.2 --http1.1 -sSf https://sh.rustup.rs | bash -s -- -y --profile minimal \
+    && echo 'export PATH="/root/.cargo/bin:${PATH}"' >> /root/.bashrc
+
+ENV PATH="/root/.cargo/bin:${PATH}"
+
+RUN cargo --version && rustc --version
+
+# Add mssql ODBC driver
+# macOS ARM64 environment: install msodbcsql18.
+# general x86_64 environment: install msodbcsql17.
+RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
+    curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - && \
+    curl https://packages.microsoft.com/config/ubuntu/22.04/prod.list > /etc/apt/sources.list.d/mssql-release.list && \
+    apt update && \
+    arch="$(uname -m)"; \
+    if [ "$arch" = "arm64" ] || [ "$arch" = "aarch64" ]; then \
+        # ARM64 (macOS/Apple Silicon or Linux aarch64)
+        ACCEPT_EULA=Y apt install -y unixodbc-dev msodbcsql18; \
+    else \
+        # x86_64 or others
+        ACCEPT_EULA=Y apt install -y unixodbc-dev msodbcsql17; \
+    fi || \
+    { echo "Failed to install ODBC driver"; exit 1; }
+
+# Add dependencies of selenium
+RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chrome-linux64-121-0-6167-85,target=/chrome-linux64.zip \
+    unzip /chrome-linux64.zip && \
+    mv chrome-linux64 /opt/chrome && \
+    ln -s /opt/chrome/chrome /usr/local/bin/
+RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chromedriver-linux64-121-0-6167-85,target=/chromedriver-linux64.zip \
+    unzip -j /chromedriver-linux64.zip chromedriver-linux64/chromedriver && \
+    mv chromedriver /usr/local/bin/ && \
+    rm -f /usr/bin/google-chrome
+
+# https://forum.aspose.com/t/aspose-slides-for-net-no-usable-version-of-libssl-found-with-linux-server/271344/13
+# aspose-slides on linux/arm64 is unavailable
+RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \
+    if [ "$(uname -m)" = "x86_64" ]; then \
+        dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_amd64.deb; \
+    elif [ "$(uname -m)" = "aarch64" ]; then \
+        dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_arm64.deb; \
+    fi
+
+# builder stage
+FROM base AS builder
+USER root
+
+WORKDIR /ragflow
+
+# Copy application source code (these files change frequently)
+# install dependencies from uv.lock file
+COPY pyproject.toml uv.lock ./
+
+# https://github.com/astral-sh/uv/issues/10462
+# uv records the index URL into uv.lock but doesn't fail over among multiple indexes
+RUN --mount=type=cache,id=ragflow_uv,target=/root/.cache/uv,sharing=locked \
+    if [ "$NEED_MIRROR" == "1" ]; then \
+        sed -i 's|pypi.org|mirrors.aliyun.com/pypi|g' uv.lock; \
+    else \
+        sed -i 's|mirrors.aliyun.com/pypi|pypi.org|g' uv.lock; \
+    fi; \
+    if [ "$LIGHTEN" == "1" ]; then \
+        uv sync --python 3.10 --frozen; \
+    else \
+        uv sync --python 3.10 --frozen --all-extras; \
+    fi
+
+COPY web web
+COPY docs docs
+RUN --mount=type=cache,id=ragflow_npm,target=/root/.npm,sharing=locked \
+    cd web && npm install && npm run build
+
+COPY .git /ragflow/.git
+
+RUN version_info=$(git describe --tags --match=v* --first-parent --always); \
+    if [ "$LIGHTEN" == "1" ]; then \
+        version_info="$version_info slim"; \
+    else \
+        version_info="$version_info full"; \
+    fi; \
+    echo "RAGFlow version: $version_info"; \
+    echo $version_info > /ragflow/VERSION
+
+# production stage
+FROM base AS production
+USER root
+
+WORKDIR /ragflow
+
+# Copy Python environment and packages
+ENV VIRTUAL_ENV=/ragflow/.venv
+COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
+ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
+
+ENV PYTHONPATH=/ragflow/
+
 COPY web web
 COPY admin admin
 COPY api api
 COPY conf conf
 COPY deepdoc deepdoc

@@ -13,14 +199,16 @@ COPY rag rag
 COPY agent agent
 COPY graphrag graphrag
 COPY agentic_reasoning agentic_reasoning
-COPY pyproject.toml ./
+COPY pyproject.toml uv.lock ./
 COPY mcp mcp
 COPY plugin plugin

 # Copy configuration templates and entrypoint
 COPY docker/service_conf.yaml.template ./conf/service_conf.yaml.template
 COPY docker/entrypoint.sh ./
 RUN chmod +x ./entrypoint*.sh

-# Set the entrypoint
-ENTRYPOINT ["./entrypoint.sh"]
+# Copy compiled web pages
+COPY --from=builder /ragflow/web/dist /ragflow/web/dist
+
+COPY --from=builder /ragflow/VERSION /ragflow/VERSION
+ENTRYPOINT ["./entrypoint.sh"]
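
With the base image folded back in, the image now builds in one step instead of the previous "build Dockerfile.base, then build Dockerfile" sequence. A minimal sketch of the two common invocations from the repository root (the tag names here are illustrative, not from the commit):

```bash
# Full edition: embedding models baked in (LIGHTEN defaults to 0)
docker build -f Dockerfile -t ragflow:full .

# Slim edition, with the TUNA/Aliyun mirrors enabled for apt, pip/uv and rustup
docker build -f Dockerfile \
  --build-arg LIGHTEN=1 \
  --build-arg NEED_MIRROR=1 \
  -t ragflow:slim .
```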
Dockerfile.base — 189 lines deleted

@@ -1,189 +0,0 @@
The whole file is removed. Its base stage (FROM ubuntu:22.04 AS base through the libssl1.1 workaround) is identical to the content now inlined at the top of Dockerfile above, so it is not repeated here. The deleted builder and final stages differed from the new Dockerfile as follows:

# builder stage
FROM base AS builder
USER root

WORKDIR /ragflow

# install dependencies from uv.lock file
COPY pyproject.toml ./
RUN uv lock --python 3.10

[same mirror-rewrite / uv sync --frozen block as in the new Dockerfile]

RUN --mount=type=cache,id=ragflow_npm,target=/root/.npm,sharing=locked

COPY .git /ragflow/.git

[same version-stamping RUN as in the new Dockerfile]

# Final base image with Python environment
FROM base AS ragflow-base
USER root

WORKDIR /ragflow

# Copy Python environment and packages from builder
ENV VIRTUAL_ENV=/ragflow/.venv
COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

ENV PYTHONPATH=/ragflow/

# Copy version info
COPY --from=builder /ragflow/VERSION /ragflow/VERSION

In short: the deleted builder generated uv.lock during the build (RUN uv lock) instead of copying a committed lockfile, it built no web assets (its npm cache RUN appears in this extraction with no command attached), and the ragflow-base stage it produced — which the old Dockerfile consumed via FROM ragflow-base:latest — no longer exists.
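
The practical effect of the deletion is in how the Python environment gets pinned: the old builder resolved a fresh lockfile at image-build time, while the new single Dockerfile syncs against the uv.lock committed to the repository. A rough sketch of the two flows, assuming uv is on PATH:

```bash
# Old flow (Dockerfile.base): resolve dependencies during the build,
# then sync against the lockfile just produced
uv lock --python 3.10
uv sync --python 3.10 --frozen

# New flow (Dockerfile): reuse the committed uv.lock and fail if it is
# out of date with pyproject.toml
uv sync --python 3.10 --frozen --all-extras   # full edition; slim omits --all-extras
```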
README.md — 14 lines changed

@@ -1,6 +1,6 @@
 <div align="center">
 <a href="https://demo.ragflow.io/">
-<img src="web/src/assets/logo-with-text.png" width="520" alt="ragflow logo">
+<img src="web/src/assets/logo-with-text.svg" width="520" alt="ragflow logo">
 </a>
 </div>

@@ -22,7 +22,7 @@
 <img alt="Static Badge" src="https://img.shields.io/badge/Online-Demo-4e6b99">
 </a>
 <a href="https://hub.docker.com/r/infiniflow/ragflow" target="_blank">
-<img src="https://img.shields.io/docker/pulls/infiniflow/ragflow?label=Docker%20Pulls&color=0db7ed&logo=docker&logoColor=white&style=flat-square" alt="docker pull infiniflow/ragflow:v0.20.5">
+<img src="https://img.shields.io/docker/pulls/infiniflow/ragflow?label=Docker%20Pulls&color=0db7ed&logo=docker&logoColor=white&style=flat-square" alt="docker pull infiniflow/ragflow:v0.21.1">
 </a>
 <a href="https://github.com/infiniflow/ragflow/releases/latest">
 <img src="https://img.shields.io/github/v/release/infiniflow/ragflow?color=blue&label=Latest%20Release" alt="Latest Release">

@@ -84,8 +84,8 @@ Try our demo at [https://demo.ragflow.io](https://demo.ragflow.io).

 ## 🔥 Latest Updates

+- 2025-10-15 Supports orchestrable ingestion pipeline.
 - 2025-08-08 Supports OpenAI's latest GPT-5 series models.
 - 2025-08-04 Supports new models, including Kimi K2 and Grok 4.
 - 2025-08-01 Supports agentic workflow and MCP.
 - 2025-05-23 Adds a Python/JavaScript code executor component to Agent.
 - 2025-05-05 Supports cross-language query.

@@ -135,7 +135,7 @@ releases! 🌟
 ## 🔎 System Architecture

 <div align="center" style="margin-top:20px;margin-bottom:20px;">
-<img src="https://github.com/infiniflow/ragflow/assets/12318111/d6ac5664-c237-4200-a7c2-a4a00691b485" width="1000"/>
+<img src="https://github.com/user-attachments/assets/31b0dd6f-ca4f-445a-9457-70cb44a381b2" width="1000"/>
 </div>

 ## 🎬 Get Started

@@ -187,7 +187,7 @@ releases! 🌟
 > All Docker images are built for x86 platforms. We don't currently offer Docker images for ARM64.
 > If you are on an ARM64 platform, follow [this guide](https://ragflow.io/docs/dev/build_docker_image) to build a Docker image compatible with your system.

-> The command below downloads the `v0.20.5-slim` edition of the RAGFlow Docker image. See the following table for descriptions of different RAGFlow editions. To download a RAGFlow edition different from `v0.20.5-slim`, update the `RAGFLOW_IMAGE` variable accordingly in **docker/.env** before using `docker compose` to start the server. For example: set `RAGFLOW_IMAGE=infiniflow/ragflow:v0.20.5` for the full edition `v0.20.5`.
+> The command below downloads the `v0.21.1-slim` edition of the RAGFlow Docker image. See the following table for descriptions of different RAGFlow editions. To download a RAGFlow edition different from `v0.21.1-slim`, update the `RAGFLOW_IMAGE` variable accordingly in **docker/.env** before using `docker compose` to start the server. For example: set `RAGFLOW_IMAGE=infiniflow/ragflow:v0.21.1` for the full edition `v0.21.1`.

 ```bash
 $ cd ragflow/docker

@@ -200,8 +200,8 @@ releases! 🌟

 | RAGFlow image tag | Image size (GB) | Has embedding models? | Stable?                  |
 |-------------------|-----------------|-----------------------|--------------------------|
-| v0.20.5           | ≈9              | :heavy_check_mark:    | Stable release           |
-| v0.20.5-slim      | ≈2              | ❌                    | Stable release           |
+| v0.21.1           | ≈9              | :heavy_check_mark:    | Stable release           |
+| v0.21.1-slim      | ≈2              | ❌                    | Stable release           |
 | nightly           | ≈9              | :heavy_check_mark:    | _Unstable_ nightly build |
 | nightly-slim      | ≈2              | ❌                    | _Unstable_ nightly build |
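
The edition switch the README describes comes down to a one-line edit in docker/.env before bringing the stack up. A sketch of the steps (edit the file by hand if you prefer; the sed line is just shorthand):

```bash
cd ragflow/docker
# Select the full edition instead of the default slim one
sed -i 's|^RAGFLOW_IMAGE=.*|RAGFLOW_IMAGE=infiniflow/ragflow:v0.21.1|' .env
docker compose -f docker-compose.yml up -d
```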
README_id.md — 14 lines changed

The Indonesian README receives the same set of changes as README.md above: logo-with-text.png → logo-with-text.svg, the Docker Pulls badge alt text and every v0.20.5 reference bumped to v0.21.1, the refreshed system-architecture image URL, the image-tag table rows updated to v0.21.1 / v0.21.1-slim, and a new update entry "2025-10-15 Dukungan untuk jalur data yang terorkestrasi." (support for orchestrated data pipelines).
README_ja.md — 14 lines changed

The Japanese README receives the same localized changes: logo png → svg (width 350 here), badge alt text and all v0.20.5 references bumped to v0.21.1, the new architecture diagram URL, updated table rows, and a new update entry "2025-10-15 オーケストレーションされたデータパイプラインのサポート。" (support for orchestrated data pipelines).
README_ko.md — 14 lines changed

The Korean README receives the same localized changes: logo png → svg, badge alt text and all v0.20.5 references bumped to v0.21.1, the new architecture diagram URL, updated table rows, and a new update entry "2025-10-15 조정된 데이터 파이프라인 지원." (support for orchestrated data pipelines).
README (Brazilian Portuguese; the file-name header was lost in this extraction — most likely README_pt_br.md) — 14 lines changed

Same localized changes as README.md: logo png → svg, badge alt text and all v0.20.5 references bumped to v0.21.1, the new architecture diagram URL, the table rows (this file writes image sizes as ~9 / ~2 GB) updated to v0.21.1 / v0.21.1-slim, and a new update entry "10-15-2025 Suporte para pipelines de dados orquestrados." (support for orchestrated data pipelines).
README (Traditional Chinese; the file-name header was lost in this extraction — most likely README_tzh.md) — 14 lines changed

Same localized changes as README.md: logo png → svg (width 350 here), badge alt text and all v0.20.5 references bumped to v0.21.1, the new architecture diagram URL, updated table rows, and a new update entry "2025-10-15 支援可編排的資料管道。" (supports orchestrable data pipelines).
README_zh.md — 16 lines changed

The Simplified Chinese README receives the same localized changes as README.md, plus a cleanup of its update list: the new entry "2025-10-15 支持可编排的数据管道。" (supports orchestrable data pipelines) is added, the 2025-08-08 GPT-5 entry has its punctuation normalized to a full-width stop, and the 2025-08-04 Kimi K2 / Grok 4 entry appears to be dropped.
Deleted file (name not preserved in this extraction) — 590 lines deleted

@@ -1,590 +0,0 @@
The whole file is removed: a lark-based admin CLI for RAGFlow (AdminCLI). Its content, truncated in this view, follows:
import argparse
import base64

from Cryptodome.PublicKey import RSA
from Cryptodome.Cipher import PKCS1_v1_5 as Cipher_pkcs1_v1_5
from typing import Dict, List, Any
from lark import Lark, Transformer, Tree
import requests
from requests.auth import HTTPBasicAuth
from api.common.base64 import encode_to_base64

GRAMMAR = r"""
start: command

command: sql_command | meta_command

sql_command: list_services
           | show_service
           | startup_service
           | shutdown_service
           | restart_service
           | list_users
           | show_user
           | drop_user
           | alter_user
           | create_user
           | activate_user
           | list_datasets
           | list_agents

// meta command definition
meta_command: "\\" meta_command_name [meta_args]

meta_command_name: /[a-zA-Z?]+/
meta_args: (meta_arg)+

meta_arg: /[^\\s"']+/ | quoted_string

// command definition

LIST: "LIST"i
SERVICES: "SERVICES"i
SHOW: "SHOW"i
CREATE: "CREATE"i
SERVICE: "SERVICE"i
SHUTDOWN: "SHUTDOWN"i
STARTUP: "STARTUP"i
RESTART: "RESTART"i
USERS: "USERS"i
DROP: "DROP"i
USER: "USER"i
ALTER: "ALTER"i
ACTIVE: "ACTIVE"i
PASSWORD: "PASSWORD"i
DATASETS: "DATASETS"i
OF: "OF"i
AGENTS: "AGENTS"i

list_services: LIST SERVICES ";"
show_service: SHOW SERVICE NUMBER ";"
startup_service: STARTUP SERVICE NUMBER ";"
shutdown_service: SHUTDOWN SERVICE NUMBER ";"
restart_service: RESTART SERVICE NUMBER ";"

list_users: LIST USERS ";"
drop_user: DROP USER quoted_string ";"
alter_user: ALTER USER PASSWORD quoted_string quoted_string ";"
show_user: SHOW USER quoted_string ";"
create_user: CREATE USER quoted_string quoted_string ";"
activate_user: ALTER USER ACTIVE quoted_string status ";"

list_datasets: LIST DATASETS OF quoted_string ";"
list_agents: LIST AGENTS OF quoted_string ";"

identifier: WORD
quoted_string: QUOTED_STRING
status: WORD

QUOTED_STRING: /'[^']+'/ | /"[^"]+"/
WORD: /[a-zA-Z0-9_\-\.]+/
NUMBER: /[0-9]+/

%import common.WS
%ignore WS
"""


class AdminTransformer(Transformer):

    def start(self, items):
        return items[0]

    def command(self, items):
        return items[0]

    def list_services(self, items):
        result = {'type': 'list_services'}
        return result

    def show_service(self, items):
        service_id = int(items[2])
        return {"type": "show_service", "number": service_id}

    def startup_service(self, items):
        service_id = int(items[2])
        return {"type": "startup_service", "number": service_id}

    def shutdown_service(self, items):
        service_id = int(items[2])
        return {"type": "shutdown_service", "number": service_id}

    def restart_service(self, items):
        service_id = int(items[2])
        return {"type": "restart_service", "number": service_id}

    def list_users(self, items):
        return {"type": "list_users"}

    def show_user(self, items):
        user_name = items[2]
        return {"type": "show_user", "username": user_name}

    def drop_user(self, items):
        user_name = items[2]
        return {"type": "drop_user", "username": user_name}

    def alter_user(self, items):
        user_name = items[3]
        new_password = items[4]
        return {"type": "alter_user", "username": user_name, "password": new_password}

    def create_user(self, items):
        user_name = items[2]
        password = items[3]
        return {"type": "create_user", "username": user_name, "password": password, "role": "user"}

    def activate_user(self, items):
        user_name = items[3]
        activate_status = items[4]
        return {"type": "activate_user", "activate_status": activate_status, "username": user_name}

    def list_datasets(self, items):
        user_name = items[3]
        return {"type": "list_datasets", "username": user_name}

    def list_agents(self, items):
        user_name = items[3]
        return {"type": "list_agents", "username": user_name}

    def meta_command(self, items):
        command_name = str(items[0]).lower()
        args = items[1:] if len(items) > 1 else []

        # handle quoted parameter
        parsed_args = []
        for arg in args:
            if hasattr(arg, 'value'):
                parsed_args.append(arg.value)
            else:
                parsed_args.append(str(arg))

        return {'type': 'meta', 'command': command_name, 'args': parsed_args}

    def meta_command_name(self, items):
        return items[0]

    def meta_args(self, items):
        return items


def encrypt(input_string):
    pub = '-----BEGIN PUBLIC KEY-----\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEArq9XTUSeYr2+N1h3Afl/z8Dse/2yD0ZGrKwx+EEEcdsBLca9Ynmx3nIB5obmLlSfmskLpBo0UACBmB5rEjBp2Q2f3AG3Hjd4B+gNCG6BDaawuDlgANIhGnaTLrIqWrrcm4EMzJOnAOI1fgzJRsOOUEfaS318Eq9OVO3apEyCCt0lOQK6PuksduOjVxtltDav+guVAA068NrPYmRNabVKRNLJpL8w4D44sfth5RvZ3q9t+6RTArpEtc5sh5ChzvqPOzKGMXW83C95TxmXqpbK6olN4RevSfVjEAgCydH6HN6OhtOQEcnrU97r9H0iZOWwbw3pVrZiUkuRD1R56Wzs2wIDAQAB\n-----END PUBLIC KEY-----'
    pub_key = RSA.importKey(pub)
    cipher = Cipher_pkcs1_v1_5.new(pub_key)
    cipher_text = cipher.encrypt(base64.b64encode(input_string.encode('utf-8')))
    return base64.b64encode(cipher_text).decode("utf-8")
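# Illustration (not part of the deleted file): encrypt() base64-encodes the
# password, RSA-encrypts it with the hard-coded public key, then base64-encodes
# the ciphertext, e.g. token = encrypt("admin"). In the portion shown here
# nothing calls encrypt(); the CLI authenticates with encode_to_base64() in
# verify_admin() below.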
|
||||
|
||||
|
||||
class AdminCommandParser:
|
||||
def __init__(self):
|
||||
self.parser = Lark(GRAMMAR, start='start', parser='lalr', transformer=AdminTransformer())
|
||||
self.command_history = []
|
||||
|
||||
def parse_command(self, command_str: str) -> Dict[str, Any]:
|
||||
if not command_str.strip():
|
||||
return {'type': 'empty'}
|
||||
|
||||
self.command_history.append(command_str)
|
||||
|
||||
try:
|
||||
result = self.parser.parse(command_str)
|
||||
return result
|
||||
except Exception as e:
|
||||
return {'type': 'error', 'message': f'Parse error: {str(e)}'}
|
||||
|
||||
|
||||
class AdminCLI:
|
||||
def __init__(self):
|
||||
self.parser = AdminCommandParser()
|
||||
self.is_interactive = False
|
||||
self.admin_account = "admin@ragflow.io"
|
||||
self.admin_password: str = "admin"
|
||||
self.host: str = ""
|
||||
self.port: int = 0
|
||||
|
||||
def verify_admin(self, args):
|
||||
|
||||
conn_info = self._parse_connection_args(args)
|
||||
if 'error' in conn_info:
|
||||
print(f"Error: {conn_info['error']}")
|
||||
return
|
||||
|
||||
self.host = conn_info['host']
|
||||
self.port = conn_info['port']
|
||||
print(f"Attempt to access ip: {self.host}, port: {self.port}")
|
||||
url = f'http://{self.host}:{self.port}/api/v1/admin/auth'
|
||||
|
||||
try_count = 0
|
||||
while True:
|
||||
try_count += 1
|
||||
if try_count > 3:
|
||||
return False
|
||||
|
||||
admin_passwd = input(f"password for {self.admin_account}: ").strip()
|
||||
try:
|
||||
self.admin_password = encode_to_base64(admin_passwd)
|
||||
response = requests.get(url, auth=HTTPBasicAuth(self.admin_account, self.admin_password))
|
||||
if response.status_code == 200:
|
||||
res_json = response.json()
|
||||
error_code = res_json.get('code', -1)
|
||||
if error_code == 0:
|
||||
print("Authentication successful.")
|
||||
return True
|
||||
else:
|
||||
error_message = res_json.get('message', 'Unknown error')
|
||||
print(f"Authentication failed: {error_message}, try again")
|
||||
continue
|
||||
else:
|
||||
print(f"Bad response,status: {response.status_code}, try again")
|
||||
except Exception:
|
||||
print(f"Can't access {self.host}, port: {self.port}")
|
||||
|
||||
def _print_table_simple(self, data):
|
||||
if not data:
|
||||
print("No data to print")
|
||||
return
|
||||
if isinstance(data, dict):
|
||||
# handle single row data
|
||||
data = [data]
|
||||
|
||||
columns = list(data[0].keys())
|
||||
col_widths = {}
|
||||
|
||||
for col in columns:
|
||||
max_width = len(str(col))
|
||||
for item in data:
|
||||
value_len = len(str(item.get(col, '')))
|
||||
if value_len > max_width:
|
||||
max_width = value_len
|
||||
col_widths[col] = max(2, max_width)
|
||||
|
||||
# Generate delimiter
|
||||
separator = "+" + "+".join(["-" * (col_widths[col] + 2) for col in columns]) + "+"
|
||||
|
||||
# Print header
|
||||
print(separator)
|
||||
header = "|" + "|".join([f" {col:<{col_widths[col]}} " for col in columns]) + "|"
|
||||
print(header)
|
||||
print(separator)
|
||||
|
||||
# Print data
|
||||
for item in data:
|
||||
row = "|"
|
||||
for col in columns:
|
||||
value = str(item.get(col, ''))
|
||||
if len(value) > col_widths[col]:
|
||||
value = value[:col_widths[col] - 3] + "..."
|
||||
row += f" {value:<{col_widths[col]}} |"
|
||||
print(row)
|
||||
|
||||
print(separator)
|
||||
|
||||
def run_interactive(self):
|
||||
|
||||
self.is_interactive = True
|
||||
print("RAGFlow Admin command line interface - Type '\\?' for help, '\\q' to quit")
|
||||
|
||||
while True:
|
||||
try:
|
||||
command = input("admin> ").strip()
|
||||
if not command:
|
||||
continue
|
||||
|
||||
print(f"command: {command}")
|
||||
result = self.parser.parse_command(command)
|
||||
self.execute_command(result)
|
||||
|
||||
if isinstance(result, Tree):
|
||||
continue
|
||||
|
||||
if result.get('type') == 'meta' and result.get('command') in ['q', 'quit', 'exit']:
|
||||
break
|
||||
|
||||
except KeyboardInterrupt:
|
||||
print("\nUse '\\q' to quit")
|
||||
except EOFError:
|
||||
print("\nGoodbye!")
|
||||
break
|
||||
|
||||
def run_single_command(self, args):
|
||||
conn_info = self._parse_connection_args(args)
|
||||
if 'error' in conn_info:
|
||||
print(f"Error: {conn_info['error']}")
|
||||
return
|
||||
|
||||
def _parse_connection_args(self, args: List[str]) -> Dict[str, Any]:
|
||||
parser = argparse.ArgumentParser(description='Admin CLI Client', add_help=False)
|
||||
parser.add_argument('-h', '--host', default='localhost', help='Admin service host')
|
||||
parser.add_argument('-p', '--port', type=int, default=8080, help='Admin service port')
|
||||
|
||||
try:
|
||||
parsed_args, remaining_args = parser.parse_known_args(args)
|
||||
return {
|
||||
'host': parsed_args.host,
|
||||
'port': parsed_args.port,
|
||||
}
|
||||
except SystemExit:
|
||||
return {'error': 'Invalid connection arguments'}
|
||||
|
||||
def execute_command(self, parsed_command: Dict[str, Any]):
|
||||
|
||||
command_dict: dict
|
||||
if isinstance(parsed_command, Tree):
|
||||
command_dict = parsed_command.children[0]
|
||||
else:
|
||||
if parsed_command['type'] == 'error':
|
||||
print(f"Error: {parsed_command['message']}")
|
||||
return
|
||||
else:
|
||||
command_dict = parsed_command
|
||||
|
||||
# print(f"Parsed command: {command_dict}")
|
||||
|
||||
command_type = command_dict['type']
|
||||
|
||||
match command_type:
|
||||
case 'list_services':
|
||||
self._handle_list_services(command_dict)
|
||||
case 'show_service':
|
||||
self._handle_show_service(command_dict)
|
||||
case 'restart_service':
|
||||
self._handle_restart_service(command_dict)
|
||||
case 'shutdown_service':
|
||||
self._handle_shutdown_service(command_dict)
|
||||
case 'startup_service':
|
||||
self._handle_startup_service(command_dict)
|
||||
case 'list_users':
|
||||
self._handle_list_users(command_dict)
|
||||
case 'show_user':
|
||||
self._handle_show_user(command_dict)
|
||||
case 'drop_user':
|
||||
self._handle_drop_user(command_dict)
|
||||
case 'alter_user':
|
||||
self._handle_alter_user(command_dict)
|
||||
case 'create_user':
|
||||
self._handle_create_user(command_dict)
|
||||
case 'activate_user':
|
||||
self._handle_activate_user(command_dict)
|
||||
case 'list_datasets':
|
||||
self._handle_list_datasets(command_dict)
|
||||
case 'list_agents':
|
||||
self._handle_list_agents(command_dict)
|
||||
case 'meta':
|
||||
self._handle_meta_command(command_dict)
|
||||
case _:
|
||||
print(f"Command '{command_type}' would be executed with API")
|
||||
|
||||
    def _handle_list_services(self, command):
        print("Listing all services")

        url = f'http://{self.host}:{self.port}/api/v1/admin/services'
        response = requests.get(url, auth=HTTPBasicAuth(self.admin_account, self.admin_password))
        res_json = response.json()
        if response.status_code == 200:
            self._print_table_simple(res_json['data'])
        else:
            print(f"Fail to get all services, code: {res_json['code']}, message: {res_json['message']}")

    def _handle_show_service(self, command):
        service_id: int = command['number']
        print(f"Showing service: {service_id}")

        url = f'http://{self.host}:{self.port}/api/v1/admin/services/{service_id}'
        response = requests.get(url, auth=HTTPBasicAuth(self.admin_account, self.admin_password))
        res_json = response.json()
        if response.status_code == 200:
            res_data = res_json['data']
            if res_data['alive']:
                print(f"Service {res_data['service_name']} is alive. Detail:")
                if isinstance(res_data['message'], str):
                    print(res_data['message'])
                else:
                    self._print_table_simple(res_data['message'])
            else:
                print(f"Service {res_data['service_name']} is down. Detail: {res_data['message']}")
        else:
            print(f"Fail to show service, code: {res_json['code']}, message: {res_json['message']}")

    def _handle_restart_service(self, command):
        service_id: int = command['number']
        print(f"Restart service {service_id}")

    def _handle_shutdown_service(self, command):
        service_id: int = command['number']
        print(f"Shutdown service {service_id}")

    def _handle_startup_service(self, command):
        service_id: int = command['number']
        print(f"Startup service {service_id}")

    def _handle_list_users(self, command):
        print("Listing all users")

        url = f'http://{self.host}:{self.port}/api/v1/admin/users'
        response = requests.get(url, auth=HTTPBasicAuth(self.admin_account, self.admin_password))
        res_json = response.json()
        if response.status_code == 200:
            self._print_table_simple(res_json['data'])
        else:
            print(f"Fail to get all users, code: {res_json['code']}, message: {res_json['message']}")

    def _handle_show_user(self, command):
        username_tree: Tree = command['username']
        username: str = username_tree.children[0].strip("'\"")
        print(f"Showing user: {username}")
        url = f'http://{self.host}:{self.port}/api/v1/admin/users/{username}'
        response = requests.get(url, auth=HTTPBasicAuth(self.admin_account, self.admin_password))
        res_json = response.json()
        if response.status_code == 200:
            self._print_table_simple(res_json['data'])
        else:
            print(f"Fail to get user {username}, code: {res_json['code']}, message: {res_json['message']}")

    def _handle_drop_user(self, command):
        username_tree: Tree = command['username']
        username: str = username_tree.children[0].strip("'\"")
        print(f"Drop user: {username}")
        url = f'http://{self.host}:{self.port}/api/v1/admin/users/{username}'
        response = requests.delete(url, auth=HTTPBasicAuth(self.admin_account, self.admin_password))
        res_json = response.json()
        if response.status_code == 200:
            print(res_json["message"])
        else:
            print(f"Fail to drop user, code: {res_json['code']}, message: {res_json['message']}")

    def _handle_alter_user(self, command):
        username_tree: Tree = command['username']
        username: str = username_tree.children[0].strip("'\"")
        password_tree: Tree = command['password']
        password: str = password_tree.children[0].strip("'\"")
        print(f"Alter user: {username}, password: {password}")
        url = f'http://{self.host}:{self.port}/api/v1/admin/users/{username}/password'
        response = requests.put(url, auth=HTTPBasicAuth(self.admin_account, self.admin_password),
                                json={'new_password': encrypt(password)})
        res_json = response.json()
        if response.status_code == 200:
            print(res_json["message"])
        else:
            print(f"Fail to alter password, code: {res_json['code']}, message: {res_json['message']}")

    def _handle_create_user(self, command):
        username_tree: Tree = command['username']
        username: str = username_tree.children[0].strip("'\"")
        password_tree: Tree = command['password']
        password: str = password_tree.children[0].strip("'\"")
        role: str = command['role']
        print(f"Create user: {username}, password: {password}, role: {role}")
        url = f'http://{self.host}:{self.port}/api/v1/admin/users'
        response = requests.post(
            url,
            auth=HTTPBasicAuth(self.admin_account, self.admin_password),
            json={'username': username, 'password': encrypt(password), 'role': role}
        )
        res_json = response.json()
        if response.status_code == 200:
            self._print_table_simple(res_json['data'])
        else:
            print(f"Fail to create user {username}, code: {res_json['code']}, message: {res_json['message']}")

    def _handle_activate_user(self, command):
        username_tree: Tree = command['username']
        username: str = username_tree.children[0].strip("'\"")
        activate_tree: Tree = command['activate_status']
        activate_status: str = activate_tree.children[0].strip("'\"")
        if activate_status.lower() in ['on', 'off']:
            print(f"Alter user {username} activate status, turn {activate_status.lower()}.")
            url = f'http://{self.host}:{self.port}/api/v1/admin/users/{username}/activate'
            response = requests.put(url, auth=HTTPBasicAuth(self.admin_account, self.admin_password),
                                    json={'activate_status': activate_status})
            res_json = response.json()
            if response.status_code == 200:
                print(res_json["message"])
            else:
                print(f"Fail to alter activate status, code: {res_json['code']}, message: {res_json['message']}")
        else:
            print(f"Unknown activate status: {activate_status}.")

    def _handle_list_datasets(self, command):
        username_tree: Tree = command['username']
        username: str = username_tree.children[0].strip("'\"")
        print(f"Listing all datasets of user: {username}")
        url = f'http://{self.host}:{self.port}/api/v1/admin/users/{username}/datasets'
        response = requests.get(url, auth=HTTPBasicAuth(self.admin_account, self.admin_password))
        res_json = response.json()
        if response.status_code == 200:
            self._print_table_simple(res_json['data'])
        else:
            print(f"Fail to get all datasets of {username}, code: {res_json['code']}, message: {res_json['message']}")

    def _handle_list_agents(self, command):
        username_tree: Tree = command['username']
        username: str = username_tree.children[0].strip("'\"")
        print(f"Listing all agents of user: {username}")
        url = f'http://{self.host}:{self.port}/api/v1/admin/users/{username}/agents'
        response = requests.get(url, auth=HTTPBasicAuth(self.admin_account, self.admin_password))
        res_json = response.json()
        if response.status_code == 200:
            self._print_table_simple(res_json['data'])
        else:
            print(f"Fail to get all agents of {username}, code: {res_json['code']}, message: {res_json['message']}")

    def _handle_meta_command(self, command):
        meta_command = command['command']
        args = command.get('args', [])

        if meta_command in ['?', 'h', 'help']:
            self.show_help()
        elif meta_command in ['q', 'quit', 'exit']:
            print("Goodbye!")
        else:
            print(f"Meta command '{meta_command}' with args {args}")

    def show_help(self):
        """Help info"""
        help_text = """
Commands:
    LIST SERVICES
    SHOW SERVICE <service>
    STARTUP SERVICE <service>
    SHUTDOWN SERVICE <service>
    RESTART SERVICE <service>
    LIST USERS
    SHOW USER <user>
    DROP USER <user>
    CREATE USER <user> <password>
    ALTER USER PASSWORD <user> <new_password>
    ALTER USER ACTIVE <user> <on/off>
    LIST DATASETS OF <user>
    LIST AGENTS OF <user>

Meta Commands:
    \\?, \\h, \\help    Show this help
    \\q, \\quit, \\exit Quit the CLI
"""
        print(help_text)


def main():
    import sys

    cli = AdminCLI()

    if len(sys.argv) == 1 or (len(sys.argv) > 1 and sys.argv[1] == '-'):
        print(r"""
    ____  ___   ______________                 ___       __          _
   / __ \/   | / ____/ ____/ /___ _      __   /   | ____/ /___ ___  (_)___
  / /_/ / /| |/ / __/ /_  / / __ \ | /| / /  / /| |/ __  / __ `__ \/ / __ \
 / _, _/ ___ / /_/ / __/ / / /_/ / |/ |/ /  / ___ / /_/ / / / / / / / / / /
/_/ |_/_/  |_\____/_/   /_/\____/|__/|__/  /_/  |_\__,_/_/ /_/ /_/_/_/ /_/
""")
        if cli.verify_admin(sys.argv):
            cli.run_interactive()
    else:
        if cli.verify_admin(sys.argv):
            cli.run_interactive()
        # cli.run_single_command(sys.argv[1:])


if __name__ == '__main__':
    main()
@@ -1,57 +0,0 @@
import logging
import uuid
from functools import wraps
from flask import request, jsonify

from api.common.exceptions import AdminException
from api.db.init_data import encode_to_base64
from api.db.services import UserService


def check_admin(username: str, password: str):
    users = UserService.query(email=username)
    if not users:
        logging.info(f"Username: {username} is not registered!")
        user_info = {
            "id": uuid.uuid1().hex,
            "password": encode_to_base64("admin"),
            "nickname": "admin",
            "is_superuser": True,
            "email": "admin@ragflow.io",
            "creator": "system",
            "status": "1",
        }
        if not UserService.save(**user_info):
            raise AdminException("Can't init admin.", 500)

    user = UserService.query_user(username, password)
    if user:
        return True
    else:
        return False


def login_verify(f):
    @wraps(f)
    def decorated(*args, **kwargs):
        auth = request.authorization
        if not auth or 'username' not in auth.parameters or 'password' not in auth.parameters:
            return jsonify({
                "code": 401,
                "message": "Authentication required",
                "data": None
            }), 200

        username = auth.parameters['username']
        password = auth.parameters['password']
        # TODO: to check the username and password from DB
        if check_admin(username, password) is False:
            return jsonify({
                "code": 403,
                "message": "Access denied",
                "data": None
            }), 200

        return f(*args, **kwargs)

    return decorated
47
admin/build_cli_release.sh
Normal file
@@ -0,0 +1,47 @@
#!/bin/bash

set -e

echo "🚀 Start building..."
echo "================================"

PROJECT_NAME="ragflow-cli"

RELEASE_DIR="release"
BUILD_DIR="dist"
SOURCE_DIR="src"
PACKAGE_DIR="ragflow_cli"

echo "🧹 Clean old build folder..."
rm -rf release/

echo "📁 Prepare source code..."
mkdir -p release/$PROJECT_NAME/$SOURCE_DIR
cp pyproject.toml release/$PROJECT_NAME/pyproject.toml
cp README.md release/$PROJECT_NAME/README.md

mkdir -p release/$PROJECT_NAME/$SOURCE_DIR/$PACKAGE_DIR
cp admin_client.py release/$PROJECT_NAME/$SOURCE_DIR/$PACKAGE_DIR/admin_client.py

if [ -d "release/$PROJECT_NAME/$SOURCE_DIR" ]; then
    echo "✅ source dir: release/$PROJECT_NAME/$SOURCE_DIR"
else
    echo "❌ source dir does not exist: release/$PROJECT_NAME/$SOURCE_DIR"
    exit 1
fi

echo "🔨 Make build file..."
cd release/$PROJECT_NAME
export PYTHONPATH=$(pwd)
python -m build

echo "✅ Check build result..."
if [ -d "$BUILD_DIR" ]; then
    echo "📦 Package generated:"
    ls -la $BUILD_DIR/
else
    echo "❌ Build failed: $BUILD_DIR does not exist."
    exit 1
fi

echo "🎉 Build finished successfully!"
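The script expects to be run from the directory holding `admin_client.py` and `pyproject.toml` (presumably `admin/client/`, given the relative `cp` calls above) and assumes the PyPA `build` package is installed (`pip install build`). On success the sdist and wheel land in `release/ragflow-cli/dist/`.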
@@ -15,22 +15,55 @@ It consists of a server-side Service and a command-line client (CLI), both imple
- **Admin Service**: A backend service that interfaces with the RAGFlow system to execute administrative operations and monitor its status.
- **Admin CLI**: A command-line interface that allows users to connect to the Admin Service and issue commands for system management.

### Starting the Admin Service

#### Launching from source code

1. Before starting the Admin Service, please make sure the RAGFlow system is already running.

2. Launch from source code:

   ```bash
   python admin/server/admin_server.py
   ```

   The service will start and listen for incoming connections from the CLI on the configured port.

#### Using docker image

1. Before startup, please configure the `docker_compose.yml` file to enable the admin server:

   ```yaml
   command:
     - --enable-adminserver
   ```

2. Start the containers; the service will start and listen for incoming connections from the CLI on the configured port.

### Using the Admin CLI

1. Ensure the Admin Service is running.
2. Install ragflow-cli.
   ```bash
   pip install ragflow-cli==0.21.1
   ```
3. Launch the CLI client:
   ```bash
   ragflow-cli -h 127.0.0.1 -p 9381
   ```
   You will be prompted to enter the superuser's password to log in.
   The default password is admin.

**Parameters:**

- -h: RAGFlow admin server host address
- -p: RAGFlow admin server port

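For example, a first interactive login looks roughly like this (output abridged; the prompts and messages come from the client itself):

```bash
$ ragflow-cli -h 127.0.0.1 -p 9381
Attempt to access ip: 127.0.0.1, port: 9381
password for admin@ragflow.io:
Authentication successful.
admin>
```
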
## Supported Commands

@@ -42,12 +75,7 @@ Commands are case-insensitive and must be terminated with a semicolon (`;`).
  - Lists all available services within the RAGFlow system.
- `SHOW SERVICE <id>;`
  - Shows detailed status information for the service identified by `<id>`.
- `STARTUP SERVICE <id>;`
  - Attempts to start the service identified by `<id>`.
- `SHUTDOWN SERVICE <id>;`
  - Attempts to gracefully shut down the service identified by `<id>`.
- `RESTART SERVICE <id>;`
  - Attempts to restart the service identified by `<id>`.

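For example, to check one service from the prompt (ids are taken from the `LIST SERVICES;` output; the detail printed depends on what the service reports):

```bash
admin> LIST SERVICES;
admin> SHOW SERVICE 0;
Showing service: 0
```
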
### User Management Commands

@@ -55,10 +83,17 @@ Commands are case-insensitive and must be terminated with a semicolon (`;`).
  - Lists all users known to the system.
- `SHOW USER '<username>';`
  - Shows details and permissions for the specified user. The username must be enclosed in single or double quotes.
- `CREATE USER '<username>' '<password>';`
  - Creates a user with the given username and password. The username and password must be enclosed in single or double quotes.
- `DROP USER '<username>';`
  - Removes the specified user from the system. Use with caution.
- `ALTER USER PASSWORD '<username>' '<new_password>';`
  - Changes the password for the specified user.
- `ALTER USER ACTIVE '<username>' <on/off>;`
  - Changes the user to active or inactive.

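A typical user-management session might look like this (the email and password are illustrative):

```bash
admin> CREATE USER 'alice@example.com' 'init_pass_123';
admin> ALTER USER ACTIVE 'alice@example.com' off;
admin> LIST DATASETS OF 'alice@example.com';
```
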
### Data and Agent Commands

931
admin/client/admin_client.py
Normal file
@@ -0,0 +1,931 @@
|
||||
#
|
||||
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import argparse
|
||||
import base64
|
||||
from cmd import Cmd
|
||||
|
||||
from Cryptodome.PublicKey import RSA
|
||||
from Cryptodome.Cipher import PKCS1_v1_5 as Cipher_pkcs1_v1_5
|
||||
from typing import Dict, List, Any
|
||||
from lark import Lark, Transformer, Tree
|
||||
import requests
|
||||
|
||||
GRAMMAR = r"""
start: command

command: sql_command | meta_command

sql_command: list_services
           | show_service
           | startup_service
           | shutdown_service
           | restart_service
           | list_users
           | show_user
           | drop_user
           | alter_user
           | create_user
           | activate_user
           | list_datasets
           | list_agents
           | create_role
           | drop_role
           | alter_role
           | list_roles
           | show_role
           | grant_permission
           | revoke_permission
           | alter_user_role
           | show_user_permission

// meta command definition
meta_command: "\\" meta_command_name [meta_args]

meta_command_name: /[a-zA-Z?]+/
meta_args: (meta_arg)+

meta_arg: /[^\\s"']+/ | quoted_string

// command definition

LIST: "LIST"i
SERVICES: "SERVICES"i
SHOW: "SHOW"i
CREATE: "CREATE"i
SERVICE: "SERVICE"i
SHUTDOWN: "SHUTDOWN"i
STARTUP: "STARTUP"i
RESTART: "RESTART"i
USERS: "USERS"i
DROP: "DROP"i
USER: "USER"i
ALTER: "ALTER"i
ACTIVE: "ACTIVE"i
PASSWORD: "PASSWORD"i
DATASETS: "DATASETS"i
OF: "OF"i
AGENTS: "AGENTS"i
ROLE: "ROLE"i
ROLES: "ROLES"i
DESCRIPTION: "DESCRIPTION"i
GRANT: "GRANT"i
REVOKE: "REVOKE"i
ALL: "ALL"i
PERMISSION: "PERMISSION"i
TO: "TO"i
FROM: "FROM"i
FOR: "FOR"i
RESOURCES: "RESOURCES"i
ON: "ON"i
SET: "SET"i

list_services: LIST SERVICES ";"
show_service: SHOW SERVICE NUMBER ";"
startup_service: STARTUP SERVICE NUMBER ";"
shutdown_service: SHUTDOWN SERVICE NUMBER ";"
restart_service: RESTART SERVICE NUMBER ";"

list_users: LIST USERS ";"
drop_user: DROP USER quoted_string ";"
alter_user: ALTER USER PASSWORD quoted_string quoted_string ";"
show_user: SHOW USER quoted_string ";"
create_user: CREATE USER quoted_string quoted_string ";"
activate_user: ALTER USER ACTIVE quoted_string status ";"

list_datasets: LIST DATASETS OF quoted_string ";"
list_agents: LIST AGENTS OF quoted_string ";"

create_role: CREATE ROLE identifier [DESCRIPTION quoted_string] ";"
drop_role: DROP ROLE identifier ";"
alter_role: ALTER ROLE identifier SET DESCRIPTION quoted_string ";"
list_roles: LIST ROLES ";"
show_role: SHOW ROLE identifier ";"

grant_permission: GRANT action_list ON identifier TO ROLE identifier ";"
revoke_permission: REVOKE action_list ON identifier FROM ROLE identifier ";"
alter_user_role: ALTER USER quoted_string SET ROLE identifier ";"
show_user_permission: SHOW USER PERMISSION quoted_string ";"

action_list: identifier ("," identifier)*

identifier: WORD
quoted_string: QUOTED_STRING
status: WORD

QUOTED_STRING: /'[^']+'/ | /"[^"]+"/
WORD: /[a-zA-Z0-9_\-\.]+/
NUMBER: /[0-9]+/

%import common.WS
%ignore WS
"""

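# Quick sanity check: the grammar can be exercised on its own, e.g.
#     Lark(GRAMMAR, start='start', parser='lalr').parse("LIST USERS;")
# returns a Tree rooted at 'start'; the AdminTransformer below collapses the
# interesting rules into plain dicts such as {"type": "list_users"}.
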
class AdminTransformer(Transformer):

    def start(self, items):
        return items[0]

    def command(self, items):
        return items[0]

    def list_services(self, items):
        result = {'type': 'list_services'}
        return result

    def show_service(self, items):
        service_id = int(items[2])
        return {"type": "show_service", "number": service_id}

    def startup_service(self, items):
        service_id = int(items[2])
        return {"type": "startup_service", "number": service_id}

    def shutdown_service(self, items):
        service_id = int(items[2])
        return {"type": "shutdown_service", "number": service_id}

    def restart_service(self, items):
        service_id = int(items[2])
        return {"type": "restart_service", "number": service_id}

    def list_users(self, items):
        return {"type": "list_users"}

    def show_user(self, items):
        user_name = items[2]
        return {"type": "show_user", "user_name": user_name}

    def drop_user(self, items):
        user_name = items[2]
        return {"type": "drop_user", "user_name": user_name}

    def alter_user(self, items):
        user_name = items[3]
        new_password = items[4]
        return {"type": "alter_user", "user_name": user_name, "password": new_password}

    def create_user(self, items):
        user_name = items[2]
        password = items[3]
        return {"type": "create_user", "user_name": user_name, "password": password, "role": "user"}

    def activate_user(self, items):
        user_name = items[3]
        activate_status = items[4]
        return {"type": "activate_user", "activate_status": activate_status, "user_name": user_name}

    def list_datasets(self, items):
        user_name = items[3]
        return {"type": "list_datasets", "user_name": user_name}

    def list_agents(self, items):
        user_name = items[3]
        return {"type": "list_agents", "user_name": user_name}

    def create_role(self, items):
        role_name = items[2]
        if len(items) > 4:
            description = items[4]
            return {"type": "create_role", "role_name": role_name, "description": description}
        else:
            return {"type": "create_role", "role_name": role_name}

    def drop_role(self, items):
        role_name = items[2]
        return {"type": "drop_role", "role_name": role_name}

    def alter_role(self, items):
        role_name = items[2]
        description = items[5]
        return {"type": "alter_role", "role_name": role_name, "description": description}

    def list_roles(self, items):
        return {"type": "list_roles"}

    def show_role(self, items):
        role_name = items[2]
        return {"type": "show_role", "role_name": role_name}

    def grant_permission(self, items):
        action_list = items[1]
        resource = items[3]
        role_name = items[6]
        return {"type": "grant_permission", "role_name": role_name, "resource": resource, "actions": action_list}

    def revoke_permission(self, items):
        action_list = items[1]
        resource = items[3]
        role_name = items[6]
        return {
            "type": "revoke_permission",
            "role_name": role_name,
            "resource": resource, "actions": action_list
        }

    def alter_user_role(self, items):
        user_name = items[2]
        role_name = items[5]
        return {"type": "alter_user_role", "user_name": user_name, "role_name": role_name}

    def show_user_permission(self, items):
        user_name = items[3]
        return {"type": "show_user_permission", "user_name": user_name}

    def action_list(self, items):
        return items

    def meta_command(self, items):
        command_name = str(items[0]).lower()
        args = items[1:] if len(items) > 1 else []

        # handle quoted parameter
        parsed_args = []
        for arg in args:
            if hasattr(arg, 'value'):
                parsed_args.append(arg.value)
            else:
                parsed_args.append(str(arg))

        return {'type': 'meta', 'command': command_name, 'args': parsed_args}

    def meta_command_name(self, items):
        return items[0]

    def meta_args(self, items):
        return items


def encrypt(input_string):
    pub = '-----BEGIN PUBLIC KEY-----\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEArq9XTUSeYr2+N1h3Afl/z8Dse/2yD0ZGrKwx+EEEcdsBLca9Ynmx3nIB5obmLlSfmskLpBo0UACBmB5rEjBp2Q2f3AG3Hjd4B+gNCG6BDaawuDlgANIhGnaTLrIqWrrcm4EMzJOnAOI1fgzJRsOOUEfaS318Eq9OVO3apEyCCt0lOQK6PuksduOjVxtltDav+guVAA068NrPYmRNabVKRNLJpL8w4D44sfth5RvZ3q9t+6RTArpEtc5sh5ChzvqPOzKGMXW83C95TxmXqpbK6olN4RevSfVjEAgCydH6HN6OhtOQEcnrU97r9H0iZOWwbw3pVrZiUkuRD1R56Wzs2wIDAQAB\n-----END PUBLIC KEY-----'
    pub_key = RSA.importKey(pub)
    cipher = Cipher_pkcs1_v1_5.new(pub_key)
    cipher_text = cipher.encrypt(base64.b64encode(input_string.encode('utf-8')))
    return base64.b64encode(cipher_text).decode("utf-8")


def encode_to_base64(input_string):
    base64_encoded = base64.b64encode(input_string.encode('utf-8'))
    return base64_encoded.decode('utf-8')


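# Note the transport scheme above: the plaintext is base64-encoded, RSA-encrypted
# with the hard-coded RAGFlow public key, then base64-encoded again; the server
# side is expected to reverse the same steps.
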
class AdminCLI(Cmd):
    def __init__(self):
        super().__init__()
        self.parser = Lark(GRAMMAR, start='start', parser='lalr', transformer=AdminTransformer())
        self.command_history = []
        self.is_interactive = False
        self.admin_account = "admin@ragflow.io"
        self.admin_password: str = "admin"
        self.session = requests.Session()
        self.access_token: str = ""
        self.host: str = ""
        self.port: int = 0

    intro = r"""Type "\h" for help."""
    prompt = "admin> "

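    # Cmd wiring: cmdloop() prints `intro`, reads lines at `prompt`, and hands
    # each line to the overridden onecmd() below; returning True ends the loop.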
    def onecmd(self, command: str) -> bool:
        try:
            result = self.parse_command(command)

            if isinstance(result, dict):
                if 'type' in result and result.get('type') == 'empty':
                    return False

            self.execute_command(result)

            if isinstance(result, Tree):
                return False

            if result.get('type') == 'meta' and result.get('command') in ['q', 'quit', 'exit']:
                return True

        except KeyboardInterrupt:
            print("\nUse '\\q' to quit")
        except EOFError:
            print("\nGoodbye!")
            return True
        return False

    def emptyline(self) -> bool:
        return False

    def default(self, line: str) -> bool:
        return self.onecmd(line)

    def parse_command(self, command_str: str) -> dict[str, str]:
        if not command_str.strip():
            return {'type': 'empty'}

        self.command_history.append(command_str)

        try:
            result = self.parser.parse(command_str)
            return result
        except Exception as e:
            return {'type': 'error', 'message': f'Parse error: {str(e)}'}

    def verify_admin(self, arguments: dict, single_command: bool):
        self.host = arguments['host']
        self.port = arguments['port']
        print(f"Attempt to access ip: {self.host}, port: {self.port}")
        url = f"http://{self.host}:{self.port}/api/v1/admin/login"

        attempt_count = 3
        if single_command:
            attempt_count = 1

        try_count = 0
        while True:
            try_count += 1
            if try_count > attempt_count:
                return False

            if single_command:
                admin_passwd = arguments['password']
            else:
                admin_passwd = input(f"password for {self.admin_account}: ").strip()
            try:
                self.admin_password = encrypt(admin_passwd)
                response = self.session.post(url, json={'email': self.admin_account, 'password': self.admin_password})
                if response.status_code == 200:
                    res_json = response.json()
                    error_code = res_json.get('code', -1)
                    if error_code == 0:
                        self.session.headers.update({
                            'Content-Type': 'application/json',
                            'Authorization': response.headers['Authorization'],
                            'User-Agent': 'RAGFlow-CLI/0.21.1'
                        })
                        print("Authentication successful.")
                        return True
                    else:
                        error_message = res_json.get('message', 'Unknown error')
                        print(f"Authentication failed: {error_message}, try again")
                        continue
                else:
                    print(f"Bad response, status: {response.status_code}, password is wrong")
            except Exception as e:
                print(str(e))
                print(f"Can't access {self.host}, port: {self.port}")

    def _print_table_simple(self, data):
        if not data:
            print("No data to print")
            return
        if isinstance(data, dict):
            # handle single row data
            data = [data]

        columns = list(data[0].keys())
        col_widths = {}

        def get_string_width(text):
            half_width_chars = (
                " !\"#$%&'()*+,-./0123456789:;<=>?@"
                "ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`"
                "abcdefghijklmnopqrstuvwxyz{|}~"
                "\t\n\r"
            )
            width = 0
            for char in text:
                if char in half_width_chars:
                    width += 1
                else:
                    width += 2
            return width

        for col in columns:
            max_width = get_string_width(str(col))
            for item in data:
                value_len = get_string_width(str(item.get(col, '')))
                if value_len > max_width:
                    max_width = value_len
            col_widths[col] = max(2, max_width)

        # Generate delimiter
        separator = "+" + "+".join(["-" * (col_widths[col] + 2) for col in columns]) + "+"

        # Print header
        print(separator)
        header = "|" + "|".join([f" {col:<{col_widths[col]}} " for col in columns]) + "|"
        print(header)
        print(separator)

        # Print data
        for item in data:
            row = "|"
            for col in columns:
                value = str(item.get(col, ''))
                if get_string_width(value) > col_widths[col]:
                    value = value[:col_widths[col] - 3] + "..."
                row += f" {value:<{col_widths[col] - (get_string_width(value) - len(value))}} |"
            print(row)

        print(separator)

    def run_interactive(self):

        self.is_interactive = True
        print("RAGFlow Admin command line interface - Type '\\?' for help, '\\q' to quit")

        while True:
            try:
                command = input("admin> ").strip()
                if not command:
                    continue

                print(f"command: {command}")
                result = self.parse_command(command)
                self.execute_command(result)

                if isinstance(result, Tree):
                    continue

                if result.get('type') == 'meta' and result.get('command') in ['q', 'quit', 'exit']:
                    break

            except KeyboardInterrupt:
                print("\nUse '\\q' to quit")
            except EOFError:
                print("\nGoodbye!")
                break

    def run_single_command(self, command: str):
        result = self.parse_command(command)
        self.execute_command(result)

    def parse_connection_args(self, args: List[str]) -> Dict[str, Any]:
        parser = argparse.ArgumentParser(description='Admin CLI Client', add_help=False)
        parser.add_argument('-h', '--host', default='localhost', help='Admin service host')
        parser.add_argument('-p', '--port', type=int, default=8080, help='Admin service port')
        parser.add_argument('-w', '--password', default='admin', type=str, help='Superuser password')
        parser.add_argument('command', nargs='?', help='Single command')
        try:
            parsed_args, remaining_args = parser.parse_known_args(args)
            if remaining_args:
                command = remaining_args[0]
                return {
                    'host': parsed_args.host,
                    'port': parsed_args.port,
                    'password': parsed_args.password,
                    'command': command
                }
            else:
                return {
                    'host': parsed_args.host,
                    'port': parsed_args.port,
                }
        except SystemExit:
            return {'error': 'Invalid connection arguments'}

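    # Note: main() passes the full sys.argv to parse_connection_args above, so
    # the program name itself fills the `command` positional and an actual
    # statement such as "LIST USERS;" arrives via remaining_args; only then are
    # the 'command' and 'password' keys returned.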
    def execute_command(self, parsed_command: Dict[str, Any]):

        command_dict: dict
        if isinstance(parsed_command, Tree):
            command_dict = parsed_command.children[0]
        else:
            if parsed_command['type'] == 'error':
                print(f"Error: {parsed_command['message']}")
                return
            else:
                command_dict = parsed_command

        # print(f"Parsed command: {command_dict}")

        command_type = command_dict['type']

        match command_type:
            case 'list_services':
                self._handle_list_services(command_dict)
            case 'show_service':
                self._handle_show_service(command_dict)
            case 'restart_service':
                self._handle_restart_service(command_dict)
            case 'shutdown_service':
                self._handle_shutdown_service(command_dict)
            case 'startup_service':
                self._handle_startup_service(command_dict)
            case 'list_users':
                self._handle_list_users(command_dict)
            case 'show_user':
                self._handle_show_user(command_dict)
            case 'drop_user':
                self._handle_drop_user(command_dict)
            case 'alter_user':
                self._handle_alter_user(command_dict)
            case 'create_user':
                self._handle_create_user(command_dict)
            case 'activate_user':
                self._handle_activate_user(command_dict)
            case 'list_datasets':
                self._handle_list_datasets(command_dict)
            case 'list_agents':
                self._handle_list_agents(command_dict)
            case 'create_role':
                self._create_role(command_dict)
            case 'drop_role':
                self._drop_role(command_dict)
            case 'alter_role':
                self._alter_role(command_dict)
            case 'list_roles':
                self._list_roles(command_dict)
            case 'show_role':
                self._show_role(command_dict)
            case 'grant_permission':
                self._grant_permission(command_dict)
            case 'revoke_permission':
                self._revoke_permission(command_dict)
            case 'alter_user_role':
                self._alter_user_role(command_dict)
            case 'show_user_permission':
                self._show_user_permission(command_dict)
            case 'meta':
                self._handle_meta_command(command_dict)
            case _:
                print(f"Command '{command_type}' would be executed with API")

    def _handle_list_services(self, command):
        print("Listing all services")

        url = f'http://{self.host}:{self.port}/api/v1/admin/services'
        response = self.session.get(url)
        res_json = response.json()
        if response.status_code == 200:
            self._print_table_simple(res_json['data'])
        else:
            print(f"Fail to get all services, code: {res_json['code']}, message: {res_json['message']}")

    def _handle_show_service(self, command):
        service_id: int = command['number']
        print(f"Showing service: {service_id}")

        url = f'http://{self.host}:{self.port}/api/v1/admin/services/{service_id}'
        response = self.session.get(url)
        res_json = response.json()
        if response.status_code == 200:
            res_data = res_json['data']
            if 'status' in res_data and res_data['status'] == 'alive':
                print(f"Service {res_data['service_name']} is alive, ")
                if isinstance(res_data['message'], str):
                    print(res_data['message'])
                else:
                    self._print_table_simple(res_data['message'])
            else:
                print(f"Service {res_data['service_name']} is down, {res_data['message']}")
        else:
            print(f"Fail to show service, code: {res_json['code']}, message: {res_json['message']}")

    def _handle_restart_service(self, command):
        service_id: int = command['number']
        print(f"Restart service {service_id}")

    def _handle_shutdown_service(self, command):
        service_id: int = command['number']
        print(f"Shutdown service {service_id}")

    def _handle_startup_service(self, command):
        service_id: int = command['number']
        print(f"Startup service {service_id}")

    def _handle_list_users(self, command):
        print("Listing all users")

        url = f'http://{self.host}:{self.port}/api/v1/admin/users'
        response = self.session.get(url)
        res_json = response.json()
        if response.status_code == 200:
            self._print_table_simple(res_json['data'])
        else:
            print(f"Fail to get all users, code: {res_json['code']}, message: {res_json['message']}")

    def _handle_show_user(self, command):
        username_tree: Tree = command['user_name']
        user_name: str = username_tree.children[0].strip("'\"")
        print(f"Showing user: {user_name}")
        url = f'http://{self.host}:{self.port}/api/v1/admin/users/{user_name}'
        response = self.session.get(url)
        res_json = response.json()
        if response.status_code == 200:
            self._print_table_simple(res_json['data'])
        else:
            print(f"Fail to get user {user_name}, code: {res_json['code']}, message: {res_json['message']}")

    def _handle_drop_user(self, command):
        username_tree: Tree = command['user_name']
        user_name: str = username_tree.children[0].strip("'\"")
        print(f"Drop user: {user_name}")
        url = f'http://{self.host}:{self.port}/api/v1/admin/users/{user_name}'
        response = self.session.delete(url)
        res_json = response.json()
        if response.status_code == 200:
            print(res_json["message"])
        else:
            print(f"Fail to drop user, code: {res_json['code']}, message: {res_json['message']}")

    def _handle_alter_user(self, command):
        user_name_tree: Tree = command['user_name']
        user_name: str = user_name_tree.children[0].strip("'\"")
        password_tree: Tree = command['password']
        password: str = password_tree.children[0].strip("'\"")
        print(f"Alter user: {user_name}, password: {password}")
        url = f'http://{self.host}:{self.port}/api/v1/admin/users/{user_name}/password'
        response = self.session.put(url, json={'new_password': encrypt(password)})
        res_json = response.json()
        if response.status_code == 200:
            print(res_json["message"])
        else:
            print(f"Fail to alter password, code: {res_json['code']}, message: {res_json['message']}")

    def _handle_create_user(self, command):
        user_name_tree: Tree = command['user_name']
        user_name: str = user_name_tree.children[0].strip("'\"")
        password_tree: Tree = command['password']
        password: str = password_tree.children[0].strip("'\"")
        role: str = command['role']
        print(f"Create user: {user_name}, password: {password}, role: {role}")
        url = f'http://{self.host}:{self.port}/api/v1/admin/users'
        response = self.session.post(
            url,
            json={'user_name': user_name, 'password': encrypt(password), 'role': role}
        )
        res_json = response.json()
        if response.status_code == 200:
            self._print_table_simple(res_json['data'])
        else:
            print(f"Fail to create user {user_name}, code: {res_json['code']}, message: {res_json['message']}")

    def _handle_activate_user(self, command):
        user_name_tree: Tree = command['user_name']
        user_name: str = user_name_tree.children[0].strip("'\"")
        activate_tree: Tree = command['activate_status']
        activate_status: str = activate_tree.children[0].strip("'\"")
        if activate_status.lower() in ['on', 'off']:
            print(f"Alter user {user_name} activate status, turn {activate_status.lower()}.")
            url = f'http://{self.host}:{self.port}/api/v1/admin/users/{user_name}/activate'
            response = self.session.put(url, json={'activate_status': activate_status})
            res_json = response.json()
            if response.status_code == 200:
                print(res_json["message"])
            else:
                print(f"Fail to alter activate status, code: {res_json['code']}, message: {res_json['message']}")
        else:
            print(f"Unknown activate status: {activate_status}.")

    def _handle_list_datasets(self, command):
        username_tree: Tree = command['user_name']
        user_name: str = username_tree.children[0].strip("'\"")
        print(f"Listing all datasets of user: {user_name}")
        url = f'http://{self.host}:{self.port}/api/v1/admin/users/{user_name}/datasets'
        response = self.session.get(url)
        res_json = response.json()
        if response.status_code == 200:
            self._print_table_simple(res_json['data'])
        else:
            print(f"Fail to get all datasets of {user_name}, code: {res_json['code']}, message: {res_json['message']}")

    def _handle_list_agents(self, command):
        username_tree: Tree = command['user_name']
        user_name: str = username_tree.children[0].strip("'\"")
        print(f"Listing all agents of user: {user_name}")
        url = f'http://{self.host}:{self.port}/api/v1/admin/users/{user_name}/agents'
        response = self.session.get(url)
        res_json = response.json()
        if response.status_code == 200:
            self._print_table_simple(res_json['data'])
        else:
            print(f"Fail to get all agents of {user_name}, code: {res_json['code']}, message: {res_json['message']}")

    def _create_role(self, command):
        role_name_tree: Tree = command['role_name']
        role_name: str = role_name_tree.children[0].strip("'\"")
        desc_str: str = ''
        if 'description' in command:
            desc_tree: Tree = command['description']
            desc_str = desc_tree.children[0].strip("'\"")

        print(f"create role name: {role_name}, description: {desc_str}")
        url = f'http://{self.host}:{self.port}/api/v1/admin/roles'
        response = self.session.post(
            url,
            json={'role_name': role_name, 'description': desc_str}
        )
        res_json = response.json()
        if response.status_code == 200:
            self._print_table_simple(res_json['data'])
        else:
            print(f"Fail to create role {role_name}, code: {res_json['code']}, message: {res_json['message']}")

    def _drop_role(self, command):
        role_name_tree: Tree = command['role_name']
        role_name: str = role_name_tree.children[0].strip("'\"")
        print(f"drop role name: {role_name}")
        url = f'http://{self.host}:{self.port}/api/v1/admin/roles/{role_name}'
        response = self.session.delete(url)
        res_json = response.json()
        if response.status_code == 200:
            self._print_table_simple(res_json['data'])
        else:
            print(f"Fail to drop role {role_name}, code: {res_json['code']}, message: {res_json['message']}")

    def _alter_role(self, command):
        role_name_tree: Tree = command['role_name']
        role_name: str = role_name_tree.children[0].strip("'\"")
        desc_tree: Tree = command['description']
        desc_str: str = desc_tree.children[0].strip("'\"")

        print(f"alter role name: {role_name}, description: {desc_str}")
        url = f'http://{self.host}:{self.port}/api/v1/admin/roles/{role_name}'
        response = self.session.put(
            url,
            json={'description': desc_str}
        )
        res_json = response.json()
        if response.status_code == 200:
            self._print_table_simple(res_json['data'])
        else:
            print(
                f"Fail to update role {role_name} with description: {desc_str}, code: {res_json['code']}, message: {res_json['message']}")

    def _list_roles(self, command):
        print("Listing all roles")
        url = f'http://{self.host}:{self.port}/api/v1/admin/roles'
        response = self.session.get(url)
        res_json = response.json()
        if response.status_code == 200:
            self._print_table_simple(res_json['data'])
        else:
            print(f"Fail to list roles, code: {res_json['code']}, message: {res_json['message']}")

    def _show_role(self, command):
        role_name_tree: Tree = command['role_name']
        role_name: str = role_name_tree.children[0].strip("'\"")
        print(f"show role: {role_name}")
        url = f'http://{self.host}:{self.port}/api/v1/admin/roles/{role_name}/permission'
        response = self.session.get(url)
        res_json = response.json()
        if response.status_code == 200:
            self._print_table_simple(res_json['data'])
        else:
            print(f"Fail to show role {role_name}, code: {res_json['code']}, message: {res_json['message']}")

    def _grant_permission(self, command):
        role_name_tree: Tree = command['role_name']
        role_name_str: str = role_name_tree.children[0].strip("'\"")
        resource_tree: Tree = command['resource']
        resource_str: str = resource_tree.children[0].strip("'\"")
        action_tree_list: list = command['actions']
        actions: list = []
        for action_tree in action_tree_list:
            action_str: str = action_tree.children[0].strip("'\"")
            actions.append(action_str)
        print(f"grant role_name: {role_name_str}, resource: {resource_str}, actions: {actions}")
        url = f'http://{self.host}:{self.port}/api/v1/admin/roles/{role_name_str}/permission'
        response = self.session.post(
            url,
            json={'actions': actions, 'resource': resource_str}
        )
        res_json = response.json()
        if response.status_code == 200:
            self._print_table_simple(res_json['data'])
        else:
            print(
                f"Fail to grant role {role_name_str} with {actions} on {resource_str}, code: {res_json['code']}, message: {res_json['message']}")

    def _revoke_permission(self, command):
        role_name_tree: Tree = command['role_name']
        role_name_str: str = role_name_tree.children[0].strip("'\"")
        resource_tree: Tree = command['resource']
        resource_str: str = resource_tree.children[0].strip("'\"")
        action_tree_list: list = command['actions']
        actions: list = []
        for action_tree in action_tree_list:
            action_str: str = action_tree.children[0].strip("'\"")
            actions.append(action_str)
        print(f"revoke role_name: {role_name_str}, resource: {resource_str}, actions: {actions}")
        url = f'http://{self.host}:{self.port}/api/v1/admin/roles/{role_name_str}/permission'
        response = self.session.delete(
            url,
            json={'actions': actions, 'resource': resource_str}
        )
        res_json = response.json()
        if response.status_code == 200:
            self._print_table_simple(res_json['data'])
        else:
            print(
                f"Fail to revoke role {role_name_str} with {actions} on {resource_str}, code: {res_json['code']}, message: {res_json['message']}")

    def _alter_user_role(self, command):
        role_name_tree: Tree = command['role_name']
        role_name_str: str = role_name_tree.children[0].strip("'\"")
        user_name_tree: Tree = command['user_name']
        user_name_str: str = user_name_tree.children[0].strip("'\"")
        print(f"alter_user_role user_name: {user_name_str}, role_name: {role_name_str}")
        url = f'http://{self.host}:{self.port}/api/v1/admin/users/{user_name_str}/role'
        response = self.session.put(
            url,
            json={'role_name': role_name_str}
        )
        res_json = response.json()
        if response.status_code == 200:
            self._print_table_simple(res_json['data'])
        else:
            print(
                f"Fail to alter user: {user_name_str} to role {role_name_str}, code: {res_json['code']}, message: {res_json['message']}")

    def _show_user_permission(self, command):
        user_name_tree: Tree = command['user_name']
        user_name_str: str = user_name_tree.children[0].strip("'\"")
        print(f"show_user_permission user_name: {user_name_str}")
        url = f'http://{self.host}:{self.port}/api/v1/admin/users/{user_name_str}/permission'
        response = self.session.get(url)
        res_json = response.json()
        if response.status_code == 200:
            self._print_table_simple(res_json['data'])
        else:
            print(
                f"Fail to show user: {user_name_str} permission, code: {res_json['code']}, message: {res_json['message']}")

    def _handle_meta_command(self, command):
        meta_command = command['command']
        args = command.get('args', [])

        if meta_command in ['?', 'h', 'help']:
            self.show_help()
        elif meta_command in ['q', 'quit', 'exit']:
            print("Goodbye!")
        else:
            print(f"Meta command '{meta_command}' with args {args}")

    def show_help(self):
        """Help info"""
        help_text = """
Commands:
    LIST SERVICES
    SHOW SERVICE <service>
    STARTUP SERVICE <service>
    SHUTDOWN SERVICE <service>
    RESTART SERVICE <service>
    LIST USERS
    SHOW USER <user>
    DROP USER <user>
    CREATE USER <user> <password>
    ALTER USER PASSWORD <user> <new_password>
    ALTER USER ACTIVE <user> <on/off>
    LIST DATASETS OF <user>
    LIST AGENTS OF <user>

Meta Commands:
    \\?, \\h, \\help    Show this help
    \\q, \\quit, \\exit Quit the CLI
"""
        print(help_text)


def main():
    import sys

    cli = AdminCLI()

    args = cli.parse_connection_args(sys.argv)
    if 'error' in args:
        print(f"Error: {args['error']}")
        return

    if 'command' in args:
        if 'password' not in args:
            print("Error: password is missing")
            return
        if cli.verify_admin(args, single_command=True):
            command: str = args['command']
            print(f"Run single command: {command}")
            cli.run_single_command(command)
    else:
        if cli.verify_admin(args, single_command=False):
            print(r"""
    ____  ___   ______________                 ___       __          _
   / __ \/   | / ____/ ____/ /___ _      __   /   | ____/ /___ ___  (_)___
  / /_/ / /| |/ / __/ /_  / / __ \ | /| / /  / /| |/ __  / __ `__ \/ / __ \
 / _, _/ ___ / /_/ / __/ / / /_/ / |/ |/ /  / ___ / /_/ / / / / / / / / / /
/_/ |_/_/  |_\____/_/   /_/\____/|__/|__/  /_/  |_\__,_/_/ /_/ /_/_/_/ /_/
""")
            cli.cmdloop()


if __name__ == '__main__':
    main()
24
admin/client/pyproject.toml
Normal file
@@ -0,0 +1,24 @@
[project]
name = "ragflow-cli"
version = "0.21.1"
description = "Admin Service's client of [RAGFlow](https://github.com/infiniflow/ragflow). The Admin Service provides user management and system monitoring."
authors = [{ name = "Lynn", email = "lynn_inf@hotmail.com" }]
license = { text = "Apache License, Version 2.0" }
readme = "README.md"
requires-python = ">=3.10,<3.13"
dependencies = [
    "requests>=2.30.0,<3.0.0",
    "beartype>=0.18.5,<0.19.0",
    "pycryptodomex>=3.10.0",
    "lark>=1.1.0",
]

[dependency-groups]
test = [
    "pytest>=8.3.5",
    "requests>=2.32.3",
    "requests-toolbelt>=1.0.0",
]

[project.scripts]
ragflow-cli = "admin_client:main"
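With this entry point, installing the built wheel exposes the client as a `ragflow-cli` console command mapped to `admin_client:main`; for example (wheel filename assumed from the version above):

```bash
pip install dist/ragflow_cli-0.21.1-py3-none-any.whl
ragflow-cli -h 127.0.0.1 -p 9381
```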
@@ -1,15 +0,0 @@
from flask import jsonify


def success_response(data=None, message="Success", code=0):
    return jsonify({
        "code": code,
        "message": message,
        "data": data
    }), 200


def error_response(message="Error", code=-1, data=None):
    return jsonify({
        "code": code,
        "message": message,
        "data": data
    }), 400
190
admin/routes.py
@@ -1,190 +0,0 @@
|
||||
from flask import Blueprint, request
|
||||
|
||||
from auth import login_verify
|
||||
from responses import success_response, error_response
|
||||
from services import UserMgr, ServiceMgr, UserServiceMgr
|
||||
from api.common.exceptions import AdminException
|
||||
|
||||
admin_bp = Blueprint('admin', __name__, url_prefix='/api/v1/admin')
|
||||
|
||||
|
||||
@admin_bp.route('/auth', methods=['GET'])
|
||||
@login_verify
|
||||
def auth_admin():
|
||||
try:
|
||||
return success_response(None, "Admin is authorized", 0)
|
||||
except Exception as e:
|
||||
return error_response(str(e), 500)
|
||||
|
||||
|
||||
@admin_bp.route('/users', methods=['GET'])
|
||||
@login_verify
|
||||
def list_users():
|
||||
try:
|
||||
users = UserMgr.get_all_users()
|
||||
return success_response(users, "Get all users", 0)
|
||||
except Exception as e:
|
||||
return error_response(str(e), 500)
|
||||
|
||||
|
||||
@admin_bp.route('/users', methods=['POST'])
|
||||
@login_verify
|
||||
def create_user():
|
||||
try:
|
||||
data = request.get_json()
|
||||
if not data or 'username' not in data or 'password' not in data:
|
||||
return error_response("Username and password are required", 400)
        username = data['username']
        password = data['password']
        role = data.get('role', 'user')

        res = UserMgr.create_user(username, password, role)
        if res["success"]:
            user_info = res["user_info"]
            user_info.pop("password")  # do not return the password
            return success_response(user_info, "User created successfully")
        else:
            return error_response("create user failed")

    except AdminException as e:
        return error_response(e.message, e.code)
    except Exception as e:
        return error_response(str(e))


@admin_bp.route('/users/<username>', methods=['DELETE'])
@login_verify
def delete_user(username):
    try:
        res = UserMgr.delete_user(username)
        if res["success"]:
            return success_response(None, res["message"])
        else:
            return error_response(res["message"])

    except AdminException as e:
        return error_response(e.message, e.code)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/users/<username>/password', methods=['PUT'])
@login_verify
def change_password(username):
    try:
        data = request.get_json()
        if not data or 'new_password' not in data:
            return error_response("New password is required", 400)

        new_password = data['new_password']
        msg = UserMgr.update_user_password(username, new_password)
        return success_response(None, msg)

    except AdminException as e:
        return error_response(e.message, e.code)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/users/<username>/activate', methods=['PUT'])
@login_verify
def alter_user_activate_status(username):
    try:
        data = request.get_json()
        if not data or 'activate_status' not in data:
            return error_response("Activation status is required", 400)
        activate_status = data['activate_status']
        msg = UserMgr.update_user_activate_status(username, activate_status)
        return success_response(None, msg)
    except AdminException as e:
        return error_response(e.message, e.code)
    except Exception as e:
        return error_response(str(e), 500)

@admin_bp.route('/users/<username>', methods=['GET'])
@login_verify
def get_user_details(username):
    try:
        user_details = UserMgr.get_user_details(username)
        return success_response(user_details)

    except AdminException as e:
        return error_response(e.message, e.code)
    except Exception as e:
        return error_response(str(e), 500)

@admin_bp.route('/users/<username>/datasets', methods=['GET'])
@login_verify
def get_user_datasets(username):
    try:
        datasets_list = UserServiceMgr.get_user_datasets(username)
        return success_response(datasets_list)

    except AdminException as e:
        return error_response(e.message, e.code)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/users/<username>/agents', methods=['GET'])
@login_verify
def get_user_agents(username):
    try:
        agents_list = UserServiceMgr.get_user_agents(username)
        return success_response(agents_list)

    except AdminException as e:
        return error_response(e.message, e.code)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/services', methods=['GET'])
@login_verify
def get_services():
    try:
        services = ServiceMgr.get_all_services()
        return success_response(services, "Get all services", 0)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/service_types/<service_type>', methods=['GET'])
@login_verify
def get_services_by_type(service_type):
    try:
        services = ServiceMgr.get_services_by_type(service_type)
        return success_response(services)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/services/<service_id>', methods=['GET'])
@login_verify
def get_service(service_id):
    try:
        services = ServiceMgr.get_service_details(service_id)
        return success_response(services)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/services/<service_id>', methods=['DELETE'])
@login_verify
def shutdown_service(service_id):
    try:
        services = ServiceMgr.shutdown_service(service_id)
        return success_response(services)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/services/<service_id>', methods=['PUT'])
@login_verify
def restart_service(service_id):
    try:
        services = ServiceMgr.restart_service(service_id)
        return success_response(services)
    except Exception as e:
        return error_response(str(e), 500)
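Each endpoint above is wrapped by @login_verify, which reads credentials from request.authorization. A minimal client sketch, assuming HTTP Basic auth (host, port, and credentials are assumptions, not part of this commit):

import requests

# Hypothetical admin credentials; check_admin() validates them via UserService.query_user().
resp = requests.get(
    "http://localhost:9381/api/v1/admin/services",
    auth=("admin@ragflow.io", "admin"),
)
print(resp.json())  # e.g. {"code": 0, "message": "Get all services", "data": [...]}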
@@ -1,3 +1,18 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import os
import signal
@@ -12,6 +27,9 @@ from api.utils.log_utils import init_root_logger
from api.constants import SERVICE_CONF
from api import settings
from config import load_configurations, SERVICE_CONFIGS
from auth import init_default_admin, setup_auth
from flask_session import Session
from flask_login import LoginManager

stop_event = threading.Event()

@@ -27,7 +45,17 @@ if __name__ == '__main__':

    app = Flask(__name__)
    app.register_blueprint(admin_bp)
    app.config["SESSION_PERMANENT"] = False
    app.config["SESSION_TYPE"] = "filesystem"
    app.config["MAX_CONTENT_LENGTH"] = int(
        os.environ.get("MAX_CONTENT_LENGTH", 1024 * 1024 * 1024)
    )
    Session(app)
    login_manager = LoginManager()
    login_manager.init_app(app)
    settings.init_settings()
    setup_auth(login_manager)
    init_default_admin()
    SERVICE_CONFIGS.configs = load_configurations(SERVICE_CONF)

    try:
193
admin/server/auth.py
Normal file
@@ -0,0 +1,193 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


import logging
import uuid
from functools import wraps
from datetime import datetime
from flask import request, jsonify
from flask_login import current_user, login_user
from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer

from api import settings
from api.common.exceptions import AdminException, UserNotFoundError
from api.db.init_data import encode_to_base64
from api.db.services import UserService
from api.db import ActiveEnum, StatusEnum
from api.utils.crypt import decrypt
from api.utils import (
    current_timestamp,
    datetime_format,
    get_format_time,
    get_uuid,
)
from api.utils.api_utils import (
    construct_response,
)


def setup_auth(login_manager):
    @login_manager.request_loader
    def load_user(web_request):
        jwt = Serializer(secret_key=settings.SECRET_KEY)
        authorization = web_request.headers.get("Authorization")
        if authorization:
            try:
                access_token = str(jwt.loads(authorization))

                if not access_token or not access_token.strip():
                    logging.warning("Authentication attempt with empty access token")
                    return None

                # Access tokens should be UUIDs (32 hex characters)
                if len(access_token.strip()) < 32:
                    logging.warning(f"Authentication attempt with invalid token format: {len(access_token)} chars")
                    return None

                user = UserService.query(
                    access_token=access_token, status=StatusEnum.VALID.value
                )
                if user:
                    if not user[0].access_token or not user[0].access_token.strip():
                        logging.warning(f"User {user[0].email} has empty access_token in database")
                        return None
                    return user[0]
                else:
                    return None
            except Exception as e:
                logging.warning(f"load_user got exception {e}")
                return None
        else:
            return None


def init_default_admin():
    # Verify that at least one active admin user exists. If not, create a default one.
    users = UserService.query(is_superuser=True)
    if not users:
        default_admin = {
            "id": uuid.uuid1().hex,
            "password": encode_to_base64("admin"),
            "nickname": "admin",
            "is_superuser": True,
            "email": "admin@ragflow.io",
            "creator": "system",
            "status": "1",
        }
        if not UserService.save(**default_admin):
            raise AdminException("Can't init admin.", 500)
    elif not any([u.is_active == ActiveEnum.ACTIVE.value for u in users]):
        raise AdminException("No active admin. Please update 'is_active' in db manually.", 500)


def check_admin_auth(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        user = UserService.filter_by_id(current_user.id)
        if not user:
            raise UserNotFoundError(current_user.email)
        if not user.is_superuser:
            raise AdminException("Not admin", 403)
        if user.is_active == ActiveEnum.INACTIVE.value:
            raise AdminException(f"User {current_user.email} inactive", 403)

        return func(*args, **kwargs)

    return wrapper


def login_admin(email: str, password: str):
    """
    :param email: admin email
    :param password: string before decrypt
    """
    users = UserService.query(email=email)
    if not users:
        raise UserNotFoundError(email)
    psw = decrypt(password)
    user = UserService.query_user(email, psw)
    if not user:
        raise AdminException("Email and password do not match!")
    if not user.is_superuser:
        raise AdminException("Not admin", 403)
    if user.is_active == ActiveEnum.INACTIVE.value:
        raise AdminException(f"User {email} inactive", 403)

    resp = user.to_json()
    user.access_token = get_uuid()
    login_user(user)
    user.update_time = current_timestamp()
    user.update_date = datetime_format(datetime.now())
    user.last_login_time = get_format_time()
    user.save()
    msg = "Welcome back!"
    return construct_response(data=resp, auth=user.get_id(), message=msg)


def check_admin(username: str, password: str):
    users = UserService.query(email=username)
    if not users:
        logging.info(f"Username: {username} is not registered!")
        user_info = {
            "id": uuid.uuid1().hex,
            "password": encode_to_base64("admin"),
            "nickname": "admin",
            "is_superuser": True,
            "email": "admin@ragflow.io",
            "creator": "system",
            "status": "1",
        }
        if not UserService.save(**user_info):
            raise AdminException("Can't init admin.", 500)

    user = UserService.query_user(username, password)
    if user:
        return True
    else:
        return False


def login_verify(f):
    @wraps(f)
    def decorated(*args, **kwargs):
        auth = request.authorization
        if not auth or 'username' not in auth.parameters or 'password' not in auth.parameters:
            return jsonify({
                "code": 401,
                "message": "Authentication required",
                "data": None
            }), 200

        username = auth.parameters['username']
        password = auth.parameters['password']
        try:
            if check_admin(username, password) is False:
                return jsonify({
                    "code": 500,
                    "message": "Access denied",
                    "data": None
                }), 200
        except Exception as e:
            error_msg = str(e)
            return jsonify({
                "code": 500,
                "message": error_msg
            }), 200

        return f(*args, **kwargs)

    return decorated
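For reference, the Authorization header that load_user() expects is an itsdangerous-serialized access token. A minimal sketch of producing one (the SECRET_KEY and token values are assumptions):

from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer

SECRET_KEY = "change-me"                           # assumption: must equal settings.SECRET_KEY
access_token = "0af31f21c3a511ee8f830242ac120006"  # assumption: the UUID hex stored on the user row

jwt = Serializer(secret_key=SECRET_KEY)
authorization = jwt.dumps(access_token)            # value to send as the Authorization header
assert str(jwt.loads(authorization)) == access_token  # what load_user() does in reverse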
@@ -1,3 +1,20 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


import logging
import threading
from enum import Enum
@@ -9,6 +26,8 @@ from urllib.parse import urlparse


class ServiceConfigs:
    configs = dict

    def __init__(self):
        self.configs = []
        self.lock = threading.Lock()
@@ -35,7 +54,8 @@ class BaseConfig(BaseModel):
    detail_func_name: str

    def to_dict(self) -> dict[str, Any]:
        return {'id': self.id, 'name': self.name, 'host': self.host, 'port': self.port, 'service_type': self.service_type}
        return {'id': self.id, 'name': self.name, 'host': self.host, 'port': self.port,
                'service_type': self.service_type}


class MetaConfig(BaseConfig):
@@ -211,7 +231,8 @@ def load_configurations(config_path: str) -> list[BaseConfig]:
            host: str = v['host']
            http_port: int = v['http_port']
            config = RAGFlowServerConfig(id=id_count, name=name, host=host, port=http_port,
                                         service_type="ragflow_server", detail_func_name="check_ragflow_server_alive")
                                         service_type="ragflow_server",
                                         detail_func_name="check_ragflow_server_alive")
            configurations.append(config)
            id_count += 1
        case "es":
@@ -236,7 +257,8 @@ def load_configurations(config_path: str) -> list[BaseConfig]:
            host = parts[0]
            port = int(parts[1])
            database: str = v.get('db_name', 'default_db')
            config = InfinityConfig(id=id_count, name=name, host=host, port=port, service_type="retrieval", retrieval_type="infinity",
            config = InfinityConfig(id=id_count, name=name, host=host, port=port, service_type="retrieval",
                                    retrieval_type="infinity",
                                    db_name=database, detail_func_name="get_infinity_status")
            configurations.append(config)
            id_count += 1
@@ -248,7 +270,8 @@ def load_configurations(config_path: str) -> list[BaseConfig]:
            port = int(parts[1])
            user = v.get('user')
            password = v.get('password')
            config = MinioConfig(id=id_count, name=name, host=host, port=port, user=user, password=password, service_type="file_store",
            config = MinioConfig(id=id_count, name=name, host=host, port=port, user=user, password=password,
                                 service_type="file_store",
                                 store_type="minio", detail_func_name="check_minio_alive")
            configurations.append(config)
            id_count += 1
@@ -12,4 +12,4 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
@@ -1,24 +1,34 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import logging
import os
import random
import re
import sys
import threading


#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


from flask import jsonify


def success_response(data=None, message="Success", code=0):
    return jsonify({
        "code": code,
        "message": message,
        "data": data
    }), 200


def error_response(message="Error", code=-1, data=None):
    return jsonify({
        "code": code,
        "message": message,
        "data": data
    }), 400
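For orientation, the envelope these two helpers emit looks like this (values illustrative):

# success_response({"email": "admin@ragflow.io"}, "User created successfully")
#   -> HTTP 200, body {"code": 0, "message": "User created successfully", "data": {"email": "admin@ragflow.io"}}
# error_response("create user failed")
#   -> HTTP 400, body {"code": -1, "message": "create user failed", "data": null}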
76
admin/server/roles.py
Normal file
@@ -0,0 +1,76 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import logging

from typing import Dict, Any

from api.common.exceptions import AdminException


class RoleMgr:
    @staticmethod
    def create_role(role_name: str, description: str):
        error_msg = f"not implemented: create role: {role_name}, description: {description}"
        logging.error(error_msg)
        raise AdminException(error_msg)

    @staticmethod
    def update_role_description(role_name: str, description: str) -> Dict[str, Any]:
        error_msg = f"not implemented: update role: {role_name} with description: {description}"
        logging.error(error_msg)
        raise AdminException(error_msg)

    @staticmethod
    def delete_role(role_name: str) -> Dict[str, Any]:
        error_msg = f"not implemented: drop role: {role_name}"
        logging.error(error_msg)
        raise AdminException(error_msg)

    @staticmethod
    def list_roles() -> Dict[str, Any]:
        error_msg = "not implemented: list roles"
        logging.error(error_msg)
        raise AdminException(error_msg)

    @staticmethod
    def get_role_permission(role_name: str) -> Dict[str, Any]:
        error_msg = f"not implemented: show role {role_name}"
        logging.error(error_msg)
        raise AdminException(error_msg)

    @staticmethod
    def grant_role_permission(role_name: str, actions: list, resource: str) -> Dict[str, Any]:
        error_msg = f"not implemented: grant role {role_name} actions: {actions} on {resource}"
        logging.error(error_msg)
        raise AdminException(error_msg)

    @staticmethod
    def revoke_role_permission(role_name: str, actions: list, resource: str) -> Dict[str, Any]:
        error_msg = f"not implemented: revoke role {role_name} actions: {actions} on {resource}"
        logging.error(error_msg)
        raise AdminException(error_msg)

    @staticmethod
    def update_user_role(user_name: str, role_name: str) -> Dict[str, Any]:
        error_msg = f"not implemented: update user role: {user_name} to role {role_name}"
        logging.error(error_msg)
        raise AdminException(error_msg)

    @staticmethod
    def get_user_permission(user_name: str) -> Dict[str, Any]:
        error_msg = f"not implemented: get user permission: {user_name}"
        logging.error(error_msg)
        raise AdminException(error_msg)
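RoleMgr is a stub: every method logs and raises AdminException, so the /roles routes below currently always return an error envelope. A quick illustration:

try:
    RoleMgr.list_roles()
except AdminException as e:
    print(e)  # "not implemented: list roles" -- surfaced by the routes as {"code": 500, ...}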
371
admin/server/routes.py
Normal file
@@ -0,0 +1,371 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import secrets

from flask import Blueprint, request
from flask_login import current_user, logout_user, login_required

from auth import login_verify, login_admin, check_admin_auth
from responses import success_response, error_response
from services import UserMgr, ServiceMgr, UserServiceMgr
from roles import RoleMgr
from api.common.exceptions import AdminException

admin_bp = Blueprint('admin', __name__, url_prefix='/api/v1/admin')


@admin_bp.route('/login', methods=['POST'])
def login():
    if not request.json:
        return error_response('Authorize admin failed.', 400)
    try:
        email = request.json.get("email", "")
        password = request.json.get("password", "")
        return login_admin(email, password)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/logout', methods=['GET'])
@login_required
def logout():
    try:
        current_user.access_token = f"INVALID_{secrets.token_hex(16)}"
        current_user.save()
        logout_user()
        return success_response(True)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/auth', methods=['GET'])
@login_verify
def auth_admin():
    try:
        return success_response(None, "Admin is authorized", 0)
    except Exception as e:
        return error_response(str(e), 500)

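A sketch of the login handshake (host and port are assumptions; the password must already be in the encrypted form that login_admin() hands to decrypt()):

import requests

resp = requests.post(
    "http://localhost:9381/api/v1/admin/login",   # assumed bind address of the admin server
    json={"email": "admin@ragflow.io", "password": "<encrypted password>"},
)
# construct_response() returns the user profile; flask-login also sets a session
# cookie, which the @login_required endpoints below rely on.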
@admin_bp.route('/users', methods=['GET'])
@login_required
@check_admin_auth
def list_users():
    try:
        users = UserMgr.get_all_users()
        return success_response(users, "Get all users", 0)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/users', methods=['POST'])
@login_required
@check_admin_auth
def create_user():
    try:
        data = request.get_json()
        if not data or 'username' not in data or 'password' not in data:
            return error_response("Username and password are required", 400)

        username = data['username']
        password = data['password']
        role = data.get('role', 'user')

        res = UserMgr.create_user(username, password, role)
        if res["success"]:
            user_info = res["user_info"]
            user_info.pop("password")  # do not return the password
            return success_response(user_info, "User created successfully")
        else:
            return error_response("create user failed")

    except AdminException as e:
        return error_response(e.message, e.code)
    except Exception as e:
        return error_response(str(e))


@admin_bp.route('/users/<username>', methods=['DELETE'])
@login_required
@check_admin_auth
def delete_user(username):
    try:
        res = UserMgr.delete_user(username)
        if res["success"]:
            return success_response(None, res["message"])
        else:
            return error_response(res["message"])

    except AdminException as e:
        return error_response(e.message, e.code)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/users/<username>/password', methods=['PUT'])
@login_required
@check_admin_auth
def change_password(username):
    try:
        data = request.get_json()
        if not data or 'new_password' not in data:
            return error_response("New password is required", 400)

        new_password = data['new_password']
        msg = UserMgr.update_user_password(username, new_password)
        return success_response(None, msg)

    except AdminException as e:
        return error_response(e.message, e.code)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/users/<username>/activate', methods=['PUT'])
@login_required
@check_admin_auth
def alter_user_activate_status(username):
    try:
        data = request.get_json()
        if not data or 'activate_status' not in data:
            return error_response("Activation status is required", 400)
        activate_status = data['activate_status']
        msg = UserMgr.update_user_activate_status(username, activate_status)
        return success_response(None, msg)
    except AdminException as e:
        return error_response(e.message, e.code)
    except Exception as e:
        return error_response(str(e), 500)

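A compact sketch of driving these user-management endpoints with the session issued by /login (URL, credentials, and the activate_status value are assumptions):

import requests

s = requests.Session()
s.post("http://localhost:9381/api/v1/admin/login",
       json={"email": "admin@ragflow.io", "password": "<encrypted password>"})

s.post("http://localhost:9381/api/v1/admin/users",
       json={"username": "alice@example.com", "password": "<encrypted password>", "role": "user"})
s.put("http://localhost:9381/api/v1/admin/users/alice@example.com/activate",
      json={"activate_status": "off"})   # hypothetical status value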
@admin_bp.route('/users/<username>', methods=['GET'])
@login_required
@check_admin_auth
def get_user_details(username):
    try:
        user_details = UserMgr.get_user_details(username)
        return success_response(user_details)

    except AdminException as e:
        return error_response(e.message, e.code)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/users/<username>/datasets', methods=['GET'])
@login_required
@check_admin_auth
def get_user_datasets(username):
    try:
        datasets_list = UserServiceMgr.get_user_datasets(username)
        return success_response(datasets_list)

    except AdminException as e:
        return error_response(e.message, e.code)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/users/<username>/agents', methods=['GET'])
@login_required
@check_admin_auth
def get_user_agents(username):
    try:
        agents_list = UserServiceMgr.get_user_agents(username)
        return success_response(agents_list)

    except AdminException as e:
        return error_response(e.message, e.code)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/services', methods=['GET'])
@login_required
@check_admin_auth
def get_services():
    try:
        services = ServiceMgr.get_all_services()
        return success_response(services, "Get all services", 0)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/service_types/<service_type>', methods=['GET'])
@login_required
@check_admin_auth
def get_services_by_type(service_type):
    try:
        services = ServiceMgr.get_services_by_type(service_type)
        return success_response(services)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/services/<service_id>', methods=['GET'])
@login_required
@check_admin_auth
def get_service(service_id):
    try:
        services = ServiceMgr.get_service_details(service_id)
        return success_response(services)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/services/<service_id>', methods=['DELETE'])
@login_required
@check_admin_auth
def shutdown_service(service_id):
    try:
        services = ServiceMgr.shutdown_service(service_id)
        return success_response(services)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/services/<service_id>', methods=['PUT'])
@login_required
@check_admin_auth
def restart_service(service_id):
    try:
        services = ServiceMgr.restart_service(service_id)
        return success_response(services)
    except Exception as e:
        return error_response(str(e), 500)

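The role endpoints that follow accept JSON bodies like the sketch below (field values are illustrative; since RoleMgr raises AdminException for everything today, expect a code-500 envelope back):

import requests

requests.post(
    "http://localhost:9381/api/v1/admin/roles/editor/permission",   # assumed host/port
    json={"actions": ["read", "write"], "resource": "dataset:sales"},
)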
@admin_bp.route('/roles', methods=['POST'])
@login_required
@check_admin_auth
def create_role():
    try:
        data = request.get_json()
        if not data or 'role_name' not in data:
            return error_response("Role name is required", 400)
        role_name: str = data['role_name']
        description: str = data['description']
        res = RoleMgr.create_role(role_name, description)
        return success_response(res)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/roles/<role_name>', methods=['PUT'])
@login_required
@check_admin_auth
def update_role(role_name: str):
    try:
        data = request.get_json()
        if not data or 'description' not in data:
            return error_response("Role description is required", 400)
        description: str = data['description']
        res = RoleMgr.update_role_description(role_name, description)
        return success_response(res)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/roles/<role_name>', methods=['DELETE'])
@login_required
@check_admin_auth
def delete_role(role_name: str):
    try:
        res = RoleMgr.delete_role(role_name)
        return success_response(res)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/roles', methods=['GET'])
@login_required
@check_admin_auth
def list_roles():
    try:
        res = RoleMgr.list_roles()
        return success_response(res)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/roles/<role_name>/permission', methods=['GET'])
@login_required
@check_admin_auth
def get_role_permission(role_name: str):
    try:
        res = RoleMgr.get_role_permission(role_name)
        return success_response(res)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/roles/<role_name>/permission', methods=['POST'])
@login_required
@check_admin_auth
def grant_role_permission(role_name: str):
    try:
        data = request.get_json()
        if not data or 'actions' not in data or 'resource' not in data:
            return error_response("Permission is required", 400)
        actions: list = data['actions']
        resource: str = data['resource']
        res = RoleMgr.grant_role_permission(role_name, actions, resource)
        return success_response(res)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/roles/<role_name>/permission', methods=['DELETE'])
@login_required
@check_admin_auth
def revoke_role_permission(role_name: str):
    try:
        data = request.get_json()
        if not data or 'actions' not in data or 'resource' not in data:
            return error_response("Permission is required", 400)
        actions: list = data['actions']
        resource: str = data['resource']
        res = RoleMgr.revoke_role_permission(role_name, actions, resource)
        return success_response(res)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/users/<user_name>/role', methods=['PUT'])
@login_required
@check_admin_auth
def update_user_role(user_name: str):
    try:
        data = request.get_json()
        if not data or 'role_name' not in data:
            return error_response("Role name is required", 400)
        role_name: str = data['role_name']
        res = RoleMgr.update_user_role(user_name, role_name)
        return success_response(res)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/users/<user_name>/permission', methods=['GET'])
@login_required
@check_admin_auth
def get_user_permission(user_name: str):
    try:
        res = RoleMgr.get_user_permission(user_name)
        return success_response(res)
    except Exception as e:
        return error_response(str(e), 500)
@@ -1,3 +1,20 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


import re
from werkzeug.security import check_password_hash
from api.db import ActiveEnum
@@ -12,13 +29,20 @@ from api.utils import health_utils
from api.common.exceptions import AdminException, UserAlreadyExistsError, UserNotFoundError
from config import SERVICE_CONFIGS


class UserMgr:
    @staticmethod
    def get_all_users():
        users = UserService.get_all_users()
        result = []
        for user in users:
            result.append({'email': user.email, 'nickname': user.nickname, 'create_date': user.create_date, 'is_active': user.is_active})
            result.append({
                'email': user.email,
                'nickname': user.nickname,
                'create_date': user.create_date,
                'is_active': user.is_active,
                'is_superuser': user.is_superuser,
            })
        return result

    @staticmethod
@@ -31,7 +55,6 @@ class UserMgr:
            'email': user.email,
            'language': user.language,
            'last_login_time': user.last_login_time,
            'is_authenticated': user.is_authenticated,
            'is_active': user.is_active,
            'is_anonymous': user.is_anonymous,
            'login_channel': user.login_channel,
@@ -112,6 +135,7 @@ class UserMgr:
        UserService.update_user(usr.id, {"is_active": target_status})
        return f"Turn {_activate_status} user activate status successfully!"


class UserServiceMgr:

    @staticmethod
@@ -146,18 +170,27 @@ class UserServiceMgr:
        return [{
            'title': r['title'],
            'permission': r['permission'],
            'canvas_type': r['canvas_type'],
            'canvas_category': r['canvas_category']
            'canvas_category': r['canvas_category'].split('_')[0]
        } for r in res]


class ServiceMgr:

    @staticmethod
    def get_all_services():
        result = []
        configs = SERVICE_CONFIGS.configs
        for config in configs:
            result.append(config.to_dict())
        for service_id, config in enumerate(configs):
            config_dict = config.to_dict()
            try:
                service_detail = ServiceMgr.get_service_details(service_id)
                if "status" in service_detail:
                    config_dict['status'] = service_detail['status']
                else:
                    config_dict['status'] = 'timeout'
            except Exception:
                config_dict['status'] = 'timeout'
            result.append(config_dict)
        return result

    @staticmethod
@@ -176,7 +209,7 @@ class ServiceMgr:
        }
        service_info = service_config_mapping.get(service_id, {})
        if not service_info:
            raise AdminException(f"Invalid service_id: {service_id}")
            raise AdminException(f"invalid service_id: {service_id}")

        detail_func = getattr(health_utils, service_info.get('detail_func_name'))
        res = detail_func()
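After this change, each entry from ServiceMgr.get_all_services() carries a live status probe. Illustrative shape of one entry (values assumed):

# {
#     "id": 0,
#     "name": "ragflow_server",
#     "host": "0.0.0.0",
#     "port": 9380,
#     "service_type": "ragflow_server",
#     "status": "alive",   # falls back to "timeout" when the health probe fails or raises
# }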
@@ -203,7 +203,6 @@ class Canvas(Graph):
        self.history = []
        self.retrieval = []
        self.memory = []

        for k in self.globals.keys():
            if isinstance(self.globals[k], str):
                self.globals[k] = ""
@@ -292,7 +291,6 @@ class Canvas(Graph):
                    "thoughts": self.get_component_thoughts(self.path[i])
                })
        _run_batch(idx, to)

        # post processing of components invocation
        for i in range(idx, to):
            cpn = self.get_component(self.path[i])
@@ -393,7 +391,6 @@ class Canvas(Graph):
                self.path = path
                yield decorate("user_inputs", {"inputs": another_inputs, "tips": tips})
                return

        self.path = self.path[:idx]
        if not self.error:
            yield decorate("workflow_finished",

@@ -346,7 +346,11 @@ Respond immediately with your final comprehensive answer.

        return "Error occurred."

    def reset(self):
    def reset(self, temp=False):
        """
        Reset all tools if they have a reset method. This avoids errors for tools like MCPToolCallSession.
        """
        for k, cpn in self.tools.items():
            cpn.reset()
            if hasattr(cpn, "reset") and callable(cpn.reset):
                cpn.reset()

726
agent/templates/advanced_ingestion_pipeline.json
Normal file
File diff suppressed because one or more lines are too long
493
agent/templates/chunk_summary.json
Normal file
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
1172
agent/templates/stock_research_report.json
Normal file
File diff suppressed because one or more lines are too long
369
agent/templates/title_chunker.json
Normal file
File diff suppressed because one or more lines are too long
@@ -53,12 +53,13 @@ class ExeSQLParam(ToolParamBase):
        self.max_records = 1024

    def check(self):
        self.check_valid_value(self.db_type, "Choose DB type", ['mysql', 'postgres', 'mariadb', 'mssql', 'IBM DB2'])
        self.check_valid_value(self.db_type, "Choose DB type", ['mysql', 'postgres', 'mariadb', 'mssql', 'IBM DB2', 'trino'])
        self.check_empty(self.database, "Database name")
        self.check_empty(self.username, "database username")
        self.check_empty(self.host, "IP Address")
        self.check_positive_integer(self.port, "IP Port")
        self.check_empty(self.password, "Database password")
        if self.db_type != "trino":
            self.check_empty(self.password, "Database password")
        self.check_positive_integer(self.max_records, "Maximum number of records")
        if self.database == "rag_flow":
            if self.host == "ragflow-mysql":
@@ -123,6 +124,45 @@ class ExeSQL(ToolBase, ABC):
                r'PWD=' + self._param.password
            )
            db = pyodbc.connect(conn_str)
        elif self._param.db_type == 'trino':
            try:
                import trino
                from trino.auth import BasicAuthentication
            except Exception:
                raise Exception("Missing dependency 'trino'. Please install: pip install trino")

            def _parse_catalog_schema(db: str):
                if not db:
                    return None, None
                if "." in db:
                    c, s = db.split(".", 1)
                elif "/" in db:
                    c, s = db.split("/", 1)
                else:
                    c, s = db, "default"
                return c, s

            catalog, schema = _parse_catalog_schema(self._param.database)
            if not catalog:
                raise Exception("For Trino, `database` must be 'catalog.schema' or at least 'catalog'.")

            http_scheme = "https" if os.environ.get("TRINO_USE_TLS", "0") == "1" else "http"
            auth = None
            if http_scheme == "https" and self._param.password:
                auth = BasicAuthentication(self._param.username, self._param.password)

            try:
                db = trino.dbapi.connect(
                    host=self._param.host,
                    port=int(self._param.port or 8080),
                    user=self._param.username or "ragflow",
                    catalog=catalog,
                    schema=schema or "default",
                    http_scheme=http_scheme,
                    auth=auth
                )
            except Exception as e:
                raise Exception("Database Connection Failed! \n" + str(e))
        elif self._param.db_type == 'IBM DB2':
            import ibm_db
            conn_str = (

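How the `database` field maps onto Trino's catalog/schema pair under _parse_catalog_schema (illustrative values):

# "hive.sales" -> catalog="hive", schema="sales"
# "hive/sales" -> catalog="hive", schema="sales"
# "hive"       -> catalog="hive", schema="default"
# Set TRINO_USE_TLS=1 in the environment to switch to HTTPS and enable BasicAuthentication.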
@@ -85,13 +85,7 @@ class PubMed(ToolBase, ABC):
                self._retrieve_chunks(pubmedcnt.findall("PubmedArticle"),
                                      get_title=lambda child: child.find("MedlineCitation").find("Article").find("ArticleTitle").text,
                                      get_url=lambda child: "https://pubmed.ncbi.nlm.nih.gov/" + child.find("MedlineCitation").find("PMID").text,
                                      get_content=lambda child: child.find("MedlineCitation") \
                                          .find("Article") \
                                          .find("Abstract") \
                                          .find("AbstractText").text \
                                          if child.find("MedlineCitation") \
                                          .find("Article").find("Abstract") \
                                          else "No abstract available")
                                      get_content=lambda child: self._format_pubmed_content(child),)
                return self.output("formalized_content")
            except Exception as e:
                last_e = e
@@ -104,5 +98,50 @@ class PubMed(ToolBase, ABC):

        assert False, self.output()

    def _format_pubmed_content(self, child):
        """Extract structured reference info from PubMed XML"""
        def safe_find(path):
            node = child
            for p in path.split("/"):
                if node is None:
                    return None
                node = node.find(p)
            return node.text if node is not None and node.text else None

        title = safe_find("MedlineCitation/Article/ArticleTitle") or "No title"
        abstract = safe_find("MedlineCitation/Article/Abstract/AbstractText") or "No abstract available"
        journal = safe_find("MedlineCitation/Article/Journal/Title") or "Unknown Journal"
        volume = safe_find("MedlineCitation/Article/Journal/JournalIssue/Volume") or "-"
        issue = safe_find("MedlineCitation/Article/Journal/JournalIssue/Issue") or "-"
        pages = safe_find("MedlineCitation/Article/Pagination/MedlinePgn") or "-"

        # Authors (read from each Author element, not from the article root)
        authors = []
        for author in child.findall(".//AuthorList/Author"):
            lastname = author.findtext("LastName") or ""
            forename = author.findtext("ForeName") or ""
            fullname = f"{forename} {lastname}".strip()
            if fullname:
                authors.append(fullname)
        authors_str = ", ".join(authors) if authors else "Unknown Authors"

        # DOI
        doi = None
        for eid in child.findall(".//ArticleId"):
            if eid.attrib.get("IdType") == "doi":
                doi = eid.text
                break

        return (
            f"Title: {title}\n"
            f"Authors: {authors_str}\n"
            f"Journal: {journal}\n"
            f"Volume: {volume}\n"
            f"Issue: {issue}\n"
            f"Pages: {pages}\n"
            f"DOI: {doi or '-'}\n"
            f"Abstract: {abstract.strip()}"
        )

    def thoughts(self) -> str:
        return "Looking for scholarly papers on `{}`, prioritising reputable sources.".format(self.get_input().get("query", "-_-!"))
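Illustrative output of _format_pubmed_content for a single article (all field values invented):

# Title: A sample study
# Authors: Jane Doe, John Smith
# Journal: Example Journal of Medicine
# Volume: 12
# Issue: 3
# Pages: 45-52
# DOI: 10.1000/example.doi
# Abstract: No abstract available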
@@ -18,12 +18,14 @@ import re
from abc import ABC
from agent.tools.base import ToolParamBase, ToolBase, ToolMeta
from api.db import LLMType
from api.db.services.document_service import DocumentService
from api.db.services.dialog_service import meta_filter
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.llm_service import LLMBundle
from api import settings
from api.utils.api_utils import timeout
from rag.app.tag import label_question
from rag.prompts.generator import cross_languages, kb_prompt
from rag.prompts.generator import cross_languages, kb_prompt, gen_meta_filter


class RetrievalParam(ToolParamBase):
@@ -57,6 +59,8 @@ class RetrievalParam(ToolParamBase):
        self.empty_response = ""
        self.use_kg = False
        self.cross_languages = []
        self.toc_enhance = False
        self.meta_data_filter = {}

    def check(self):
        self.check_decimal_float(self.similarity_threshold, "[Retrieval] Similarity threshold")
@@ -116,12 +120,27 @@ class Retrieval(ToolBase, ABC):
        vars = self.get_input_elements_from_text(kwargs["query"])
        vars = {k: o["value"] for k, o in vars.items()}
        query = self.string_format(kwargs["query"], vars)

        doc_ids = []
        if self._param.meta_data_filter != {}:
            metas = DocumentService.get_meta_by_kbs(kb_ids)
            if self._param.meta_data_filter.get("method") == "auto":
                chat_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.CHAT)
                filters = gen_meta_filter(chat_mdl, metas, query)
                doc_ids.extend(meta_filter(metas, filters))
                if not doc_ids:
                    doc_ids = None
            elif self._param.meta_data_filter.get("method") == "manual":
                doc_ids.extend(meta_filter(metas, self._param.meta_data_filter["manual"]))
                if not doc_ids:
                    doc_ids = None

        if self._param.cross_languages:
            query = cross_languages(kbs[0].tenant_id, None, query, self._param.cross_languages)

        if kbs:
            query = re.sub(r"^user[::\s]*", "", query, flags=re.IGNORECASE)
            kbinfos = settings.retrievaler.retrieval(
            kbinfos = settings.retriever.retrieval(
                query,
                embd_mdl,
                [kb.tenant_id for kb in kbs],
@@ -130,12 +149,18 @@ class Retrieval(ToolBase, ABC):
                self._param.top_n,
                self._param.similarity_threshold,
                1 - self._param.keywords_similarity_weight,
                doc_ids=doc_ids,
                aggs=False,
                rerank_mdl=rerank_mdl,
                rank_feature=label_question(query, kbs),
            )
            if self._param.toc_enhance:
                chat_mdl = LLMBundle(self._canvas._tenant_id, LLMType.CHAT)
                cks = settings.retriever.retrieval_by_toc(query, kbinfos["chunks"], [kb.tenant_id for kb in kbs], chat_mdl, self._param.top_n)
                if cks:
                    kbinfos["chunks"] = cks
            if self._param.use_kg:
                ck = settings.kg_retrievaler.retrieval(query,
                ck = settings.kg_retriever.retrieval(query,
                                                     [kb.tenant_id for kb in kbs],
                                                     kb_ids,
                                                     embd_mdl,
@@ -146,7 +171,7 @@ class Retrieval(ToolBase, ABC):
            kbinfos = {"chunks": [], "doc_aggs": []}

        if self._param.use_kg and kbs:
            ck = settings.kg_retrievaler.retrieval(query, [kb.tenant_id for kb in kbs], filtered_kb_ids, embd_mdl, LLMBundle(kbs[0].tenant_id, LLMType.CHAT))
            ck = settings.kg_retriever.retrieval(query, [kb.tenant_id for kb in kbs], filtered_kb_ids, embd_mdl, LLMBundle(kbs[0].tenant_id, LLMType.CHAT))
            if ck["content_with_weight"]:
                ck["content"] = ck["content_with_weight"]
                del ck["content_with_weight"]

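Shape of the new meta_data_filter parameter, inferred from the branches above (a sketch, not a documented schema; the manual condition format is hypothetical):

# Let the chat model derive filters from the query and the datasets' metadata:
self._param.meta_data_filter = {"method": "auto"}
# Or pass conditions straight through to meta_filter():
self._param.meta_data_filter = {"method": "manual", "manual": [{"key": "author", "op": "=", "value": "doe"}]}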
@@ -52,6 +52,40 @@ def create_app() -> FastAPI:
        openapi_url="/apispec.json"
    )

    # Customize the OpenAPI schema to support Bearer token authentication
    def custom_openapi():
        if app.openapi_schema:
            return app.openapi_schema
        from fastapi.openapi.utils import get_openapi
        openapi_schema = get_openapi(
            title=app.title,
            version=app.version,
            description=app.description,
            routes=app.routes,
        )
        # Add the security scheme definition (HTTPBearer is auto-registered as "HTTPBearer")
        # Create "components" first if it does not exist yet
        if "components" not in openapi_schema:
            openapi_schema["components"] = {}
        if "securitySchemes" not in openapi_schema["components"]:
            openapi_schema["components"]["securitySchemes"] = {}

        # Drop CustomHTTPBearer (if present) and keep only HTTPBearer
        if "CustomHTTPBearer" in openapi_schema["components"]["securitySchemes"]:
            del openapi_schema["components"]["securitySchemes"]["CustomHTTPBearer"]

        # Add/update the HTTPBearer security scheme (FastAPI's default name)
        openapi_schema["components"]["securitySchemes"]["HTTPBearer"] = {
            "type": "http",
            "scheme": "bearer",
            "bearerFormat": "JWT",
            "description": "Enter the token; the 'Bearer ' prefix is optional and is added automatically"
        }
        app.openapi_schema = openapi_schema
        return app.openapi_schema

    app.openapi = custom_openapi

    # Add the CORS middleware
    app.add_middleware(
        CORSMiddleware,
@@ -124,22 +158,20 @@ def setup_routes(app: FastAPI):
    from api.apps.user_app_fastapi import router as user_router
    from api.apps.kb_app import router as kb_router
    from api.apps.document_app import router as document_router
    from api.apps.file_app import router as file_router
    from api.apps.file2document_app import router as file2document_router
    from api.apps.mcp_server_app import router as mcp_router
    from api.apps.tenant_app import router as tenant_router
    from api.apps.llm_app import router as llm_router
    from api.apps.chunk_app import router as chunk_router
    from api.apps.mcp_server_app import router as mcp_router
    from api.apps.canvas_app import router as canvas_router

    app.include_router(user_router, prefix=f"/{API_VERSION}/user", tags=["User"])
    app.include_router(kb_router, prefix=f"/{API_VERSION}/kb", tags=["KB"])
    app.include_router(kb_router, prefix=f"/{API_VERSION}/kb", tags=["KnowledgeBase"])
    app.include_router(document_router, prefix=f"/{API_VERSION}/document", tags=["Document"])
    app.include_router(file_router, prefix=f"/{API_VERSION}/file", tags=["File"])
    app.include_router(file2document_router, prefix=f"/{API_VERSION}/file2document", tags=["File2Document"])
    app.include_router(mcp_router, prefix=f"/{API_VERSION}/mcp", tags=["MCP"])
    app.include_router(tenant_router, prefix=f"/{API_VERSION}/tenant", tags=["Tenant"])
    app.include_router(llm_router, prefix=f"/{API_VERSION}/llm", tags=["LLM"])
    app.include_router(chunk_router, prefix=f"/{API_VERSION}/chunk", tags=["Chunk"])
    app.include_router(mcp_router, prefix=f"/{API_VERSION}/mcp", tags=["MCP"])
    app.include_router(canvas_router, prefix=f"/{API_VERSION}/canvas", tags=["Canvas"])



def get_current_user_from_token(authorization: str):
    """Resolve the current user from the token"""

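With the HTTPBearer scheme registered, Swagger UI at /docs sends "Authorization: Bearer <token>". The equivalent raw request (host, port, and token are assumptions):

import requests

requests.get(
    "http://localhost:9380/v1/canvas/templates",    # assumed FastAPI bind address
    headers={"Authorization": "Bearer <api-token>"},
)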
@@ -490,7 +490,7 @@ def upload():

@manager.route('/document/upload_and_parse', methods=['POST'])  # noqa: F821
@validate_request("conversation_id")
async def upload_parse():
def upload_parse():
    token = request.headers.get('Authorization').split()[1]
    objs = APIToken.query(token=token)
    if not objs:
@@ -507,7 +507,7 @@ async def upload_parse():
        return get_json_result(
            data=False, message='No file selected!', code=settings.RetCode.ARGUMENT_ERROR)

    doc_ids = await doc_upload_and_parse(request.form.get("conversation_id"), file_objs, objs[0].tenant_id)
    doc_ids = doc_upload_and_parse(request.form.get("conversation_id"), file_objs, objs[0].tenant_id)
    return get_json_result(data=doc_ids)


@@ -536,7 +536,7 @@ def list_chunks():
        )
    kb_ids = KnowledgebaseService.get_kb_ids(tenant_id)

    res = settings.retrievaler.chunk_list(doc_id, tenant_id, kb_ids)
    res = settings.retriever.chunk_list(doc_id, tenant_id, kb_ids)
    res = [
        {
            "content": res_item["content_with_weight"],
@@ -884,7 +884,7 @@ def retrieval():
    if req.get("keyword", False):
        chat_mdl = LLMBundle(kbs[0].tenant_id, LLMType.CHAT)
        question += keyword_extraction(chat_mdl, question)
    ranks = settings.retrievaler.retrieval(question, embd_mdl, kbs[0].tenant_id, kb_ids, page, size,
    ranks = settings.retriever.retrieval(question, embd_mdl, kbs[0].tenant_id, kb_ids, page, size,
                                         similarity_threshold, vector_similarity_weight, top,
                                         doc_ids, rerank_mdl=rerank_mdl, highlight=highlight,
                                         rank_feature=label_question(question, kbs))

@@ -18,11 +18,12 @@ import logging
|
||||
import re
|
||||
import sys
|
||||
from functools import partial
|
||||
from typing import Optional
|
||||
|
||||
import flask
|
||||
import trio
|
||||
from flask import request, Response
|
||||
from flask_login import login_required, current_user
|
||||
from fastapi import APIRouter, Depends, Query, Header, UploadFile, File, Form
|
||||
from fastapi.responses import StreamingResponse, Response
|
||||
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
|
||||
|
||||
from agent.component import LLM
|
||||
from api import settings
|
||||
@@ -36,7 +37,24 @@ from api.db.services.user_service import TenantService
|
||||
from api.db.services.user_canvas_version import UserCanvasVersionService
|
||||
from api.settings import RetCode
|
||||
from api.utils import get_uuid
|
||||
from api.utils.api_utils import get_json_result, server_error_response, validate_request, get_data_error_result
|
||||
from api.utils.api_utils import get_json_result, server_error_response, get_data_error_result
|
||||
from api.apps.models.auth_dependencies import get_current_user
|
||||
from api.apps.models.canvas_models import (
|
||||
DeleteCanvasRequest,
|
||||
SaveCanvasRequest,
|
||||
CompletionRequest,
|
||||
RerunRequest,
|
||||
ResetCanvasRequest,
|
||||
InputFormQuery,
|
||||
DebugRequest,
|
||||
TestDBConnectRequest,
|
||||
ListCanvasQuery,
|
||||
SettingRequest,
|
||||
TraceQuery,
|
||||
ListSessionsQuery,
|
||||
DownloadQuery,
|
||||
UploadQuery,
|
||||
)
|
||||
from agent.canvas import Canvas
|
||||
from peewee import MySQLDatabase, PostgresqlDatabase
|
||||
from api.db.db_models import APIToken, Task
|
||||
@@ -47,18 +65,25 @@ from rag.flow.pipeline import Pipeline
|
||||
from rag.nlp import search
|
||||
from rag.utils.redis_conn import REDIS_CONN
|
||||
|
||||
|
||||
@manager.route('/templates', methods=['GET']) # noqa: F821
|
||||
@login_required
|
||||
def templates():
|
||||
return get_json_result(data=[c.to_dict() for c in CanvasTemplateService.query(canvas_category=CanvasCategory.Agent)])
|
||||
# 创建路由器
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@manager.route('/rm', methods=['POST']) # noqa: F821
|
||||
@validate_request("canvas_ids")
|
||||
@login_required
|
||||
def rm():
|
||||
for i in request.json["canvas_ids"]:
|
||||
@router.get('/templates')
|
||||
async def templates(
|
||||
current_user = Depends(get_current_user)
|
||||
):
|
||||
"""获取画布模板列表"""
|
||||
return get_json_result(data=[c.to_dict() for c in CanvasTemplateService.get_all()])
|
||||
|
||||
|
||||
@router.post('/rm')
|
||||
async def rm(
|
||||
request: DeleteCanvasRequest,
|
||||
current_user = Depends(get_current_user)
|
||||
):
|
||||
"""删除画布"""
|
||||
for i in request.canvas_ids:
|
||||
if not UserCanvasService.accessible(i, current_user.id):
|
||||
return get_json_result(
|
||||
data=False, message='Only owner of canvas authorized for this operation.',
|
||||
@@ -67,16 +92,18 @@ def rm():
|
||||
return get_json_result(data=True)
|
||||
|
||||
|
||||
@manager.route('/set', methods=['POST']) # noqa: F821
|
||||
@validate_request("dsl", "title")
|
||||
@login_required
|
||||
def save():
|
||||
req = request.json
|
||||
@router.post('/set')
|
||||
async def save(
|
||||
request: SaveCanvasRequest,
|
||||
current_user = Depends(get_current_user)
|
||||
):
|
||||
"""保存画布"""
|
||||
req = request.model_dump(exclude_unset=True)
|
||||
if not isinstance(req["dsl"], str):
|
||||
req["dsl"] = json.dumps(req["dsl"], ensure_ascii=False)
|
||||
req["dsl"] = json.loads(req["dsl"])
|
||||
cate = req.get("canvas_category", CanvasCategory.Agent)
|
||||
if "id" not in req:
|
||||
if "id" not in req or not req.get("id"):
|
||||
req["user_id"] = current_user.id
|
||||
if UserCanvasService.query(user_id=current_user.id, title=req["title"].strip(), canvas_category=cate):
|
||||
return get_data_error_result(message=f"{req['title'].strip()} already exists.")
|
||||
@@ -95,21 +122,28 @@ def save():
|
||||
return get_json_result(data=req)
|
||||
|
||||
|
||||
@manager.route('/get/<canvas_id>', methods=['GET']) # noqa: F821
|
||||
@login_required
|
||||
def get(canvas_id):
|
||||
@router.get('/get/{canvas_id}')
|
||||
async def get(
|
||||
canvas_id: str,
|
||||
current_user = Depends(get_current_user)
|
||||
):
|
||||
"""获取画布详情"""
|
||||
if not UserCanvasService.accessible(canvas_id, current_user.id):
|
||||
return get_data_error_result(message="canvas not found.")
|
||||
e, c = UserCanvasService.get_by_canvas_id(canvas_id)
|
||||
return get_json_result(data=c)
|
||||
|
||||
|
||||
@manager.route('/getsse/<canvas_id>', methods=['GET']) # type: ignore # noqa: F821
|
||||
def getsse(canvas_id):
|
||||
token = request.headers.get('Authorization').split()
|
||||
if len(token) != 2:
|
||||
@router.get('/getsse/{canvas_id}')
|
||||
async def getsse(
|
||||
canvas_id: str,
|
||||
authorization: str = Header(..., description="Authorization header")
|
||||
):
|
||||
"""获取画布详情(SSE,通过API token认证)"""
|
||||
token_parts = authorization.split()
|
||||
if len(token_parts) != 2:
|
||||
return get_data_error_result(message='Authorization is not valid!"')
|
||||
token = token[1]
|
||||
token = token_parts[1]
|
||||
objs = APIToken.query(beta=token)
|
||||
if not objs:
|
||||
return get_data_error_result(message='Authentication error: API key is invalid!"')
|
||||
@@ -126,21 +160,23 @@ def getsse(canvas_id):
|
||||
return get_json_result(data=c.to_dict())
|
||||
|
||||
|
||||
@manager.route('/completion', methods=['POST'])  # noqa: F821
@validate_request("id")
@login_required
def run():
    req = request.json
    query = req.get("query", "")
    files = req.get("files", [])
    inputs = req.get("inputs", {})
    user_id = req.get("user_id", current_user.id)
    if not UserCanvasService.accessible(req["id"], current_user.id):
@router.post('/completion')
async def run(
    request: CompletionRequest,
    current_user = Depends(get_current_user)
):
    """Run a canvas (completion/execution)."""
    query = request.query or ""
    files = request.files or []
    inputs = request.inputs or {}
    user_id = request.user_id or current_user.id

    if not UserCanvasService.accessible(request.id, current_user.id):
        return get_json_result(
            data=False, message='Only owner of canvas authorized for this operation.',
            code=RetCode.OPERATING_ERROR)

    e, cvs = UserCanvasService.get_by_id(req["id"])
    e, cvs = UserCanvasService.get_by_id(request.id)
    if not e:
        return get_data_error_result(message="canvas not found.")

@@ -149,43 +185,48 @@ def run():

    if cvs.canvas_category == CanvasCategory.DataFlow:
        task_id = get_uuid()
        Pipeline(cvs.dsl, tenant_id=current_user.id, doc_id=CANVAS_DEBUG_DOC_ID, task_id=task_id, flow_id=req["id"])
        ok, error_message = queue_dataflow(tenant_id=user_id, flow_id=req["id"], task_id=task_id, file=files[0], priority=0)
        Pipeline(cvs.dsl, tenant_id=current_user.id, doc_id=CANVAS_DEBUG_DOC_ID, task_id=task_id, flow_id=request.id)
        ok, error_message = queue_dataflow(tenant_id=user_id, flow_id=request.id, task_id=task_id, file=files[0] if files else None, priority=0)
        if not ok:
            return get_data_error_result(message=error_message)
        return get_json_result(data={"message_id": task_id})

    try:
        canvas = Canvas(cvs.dsl, current_user.id, req["id"])
        canvas = Canvas(cvs.dsl, current_user.id, request.id)
    except Exception as e:
        return server_error_response(e)

    def sse():
    async def sse():
        nonlocal canvas, user_id
        try:
            for ans in canvas.run(query=query, files=files, user_id=user_id, inputs=inputs):
                yield "data:" + json.dumps(ans, ensure_ascii=False) + "\n\n"

            cvs.dsl = json.loads(str(canvas))
            UserCanvasService.update_by_id(req["id"], cvs.to_dict())
            UserCanvasService.update_by_id(request.id, cvs.to_dict())
        except Exception as e:
            logging.exception(e)
            yield "data:" + json.dumps({"code": 500, "message": str(e), "data": False}, ensure_ascii=False) + "\n\n"

    resp = Response(sse(), mimetype="text/event-stream")
    resp.headers.add_header("Cache-control", "no-cache")
    resp.headers.add_header("Connection", "keep-alive")
    resp.headers.add_header("X-Accel-Buffering", "no")
    resp.headers.add_header("Content-Type", "text/event-stream; charset=utf-8")
    return resp
    return StreamingResponse(
        sse(),
        media_type="text/event-stream",
        headers={
            "Cache-control": "no-cache",
            "Connection": "keep-alive",
            "X-Accel-Buffering": "no",
            "Content-Type": "text/event-stream; charset=utf-8"
        }
    )

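The Flask Response(sse(), mimetype=...) pattern maps onto StreamingResponse with an async generator, as the hunk above shows. A self-contained sketch of the shape; the event payloads are placeholders:

import asyncio
import json

from fastapi import FastAPI
from fastapi.responses import StreamingResponse

app = FastAPI()


@app.post("/completion")
async def completion():
    async def sse():
        # Each SSE frame is "data:<json>\n\n"; the client reassembles the stream.
        for i in range(3):
            yield "data:" + json.dumps({"answer": f"chunk {i}"}, ensure_ascii=False) + "\n\n"
            await asyncio.sleep(0)  # yield control back to the event loop

    # "X-Accel-Buffering: no" keeps nginx from buffering the stream.
    return StreamingResponse(
        sse(),
        media_type="text/event-stream",
        headers={"Cache-control": "no-cache", "X-Accel-Buffering": "no"},
    )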
@manager.route('/rerun', methods=['POST'])  # noqa: F821
@validate_request("id", "dsl", "component_id")
@login_required
def rerun():
    req = request.json
    doc = PipelineOperationLogService.get_documents_info(req["id"])
@router.post('/rerun')
async def rerun(
    request: RerunRequest,
    current_user = Depends(get_current_user)
):
    """Rerun a pipeline."""
    doc = PipelineOperationLogService.get_documents_info(request.id)
    if not doc:
        return get_data_error_result(message="Document not found.")
    doc = doc[0]
@@ -198,19 +239,22 @@ def rerun():
    doc["chunk_num"] = 0
    doc["token_num"] = 0
    DocumentService.clear_chunk_num_when_rerun(doc["id"])
    DocumentService.update_by_id(id, doc)
    TaskService.filter_delete([Task.doc_id == id])
    DocumentService.update_by_id(request.id, doc)
    TaskService.filter_delete([Task.doc_id == request.id])

    dsl = req["dsl"]
    dsl["path"] = [req["component_id"]]
    PipelineOperationLogService.update_by_id(req["id"], {"dsl": dsl})
    queue_dataflow(tenant_id=current_user.id, flow_id=req["id"], task_id=get_uuid(), doc_id=doc["id"], priority=0, rerun=True)
    dsl = request.dsl
    dsl["path"] = [request.component_id]
    PipelineOperationLogService.update_by_id(request.id, {"dsl": dsl})
    queue_dataflow(tenant_id=current_user.id, flow_id=request.id, task_id=get_uuid(), doc_id=doc["id"], priority=0, rerun=True)
    return get_json_result(data=True)


@manager.route('/cancel/<task_id>', methods=['PUT'])  # noqa: F821
@login_required
def cancel(task_id):
@router.put('/cancel/{task_id}')
async def cancel(
    task_id: str,
    current_user = Depends(get_current_user)
):
    """Cancel a task."""
    try:
        REDIS_CONN.set(f"{task_id}-cancel", "x")
    except Exception as e:
@@ -218,36 +262,43 @@ def cancel(task_id):
    return get_json_result(data=True)


@manager.route('/reset', methods=['POST'])  # noqa: F821
@validate_request("id")
@login_required
def reset():
    req = request.json
    if not UserCanvasService.accessible(req["id"], current_user.id):
@router.post('/reset')
async def reset(
    request: ResetCanvasRequest,
    current_user = Depends(get_current_user)
):
    """Reset a canvas."""
    if not UserCanvasService.accessible(request.id, current_user.id):
        return get_json_result(
            data=False, message='Only owner of canvas authorized for this operation.',
            code=RetCode.OPERATING_ERROR)
    try:
        e, user_canvas = UserCanvasService.get_by_id(req["id"])
        e, user_canvas = UserCanvasService.get_by_id(request.id)
        if not e:
            return get_data_error_result(message="canvas not found.")

        canvas = Canvas(json.dumps(user_canvas.dsl), current_user.id)
        canvas.reset()
        req["dsl"] = json.loads(str(canvas))
        UserCanvasService.update_by_id(req["id"], {"dsl": req["dsl"]})
        return get_json_result(data=req["dsl"])
        dsl = json.loads(str(canvas))
        UserCanvasService.update_by_id(request.id, {"dsl": dsl})
        return get_json_result(data=dsl)
    except Exception as e:
        return server_error_response(e)

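reset() leans on the canvas object's string form being its JSON DSL: str(canvas) serializes, json.loads turns it back into the dict the service stores. A toy sketch of that round-trip contract; this Canvas is a stand-in, not the real class:

import json


class Canvas:
    """Stand-in with the same str()-is-DSL contract as the real Canvas."""

    def __init__(self, dsl: str, user_id: str):
        self.dsl = json.loads(dsl)
        self.user_id = user_id

    def reset(self) -> None:
        self.dsl["path"] = []  # illustrative: clear execution state

    def __str__(self) -> str:
        return json.dumps(self.dsl, ensure_ascii=False)


canvas = Canvas('{"path": ["node-1"]}', "user-42")
canvas.reset()
dsl = json.loads(str(canvas))  # the same round-trip reset() relies on
assert dsl == {"path": []}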
@manager.route("/upload/<canvas_id>", methods=["POST"]) # noqa: F821
|
||||
def upload(canvas_id):
|
||||
@router.post("/upload/{canvas_id}")
|
||||
async def upload(
|
||||
canvas_id: str,
|
||||
url: Optional[str] = Query(None, description="URL(可选,用于从URL下载)"),
|
||||
file: Optional[UploadFile] = File(None, description="上传的文件")
|
||||
):
|
||||
"""上传文件到画布"""
|
||||
e, cvs = UserCanvasService.get_by_canvas_id(canvas_id)
|
||||
if not e:
|
||||
return get_data_error_result(message="canvas not found.")
|
||||
|
||||
user_id = cvs["user_id"]
|
||||
|
||||
def structured(filename, filetype, blob, content_type):
|
||||
nonlocal user_id
|
||||
if filetype == FileType.PDF.value:
|
||||
@@ -267,7 +318,7 @@ def upload(canvas_id):
|
||||
"preview_url": None
|
||||
}
|
||||
|
||||
if request.args.get("url"):
|
||||
if url:
|
||||
from crawl4ai import (
|
||||
AsyncWebCrawler,
|
||||
BrowserConfig,
|
||||
@@ -277,7 +328,6 @@ def upload(canvas_id):
|
||||
CrawlResult
|
||||
)
|
||||
try:
|
||||
url = request.args.get("url")
|
||||
filename = re.sub(r"\?.*", "", url.split("/")[-1])
|
||||
async def adownload():
|
||||
browser_config = BrowserConfig(
|
||||
@@ -301,61 +351,67 @@ def upload(canvas_id):
|
||||
if page.pdf:
|
||||
if filename.split(".")[-1].lower() != "pdf":
|
||||
filename += ".pdf"
|
||||
return get_json_result(data=structured(filename, "pdf", page.pdf, page.response_headers["content-type"]))
|
||||
return get_json_result(data=structured(filename, "pdf", page.pdf, page.response_headers.get("content-type", "application/pdf")))
|
||||
|
||||
return get_json_result(data=structured(filename, "html", str(page.markdown).encode("utf-8"), page.response_headers["content-type"], user_id))
|
||||
return get_json_result(data=structured(filename, "html", str(page.markdown).encode("utf-8"), page.response_headers.get("content-type", "text/html"), user_id))
|
||||
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
return server_error_response(e)
|
||||
|
||||
file = request.files['file']
|
||||
if not file:
|
||||
return get_data_error_result(message="No file provided.")
|
||||
|
||||
try:
|
||||
DocumentService.check_doc_health(user_id, file.filename)
|
||||
return get_json_result(data=structured(file.filename, filename_type(file.filename), file.read(), file.content_type))
|
||||
blob = await file.read()
|
||||
return get_json_result(data=structured(file.filename, filename_type(file.filename), blob, file.content_type or "application/octet-stream"))
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
return server_error_response(e)
|
||||
|
||||
|
||||
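The key behavioral change in upload is that FastAPI's UploadFile.read() is a coroutine, so the blob must be awaited rather than read synchronously as in Flask. A minimal sketch:

from typing import Optional

from fastapi import FastAPI, File, UploadFile

app = FastAPI()


@app.post("/upload/{canvas_id}")
async def upload(canvas_id: str, file: Optional[UploadFile] = File(None)):
    if not file:
        return {"error": "No file provided."}
    blob = await file.read()  # async in FastAPI; Flask's file.read() was sync
    # content_type can be None for some clients, hence the fallback
    return {
        "canvas_id": canvas_id,
        "filename": file.filename,
        "size": len(blob),
        "content_type": file.content_type or "application/octet-stream",
    }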
@manager.route('/input_form', methods=['GET'])  # noqa: F821
@login_required
def input_form():
    cvs_id = request.args.get("id")
    cpn_id = request.args.get("component_id")
@router.get('/input_form')
async def input_form(
    id: str = Query(..., description="Canvas ID"),
    component_id: str = Query(..., description="Component ID"),
    current_user = Depends(get_current_user)
):
    """Get a component's input form."""
    try:
        e, user_canvas = UserCanvasService.get_by_id(cvs_id)
        e, user_canvas = UserCanvasService.get_by_id(id)
        if not e:
            return get_data_error_result(message="canvas not found.")
        if not UserCanvasService.query(user_id=current_user.id, id=cvs_id):
        if not UserCanvasService.query(user_id=current_user.id, id=id):
            return get_json_result(
                data=False, message='Only owner of canvas authorized for this operation.',
                code=RetCode.OPERATING_ERROR)

        canvas = Canvas(json.dumps(user_canvas.dsl), current_user.id)
        return get_json_result(data=canvas.get_component_input_form(cpn_id))
        return get_json_result(data=canvas.get_component_input_form(component_id))
    except Exception as e:
        return server_error_response(e)


@manager.route('/debug', methods=['POST'])  # noqa: F821
@validate_request("id", "component_id", "params")
@login_required
def debug():
    req = request.json
    if not UserCanvasService.accessible(req["id"], current_user.id):
@router.post('/debug')
async def debug(
    request: DebugRequest,
    current_user = Depends(get_current_user)
):
    """Debug a component."""
    if not UserCanvasService.accessible(request.id, current_user.id):
        return get_json_result(
            data=False, message='Only owner of canvas authorized for this operation.',
            code=RetCode.OPERATING_ERROR)
    try:
        e, user_canvas = UserCanvasService.get_by_id(req["id"])
        e, user_canvas = UserCanvasService.get_by_id(request.id)
        canvas = Canvas(json.dumps(user_canvas.dsl), current_user.id)
        canvas.reset()
        canvas.message_id = get_uuid()
        component = canvas.get_component(req["component_id"])["obj"]
        component = canvas.get_component(request.component_id)["obj"]
        component.reset()

        if isinstance(component, LLM):
            component.set_debug_inputs(req["params"])
        component.invoke(**{k: o["value"] for k,o in req["params"].items()})
            component.set_debug_inputs(request.params)
        component.invoke(**{k: o["value"] for k,o in request.params.items()})
        outputs = component.output()
        for k in outputs.keys():
            if isinstance(outputs[k], partial):
@@ -368,11 +424,13 @@ def debug():
        return server_error_response(e)

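debug() feeds the component a dict of {name: {"value": ...}} entries and unpacks just the values into invoke(); the hunk also checks outputs for functools.partial thunks, which this sketch assumes are materialized by calling them. A toy illustration with a stand-in component:

from functools import partial


class EchoComponent:
    """Stand-in component: invoke() stores kwargs, output() returns them."""

    def __init__(self):
        self._outputs = {}

    def invoke(self, **kwargs):
        self._outputs = dict(kwargs)

    def output(self):
        return self._outputs


params = {"query": {"value": "hello"}, "top_n": {"value": 5}}
component = EchoComponent()
# Same comprehension debug() uses: keep the key, drop the {"value": ...} wrapper.
component.invoke(**{k: o["value"] for k, o in params.items()})
outputs = component.output()
# Assumed handling of streaming outputs delivered as partial thunks.
materialized = {k: (v() if isinstance(v, partial) else v) for k, v in outputs.items()}
assert materialized == {"query": "hello", "top_n": 5}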
@manager.route('/test_db_connect', methods=['POST'])  # noqa: F821
@validate_request("db_type", "database", "username", "host", "port", "password")
@login_required
def test_db_connect():
    req = request.json
@router.post('/test_db_connect')
async def test_db_connect(
    request: TestDBConnectRequest,
    current_user = Depends(get_current_user)
):
    """Test a database connection."""
    req = request.model_dump()
    try:
        if req["db_type"] in ["mysql", "mariadb"]:
            db = MySQLDatabase(req["database"], user=req["username"], host=req["host"], port=req["port"],
@@ -409,6 +467,49 @@ def test_db_connect():
            ibm_db.fetch_assoc(stmt)
            ibm_db.close(conn)
            return get_json_result(data="Database Connection Successful!")
        elif req["db_type"] == 'trino':
            def _parse_catalog_schema(db: str):
                if not db:
                    return None, None
                if "." in db:
                    c, s = db.split(".", 1)
                elif "/" in db:
                    c, s = db.split("/", 1)
                else:
                    c, s = db, "default"
                return c, s
            try:
                import trino
                import os
                from trino.auth import BasicAuthentication
            except Exception:
                return server_error_response("Missing dependency 'trino'. Please install: pip install trino")

            catalog, schema = _parse_catalog_schema(req["database"])
            if not catalog:
                return server_error_response("For Trino, 'database' must be 'catalog.schema' or at least 'catalog'.")

            http_scheme = "https" if os.environ.get("TRINO_USE_TLS", "0") == "1" else "http"

            auth = None
            if http_scheme == "https" and req.get("password"):
                auth = BasicAuthentication(req.get("username") or "ragflow", req["password"])

            conn = trino.dbapi.connect(
                host=req["host"],
                port=int(req["port"] or 8080),
                user=req["username"] or "ragflow",
                catalog=catalog,
                schema=schema or "default",
                http_scheme=http_scheme,
                auth=auth
            )
            cur = conn.cursor()
            cur.execute("SELECT 1")
            cur.fetchall()
            cur.close()
            conn.close()
            return get_json_result(data="Database Connection Successful!")
        else:
            return server_error_response("Unsupported database type.")
        if req["db_type"] != 'mssql':
@@ -421,42 +522,48 @@ def test_db_connect():

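The Trino branch accepts "catalog.schema", "catalog/schema", or a bare catalog with the schema defaulted. A quick sketch of what _parse_catalog_schema yields for each form, mirroring the helper above:

def _parse_catalog_schema(db: str):
    # First "." or "/" splits catalog from schema; a bare name gets "default".
    if not db:
        return None, None
    if "." in db:
        return tuple(db.split(".", 1))
    if "/" in db:
        return tuple(db.split("/", 1))
    return db, "default"


assert _parse_catalog_schema("hive.sales") == ("hive", "sales")
assert _parse_catalog_schema("hive/sales") == ("hive", "sales")
assert _parse_catalog_schema("hive") == ("hive", "default")
assert _parse_catalog_schema("") == (None, None)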
#api get list version dsl of canvas
@manager.route('/getlistversion/<canvas_id>', methods=['GET'])  # noqa: F821
@login_required
def getlistversion(canvas_id):
@router.get('/getlistversion/{canvas_id}')
async def getlistversion(
    canvas_id: str,
    current_user = Depends(get_current_user)
):
    """List canvas versions."""
    try:
        list =sorted([c.to_dict() for c in UserCanvasVersionService.list_by_canvas_id(canvas_id)], key=lambda x: x["update_time"]*-1)
        list = sorted([c.to_dict() for c in UserCanvasVersionService.list_by_canvas_id(canvas_id)], key=lambda x: x["update_time"]*-1)
        return get_json_result(data=list)
    except Exception as e:
        return get_data_error_result(message=f"Error getting history files: {e}")


#api get version dsl of canvas
@manager.route('/getversion/<version_id>', methods=['GET'])  # noqa: F821
@login_required
def getversion( version_id):
@router.get('/getversion/{version_id}')
async def getversion(
    version_id: str,
    current_user = Depends(get_current_user)
):
    """Get canvas version details."""
    try:

        e, version = UserCanvasVersionService.get_by_id(version_id)
        if version:
            return get_json_result(data=version.to_dict())
        return get_data_error_result(message="Version not found.")
    except Exception as e:
        return get_json_result(data=f"Error getting history file: {e}")
        return get_data_error_result(message=f"Error getting history file: {e}")


@manager.route('/list', methods=['GET'])  # noqa: F821
@login_required
def list_canvas():
    keywords = request.args.get("keywords", "")
    page_number = int(request.args.get("page", 0))
    items_per_page = int(request.args.get("page_size", 0))
    orderby = request.args.get("orderby", "create_time")
    canvas_category = request.args.get("canvas_category")
    if request.args.get("desc", "true").lower() == "false":
        desc = False
    else:
        desc = True
    owner_ids = [id for id in request.args.get("owner_ids", "").strip().split(",") if id]
@router.get('/list')
async def list_canvas(
    query: ListCanvasQuery = Depends(),
    current_user = Depends(get_current_user)
):
    """List canvases."""
    keywords = query.keywords or ""
    page_number = int(query.page or 0)
    items_per_page = int(query.page_size or 0)
    orderby = query.orderby or "create_time"
    canvas_category = query.canvas_category
    desc = query.desc.lower() != "false" if query.desc else True
    owner_ids = [id for id in (query.owner_ids or "").strip().split(",") if id]

    if not owner_ids:
        tenants = TenantService.get_joined_tenants_by_user_id(current_user.id)
        tenants = [m["tenant_id"] for m in tenants]
@@ -472,68 +579,73 @@ def list_canvas():
    return get_json_result(data={"canvas": canvas, "total": total})

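list_canvas replaces a pile of request.args.get calls with one query model injected via Depends(). Note the desc flag: to preserve the Flask behavior (descending unless the client passes "false"), the comparison must be `!= "false"`, which is the form used above. A minimal sketch; the ListCanvasQuery fields are assumptions inferred from the handler:

from typing import Optional

from fastapi import Depends, FastAPI
from pydantic import BaseModel

app = FastAPI()


class ListCanvasQuery(BaseModel):
    # Inferred fields; the real model lives in the project's models package.
    keywords: Optional[str] = None
    page: Optional[int] = 0
    page_size: Optional[int] = 0
    orderby: Optional[str] = "create_time"
    desc: Optional[str] = None
    owner_ids: Optional[str] = None


@app.get("/list")
async def list_canvas(query: ListCanvasQuery = Depends()):
    # "desc=false" is the only value that turns descending order off.
    desc = query.desc.lower() != "false" if query.desc else True
    return {"keywords": query.keywords or "", "desc": desc}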
@manager.route('/setting', methods=['POST'])  # noqa: F821
@validate_request("id", "title", "permission")
@login_required
def setting():
    req = request.json
    req["user_id"] = current_user.id

    if not UserCanvasService.accessible(req["id"], current_user.id):
@router.post('/setting')
async def setting(
    request: SettingRequest,
    current_user = Depends(get_current_user)
):
    """Update canvas settings."""
    if not UserCanvasService.accessible(request.id, current_user.id):
        return get_json_result(
            data=False, message='Only owner of canvas authorized for this operation.',
            code=RetCode.OPERATING_ERROR)

    e,flow = UserCanvasService.get_by_id(req["id"])
    e, flow = UserCanvasService.get_by_id(request.id)
    if not e:
        return get_data_error_result(message="canvas not found.")
    flow = flow.to_dict()
    flow["title"] = req["title"]
    flow["title"] = request.title

    for key in ["description", "permission", "avatar"]:
        if value := req.get(key):
        value = getattr(request, key, None)
        if value:
            flow[key] = value

    num= UserCanvasService.update_by_id(req["id"], flow)
    num = UserCanvasService.update_by_id(request.id, flow)
    return get_json_result(data=num)


@manager.route('/trace', methods=['GET'])  # noqa: F821
def trace():
    cvs_id = request.args.get("canvas_id")
    msg_id = request.args.get("message_id")
@router.get('/trace')
async def trace(
    canvas_id: str = Query(..., description="Canvas ID"),
    message_id: str = Query(..., description="Message ID")
):
    """Trace execution logs."""
    try:
        bin = REDIS_CONN.get(f"{cvs_id}-{msg_id}-logs")
        bin = REDIS_CONN.get(f"{canvas_id}-{message_id}-logs")
        if not bin:
            return get_json_result(data={})

        return get_json_result(data=json.loads(bin.encode("utf-8")))
    except Exception as e:
        logging.exception(e)
        return server_error_response(e)

@manager.route('/<canvas_id>/sessions', methods=['GET'])  # noqa: F821
@login_required
def sessions(canvas_id):
@router.get('/{canvas_id}/sessions')
async def sessions(
    canvas_id: str,
    query: ListSessionsQuery = Depends(),
    current_user = Depends(get_current_user)
):
    """List canvas sessions."""
    tenant_id = current_user.id
    if not UserCanvasService.accessible(canvas_id, tenant_id):
        return get_json_result(
            data=False, message='Only owner of canvas authorized for this operation.',
            code=RetCode.OPERATING_ERROR)

    user_id = request.args.get("user_id")
    page_number = int(request.args.get("page", 1))
    items_per_page = int(request.args.get("page_size", 30))
    keywords = request.args.get("keywords")
    from_date = request.args.get("from_date")
    to_date = request.args.get("to_date")
    orderby = request.args.get("orderby", "update_time")
    if request.args.get("desc") == "False" or request.args.get("desc") == "false":
        desc = False
    else:
        desc = True
    user_id = query.user_id
    page_number = int(query.page or 1)
    items_per_page = int(query.page_size or 30)
    keywords = query.keywords
    from_date = query.from_date
    to_date = query.to_date
    orderby = query.orderby or "update_time"
    desc = query.desc.lower() != "false" if query.desc else True
    # dsl defaults to True in all cases except for False and false
    include_dsl = request.args.get("dsl") != "False" and request.args.get("dsl") != "false"
    include_dsl = query.dsl.lower() != "false" if query.dsl else True

    total, sess = API4ConversationService.get_list(canvas_id, tenant_id, page_number, items_per_page, orderby, desc,
                                                   None, user_id, include_dsl, keywords, from_date, to_date)
    try:
@@ -542,9 +654,11 @@ def sessions(canvas_id):
        return server_error_response(e)


@manager.route('/prompts', methods=['GET'])  # noqa: F821
@login_required
def prompts():
@router.get('/prompts')
async def prompts(
    current_user = Depends(get_current_user)
):
    """Get prompt templates."""
    from rag.prompts.generator import ANALYZE_TASK_SYSTEM, ANALYZE_TASK_USER, NEXT_STEP, REFLECT, CITATION_PROMPT_TEMPLATE
    return get_json_result(data={
        "task_analysis": ANALYZE_TASK_SYSTEM +"\n\n"+ ANALYZE_TASK_USER,
@@ -556,9 +670,11 @@ def prompts():
    })


@manager.route('/download', methods=['GET'])  # noqa: F821
def download():
    id = request.args.get("id")
    created_by = request.args.get("created_by")
@router.get('/download')
async def download(
    id: str = Query(..., description="File ID"),
    created_by: str = Query(..., description="Creator ID")
):
    """Download a file."""
    blob = FileService.get_blob(created_by, id)
    return flask.make_response(blob)
    return Response(content=blob)
@@ -16,10 +16,19 @@
import datetime
import json
import re
from typing import Optional, List

import xxhash
from fastapi import APIRouter, Depends, Query, HTTPException
from fastapi import APIRouter, Depends, Query

from api.apps.models.auth_dependencies import get_current_user
from api.apps.models.chunk_models import (
    ListChunksRequest,
    SetChunkRequest,
    SwitchChunksRequest,
    DeleteChunksRequest,
    CreateChunkRequest,
    RetrievalTestRequest,
)

from api import settings
from api.db import LLMType, ParserType
@@ -29,17 +38,7 @@ from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.llm_service import LLMBundle
from api.db.services.search_service import SearchService
from api.db.services.user_service import UserTenantService
from api.models.chunk_models import (
    ListChunkRequest,
    GetChunkRequest,
    SetChunkRequest,
    SwitchChunkRequest,
    RemoveChunkRequest,
    CreateChunkRequest,
    RetrievalTestRequest,
    KnowledgeGraphRequest
)
from api.utils.api_utils import get_data_error_result, get_json_result, server_error_response, get_current_user
from api.utils.api_utils import get_data_error_result, get_json_result, server_error_response
from rag.app.qa import beAdoc, rmPrefix
from rag.app.tag import label_question
from rag.nlp import rag_tokenizer, search
@@ -47,19 +46,20 @@ from rag.prompts.generator import gen_meta_filter, cross_languages, keyword_extr
from rag.settings import PAGERANK_FLD
from rag.utils import rmSpace

# Create the FastAPI router
# Create the router
router = APIRouter()

@router.post('/list')
async def list_chunk(
    request: ListChunkRequest,
    request: ListChunksRequest,
    current_user = Depends(get_current_user)
):
    """List document chunks."""
    doc_id = request.doc_id
    page = request.page
    size = request.size
    question = request.keywords
    page = request.page or 1
    size = request.size or 30
    question = request.keywords or ""
    try:
        tenant_id = DocumentService.get_tenant_id(doc_id)
        if not tenant_id:
@@ -73,7 +73,7 @@ async def list_chunk(
        }
        if request.available_int is not None:
            query["available_int"] = int(request.available_int)
        sres = settings.retrievaler.search(query, search.index_name(tenant_id), kb_ids, highlight=True)
        sres = settings.retriever.search(query, search.index_name(tenant_id), kb_ids, highlight=["content_ltks"])
        res = {"total": sres.total, "chunks": [], "doc": doc.to_dict()}
        for id in sres.ids:
            d = {
@@ -105,6 +105,7 @@ async def get(
    chunk_id: str = Query(..., description="Chunk ID"),
    current_user = Depends(get_current_user)
):
    """Get a document chunk."""
    try:
        chunk = None
        tenants = UserTenantService.query(user_id=current_user.id)
@@ -138,6 +139,7 @@ async def set(
    request: SetChunkRequest,
    current_user = Depends(get_current_user)
):
    """Update a document chunk."""
    d = {
        "id": request.chunk_id,
        "content_with_weight": request.content_with_weight}
@@ -192,9 +194,10 @@ async def set(

@router.post('/switch')
async def switch(
    request: SwitchChunkRequest,
    request: SwitchChunksRequest,
    current_user = Depends(get_current_user)
):
    """Toggle document chunk availability."""
    try:
        e, doc = DocumentService.get_by_id(request.doc_id)
        if not e:
@@ -212,9 +215,10 @@ async def switch(

@router.post('/rm')
async def rm(
    request: RemoveChunkRequest,
    request: DeleteChunksRequest,
    current_user = Depends(get_current_user)
):
    """Delete document chunks."""
    from rag.utils.storage_factory import STORAGE_IMPL
    try:
        e, doc = DocumentService.get_by_id(request.doc_id)
@@ -240,15 +244,16 @@ async def create(
    request: CreateChunkRequest,
    current_user = Depends(get_current_user)
):
    """Create a document chunk."""
    chunk_id = xxhash.xxh64((request.content_with_weight + request.doc_id).encode("utf-8")).hexdigest()
    d = {"id": chunk_id, "content_ltks": rag_tokenizer.tokenize(request.content_with_weight),
         "content_with_weight": request.content_with_weight}
    d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
    d["important_kwd"] = request.important_kwd
    d["important_kwd"] = request.important_kwd or []
    if not isinstance(d["important_kwd"], list):
        return get_data_error_result(message="`important_kwd` is required to be a list")
    d["important_tks"] = rag_tokenizer.tokenize(" ".join(d["important_kwd"]))
    d["question_kwd"] = request.question_kwd
    d["question_kwd"] = request.question_kwd or []
    if not isinstance(d["question_kwd"], list):
        return get_data_error_result(message="`question_kwd` is required to be a list")
    d["question_tks"] = rag_tokenizer.tokenize("\n".join(d["question_kwd"]))
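create() derives the chunk ID deterministically, hashing content plus document ID with xxHash, so re-posting identical content into the same document yields the same ID rather than a duplicate. A quick sketch of the recipe:

import xxhash


def chunk_id(content_with_weight: str, doc_id: str) -> str:
    # Same recipe as create(): 64-bit xxHash of content + doc id, hex-encoded.
    return xxhash.xxh64((content_with_weight + doc_id).encode("utf-8")).hexdigest()


a = chunk_id("The quick brown fox.", "doc-1")
b = chunk_id("The quick brown fox.", "doc-1")
c = chunk_id("The quick brown fox.", "doc-2")
assert a == b  # identical content in the same doc -> same chunk ID
assert a != c  # same content in another doc -> different chunk ID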
@@ -296,8 +301,9 @@ async def retrieval_test(
    request: RetrievalTestRequest,
    current_user = Depends(get_current_user)
):
    page = request.page
    size = request.size
    """Retrieval test."""
    page = request.page or 1
    size = request.size or 30
    question = request.question
    kb_ids = request.kb_id
    if isinstance(kb_ids, str):
@@ -306,10 +312,10 @@ async def retrieval_test(
        return get_json_result(data=False, message='Please specify dataset firstly.',
                               code=settings.RetCode.DATA_ERROR)

    doc_ids = request.doc_ids
    use_kg = request.use_kg
    top = request.top_k
    langs = request.cross_languages
    doc_ids = request.doc_ids or []
    use_kg = request.use_kg or False
    top = request.top_k or 1024
    langs = request.cross_languages or []
    tenant_ids = []

    if request.search_id:
@@ -358,15 +364,16 @@ async def retrieval_test(
            question += keyword_extraction(chat_mdl, question)

        labels = label_question(question, [kb])
        ranks = settings.retrievaler.retrieval(question, embd_mdl, tenant_ids, kb_ids, page, size,
                                               float(request.similarity_threshold),
                                               float(request.vector_similarity_weight),
        ranks = settings.retriever.retrieval(question, embd_mdl, tenant_ids, kb_ids, page, size,
                                             float(request.similarity_threshold or 0.0),
                                             float(request.vector_similarity_weight or 0.3),
                                             top,
                                             doc_ids, rerank_mdl=rerank_mdl, highlight=request.highlight,
                                             doc_ids, rerank_mdl=rerank_mdl,
                                             highlight=request.highlight or False,
                                             rank_feature=labels
                                             )
        if use_kg:
            ck = settings.kg_retrievaler.retrieval(question,
            ck = settings.kg_retriever.retrieval(question,
                                                 tenant_ids,
                                                 kb_ids,
                                                 embd_mdl,
@@ -391,13 +398,14 @@ async def knowledge_graph(
    doc_id: str = Query(..., description="Document ID"),
    current_user = Depends(get_current_user)
):
    """Get the knowledge graph."""
    tenant_id = DocumentService.get_tenant_id(doc_id)
    kb_ids = KnowledgebaseService.get_kb_ids(tenant_id)
    req = {
        "doc_ids": [doc_id],
        "knowledge_graph_kwd": ["graph", "mind_map"]
    }
    sres = settings.retrievaler.search(req, search.index_name(tenant_id), kb_ids)
    sres = settings.retriever.search(req, search.index_name(tenant_id), kb_ids)
    obj = {"graph": {}, "mind_map": {}}
    for id in sres.ids[:2]:
        ty = sres.field[id]["knowledge_graph_kwd"]

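A recurring hardening pattern in the retrieval_test hunk: every optional Pydantic field is coalesced with `or` before being forwarded, so a missing similarity_threshold or top_k no longer raises at the float()/service call. A compact sketch; the field names mirror RetrievalTestRequest and the 0.0/0.3/1024 defaults come from the hunk above (note the caveat that `or` also overrides an explicit 0):

from typing import List, Optional

from pydantic import BaseModel


class RetrievalTestRequest(BaseModel):
    question: str
    kb_id: List[str]
    page: Optional[int] = None
    size: Optional[int] = None
    top_k: Optional[int] = None
    similarity_threshold: Optional[float] = None
    vector_similarity_weight: Optional[float] = None
    highlight: Optional[bool] = None


def effective_params(req: RetrievalTestRequest) -> dict:
    # `or` coalescing: None (and falsy values like 0) fall back to defaults.
    return {
        "page": req.page or 1,
        "size": req.size or 30,
        "top": req.top_k or 1024,
        "similarity_threshold": float(req.similarity_threshold or 0.0),
        "vector_similarity_weight": float(req.vector_similarity_weight or 0.3),
        "highlight": req.highlight or False,
    }


params = effective_params(RetrievalTestRequest(question="q", kb_id=["kb1"]))
assert params["top"] == 1024 and params["vector_similarity_weight"] == 0.3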
@@ -17,15 +17,30 @@ import json
import os.path
import pathlib
import re
import traceback
from pathlib import Path
from typing import List, Optional
from typing import Optional, List

from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile, Query
from fastapi.responses import StreamingResponse
from fastapi.security import HTTPAuthorizationCredentials
from api.utils.api_utils import security
from fastapi import APIRouter, Depends, Query, UploadFile, File, Form
from fastapi.responses import Response

from api.apps.models.auth_dependencies import get_current_user
from api.apps.models.document_models import (
    CreateDocumentRequest,
    WebCrawlRequest,
    ListDocumentsQuery,
    ListDocumentsBody,
    FilterDocumentsRequest,
    GetDocumentInfosRequest,
    ChangeStatusRequest,
    DeleteDocumentRequest,
    RunDocumentRequest,
    RenameDocumentRequest,
    ChangeParserRequest,
    ChangeParserSimpleRequest,
    UploadAndParseRequest,
    ParseRequest,
    SetMetaRequest,
)
from api import settings
from api.common.check_team_permission import check_kb_team_permission
from api.constants import FILE_NAME_LEN_LIMIT, IMG_BASE64_PREFIX
@@ -43,159 +58,29 @@ from api.utils.api_utils import (
    get_data_error_result,
    get_json_result,
    server_error_response,
    validate_request,
)
from api.utils.file_utils import filename_type, get_project_base_directory, thumbnail
from api.utils.web_utils import CONTENT_TYPE_MAP, html2pdf, is_valid_url
from deepdoc.parser.html_parser import RAGFlowHtmlParser
from rag.nlp import search
from rag.nlp import search, rag_tokenizer
from rag.utils.storage_factory import STORAGE_IMPL
from pydantic import BaseModel
from api.db.db_models import User

# Security

# Pydantic models for request/response
class WebCrawlRequest(BaseModel):
    kb_id: str
    name: str
    url: str

class CreateDocumentRequest(BaseModel):
    name: str
    kb_id: str

class DocumentListRequest(BaseModel):
    run_status: List[str] = []
    types: List[str] = []
    suffix: List[str] = []

class DocumentFilterRequest(BaseModel):
    kb_id: str
    keywords: str = ""
    run_status: List[str] = []
    types: List[str] = []
    suffix: List[str] = []

class DocumentInfosRequest(BaseModel):
    doc_ids: List[str]

class ChangeStatusRequest(BaseModel):
    doc_ids: List[str]
    status: str

class RemoveDocumentRequest(BaseModel):
    doc_id: List[str]

class RunDocumentRequest(BaseModel):
    doc_ids: List[str]
    run: int
    delete: bool = False

class RenameDocumentRequest(BaseModel):
    doc_id: str
    name: str

class ChangeParserRequest(BaseModel):
    doc_id: str
    parser_id: str
    pipeline_id: Optional[str] = None
    parser_config: Optional[dict] = None

class UploadAndParseRequest(BaseModel):
    conversation_id: str

class ParseRequest(BaseModel):
    url: Optional[str] = None

class SetMetaRequest(BaseModel):
    doc_id: str
    meta: str

# Dependency injection
async def get_current_user(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """Resolve the current user from the Authorization credentials."""
    from api.db import StatusEnum
    from api.db.services.user_service import UserService
    from fastapi import HTTPException, status
    import logging

    try:
        from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer
    except ImportError:
        # Fall back to jwt if itsdangerous is unavailable
        import jwt
        Serializer = jwt

    jwt = Serializer(secret_key=settings.SECRET_KEY)
    authorization = credentials.credentials

    if authorization:
        try:
            access_token = str(jwt.loads(authorization))

            if not access_token or not access_token.strip():
                raise HTTPException(
                    status_code=status.HTTP_401_UNAUTHORIZED,
                    detail="Authentication attempt with empty access token"
                )

            # Access tokens should be UUIDs (32 hex characters)
            if len(access_token.strip()) < 32:
                raise HTTPException(
                    status_code=status.HTTP_401_UNAUTHORIZED,
                    detail=f"Authentication attempt with invalid token format: {len(access_token)} chars"
                )

            user = UserService.query(
                access_token=access_token, status=StatusEnum.VALID.value
            )
            if user:
                if not user[0].access_token or not user[0].access_token.strip():
                    raise HTTPException(
                        status_code=status.HTTP_401_UNAUTHORIZED,
                        detail=f"User {user[0].email} has empty access_token in database"
                    )
                return user[0]
            else:
                raise HTTPException(
                    status_code=status.HTTP_401_UNAUTHORIZED,
                    detail="Invalid access token"
                )
        except Exception as e:
            logging.warning(f"load_user got exception {e}")
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="Invalid access token"
            )
    else:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Authorization header required"
        )

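This dependency replaces Flask-Login's @login_required: HTTPBearer extracts the credentials, the serializer unwraps the session token, and the handler either receives a user object or a 401 propagates. A trimmed sketch of the wiring, with an in-memory USERS table standing in for UserService:

from fastapi import Depends, FastAPI, HTTPException, status
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer

app = FastAPI()
security = HTTPBearer()
USERS = {"1f" * 16: {"email": "demo@example.com"}}  # token -> user stand-in


async def get_current_user(credentials: HTTPAuthorizationCredentials = Depends(security)):
    token = credentials.credentials
    # Same shape checks as the real dependency: non-empty, at least 32 chars.
    if not token or len(token.strip()) < 32:
        raise HTTPException(status.HTTP_401_UNAUTHORIZED, "Invalid access token")
    user = USERS.get(token)
    if not user:
        raise HTTPException(status.HTTP_401_UNAUTHORIZED, "Invalid access token")
    return user


@app.get("/whoami")
async def whoami(current_user=Depends(get_current_user)):
    # Any route that declares the dependency gets authentication for free.
    return {"email": current_user["email"]}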
# Create router
# Create the router
router = APIRouter()

@router.post("/upload")
|
||||
async def upload(
|
||||
kb_id: str = Form(...),
|
||||
file: List[UploadFile] = File(...),
|
||||
files: List[UploadFile] = File(...),
|
||||
current_user = Depends(get_current_user)
|
||||
):
|
||||
if not kb_id:
|
||||
return get_json_result(data=False, message='Lack of "KB ID"', code=settings.RetCode.ARGUMENT_ERROR)
|
||||
|
||||
if not file:
|
||||
"""上传文档"""
|
||||
if not files:
|
||||
return get_json_result(data=False, message="No file part!", code=settings.RetCode.ARGUMENT_ERROR)
|
||||
|
||||
# Use UploadFile directly - file is already a list from multiple file fields
|
||||
file_objs = file
|
||||
|
||||
for file_obj in file_objs:
|
||||
if file_obj.filename == "":
|
||||
for file_obj in files:
|
||||
if not file_obj.filename or file_obj.filename == "":
|
||||
return get_json_result(data=False, message="No file selected!", code=settings.RetCode.ARGUMENT_ERROR)
|
||||
if len(file_obj.filename.encode("utf-8")) > FILE_NAME_LEN_LIMIT:
|
||||
return get_json_result(data=False, message=f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.", code=settings.RetCode.ARGUMENT_ERROR)
|
||||
@@ -206,29 +91,30 @@ async def upload(
|
||||
if not check_kb_team_permission(kb, current_user.id):
|
||||
return get_json_result(data=False, message="No authorization.", code=settings.RetCode.AUTHENTICATION_ERROR)
|
||||
|
||||
err, files = await FileService.upload_document(kb, file_objs, current_user.id)
|
||||
err, uploaded_files = FileService.upload_document(kb, files, current_user.id)
|
||||
if err:
|
||||
return get_json_result(data=files, message="\n".join(err), code=settings.RetCode.SERVER_ERROR)
|
||||
return get_json_result(data=uploaded_files, message="\n".join(err), code=settings.RetCode.SERVER_ERROR)
|
||||
|
||||
if not files:
|
||||
return get_json_result(data=files, message="There seems to be an issue with your file format. Please verify it is correct and not corrupted.", code=settings.RetCode.DATA_ERROR)
|
||||
files = [f[0] for f in files] # remove the blob
|
||||
if not uploaded_files:
|
||||
return get_json_result(data=uploaded_files, message="There seems to be an issue with your file format. Please verify it is correct and not corrupted.", code=settings.RetCode.DATA_ERROR)
|
||||
files_result = [f[0] for f in uploaded_files] # remove the blob
|
||||
|
||||
return get_json_result(data=files)
|
||||
return get_json_result(data=files_result)
|
||||
|
||||
|
||||
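Renaming the multipart field from `file` to `files` also removes the old shadowing of the parameter by FileService.upload_document's return value. A minimal multi-file endpoint in the same shape:

from typing import List

from fastapi import FastAPI, File, Form, UploadFile

app = FastAPI()


@app.post("/upload")
async def upload(kb_id: str = Form(...), files: List[UploadFile] = File(...)):
    # Declaring List[UploadFile] accepts repeated "files" fields in one request.
    names = []
    for f in files:
        if not f.filename:
            return {"error": "No file selected!"}
        names.append(f.filename)
    return {"kb_id": kb_id, "received": names}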
@router.post("/web_crawl")
|
||||
async def web_crawl(
|
||||
req: WebCrawlRequest,
|
||||
request: WebCrawlRequest,
|
||||
current_user = Depends(get_current_user)
|
||||
):
|
||||
kb_id = req.kb_id
|
||||
if not kb_id:
|
||||
return get_json_result(data=False, message='Lack of "KB ID"', code=settings.RetCode.ARGUMENT_ERROR)
|
||||
name = req.name
|
||||
url = req.url
|
||||
"""网页爬取"""
|
||||
kb_id = request.kb_id
|
||||
name = request.name
|
||||
url = request.url
|
||||
|
||||
if not is_valid_url(url):
|
||||
return get_json_result(data=False, message="The URL format is invalid", code=settings.RetCode.ARGUMENT_ERROR)
|
||||
|
||||
e, kb = KnowledgebaseService.get_by_id(kb_id)
|
||||
if not e:
|
||||
raise LookupError("Can't find this knowledgebase!")
|
||||
@@ -259,6 +145,7 @@ async def web_crawl(
|
||||
"id": get_uuid(),
|
||||
"kb_id": kb.id,
|
||||
"parser_id": kb.parser_id,
|
||||
"pipeline_id": kb.pipeline_id,
|
||||
"parser_config": kb.parser_config,
|
||||
"created_by": current_user.id,
|
||||
"type": filetype,
|
||||
@@ -285,25 +172,26 @@ async def web_crawl(
|
||||
|
||||
@router.post("/create")
|
||||
async def create(
|
||||
req: CreateDocumentRequest,
|
||||
request: CreateDocumentRequest,
|
||||
current_user = Depends(get_current_user)
|
||||
):
|
||||
kb_id = req.kb_id
|
||||
if not kb_id:
|
||||
return get_json_result(data=False, message='Lack of "KB ID"', code=settings.RetCode.ARGUMENT_ERROR)
|
||||
if len(req.name.encode("utf-8")) > FILE_NAME_LEN_LIMIT:
|
||||
"""创建文档"""
|
||||
req = request.model_dump(exclude_unset=True)
|
||||
kb_id = req["kb_id"]
|
||||
|
||||
if len(req["name"].encode("utf-8")) > FILE_NAME_LEN_LIMIT:
|
||||
return get_json_result(data=False, message=f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.", code=settings.RetCode.ARGUMENT_ERROR)
|
||||
|
||||
if req.name.strip() == "":
|
||||
if req["name"].strip() == "":
|
||||
return get_json_result(data=False, message="File name can't be empty.", code=settings.RetCode.ARGUMENT_ERROR)
|
||||
req.name = req.name.strip()
|
||||
req["name"] = req["name"].strip()
|
||||
|
||||
try:
|
||||
e, kb = KnowledgebaseService.get_by_id(kb_id)
|
||||
if not e:
|
||||
return get_data_error_result(message="Can't find this knowledgebase!")
|
||||
|
||||
if DocumentService.query(name=req.name, kb_id=kb_id):
|
||||
if DocumentService.query(name=req["name"], kb_id=kb_id):
|
||||
return get_data_error_result(message="Duplicated document name in the same knowledgebase.")
|
||||
|
||||
kb_root_folder = FileService.get_kb_folder(kb.tenant_id)
|
||||
@@ -326,8 +214,8 @@ async def create(
|
||||
"parser_config": kb.parser_config,
|
||||
"created_by": current_user.id,
|
||||
"type": FileType.VIRTUAL,
|
||||
"name": req.name,
|
||||
"suffix": Path(req.name).suffix.lstrip("."),
|
||||
"name": req["name"],
|
||||
"suffix": Path(req["name"]).suffix.lstrip("."),
|
||||
"location": "",
|
||||
"size": 0,
|
||||
}
|
||||
@@ -342,47 +230,46 @@ async def create(
|
||||
|
||||
@router.post("/list")
|
||||
async def list_docs(
|
||||
kb_id: str = Query(...),
|
||||
keywords: str = Query(""),
|
||||
page: int = Query(0),
|
||||
page_size: int = Query(0),
|
||||
orderby: str = Query("create_time"),
|
||||
desc: str = Query("true"),
|
||||
create_time_from: int = Query(0),
|
||||
create_time_to: int = Query(0),
|
||||
req: DocumentListRequest = None,
|
||||
query: ListDocumentsQuery = Depends(),
|
||||
body: Optional[ListDocumentsBody] = None,
|
||||
current_user = Depends(get_current_user)
|
||||
):
|
||||
if not kb_id:
|
||||
return get_json_result(data=False, message='Lack of "KB ID"', code=settings.RetCode.ARGUMENT_ERROR)
|
||||
"""列出文档"""
|
||||
if body is None:
|
||||
body = ListDocumentsBody()
|
||||
|
||||
kb_id = query.kb_id
|
||||
tenants = UserTenantService.query(user_id=current_user.id)
|
||||
for tenant in tenants:
|
||||
if KnowledgebaseService.query(tenant_id=tenant.tenant_id, id=kb_id):
|
||||
break
|
||||
else:
|
||||
return get_json_result(data=False, message="Only owner of knowledgebase authorized for this operation.", code=settings.RetCode.OPERATING_ERROR)
|
||||
|
||||
keywords = query.keywords or ""
|
||||
page_number = int(query.page or 0)
|
||||
items_per_page = int(query.page_size or 0)
|
||||
orderby = query.orderby or "create_time"
|
||||
desc = query.desc.lower() == "true" if query.desc else True
|
||||
create_time_from = int(query.create_time_from or 0)
|
||||
create_time_to = int(query.create_time_to or 0)
|
||||
|
||||
if desc.lower() == "false":
|
||||
desc_bool = False
|
||||
else:
|
||||
desc_bool = True
|
||||
|
||||
run_status = req.run_status if req else []
|
||||
run_status = body.run_status or []
|
||||
if run_status:
|
||||
invalid_status = {s for s in run_status if s not in VALID_TASK_STATUS}
|
||||
if invalid_status:
|
||||
return get_data_error_result(message=f"Invalid filter run status conditions: {', '.join(invalid_status)}")
|
||||
|
||||
types = req.types if req else []
|
||||
types = body.types or []
|
||||
if types:
|
||||
invalid_types = {t for t in types if t not in VALID_FILE_TYPES}
|
||||
if invalid_types:
|
||||
return get_data_error_result(message=f"Invalid filter conditions: {', '.join(invalid_types)} type{'s' if len(invalid_types) > 1 else ''}")
|
||||
|
||||
suffix = req.suffix if req else []
|
||||
suffix = body.suffix or []
|
||||
|
||||
try:
|
||||
docs, tol = DocumentService.get_by_kb_id(kb_id, page, page_size, orderby, desc_bool, keywords, run_status, types, suffix)
|
||||
docs, tol = DocumentService.get_by_kb_id(kb_id, page_number, items_per_page, orderby, desc, keywords, run_status, types, suffix)
|
||||
|
||||
if create_time_from or create_time_to:
|
||||
filtered_docs = []
|
||||
@@ -403,12 +290,11 @@ async def list_docs(
|
||||
|
||||
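list_docs mixes three injection styles in one POST handler: a query model via Depends(), an optional JSON body, and the auth dependency. A compact sketch of that combination; the field names are assumptions mirroring the handler:

from typing import List, Optional

from fastapi import Depends, FastAPI
from pydantic import BaseModel

app = FastAPI()


class ListDocumentsQuery(BaseModel):
    kb_id: str
    keywords: Optional[str] = None
    page: Optional[int] = 0
    page_size: Optional[int] = 0


class ListDocumentsBody(BaseModel):
    run_status: List[str] = []
    types: List[str] = []
    suffix: List[str] = []


@app.post("/list")
async def list_docs(
    query: ListDocumentsQuery = Depends(),     # bound from ?kb_id=...&page=...
    body: Optional[ListDocumentsBody] = None,  # bound from the JSON body, if any
):
    body = body or ListDocumentsBody()  # an absent body means all-default filters
    return {"kb_id": query.kb_id, "filters": body.model_dump()}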
@router.post("/filter")
|
||||
async def get_filter(
|
||||
req: DocumentFilterRequest,
|
||||
request: FilterDocumentsRequest,
|
||||
current_user = Depends(get_current_user)
|
||||
):
|
||||
kb_id = req.kb_id
|
||||
if not kb_id:
|
||||
return get_json_result(data=False, message='Lack of "KB ID"', code=settings.RetCode.ARGUMENT_ERROR)
|
||||
"""过滤文档"""
|
||||
kb_id = request.kb_id
|
||||
tenants = UserTenantService.query(user_id=current_user.id)
|
||||
for tenant in tenants:
|
||||
if KnowledgebaseService.query(tenant_id=tenant.tenant_id, id=kb_id):
|
||||
@@ -416,15 +302,16 @@ async def get_filter(
|
||||
else:
|
||||
return get_json_result(data=False, message="Only owner of knowledgebase authorized for this operation.", code=settings.RetCode.OPERATING_ERROR)
|
||||
|
||||
keywords = req.keywords
|
||||
suffix = req.suffix
|
||||
run_status = req.run_status
|
||||
keywords = request.keywords or ""
|
||||
suffix = request.suffix or []
|
||||
run_status = request.run_status or []
|
||||
|
||||
if run_status:
|
||||
invalid_status = {s for s in run_status if s not in VALID_TASK_STATUS}
|
||||
if invalid_status:
|
||||
return get_data_error_result(message=f"Invalid filter run status conditions: {', '.join(invalid_status)}")
|
||||
|
||||
types = req.types
|
||||
types = request.types or []
|
||||
if types:
|
||||
invalid_types = {t for t in types if t not in VALID_FILE_TYPES}
|
||||
if invalid_types:
|
||||
@@ -439,10 +326,11 @@ async def get_filter(
|
||||
|
||||
@router.post("/infos")
|
||||
async def docinfos(
|
||||
req: DocumentInfosRequest,
|
||||
request: GetDocumentInfosRequest,
|
||||
current_user = Depends(get_current_user)
|
||||
):
|
||||
doc_ids = req.doc_ids
|
||||
"""获取文档信息"""
|
||||
doc_ids = request.doc_ids
|
||||
for doc_id in doc_ids:
|
||||
if not DocumentService.accessible(doc_id, current_user.id):
|
||||
return get_json_result(data=False, message="No authorization.", code=settings.RetCode.AUTHENTICATION_ERROR)
|
||||
@@ -452,8 +340,9 @@ async def docinfos(
|
||||
|
||||
@router.get("/thumbnails")
|
||||
async def thumbnails(
|
||||
doc_ids: List[str] = Query(...)
|
||||
doc_ids: List[str] = Query(..., description="文档ID列表"),
|
||||
):
|
||||
"""获取文档缩略图"""
|
||||
if not doc_ids:
|
||||
return get_json_result(data=False, message='Lack of "Document ID"', code=settings.RetCode.ARGUMENT_ERROR)
|
||||
|
||||
@@ -471,14 +360,12 @@ async def thumbnails(
|
||||
|
||||
@router.post("/change_status")
|
||||
async def change_status(
|
||||
req: ChangeStatusRequest,
|
||||
request: ChangeStatusRequest,
|
||||
current_user = Depends(get_current_user)
|
||||
):
|
||||
doc_ids = req.doc_ids
|
||||
status = str(req.status)
|
||||
|
||||
if status not in ["0", "1"]:
|
||||
return get_json_result(data=False, message='"Status" must be either 0 or 1!', code=settings.RetCode.ARGUMENT_ERROR)
|
||||
"""修改文档状态"""
|
||||
doc_ids = request.doc_ids
|
||||
status = request.status
|
||||
|
||||
result = {}
|
||||
for doc_id in doc_ids:
|
||||
@@ -511,10 +398,11 @@ async def change_status(
|
||||
|
||||
@router.post("/rm")
|
||||
async def rm(
|
||||
req: RemoveDocumentRequest,
|
||||
request: DeleteDocumentRequest,
|
||||
current_user = Depends(get_current_user)
|
||||
):
|
||||
doc_ids = req.doc_id
|
||||
"""删除文档"""
|
||||
doc_ids = request.doc_id
|
||||
if isinstance(doc_ids, str):
|
||||
doc_ids = [doc_ids]
|
||||
|
||||
@@ -570,17 +458,19 @@ async def rm(
|
||||
|
||||
@router.post("/run")
|
||||
async def run(
|
||||
req: RunDocumentRequest,
|
||||
request: RunDocumentRequest,
|
||||
current_user = Depends(get_current_user)
|
||||
):
|
||||
for doc_id in req.doc_ids:
|
||||
"""运行文档解析"""
|
||||
for doc_id in request.doc_ids:
|
||||
if not DocumentService.accessible(doc_id, current_user.id):
|
||||
return get_json_result(data=False, message="No authorization.", code=settings.RetCode.AUTHENTICATION_ERROR)
|
||||
|
||||
try:
|
||||
kb_table_num_map = {}
|
||||
for id in req.doc_ids:
|
||||
info = {"run": str(req.run), "progress": 0}
|
||||
if req.run == int(TaskStatus.RUNNING.value) and req.delete:
|
||||
for id in request.doc_ids:
|
||||
info = {"run": str(request.run), "progress": 0}
|
||||
if str(request.run) == TaskStatus.RUNNING.value and request.delete:
|
||||
info["progress_msg"] = ""
|
||||
info["chunk_num"] = 0
|
||||
info["token_num"] = 0
|
||||
@@ -592,21 +482,21 @@ async def run(
|
||||
if not e:
|
||||
return get_data_error_result(message="Document not found!")
|
||||
|
||||
if req.run == int(TaskStatus.CANCEL.value):
|
||||
if str(request.run) == TaskStatus.CANCEL.value:
|
||||
if str(doc.run) == TaskStatus.RUNNING.value:
|
||||
cancel_all_task_of(id)
|
||||
else:
|
||||
return get_data_error_result(message="Cannot cancel a task that is not in RUNNING status")
|
||||
if all([req.delete, req.run == int(TaskStatus.RUNNING.value), str(doc.run) == TaskStatus.DONE.value]):
|
||||
if all([not request.delete, str(request.run) == TaskStatus.RUNNING.value, str(doc.run) == TaskStatus.DONE.value]):
|
||||
DocumentService.clear_chunk_num_when_rerun(doc.id)
|
||||
|
||||
DocumentService.update_by_id(id, info)
|
||||
if req.delete:
|
||||
if request.delete:
|
||||
TaskService.filter_delete([Task.doc_id == id])
|
||||
if settings.docStoreConn.indexExist(search.index_name(tenant_id), doc.kb_id):
|
||||
settings.docStoreConn.delete({"doc_id": id}, search.index_name(tenant_id), doc.kb_id)
|
||||
|
||||
if req.run == int(TaskStatus.RUNNING.value):
|
||||
if str(request.run) == TaskStatus.RUNNING.value:
|
||||
doc = doc.to_dict()
|
||||
doc["tenant_id"] = tenant_id
|
||||
|
||||
@@ -628,37 +518,53 @@ async def run(
|
||||
|
||||
return get_json_result(data=True)
|
||||
except Exception as e:
|
||||
traceback.print_exc()
|
||||
return server_error_response(e)
|
||||
|
||||
|
||||
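The run-status comparisons above are normalized toward strings: instead of casting the enum's string value to int (`req.run == int(TaskStatus.RUNNING.value)`), the new code casts the request field (`str(request.run) == TaskStatus.RUNNING.value`), which works whether the model carries the value as int or str. A toy illustration; the two-member enum is a stand-in for the real TaskStatus:

from enum import Enum


class TaskStatus(Enum):
    # Stand-in: RAGFlow stores status values as strings.
    RUNNING = "1"
    CANCEL = "2"


def is_running(run) -> bool:
    # str() makes the check agnostic to whether `run` arrived as int or str.
    return str(run) == TaskStatus.RUNNING.value


assert is_running(1) and is_running("1")
assert not is_running(2) and not is_running("2")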
@router.post("/rename")
|
||||
async def rename(
|
||||
req: RenameDocumentRequest,
|
||||
request: RenameDocumentRequest,
|
||||
current_user = Depends(get_current_user)
|
||||
):
|
||||
if not DocumentService.accessible(req.doc_id, current_user.id):
|
||||
"""重命名文档"""
|
||||
if not DocumentService.accessible(request.doc_id, current_user.id):
|
||||
return get_json_result(data=False, message="No authorization.", code=settings.RetCode.AUTHENTICATION_ERROR)
|
||||
|
||||
try:
|
||||
e, doc = DocumentService.get_by_id(req.doc_id)
|
||||
e, doc = DocumentService.get_by_id(request.doc_id)
|
||||
if not e:
|
||||
return get_data_error_result(message="Document not found!")
|
||||
if pathlib.Path(req.name.lower()).suffix != pathlib.Path(doc.name.lower()).suffix:
|
||||
if pathlib.Path(request.name.lower()).suffix != pathlib.Path(doc.name.lower()).suffix:
|
||||
return get_json_result(data=False, message="The extension of file can't be changed", code=settings.RetCode.ARGUMENT_ERROR)
|
||||
if len(req.name.encode("utf-8")) > FILE_NAME_LEN_LIMIT:
|
||||
if len(request.name.encode("utf-8")) > FILE_NAME_LEN_LIMIT:
|
||||
return get_json_result(data=False, message=f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.", code=settings.RetCode.ARGUMENT_ERROR)
|
||||
|
||||
for d in DocumentService.query(name=req.name, kb_id=doc.kb_id):
|
||||
if d.name == req.name:
|
||||
for d in DocumentService.query(name=request.name, kb_id=doc.kb_id):
|
||||
if d.name == request.name:
|
||||
return get_data_error_result(message="Duplicated document name in the same knowledgebase.")
|
||||
|
||||
if not DocumentService.update_by_id(req.doc_id, {"name": req.name}):
|
||||
if not DocumentService.update_by_id(request.doc_id, {"name": request.name}):
|
||||
return get_data_error_result(message="Database error (Document rename)!")
|
||||
|
||||
informs = File2DocumentService.get_by_document_id(req.doc_id)
|
||||
informs = File2DocumentService.get_by_document_id(request.doc_id)
|
||||
if informs:
|
||||
e, file = FileService.get_by_id(informs[0].file_id)
|
||||
FileService.update_by_id(file.id, {"name": req.name})
|
||||
FileService.update_by_id(file.id, {"name": request.name})
|
||||
|
||||
tenant_id = DocumentService.get_tenant_id(request.doc_id)
|
||||
title_tks = rag_tokenizer.tokenize(request.name)
|
||||
es_body = {
|
||||
"docnm_kwd": request.name,
|
||||
"title_tks": title_tks,
|
||||
"title_sm_tks": rag_tokenizer.fine_grained_tokenize(title_tks),
|
||||
}
|
||||
if settings.docStoreConn.indexExist(search.index_name(tenant_id), doc.kb_id):
|
||||
settings.docStoreConn.update(
|
||||
{"doc_id": request.doc_id},
|
||||
es_body,
|
||||
search.index_name(tenant_id),
|
||||
doc.kb_id,
|
||||
)
|
||||
|
||||
return get_json_result(data=True)
|
||||
except Exception as e:
|
||||
@@ -667,6 +573,7 @@ async def rename(
|
||||
|
||||
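New in this hunk: a rename now propagates to the search index, re-tokenizing the title and updating the document's chunks in place. A sketch of the update payload; the tokenizer below is a naive stand-in for rag_tokenizer, not the real implementation:

import re


def tokenize(name: str) -> str:
    # Naive stand-in for rag_tokenizer.tokenize: lowercase, split on non-word chars.
    return " ".join(t for t in re.split(r"\W+", name.lower()) if t)


def rename_index_body(new_name: str) -> dict:
    title_tks = tokenize(new_name)
    # Keys mirror the es_body above: keyword field plus coarse/fine token fields.
    return {
        "docnm_kwd": new_name,
        "title_tks": title_tks,
        "title_sm_tks": title_tks,  # the real code uses fine_grained_tokenize here
    }


assert rename_index_body("Q3 Report.pdf")["title_tks"] == "q3 report pdf"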
@router.get("/get/{doc_id}")
|
||||
async def get(doc_id: str):
|
||||
"""获取文档文件"""
|
||||
try:
|
||||
e, doc = DocumentService.get_by_id(doc_id)
|
||||
if not e:
|
||||
@@ -677,70 +584,79 @@ async def get(doc_id: str):
|
||||
|
||||
ext = re.search(r"\.([^.]+)$", doc.name.lower())
|
||||
ext = ext.group(1) if ext else None
|
||||
|
||||
content_type = "application/octet-stream"
|
||||
if ext:
|
||||
if doc.type == FileType.VISUAL.value:
|
||||
media_type = CONTENT_TYPE_MAP.get(ext, f"image/{ext}")
|
||||
content_type = CONTENT_TYPE_MAP.get(ext, f"image/{ext}")
|
||||
else:
|
||||
media_type = CONTENT_TYPE_MAP.get(ext, f"application/{ext}")
|
||||
else:
|
||||
media_type = "application/octet-stream"
|
||||
|
||||
return StreamingResponse(
|
||||
iter([content]),
|
||||
media_type=media_type,
|
||||
headers={"Content-Disposition": f"attachment; filename={doc.name}"}
|
||||
)
|
||||
content_type = CONTENT_TYPE_MAP.get(ext, f"application/{ext}")
|
||||
|
||||
return Response(content=content, media_type=content_type)
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
|
||||
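The download path collapses a StreamingResponse over a one-item iterator into a plain Response, with the media type derived from the extension and falling back per file class. A compact sketch of that resolution; the two-entry CONTENT_TYPE_MAP is an illustrative subset:

CONTENT_TYPE_MAP = {"pdf": "application/pdf", "png": "image/png"}  # illustrative subset


def resolve_media_type(name: str, is_visual: bool) -> str:
    ext = name.rsplit(".", 1)[-1].lower() if "." in name else None
    if not ext:
        return "application/octet-stream"
    # Known extensions map directly; unknown ones fall back by file class.
    fallback = f"image/{ext}" if is_visual else f"application/{ext}"
    return CONTENT_TYPE_MAP.get(ext, fallback)


assert resolve_media_type("report.pdf", is_visual=False) == "application/pdf"
assert resolve_media_type("scan.tiff", is_visual=True) == "image/tiff"
assert resolve_media_type("README", is_visual=False) == "application/octet-stream"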
@router.post("/change_parser")
async def change_parser(
    req: ChangeParserRequest,
    request: ChangeParserSimpleRequest,
    current_user = Depends(get_current_user)
):
    if not DocumentService.accessible(req.doc_id, current_user.id):
    """Change the document parser."""
    if not DocumentService.accessible(request.doc_id, current_user.id):
        return get_json_result(data=False, message="No authorization.", code=settings.RetCode.AUTHENTICATION_ERROR)

    e, doc = DocumentService.get_by_id(req.doc_id)
    e, doc = DocumentService.get_by_id(request.doc_id)
    if not e:
        return get_data_error_result(message="Document not found!")

    def reset_doc():
    def reset_doc(update_data_override=None):
        nonlocal doc
        e = DocumentService.update_by_id(doc.id, {"parser_id": req.parser_id, "progress": 0, "progress_msg": "", "run": TaskStatus.UNSTART.value})
        update_data = update_data_override or {}
        if request.pipeline_id is not None:
            update_data["pipeline_id"] = request.pipeline_id
        if request.parser_id is not None:
            update_data["parser_id"] = request.parser_id
        update_data.update({
            "progress": 0,
            "progress_msg": "",
            "run": TaskStatus.UNSTART.value
        })
        e = DocumentService.update_by_id(doc.id, update_data)
        if not e:
            return get_data_error_result(message="Document not found!")
        if doc.token_num > 0:
            e = DocumentService.increment_chunk_num(doc.id, doc.kb_id, doc.token_num * -1, doc.chunk_num * -1, doc.process_duration * -1)
            if not e:
                return get_data_error_result(message="Document not found!")
        tenant_id = DocumentService.get_tenant_id(req.doc_id)
        tenant_id = DocumentService.get_tenant_id(request.doc_id)
        if not tenant_id:
            return get_data_error_result(message="Tenant not found!")
        if settings.docStoreConn.indexExist(search.index_name(tenant_id), doc.kb_id):
            settings.docStoreConn.delete({"doc_id": doc.id}, search.index_name(tenant_id), doc.kb_id)

    try:
        if req.pipeline_id:
            if doc.pipeline_id == req.pipeline_id:
        if request.pipeline_id is not None and request.pipeline_id != "":
            if doc.pipeline_id == request.pipeline_id:
                return get_json_result(data=True)
            DocumentService.update_by_id(doc.id, {"pipeline_id": req.pipeline_id})
            reset_doc()
            reset_doc({"pipeline_id": request.pipeline_id})
            return get_json_result(data=True)

        if doc.parser_id.lower() == req.parser_id.lower():
            if req.parser_config:
                if req.parser_config == doc.parser_config:
        if request.parser_id is None:
            return get_json_result(data=False, message="Missing parser_id or pipeline_id", code=settings.RetCode.ARGUMENT_ERROR)

        if doc.parser_id.lower() == request.parser_id.lower():
            if request.parser_config is not None:
                if request.parser_config == doc.parser_config:
                    return get_json_result(data=True)
            else:
                return get_json_result(data=True)

        if (doc.type == FileType.VISUAL and req.parser_id != "picture") or (re.search(r"\.(ppt|pptx|pages)$", doc.name) and req.parser_id != "presentation"):
        if (doc.type == FileType.VISUAL and request.parser_id != "picture") or (re.search(r"\.(ppt|pptx|pages)$", doc.name) and request.parser_id != "presentation"):
            return get_data_error_result(message="Not supported yet!")
        if req.parser_config:
            DocumentService.update_parser_config(doc.id, req.parser_config)

        if request.parser_config is not None:
            DocumentService.update_parser_config(doc.id, request.parser_config)

        reset_doc()
        return get_json_result(data=True)
    except Exception as e:
@@ -749,16 +665,14 @@ async def change_parser(

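The new handler treats request.parser_id, request.pipeline_id and request.parser_config as individually optional, so ChangeParserSimpleRequest presumably looks roughly like the following (a hypothetical reconstruction from usage; the committed model is defined elsewhere):

from typing import Optional
from pydantic import BaseModel

class ChangeParserSimpleRequest(BaseModel):
    doc_id: str
    parser_id: Optional[str] = None       # built-in parser id, e.g. "naive"
    pipeline_id: Optional[str] = None     # custom pipeline, takes precedence when set
    parser_config: Optional[dict] = None  # parser-specific settings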
@router.get("/image/{image_id}")
async def get_image(image_id: str):
    """Get an image."""
    try:
        arr = image_id.split("-")
        if len(arr) != 2:
            return get_data_error_result(message="Image not found.")
        bkt, nm = image_id.split("-")
        content = STORAGE_IMPL.get(bkt, nm)
        return StreamingResponse(
            iter([content]),
            media_type="image/JPEG"
        )
        return Response(content=content, media_type="image/JPEG")
    except Exception as e:
        return server_error_response(e)

@@ -769,28 +683,28 @@ async def upload_and_parse(
    files: List[UploadFile] = File(...),
    current_user = Depends(get_current_user)
):
    """Upload and parse."""
    if not files:
        return get_json_result(data=False, message="No file part!", code=settings.RetCode.ARGUMENT_ERROR)

    # Use UploadFile directly
    file_objs = files

    for file_obj in file_objs:
        if file_obj.filename == "":
    for file_obj in files:
        if not file_obj.filename or file_obj.filename == "":
            return get_json_result(data=False, message="No file selected!", code=settings.RetCode.ARGUMENT_ERROR)

    doc_ids = await doc_upload_and_parse(conversation_id, file_objs, current_user.id)
    doc_ids = doc_upload_and_parse(conversation_id, files, current_user.id)

    return get_json_result(data=doc_ids)


@router.post("/parse")
async def parse(
    req: ParseRequest = None,
    files: List[UploadFile] = File(None),
    request: Optional[ParseRequest] = None,
    files: Optional[List[UploadFile]] = File(None),
    current_user = Depends(get_current_user)
):
    url = req.url if req else ""
    """Parse documents."""
    url = request.url if request else None

    if url:
        if not is_valid_url(url):
            return get_json_result(data=False, message="The URL format is invalid", code=settings.RetCode.ARGUMENT_ERROR)
@@ -828,46 +742,37 @@ async def parse(
        if not r or not r.group(1):
            return get_json_result(data=False, message="Can't identify downloaded file", code=settings.RetCode.ARGUMENT_ERROR)
        f = File(r.group(1), os.path.join(download_path, r.group(1)))
        txt = await FileService.parse_docs([f], current_user.id)
        txt = FileService.parse_docs([f], current_user.id)
        return get_json_result(data=txt)

    if not files:
        return get_json_result(data=False, message="No file part!", code=settings.RetCode.ARGUMENT_ERROR)

    # Use UploadFile directly
    file_objs = files
    txt = await FileService.parse_docs(file_objs, current_user.id)
    txt = FileService.parse_docs(files, current_user.id)

    return get_json_result(data=txt)


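is_valid_url is imported from elsewhere in the codebase; a minimal sketch of what such a check typically does (an assumption, not the project's actual implementation):

from urllib.parse import urlparse

def is_valid_url(url: str) -> bool:
    # Accept only absolute http(s) URLs that have a host component.
    try:
        parts = urlparse(url)
        return parts.scheme in ("http", "https") and bool(parts.netloc)
    except ValueError:
        return False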
@router.post("/set_meta")
async def set_meta(
    req: SetMetaRequest,
    request: SetMetaRequest,
    current_user = Depends(get_current_user)
):
    if not DocumentService.accessible(req.doc_id, current_user.id):
    """Set document metadata."""
    if not DocumentService.accessible(request.doc_id, current_user.id):
        return get_json_result(data=False, message="No authorization.", code=settings.RetCode.AUTHENTICATION_ERROR)

    try:
        meta = json.loads(req.meta)
        if not isinstance(meta, dict):
            return get_json_result(data=False, message="Only dictionary type supported.", code=settings.RetCode.ARGUMENT_ERROR)
        for k, v in meta.items():
            if not isinstance(v, str) and not isinstance(v, int) and not isinstance(v, float):
                return get_json_result(data=False, message=f"The type is not supported: {v}", code=settings.RetCode.ARGUMENT_ERROR)
    except Exception as e:
        return get_json_result(data=False, message=f"Json syntax error: {e}", code=settings.RetCode.ARGUMENT_ERROR)
        if not isinstance(meta, dict):
            return get_json_result(data=False, message='Meta data should be in Json map format, like {"key": "value"}', code=settings.RetCode.ARGUMENT_ERROR)

    try:
        e, doc = DocumentService.get_by_id(req.doc_id)
        meta = json.loads(request.meta)

        e, doc = DocumentService.get_by_id(request.doc_id)
        if not e:
            return get_data_error_result(message="Document not found!")

        if not DocumentService.update_by_id(req.doc_id, {"meta_fields": meta}):
        if not DocumentService.update_by_id(request.doc_id, {"meta_fields": meta}):
            return get_data_error_result(message="Database error (meta updates)!")

        return get_json_result(data=True)
    except Exception as e:
        return server_error_response(e)


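Per the validation above, meta is sent as a JSON string that must decode to an object. A request that passes the checks (hypothetical client snippet; host, API prefix and token are placeholders, not confirmed by this commit):

import json
import requests  # third-party HTTP client

payload = {
    "doc_id": "<doc-id>",  # placeholder document id
    # meta is a JSON *string*; the server parses it with json.loads
    "meta": json.dumps({"author": "Alice", "year": 2024, "score": 0.9}),
}
resp = requests.post(
    "http://localhost:9380/v1/document/set_meta",  # assumed host/prefix
    json=payload,
    headers={"Authorization": "Bearer <access_token>"},
)
print(resp.json())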
@@ -15,15 +15,12 @@
#

from pathlib import Path
from typing import List

from fastapi import APIRouter, Depends
from fastapi.security import HTTPAuthorizationCredentials
from api.utils.api_utils import security

from api.db.services.file2document_service import File2DocumentService
from api.db.services.file_service import FileService

from flask import request
from flask_login import login_required, current_user
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
from api.utils import get_uuid
@@ -31,91 +28,15 @@ from api.db import FileType
from api.db.services.document_service import DocumentService
from api import settings
from api.utils.api_utils import get_json_result
from pydantic import BaseModel

# Security

# Pydantic models for request/response
class ConvertRequest(BaseModel):
    file_ids: List[str]
    kb_ids: List[str]

class RemoveFile2DocumentRequest(BaseModel):
    file_ids: List[str]

# Dependency injection
async def get_current_user(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """Get the current user."""
    from api.db import StatusEnum
    from api.db.services.user_service import UserService
    from fastapi import HTTPException, status
    import logging

    try:
        from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer
    except ImportError:
        # Fall back to jwt if itsdangerous is unavailable
        import jwt
        Serializer = jwt

    jwt = Serializer(secret_key=settings.SECRET_KEY)
    authorization = credentials.credentials

    if authorization:
        try:
            access_token = str(jwt.loads(authorization))

            if not access_token or not access_token.strip():
                raise HTTPException(
                    status_code=status.HTTP_401_UNAUTHORIZED,
                    detail="Authentication attempt with empty access token"
                )

            # Access tokens should be UUIDs (32 hex characters)
            if len(access_token.strip()) < 32:
                raise HTTPException(
                    status_code=status.HTTP_401_UNAUTHORIZED,
                    detail=f"Authentication attempt with invalid token format: {len(access_token)} chars"
                )

            user = UserService.query(
                access_token=access_token, status=StatusEnum.VALID.value
            )
            if user:
                if not user[0].access_token or not user[0].access_token.strip():
                    raise HTTPException(
                        status_code=status.HTTP_401_UNAUTHORIZED,
                        detail=f"User {user[0].email} has empty access_token in database"
                    )
                return user[0]
            else:
                raise HTTPException(
                    status_code=status.HTTP_401_UNAUTHORIZED,
                    detail="Invalid access token"
                )
        except Exception as e:
            logging.warning(f"load_user got exception {e}")
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="Invalid access token"
            )
    else:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Authorization header required"
        )

# Create router
router = APIRouter()


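The dependency above consumes HTTPAuthorizationCredentials from a shared `security` object imported from api.utils.api_utils; with fastapi.security that is typically an HTTPBearer instance, roughly as follows (an assumption about code outside this hunk):

from fastapi.security import HTTPBearer

# Parses "Authorization: Bearer <token>" and hands the token to dependencies
# as HTTPAuthorizationCredentials(scheme="Bearer", credentials="<token>").
security = HTTPBearer(auto_error=True)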
@router.post('/convert')
async def convert(
    req: ConvertRequest,
    current_user = Depends(get_current_user)
):
    kb_ids = req.kb_ids
    file_ids = req.file_ids
@manager.route('/convert', methods=['POST'])  # noqa: F821
@login_required
@validate_request("file_ids", "kb_ids")
def convert():
    req = request.json
    kb_ids = req["kb_ids"]
    file_ids = req["file_ids"]
    file2documents = []

    try:
@@ -179,12 +100,12 @@ async def convert(
        return server_error_response(e)


@router.post('/rm')
async def rm(
    req: RemoveFile2DocumentRequest,
    current_user = Depends(get_current_user)
):
    file_ids = req.file_ids
@manager.route('/rm', methods=['POST'])  # noqa: F821
@login_required
@validate_request("file_ids")
def rm():
    req = request.json
    file_ids = req["file_ids"]
    if not file_ids:
        return get_json_result(
            data=False, message='Lack of "Files ID"', code=settings.RetCode.ARGUMENT_ERROR)

@@ -13,15 +13,14 @@
# See the License for the specific language governing permissions and
# limitations under the License
#
import logging
import os
import pathlib
import re
from typing import List, Optional

from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile, Query
from fastapi.responses import StreamingResponse
from fastapi.security import HTTPAuthorizationCredentials
from api.utils.api_utils import security
import flask
from flask import request
from flask_login import login_required, current_user

from api.common.check_team_permission import check_file_team_permission
from api.db.services.document_service import DocumentService
@@ -36,110 +35,22 @@ from api.utils.api_utils import get_json_result
from api.utils.file_utils import filename_type
from api.utils.web_utils import CONTENT_TYPE_MAP
from rag.utils.storage_factory import STORAGE_IMPL
from pydantic import BaseModel

# Security

# Pydantic models for request/response
class CreateFileRequest(BaseModel):
    name: str
    parent_id: Optional[str] = None
    type: Optional[str] = None

class RemoveFileRequest(BaseModel):
    file_ids: List[str]

class RenameFileRequest(BaseModel):
    file_id: str
    name: str

class MoveFileRequest(BaseModel):
    src_file_ids: List[str]
    dest_file_id: str

# Dependency injection
async def get_current_user(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """Get the current user."""
    from api.db import StatusEnum
    from api.db.services.user_service import UserService
    from fastapi import HTTPException, status
    import logging

    try:
        from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer
    except ImportError:
        # Fall back to jwt if itsdangerous is unavailable
        import jwt
        Serializer = jwt

    jwt = Serializer(secret_key=settings.SECRET_KEY)
    authorization = credentials.credentials

    if authorization:
        try:
            access_token = str(jwt.loads(authorization))

            if not access_token or not access_token.strip():
                raise HTTPException(
                    status_code=status.HTTP_401_UNAUTHORIZED,
                    detail="Authentication attempt with empty access token"
                )

            # Access tokens should be UUIDs (32 hex characters)
            if len(access_token.strip()) < 32:
                raise HTTPException(
                    status_code=status.HTTP_401_UNAUTHORIZED,
                    detail=f"Authentication attempt with invalid token format: {len(access_token)} chars"
                )

            user = UserService.query(
                access_token=access_token, status=StatusEnum.VALID.value
            )
            if user:
                if not user[0].access_token or not user[0].access_token.strip():
                    raise HTTPException(
                        status_code=status.HTTP_401_UNAUTHORIZED,
                        detail=f"User {user[0].email} has empty access_token in database"
                    )
                return user[0]
            else:
                raise HTTPException(
                    status_code=status.HTTP_401_UNAUTHORIZED,
                    detail="Invalid access token"
                )
        except Exception as e:
            logging.warning(f"load_user got exception {e}")
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="Invalid access token"
            )
    else:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Authorization header required"
        )

# Create router
router = APIRouter()


@router.post('/upload')
async def upload(
    parent_id: Optional[str] = Form(None),
    files: List[UploadFile] = File(...),
    current_user = Depends(get_current_user)
):
    pf_id = parent_id
@manager.route('/upload', methods=['POST'])  # noqa: F821
@login_required
# @validate_request("parent_id")
def upload():
    pf_id = request.form.get("parent_id")

    if not pf_id:
        root_folder = FileService.get_root_folder(current_user.id)
        pf_id = root_folder["id"]

    if not files:
    if 'file' not in request.files:
        return get_json_result(
            data=False, message='No file part!', code=settings.RetCode.ARGUMENT_ERROR)

    file_objs = files
    file_objs = request.files.getlist('file')

    for file_obj in file_objs:
        if file_obj.filename == '':
@@ -186,7 +97,7 @@ async def upload(
            location = file_obj_names[file_len - 1]
            while STORAGE_IMPL.obj_exist(last_folder.id, location):
                location += "_"
            blob = await file_obj.read()
            blob = file_obj.read()
            filename = duplicate_name(
                FileService.query,
                name=file_obj_names[file_len - 1],
@@ -209,13 +120,13 @@ async def upload(
        return server_error_response(e)


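One subtle change in the upload hunk: Flask's FileStorage.read() is synchronous, while FastAPI's UploadFile.read() is a coroutine and must be awaited. A minimal sketch of the FastAPI side (illustrative path, not part of this commit):

from typing import List
from fastapi import APIRouter, File, UploadFile

router = APIRouter()

@router.post("/upload_demo")
async def upload_demo(files: List[UploadFile] = File(...)):
    sizes = {}
    for f in files:
        blob = await f.read()  # UploadFile.read() returns a coroutine
        sizes[f.filename] = len(blob)
    return sizes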
@router.post('/create')
async def create(
    req: CreateFileRequest,
    current_user = Depends(get_current_user)
):
    pf_id = req.parent_id
    input_file_type = req.type
@manager.route('/create', methods=['POST'])  # noqa: F821
@login_required
@validate_request("name")
def create():
    req = request.json
    pf_id = request.json.get("parent_id")
    input_file_type = request.json.get("type")
    if not pf_id:
        root_folder = FileService.get_root_folder(current_user.id)
        pf_id = root_folder["id"]
@@ -224,7 +135,7 @@ async def create(
    if not FileService.is_parent_folder_exist(pf_id):
        return get_json_result(
            data=False, message="Parent Folder Doesn't Exist!", code=settings.RetCode.OPERATING_ERROR)
    if FileService.query(name=req.name, parent_id=pf_id):
    if FileService.query(name=req["name"], parent_id=pf_id):
        return get_data_error_result(
            message="Duplicated folder name in the same folder.")

@@ -238,7 +149,7 @@ async def create(
        "parent_id": pf_id,
        "tenant_id": current_user.id,
        "created_by": current_user.id,
        "name": req.name,
        "name": req["name"],
        "location": "",
        "size": 0,
        "type": file_type
@@ -249,18 +160,17 @@ async def create(
        return server_error_response(e)


@router.get('/list')
async def list_files(
    parent_id: Optional[str] = Query(None),
    keywords: str = Query(""),
    page: int = Query(1),
    page_size: int = Query(15),
    orderby: str = Query("create_time"),
    desc: bool = Query(True),
    current_user = Depends(get_current_user)
):
    pf_id = parent_id
@manager.route('/list', methods=['GET'])  # noqa: F821
@login_required
def list_files():
    pf_id = request.args.get("parent_id")

    keywords = request.args.get("keywords", "")

    page_number = int(request.args.get("page", 1))
    items_per_page = int(request.args.get("page_size", 15))
    orderby = request.args.get("orderby", "create_time")
    desc = request.args.get("desc", True)
    if not pf_id:
        root_folder = FileService.get_root_folder(current_user.id)
        pf_id = root_folder["id"]
@@ -271,7 +181,7 @@ async def list_files(
            return get_data_error_result(message="Folder not found!")

        files, total = FileService.get_by_pf_id(
            current_user.id, pf_id, page, page_size, orderby, desc, keywords)
            current_user.id, pf_id, page_number, items_per_page, orderby, desc, keywords)

        parent_folder = FileService.get_parent_folder(pf_id)
        if not parent_folder:
@@ -282,8 +192,9 @@ async def list_files(
        return server_error_response(e)


@router.get('/root_folder')
async def get_root_folder(current_user = Depends(get_current_user)):
@manager.route('/root_folder', methods=['GET'])  # noqa: F821
@login_required
def get_root_folder():
    try:
        root_folder = FileService.get_root_folder(current_user.id)
        return get_json_result(data={"root_folder": root_folder})
@@ -291,11 +202,10 @@ async def get_root_folder(current_user = Depends(get_current_user)):
        return server_error_response(e)


@router.get('/parent_folder')
async def get_parent_folder(
    file_id: str = Query(...),
    current_user = Depends(get_current_user)
):
@manager.route('/parent_folder', methods=['GET'])  # noqa: F821
@login_required
def get_parent_folder():
    file_id = request.args.get("file_id")
    try:
        e, file = FileService.get_by_id(file_id)
        if not e:
@@ -307,11 +217,10 @@ async def get_parent_folder(
        return server_error_response(e)


@router.get('/all_parent_folder')
async def get_all_parent_folders(
    file_id: str = Query(...),
    current_user = Depends(get_current_user)
):
@manager.route('/all_parent_folder', methods=['GET'])  # noqa: F821
@login_required
def get_all_parent_folders():
    file_id = request.args.get("file_id")
    try:
        e, file = FileService.get_by_id(file_id)
        if not e:
@@ -326,90 +235,99 @@ async def get_all_parent_folders(
        return server_error_response(e)


@router.post('/rm')
async def rm(
    req: RemoveFileRequest,
    current_user = Depends(get_current_user)
):
    file_ids = req.file_ids
@manager.route("/rm", methods=["POST"])  # noqa: F821
@login_required
@validate_request("file_ids")
def rm():
    req = request.json
    file_ids = req["file_ids"]

    def _delete_single_file(file):
        try:
            if file.location:
                STORAGE_IMPL.rm(file.parent_id, file.location)
        except Exception:
            logging.exception(f"Fail to remove object: {file.parent_id}/{file.location}")

        informs = File2DocumentService.get_by_file_id(file.id)
        for inform in informs:
            doc_id = inform.document_id
            e, doc = DocumentService.get_by_id(doc_id)
            if e and doc:
                tenant_id = DocumentService.get_tenant_id(doc_id)
                if tenant_id:
                    DocumentService.remove_document(doc, tenant_id)
        File2DocumentService.delete_by_file_id(file.id)

        FileService.delete(file)

    def _delete_folder_recursive(folder, tenant_id):
        sub_files = FileService.list_all_files_by_parent_id(folder.id)
        for sub_file in sub_files:
            if sub_file.type == FileType.FOLDER.value:
                _delete_folder_recursive(sub_file, tenant_id)
            else:
                _delete_single_file(sub_file)

        FileService.delete(folder)

    try:
        for file_id in file_ids:
            e, file = FileService.get_by_id(file_id)
            if not e:
            if not e or not file:
                return get_data_error_result(message="File or Folder not found!")
            if not file.tenant_id:
                return get_data_error_result(message="Tenant not found!")
            if not check_file_team_permission(file, current_user.id):
                return get_json_result(data=False, message='No authorization.', code=settings.RetCode.AUTHENTICATION_ERROR)
                return get_json_result(data=False, message="No authorization.", code=settings.RetCode.AUTHENTICATION_ERROR)

            if file.source_type == FileSource.KNOWLEDGEBASE:
                continue

            if file.type == FileType.FOLDER.value:
                file_id_list = FileService.get_all_innermost_file_ids(file_id, [])
                for inner_file_id in file_id_list:
                    e, file = FileService.get_by_id(inner_file_id)
                    if not e:
                        return get_data_error_result(message="File not found!")
                    STORAGE_IMPL.rm(file.parent_id, file.location)
                FileService.delete_folder_by_pf_id(current_user.id, file_id)
            else:
                STORAGE_IMPL.rm(file.parent_id, file.location)
                if not FileService.delete(file):
                    return get_data_error_result(
                        message="Database error (File removal)!")
                _delete_folder_recursive(file, current_user.id)
                continue

            # delete file2document
            informs = File2DocumentService.get_by_file_id(file_id)
            for inform in informs:
                doc_id = inform.document_id
                e, doc = DocumentService.get_by_id(doc_id)
                if not e:
                    return get_data_error_result(message="Document not found!")
                tenant_id = DocumentService.get_tenant_id(doc_id)
                if not tenant_id:
                    return get_data_error_result(message="Tenant not found!")
                if not DocumentService.remove_document(doc, tenant_id):
                    return get_data_error_result(
                        message="Database error (Document removal)!")
            File2DocumentService.delete_by_file_id(file_id)
            _delete_single_file(file)

        return get_json_result(data=True)

    except Exception as e:
        return server_error_response(e)


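The rewritten /rm walks folders depth-first: _delete_folder_recursive removes every child (files via _delete_single_file, sub-folders recursively) before deleting the folder row itself, so storage objects are cleaned up before their database records disappear. The request itself just carries the ids (hypothetical client snippet; host, prefix and token are placeholders):

import requests

resp = requests.post(
    "http://localhost:9380/v1/file/rm",  # assumed host/prefix
    json={"file_ids": ["<file-or-folder-id>"]},
    headers={"Authorization": "Bearer <access_token>"},
)
print(resp.json())  # {"data": true} on success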
@router.post('/rename')
async def rename(
    req: RenameFileRequest,
    current_user = Depends(get_current_user)
):
@manager.route('/rename', methods=['POST'])  # noqa: F821
@login_required
@validate_request("file_id", "name")
def rename():
    req = request.json
    try:
        e, file = FileService.get_by_id(req.file_id)
        e, file = FileService.get_by_id(req["file_id"])
        if not e:
            return get_data_error_result(message="File not found!")
        if not check_file_team_permission(file, current_user.id):
            return get_json_result(data=False, message='No authorization.', code=settings.RetCode.AUTHENTICATION_ERROR)
        if file.type != FileType.FOLDER.value \
                and pathlib.Path(req.name.lower()).suffix != pathlib.Path(
                and pathlib.Path(req["name"].lower()).suffix != pathlib.Path(
                    file.name.lower()).suffix:
            return get_json_result(
                data=False,
                message="The extension of file can't be changed",
                code=settings.RetCode.ARGUMENT_ERROR)
        for file in FileService.query(name=req.name, pf_id=file.parent_id):
            if file.name == req.name:
        for file in FileService.query(name=req["name"], pf_id=file.parent_id):
            if file.name == req["name"]:
                return get_data_error_result(
                    message="Duplicated file name in the same folder.")

        if not FileService.update_by_id(
                req.file_id, {"name": req.name}):
                req["file_id"], {"name": req["name"]}):
            return get_data_error_result(
                message="Database error (File rename)!")

        informs = File2DocumentService.get_by_file_id(req.file_id)
        informs = File2DocumentService.get_by_file_id(req["file_id"])
        if informs:
            if not DocumentService.update_by_id(
                    informs[0].document_id, {"name": req.name}):
                    informs[0].document_id, {"name": req["name"]}):
                return get_data_error_result(
                    message="Database error (Document rename)!")

@@ -418,8 +336,9 @@ async def rename(
        return server_error_response(e)


@router.get('/get/{file_id}')
async def get(file_id: str, current_user = Depends(get_current_user)):
@manager.route('/get/<file_id>', methods=['GET'])  # noqa: F821
@login_required
def get(file_id):
    try:
        e, file = FileService.get_by_id(file_id)
        if not e:
@@ -432,6 +351,7 @@ async def get(file_id: str, current_user = Depends(get_current_user)):
        b, n = File2DocumentService.get_storage_address(file_id=file_id)
        blob = STORAGE_IMPL.get(b, n)

        response = flask.make_response(blob)
        ext = re.search(r"\.([^.]+)$", file.name.lower())
        ext = ext.group(1) if ext else None
        if ext:
@@ -439,43 +359,95 @@ async def get(file_id: str, current_user = Depends(get_current_user)):
                content_type = CONTENT_TYPE_MAP.get(ext, f"image/{ext}")
            else:
                content_type = CONTENT_TYPE_MAP.get(ext, f"application/{ext}")
        else:
            content_type = "application/octet-stream"

        return StreamingResponse(
            iter([blob]),
            media_type=content_type,
            headers={"Content-Disposition": f"attachment; filename={file.name}"}
        )
        response.headers.set("Content-Type", content_type)
        return response
    except Exception as e:
        return server_error_response(e)


@router.post('/mv')
async def move(
    req: MoveFileRequest,
    current_user = Depends(get_current_user)
):
@manager.route("/mv", methods=["POST"])  # noqa: F821
@login_required
@validate_request("src_file_ids", "dest_file_id")
def move():
    req = request.json
    try:
        file_ids = req.src_file_ids
        parent_id = req.dest_file_id
        file_ids = req["src_file_ids"]
        dest_parent_id = req["dest_file_id"]

        ok, dest_folder = FileService.get_by_id(dest_parent_id)
        if not ok or not dest_folder:
            return get_data_error_result(message="Parent Folder not found!")

        files = FileService.get_by_ids(file_ids)
        files_dict = {}
        for file in files:
            files_dict[file.id] = file
        if not files:
            return get_data_error_result(message="Source files not found!")

        files_dict = {f.id: f for f in files}

        for file_id in file_ids:
            file = files_dict[file_id]
            file = files_dict.get(file_id)
            if not file:
                return get_data_error_result(message="File or Folder not found!")
            if not file.tenant_id:
                return get_data_error_result(message="Tenant not found!")
            if not check_file_team_permission(file, current_user.id):
                return get_json_result(data=False, message='No authorization.', code=settings.RetCode.AUTHENTICATION_ERROR)
        fe, _ = FileService.get_by_id(parent_id)
        if not fe:
            return get_data_error_result(message="Parent Folder not found!")
        FileService.move_file(file_ids, parent_id)
                return get_json_result(
                    data=False,
                    message="No authorization.",
                    code=settings.RetCode.AUTHENTICATION_ERROR,
                )

        def _move_entry_recursive(source_file_entry, dest_folder):
            if source_file_entry.type == FileType.FOLDER.value:
                existing_folder = FileService.query(name=source_file_entry.name, parent_id=dest_folder.id)
                if existing_folder:
                    new_folder = existing_folder[0]
                else:
                    new_folder = FileService.insert(
                        {
                            "id": get_uuid(),
                            "parent_id": dest_folder.id,
                            "tenant_id": source_file_entry.tenant_id,
                            "created_by": current_user.id,
                            "name": source_file_entry.name,
                            "location": "",
                            "size": 0,
                            "type": FileType.FOLDER.value,
                        }
                    )

                sub_files = FileService.list_all_files_by_parent_id(source_file_entry.id)
                for sub_file in sub_files:
                    _move_entry_recursive(sub_file, new_folder)

                FileService.delete_by_id(source_file_entry.id)
                return

            old_parent_id = source_file_entry.parent_id
            old_location = source_file_entry.location
            filename = source_file_entry.name

            new_location = filename
            while STORAGE_IMPL.obj_exist(dest_folder.id, new_location):
                new_location += "_"

            try:
                STORAGE_IMPL.move(old_parent_id, old_location, dest_folder.id, new_location)
            except Exception as storage_err:
                raise RuntimeError(f"Move file failed at storage layer: {str(storage_err)}")

            FileService.update_by_id(
                source_file_entry.id,
                {
                    "parent_id": dest_folder.id,
                    "location": new_location,
                },
            )

        for file in files:
            _move_entry_recursive(file, dest_folder)

        return get_json_result(data=True)

    except Exception as e:
        return server_error_response(e)

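Both the upload and move paths avoid storage-key collisions by appending underscores until the key is free. The pattern in isolation (illustrative helper, not a function in this commit):

def uniquify(exists, name: str) -> str:
    # Append "_" until `exists` no longer reports a collision,
    # mirroring the `while STORAGE_IMPL.obj_exist(...)` loops above.
    while exists(name):
        name += "_"
    return name

# e.g. with a set of already-taken keys:
taken = {"report.pdf", "report.pdf_"}
assert uniquify(taken.__contains__, "report.pdf") == "report.pdf__"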
@@ -3,7 +3,7 @@
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# you may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
@@ -15,27 +15,29 @@
#
import json
import logging
from typing import Optional, List
from typing import Optional

from fastapi import APIRouter, Depends, HTTPException, Query, status
from fastapi.responses import JSONResponse
from fastapi import APIRouter, Depends, Query

from api.models.kb_models import (
from api.apps.models.auth_dependencies import get_current_user
from api.apps.models.kb_models import (
    CreateKnowledgeBaseRequest,
    UpdateKnowledgeBaseRequest,
    DeleteKnowledgeBaseRequest,
    ListKnowledgeBasesRequest,
    ListKnowledgeBasesQuery,
    ListKnowledgeBasesBody,
    RemoveTagsRequest,
    RenameTagRequest,
    RunGraphRAGRequest,
    ListPipelineLogsQuery,
    ListPipelineLogsBody,
    ListPipelineDatasetLogsQuery,
    ListPipelineDatasetLogsBody,
    DeletePipelineLogsQuery,
    DeletePipelineLogsBody,
    RunGraphragRequest,
    RunRaptorRequest,
    RunMindmapRequest,
    ListPipelineLogsRequest,
    ListPipelineDatasetLogsRequest,
    DeletePipelineLogsRequest,
    UnbindTaskRequest
)
from api.utils.api_utils import get_current_user

from api.db.services import duplicate_name
from api.db.services.document_service import DocumentService, queue_raptor_o_graphrag_tasks
@@ -44,7 +46,12 @@ from api.db.services.file_service import FileService
from api.db.services.pipeline_operation_log_service import PipelineOperationLogService
from api.db.services.task_service import TaskService, GRAPH_RAPTOR_FAKE_DOC_ID
from api.db.services.user_service import TenantService, UserTenantService
from api.utils.api_utils import get_error_data_result, server_error_response, get_data_error_result, get_json_result
from api.utils.api_utils import (
    get_error_data_result,
    server_error_response,
    get_data_error_result,
    get_json_result,
)
from api.utils import get_uuid
from api.db import PipelineTaskType, StatusEnum, FileSource, VALID_FILE_TYPES, VALID_TASK_STATUS
from api.db.services.knowledgebase_service import KnowledgebaseService
@@ -53,9 +60,10 @@ from api import settings
from rag.nlp import search
from api.constants import DATASET_NAME_LIMIT
from rag.settings import PAGERANK_FLD
from rag.utils.redis_conn import REDIS_CONN
from rag.utils.storage_factory import STORAGE_IMPL

# Create the FastAPI router
# Create the router
router = APIRouter()


@@ -64,7 +72,14 @@ async def create(
    request: CreateKnowledgeBaseRequest,
    current_user = Depends(get_current_user)
):
    dataset_name = request.name
    """Create a knowledge base.

    Two parse types are supported:
    - parse_type=1: use a built-in parser; requires parser_id
    - parse_type=2: use a custom pipeline; requires pipeline_id
    """
    req = request.model_dump(exclude_unset=True)
    dataset_name = req["name"]
    if not isinstance(dataset_name, str):
        return get_data_error_result(message="Dataset name must be string.")
    if dataset_name.strip() == "":
@@ -80,55 +95,66 @@ async def create(
        tenant_id=current_user.id,
        status=StatusEnum.VALID.value)
    try:
        req = {
            "id": get_uuid(),
            "name": dataset_name,
            "tenant_id": current_user.id,
            "created_by": current_user.id,
            "parser_id": request.parser_id or "naive",
            "description": request.description
        }
        # Handle parser_id and pipeline_id according to parse_type
        parse_type = req.pop("parse_type", 1)  # drop parse_type; it is not stored in the database

        if parse_type == 1:
            # Built-in parser; parser_id is required.
            # The validator already guarantees parser_id is non-empty, but keep the default as a safeguard.
            if not req.get("parser_id") or req["parser_id"].strip() == "":
                req["parser_id"] = "naive"
            # Clear pipeline_id (set to None; the database column is nullable)
            req["pipeline_id"] = None
        elif parse_type == 2:
            # Custom pipeline; pipeline_id is required.
            # The validator already guarantees pipeline_id is non-empty.
            # parser_id should be an empty string, but the column is NOT NULL, so leave parser_id unset
            # and let the database apply its default "naive" (even though the caller sent an empty string).
            # If the caller explicitly passed an empty string, drop it as well so the default applies.
            if "parser_id" in req and (not req["parser_id"] or req["parser_id"].strip() == ""):
                # Remove the empty-string parser_id so the database default is used
                req.pop("parser_id")
            # pipeline_id stays in req and is persisted

        req["id"] = get_uuid()
        req["name"] = dataset_name
        req["tenant_id"] = current_user.id
        req["created_by"] = current_user.id

        # embd_id is defined as a required field on the model; use it directly

        e, t = TenantService.get_by_id(current_user.id)
        if not e:
            return get_data_error_result(message="Tenant not found.")

        # Default embd_id
        if not request.embd_id:
            req["embd_id"] = t.embd_id
        else:
            req["embd_id"] = request.embd_id

        if request.parser_config:
            req["parser_config"] = request.parser_config
        else:
            req["parser_config"] = {
                "layout_recognize": "DeepDOC",
                "chunk_token_num": 512,
                "delimiter": "\n",
                "auto_keywords": 0,
                "auto_questions": 0,
                "html4excel": False,
                "topn_tags": 3,
                "raptor": {
                    "use_raptor": True,
                    "prompt": "Please summarize the following paragraphs. Be careful with the numbers, do not make things up. Paragraphs as following:\n {cluster_content}\nThe above is the content you need to summarize.",
                    "max_token": 256,
                    "threshold": 0.1,
                    "max_cluster": 64,
                    "random_seed": 0
                },
                "graphrag": {
                    "use_graphrag": True,
                    "entity_types": [
                        "organization",
                        "person",
                        "geo",
                        "event",
                        "category"
                    ],
                    "method": "light"
                }

        req["parser_config"] = {
            "layout_recognize": "DeepDOC",
            "chunk_token_num": 512,
            "delimiter": "\n",
            "auto_keywords": 0,
            "auto_questions": 0,
            "html4excel": False,
            "topn_tags": 3,
            "raptor": {
                "use_raptor": True,
                "prompt": "Please summarize the following paragraphs. Be careful with the numbers, do not make things up. Paragraphs as following:\n {cluster_content}\nThe above is the content you need to summarize.",
                "max_token": 256,
                "threshold": 0.1,
                "max_cluster": 64,
                "random_seed": 0
            },
            "graphrag": {
                "use_graphrag": True,
                "entity_types": [
                    "organization",
                    "person",
                    "geo",
                    "event",
                    "category"
                ],
                "method": "light"
            }
        }
        if not KnowledgebaseService.save(**req):
            return get_data_error_result()
        return get_json_result(data={"kb_id": req["id"]})
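Putting the parse_type branches together, create requests would look roughly like this (hypothetical bodies; the ids and the embd_id format are placeholders, not confirmed by this commit):

# parse_type=2: custom pipeline; parser_id is left empty so the DB default applies
create_kb_body = {
    "name": "contracts",
    "parse_type": 2,
    "pipeline_id": "<pipeline-id>",  # required for parse_type=2
    "description": "Scanned contracts",
}

# parse_type=1 (default): built-in parser; pipeline_id is cleared to None
create_kb_body_builtin = {
    "name": "manuals",
    "parse_type": 1,
    "parser_id": "naive",
}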
@@ -141,16 +167,24 @@ async def update(
    request: UpdateKnowledgeBaseRequest,
    current_user = Depends(get_current_user)
):
    if not isinstance(request.name, str):
    """Update a knowledge base."""
    req = request.model_dump(exclude_unset=True)
    if not isinstance(req["name"], str):
        return get_data_error_result(message="Dataset name must be string.")
    if request.name.strip() == "":
    if req["name"].strip() == "":
        return get_data_error_result(message="Dataset name can't be empty.")
    if len(request.name.encode("utf-8")) > DATASET_NAME_LIMIT:
    if len(req["name"].encode("utf-8")) > DATASET_NAME_LIMIT:
        return get_data_error_result(
            message=f"Dataset name length is {len(request.name)} which is larger than {DATASET_NAME_LIMIT}")
    name = request.name.strip()
            message=f"Dataset name length is {len(req['name'])} which is larger than {DATASET_NAME_LIMIT}")
    req["name"] = req["name"].strip()

    if not KnowledgebaseService.accessible4deletion(request.kb_id, current_user.id):
    # Strip fields that are not allowed to change
    not_allowed = ["id", "tenant_id", "created_by", "create_time", "update_time", "create_date", "update_date"]
    for key in not_allowed:
        if key in req:
            del req[key]

    if not KnowledgebaseService.accessible4deletion(req["kb_id"], current_user.id):
        return get_json_result(
            data=False,
            message='No authorization.',
@@ -158,48 +192,29 @@ async def update(
        )
    try:
        if not KnowledgebaseService.query(
                created_by=current_user.id, id=request.kb_id):
                created_by=current_user.id, id=req["kb_id"]):
            return get_json_result(
                data=False, message='Only owner of knowledgebase authorized for this operation.',
                code=settings.RetCode.OPERATING_ERROR)

        e, kb = KnowledgebaseService.get_by_id(request.kb_id)
        e, kb = KnowledgebaseService.get_by_id(req["kb_id"])
        if not e:
            return get_data_error_result(
                message="Can't find this knowledgebase!")

        if name.lower() != kb.name.lower() \
        if req["name"].lower() != kb.name.lower() \
                and len(
                    KnowledgebaseService.query(name=name, tenant_id=current_user.id, status=StatusEnum.VALID.value)) >= 1:
                    KnowledgebaseService.query(name=req["name"], tenant_id=current_user.id, status=StatusEnum.VALID.value)) >= 1:
            return get_data_error_result(
                message="Duplicated knowledgebase name.")

        # Build the update payload with every updatable field
        update_data = {
            "name": name,
            "pagerank": request.pagerank
        }

        # Add optional fields when provided
        if request.description is not None:
            update_data["description"] = request.description
        if request.permission is not None:
            update_data["permission"] = request.permission
        if request.avatar is not None:
            update_data["avatar"] = request.avatar
        if request.parser_id is not None:
            update_data["parser_id"] = request.parser_id
        if request.embd_id is not None:
            update_data["embd_id"] = request.embd_id
        if request.parser_config is not None:
            update_data["parser_config"] = request.parser_config

        if not KnowledgebaseService.update_by_id(kb.id, update_data):
        kb_id = req.pop("kb_id")
        if not KnowledgebaseService.update_by_id(kb.id, req):
            return get_data_error_result()

        if kb.pagerank != request.pagerank:
            if request.pagerank > 0:
                settings.docStoreConn.update({"kb_id": kb.id}, {PAGERANK_FLD: request.pagerank},
        if kb.pagerank != req.get("pagerank", 0):
            if req.get("pagerank", 0) > 0:
                settings.docStoreConn.update({"kb_id": kb.id}, {PAGERANK_FLD: req["pagerank"]},
                                             search.index_name(kb.tenant_id), kb.id)
            else:
                # Elasticsearch requires PAGERANK_FLD be non-zero!
@@ -211,26 +226,7 @@ async def update(
            return get_data_error_result(
                message="Database error (Knowledgebase rename)!")
        kb = kb.to_dict()

        # Merge the full request data into the returned dict, matching the previous behavior
        request_data = {
            "name": name,
            "pagerank": request.pagerank
        }
        if request.description is not None:
            request_data["description"] = request.description
        if request.permission is not None:
            request_data["permission"] = request.permission
        if request.avatar is not None:
            request_data["avatar"] = request.avatar
        if request.parser_id is not None:
            request_data["parser_id"] = request.parser_id
        if request.embd_id is not None:
            request_data["embd_id"] = request.embd_id
        if request.parser_config is not None:
            request_data["parser_config"] = request.parser_config

        kb.update(request_data)
        kb.update(req)

        return get_json_result(data=kb)
    except Exception as e:
@@ -242,6 +238,7 @@ async def detail(
    kb_id: str = Query(..., description="Knowledge base ID"),
    current_user = Depends(get_current_user)
):
    """Get knowledge base details."""
    try:
        tenants = UserTenantService.query(user_id=current_user.id)
        for tenant in tenants:
@@ -257,6 +254,9 @@ async def detail(
            return get_data_error_result(
                message="Can't find this knowledgebase!")
        kb["size"] = DocumentService.get_total_size_by_kb_id(kb_id=kb["id"], keywords="", run_status=[], types=[])
        for key in ["graphrag_task_finish_at", "raptor_task_finish_at", "mindmap_task_finish_at"]:
            if finish_at := kb.get(key):
                kb[key] = finish_at.strftime("%Y-%m-%d %H:%M:%S")
        return get_json_result(data=kb)
    except Exception as e:
        return server_error_response(e)
@@ -264,18 +264,22 @@ async def detail(

@router.post('/list')
async def list_kbs(
    request: ListKnowledgeBasesRequest,
    keywords: str = Query("", description="Keywords"),
    page: int = Query(0, description="Page number"),
    page_size: int = Query(0, description="Page size"),
    parser_id: Optional[str] = Query(None, description="Parser ID"),
    orderby: str = Query("create_time", description="Sort field"),
    desc: bool = Query(True, description="Sort descending"),
    query: ListKnowledgeBasesQuery = Depends(),
    body: Optional[ListKnowledgeBasesBody] = None,
    current_user = Depends(get_current_user)
):
    page_number = page
    items_per_page = page_size
    owner_ids = request.owner_ids
    """List knowledge bases."""
    if body is None:
        body = ListKnowledgeBasesBody()

    keywords = query.keywords or ""
    page_number = int(query.page or 0)
    items_per_page = int(query.page_size or 0)
    parser_id = query.parser_id
    orderby = query.orderby or "create_time"
    desc = query.desc.lower() == "true" if query.desc else True

    owner_ids = body.owner_ids or [] if body else []
    try:
        if not owner_ids:
            tenants = TenantService.get_joined_tenants_by_user_id(current_user.id)
@@ -296,11 +300,13 @@ async def list_kbs(
    except Exception as e:
        return server_error_response(e)


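The /list rewrite moves the query parameters into a Pydantic model injected with Depends(), while the optional JSON body carries owner_ids. The fields of ListKnowledgeBasesQuery below are inferred from how the handler uses them (a reconstruction, not the committed definition):

from typing import Optional
from pydantic import BaseModel

class ListKnowledgeBasesQuery(BaseModel):
    keywords: Optional[str] = None
    page: Optional[int] = 0
    page_size: Optional[int] = 0
    parser_id: Optional[str] = None
    orderby: Optional[str] = "create_time"
    desc: Optional[str] = None  # the handler compares query.desc.lower() == "true"

# With `query: ListKnowledgeBasesQuery = Depends()`, FastAPI maps each model
# field to an individual query parameter (?keywords=...&page=1&...).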
@router.post('/rm')
async def rm(
    request: DeleteKnowledgeBaseRequest,
    current_user = Depends(get_current_user)
):
    """Delete a knowledge base."""
    if not KnowledgebaseService.accessible4deletion(request.kb_id, current_user.id):
        return get_json_result(
            data=False,
@@ -343,6 +349,7 @@ async def list_tags(
    kb_id: str,
    current_user = Depends(get_current_user)
):
    """List knowledge base tags."""
    if not KnowledgebaseService.accessible(kb_id, current_user.id):
        return get_json_result(
            data=False,
@@ -353,17 +360,18 @@ async def list_tags(
    tenants = UserTenantService.get_tenants_by_user_id(current_user.id)
    tags = []
    for tenant in tenants:
        tags += settings.retrievaler.all_tags(tenant["tenant_id"], [kb_id])
        tags += settings.retriever.all_tags(tenant["tenant_id"], [kb_id])
    return get_json_result(data=tags)


@router.get('/tags')
async def list_tags_from_kbs(
    kb_ids: str = Query(..., description="List of knowledge base IDs, separated by commas"),
    kb_ids: str = Query(..., description="List of knowledge base IDs, comma-separated"),
    current_user = Depends(get_current_user)
):
    kb_ids = kb_ids.split(",")
    for kb_id in kb_ids:
    """List tags from multiple knowledge bases."""
    kb_id_list = kb_ids.split(",")
    for kb_id in kb_id_list:
        if not KnowledgebaseService.accessible(kb_id, current_user.id):
            return get_json_result(
                data=False,
@@ -374,7 +382,7 @@ async def list_tags_from_kbs(
    tenants = UserTenantService.get_tenants_by_user_id(current_user.id)
    tags = []
    for tenant in tenants:
        tags += settings.retrievaler.all_tags(tenant["tenant_id"], kb_ids)
        tags += settings.retriever.all_tags(tenant["tenant_id"], kb_id_list)
    return get_json_result(data=tags)


@@ -384,6 +392,7 @@ async def rm_tags(
    request: RemoveTagsRequest,
    current_user = Depends(get_current_user)
):
    """Remove knowledge base tags."""
    if not KnowledgebaseService.accessible(kb_id, current_user.id):
        return get_json_result(
            data=False,
@@ -406,6 +415,7 @@ async def rename_tags(
    request: RenameTagRequest,
    current_user = Depends(get_current_user)
):
    """Rename a knowledge base tag."""
    if not KnowledgebaseService.accessible(kb_id, current_user.id):
        return get_json_result(
            data=False,
@@ -426,6 +436,7 @@ async def knowledge_graph(
    kb_id: str,
    current_user = Depends(get_current_user)
):
    """Get the knowledge graph."""
    if not KnowledgebaseService.accessible(kb_id, current_user.id):
        return get_json_result(
            data=False,
@@ -441,7 +452,7 @@ async def knowledge_graph(
    obj = {"graph": {}, "mind_map": {}}
    if not settings.docStoreConn.indexExist(search.index_name(kb.tenant_id), kb_id):
        return get_json_result(data=obj)
    sres = settings.retrievaler.search(req, search.index_name(kb.tenant_id), [kb_id])
    sres = settings.retriever.search(req, search.index_name(kb.tenant_id), [kb_id])
    if not len(sres.ids):
        return get_json_result(data=obj)

@@ -468,6 +479,7 @@ async def delete_knowledge_graph(
    kb_id: str,
    current_user = Depends(get_current_user)
):
    """Delete the knowledge graph."""
    if not KnowledgebaseService.accessible(kb_id, current_user.id):
        return get_json_result(
            data=False,
@@ -482,18 +494,19 @@ async def delete_knowledge_graph(

@router.get("/get_meta")
async def get_meta(
    kb_ids: str = Query(..., description="List of knowledge base IDs, separated by commas"),
    kb_ids: str = Query(..., description="List of knowledge base IDs, comma-separated"),
    current_user = Depends(get_current_user)
):
    kb_ids = kb_ids.split(",")
    for kb_id in kb_ids:
    """Get knowledge base metadata."""
    kb_id_list = kb_ids.split(",")
    for kb_id in kb_id_list:
        if not KnowledgebaseService.accessible(kb_id, current_user.id):
            return get_json_result(
                data=False,
                message='No authorization.',
                code=settings.RetCode.AUTHENTICATION_ERROR
            )
    return get_json_result(data=DocumentService.get_meta_by_kbs(kb_ids))
    return get_json_result(data=DocumentService.get_meta_by_kbs(kb_id_list))


@router.get("/basic_info")
@@ -501,6 +514,7 @@ async def get_basic_info(
    kb_id: str = Query(..., description="Knowledge base ID"),
    current_user = Depends(get_current_user)
):
    """Get basic knowledge base info."""
    if not KnowledgebaseService.accessible(kb_id, current_user.id):
        return get_json_result(
            data=False,
@@ -515,42 +529,45 @@ async def get_basic_info(

@router.post("/list_pipeline_logs")
async def list_pipeline_logs(
    request: ListPipelineLogsRequest,
    kb_id: str = Query(..., description="Knowledge base ID"),
    keywords: str = Query("", description="Keywords"),
    page: int = Query(0, description="Page number"),
    page_size: int = Query(0, description="Page size"),
    orderby: str = Query("create_time", description="Sort field"),
    desc: bool = Query(True, description="Sort descending"),
    create_date_from: str = Query("", description="Creation date from"),
    create_date_to: str = Query("", description="Creation date to"),
    query: ListPipelineLogsQuery = Depends(),
    body: Optional[ListPipelineLogsBody] = None,
    current_user = Depends(get_current_user)
):
    if not kb_id:
    """List pipeline logs."""
    if not query.kb_id:
        return get_json_result(data=False, message='Lack of "KB ID"', code=settings.RetCode.ARGUMENT_ERROR)

    page_number = page
    items_per_page = page_size

    if body is None:
        body = ListPipelineLogsBody()

    keywords = query.keywords or ""
    page_number = int(query.page or 0)
    items_per_page = int(query.page_size or 0)
    orderby = query.orderby or "create_time"
    desc = query.desc.lower() == "true" if query.desc else True
    create_date_from = query.create_date_from or ""
    create_date_to = query.create_date_to or ""
    if create_date_to < create_date_from:
        return get_data_error_result(message="Create date filter is abnormal.")

    operation_status = request.operation_status
    operation_status = body.operation_status or []
    if operation_status:
        invalid_status = {s for s in operation_status if s not in VALID_TASK_STATUS}
        if invalid_status:
            return get_data_error_result(message=f"Invalid operation_status filter conditions: {', '.join(invalid_status)}")

    types = request.types
    types = body.types or []
    if types:
        invalid_types = {t for t in types if t not in VALID_FILE_TYPES}
        if invalid_types:
            return get_data_error_result(message=f"Invalid filter conditions: {', '.join(invalid_types)} type{'s' if len(invalid_types) > 1 else ''}")

    suffix = request.suffix
    suffix = body.suffix or []

    try:
        logs, tol = PipelineOperationLogService.get_file_logs_by_kb_id(kb_id, page_number, items_per_page, orderby, desc, keywords, operation_status, types, suffix, create_date_from, create_date_to)
        logs, tol = PipelineOperationLogService.get_file_logs_by_kb_id(
            query.kb_id, page_number, items_per_page, orderby, desc, keywords,
            operation_status, types, suffix, create_date_from, create_date_to)
        return get_json_result(data={"total": tol, "logs": logs})
    except Exception as e:
        return server_error_response(e)
@@ -558,33 +575,36 @@ async def list_pipeline_logs(

@router.post("/list_pipeline_dataset_logs")
async def list_pipeline_dataset_logs(
    request: ListPipelineDatasetLogsRequest,
    kb_id: str = Query(..., description="Knowledge base ID"),
    page: int = Query(0, description="Page number"),
    page_size: int = Query(0, description="Page size"),
    orderby: str = Query("create_time", description="Sort field"),
    desc: bool = Query(True, description="Sort descending"),
    create_date_from: str = Query("", description="Creation date from"),
    create_date_to: str = Query("", description="Creation date to"),
    query: ListPipelineDatasetLogsQuery = Depends(),
    body: Optional[ListPipelineDatasetLogsBody] = None,
    current_user = Depends(get_current_user)
):
    if not kb_id:
    """List pipeline dataset logs."""
    if not query.kb_id:
        return get_json_result(data=False, message='Lack of "KB ID"', code=settings.RetCode.ARGUMENT_ERROR)

    page_number = page
    items_per_page = page_size

    if body is None:
        body = ListPipelineDatasetLogsBody()

    page_number = int(query.page or 0)
    items_per_page = int(query.page_size or 0)
    orderby = query.orderby or "create_time"
    desc = query.desc.lower() == "true" if query.desc else True
    create_date_from = query.create_date_from or ""
    create_date_to = query.create_date_to or ""
    if create_date_to < create_date_from:
        return get_data_error_result(message="Create date filter is abnormal.")

    operation_status = request.operation_status
    operation_status = body.operation_status or []
    if operation_status:
        invalid_status = {s for s in operation_status if s not in VALID_TASK_STATUS}
        if invalid_status:
            return get_data_error_result(message=f"Invalid operation_status filter conditions: {', '.join(invalid_status)}")

    try:
        logs, tol = PipelineOperationLogService.get_dataset_logs_by_kb_id(kb_id, page_number, items_per_page, orderby, desc, operation_status, create_date_from, create_date_to)
        logs, tol = PipelineOperationLogService.get_dataset_logs_by_kb_id(
            query.kb_id, page_number, items_per_page, orderby, desc,
            operation_status, create_date_from, create_date_to)
        return get_json_result(data={"total": tol, "logs": logs})
    except Exception as e:
        return server_error_response(e)
@@ -592,14 +612,18 @@ async def list_pipeline_dataset_logs(

@router.post("/delete_pipeline_logs")
async def delete_pipeline_logs(
    request: DeletePipelineLogsRequest,
    kb_id: str = Query(..., description="Knowledge base ID"),
    query: DeletePipelineLogsQuery = Depends(),
    body: Optional[DeletePipelineLogsBody] = None,
    current_user = Depends(get_current_user)
):
    if not kb_id:
    """Delete pipeline logs."""
    if not query.kb_id:
        return get_json_result(data=False, message='Lack of "KB ID"', code=settings.RetCode.ARGUMENT_ERROR)

    log_ids = request.log_ids
    if body is None:
        body = DeletePipelineLogsBody(log_ids=[])

    log_ids = body.log_ids or []

    PipelineOperationLogService.delete_by_ids(log_ids)

@@ -608,9 +632,10 @@ async def delete_pipeline_logs(

@router.get("/pipeline_log_detail")
async def pipeline_log_detail(
    log_id: str = Query(..., description="Log ID"),
    log_id: str = Query(..., description="Pipeline log ID"),
    current_user = Depends(get_current_user)
):
    """Get pipeline log details."""
    if not log_id:
        return get_json_result(data=False, message='Lack of "Pipeline log ID"', code=settings.RetCode.ARGUMENT_ERROR)

@@ -623,9 +648,10 @@ async def pipeline_log_detail(

@router.post("/run_graphrag")
async def run_graphrag(
    request: RunGraphRAGRequest,
    request: RunGraphragRequest,
    current_user = Depends(get_current_user)
):
    """Run GraphRAG."""
    kb_id = request.kb_id
    if not kb_id:
        return get_error_data_result(message='Lack of "KB ID"')
@@ -660,7 +686,7 @@ async def run_graphrag(
    sample_document = documents[0]
    document_ids = [document["id"] for document in documents]

    task_id = queue_raptor_o_graphrag_tasks(doc=sample_document, ty="graphrag", priority=0, fake_doc_id=GRAPH_RAPTOR_FAKE_DOC_ID, doc_ids=list(document_ids))
    task_id = queue_raptor_o_graphrag_tasks(sample_doc_id=sample_document, ty="graphrag", priority=0, fake_doc_id=GRAPH_RAPTOR_FAKE_DOC_ID, doc_ids=list(document_ids))

    if not KnowledgebaseService.update_by_id(kb.id, {"graphrag_task_id": task_id}):
        logging.warning(f"Cannot save graphrag_task_id for kb {kb_id}")
@@ -673,6 +699,7 @@ async def trace_graphrag(
    kb_id: str = Query(..., description="Knowledge base ID"),
    current_user = Depends(get_current_user)
):
    """Trace the GraphRAG task."""
    if not kb_id:
        return get_error_data_result(message='Lack of "KB ID"')

@@ -696,6 +723,7 @@ async def run_raptor(
    request: RunRaptorRequest,
    current_user = Depends(get_current_user)
):
    """Run RAPTOR."""
    kb_id = request.kb_id
    if not kb_id:
        return get_error_data_result(message='Lack of "KB ID"')
@@ -730,7 +758,7 @@ async def run_raptor(
    sample_document = documents[0]
    document_ids = [document["id"] for document in documents]

    task_id = queue_raptor_o_graphrag_tasks(doc=sample_document, ty="raptor", priority=0, fake_doc_id=GRAPH_RAPTOR_FAKE_DOC_ID, doc_ids=list(document_ids))
    task_id = queue_raptor_o_graphrag_tasks(sample_doc_id=sample_document, ty="raptor", priority=0, fake_doc_id=GRAPH_RAPTOR_FAKE_DOC_ID, doc_ids=list(document_ids))

    if not KnowledgebaseService.update_by_id(kb.id, {"raptor_task_id": task_id}):
        logging.warning(f"Cannot save raptor_task_id for kb {kb_id}")
@@ -743,6 +771,7 @@ async def trace_raptor(
    kb_id: str = Query(..., description="Knowledge base ID"),
    current_user = Depends(get_current_user)
):
    """Trace the RAPTOR task."""
    if not kb_id:
        return get_error_data_result(message='Lack of "KB ID"')

@@ -766,6 +795,7 @@ async def run_mindmap(
    request: RunMindmapRequest,
    current_user = Depends(get_current_user)
):
    """Run Mindmap."""
    kb_id = request.kb_id
    if not kb_id:
        return get_error_data_result(message='Lack of "KB ID"')
@@ -800,7 +830,7 @@ async def run_mindmap(
    sample_document = documents[0]
    document_ids = [document["id"] for document in documents]

    task_id = queue_raptor_o_graphrag_tasks(doc=sample_document, ty="mindmap", priority=0, fake_doc_id=GRAPH_RAPTOR_FAKE_DOC_ID, doc_ids=list(document_ids))
    task_id = queue_raptor_o_graphrag_tasks(sample_doc_id=sample_document, ty="mindmap", priority=0, fake_doc_id=GRAPH_RAPTOR_FAKE_DOC_ID, doc_ids=list(document_ids))

    if not KnowledgebaseService.update_by_id(kb.id, {"mindmap_task_id": task_id}):
        logging.warning(f"Cannot save mindmap_task_id for kb {kb_id}")
@@ -813,6 +843,7 @@ async def trace_mindmap(
    kb_id: str = Query(..., description="Knowledge base ID"),
    current_user = Depends(get_current_user)
):
    """Trace the Mindmap task."""
    if not kb_id:
        return get_error_data_result(message='Lack of "KB ID"')

@@ -834,33 +865,43 @@ async def trace_mindmap(
@router.delete("/unbind_task")
async def delete_kb_task(
    kb_id: str = Query(..., description="Knowledge base ID"),
    pipeline_task_type: str = Query(..., description="Pipeline task type"),
    pipeline_task_type: str = Query(..., description="Pipeline task type"),
    current_user = Depends(get_current_user)
):
    """Unbind a task."""
    if not kb_id:
        return get_error_data_result(message='Lack of "KB ID"')
    ok, kb = KnowledgebaseService.get_by_id(kb_id)
    if not ok:
        return get_json_result(data=True)

    if not pipeline_task_type or pipeline_task_type not in [PipelineTaskType.GRAPH_RAG, PipelineTaskType.RAPTOR, PipelineTaskType.MINDMAP]:
|
||||
return get_error_data_result(message="Invalid task type")
|
||||
|
||||
match pipeline_task_type:
|
||||
case PipelineTaskType.GRAPH_RAG:
|
||||
settings.docStoreConn.delete({"knowledge_graph_kwd": ["graph", "subgraph", "entity", "relation"]}, search.index_name(kb.tenant_id), kb_id)
|
||||
kb_task_id = "graphrag_task_id"
|
||||
kb_task_id_field = "graphrag_task_id"
|
||||
task_id = kb.graphrag_task_id
|
||||
kb_task_finish_at = "graphrag_task_finish_at"
|
||||
case PipelineTaskType.RAPTOR:
|
||||
kb_task_id = "raptor_task_id"
|
||||
kb_task_id_field = "raptor_task_id"
|
||||
task_id = kb.raptor_task_id
|
||||
kb_task_finish_at = "raptor_task_finish_at"
|
||||
case PipelineTaskType.MINDMAP:
|
||||
kb_task_id = "mindmap_task_id"
|
||||
kb_task_id_field = "mindmap_task_id"
|
||||
task_id = kb.mindmap_task_id
|
||||
kb_task_finish_at = "mindmap_task_finish_at"
|
||||
case _:
|
||||
return get_error_data_result(message="Internal Error: Invalid task type")
|
||||
|
||||
ok = KnowledgebaseService.update_by_id(kb_id, {kb_task_id: "", kb_task_finish_at: None})
|
||||
def cancel_task(task_id):
|
||||
REDIS_CONN.set(f"{task_id}-cancel", "x")
|
||||
cancel_task(task_id)
|
||||
|
||||
ok = KnowledgebaseService.update_by_id(kb_id, {kb_task_id_field: "", kb_task_finish_at: None})
|
||||
if not ok:
|
||||
return server_error_response(f"Internal error: cannot delete task {pipeline_task_type}")
|
||||
|
||||
return get_json_result(data=True)
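
Editor's note: the unbind handler cancels a running task purely through a Redis flag: it writes `{task_id}-cancel` and relies on the worker to notice. A minimal sketch of the consumer side, assuming a `redis_conn` with `get`/`set` and a hypothetical `do_step` unit of work (neither is the commit's actual worker code):

def run_cancellable_task(task_id, redis_conn, do_step, steps=100):
    """Poll the `{task_id}-cancel` flag between work units and stop early if it is set."""
    for i in range(steps):
        if redis_conn.get(f"{task_id}-cancel"):
            return False  # cancelled via /unbind_task
        do_step(i)
    return True
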

@@ -15,26 +15,36 @@
#
import logging
import json
from typing import Optional
from fastapi import APIRouter, Depends, Query
from fastapi.responses import JSONResponse

from api.apps.models.auth_dependencies import get_current_user
from api.apps.models.llm_models import (
    SetApiKeyRequest,
    AddLLMRequest,
    DeleteLLMRequest,
    DeleteFactoryRequest,
    MyLLMsQuery,
    ListLLMsQuery,
)
from api.db.services.tenant_llm_service import LLMFactoriesService, TenantLLMService
from api.db.services.llm_service import LLMService
from api import settings
from api.utils.api_utils import server_error_response, get_data_error_result, get_json_result
from api.utils.api_utils import server_error_response, get_data_error_result
from api.db import StatusEnum, LLMType
from api.db.db_models import TenantLLM
from api.utils.api_utils import get_json_result
from api.utils.base64_image import test_image
from rag.llm import EmbeddingModel, ChatModel, RerankModel, CvModel, TTSModel
from api.models.llm_models import SetApiKeyRequest, AddLLMRequest, DeleteLLMRequest, DeleteFactoryRequest
from api.utils.api_utils import get_current_user

# Create the FastAPI router
# Create the router
router = APIRouter()


@router.get('/factories')
async def factories(current_user = Depends(get_current_user)):
async def factories(
    current_user = Depends(get_current_user)
):
    """List LLM factories"""
    try:
        fac = LLMFactoriesService.get_all()
        fac = [f.to_dict() for f in fac if f.name not in ["Youdao", "FastEmbed", "BAAI"]]
@@ -55,17 +65,22 @@ async def factories(current_user = Depends(get_current_user)):


@router.post('/set_api_key')
async def set_api_key(request: SetApiKeyRequest, current_user = Depends(get_current_user)):
async def set_api_key(
    request: SetApiKeyRequest,
    current_user = Depends(get_current_user)
):
    """Set API key"""
    req = request.model_dump(exclude_unset=True)
    # test if api key works
    chat_passed, embd_passed, rerank_passed = False, False, False
    factory = request.llm_factory
    factory = req["llm_factory"]
    extra = {"provider": factory}
    msg = ""
    for llm in LLMService.query(fid=factory):
        if not embd_passed and llm.model_type == LLMType.EMBEDDING.value:
            assert factory in EmbeddingModel, f"Embedding model from {factory} is not supported yet."
            mdl = EmbeddingModel[factory](
                request.api_key, llm.llm_name, base_url=request.base_url)
                req["api_key"], llm.llm_name, base_url=req.get("base_url", ""))
            try:
                arr, tc = mdl.encode(["Test if the api key is available"])
                if len(arr[0]) == 0:
@@ -76,7 +91,7 @@ async def set_api_key(request: SetApiKeyRequest, current_user = Depends(get_curr
        elif not chat_passed and llm.model_type == LLMType.CHAT.value:
            assert factory in ChatModel, f"Chat model from {factory} is not supported yet."
            mdl = ChatModel[factory](
                request.api_key, llm.llm_name, base_url=request.base_url, **extra)
                req["api_key"], llm.llm_name, base_url=req.get("base_url", ""), **extra)
            try:
                m, tc = mdl.chat(None, [{"role": "user", "content": "Hello! How are you doing!"}],
                                 {"temperature": 0.9, 'max_tokens': 50})
@@ -89,7 +104,7 @@ async def set_api_key(request: SetApiKeyRequest, current_user = Depends(get_curr
        elif not rerank_passed and llm.model_type == LLMType.RERANK:
            assert factory in RerankModel, f"Re-rank model from {factory} is not supported yet."
            mdl = RerankModel[factory](
                request.api_key, llm.llm_name, base_url=request.base_url)
                req["api_key"], llm.llm_name, base_url=req.get("base_url", ""))
            try:
                arr, tc = mdl.similarity("What's the weather?", ["Is it sunny today?"])
                if len(arr) == 0 or tc == 0:
@@ -107,9 +122,12 @@ async def set_api_key(request: SetApiKeyRequest, current_user = Depends(get_curr
        return get_data_error_result(message=msg)

    llm_config = {
        "api_key": request.api_key,
        "api_base": request.base_url or ""
        "api_key": req["api_key"],
        "api_base": req.get("base_url", "")
    }
    for n in ["model_type", "llm_name"]:
        if n in req:
            llm_config[n] = req[n]

    for llm in LLMService.query(fid=factory):
        llm_config["max_tokens"] = llm.max_tokens
@@ -132,14 +150,19 @@ async def set_api_key(request: SetApiKeyRequest, current_user = Depends(get_curr


@router.post('/add_llm')
async def add_llm(request: AddLLMRequest, current_user = Depends(get_current_user)):
    factory = request.llm_factory
    api_key = request.api_key or "x"
    llm_name = request.llm_name
async def add_llm(
    request: AddLLMRequest,
    current_user = Depends(get_current_user)
):
    """Add an LLM"""
    req = request.model_dump(exclude_unset=True)
    factory = req["llm_factory"]
    api_key = req.get("api_key", "x")
    llm_name = req.get("llm_name")

    def apikey_json(keys):
        nonlocal request
        return json.dumps({k: getattr(request, k, "") for k in keys})
        nonlocal req
        return json.dumps({k: req.get(k, "") for k in keys})

    if factory == "VolcEngine":
        # For VolcEngine, due to its special authentication method
@@ -147,21 +170,28 @@ async def add_llm(request: AddLLMRequest, current_user = Depends(get_current_use
        api_key = apikey_json(["ark_api_key", "endpoint_id"])

    elif factory == "Tencent Hunyuan":
        # Create a temporary request object for set_api_key
        temp_request = SetApiKeyRequest(
            llm_factory=factory,
            api_key=apikey_json(["hunyuan_sid", "hunyuan_sk"]),
            base_url=request.api_base
        req["api_key"] = apikey_json(["hunyuan_sid", "hunyuan_sk"])
        # Build a SetApiKeyRequest and reuse the set_api_key logic
        set_api_key_req = SetApiKeyRequest(
            llm_factory=req["llm_factory"],
            api_key=req["api_key"],
            base_url=req.get("api_base", req.get("base_url", "")),
            model_type=req.get("model_type"),
            llm_name=req.get("llm_name")
        )
        return await set_api_key(temp_request, current_user)
        return await set_api_key(set_api_key_req, current_user)

    elif factory == "Tencent Cloud":
        temp_request = SetApiKeyRequest(
            llm_factory=factory,
            api_key=apikey_json(["tencent_cloud_sid", "tencent_cloud_sk"]),
            base_url=request.api_base
        req["api_key"] = apikey_json(["tencent_cloud_sid", "tencent_cloud_sk"])
        # Build a SetApiKeyRequest and reuse the set_api_key logic
        set_api_key_req = SetApiKeyRequest(
            llm_factory=req["llm_factory"],
            api_key=req["api_key"],
            base_url=req.get("api_base", req.get("base_url", "")),
            model_type=req.get("model_type"),
            llm_name=req.get("llm_name")
        )
        return await set_api_key(temp_request, current_user)
        return await set_api_key(set_api_key_req, current_user)

    elif factory == "Bedrock":
        # For Bedrock, due to its special authentication method
@@ -181,9 +211,9 @@ async def add_llm(request: AddLLMRequest, current_user = Depends(get_current_use
        llm_name += "___VLLM"

    elif factory == "XunFei Spark":
        if request.model_type == "chat":
            api_key = request.spark_api_password or ""
        elif request.model_type == "tts":
        if req["model_type"] == "chat":
            api_key = req.get("spark_api_password", "")
        elif req["model_type"] == "tts":
            api_key = apikey_json(["spark_app_id", "spark_api_secret", "spark_api_key"])

    elif factory == "BaiduYiyan":
@@ -198,14 +228,17 @@ async def add_llm(request: AddLLMRequest, current_user = Depends(get_current_use
    elif factory == "Azure-OpenAI":
        api_key = apikey_json(["api_key", "api_version"])

    elif factory == "OpenRouter":
        api_key = apikey_json(["api_key", "provider_order"])

    llm = {
        "tenant_id": current_user.id,
        "llm_factory": factory,
        "model_type": request.model_type,
        "model_type": req["model_type"],
        "llm_name": llm_name,
        "api_base": request.api_base or "",
        "api_base": req.get("api_base", ""),
        "api_key": api_key,
        "max_tokens": request.max_tokens
        "max_tokens": req.get("max_tokens")
    }

    msg = ""
@@ -295,7 +328,11 @@ async def add_llm(request: AddLLMRequest, current_user = Depends(get_current_use


@router.post('/delete_llm')
async def delete_llm(request: DeleteLLMRequest, current_user = Depends(get_current_user)):
async def delete_llm(
    request: DeleteLLMRequest,
    current_user = Depends(get_current_user)
):
    """Delete an LLM"""
    TenantLLMService.filter_delete(
        [TenantLLM.tenant_id == current_user.id, TenantLLM.llm_factory == request.llm_factory,
         TenantLLM.llm_name == request.llm_name])
@@ -303,7 +340,11 @@ async def delete_llm(request: DeleteLLMRequest, current_user = Depends(get_curre


@router.post('/delete_factory')
async def delete_factory(request: DeleteFactoryRequest, current_user = Depends(get_current_user)):
async def delete_factory(
    request: DeleteFactoryRequest,
    current_user = Depends(get_current_user)
):
    """Delete a factory"""
    TenantLLMService.filter_delete(
        [TenantLLM.tenant_id == current_user.id, TenantLLM.llm_factory == request.llm_factory])
    return get_json_result(data=True)
@@ -311,10 +352,13 @@ async def delete_factory(request: DeleteFactoryRequest, current_user = Depends(g

@router.get('/my_llms')
async def my_llms(
    include_details: bool = Query(False, description="Whether to include details"),
    query: MyLLMsQuery = Depends(),
    current_user = Depends(get_current_user)
):
    """List my LLMs"""
    try:
        include_details = query.include_details.lower() == 'true'

        if include_details:
            res = {}
            objs = TenantLLMService.query(tenant_id=current_user.id)
@@ -362,11 +406,13 @@ async def my_llms(

@router.get('/list')
async def list_app(
    model_type: Optional[str] = Query(None, description="Model type"),
    query: ListLLMsQuery = Depends(),
    current_user = Depends(get_current_user)
):
    """List LLMs"""
    self_deployed = ["Youdao", "FastEmbed", "BAAI", "Ollama", "Xinference", "LocalAI", "LM-Studio", "GPUStack"]
    weighted = ["Youdao", "FastEmbed", "BAAI"] if settings.LIGHTEN != 0 else []
    model_type = query.model_type
    try:
        objs = TenantLLMService.query(tenant_id=current_user.id)
        facts = set([o.to_dict()["llm_factory"] for o in objs if o.api_key])
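
Editor's note: several factories above (VolcEngine, Tencent Hunyuan/Cloud, XunFei Spark, Bedrock, Azure-OpenAI, OpenRouter) pack multiple credentials into the single api_key column via apikey_json. A standalone sketch of that packing and the unpacking a model wrapper would do (this free function mirrors, but is not, the closure in add_llm):

import json

def apikey_json(req: dict, keys: list[str]) -> str:
    # Pack several credential fields into one JSON string for the api_key column.
    return json.dumps({k: req.get(k, "") for k in keys})

packed = apikey_json({"hunyuan_sid": "sid-123", "hunyuan_sk": "sk-456"}, ["hunyuan_sid", "hunyuan_sk"])
creds = json.loads(packed)  # a wrapper reads the pieces back out
assert creds["hunyuan_sid"] == "sid-123"
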

@@ -13,164 +13,61 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from typing import List, Optional, Dict, Any
from fastapi import APIRouter, Depends, HTTPException, Query
from fastapi.security import HTTPAuthorizationCredentials
from api.utils.api_utils import security
from fastapi import APIRouter, Depends, Query

from api.apps.models.auth_dependencies import get_current_user
from api.apps.models.mcp_models import (
    ListMCPServersQuery,
    ListMCPServersBody,
    CreateMCPServerRequest,
    UpdateMCPServerRequest,
    DeleteMCPServersRequest,
    ImportMCPServersRequest,
    ExportMCPServersRequest,
    ListMCPToolsRequest,
    TestMCPToolRequest,
    CacheMCPToolsRequest,
    TestMCPRequest,
)

from api import settings
from api.db import VALID_MCP_SERVER_TYPES
from api.db.db_models import MCPServer, User
from api.db.db_models import MCPServer
from api.db.services.mcp_server_service import MCPServerService
from api.db.services.user_service import TenantService
from api.settings import RetCode

from api.utils import get_uuid
from api.utils.api_utils import get_data_error_result, get_json_result, server_error_response, get_mcp_tools
from api.utils.web_utils import get_float, safe_json_parse
from api.utils.web_utils import safe_json_parse
from rag.utils.mcp_tool_call_conn import MCPToolCallSession, close_multiple_mcp_toolcall_sessions
from pydantic import BaseModel

# Security

# Pydantic models for request/response
class ListMCPRequest(BaseModel):
    mcp_ids: List[str] = []


class CreateMCPRequest(BaseModel):
    name: str
    url: str
    server_type: str
    headers: Optional[Dict[str, Any]] = {}
    variables: Optional[Dict[str, Any]] = {}
    timeout: Optional[float] = 10


class UpdateMCPRequest(BaseModel):
    mcp_id: str
    name: Optional[str] = None
    url: Optional[str] = None
    server_type: Optional[str] = None
    headers: Optional[Dict[str, Any]] = None
    variables: Optional[Dict[str, Any]] = None
    timeout: Optional[float] = 10


class RemoveMCPRequest(BaseModel):
    mcp_ids: List[str]


class ImportMCPRequest(BaseModel):
    mcpServers: Dict[str, Dict[str, Any]]
    timeout: Optional[float] = 10


class ExportMCPRequest(BaseModel):
    mcp_ids: List[str]


class ListToolsRequest(BaseModel):
    mcp_ids: List[str]
    timeout: Optional[float] = 10


class TestToolRequest(BaseModel):
    mcp_id: str
    tool_name: str
    arguments: Dict[str, Any]
    timeout: Optional[float] = 10


class CacheToolsRequest(BaseModel):
    mcp_id: str
    tools: List[Dict[str, Any]]


class TestMCPRequest(BaseModel):
    url: str
    server_type: str
    timeout: Optional[float] = 10
    headers: Optional[Dict[str, Any]] = {}
    variables: Optional[Dict[str, Any]] = {}

# Dependency injection
async def get_current_user(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """Get the current user"""
    from api.db import StatusEnum
    from api.db.services.user_service import UserService
    from fastapi import HTTPException, status
    import logging

    try:
        from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer
    except ImportError:
        # Fall back to jwt if itsdangerous is unavailable
        import jwt
        Serializer = jwt

    jwt = Serializer(secret_key=settings.SECRET_KEY)
    authorization = credentials.credentials

    if authorization:
        try:
            access_token = str(jwt.loads(authorization))

            if not access_token or not access_token.strip():
                raise HTTPException(
                    status_code=status.HTTP_401_UNAUTHORIZED,
                    detail="Authentication attempt with empty access token"
                )

            # Access tokens should be UUIDs (32 hex characters)
            if len(access_token.strip()) < 32:
                raise HTTPException(
                    status_code=status.HTTP_401_UNAUTHORIZED,
                    detail=f"Authentication attempt with invalid token format: {len(access_token)} chars"
                )

            user = UserService.query(
                access_token=access_token, status=StatusEnum.VALID.value
            )
            if user:
                if not user[0].access_token or not user[0].access_token.strip():
                    raise HTTPException(
                        status_code=status.HTTP_401_UNAUTHORIZED,
                        detail=f"User {user[0].email} has empty access_token in database"
                    )
                return user[0]
            else:
                raise HTTPException(
                    status_code=status.HTTP_401_UNAUTHORIZED,
                    detail="Invalid access token"
                )
        except Exception as e:
            logging.warning(f"load_user got exception {e}")
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="Invalid access token"
            )
    else:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Authorization header required"
        )

# Create router
# Create the router
router = APIRouter()


@router.post("/list")
async def list_mcp(
    request: ListMCPRequest,
    keywords: str = Query("", description="Search keywords"),
    page: int = Query(0, description="Page number"),
    page_size: int = Query(0, description="Items per page"),
    orderby: str = Query("create_time", description="Order by field"),
    desc: bool = Query(True, description="Sort descending"),
    current_user: User = Depends(get_current_user)
    query: ListMCPServersQuery = Depends(),
    body: ListMCPServersBody = None,
    current_user = Depends(get_current_user)
):
    """List MCP servers"""
    if body is None:
        body = ListMCPServersBody()

    keywords = query.keywords or ""
    page_number = int(query.page or 0)
    items_per_page = int(query.page_size or 0)
    orderby = query.orderby or "create_time"
    desc = query.desc.lower() == "true" if query.desc else True

    mcp_ids = body.mcp_ids or []
    try:
        servers = MCPServerService.get_servers(current_user.id, request.mcp_ids, 0, 0, orderby, desc, keywords) or []
        servers = MCPServerService.get_servers(current_user.id, mcp_ids, 0, 0, orderby, desc, keywords) or []
        total = len(servers)

        if page and page_size:
            servers = servers[(page - 1) * page_size : page * page_size]
        if page_number and items_per_page:
            servers = servers[(page_number - 1) * items_per_page : page_number * items_per_page]

        return get_json_result(data={"mcp_servers": servers, "total": total})
    except Exception as e:
@@ -179,9 +76,10 @@ async def list_mcp(

@router.get("/detail")
async def detail(
    mcp_id: str = Query(..., description="MCP server ID"),
    current_user: User = Depends(get_current_user)
    mcp_id: str = Query(..., description="MCP server ID"),
    current_user = Depends(get_current_user)
):
    """Get MCP server detail"""
    try:
        mcp_server = MCPServerService.get_or_none(id=mcp_id, tenant_id=current_user.id)

@@ -195,9 +93,10 @@ async def detail(

@router.post("/create")
async def create(
    request: CreateMCPRequest,
    current_user: User = Depends(get_current_user)
    request: CreateMCPServerRequest,
    current_user = Depends(get_current_user)
):
    """Create an MCP server"""
    server_type = request.server_type
    if server_type not in VALID_MCP_SERVER_TYPES:
        return get_data_error_result(message="Unsupported MCP server type.")
@@ -218,10 +117,10 @@ async def create(
    variables = safe_json_parse(request.variables or {})
    variables.pop("tools", None)

    timeout = request.timeout or 10
    timeout = request.timeout or 10.0

    try:
        req_data = {
        req = {
            "id": get_uuid(),
            "tenant_id": current_user.id,
            "name": server_name,
@@ -229,7 +128,6 @@ async def create(
            "server_type": server_type,
            "headers": headers,
            "variables": variables,
            "timeout": timeout
        }

        e, _ = TenantService.get_by_id(current_user.id)
@@ -244,46 +142,45 @@ async def create(
        tools = server_tools[server_name]
        tools = {tool["name"]: tool for tool in tools if isinstance(tool, dict) and "name" in tool}
        variables["tools"] = tools
        req_data["variables"] = variables
        req["variables"] = variables

        if not MCPServerService.insert(**req_data):
        if not MCPServerService.insert(**req):
            return get_data_error_result("Failed to create MCP server.")

        return get_json_result(data=req_data)
        return get_json_result(data=req)
    except Exception as e:
        return server_error_response(e)


@router.post("/update")
async def update(
    request: UpdateMCPRequest,
    current_user: User = Depends(get_current_user)
    request: UpdateMCPServerRequest,
    current_user = Depends(get_current_user)
):
    """Update an MCP server"""
    mcp_id = request.mcp_id
    e, mcp_server = MCPServerService.get_by_id(mcp_id)
    if not e or mcp_server.tenant_id != current_user.id:
        return get_data_error_result(message=f"Cannot find MCP server {mcp_id} for user {current_user.id}")

    server_type = request.server_type or mcp_server.server_type
    server_type = request.server_type if request.server_type is not None else mcp_server.server_type
    if server_type and server_type not in VALID_MCP_SERVER_TYPES:
        return get_data_error_result(message="Unsupported MCP server type.")

    server_name = request.name or mcp_server.name
    server_name = request.name if request.name is not None else mcp_server.name
    if server_name and len(server_name.encode("utf-8")) > 255:
        return get_data_error_result(message=f"Invalid MCP name or length is {len(server_name)} which is larger than 255.")

    url = request.url or mcp_server.url
    url = request.url if request.url is not None else mcp_server.url
    if not url:
        return get_data_error_result(message="Invalid url.")

    headers = safe_json_parse(request.headers or mcp_server.headers)
    variables = safe_json_parse(request.variables or mcp_server.variables)
    headers = safe_json_parse(request.headers if request.headers is not None else mcp_server.headers)
    variables = safe_json_parse(request.variables if request.variables is not None else mcp_server.variables)
    variables.pop("tools", None)

    timeout = request.timeout or 10
    timeout = request.timeout or 10.0

    try:
        req_data = {
        req = {
            "tenant_id": current_user.id,
            "id": mcp_id,
            "name": server_name,
@@ -291,7 +188,6 @@ async def update(
            "server_type": server_type,
            "headers": headers,
            "variables": variables,
            "timeout": timeout
        }

        mcp_server = MCPServer(id=server_name, name=server_name, url=url, server_type=server_type, variables=variables, headers=headers)
@@ -302,12 +198,12 @@ async def update(
        tools = server_tools[server_name]
        tools = {tool["name"]: tool for tool in tools if isinstance(tool, dict) and "name" in tool}
        variables["tools"] = tools
        req_data["variables"] = variables
        req["variables"] = variables

        if not MCPServerService.filter_update([MCPServer.id == mcp_id, MCPServer.tenant_id == current_user.id], req_data):
        if not MCPServerService.filter_update([MCPServer.id == mcp_id, MCPServer.tenant_id == current_user.id], req):
            return get_data_error_result(message="Failed to update MCP server.")

        e, updated_mcp = MCPServerService.get_by_id(req_data["id"])
        e, updated_mcp = MCPServerService.get_by_id(req["id"])
        if not e:
            return get_data_error_result(message="Failed to fetch updated MCP server.")

@@ -318,9 +214,10 @@ async def update(

@router.post("/rm")
async def rm(
    request: RemoveMCPRequest,
    current_user: User = Depends(get_current_user)
    request: DeleteMCPServersRequest,
    current_user = Depends(get_current_user)
):
    """Delete MCP servers"""
    mcp_ids = request.mcp_ids

    try:
@@ -334,14 +231,15 @@ async def rm(

@router.post("/import")
async def import_multiple(
    request: ImportMCPRequest,
    current_user: User = Depends(get_current_user)
    request: ImportMCPServersRequest,
    current_user = Depends(get_current_user)
):
    """Import MCP servers in bulk"""
    servers = request.mcpServers
    if not servers:
        return get_data_error_result(message="No MCP servers provided.")

    timeout = request.timeout or 10
    timeout = request.timeout or 10.0

    results = []
    try:
@@ -401,9 +299,10 @@ async def import_multiple(

@router.post("/export")
async def export_multiple(
    request: ExportMCPRequest,
    current_user: User = Depends(get_current_user)
    request: ExportMCPServersRequest,
    current_user = Depends(get_current_user)
):
    """Export MCP servers in bulk"""
    mcp_ids = request.mcp_ids

    if not mcp_ids:
@@ -432,12 +331,16 @@ async def export_multiple(


@router.post("/list_tools")
async def list_tools(req: ListToolsRequest, current_user: User = Depends(get_current_user)):
    mcp_ids = req.mcp_ids
async def list_tools(
    request: ListMCPToolsRequest,
    current_user = Depends(get_current_user)
):
    """List MCP tools"""
    mcp_ids = request.mcp_ids
    if not mcp_ids:
        return get_data_error_result(message="No MCP server IDs provided.")

    timeout = req.timeout
    timeout = request.timeout or 10.0

    results = {}
    tool_call_sessions = []
@@ -476,15 +379,19 @@ async def list_tools(req: ListToolsRequest, current_user: User = Depends(get_cur


@router.post("/test_tool")
async def test_tool(req: TestToolRequest, current_user: User = Depends(get_current_user)):
    mcp_id = req.mcp_id
async def test_tool(
    request: TestMCPToolRequest,
    current_user = Depends(get_current_user)
):
    """Test an MCP tool"""
    mcp_id = request.mcp_id
    if not mcp_id:
        return get_data_error_result(message="No MCP server ID provided.")

    timeout = req.timeout
    timeout = request.timeout or 10.0

    tool_name = req.tool_name
    arguments = req.arguments
    tool_name = request.tool_name
    arguments = request.arguments
    if not all([tool_name, arguments]):
        return get_data_error_result(message="Tool name and arguments are required.")

@@ -506,11 +413,15 @@ async def test_tool(req: TestToolRequest, current_user: User = Depends(get_curre


@router.post("/cache_tools")
async def cache_tool(req: CacheToolsRequest, current_user: User = Depends(get_current_user)):
    mcp_id = req.mcp_id
async def cache_tool(
    request: CacheMCPToolsRequest,
    current_user = Depends(get_current_user)
):
    """Cache MCP tools"""
    mcp_id = request.mcp_id
    if not mcp_id:
        return get_data_error_result(message="No MCP server ID provided.")
    tools = req.tools
    tools = request.tools

    e, mcp_server = MCPServerService.get_by_id(mcp_id)
    if not e or mcp_server.tenant_id != current_user.id:
@@ -527,18 +438,21 @@ async def cache_tool(req: CacheToolsRequest, current_user: User = Depends(get_cu


@router.post("/test_mcp")
async def test_mcp(req: TestMCPRequest):
    url = req.url
async def test_mcp(
    request: TestMCPRequest
):
    """Test an MCP server (no login required)"""
    url = request.url
    if not url:
        return get_data_error_result(message="Invalid MCP url.")

    server_type = req.server_type
    server_type = request.server_type
    if server_type not in VALID_MCP_SERVER_TYPES:
        return get_data_error_result(message="Unsupported MCP server type.")

    timeout = req.timeout
    headers = req.headers
    variables = req.variables
    timeout = request.timeout or 10.0
    headers = safe_json_parse(request.headers or {})
    variables = safe_json_parse(request.variables or {})

    mcp_server = MCPServer(id=f"{server_type}: {url}", server_type=server_type, url=url, headers=headers, variables=variables)
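
Editor's note: list_mcp above illustrates the migration's query-model pattern: a Pydantic model bound with Depends() collects the query string, and pagination is a plain 1-based slice over the fetched list. A self-contained sketch of the same idea (the model and route names here are illustrative, not from the commit):

from typing import Optional
from fastapi import Depends, FastAPI
from pydantic import BaseModel

app = FastAPI()

class PageQuery(BaseModel):
    page: Optional[int] = 0
    page_size: Optional[int] = 0

@app.get("/demo/list")
async def demo_list(query: PageQuery = Depends()):
    items = list(range(95))  # stand-in for the fetched servers
    total = len(items)
    if query.page and query.page_size:
        # Same 1-based slice as list_mcp: page 2 of size 10 -> items[10:20]
        items = items[(query.page - 1) * query.page_size : query.page * query.page_size]
    return {"total": total, "items": items}
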

api/apps/models/auth_dependencies.py (new file, 53 lines)
@@ -0,0 +1,53 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from typing import Optional
from fastapi import Depends, Header, Security, HTTPException, status
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from api import settings
from api.utils.api_utils import get_json_result

# Create the HTTPBearer security scheme (auto_error=False lets us customize error handling)
http_bearer = HTTPBearer(auto_error=False)


def get_current_user(credentials: Optional[HTTPAuthorizationCredentials] = Security(http_bearer)):
    """FastAPI dependency: get the current user (replaces Flask's login_required and current_user).

    Using Security(http_bearer) lets FastAPI add the security requirement to the OpenAPI schema
    automatically, so Swagger UI shows an authorize box and attaches the Authorization header to requests.
    """
    # Deferred import to avoid a circular import
    from api.apps.__init___fastapi import get_current_user_from_token

    if not credentials:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Authorization header is required"
        )

    # HTTPBearer has already extracted the Bearer token; credentials.credentials is the token itself
    authorization = credentials.credentials

    user = get_current_user_from_token(authorization)
    if not user:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid or expired token"
        )

    return user

api/apps/models/canvas_models.py (new file, 129 lines)
@@ -0,0 +1,129 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from typing import Optional, List, Dict, Any, Union
from pydantic import BaseModel, Field


class DeleteCanvasRequest(BaseModel):
    """Delete canvas request"""
    canvas_ids: List[str] = Field(..., description="Canvas ID list")


class SaveCanvasRequest(BaseModel):
    """Save canvas request"""
    dsl: Union[str, Dict[str, Any]] = Field(..., description="DSL configuration")
    title: str = Field(..., description="Canvas title")
    id: Optional[str] = Field(default=None, description="Canvas ID (supplied on update)")
    canvas_category: Optional[str] = Field(default=None, description="Canvas category")
    description: Optional[str] = Field(default=None, description="Description")
    permission: Optional[str] = Field(default=None, description="Permission")
    avatar: Optional[str] = Field(default=None, description="Avatar")


class CompletionRequest(BaseModel):
    """Completion/run canvas request"""
    id: str = Field(..., description="Canvas ID")
    query: Optional[str] = Field(default="", description="Query content")
    files: Optional[List[str]] = Field(default=[], description="File list")
    inputs: Optional[Dict[str, Any]] = Field(default={}, description="Input parameters")
    user_id: Optional[str] = Field(default=None, description="User ID")


class RerunRequest(BaseModel):
    """Rerun request"""
    id: str = Field(..., description="Pipeline ID")
    dsl: Dict[str, Any] = Field(..., description="DSL configuration")
    component_id: str = Field(..., description="Component ID")


class ResetCanvasRequest(BaseModel):
    """Reset canvas request"""
    id: str = Field(..., description="Canvas ID")


class InputFormQuery(BaseModel):
    """Get input form query parameters"""
    id: str = Field(..., description="Canvas ID")
    component_id: str = Field(..., description="Component ID")


class DebugRequest(BaseModel):
    """Debug request"""
    id: str = Field(..., description="Canvas ID")
    component_id: str = Field(..., description="Component ID")
    params: Dict[str, Any] = Field(..., description="Parameters")


class TestDBConnectRequest(BaseModel):
    """Test database connection request"""
    db_type: str = Field(..., description="Database type")
    database: str = Field(..., description="Database name")
    username: str = Field(..., description="Username")
    host: str = Field(..., description="Host")
    port: str = Field(..., description="Port")
    password: str = Field(..., description="Password")


class ListCanvasQuery(BaseModel):
    """List canvases query parameters"""
    keywords: Optional[str] = Field(default="", description="Keywords")
    page: Optional[int] = Field(default=0, description="Page number")
    page_size: Optional[int] = Field(default=0, description="Page size")
    orderby: Optional[str] = Field(default="create_time", description="Order-by field")
    desc: Optional[str] = Field(default="true", description="Sort descending")
    canvas_category: Optional[str] = Field(default=None, description="Canvas category")
    owner_ids: Optional[str] = Field(default="", description="Owner ID list (comma-separated)")


class SettingRequest(BaseModel):
    """Canvas settings request"""
    id: str = Field(..., description="Canvas ID")
    title: str = Field(..., description="Title")
    permission: str = Field(..., description="Permission")
    description: Optional[str] = Field(default=None, description="Description")
    avatar: Optional[str] = Field(default=None, description="Avatar")


class TraceQuery(BaseModel):
    """Trace query parameters"""
    canvas_id: str = Field(..., description="Canvas ID")
    message_id: str = Field(..., description="Message ID")


class ListSessionsQuery(BaseModel):
    """List sessions query parameters"""
    user_id: Optional[str] = Field(default=None, description="User ID")
    page: Optional[int] = Field(default=1, description="Page number")
    page_size: Optional[int] = Field(default=30, description="Page size")
    keywords: Optional[str] = Field(default=None, description="Keywords")
    from_date: Optional[str] = Field(default=None, description="Start date")
    to_date: Optional[str] = Field(default=None, description="End date")
    orderby: Optional[str] = Field(default="update_time", description="Order-by field")
    desc: Optional[str] = Field(default="true", description="Sort descending")
    dsl: Optional[str] = Field(default="true", description="Whether to include the DSL")


class DownloadQuery(BaseModel):
    """Download query parameters"""
    id: str = Field(..., description="File ID")
    created_by: str = Field(..., description="Creator ID")


class UploadQuery(BaseModel):
    """Upload query parameters"""
    url: Optional[str] = Field(default=None, description="URL (optional, for downloading from a URL)")

api/apps/models/chunk_models.py (new file, 80 lines)
@@ -0,0 +1,80 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from typing import Optional, List, Any, Union
from pydantic import BaseModel, Field, field_validator


class ListChunksRequest(BaseModel):
    """List document chunks request"""
    doc_id: str = Field(..., description="Document ID")
    page: Optional[int] = Field(default=1, description="Page number")
    size: Optional[int] = Field(default=30, description="Page size")
    keywords: Optional[str] = Field(default="", description="Keywords")
    available_int: Optional[int] = Field(default=None, description="Availability status")


class SetChunkRequest(BaseModel):
    """Set document chunk request"""
    doc_id: str = Field(..., description="Document ID")
    chunk_id: str = Field(..., description="Chunk ID")
    content_with_weight: str = Field(..., description="Content")
    important_kwd: Optional[List[str]] = Field(default=None, description="Important keyword list")
    question_kwd: Optional[List[str]] = Field(default=None, description="Question keyword list")
    tag_kwd: Optional[str] = Field(default=None, description="Tag keyword")
    tag_feas: Optional[Any] = Field(default=None, description="Tag features")
    available_int: Optional[int] = Field(default=None, description="Availability status")


class SwitchChunksRequest(BaseModel):
    """Switch document chunk status request"""
    doc_id: str = Field(..., description="Document ID")
    chunk_ids: List[str] = Field(..., description="Chunk ID list")
    available_int: int = Field(..., description="Availability status")


class DeleteChunksRequest(BaseModel):
    """Delete document chunks request"""
    doc_id: str = Field(..., description="Document ID")
    chunk_ids: List[str] = Field(..., description="Chunk ID list")


class CreateChunkRequest(BaseModel):
    """Create document chunk request"""
    doc_id: str = Field(..., description="Document ID")
    content_with_weight: str = Field(..., description="Content")
    important_kwd: Optional[List[str]] = Field(default=[], description="Important keyword list")
    question_kwd: Optional[List[str]] = Field(default=[], description="Question keyword list")
    tag_feas: Optional[Any] = Field(default=None, description="Tag features")


class RetrievalTestRequest(BaseModel):
    """Retrieval test request"""
    kb_id: Union[str, List[str]] = Field(..., description="Knowledge base ID, a string or a list")
    question: str = Field(..., description="Question")
    page: Optional[int] = Field(default=1, description="Page number")
    size: Optional[int] = Field(default=30, description="Page size")
    doc_ids: Optional[List[str]] = Field(default=[], description="Document ID list")
    use_kg: Optional[bool] = Field(default=False, description="Whether to use the knowledge graph")
    top_k: Optional[int] = Field(default=1024, description="Top K")
    cross_languages: Optional[List[str]] = Field(default=[], description="Cross-language list")
    search_id: Optional[str] = Field(default="", description="Search ID")
    rerank_id: Optional[str] = Field(default=None, description="Rerank model ID")
    keyword: Optional[bool] = Field(default=False, description="Whether to use keywords")
    similarity_threshold: Optional[float] = Field(default=0.0, description="Similarity threshold")
    vector_similarity_weight: Optional[float] = Field(default=0.3, description="Vector similarity weight")
    highlight: Optional[bool] = Field(default=False, description="Whether to highlight")
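
Editor's note: because kb_id is Union[str, List[str]], callers may target one knowledge base or several. A quick sketch of both shapes and the normalization a handler could apply:

single = RetrievalTestRequest(kb_id="kb1", question="What is RAG?")
multi = RetrievalTestRequest(kb_id=["kb1", "kb2"], question="What is RAG?")
# Normalize both shapes to a list before searching:
kb_ids = [single.kb_id] if isinstance(single.kb_id, str) else single.kb_id
assert kb_ids == ["kb1"] and multi.kb_id == ["kb1", "kb2"]
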

api/apps/models/document_models.py (new file, 204 lines)
@@ -0,0 +1,204 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from typing import Optional, Literal, List
from pydantic import BaseModel, Field, model_validator


class CreateDocumentRequest(BaseModel):
    """Create document request

    Two parse types are supported:
    - parse_type=1: use a built-in parser; parser_id is required and pipeline_id must be empty
    - parse_type=2: use a custom pipeline; pipeline_id is required and parser_id must be empty
    If parse_type is omitted, the parsing configuration is inherited from the knowledge base
    """
    name: str
    kb_id: str
    parse_type: Optional[Literal[1, 2]] = Field(default=None, description="Parse type: 1=built-in parser, 2=custom pipeline, None=inherit from knowledge base")
    parser_id: Optional[str] = Field(default="", description="Parser ID, required when parse_type=1")
    pipeline_id: Optional[str] = Field(default="", description="Pipeline ID, required when parse_type=2")
    parser_config: Optional[dict] = None

    @model_validator(mode='after')
    def validate_parse_type_fields(self):
        """Validate the relevant fields according to parse_type"""
        if self.parse_type is not None:
            if self.parse_type == 1:
                # parse_type=1: parser_id is required and pipeline_id must be empty
                parser_id_val = self.parser_id or ""
                pipeline_id_val = self.pipeline_id or ""

                if parser_id_val.strip() == "":
                    raise ValueError("parser_id must not be empty when parse_type=1")
                if pipeline_id_val.strip() != "":
                    raise ValueError("pipeline_id must be empty when parse_type=1")
            elif self.parse_type == 2:
                # parse_type=2: pipeline_id is required and parser_id must be empty
                parser_id_val = self.parser_id or ""
                pipeline_id_val = self.pipeline_id or ""

                if pipeline_id_val.strip() == "":
                    raise ValueError("pipeline_id must not be empty when parse_type=2")
                if parser_id_val.strip() != "":
                    raise ValueError("parser_id must be empty when parse_type=2")
        return self
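
Editor's note: the validator makes parser_id and pipeline_id mutually exclusive per parse_type. A quick sketch of what passes and what raises (the "naive" parser ID is illustrative):

from pydantic import ValidationError

# parse_type=1 wants a parser and no pipeline.
CreateDocumentRequest(name="a.pdf", kb_id="kb1", parse_type=1, parser_id="naive")

# Supplying both is rejected by the model_validator.
try:
    CreateDocumentRequest(name="a.pdf", kb_id="kb1", parse_type=1, parser_id="naive", pipeline_id="p1")
except ValidationError as e:
    print(e.errors()[0]["msg"])  # mentions that pipeline_id must be empty when parse_type=1
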


class ChangeParserRequest(BaseModel):
    """Change document parser request

    Two parse types are supported:
    - parse_type=1: use a built-in parser; parser_id is required and pipeline_id must be empty
    - parse_type=2: use a custom pipeline; pipeline_id is required and parser_id must be empty
    """
    doc_id: str
    parse_type: Literal[1, 2] = Field(..., description="Parse type: 1=built-in parser, 2=custom pipeline")
    parser_id: Optional[str] = Field(default="", description="Parser ID, required when parse_type=1")
    pipeline_id: Optional[str] = Field(default="", description="Pipeline ID, required when parse_type=2")
    parser_config: Optional[dict] = None

    @model_validator(mode='after')
    def validate_parse_type_fields(self):
        """Validate the relevant fields according to parse_type"""
        if self.parse_type == 1:
            # parse_type=1: parser_id is required and pipeline_id must be empty
            parser_id_val = self.parser_id or ""
            pipeline_id_val = self.pipeline_id or ""

            if parser_id_val.strip() == "":
                raise ValueError("parser_id must not be empty when parse_type=1")
            if pipeline_id_val.strip() != "":
                raise ValueError("pipeline_id must be empty when parse_type=1")
        elif self.parse_type == 2:
            # parse_type=2: pipeline_id is required and parser_id must be empty
            parser_id_val = self.parser_id or ""
            pipeline_id_val = self.pipeline_id or ""

            if pipeline_id_val.strip() == "":
                raise ValueError("pipeline_id must not be empty when parse_type=2")
            if parser_id_val.strip() != "":
                raise ValueError("parser_id must be empty when parse_type=2")
        return self


class WebCrawlRequest(BaseModel):
    """Web crawl request"""
    kb_id: str
    name: str
    url: str


class ListDocumentsQuery(BaseModel):
    """List documents query parameters"""
    kb_id: str
    keywords: Optional[str] = ""
    page: Optional[int] = 0
    page_size: Optional[int] = 0
    orderby: Optional[str] = "create_time"
    desc: Optional[str] = "true"
    create_time_from: Optional[int] = 0
    create_time_to: Optional[int] = 0


class ListDocumentsBody(BaseModel):
    """List documents request body"""
    run_status: Optional[List[str]] = []
    types: Optional[List[str]] = []
    suffix: Optional[List[str]] = []


class FilterDocumentsRequest(BaseModel):
    """Filter documents request"""
    kb_id: str
    keywords: Optional[str] = ""
    suffix: Optional[List[str]] = []
    run_status: Optional[List[str]] = []
    types: Optional[List[str]] = []


class GetDocumentInfosRequest(BaseModel):
    """Get document infos request"""
    doc_ids: List[str]


class ChangeStatusRequest(BaseModel):
    """Change document status request"""
    doc_ids: List[str]
    status: str  # "0" or "1"

    @model_validator(mode='after')
    def validate_status(self):
        if self.status not in ["0", "1"]:
            raise ValueError('Status must be either 0 or 1!')
        return self


class DeleteDocumentRequest(BaseModel):
    """Delete document request"""
    doc_id: str | List[str]  # a single ID or a list


class RunDocumentRequest(BaseModel):
    """Run document parsing request"""
    doc_ids: List[str]
    run: str  # a TaskStatus value
    delete: Optional[bool] = False


class RenameDocumentRequest(BaseModel):
    """Rename document request"""
    doc_id: str
    name: str


class ChangeParserSimpleRequest(BaseModel):
    """Simple change-parser request (kept for compatibility with the old logic)"""
    doc_id: str
    parser_id: Optional[str] = None
    pipeline_id: Optional[str] = None
    parser_config: Optional[dict] = None


class UploadAndParseRequest(BaseModel):
    """Upload-and-parse request (used only to validate conversation_id)"""
    conversation_id: str


class ParseRequest(BaseModel):
    """Parse request"""
    url: Optional[str] = None


class SetMetaRequest(BaseModel):
    """Set metadata request"""
    doc_id: str
    meta: str  # JSON string

    @model_validator(mode='after')
    def validate_meta(self):
        import json
        try:
            meta_dict = json.loads(self.meta)
            if not isinstance(meta_dict, dict):
                raise ValueError("Only dictionary type supported.")
            for k, v in meta_dict.items():
                if not isinstance(v, (str, int, float)):
                    raise ValueError(f"The type is not supported: {v}")
        except json.JSONDecodeError as e:
            raise ValueError(f"Json syntax error: {e}")
        return self
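
Editor's note: validate_meta accepts only a flat JSON object whose values are scalars. A quick sketch of a passing and a failing payload:

SetMetaRequest(doc_id="d1", meta='{"author": "Alice", "year": 2024}')  # passes

try:
    SetMetaRequest(doc_id="d1", meta='{"tags": ["a", "b"]}')  # list value -> rejected
except Exception as e:
    print(e)  # mentions: The type is not supported: ['a', 'b']
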
|
||||
|
||||
159
api/apps/models/kb_models.py
Normal file
159
api/apps/models/kb_models.py
Normal file
@@ -0,0 +1,159 @@
|
||||
#
|
||||
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
from typing import Optional, List, Dict, Any, Literal
|
||||
from pydantic import BaseModel, Field, model_validator
|
||||
|
||||
|
||||
class CreateKnowledgeBaseRequest(BaseModel):
|
||||
"""创建知识库请求
|
||||
|
||||
支持两种解析类型:
|
||||
- parse_type=1: 使用内置解析器,需要 parser_id,pipeline_id 为空
|
||||
- parse_type=2: 使用自定义 pipeline,需要 pipeline_id,parser_id 为空
|
||||
"""
|
||||
name: str
|
||||
parse_type: Literal[1, 2] = Field(..., description="解析类型:1=内置解析器,2=自定义pipeline")
|
||||
embd_id: str = Field(..., description="嵌入模型ID")
|
||||
parser_id: Optional[str] = Field(default="", description="解析器ID,parse_type=1时必需")
|
||||
pipeline_id: Optional[str] = Field(default="", description="流水线ID,parse_type=2时必需")
|
||||
description: Optional[str] = None
|
||||
pagerank: Optional[int] = None
|
||||
|
||||
@model_validator(mode='after')
|
||||
def validate_parse_type_fields(self):
|
||||
"""根据 parse_type 验证相应字段"""
|
||||
if self.parse_type == 1:
|
||||
# parse_type=1: 需要 parser_id,pipeline_id 必须为空
|
||||
parser_id_val = self.parser_id or ""
|
||||
pipeline_id_val = self.pipeline_id or ""
|
||||
|
||||
if parser_id_val.strip() == "":
|
||||
raise ValueError("parse_type=1时,parser_id不能为空")
|
||||
if pipeline_id_val.strip() != "":
|
||||
raise ValueError("parse_type=1时,pipeline_id必须为空")
|
||||
elif self.parse_type == 2:
|
||||
# parse_type=2: 需要 pipeline_id,parser_id 必须为空
|
||||
parser_id_val = self.parser_id or ""
|
||||
pipeline_id_val = self.pipeline_id or ""
|
||||
|
||||
if pipeline_id_val.strip() == "":
|
||||
raise ValueError("parse_type=2时,pipeline_id不能为空")
|
||||
if parser_id_val.strip() != "":
|
||||
raise ValueError("parse_type=2时,parser_id必须为空")
|
||||
return self
|
||||
|
||||
|
||||
class UpdateKnowledgeBaseRequest(BaseModel):
    """Update knowledge base request."""
    kb_id: str
    name: str
    description: str
    parser_id: str
    pagerank: Optional[int] = None
    # Other optional fields are allowed, excluding id, tenant_id, created_by, create_time, update_time, create_date, update_date


class DeleteKnowledgeBaseRequest(BaseModel):
    """Delete knowledge base request."""
    kb_id: str


class ListKnowledgeBasesQuery(BaseModel):
    """Query parameters for listing knowledge bases."""
    keywords: Optional[str] = ""
    page: Optional[int] = 0
    page_size: Optional[int] = 0
    parser_id: Optional[str] = None
    orderby: Optional[str] = "create_time"
    desc: Optional[str] = "true"


class ListKnowledgeBasesBody(BaseModel):
    """Request body for listing knowledge bases."""
    owner_ids: Optional[List[str]] = []


class RemoveTagsRequest(BaseModel):
    """Remove tags request."""
    tags: List[str]


class RenameTagRequest(BaseModel):
    """Rename tag request."""
    from_tag: str
    to_tag: str


class ListPipelineLogsQuery(BaseModel):
    """Query parameters for listing pipeline logs."""
    kb_id: str
    keywords: Optional[str] = ""
    page: Optional[int] = 0
    page_size: Optional[int] = 0
    orderby: Optional[str] = "create_time"
    desc: Optional[str] = "true"
    create_date_from: Optional[str] = ""
    create_date_to: Optional[str] = ""


class ListPipelineLogsBody(BaseModel):
    """Request body for listing pipeline logs."""
    operation_status: Optional[List[str]] = []
    types: Optional[List[str]] = []
    suffix: Optional[List[str]] = []


class ListPipelineDatasetLogsQuery(BaseModel):
    """Query parameters for listing pipeline dataset logs."""
    kb_id: str
    page: Optional[int] = 0
    page_size: Optional[int] = 0
    orderby: Optional[str] = "create_time"
    desc: Optional[str] = "true"
    create_date_from: Optional[str] = ""
    create_date_to: Optional[str] = ""


class ListPipelineDatasetLogsBody(BaseModel):
    """Request body for listing pipeline dataset logs."""
    operation_status: Optional[List[str]] = []


class DeletePipelineLogsQuery(BaseModel):
    """Query parameters for deleting pipeline logs."""
    kb_id: str


class DeletePipelineLogsBody(BaseModel):
    """Request body for deleting pipeline logs."""
    log_ids: List[str]


class RunGraphragRequest(BaseModel):
    """Run GraphRAG request."""
    kb_id: str


class RunRaptorRequest(BaseModel):
    """Run RAPTOR request."""
    kb_id: str


class RunMindmapRequest(BaseModel):
    """Run Mindmap request."""
    kb_id: str
101
api/apps/models/llm_models.py
Normal file
@@ -0,0 +1,101 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from typing import Optional
from pydantic import BaseModel, Field


class SetApiKeyRequest(BaseModel):
    """Set API key request."""
    llm_factory: str = Field(..., description="LLM factory name")
    api_key: str = Field(..., description="API key")
    base_url: Optional[str] = Field(default="", description="API base URL")
    model_type: Optional[str] = Field(default=None, description="Model type")
    llm_name: Optional[str] = Field(default=None, description="LLM name")


class AddLLMRequest(BaseModel):
    """Add LLM request."""
    llm_factory: str = Field(..., description="LLM factory name")
    model_type: str = Field(..., description="Model type")
    llm_name: str = Field(..., description="LLM name")
    api_key: Optional[str] = Field(default="x", description="API key")
    api_base: Optional[str] = Field(default="", description="API base URL")
    max_tokens: Optional[int] = Field(default=None, description="Maximum number of tokens")

    # VolcEngine-specific fields
    ark_api_key: Optional[str] = None
    endpoint_id: Optional[str] = None

    # Tencent Hunyuan-specific fields
    hunyuan_sid: Optional[str] = None
    hunyuan_sk: Optional[str] = None

    # Tencent Cloud-specific fields
    tencent_cloud_sid: Optional[str] = None
    tencent_cloud_sk: Optional[str] = None

    # Bedrock-specific fields
    bedrock_ak: Optional[str] = None
    bedrock_sk: Optional[str] = None
    bedrock_region: Optional[str] = None

    # XunFei Spark-specific fields
    spark_api_password: Optional[str] = None
    spark_app_id: Optional[str] = None
    spark_api_secret: Optional[str] = None
    spark_api_key: Optional[str] = None

    # BaiduYiyan-specific fields
    yiyan_ak: Optional[str] = None
    yiyan_sk: Optional[str] = None

    # Fish Audio-specific fields
    fish_audio_ak: Optional[str] = None
    fish_audio_refid: Optional[str] = None

    # Google Cloud-specific fields
    google_project_id: Optional[str] = None
    google_region: Optional[str] = None
    google_service_account_key: Optional[str] = None

    # Azure-OpenAI-specific fields
    api_version: Optional[str] = None

    # OpenRouter-specific fields
    provider_order: Optional[str] = None


class DeleteLLMRequest(BaseModel):
    """Delete LLM request."""
    llm_factory: str = Field(..., description="LLM factory name")
    llm_name: str = Field(..., description="LLM name")


class DeleteFactoryRequest(BaseModel):
    """Delete factory request."""
    llm_factory: str = Field(..., description="LLM factory name")


class MyLLMsQuery(BaseModel):
    """Query parameters for listing my LLMs."""
    include_details: Optional[str] = Field(default="false", description="Whether to include details")


class ListLLMsQuery(BaseModel):
    """Query parameters for listing LLMs."""
    model_type: Optional[str] = Field(default=None, description="Filter by model type")
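A quick hypothetical construction of AddLLMRequest (factory and model names are illustrative; only llm_factory, model_type and llm_name are required, so unused provider-specific secrets can be omitted):

req = AddLLMRequest(
    llm_factory="OpenAI",      # illustrative factory name
    model_type="chat",
    llm_name="gpt-4o-mini",
    api_key="sk-...",
    max_tokens=8192,
)
# Drop the unset provider-specific fields before forwarding the payload.
payload = req.model_dump(exclude_none=True)
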
99
api/apps/models/mcp_models.py
Normal file
@@ -0,0 +1,99 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from typing import Optional, List, Dict, Any
from pydantic import BaseModel, Field


class ListMCPServersQuery(BaseModel):
    """Query parameters for listing MCP servers."""
    keywords: Optional[str] = Field(default="", description="Keywords")
    page: Optional[int] = Field(default=0, description="Page number")
    page_size: Optional[int] = Field(default=0, description="Page size")
    orderby: Optional[str] = Field(default="create_time", description="Sort field")
    desc: Optional[str] = Field(default="true", description="Whether to sort in descending order")


class ListMCPServersBody(BaseModel):
    """Request body for listing MCP servers."""
    mcp_ids: Optional[List[str]] = Field(default=[], description="List of MCP server IDs")


class CreateMCPServerRequest(BaseModel):
    """Create MCP server request."""
    name: str = Field(..., description="Server name")
    url: str = Field(..., description="Server URL")
    server_type: str = Field(..., description="Server type")
    headers: Optional[Dict[str, Any]] = Field(default={}, description="Request headers")
    variables: Optional[Dict[str, Any]] = Field(default={}, description="Variables")
    timeout: Optional[float] = Field(default=10.0, description="Timeout in seconds")


class UpdateMCPServerRequest(BaseModel):
    """Update MCP server request."""
    mcp_id: str = Field(..., description="MCP server ID")
    name: Optional[str] = Field(default=None, description="Server name")
    url: Optional[str] = Field(default=None, description="Server URL")
    server_type: Optional[str] = Field(default=None, description="Server type")
    headers: Optional[Dict[str, Any]] = Field(default=None, description="Request headers")
    variables: Optional[Dict[str, Any]] = Field(default=None, description="Variables")
    timeout: Optional[float] = Field(default=10.0, description="Timeout in seconds")


class DeleteMCPServersRequest(BaseModel):
    """Delete MCP servers request."""
    mcp_ids: List[str] = Field(..., description="List of MCP server IDs")


class ImportMCPServersRequest(BaseModel):
    """Bulk import MCP servers request."""
    mcpServers: Dict[str, Any] = Field(..., description="Dictionary of MCP server configurations")
    timeout: Optional[float] = Field(default=10.0, description="Timeout in seconds")


class ExportMCPServersRequest(BaseModel):
    """Bulk export MCP servers request."""
    mcp_ids: List[str] = Field(..., description="List of MCP server IDs")


class ListMCPToolsRequest(BaseModel):
    """List MCP tools request."""
    mcp_ids: List[str] = Field(..., description="List of MCP server IDs")
    timeout: Optional[float] = Field(default=10.0, description="Timeout in seconds")


class TestMCPToolRequest(BaseModel):
    """Test MCP tool request."""
    mcp_id: str = Field(..., description="MCP server ID")
    tool_name: str = Field(..., description="Tool name")
    arguments: Dict[str, Any] = Field(..., description="Tool arguments")
    timeout: Optional[float] = Field(default=10.0, description="Timeout in seconds")


class CacheMCPToolsRequest(BaseModel):
    """Cache MCP tools request."""
    mcp_id: str = Field(..., description="MCP server ID")
    tools: List[Dict[str, Any]] = Field(..., description="List of tools")


class TestMCPRequest(BaseModel):
    """Test MCP server request (no login required)."""
    url: str = Field(..., description="Server URL")
    server_type: str = Field(..., description="Server type")
    headers: Optional[Dict[str, Any]] = Field(default={}, description="Request headers")
    variables: Optional[Dict[str, Any]] = Field(default={}, description="Variables")
    timeout: Optional[float] = Field(default=10.0, description="Timeout in seconds")
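A hypothetical ImportMCPServersRequest payload, following the common mcpServers JSON convention (the server name, URL and header below are illustrative only):

req = ImportMCPServersRequest(
    mcpServers={
        "weather": {                      # illustrative server entry
            "url": "https://mcp.example.com/sse",
            "type": "sse",
            "headers": {"Authorization": "Bearer <token>"},
        }
    },
    timeout=15.0,
)
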
@@ -1,8 +1,26 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from flask import Response
from flask_login import login_required
from api.utils.api_utils import get_json_result
from plugin import GlobalPluginManager


@manager.route('/llm_tools', methods=['GET']) # noqa: F821
@login_required
def llm_tools() -> Response:

@@ -25,6 +25,7 @@ from api.utils.api_utils import get_data_error_result, get_error_data_result, ge
from api.utils.api_utils import get_result
from flask import request


@manager.route('/agents', methods=['GET']) # noqa: F821
@token_required
def list_agents(tenant_id):
@@ -41,7 +42,7 @@ def list_agents(tenant_id):
        desc = False
    else:
        desc = True
    canvas = UserCanvasService.get_list(tenant_id,page_number,items_per_page,orderby,desc,id,title)
    canvas = UserCanvasService.get_list(tenant_id, page_number, items_per_page, orderby, desc, id, title)
    return get_result(data=canvas)


@@ -93,7 +94,7 @@ def update_agent(tenant_id: str, agent_id: str):
        req["dsl"] = json.dumps(req["dsl"], ensure_ascii=False)

    req["dsl"] = json.loads(req["dsl"])

    if req.get("title") is not None:
        req["title"] = req["title"].strip()

@@ -215,7 +215,8 @@ def delete(tenant_id):
            continue
        kb_id_instance_pairs.append((kb_id, kb))
    if len(error_kb_ids) > 0:
        return get_error_permission_result(message=f"""User '{tenant_id}' lacks permission for datasets: '{", ".join(error_kb_ids)}'""")
        return get_error_permission_result(
            message=f"""User '{tenant_id}' lacks permission for datasets: '{", ".join(error_kb_ids)}'""")

    errors = []
    success_count = 0
@@ -232,7 +233,8 @@ def delete(tenant_id):
                ]
            )
            File2DocumentService.delete_by_document_id(doc.id)
        FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.type == "folder", File.name == kb.name])
        FileService.filter_delete(
            [File.source_type == FileSource.KNOWLEDGEBASE, File.type == "folder", File.name == kb.name])
        if not KnowledgebaseService.delete_by_id(kb_id):
            errors.append(f"Delete dataset error for {kb_id}")
            continue
@@ -329,7 +331,8 @@ def update(tenant_id, dataset_id):
    try:
        kb = KnowledgebaseService.get_or_none(id=dataset_id, tenant_id=tenant_id)
        if kb is None:
            return get_error_permission_result(message=f"User '{tenant_id}' lacks permission for dataset '{dataset_id}'")
            return get_error_permission_result(
                message=f"User '{tenant_id}' lacks permission for dataset '{dataset_id}'")

        if req.get("parser_config"):
            req["parser_config"] = deep_merge(kb.parser_config, req["parser_config"])
@@ -341,7 +344,8 @@ def update(tenant_id, dataset_id):
            del req["parser_config"]

        if "name" in req and req["name"].lower() != kb.name.lower():
            exists = KnowledgebaseService.get_or_none(name=req["name"], tenant_id=tenant_id, status=StatusEnum.VALID.value)
            exists = KnowledgebaseService.get_or_none(name=req["name"], tenant_id=tenant_id,
                                                      status=StatusEnum.VALID.value)
            if exists:
                return get_error_data_result(message=f"Dataset name '{req['name']}' already exists")

@@ -349,7 +353,8 @@ def update(tenant_id, dataset_id):
            if not req["embd_id"]:
                req["embd_id"] = kb.embd_id
            if kb.chunk_num != 0 and req["embd_id"] != kb.embd_id:
                return get_error_data_result(message=f"When chunk_num ({kb.chunk_num}) > 0, embedding_model must remain {kb.embd_id}")
                return get_error_data_result(
                    message=f"When chunk_num ({kb.chunk_num}) > 0, embedding_model must remain {kb.embd_id}")
            ok, err = verify_embedding_availability(req["embd_id"], tenant_id)
            if not ok:
                return err
@@ -359,10 +364,12 @@ def update(tenant_id, dataset_id):
                return get_error_argument_result(message="'pagerank' can only be set when doc_engine is elasticsearch")

            if req["pagerank"] > 0:
                settings.docStoreConn.update({"kb_id": kb.id}, {PAGERANK_FLD: req["pagerank"]}, search.index_name(kb.tenant_id), kb.id)
                settings.docStoreConn.update({"kb_id": kb.id}, {PAGERANK_FLD: req["pagerank"]},
                                             search.index_name(kb.tenant_id), kb.id)
            else:
                # Elasticsearch requires PAGERANK_FLD be non-zero!
                settings.docStoreConn.update({"exists": PAGERANK_FLD}, {"remove": PAGERANK_FLD}, search.index_name(kb.tenant_id), kb.id)
                settings.docStoreConn.update({"exists": PAGERANK_FLD}, {"remove": PAGERANK_FLD},
                                             search.index_name(kb.tenant_id), kb.id)

        if not KnowledgebaseService.update_by_id(kb.id, req):
            return get_error_data_result(message="Update dataset error.(Database error)")
@@ -454,7 +461,7 @@ def list_datasets(tenant_id):
            return get_error_permission_result(message=f"User '{tenant_id}' lacks permission for dataset '{name}'")

        tenants = TenantService.get_joined_tenants_by_user_id(tenant_id)
        kbs = KnowledgebaseService.get_list(
        kbs, total = KnowledgebaseService.get_list(
            [m["tenant_id"] for m in tenants],
            tenant_id,
            args["page"],
@@ -468,14 +475,15 @@ def list_datasets(tenant_id):
        response_data_list = []
        for kb in kbs:
            response_data_list.append(remap_dictionary_keys(kb))
        return get_result(data=response_data_list)
        return get_result(data=response_data_list, total=total)
    except OperationalError as e:
        logging.exception(e)
        return get_error_data_result(message="Database operation failed")


@manager.route('/datasets/<dataset_id>/knowledge_graph', methods=['GET']) # noqa: F821
@token_required
def knowledge_graph(tenant_id,dataset_id):
def knowledge_graph(tenant_id, dataset_id):
    if not KnowledgebaseService.accessible(dataset_id, tenant_id):
        return get_result(
            data=False,
@@ -491,7 +499,7 @@ def knowledge_graph(tenant_id,dataset_id):
    obj = {"graph": {}, "mind_map": {}}
    if not settings.docStoreConn.indexExist(search.index_name(kb.tenant_id), dataset_id):
        return get_result(data=obj)
    sres = settings.retrievaler.search(req, search.index_name(kb.tenant_id), [dataset_id])
    sres = settings.retriever.search(req, search.index_name(kb.tenant_id), [dataset_id])
    if not len(sres.ids):
        return get_result(data=obj)

@@ -507,14 +515,16 @@ def knowledge_graph(tenant_id,dataset_id):
    if "nodes" in obj["graph"]:
        obj["graph"]["nodes"] = sorted(obj["graph"]["nodes"], key=lambda x: x.get("pagerank", 0), reverse=True)[:256]
    if "edges" in obj["graph"]:
        node_id_set = { o["id"] for o in obj["graph"]["nodes"] }
        filtered_edges = [o for o in obj["graph"]["edges"] if o["source"] != o["target"] and o["source"] in node_id_set and o["target"] in node_id_set]
        node_id_set = {o["id"] for o in obj["graph"]["nodes"]}
        filtered_edges = [o for o in obj["graph"]["edges"] if
                          o["source"] != o["target"] and o["source"] in node_id_set and o["target"] in node_id_set]
        obj["graph"]["edges"] = sorted(filtered_edges, key=lambda x: x.get("weight", 0), reverse=True)[:128]
    return get_result(data=obj)


@manager.route('/datasets/<dataset_id>/knowledge_graph', methods=['DELETE']) # noqa: F821
@token_required
def delete_knowledge_graph(tenant_id,dataset_id):
def delete_knowledge_graph(tenant_id, dataset_id):
    if not KnowledgebaseService.accessible(dataset_id, tenant_id):
        return get_result(
            data=False,
@@ -522,6 +532,7 @@ def delete_knowledge_graph(tenant_id,dataset_id):
            code=settings.RetCode.AUTHENTICATION_ERROR
        )
    _, kb = KnowledgebaseService.get_by_id(dataset_id)
    settings.docStoreConn.delete({"knowledge_graph_kwd": ["graph", "subgraph", "entity", "relation"]}, search.index_name(kb.tenant_id), dataset_id)
    settings.docStoreConn.delete({"knowledge_graph_kwd": ["graph", "subgraph", "entity", "relation"]},
                                 search.index_name(kb.tenant_id), dataset_id)

    return get_result(data=True)

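The knowledge_graph handler above trims the response to the 256 highest-pagerank nodes and the 128 heaviest edges whose endpoints survive; the same idea as a standalone sketch (simplified data shapes assumed):

def trim_graph(nodes, edges, max_nodes=256, max_edges=128):
    # Keep the highest-pagerank nodes, then only edges whose endpoints survive.
    nodes = sorted(nodes, key=lambda n: n.get("pagerank", 0), reverse=True)[:max_nodes]
    kept = {n["id"] for n in nodes}
    edges = [e for e in edges
             if e["source"] != e["target"] and e["source"] in kept and e["target"] in kept]
    edges = sorted(edges, key=lambda e: e.get("weight", 0), reverse=True)[:max_edges]
    return nodes, edges
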
@@ -1,4 +1,4 @@
#
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -31,6 +31,89 @@ from api.db.services.dialog_service import meta_filter, convert_conditions
@apikey_required
@validate_request("knowledge_id", "query")
def retrieval(tenant_id):
    """
    Dify-compatible retrieval API
    ---
    tags:
      - SDK
    security:
      - ApiKeyAuth: []
    parameters:
      - in: body
        name: body
        required: true
        schema:
          type: object
          required:
            - knowledge_id
            - query
          properties:
            knowledge_id:
              type: string
              description: Knowledge base ID
            query:
              type: string
              description: Query text
            use_kg:
              type: boolean
              description: Whether to use knowledge graph
              default: false
            retrieval_setting:
              type: object
              description: Retrieval configuration
              properties:
                score_threshold:
                  type: number
                  description: Similarity threshold
                  default: 0.0
                top_k:
                  type: integer
                  description: Number of results to return
                  default: 1024
            metadata_condition:
              type: object
              description: Metadata filter condition
              properties:
                conditions:
                  type: array
                  items:
                    type: object
                    properties:
                      name:
                        type: string
                        description: Field name
                      comparison_operator:
                        type: string
                        description: Comparison operator
                      value:
                        type: string
                        description: Field value
    responses:
      200:
        description: Retrieval succeeded
        schema:
          type: object
          properties:
            records:
              type: array
              items:
                type: object
                properties:
                  content:
                    type: string
                    description: Content text
                  score:
                    type: number
                    description: Similarity score
                  title:
                    type: string
                    description: Document title
                  metadata:
                    type: object
                    description: Metadata info
      404:
        description: Knowledge base or document not found
    """
    req = request.json
    question = req["query"]
    kb_id = req["knowledge_id"]
@@ -38,9 +121,9 @@ def retrieval(tenant_id):
    retrieval_setting = req.get("retrieval_setting", {})
    similarity_threshold = float(retrieval_setting.get("score_threshold", 0.0))
    top = int(retrieval_setting.get("top_k", 1024))
    metadata_condition = req.get("metadata_condition",{})
    metadata_condition = req.get("metadata_condition", {})
    metas = DocumentService.get_meta_by_kbs([kb_id])


    doc_ids = []
    try:

@@ -50,12 +133,12 @@ def retrieval(tenant_id):

        embd_mdl = LLMBundle(kb.tenant_id, LLMType.EMBEDDING.value, llm_name=kb.embd_id)
        print(metadata_condition)
        print("after",convert_conditions(metadata_condition))
        # print("after", convert_conditions(metadata_condition))
        doc_ids.extend(meta_filter(metas, convert_conditions(metadata_condition)))
        print("doc_ids",doc_ids)
        # print("doc_ids", doc_ids)
        if not doc_ids and metadata_condition is not None:
            doc_ids = ['-999']
        ranks = settings.retrievaler.retrieval(
        ranks = settings.retriever.retrieval(
            question,
            embd_mdl,
            kb.tenant_id,
@@ -70,17 +153,17 @@ def retrieval(tenant_id):
        )

        if use_kg:
            ck = settings.kg_retrievaler.retrieval(question,
                                                   [tenant_id],
                                                   [kb_id],
                                                   embd_mdl,
                                                   LLMBundle(kb.tenant_id, LLMType.CHAT))
            ck = settings.kg_retriever.retrieval(question,
                                                 [tenant_id],
                                                 [kb_id],
                                                 embd_mdl,
                                                 LLMBundle(kb.tenant_id, LLMType.CHAT))
            if ck["content_with_weight"]:
                ranks["chunks"].insert(0, ck)

        records = []
        for c in ranks["chunks"]:
            e, doc = DocumentService.get_by_id( c["doc_id"])
            e, doc = DocumentService.get_by_id(c["doc_id"])
            c.pop("vector", None)
            meta = getattr(doc, 'meta_fields', {})
            meta["doc_id"] = c["doc_id"]
@@ -100,5 +183,3 @@ def retrieval(tenant_id):
            )
        logging.exception(e)
        return build_error_result(message=str(e), code=settings.RetCode.SERVER_ERROR)


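For manual testing, a hedged sketch of a call against this Dify-compatible endpoint; the mount path, port, API key and comparison operator below are assumptions, while the body fields follow the schema documented above:

import requests

resp = requests.post(
    "http://localhost:9380/api/v1/dify/retrieval",   # assumed mount path and port
    headers={"Authorization": "Bearer <RAGFLOW_API_KEY>"},
    json={
        "knowledge_id": "<kb_id>",
        "query": "What is RAGFlow?",
        "retrieval_setting": {"score_threshold": 0.2, "top_k": 8},
        "metadata_condition": {
            "conditions": [
                {"name": "author", "comparison_operator": "is", "value": "alice"}  # illustrative condition
            ]
        },
    },
    timeout=30,
)
print(resp.json().get("records", []))
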
@@ -69,7 +69,7 @@ class Chunk(BaseModel):

@manager.route("/datasets/<dataset_id>/documents", methods=["POST"]) # noqa: F821
@token_required
async def upload(dataset_id, tenant_id):
def upload(dataset_id, tenant_id):
    """
    Upload documents to a dataset.
    ---
@@ -151,7 +151,7 @@ async def upload(dataset_id, tenant_id):
    e, kb = KnowledgebaseService.get_by_id(dataset_id)
    if not e:
        raise LookupError(f"Can't find the dataset with ID {dataset_id}!")
    err, files = await FileService.upload_document(kb, file_objs, tenant_id)
    err, files = FileService.upload_document(kb, file_objs, tenant_id)
    if err:
        return get_result(message="\n".join(err), code=settings.RetCode.SERVER_ERROR)
    # rename key's name
@@ -470,6 +470,20 @@ def list_docs(dataset_id, tenant_id):
        required: false
        default: 0
        description: Unix timestamp for filtering documents created before this time. 0 means no filter.
      - in: query
        name: suffix
        type: array
        items:
          type: string
        required: false
        description: Filter by file suffix (e.g., ["pdf", "txt", "docx"]).
      - in: query
        name: run
        type: array
        items:
          type: string
        required: false
        description: Filter by document run status. Supports both numeric ("0", "1", "2", "3", "4") and text formats ("UNSTART", "RUNNING", "CANCEL", "DONE", "FAIL").
      - in: header
        name: Authorization
        type: string
@@ -512,63 +526,62 @@ def list_docs(dataset_id, tenant_id):
          description: Processing status.
    """
    if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
        return get_error_data_result(message=f"You don't own the dataset {dataset_id}. ")
    id = request.args.get("id")
    name = request.args.get("name")
        return get_error_data_result(message=f"You don't own the dataset {dataset_id}. ")

    if id and not DocumentService.query(id=id, kb_id=dataset_id):
        return get_error_data_result(message=f"You don't own the document {id}.")
    q = request.args
    document_id = q.get("id")
    name = q.get("name")

    if document_id and not DocumentService.query(id=document_id, kb_id=dataset_id):
        return get_error_data_result(message=f"You don't own the document {document_id}.")
    if name and not DocumentService.query(name=name, kb_id=dataset_id):
        return get_error_data_result(message=f"You don't own the document {name}.")

    page = int(request.args.get("page", 1))
    keywords = request.args.get("keywords", "")
    page_size = int(request.args.get("page_size", 30))
    orderby = request.args.get("orderby", "create_time")
    if request.args.get("desc") == "False":
        desc = False
    else:
        desc = True
    docs, tol = DocumentService.get_list(dataset_id, page, page_size, orderby, desc, keywords, id, name)
    page = int(q.get("page", 1))
    page_size = int(q.get("page_size", 30))
    orderby = q.get("orderby", "create_time")
    desc = str(q.get("desc", "true")).strip().lower() != "false"
    keywords = q.get("keywords", "")

    create_time_from = int(request.args.get("create_time_from", 0))
    create_time_to = int(request.args.get("create_time_to", 0))
    # filters - align with OpenAPI parameter names
    suffix = q.getlist("suffix")
    run_status = q.getlist("run")
    create_time_from = int(q.get("create_time_from", 0))
    create_time_to = int(q.get("create_time_to", 0))

    # map run status (accept text or numeric) - align with API parameter
    run_status_text_to_numeric = {"UNSTART": "0", "RUNNING": "1", "CANCEL": "2", "DONE": "3", "FAIL": "4"}
    run_status_converted = [run_status_text_to_numeric.get(v, v) for v in run_status]

    docs, total = DocumentService.get_list(
        dataset_id, page, page_size, orderby, desc, keywords, document_id, name, suffix, run_status_converted
    )

    # time range filter (0 means no bound)
    if create_time_from or create_time_to:
        filtered_docs = []
        for doc in docs:
            doc_create_time = doc.get("create_time", 0)
            if (create_time_from == 0 or doc_create_time >= create_time_from) and (create_time_to == 0 or doc_create_time <= create_time_to):
                filtered_docs.append(doc)
        docs = filtered_docs
        docs = [
            d for d in docs
            if (create_time_from == 0 or d.get("create_time", 0) >= create_time_from)
            and (create_time_to == 0 or d.get("create_time", 0) <= create_time_to)
        ]

    # rename key's name
    renamed_doc_list = []
    # rename keys + map run status back to text for output
    key_mapping = {
        "chunk_num": "chunk_count",
        "kb_id": "dataset_id",
        "kb_id": "dataset_id",
        "token_num": "token_count",
        "parser_id": "chunk_method",
    }
    run_mapping = {
        "0": "UNSTART",
        "1": "RUNNING",
        "2": "CANCEL",
        "3": "DONE",
        "4": "FAIL",
    }
    for doc in docs:
        renamed_doc = {}
        for key, value in doc.items():
            if key == "run":
                renamed_doc["run"] = run_mapping.get(str(value))
            new_key = key_mapping.get(key, key)
            renamed_doc[new_key] = value
            if key == "run":
                renamed_doc["run"] = run_mapping.get(value)
        renamed_doc_list.append(renamed_doc)
    return get_result(data={"total": tol, "docs": renamed_doc_list})
    run_status_numeric_to_text = {"0": "UNSTART", "1": "RUNNING", "2": "CANCEL", "3": "DONE", "4": "FAIL"}

    output_docs = []
    for d in docs:
        renamed_doc = {key_mapping.get(k, k): v for k, v in d.items()}
        if "run" in d:
            renamed_doc["run"] = run_status_numeric_to_text.get(str(d["run"]), d["run"])
        output_docs.append(renamed_doc)

    return get_result(data={"total": total, "docs": output_docs})

@manager.route("/datasets/<dataset_id>/documents", methods=["DELETE"]) # noqa: F821
@token_required
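The run-status handling in this hunk is a symmetric text/numeric translation; a standalone sketch distilled from the handler above:

RUN_TEXT_TO_NUM = {"UNSTART": "0", "RUNNING": "1", "CANCEL": "2", "DONE": "3", "FAIL": "4"}
RUN_NUM_TO_TEXT = {v: k for k, v in RUN_TEXT_TO_NUM.items()}

def normalize_run_filters(values):
    # Accept either text ("DONE") or numeric ("3") filters from the query string.
    return [RUN_TEXT_TO_NUM.get(v, v) for v in values]

def present_doc(doc, key_mapping):
    # Rename internal keys and map the stored numeric run status back to text.
    out = {key_mapping.get(k, k): v for k, v in doc.items()}
    if "run" in doc:
        out["run"] = RUN_NUM_TO_TEXT.get(str(doc["run"]), doc["run"])
    return out
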
@@ -982,7 +995,7 @@ def list_chunks(tenant_id, dataset_id, document_id):
            _ = Chunk(**final_chunk)

    elif settings.docStoreConn.indexExist(search.index_name(tenant_id), dataset_id):
        sres = settings.retrievaler.search(query, search.index_name(tenant_id), [dataset_id], emb_mdl=None, highlight=True)
        sres = settings.retriever.search(query, search.index_name(tenant_id), [dataset_id], emb_mdl=None, highlight=True)
        res["total"] = sres.total
        for id in sres.ids:
            d = {
@@ -1446,7 +1459,7 @@ def retrieval_test(tenant_id):
            chat_mdl = LLMBundle(kb.tenant_id, LLMType.CHAT)
            question += keyword_extraction(chat_mdl, question)

        ranks = settings.retrievaler.retrieval(
        ranks = settings.retriever.retrieval(
            question,
            embd_mdl,
            tenant_ids,
@@ -1462,7 +1475,7 @@ def retrieval_test(tenant_id):
            rank_feature=label_question(question, kbs),
        )
        if use_kg:
            ck = settings.kg_retrievaler.retrieval(question, [k.tenant_id for k in kbs], kb_ids, embd_mdl, LLMBundle(kb.tenant_id, LLMType.CHAT))
            ck = settings.kg_retriever.retrieval(question, [k.tenant_id for k in kbs], kb_ids, embd_mdl, LLMBundle(kb.tenant_id, LLMType.CHAT))
            if ck["content_with_weight"]:
                ranks["chunks"].insert(0, ck)

@@ -1,3 +1,20 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import pathlib
import re

@@ -17,7 +34,8 @@ from api.utils.api_utils import get_json_result
from api.utils.file_utils import filename_type
from rag.utils.storage_factory import STORAGE_IMPL

@manager.route('/file/upload', methods=['POST']) # noqa: F821

@manager.route('/file/upload', methods=['POST']) # noqa: F821
@token_required
def upload(tenant_id):
    """
@@ -44,22 +62,22 @@ def upload(tenant_id):
          type: object
          properties:
            data:
            type: array
            items:
              type: object
              properties:
                id:
                  type: string
                  description: File ID
                name:
                  type: string
                  description: File name
                size:
                  type: integer
                  description: File size in bytes
                type:
                  type: string
                  description: File type (e.g., document, folder)
              type: array
              items:
                type: object
                properties:
                  id:
                    type: string
                    description: File ID
                  name:
                    type: string
                    description: File name
                  size:
                    type: integer
                    description: File size in bytes
                  type:
                    type: string
                    description: File type (e.g., document, folder)
    """
    pf_id = request.form.get("parent_id")

@@ -97,12 +115,14 @@ def upload(tenant_id):
            e, file = FileService.get_by_id(file_id_list[len_id_list - 1])
            if not e:
                return get_json_result(data=False, message="Folder not found!", code=404)
            last_folder = FileService.create_folder(file, file_id_list[len_id_list - 1], file_obj_names, len_id_list)
            last_folder = FileService.create_folder(file, file_id_list[len_id_list - 1], file_obj_names,
                                                    len_id_list)
        else:
            e, file = FileService.get_by_id(file_id_list[len_id_list - 2])
            if not e:
                return get_json_result(data=False, message="Folder not found!", code=404)
            last_folder = FileService.create_folder(file, file_id_list[len_id_list - 2], file_obj_names, len_id_list)
            last_folder = FileService.create_folder(file, file_id_list[len_id_list - 2], file_obj_names,
                                                    len_id_list)

        filetype = filename_type(file_obj_names[file_len - 1])
        location = file_obj_names[file_len - 1]
@@ -129,7 +149,7 @@ def upload(tenant_id):
        return server_error_response(e)


@manager.route('/file/create', methods=['POST']) # noqa: F821
@manager.route('/file/create', methods=['POST']) # noqa: F821
@token_required
def create(tenant_id):
    """
@@ -207,7 +227,7 @@ def create(tenant_id):
        return server_error_response(e)


@manager.route('/file/list', methods=['GET']) # noqa: F821
@manager.route('/file/list', methods=['GET']) # noqa: F821
@token_required
def list_files(tenant_id):
    """
@@ -299,7 +319,7 @@ def list_files(tenant_id):
        return server_error_response(e)


@manager.route('/file/root_folder', methods=['GET']) # noqa: F821
@manager.route('/file/root_folder', methods=['GET']) # noqa: F821
@token_required
def get_root_folder(tenant_id):
    """
@@ -335,7 +355,7 @@ def get_root_folder(tenant_id):
        return server_error_response(e)


@manager.route('/file/parent_folder', methods=['GET']) # noqa: F821
@manager.route('/file/parent_folder', methods=['GET']) # noqa: F821
@token_required
def get_parent_folder():
    """
@@ -380,7 +400,7 @@ def get_parent_folder():
        return server_error_response(e)


@manager.route('/file/all_parent_folder', methods=['GET']) # noqa: F821
@manager.route('/file/all_parent_folder', methods=['GET']) # noqa: F821
@token_required
def get_all_parent_folders(tenant_id):
    """
@@ -428,7 +448,7 @@ def get_all_parent_folders(tenant_id):
        return server_error_response(e)


@manager.route('/file/rm', methods=['POST']) # noqa: F821
@manager.route('/file/rm', methods=['POST']) # noqa: F821
@token_required
def rm(tenant_id):
    """
@@ -502,7 +522,7 @@ def rm(tenant_id):
        return server_error_response(e)


@manager.route('/file/rename', methods=['POST']) # noqa: F821
@manager.route('/file/rename', methods=['POST']) # noqa: F821
@token_required
def rename(tenant_id):
    """
@@ -542,7 +562,8 @@ def rename(tenant_id):
        if not e:
            return get_json_result(message="File not found!", code=404)

        if file.type != FileType.FOLDER.value and pathlib.Path(req["name"].lower()).suffix != pathlib.Path(file.name.lower()).suffix:
        if file.type != FileType.FOLDER.value and pathlib.Path(req["name"].lower()).suffix != pathlib.Path(
                file.name.lower()).suffix:
            return get_json_result(data=False, message="The extension of file can't be changed", code=400)

        for existing_file in FileService.query(name=req["name"], pf_id=file.parent_id):
@@ -562,9 +583,9 @@ def rename(tenant_id):
        return server_error_response(e)


@manager.route('/file/get/<file_id>', methods=['GET']) # noqa: F821
@manager.route('/file/get/<file_id>', methods=['GET']) # noqa: F821
@token_required
def get(tenant_id,file_id):
def get(tenant_id, file_id):
    """
    Download a file.
    ---
@@ -610,7 +631,7 @@ def get(tenant_id,file_id):
        return server_error_response(e)


@manager.route('/file/mv', methods=['POST']) # noqa: F821
@manager.route('/file/mv', methods=['POST']) # noqa: F821
@token_required
def move(tenant_id):
    """
@@ -669,6 +690,7 @@ def move(tenant_id):
    except Exception as e:
        return server_error_response(e)


@manager.route('/file/convert', methods=['POST']) # noqa: F821
@token_required
def convert(tenant_id):
@@ -735,4 +757,4 @@ def convert(tenant_id):
        file2documents.append(file2document.to_json())
        return get_json_result(data=file2documents)
    except Exception as e:
        return server_error_response(e)
        return server_error_response(e)

@@ -36,7 +36,8 @@ from api.db.services.llm_service import LLMBundle
from api.db.services.search_service import SearchService
from api.db.services.user_service import UserTenantService
from api.utils import get_uuid
from api.utils.api_utils import check_duplicate_ids, get_data_openai, get_error_data_result, get_json_result, get_result, server_error_response, token_required, validate_request
from api.utils.api_utils import check_duplicate_ids, get_data_openai, get_error_data_result, get_json_result, \
    get_result, server_error_response, token_required, validate_request
from rag.app.tag import label_question
from rag.prompts.template import load_prompt
from rag.prompts.generator import cross_languages, gen_meta_filter, keyword_extraction, chunks_format
@@ -88,7 +89,8 @@ def create_agent_session(tenant_id, agent_id):
        canvas.reset()

    cvs.dsl = json.loads(str(canvas))
    conv = {"id": session_id, "dialog_id": cvs.id, "user_id": user_id, "message": [{"role": "assistant", "content": canvas.get_prologue()}], "source": "agent", "dsl": cvs.dsl}
    conv = {"id": session_id, "dialog_id": cvs.id, "user_id": user_id,
            "message": [{"role": "assistant", "content": canvas.get_prologue()}], "source": "agent", "dsl": cvs.dsl}
    API4ConversationService.save(**conv)
    conv["agent_id"] = conv.pop("dialog_id")
    return get_result(data=conv)
@@ -279,7 +281,7 @@ def chat_completion_openai_like(tenant_id, chat_id):
                reasoning_match = re.search(r"<think>(.*?)</think>", answer, flags=re.DOTALL)
                if reasoning_match:
                    reasoning_part = reasoning_match.group(1)
                    content_part = answer[reasoning_match.end() :]
                    content_part = answer[reasoning_match.end():]
                else:
                    reasoning_part = ""
                    content_part = answer
@@ -324,7 +326,8 @@ def chat_completion_openai_like(tenant_id, chat_id):
                response["choices"][0]["delta"]["content"] = None
                response["choices"][0]["delta"]["reasoning_content"] = None
                response["choices"][0]["finish_reason"] = "stop"
                response["usage"] = {"prompt_tokens": len(prompt), "completion_tokens": token_used, "total_tokens": len(prompt) + token_used}
                response["usage"] = {"prompt_tokens": len(prompt), "completion_tokens": token_used,
                                     "total_tokens": len(prompt) + token_used}
                if need_reference:
                    response["choices"][0]["delta"]["reference"] = chunks_format(last_ans.get("reference", []))
                    response["choices"][0]["delta"]["final_content"] = last_ans.get("answer", "")
@@ -559,7 +562,8 @@ def list_agent_session(tenant_id, agent_id):
        desc = True
    # dsl defaults to True in all cases except for False and false
    include_dsl = request.args.get("dsl") != "False" and request.args.get("dsl") != "false"
    total, convs = API4ConversationService.get_list(agent_id, tenant_id, page_number, items_per_page, orderby, desc, id, user_id, include_dsl)
    total, convs = API4ConversationService.get_list(agent_id, tenant_id, page_number, items_per_page, orderby, desc, id,
                                                    user_id, include_dsl)
    if not convs:
        return get_result(data=[])
    for conv in convs:
@@ -581,7 +585,8 @@ def list_agent_session(tenant_id, agent_id):
            if message_num != 0 and messages[message_num]["role"] != "user":
                chunk_list = []
                # Add boundary and type checks to prevent KeyError
                if chunk_num < len(conv["reference"]) and conv["reference"][chunk_num] is not None and isinstance(conv["reference"][chunk_num], dict) and "chunks" in conv["reference"][chunk_num]:
                if chunk_num < len(conv["reference"]) and conv["reference"][chunk_num] is not None and isinstance(
                        conv["reference"][chunk_num], dict) and "chunks" in conv["reference"][chunk_num]:
                    chunks = conv["reference"][chunk_num]["chunks"]
                    for chunk in chunks:
                        # Ensure chunk is a dictionary before calling get method
@@ -639,13 +644,16 @@ def delete(tenant_id, chat_id):

    if errors:
        if success_count > 0:
            return get_result(data={"success_count": success_count, "errors": errors}, message=f"Partially deleted {success_count} sessions with {len(errors)} errors")
            return get_result(data={"success_count": success_count, "errors": errors},
                              message=f"Partially deleted {success_count} sessions with {len(errors)} errors")
        else:
            return get_error_data_result(message="; ".join(errors))

    if duplicate_messages:
        if success_count > 0:
            return get_result(message=f"Partially deleted {success_count} sessions with {len(duplicate_messages)} errors", data={"success_count": success_count, "errors": duplicate_messages})
            return get_result(
                message=f"Partially deleted {success_count} sessions with {len(duplicate_messages)} errors",
                data={"success_count": success_count, "errors": duplicate_messages})
        else:
            return get_error_data_result(message=";".join(duplicate_messages))

@@ -691,13 +699,16 @@ def delete_agent_session(tenant_id, agent_id):

    if errors:
        if success_count > 0:
            return get_result(data={"success_count": success_count, "errors": errors}, message=f"Partially deleted {success_count} sessions with {len(errors)} errors")
            return get_result(data={"success_count": success_count, "errors": errors},
                              message=f"Partially deleted {success_count} sessions with {len(errors)} errors")
        else:
            return get_error_data_result(message="; ".join(errors))

    if duplicate_messages:
        if success_count > 0:
            return get_result(message=f"Partially deleted {success_count} sessions with {len(duplicate_messages)} errors", data={"success_count": success_count, "errors": duplicate_messages})
            return get_result(
                message=f"Partially deleted {success_count} sessions with {len(duplicate_messages)} errors",
                data={"success_count": success_count, "errors": duplicate_messages})
        else:
            return get_error_data_result(message=";".join(duplicate_messages))

@@ -730,7 +741,9 @@ def ask_about(tenant_id):
            for ans in ask(req["question"], req["kb_ids"], uid):
                yield "data:" + json.dumps({"code": 0, "message": "", "data": ans}, ensure_ascii=False) + "\n\n"
        except Exception as e:
            yield "data:" + json.dumps({"code": 500, "message": str(e), "data": {"answer": "**ERROR**: " + str(e), "reference": []}}, ensure_ascii=False) + "\n\n"
            yield "data:" + json.dumps(
                {"code": 500, "message": str(e), "data": {"answer": "**ERROR**: " + str(e), "reference": []}},
                ensure_ascii=False) + "\n\n"
        yield "data:" + json.dumps({"code": 0, "message": "", "data": True}, ensure_ascii=False) + "\n\n"

    resp = Response(stream(), mimetype="text/event-stream")
@@ -882,7 +895,9 @@ def begin_inputs(agent_id):
        return get_error_data_result(f"Can't find agent by ID: {agent_id}")

    canvas = Canvas(json.dumps(cvs.dsl), objs[0].tenant_id)
    return get_result(data={"title": cvs.title, "avatar": cvs.avatar, "inputs": canvas.get_component_input_form("begin"), "prologue": canvas.get_prologue(), "mode": canvas.get_mode()})
    return get_result(
        data={"title": cvs.title, "avatar": cvs.avatar, "inputs": canvas.get_component_input_form("begin"),
              "prologue": canvas.get_prologue(), "mode": canvas.get_mode()})


@manager.route("/searchbots/ask", methods=["POST"]) # noqa: F821
@@ -911,7 +926,9 @@ def ask_about_embedded():
            for ans in ask(req["question"], req["kb_ids"], uid, search_config=search_config):
                yield "data:" + json.dumps({"code": 0, "message": "", "data": ans}, ensure_ascii=False) + "\n\n"
        except Exception as e:
            yield "data:" + json.dumps({"code": 500, "message": str(e), "data": {"answer": "**ERROR**: " + str(e), "reference": []}}, ensure_ascii=False) + "\n\n"
            yield "data:" + json.dumps(
                {"code": 500, "message": str(e), "data": {"answer": "**ERROR**: " + str(e), "reference": []}},
                ensure_ascii=False) + "\n\n"
        yield "data:" + json.dumps({"code": 0, "message": "", "data": True}, ensure_ascii=False) + "\n\n"

    resp = Response(stream(), mimetype="text/event-stream")
@@ -978,7 +995,8 @@ def retrieval_test_embedded():
                tenant_ids.append(tenant.tenant_id)
                break
        else:
            return get_json_result(data=False, message="Only owner of knowledgebase authorized for this operation.", code=settings.RetCode.OPERATING_ERROR)
            return get_json_result(data=False, message="Only owner of knowledgebase authorized for this operation.",
                                   code=settings.RetCode.OPERATING_ERROR)

        e, kb = KnowledgebaseService.get_by_id(kb_ids[0])
        if not e:
@@ -998,11 +1016,13 @@ def retrieval_test_embedded():
            question += keyword_extraction(chat_mdl, question)

        labels = label_question(question, [kb])
        ranks = settings.retrievaler.retrieval(
            question, embd_mdl, tenant_ids, kb_ids, page, size, similarity_threshold, vector_similarity_weight, top, doc_ids, rerank_mdl=rerank_mdl, highlight=req.get("highlight"), rank_feature=labels
        ranks = settings.retriever.retrieval(
            question, embd_mdl, tenant_ids, kb_ids, page, size, similarity_threshold, vector_similarity_weight, top,
            doc_ids, rerank_mdl=rerank_mdl, highlight=req.get("highlight"), rank_feature=labels
        )
        if use_kg:
            ck = settings.kg_retrievaler.retrieval(question, tenant_ids, kb_ids, embd_mdl, LLMBundle(kb.tenant_id, LLMType.CHAT))
            ck = settings.kg_retriever.retrieval(question, tenant_ids, kb_ids, embd_mdl,
                                                 LLMBundle(kb.tenant_id, LLMType.CHAT))
            if ck["content_with_weight"]:
                ranks["chunks"].insert(0, ck)

@@ -1013,7 +1033,8 @@ def retrieval_test_embedded():
        return get_json_result(data=ranks)
    except Exception as e:
        if str(e).find("not_found") > 0:
            return get_json_result(data=False, message="No chunk found! Check the chunk status please!", code=settings.RetCode.DATA_ERROR)
            return get_json_result(data=False, message="No chunk found! Check the chunk status please!",
                                   code=settings.RetCode.DATA_ERROR)
        return server_error_response(e)


@@ -1082,7 +1103,8 @@ def detail_share_embedded():
        if SearchService.query(tenant_id=tenant.tenant_id, id=search_id):
            break
    else:
        return get_json_result(data=False, message="Has no permission for this operation.", code=settings.RetCode.OPERATING_ERROR)
        return get_json_result(data=False, message="Has no permission for this operation.",
                               code=settings.RetCode.OPERATING_ERROR)

    search = SearchService.get_detail(search_id)
    if not search:

@@ -39,6 +39,7 @@ from rag.utils.redis_conn import REDIS_CONN
from flask import jsonify
from api.utils.health_utils import run_health_checks


@manager.route("/version", methods=["GET"]) # noqa: F821
@login_required
def version():
@@ -161,7 +162,7 @@ def status():
        task_executors = REDIS_CONN.smembers("TASKEXE")
        now = datetime.now().timestamp()
        for task_executor_id in task_executors:
            heartbeats = REDIS_CONN.zrangebyscore(task_executor_id, now - 60*30, now)
            heartbeats = REDIS_CONN.zrangebyscore(task_executor_id, now - 60 * 30, now)
            heartbeats = [json.loads(heartbeat) for heartbeat in heartbeats]
            task_executor_heartbeats[task_executor_id] = heartbeats
    except Exception:
@@ -273,7 +274,8 @@ def token_list():
        objs = [o.to_dict() for o in objs]
        for o in objs:
            if not o["beta"]:
                o["beta"] = generate_confirmation_token(generate_confirmation_token(tenants[0].tenant_id)).replace("ragflow-", "")[:32]
                o["beta"] = generate_confirmation_token(generate_confirmation_token(tenants[0].tenant_id)).replace(
                    "ragflow-", "")[:32]
            APITokenService.filter_update([APIToken.tenant_id == tenant_id, APIToken.token == o["token"]], o)
        return get_json_result(data=objs)
    except Exception as e:
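The status endpoint above reads executor heartbeats from a sorted set scored by timestamp over a 30-minute window; a hedged sketch of the same pattern with plain redis-py (the key layout is assumed):

import json
import time
import redis

r = redis.Redis()

def recent_heartbeats(executor_id: str, window_seconds: int = 60 * 30):
    # Heartbeats are stored in a sorted set scored by timestamp, so a
    # ZRANGEBYSCORE over [now - window, now] returns only recent entries.
    now = time.time()
    raw = r.zrangebyscore(executor_id, now - window_seconds, now)
    return [json.loads(hb) for hb in raw]
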
@@ -14,9 +14,8 @@
# limitations under the License.
#

from fastapi import APIRouter, Depends, HTTPException, Query, status
from api.models.tenant_models import InviteUserRequest, UserTenantResponse
from api.utils.api_utils import get_current_user
from flask import request
from flask_login import login_required, current_user

from api import settings
from api.apps import smtp_mail_server
@@ -25,18 +24,13 @@ from api.db.db_models import UserTenant
from api.db.services.user_service import UserTenantService, UserService

from api.utils import get_uuid, delta_seconds
from api.utils.api_utils import get_json_result, server_error_response, get_data_error_result
from api.utils.api_utils import get_json_result, validate_request, server_error_response, get_data_error_result
from api.utils.web_utils import send_invite_email

# Create the FastAPI router
router = APIRouter()


@router.get("/{tenant_id}/user/list")
async def user_list(
    tenant_id: str,
    current_user = Depends(get_current_user)
):
@manager.route("/<tenant_id>/user/list", methods=["GET"]) # noqa: F821
@login_required
def user_list(tenant_id):
    if current_user.id != tenant_id:
        return get_json_result(
            data=False,
@@ -52,19 +46,18 @@ async def user_list(
        return server_error_response(e)


@router.post('/{tenant_id}/user')
async def create(
    tenant_id: str,
    request: InviteUserRequest,
    current_user = Depends(get_current_user)
):
@manager.route('/<tenant_id>/user', methods=['POST']) # noqa: F821
@login_required
@validate_request("email")
def create(tenant_id):
    if current_user.id != tenant_id:
        return get_json_result(
            data=False,
            message='No authorization.',
            code=settings.RetCode.AUTHENTICATION_ERROR)

    invite_user_email = request.email
    req = request.json
    invite_user_email = req["email"]
    invite_users = UserService.query(email=invite_user_email)
    if not invite_users:
        return get_data_error_result(message="User not found.")
@@ -77,7 +70,8 @@ async def create(
            return get_data_error_result(message=f"{invite_user_email} is already in the team.")
        if user_tenant_role == UserTenantRole.OWNER:
            return get_data_error_result(message=f"{invite_user_email} is the owner of the team.")
        return get_data_error_result(message=f"{invite_user_email} is in the team, but the role: {user_tenant_role} is invalid.")
        return get_data_error_result(
            message=f"{invite_user_email} is in the team, but the role: {user_tenant_role} is invalid.")

    UserTenantService.save(
        id=get_uuid(),
@@ -107,12 +101,9 @@ async def create(
    return get_json_result(data=usr)


@router.delete('/{tenant_id}/user/{user_id}')
async def rm(
    tenant_id: str,
    user_id: str,
    current_user = Depends(get_current_user)
):
@manager.route('/<tenant_id>/user/<user_id>', methods=['DELETE']) # noqa: F821
@login_required
def rm(tenant_id, user_id):
    if current_user.id != tenant_id and current_user.id != user_id:
        return get_json_result(
            data=False,
@@ -126,10 +117,9 @@ async def rm(
        return server_error_response(e)


@router.get("/list")
async def tenant_list(
    current_user = Depends(get_current_user)
):
@manager.route("/list", methods=["GET"]) # noqa: F821
@login_required
def tenant_list():
    try:
        users = UserTenantService.get_tenants_by_user_id(current_user.id)
        for u in users:
@@ -139,13 +129,12 @@ async def tenant_list(
        return server_error_response(e)


@router.put("/agree/{tenant_id}")
async def agree(
    tenant_id: str,
    current_user = Depends(get_current_user)
):
@manager.route("/agree/<tenant_id>", methods=["PUT"]) # noqa: F821
@login_required
def agree(tenant_id):
    try:
        UserTenantService.filter_update([UserTenant.tenant_id == tenant_id, UserTenant.user_id == current_user.id], {"role": UserTenantRole.NORMAL})
        UserTenantService.filter_update([UserTenant.tenant_id == tenant_id, UserTenant.user_id == current_user.id],
                                        {"role": UserTenantRole.NORMAL})
        return get_json_result(data=True)
    except Exception as e:
        return server_error_response(e)

@@ -15,11 +15,14 @@
|
||||
#
|
||||
import json
|
||||
import logging
|
||||
import string
|
||||
import os
|
||||
import re
|
||||
import secrets
|
||||
import time
|
||||
from datetime import datetime
|
||||
|
||||
from flask import redirect, request, session
|
||||
from flask import redirect, request, session, make_response
|
||||
from flask_login import current_user, login_required, login_user, logout_user
|
||||
from werkzeug.security import check_password_hash, generate_password_hash
|
||||
|
||||
@@ -46,6 +49,19 @@ from api.utils.api_utils import (
|
||||
validate_request,
|
||||
)
|
||||
from api.utils.crypt import decrypt
|
||||
from rag.utils.redis_conn import REDIS_CONN
|
||||
from api.apps import smtp_mail_server
|
||||
from api.utils.web_utils import (
|
||||
send_email_html,
|
||||
OTP_LENGTH,
|
||||
OTP_TTL_SECONDS,
|
||||
ATTEMPT_LIMIT,
|
||||
ATTEMPT_LOCK_SECONDS,
|
||||
RESEND_COOLDOWN_SECONDS,
|
||||
otp_keys,
|
||||
hash_code,
|
||||
captcha_key,
|
||||
)
|
||||
|
||||
|
||||
@manager.route("/login", methods=["POST", "GET"]) # noqa: F821
|
||||
@@ -825,3 +841,172 @@ def set_tenant_info():
|
||||
return get_json_result(data=True)
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
|
||||
@manager.route("/forget/captcha", methods=["GET"]) # noqa: F821
|
||||
def forget_get_captcha():
|
||||
"""
|
||||
GET /forget/captcha?email=<email>
|
||||
- Generate an image captcha and cache it in Redis under key captcha:{email} with TTL = OTP_TTL_SECONDS.
|
||||
- Returns the captcha as a PNG image.
|
||||
"""
|
||||
email = (request.args.get("email") or "")
|
||||
if not email:
|
||||
return get_json_result(data=False, code=settings.RetCode.ARGUMENT_ERROR, message="email is required")
|
||||
|
||||
users = UserService.query(email=email)
|
||||
if not users:
|
||||
return get_json_result(data=False, code=settings.RetCode.DATA_ERROR, message="invalid email")
|
||||
|
||||
# Generate captcha text
|
||||
allowed = string.ascii_uppercase + string.digits
|
||||
captcha_text = "".join(secrets.choice(allowed) for _ in range(OTP_LENGTH))
|
||||
REDIS_CONN.set(captcha_key(email), captcha_text, 60) # Valid for 60 seconds
|
||||
|
||||
from captcha.image import ImageCaptcha
|
||||
image = ImageCaptcha(width=300, height=120, font_sizes=[50, 60, 70])
|
||||
img_bytes = image.generate(captcha_text).read()
|
||||
response = make_response(img_bytes)
|
||||
response.headers.set("Content-Type", "image/JPEG")
|
||||
return response
|
||||
|
||||
|
||||
@manager.route("/forget/otp", methods=["POST"]) # noqa: F821
|
||||
def forget_send_otp():
|
||||
"""
|
||||
POST /forget/otp
|
||||
- Verify the image captcha stored at captcha:{email} (case-insensitive).
|
||||
- On success, generate an email OTP (A–Z with length = OTP_LENGTH), store hash + salt (and timestamp) in Redis with TTL, reset attempts and cooldown, and send the OTP via email.
|
||||
"""
|
||||
req = request.get_json()
|
||||
email = req.get("email") or ""
|
||||
captcha = (req.get("captcha") or "").strip()
|
||||
|
||||
if not email or not captcha:
|
||||
return get_json_result(data=False, code=settings.RetCode.ARGUMENT_ERROR, message="email and captcha required")
|
||||
|
||||
users = UserService.query(email=email)
|
||||
if not users:
|
||||
return get_json_result(data=False, code=settings.RetCode.DATA_ERROR, message="invalid email")
|
||||
|
||||
stored_captcha = REDIS_CONN.get(captcha_key(email))
|
||||
if not stored_captcha:
|
||||
return get_json_result(data=False, code=settings.RetCode.NOT_EFFECTIVE, message="invalid or expired captcha")
|
||||
if (stored_captcha or "").strip().lower() != captcha.lower():
|
||||
return get_json_result(data=False, code=settings.RetCode.AUTHENTICATION_ERROR, message="invalid or expired captcha")
|
||||
|
||||
# Delete captcha to prevent reuse
|
||||
REDIS_CONN.delete(captcha_key(email))
|
||||
|
||||
k_code, k_attempts, k_last, k_lock = otp_keys(email)
|
||||
now = int(time.time())
|
||||
last_ts = REDIS_CONN.get(k_last)
|
||||
if last_ts:
|
||||
try:
|
||||
elapsed = now - int(last_ts)
|
||||
except Exception:
|
||||
elapsed = RESEND_COOLDOWN_SECONDS
|
||||
remaining = RESEND_COOLDOWN_SECONDS - elapsed
|
||||
if remaining > 0:
|
||||
return get_json_result(data=False, code=settings.RetCode.NOT_EFFECTIVE, message=f"you still have to wait {remaining} seconds")
|
||||
|
||||
# Generate OTP (uppercase letters only) and store hashed
|
||||
otp = "".join(secrets.choice(string.ascii_uppercase) for _ in range(OTP_LENGTH))
|
||||
salt = os.urandom(16)
|
||||
code_hash = hash_code(otp, salt)
|
||||
REDIS_CONN.set(k_code, f"{code_hash}:{salt.hex()}", OTP_TTL_SECONDS)
|
||||
REDIS_CONN.set(k_attempts, 0, OTP_TTL_SECONDS)
|
||||
REDIS_CONN.set(k_last, now, OTP_TTL_SECONDS)
|
||||
REDIS_CONN.delete(k_lock)
|
||||
|
||||
ttl_min = OTP_TTL_SECONDS // 60
|
||||
|
||||
if not smtp_mail_server:
|
||||
logging.warning("SMTP mail server not initialized; skip sending email.")
|
||||
else:
|
||||
try:
|
||||
send_email_html(
|
||||
subject="Your Password Reset Code",
|
||||
to_email=email,
|
||||
template_key="reset_code",
|
||||
code=otp,
|
||||
ttl_min=ttl_min,
|
||||
)
|
||||
except Exception:
|
||||
return get_json_result(data=False, code=settings.RetCode.SERVER_ERROR, message="failed to send email")
|
||||
|
||||
return get_json_result(data=True, code=settings.RetCode.SUCCESS, message="verification passed, email sent")
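The handler above leans on helpers imported from api.utils.web_utils that this diff does not show. A minimal sketch of what otp_keys and hash_code could look like, consistent with how they are called here (names, constants, and the hash construction are assumptions, not the shipped implementation):

import hashlib

OTP_LENGTH = 6                # assumed captcha/OTP length
OTP_TTL_SECONDS = 600         # assumed 10-minute OTP lifetime
RESEND_COOLDOWN_SECONDS = 60  # assumed resend cooldown

def otp_keys(email: str):
    """Return the four Redis keys the reset flow uses for one email address."""
    base = f"otp:{email}"
    return f"{base}:code", f"{base}:attempts", f"{base}:last", f"{base}:lock"

def hash_code(code: str, salt: bytes) -> str:
    """Salted digest of the OTP; stored in Redis as '<hash>:<salt-hex>'."""
    return hashlib.sha256(salt + code.encode("utf-8")).hexdigest()

Storing only the salted hash means a Redis dump never reveals a live OTP; the verifier recomputes the digest from the submitted code and the stored salt.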

@manager.route("/forget", methods=["POST"]) # noqa: F821
def forget():
    """
    POST: Verify email + OTP and reset password, then log the user in.
    Request JSON: { email, otp, new_password, confirm_new_password }
    """
    req = request.get_json()
    email = req.get("email") or ""
    otp = (req.get("otp") or "").strip()
    new_pwd = req.get("new_password")
    new_pwd2 = req.get("confirm_new_password")

    if not all([email, otp, new_pwd, new_pwd2]):
        return get_json_result(data=False, code=settings.RetCode.ARGUMENT_ERROR, message="email, otp and passwords are required")

    # For reset, passwords are provided as-is (no decrypt needed)
    if new_pwd != new_pwd2:
        return get_json_result(data=False, code=settings.RetCode.ARGUMENT_ERROR, message="passwords do not match")

    users = UserService.query(email=email)
    if not users:
        return get_json_result(data=False, code=settings.RetCode.DATA_ERROR, message="invalid email")

    user = users[0]
    # Verify OTP from Redis
    k_code, k_attempts, k_last, k_lock = otp_keys(email)
    if REDIS_CONN.get(k_lock):
        return get_json_result(data=False, code=settings.RetCode.NOT_EFFECTIVE, message="too many attempts, try later")

    stored = REDIS_CONN.get(k_code)
    if not stored:
        return get_json_result(data=False, code=settings.RetCode.NOT_EFFECTIVE, message="expired otp")

    try:
        stored_hash, salt_hex = str(stored).split(":", 1)
        salt = bytes.fromhex(salt_hex)
    except Exception:
        return get_json_result(data=False, code=settings.RetCode.EXCEPTION_ERROR, message="otp storage corrupted")

    # Case-insensitive verification: OTP generated uppercase
    calc = hash_code(otp.upper(), salt)
    if calc != stored_hash:
        # bump attempts
        try:
            attempts = int(REDIS_CONN.get(k_attempts) or 0) + 1
        except Exception:
            attempts = 1
        REDIS_CONN.set(k_attempts, attempts, OTP_TTL_SECONDS)
        if attempts >= ATTEMPT_LIMIT:
            REDIS_CONN.set(k_lock, int(time.time()), ATTEMPT_LOCK_SECONDS)
        return get_json_result(data=False, code=settings.RetCode.AUTHENTICATION_ERROR, message="invalid otp")

    # Success: consume OTP and reset password
    REDIS_CONN.delete(k_code)
    REDIS_CONN.delete(k_attempts)
    REDIS_CONN.delete(k_last)
    REDIS_CONN.delete(k_lock)

    try:
        UserService.update_user_password(user.id, new_pwd)
    except Exception as e:
        logging.exception(e)
        return get_json_result(data=False, code=settings.RetCode.EXCEPTION_ERROR, message="failed to reset password")

    # Auto login (reuse login flow)
    user.access_token = get_uuid()
    login_user(user)
    user.update_time = current_timestamp()
    user.update_date = datetime_format(datetime.now())
    user.save()
    msg = "Password reset successful. Logged in."
    return construct_response(data=user.to_json(), auth=user.get_id(), message=msg)
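End to end, the reset flow is three calls: fetch the captcha, exchange it for an emailed OTP, then reset with that OTP. A hedged client sketch using requests (the base URL and /v1/user prefix are assumptions; the JSON field names match the handlers above):

import requests

BASE = "http://localhost:9380/v1/user"  # assumed route prefix; adjust to the deployment
email = "someone@example.com"

# 1. Fetch the image captcha; the server caches the text for 60 seconds.
png = requests.get(f"{BASE}/forget/captcha", params={"email": email})
open("captcha.png", "wb").write(png.content)

# 2. Exchange the captcha text for an emailed OTP.
captcha = input("captcha text shown in captcha.png: ")
r = requests.post(f"{BASE}/forget/otp", json={"email": email, "captcha": captcha})
assert r.json().get("data") is True

# 3. Reset the password with the OTP from the email; the response carries the auth header.
otp = input("otp from the email: ")
r = requests.post(f"{BASE}/forget", json={
    "email": email,
    "otp": otp,
    "new_password": "new-secret",
    "confirm_new_password": "new-secret",
})
print(r.headers.get("Authorization"))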

@@ -21,8 +21,7 @@ from datetime import datetime
from typing import Optional, Dict, Any

from fastapi import APIRouter, Depends, HTTPException, Request, Response, status
from fastapi.security import HTTPAuthorizationCredentials
from api.utils.api_utils import security
from api.apps.models.auth_dependencies import get_current_user
from fastapi.responses import RedirectResponse
from pydantic import BaseModel, EmailStr
try:

@@ -89,63 +88,7 @@ class TenantInfoRequest(BaseModel):
    img2txt_id: str
    llm_id: str

# Dependency: get the current user
async def get_current_user(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """Get the current user."""
    from api.db import StatusEnum
    try:
        from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer
    except ImportError:
        # Fall back to jwt if itsdangerous is unavailable
        import jwt
        Serializer = jwt

    jwt = Serializer(secret_key=settings.SECRET_KEY)
    authorization = credentials.credentials

    if authorization:
        try:
            access_token = str(jwt.loads(authorization))

            if not access_token or not access_token.strip():
                raise HTTPException(
                    status_code=status.HTTP_401_UNAUTHORIZED,
                    detail="Authentication attempt with empty access token"
                )

            # Access tokens should be UUIDs (32 hex characters)
            if len(access_token.strip()) < 32:
                raise HTTPException(
                    status_code=status.HTTP_401_UNAUTHORIZED,
                    detail=f"Authentication attempt with invalid token format: {len(access_token)} chars"
                )

            user = UserService.query(
                access_token=access_token, status=StatusEnum.VALID.value
            )
            if user:
                if not user[0].access_token or not user[0].access_token.strip():
                    raise HTTPException(
                        status_code=status.HTTP_401_UNAUTHORIZED,
                        detail=f"User {user[0].email} has empty access_token in database"
                    )
                return user[0]
            else:
                raise HTTPException(
                    status_code=status.HTTP_401_UNAUTHORIZED,
                    detail="Invalid access token"
                )
        except Exception as e:
            logging.warning(f"load_user got exception {e}")
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="Invalid access token"
            )
    else:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Authorization header required"
        )
# Dependency: get the current user - now imported from auth_dependencies
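With the inline dependency deleted, routes obtain the shared dependency through injection. A minimal sketch of the intended usage, assuming only the import this hunk adds (the route name and body are illustrative):

from fastapi import APIRouter, Depends
from api.apps.models.auth_dependencies import get_current_user

router = APIRouter()

@router.get("/tenant_info")
async def tenant_info(user=Depends(get_current_user)):
    # `user` is the ORM row resolved from the bearer token;
    # an invalid or missing token already raised a 401 inside the dependency.
    return {"user_id": user.id}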

@router.post("/login")
async def login(request: LoginRequest):

@@ -1,3 +1,20 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


class AdminException(Exception):
    def __init__(self, message, code=400):
        super().__init__(message)

@@ -18,4 +35,9 @@ class UserAlreadyExistsError(AdminException):

class CannotDeleteAdminError(AdminException):
    def __init__(self):
        super().__init__("Cannot delete admin account", 403)


class NotAdminError(AdminException):
    def __init__(self, username):
        super().__init__(f"User '{username}' is not admin", 403)

@@ -313,9 +313,75 @@ class RetryingPooledMySQLDatabase(PooledMySQLDatabase):
                    raise


class RetryingPooledPostgresqlDatabase(PooledPostgresqlDatabase):
    def __init__(self, *args, **kwargs):
        self.max_retries = kwargs.pop("max_retries", 5)
        self.retry_delay = kwargs.pop("retry_delay", 1)
        super().__init__(*args, **kwargs)

    def execute_sql(self, sql, params=None, commit=True):
        for attempt in range(self.max_retries + 1):
            try:
                return super().execute_sql(sql, params, commit)
            except (OperationalError, InterfaceError) as e:
                # PostgreSQL-specific error codes:
                #   57P01: admin_shutdown
                #   57P02: crash_shutdown
                #   57P03: cannot_connect_now
                #   08006: connection_failure
                #   08003: connection_does_not_exist
                #   08000: connection_exception
                error_messages = ['connection', 'server closed', 'connection refused',
                                  'no connection to the server', 'terminating connection']

                should_retry = any(msg in str(e).lower() for msg in error_messages)

                if should_retry and attempt < self.max_retries:
                    logging.warning(
                        f"PostgreSQL connection issue (attempt {attempt+1}/{self.max_retries}): {e}"
                    )
                    self._handle_connection_loss()
                    time.sleep(self.retry_delay * (2 ** attempt))
                else:
                    logging.error(f"PostgreSQL execution failure: {e}")
                    raise
        return None

    def _handle_connection_loss(self):
        try:
            self.close()
        except Exception:
            pass
        try:
            self.connect()
        except Exception as e:
            logging.error(f"Failed to reconnect to PostgreSQL: {e}")
            time.sleep(0.1)
            self.connect()

    def begin(self):
        for attempt in range(self.max_retries + 1):
            try:
                return super().begin()
            except (OperationalError, InterfaceError) as e:
                error_messages = ['connection', 'server closed', 'connection refused',
                                  'no connection to the server', 'terminating connection']

                should_retry = any(msg in str(e).lower() for msg in error_messages)

                if should_retry and attempt < self.max_retries:
                    logging.warning(
                        f"PostgreSQL connection lost during transaction (attempt {attempt+1}/{self.max_retries})"
                    )
                    self._handle_connection_loss()
                    time.sleep(self.retry_delay * (2 ** attempt))
                else:
                    raise
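With the defaults max_retries=5 and retry_delay=1, the exponential backoff above sleeps 1, 2, 4, 8 and 16 seconds between attempts, about 31 seconds of total grace before the error propagates. A quick check of the arithmetic:

max_retries, retry_delay = 5, 1
delays = [retry_delay * (2 ** attempt) for attempt in range(max_retries)]
print(delays)       # [1, 2, 4, 8, 16]
print(sum(delays))  # 31 seconds of cumulative back-off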


class PooledDatabase(Enum):
    MYSQL = RetryingPooledMySQLDatabase
    POSTGRES = PooledPostgresqlDatabase
    POSTGRES = RetryingPooledPostgresqlDatabase


class DatabaseMigrator(Enum):

@@ -329,12 +395,11 @@ class BaseDataBase:
        database_config = settings.DATABASE.copy()
        db_name = database_config.pop("name")

        # pool_config = {
        #     'max_retries': 5,
        #     'retry_delay': 1,
        # }
        # database_config.update(pool_config)

        pool_config = {
            'max_retries': 5,
            'retry_delay': 1,
        }
        database_config.update(pool_config)
        self.database_connection = PooledDatabase[settings.DATABASE_TYPE.upper()].value(
            db_name, **database_config
        )

@@ -642,7 +707,7 @@ class TenantLLM(DataBaseModel):
    llm_factory = CharField(max_length=128, null=False, help_text="LLM factory name", index=True)
    model_type = CharField(max_length=128, null=True, help_text="LLM, Text Embedding, Image2Text, ASR", index=True)
    llm_name = CharField(max_length=128, null=True, help_text="LLM name", default="", index=True)
    api_key = CharField(max_length=2048, null=True, help_text="API KEY", index=True)
    api_key = TextField(null=True, help_text="API KEY")
    api_base = CharField(max_length=255, null=True, help_text="API Base")
    max_tokens = IntegerField(default=8192, index=True)
    used_tokens = IntegerField(default=0, index=True)

@@ -1143,4 +1208,8 @@ def migrate_db():
        migrate(migrator.add_column("knowledgebase", "mindmap_task_finish_at", CharField(null=True)))
    except Exception:
        pass
    try:
        migrate(migrator.alter_column_type("tenant_llm", "api_key", TextField(null=True, help_text="API KEY")))
    except Exception:
        pass
    logging.disable(logging.NOTSET)

@@ -143,15 +143,12 @@ class UserCanvasService(CommonService):
        ]
        if keywords:
            agents = cls.model.select(*fields).join(User, on=(cls.model.user_id == User.id)).where(
                cls.model.user_id.in_(joined_tenant_ids),
                fn.LOWER(cls.model.title).contains(keywords.lower())
                # (((cls.model.user_id.in_(joined_tenant_ids)) & (cls.model.permission == TenantPermission.TEAM.value)) | (cls.model.user_id == user_id)),
                # (fn.LOWER(cls.model.title).contains(keywords.lower()))
                (((cls.model.user_id.in_(joined_tenant_ids)) & (cls.model.permission == TenantPermission.TEAM.value)) | (cls.model.user_id == user_id)),
                (fn.LOWER(cls.model.title).contains(keywords.lower()))
            )
        else:
            agents = cls.model.select(*fields).join(User, on=(cls.model.user_id == User.id)).where(
                cls.model.user_id.in_(joined_tenant_ids)
                # (((cls.model.user_id.in_(joined_tenant_ids)) & (cls.model.permission == TenantPermission.TEAM.value)) | (cls.model.user_id == user_id))
                (((cls.model.user_id.in_(joined_tenant_ids)) & (cls.model.permission == TenantPermission.TEAM.value)) | (cls.model.user_id == user_id))
            )
        if canvas_category:
            agents = agents.where(cls.model.canvas_category == canvas_category)

@@ -370,7 +370,7 @@ def chat(dialog, messages, stream=True, **kwargs):
        chat_mdl.bind_tools(toolcall_session, tools)
    bind_models_ts = timer()

    retriever = settings.retrievaler
    retriever = settings.retriever
    questions = [m["content"] for m in messages if m["role"] == "user"][-3:]
    attachments = kwargs["doc_ids"].split(",") if "doc_ids" in kwargs else []
    if "doc_ids" in messages[-1]:

@@ -466,13 +466,17 @@ def chat(dialog, messages, stream=True, **kwargs):
                rerank_mdl=rerank_mdl,
                rank_feature=label_question(" ".join(questions), kbs),
            )
            if prompt_config.get("toc_enhance"):
                cks = retriever.retrieval_by_toc(" ".join(questions), kbinfos["chunks"], tenant_ids, chat_mdl, dialog.top_n)
                if cks:
                    kbinfos["chunks"] = cks
            if prompt_config.get("tavily_api_key"):
                tav = Tavily(prompt_config["tavily_api_key"])
                tav_res = tav.retrieve_chunks(" ".join(questions))
                kbinfos["chunks"].extend(tav_res["chunks"])
                kbinfos["doc_aggs"].extend(tav_res["doc_aggs"])
            if prompt_config.get("use_kg"):
                ck = settings.kg_retrievaler.retrieval(" ".join(questions), tenant_ids, dialog.kb_ids, embd_mdl,
                ck = settings.kg_retriever.retrieval(" ".join(questions), tenant_ids, dialog.kb_ids, embd_mdl,
                                                     LLMBundle(dialog.tenant_id, LLMType.CHAT))
                if ck["content_with_weight"]:
                    kbinfos["chunks"].insert(0, ck)

@@ -658,7 +662,7 @@ Please write the SQL, only SQL, without any other explanations or text.

        logging.debug(f"{question} get SQL(refined): {sql}")
        tried_times += 1
        return settings.retrievaler.sql_retrieval(sql, format="json"), sql
        return settings.retriever.sql_retrieval(sql, format="json"), sql

    tbl, sql = get_table()
    if tbl is None:

@@ -752,7 +756,7 @@ def ask(question, kb_ids, tenant_id, chat_llm_name=None, search_config={}):
    embedding_list = list(set([kb.embd_id for kb in kbs]))

    is_knowledge_graph = all([kb.parser_id == ParserType.KG for kb in kbs])
    retriever = settings.retrievaler if not is_knowledge_graph else settings.kg_retrievaler
    retriever = settings.retriever if not is_knowledge_graph else settings.kg_retriever

    embd_mdl = LLMBundle(tenant_id, LLMType.EMBEDDING, embedding_list[0])
    chat_mdl = LLMBundle(tenant_id, LLMType.CHAT, chat_llm_name)

@@ -848,7 +852,7 @@ def gen_mindmap(question, kb_ids, tenant_id, search_config={}):
    if not doc_ids:
        doc_ids = None

    ranks = settings.retrievaler.retrieval(
    ranks = settings.retriever.retrieval(
        question=question,
        embd_mdl=embd_mdl,
        tenant_ids=tenant_ids,

@@ -79,7 +79,7 @@ class DocumentService(CommonService):
    @classmethod
    @DB.connection_context()
    def get_list(cls, kb_id, page_number, items_per_page,
                 orderby, desc, keywords, id, name):
                 orderby, desc, keywords, id, name, suffix=None, run=None):
        fields = cls.get_cls_model_fields()
        docs = cls.model.select(*[*fields, UserCanvas.title]).join(File2Document, on=(File2Document.document_id == cls.model.id))\
            .join(File, on=(File.id == File2Document.file_id))\

@@ -96,6 +96,10 @@ class DocumentService(CommonService):
            docs = docs.where(
                fn.LOWER(cls.model.name).contains(keywords.lower())
            )
        if suffix:
            docs = docs.where(cls.model.suffix.in_(suffix))
        if run:
            docs = docs.where(cls.model.run.in_(run))
        if desc:
            docs = docs.order_by(cls.model.getter_by(orderby).desc())
        else:

@@ -667,9 +671,11 @@ class DocumentService(CommonService):
    @classmethod
    @DB.connection_context()
    def _sync_progress(cls, docs: list[dict]):
        from api.db.services.task_service import TaskService

        for d in docs:
            try:
                tsks = Task.query(doc_id=d["id"], order_by=Task.create_time)
                tsks = TaskService.query(doc_id=d["id"], order_by=Task.create_time)
                if not tsks:
                    continue
                msg = []

@@ -787,21 +793,23 @@ class DocumentService(CommonService):
        "cancelled": int(cancelled),
    }

def queue_raptor_o_graphrag_tasks(doc, ty, priority, fake_doc_id="", doc_ids=[]):
def queue_raptor_o_graphrag_tasks(sample_doc_id, ty, priority, fake_doc_id="", doc_ids=[]):
    """
    You can provide a fake_doc_id to bypass the restriction of tasks at the knowledgebase level.
    Optionally, specify a list of doc_ids to determine which documents participate in the task.
    """
    chunking_config = DocumentService.get_chunking_config(doc["id"])
    assert ty in ["graphrag", "raptor", "mindmap"], "type should be graphrag, raptor or mindmap"

    chunking_config = DocumentService.get_chunking_config(sample_doc_id["id"])
    hasher = xxhash.xxh64()
    for field in sorted(chunking_config.keys()):
        hasher.update(str(chunking_config[field]).encode("utf-8"))

    def new_task():
        nonlocal doc
        nonlocal sample_doc_id
        return {
            "id": get_uuid(),
            "doc_id": fake_doc_id if fake_doc_id else doc["id"],
            "doc_id": sample_doc_id["id"],
            "from_page": 100000000,
            "to_page": 100000000,
            "task_type": ty,

@@ -816,9 +824,9 @@ def queue_raptor_o_graphrag_tasks(doc, ty, priority, fake_doc_id="", doc_ids=[])
    task["digest"] = hasher.hexdigest()
    bulk_insert_into_db(Task, [task], True)

    if ty in ["graphrag", "raptor", "mindmap"]:
        task["doc_ids"] = doc_ids
        DocumentService.begin2parse(doc["id"])
        task["doc_id"] = fake_doc_id
        task["doc_ids"] = doc_ids
        DocumentService.begin2parse(sample_doc_id["id"])
    assert REDIS_CONN.queue_product(get_svr_queue_name(priority), message=task), "Can't access Redis. Please check the Redis' status."
    return task["id"]
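Under the renamed signature, callers pass a sample document dict rather than the document that owns the task. A hypothetical call site, with every value illustrative:

# Any document of the knowledge base can serve as the sample; fake_doc_id
# decouples the queued task from a real document row.
task_id = queue_raptor_o_graphrag_tasks(
    sample_doc_id={"id": "3f2a..."},    # dict carrying the sample document's id
    ty="graphrag",                      # one of: graphrag, raptor, mindmap
    priority=0,
    fake_doc_id="kb-level-task-0001",   # bypasses per-document task restrictions
    doc_ids=["3f2a...", "77bc..."],     # documents participating in the task
)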

@@ -830,7 +838,7 @@ def get_queue_length(priority):
    return int(group_info.get("lag", 0) or 0)


async def doc_upload_and_parse(conversation_id, file_objs, user_id):
def doc_upload_and_parse(conversation_id, file_objs, user_id):
    from api.db.services.api_service import API4ConversationService
    from api.db.services.conversation_service import ConversationService
    from api.db.services.dialog_service import DialogService

@@ -855,7 +863,7 @@ async def doc_upload_and_parse(conversation_id, file_objs, user_id):

    embd_mdl = LLMBundle(kb.tenant_id, LLMType.EMBEDDING, llm_name=kb.embd_id, lang=kb.language)

    err, files = await FileService.upload_document(kb, file_objs, user_id)
    err, files = FileService.upload_document(kb, file_objs, user_id)
    assert not err, "\n".join(err)

    def dummy(prog=None, msg=""):

@@ -15,7 +15,6 @@
#
import logging
import re
import traceback
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path

@@ -421,7 +420,7 @@ class FileService(CommonService):

    @classmethod
    @DB.connection_context()
    async def upload_document(self, kb, file_objs, user_id):
    def upload_document(self, kb, file_objs, user_id):
        root_folder = self.get_root_folder(user_id)
        pf_id = root_folder["id"]
        self.init_knowledgebase_docs(pf_id, user_id)

@@ -441,7 +440,14 @@ class FileService(CommonService):
                while STORAGE_IMPL.obj_exist(kb.id, location):
                    location += "_"

                blob = await file.read()
                # Support FastAPI UploadFile: read synchronously through the underlying file attribute
                if hasattr(file, 'file') and hasattr(file, 'filename'):
                    # FastAPI UploadFile
                    file.file.seek(0)
                    blob = file.file.read()
                else:
                    # Plain file object
                    blob = file.read()
                if filetype == FileType.PDF.value:
                    blob = read_potential_broken_pdf(blob)
                STORAGE_IMPL.put(kb.id, location, blob)

@@ -473,33 +479,33 @@ class FileService(CommonService):
                FileService.add_file_from_kb(doc, kb_folder["id"], kb.tenant_id)
                files.append((doc, blob))
            except Exception as e:
                traceback.print_exc()
                err.append(file.filename + ": " + str(e))

        return err, files

    @classmethod
    @DB.connection_context()
    def list_all_files_by_parent_id(cls, parent_id):
        try:
            files = cls.model.select().where((cls.model.parent_id == parent_id) & (cls.model.id != parent_id))
            return list(files)
        except Exception:
            logging.exception("list_by_parent_id failed")
            raise RuntimeError("Database error (list_by_parent_id)!")

    @staticmethod
    async def parse_docs(file_objs, user_id):
    def parse_docs(file_objs, user_id):
        exe = ThreadPoolExecutor(max_workers=12)
        threads = []
        for file in file_objs:
            # Check if file has an async read method (UploadFile)
            if hasattr(file, 'read') and hasattr(file.read, '__call__'):
                try:
                    # Try to get the coroutine to check if it's async
                    read_result = file.read()
                    if hasattr(read_result, '__await__'):
                        # It's an async method, await it
                        blob = await read_result
                    else:
                        # It's a sync method
                        blob = read_result
                except Exception:
                    # Fall back to sync read
                    blob = file.read()
            # Support FastAPI UploadFile: read synchronously through the underlying file attribute
            if hasattr(file, 'file') and hasattr(file, 'filename'):
                # FastAPI UploadFile
                file.file.seek(0)
                blob = file.file.read()
            else:
                # Plain file object
                blob = file.read()

            threads.append(exe.submit(FileService.parse, file.filename, blob, False))

        res = []
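The same UploadFile-versus-plain-file branch now appears in both upload_document and parse_docs; it could be factored into a single helper. A sketch under that assumption (the name read_blob and its placement are suggestions, not part of this commit):

def read_blob(file) -> bytes:
    """Read the full payload from either a FastAPI UploadFile or a plain file object."""
    if hasattr(file, "file") and hasattr(file, "filename"):
        # FastAPI UploadFile: rewind and read the underlying SpooledTemporaryFile.
        file.file.seek(0)
        return file.file.read()
    # Plain file-like object.
    return file.read()

Reading through file.file keeps these service methods synchronous, which is what allowed the async/await plumbing above to be dropped.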

@@ -379,6 +379,7 @@ class KnowledgebaseService(CommonService):
        # name: Optional name filter
        # Returns:
        #   List of knowledge bases
        #   Total count of knowledge bases
        kbs = cls.model.select()
        if id:
            kbs = kbs.where(cls.model.id == id)

@@ -390,14 +391,16 @@ class KnowledgebaseService(CommonService):
                cls.model.tenant_id == user_id))
            & (cls.model.status == StatusEnum.VALID.value)
        )

        if desc:
            kbs = kbs.order_by(cls.model.getter_by(orderby).desc())
        else:
            kbs = kbs.order_by(cls.model.getter_by(orderby).asc())

        total = kbs.count()
        kbs = kbs.paginate(page_number, items_per_page)

        return list(kbs.dicts())
        return list(kbs.dicts()), total

    @classmethod
    @DB.connection_context()

@@ -205,32 +205,31 @@ class LLMBundle(LLM4Tenant):
            return txt

        return txt[last_think_end + len("</think>"):]

    @staticmethod
    def _clean_param(chat_partial, **kwargs):
        func = chat_partial.func
        sig = inspect.signature(func)
        keyword_args = []
        support_var_args = False
        for param in sig.parameters.values():
            if param.kind == inspect.Parameter.VAR_KEYWORD or param.kind == inspect.Parameter.VAR_POSITIONAL:
                support_var_args = True
            elif param.kind == inspect.Parameter.KEYWORD_ONLY:
                keyword_args.append(param.name)
        allowed_params = set()

        use_kwargs = kwargs
        if not support_var_args:
            use_kwargs = {k: v for k, v in kwargs.items() if k in keyword_args}
        return use_kwargs

        for param in sig.parameters.values():
            if param.kind == inspect.Parameter.VAR_KEYWORD:
                support_var_args = True
            elif param.kind in (inspect.Parameter.POSITIONAL_OR_KEYWORD, inspect.Parameter.KEYWORD_ONLY):
                allowed_params.add(param.name)
        if support_var_args:
            return kwargs
        else:
            return {k: v for k, v in kwargs.items() if k in allowed_params}
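The rewritten _clean_param keeps a kwarg only when the target chat function declares it by name (or accepts **kwargs). A standalone sketch of that filtering logic, with demo_chat standing in for a model's chat method:

import inspect
from functools import partial

def demo_chat(system, history, gen_conf, *, temperature=0.7):
    return temperature

chat_partial = partial(demo_chat, "sys", [], {})
sig = inspect.signature(chat_partial.func)
allowed = {p.name for p in sig.parameters.values()
           if p.kind in (inspect.Parameter.POSITIONAL_OR_KEYWORD, inspect.Parameter.KEYWORD_ONLY)}
kwargs = {"temperature": 0.2, "images": ["..."]}
use_kwargs = {k: v for k, v in kwargs.items() if k in allowed}
print(use_kwargs)                 # {'temperature': 0.2} -- 'images' is dropped
print(chat_partial(**use_kwargs)) # 0.2

Filtering against POSITIONAL_OR_KEYWORD as well as KEYWORD_ONLY is the substantive change: the old code only kept keyword-only parameters, so ordinary named parameters were silently discarded.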

    def chat(self, system: str, history: list, gen_conf: dict = {}, **kwargs) -> str:
        if self.langfuse:
            generation = self.langfuse.start_generation(trace_context=self.trace_context, name="chat", model=self.llm_name, input={"system": system, "history": history})

        chat_partial = partial(self.mdl.chat, system, history, gen_conf)
        chat_partial = partial(self.mdl.chat, system, history, gen_conf, **kwargs)
        if self.is_tools and self.mdl.is_tools:
            chat_partial = partial(self.mdl.chat_with_tools, system, history, gen_conf)
            chat_partial = partial(self.mdl.chat_with_tools, system, history, gen_conf, **kwargs)

        use_kwargs = self._clean_param(chat_partial, **kwargs)
        txt, used_tokens = chat_partial(**use_kwargs)
        txt = self._remove_reasoning_content(txt)

@@ -266,7 +265,7 @@ class LLMBundle(LLM4Tenant):
                break

        if txt.endswith("</think>"):
            ans = ans.rstrip("</think>")
            ans = ans[: -len("</think>")]

        if not self.verbose_tool_use:
            txt = re.sub(r"<tool_call>.*?</tool_call>", "", txt, flags=re.DOTALL)

@@ -33,7 +33,8 @@ class MCPServerService(CommonService):

    @classmethod
    @DB.connection_context()
    def get_servers(cls, tenant_id: str, id_list: list[str] | None, page_number, items_per_page, orderby, desc, keywords):
    def get_servers(cls, tenant_id: str, id_list: list[str] | None, page_number, items_per_page, orderby, desc,
                    keywords):
        """Retrieve all MCP servers associated with a tenant.

        This method fetches all MCP servers for a given tenant, ordered by creation time.

@@ -94,7 +94,8 @@ class SearchService(CommonService):
        query = (
            cls.model.select(*fields)
            .join(User, on=(cls.model.tenant_id == User.id))
            .where(((cls.model.tenant_id.in_(joined_tenant_ids)) | (cls.model.tenant_id == user_id)) & (cls.model.status == StatusEnum.VALID.value))
            .where(((cls.model.tenant_id.in_(joined_tenant_ids)) | (cls.model.tenant_id == user_id)) & (
                cls.model.status == StatusEnum.VALID.value))
        )

        if keywords:

@@ -165,7 +165,7 @@ class TaskService(CommonService):
        ]
        tasks = (
            cls.model.select(*fields).order_by(cls.model.from_page.asc(), cls.model.create_time.desc())
            .where(cls.model.doc_id == doc_id)
            .where(cls.model.doc_id == doc_id)
        )
        tasks = list(tasks.dicts())
        if not tasks:

@@ -205,18 +205,18 @@ class TaskService(CommonService):
            cls.model.select(
                *[Document.id, Document.kb_id, Document.location, File.parent_id]
            )
            .join(Document, on=(cls.model.doc_id == Document.id))
            .join(
            .join(Document, on=(cls.model.doc_id == Document.id))
            .join(
                File2Document,
                on=(File2Document.document_id == Document.id),
                join_type=JOIN.LEFT_OUTER,
            )
            .join(
            .join(
                File,
                on=(File2Document.file_id == File.id),
                join_type=JOIN.LEFT_OUTER,
            )
            .where(
            .where(
                Document.status == StatusEnum.VALID.value,
                Document.run == TaskStatus.RUNNING.value,
                ~(Document.type == FileType.VIRTUAL.value),

@@ -294,8 +294,8 @@ class TaskService(CommonService):
            cls.model.update(progress=prog).where(
                (cls.model.id == id) &
                (
                    (cls.model.progress != -1) &
                    ((prog == -1) | (prog > cls.model.progress))
                    (cls.model.progress != -1) &
                    ((prog == -1) | (prog > cls.model.progress))
                )
            ).execute()
        else:

@@ -343,6 +343,7 @@ def queue_tasks(doc: dict, bucket: str, name: str, priority: int):
    - Task digests are calculated for optimization and reuse
    - Previous task chunks may be reused if available
    """

    def new_task():
        return {
            "id": get_uuid(),

@@ -350,7 +351,7 @@ def queue_tasks(doc: dict, bucket: str, name: str, priority: int):
            "progress": 0.0,
            "from_page": 0,
            "to_page": 100000000,
            "begin_at": datetime.now(),
            "begin_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        }

    parse_task_array = []

@@ -502,7 +503,7 @@ def queue_dataflow(tenant_id:str, flow_id:str, task_id:str, doc_id:str=CANVAS_DE
        to_page=100000000,
        task_type="dataflow" if not rerun else "dataflow_rerun",
        priority=priority,
        begin_at=datetime.now(),
        begin_at=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    )
    if doc_id not in [CANVAS_DEBUG_DOC_ID, GRAPH_RAPTOR_FAKE_DOC_ID]:
        TaskService.model.delete().where(TaskService.model.doc_id == doc_id).execute()

@@ -515,7 +516,7 @@ def queue_dataflow(tenant_id:str, flow_id:str, task_id:str, doc_id:str=CANVAS_DE
    task["file"] = file

    if not REDIS_CONN.queue_product(
        get_svr_queue_name(priority), message=task
        get_svr_queue_name(priority), message=task
    ):
        return False, "Can't access Redis. Please check the Redis' status."
@@ -57,8 +57,10 @@ class TenantLLMService(CommonService):
    @classmethod
    @DB.connection_context()
    def get_my_llms(cls, tenant_id):
        fields = [cls.model.llm_factory, LLMFactories.logo, LLMFactories.tags, cls.model.model_type, cls.model.llm_name, cls.model.used_tokens]
        objs = cls.model.select(*fields).join(LLMFactories, on=(cls.model.llm_factory == LLMFactories.name)).where(cls.model.tenant_id == tenant_id, ~cls.model.api_key.is_null()).dicts()
        fields = [cls.model.llm_factory, LLMFactories.logo, LLMFactories.tags, cls.model.model_type, cls.model.llm_name,
                  cls.model.used_tokens]
        objs = cls.model.select(*fields).join(LLMFactories, on=(cls.model.llm_factory == LLMFactories.name)).where(
            cls.model.tenant_id == tenant_id, ~cls.model.api_key.is_null()).dicts()

        return list(objs)

@@ -122,7 +124,8 @@ class TenantLLMService(CommonService):
            model_config = {"llm_factory": llm[0].fid, "api_key": "", "llm_name": mdlnm, "api_base": ""}
        if not model_config:
            if mdlnm == "flag-embedding":
                model_config = {"llm_factory": "Tongyi-Qianwen", "api_key": "", "llm_name": llm_name, "api_base": ""}
                model_config = {"llm_factory": "Tongyi-Qianwen", "api_key": "", "llm_name": llm_name,
                                "api_base": ""}
            else:
                if not mdlnm:
                    raise LookupError(f"Type of {llm_type} model is not set.")

@@ -137,27 +140,33 @@ class TenantLLMService(CommonService):
        if llm_type == LLMType.EMBEDDING.value:
            if model_config["llm_factory"] not in EmbeddingModel:
                return
            return EmbeddingModel[model_config["llm_factory"]](model_config["api_key"], model_config["llm_name"], base_url=model_config["api_base"])
            return EmbeddingModel[model_config["llm_factory"]](model_config["api_key"], model_config["llm_name"],
                                                               base_url=model_config["api_base"])

        if llm_type == LLMType.RERANK:
            if model_config["llm_factory"] not in RerankModel:
                return
            return RerankModel[model_config["llm_factory"]](model_config["api_key"], model_config["llm_name"], base_url=model_config["api_base"])
            return RerankModel[model_config["llm_factory"]](model_config["api_key"], model_config["llm_name"],
                                                            base_url=model_config["api_base"])

        if llm_type == LLMType.IMAGE2TEXT.value:
            if model_config["llm_factory"] not in CvModel:
                return
            return CvModel[model_config["llm_factory"]](model_config["api_key"], model_config["llm_name"], lang, base_url=model_config["api_base"], **kwargs)
            return CvModel[model_config["llm_factory"]](model_config["api_key"], model_config["llm_name"], lang,
                                                        base_url=model_config["api_base"], **kwargs)

        if llm_type == LLMType.CHAT.value:
            if model_config["llm_factory"] not in ChatModel:
                return
            return ChatModel[model_config["llm_factory"]](model_config["api_key"], model_config["llm_name"], base_url=model_config["api_base"], **kwargs)
            return ChatModel[model_config["llm_factory"]](model_config["api_key"], model_config["llm_name"],
                                                          base_url=model_config["api_base"], **kwargs)

        if llm_type == LLMType.SPEECH2TEXT:
            if model_config["llm_factory"] not in Seq2txtModel:
                return
            return Seq2txtModel[model_config["llm_factory"]](key=model_config["api_key"], model_name=model_config["llm_name"], lang=lang, base_url=model_config["api_base"])
            return Seq2txtModel[model_config["llm_factory"]](key=model_config["api_key"],
                                                             model_name=model_config["llm_name"], lang=lang,
                                                             base_url=model_config["api_base"])
        if llm_type == LLMType.TTS:
            if model_config["llm_factory"] not in TTSModel:
                return

@@ -194,11 +203,14 @@ class TenantLLMService(CommonService):
        try:
            num = (
                cls.model.update(used_tokens=cls.model.used_tokens + used_tokens)
                .where(cls.model.tenant_id == tenant_id, cls.model.llm_name == llm_name, cls.model.llm_factory == llm_factory if llm_factory else True)
                .where(cls.model.tenant_id == tenant_id, cls.model.llm_name == llm_name,
                       cls.model.llm_factory == llm_factory if llm_factory else True)
                .execute()
            )
        except Exception:
            logging.exception("TenantLLMService.increase_usage got exception. Failed to update used_tokens for tenant_id=%s, llm_name=%s", tenant_id, llm_name)
            logging.exception(
                "TenantLLMService.increase_usage got exception. Failed to update used_tokens for tenant_id=%s, llm_name=%s",
                tenant_id, llm_name)
            return 0

        return num

@@ -206,7 +218,9 @@ class TenantLLMService(CommonService):
    @classmethod
    @DB.connection_context()
    def get_openai_models(cls):
        objs = cls.model.select().where((cls.model.llm_factory == "OpenAI"), ~(cls.model.llm_name == "text-embedding-3-small"), ~(cls.model.llm_name == "text-embedding-3-large")).dicts()
        objs = cls.model.select().where((cls.model.llm_factory == "OpenAI"),
                                        ~(cls.model.llm_name == "text-embedding-3-small"),
                                        ~(cls.model.llm_name == "text-embedding-3-large")).dicts()
        return list(objs)

    @classmethod

@@ -250,8 +264,9 @@ class LLM4Tenant:
        langfuse_keys = TenantLangfuseService.filter_by_tenant(tenant_id=tenant_id)
        self.langfuse = None
        if langfuse_keys:
            langfuse = Langfuse(public_key=langfuse_keys.public_key, secret_key=langfuse_keys.secret_key, host=langfuse_keys.host)
            langfuse = Langfuse(public_key=langfuse_keys.public_key, secret_key=langfuse_keys.secret_key,
                                host=langfuse_keys.host)
            if langfuse.auth_check():
                self.langfuse = langfuse
                trace_id = self.langfuse.create_trace_id()
                self.trace_context = {"trace_id": trace_id}
                self.trace_context = {"trace_id": trace_id}

@@ -2,22 +2,22 @@ from api.db.db_models import UserCanvasVersion, DB
from api.db.services.common_service import CommonService
from peewee import DoesNotExist


class UserCanvasVersionService(CommonService):
    model = UserCanvasVersion

    @classmethod
    @DB.connection_context()
    def list_by_canvas_id(cls, user_canvas_id):
        try:
            user_canvas_version = cls.model.select(
                *[cls.model.id,
                  cls.model.create_time,
                  cls.model.title,
                  cls.model.create_date,
                  cls.model.update_date,
                  cls.model.user_canvas_id,
                  cls.model.update_time]
                *[cls.model.id,
                  cls.model.create_time,
                  cls.model.title,
                  cls.model.create_date,
                  cls.model.update_date,
                  cls.model.user_canvas_id,
                  cls.model.update_time]
            ).where(cls.model.user_canvas_id == user_canvas_id)
            return user_canvas_version
        except DoesNotExist:

@@ -46,18 +46,16 @@ class UserCanvasVersionService(CommonService):
    @DB.connection_context()
    def delete_all_versions(cls, user_canvas_id):
        try:
            user_canvas_version = cls.model.select().where(cls.model.user_canvas_id == user_canvas_id).order_by(cls.model.create_time.desc())
            user_canvas_version = cls.model.select().where(cls.model.user_canvas_id == user_canvas_id).order_by(
                cls.model.create_time.desc())
            if user_canvas_version.count() > 20:
                delete_ids = []
                for i in range(20, user_canvas_version.count()):
                    delete_ids.append(user_canvas_version[i].id)

                cls.delete_by_ids(delete_ids)
            return True
        except DoesNotExist:
            return None
        except Exception:
            return None
@@ -1,88 +0,0 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from typing import Optional, List, Dict, Any
from pydantic import BaseModel, Field


class ListChunkRequest(BaseModel):
    """Request model for listing chunks"""
    doc_id: str = Field(..., description="Document ID")
    page: Optional[int] = Field(1, description="Page number")
    size: Optional[int] = Field(30, description="Page size")
    keywords: Optional[str] = Field("", description="Keywords")
    available_int: Optional[int] = Field(None, description="Availability status")


class GetChunkRequest(BaseModel):
    """Request model for fetching a chunk"""
    chunk_id: str = Field(..., description="Chunk ID")


class SetChunkRequest(BaseModel):
    """Request model for updating a chunk"""
    doc_id: str = Field(..., description="Document ID")
    chunk_id: str = Field(..., description="Chunk ID")
    content_with_weight: str = Field(..., description="Weighted content")
    important_kwd: Optional[List[str]] = Field(None, description="Important keywords")
    question_kwd: Optional[List[str]] = Field(None, description="Question keywords")
    tag_kwd: Optional[str] = Field(None, description="Tag keywords")
    tag_feas: Optional[Any] = Field(None, description="Tag features")
    available_int: Optional[int] = Field(None, description="Availability status")


class SwitchChunkRequest(BaseModel):
    """Request model for toggling chunk availability"""
    chunk_ids: List[str] = Field(..., description="Chunk ID list")
    available_int: int = Field(..., description="Availability status")
    doc_id: str = Field(..., description="Document ID")


class RemoveChunkRequest(BaseModel):
    """Request model for removing chunks"""
    chunk_ids: List[str] = Field(..., description="Chunk ID list")
    doc_id: str = Field(..., description="Document ID")


class CreateChunkRequest(BaseModel):
    """Request model for creating a chunk"""
    doc_id: str = Field(..., description="Document ID")
    content_with_weight: str = Field(..., description="Weighted content")
    important_kwd: Optional[List[str]] = Field([], description="Important keywords")
    question_kwd: Optional[List[str]] = Field([], description="Question keywords")
    tag_feas: Optional[Any] = Field(None, description="Tag features")


class RetrievalTestRequest(BaseModel):
    """Request model for retrieval testing"""
    kb_id: List[str] = Field(..., description="Knowledge base ID list")
    question: str = Field(..., description="Question")
    page: Optional[int] = Field(1, description="Page number")
    size: Optional[int] = Field(30, description="Page size")
    doc_ids: Optional[List[str]] = Field([], description="Document ID list")
    use_kg: Optional[bool] = Field(False, description="Whether to use the knowledge graph")
    top_k: Optional[int] = Field(1024, description="Number of results to return")
    cross_languages: Optional[List[str]] = Field([], description="Cross-language list")
    search_id: Optional[str] = Field("", description="Search ID")
    rerank_id: Optional[str] = Field(None, description="Rerank model ID")
    keyword: Optional[bool] = Field(False, description="Whether to use keyword matching")
    similarity_threshold: Optional[float] = Field(0.0, description="Similarity threshold")
    vector_similarity_weight: Optional[float] = Field(0.3, description="Vector similarity weight")
    highlight: Optional[bool] = Field(None, description="Whether to highlight matches")


class KnowledgeGraphRequest(BaseModel):
    """Request model for the knowledge graph"""
    doc_id: str = Field(..., description="Document ID")
@@ -1,98 +0,0 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from typing import Optional, List, Dict, Any
from pydantic import BaseModel, Field


class CreateKnowledgeBaseRequest(BaseModel):
    """Request model for creating a knowledge base"""
    name: str = Field(..., description="Knowledge base name")
    description: Optional[str] = Field(None, description="Knowledge base description")
    parser_id: Optional[str] = Field("naive", description="Parser ID")
    parser_config: Optional[Dict[str, Any]] = Field(None, description="Parser configuration")
    embd_id: Optional[str] = Field(None, description="Embedding model ID")


class UpdateKnowledgeBaseRequest(BaseModel):
    """Request model for updating a knowledge base"""
    kb_id: str = Field(..., description="Knowledge base ID")
    name: str = Field(..., description="Knowledge base name")
    description: Optional[str] = Field(None, description="Knowledge base description")
    permission: Optional[str] = Field(None, description="Permission setting")
    avatar: Optional[str] = Field(None, description="Avatar as a base64 string")
    parser_id: Optional[str] = Field(None, description="Parser ID")
    embd_id: Optional[str] = Field(None, description="Embedding model ID")
    parser_config: Optional[Dict[str, Any]] = Field(None, description="Parser configuration")
    pagerank: Optional[int] = Field(0, description="Page rank")


class DeleteKnowledgeBaseRequest(BaseModel):
    """Request model for deleting a knowledge base"""
    kb_id: str = Field(..., description="Knowledge base ID")


class ListKnowledgeBasesRequest(BaseModel):
    """Request model for listing knowledge bases"""
    owner_ids: Optional[List[str]] = Field([], description="Owner ID list")


class RemoveTagsRequest(BaseModel):
    """Request model for removing tags"""
    tags: List[str] = Field(..., description="Tags to remove")


class RenameTagRequest(BaseModel):
    """Request model for renaming a tag"""
    from_tag: str = Field(..., description="Original tag name")
    to_tag: str = Field(..., description="New tag name")


class RunGraphRAGRequest(BaseModel):
    """Request model for running GraphRAG"""
    kb_id: str = Field(..., description="Knowledge base ID")


class RunRaptorRequest(BaseModel):
    """Request model for running RAPTOR"""
    kb_id: str = Field(..., description="Knowledge base ID")


class RunMindmapRequest(BaseModel):
    """Request model for running Mindmap"""
    kb_id: str = Field(..., description="Knowledge base ID")


class ListPipelineLogsRequest(BaseModel):
    """Request model for listing pipeline logs"""
    operation_status: Optional[List[str]] = Field([], description="Operation status list")
    types: Optional[List[str]] = Field([], description="File type list")
    suffix: Optional[List[str]] = Field([], description="File suffix list")


class ListPipelineDatasetLogsRequest(BaseModel):
    """Request model for listing pipeline dataset logs"""
    operation_status: Optional[List[str]] = Field([], description="Operation status list")


class DeletePipelineLogsRequest(BaseModel):
    """Request model for deleting pipeline logs"""
    log_ids: List[str] = Field(..., description="Log ID list")


class UnbindTaskRequest(BaseModel):
    """Request model for unbinding a task"""
    kb_id: str = Field(..., description="Knowledge base ID")
    pipeline_task_type: str = Field(..., description="Pipeline task type")
@@ -1,84 +0,0 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from typing import Optional, Dict, Any
from pydantic import BaseModel, Field


class SetApiKeyRequest(BaseModel):
    """Request model for setting an API key"""
    llm_factory: str = Field(..., description="LLM factory name")
    api_key: str = Field(..., description="API key")
    base_url: Optional[str] = Field(None, description="Base URL")


class AddLLMRequest(BaseModel):
    """Request model for adding an LLM"""
    llm_factory: str = Field(..., description="LLM factory name")
    api_key: Optional[str] = Field("x", description="API key")
    llm_name: Optional[str] = Field(None, description="LLM name")
    model_type: Optional[str] = Field(None, description="Model type")
    api_base: Optional[str] = Field(None, description="API base URL")
    max_tokens: Optional[int] = Field(None, description="Maximum token count")

    # VolcEngine specific fields
    ark_api_key: Optional[str] = Field(None, description="VolcEngine ARK API key")
    endpoint_id: Optional[str] = Field(None, description="VolcEngine endpoint ID")

    # Tencent Hunyuan specific fields
    hunyuan_sid: Optional[str] = Field(None, description="Tencent Hunyuan SID")
    hunyuan_sk: Optional[str] = Field(None, description="Tencent Hunyuan SK")

    # Tencent Cloud specific fields
    tencent_cloud_sid: Optional[str] = Field(None, description="Tencent Cloud SID")
    tencent_cloud_sk: Optional[str] = Field(None, description="Tencent Cloud SK")

    # Bedrock specific fields
    bedrock_ak: Optional[str] = Field(None, description="Bedrock access key")
    bedrock_sk: Optional[str] = Field(None, description="Bedrock secret key")
    bedrock_region: Optional[str] = Field(None, description="Bedrock region")

    # XunFei Spark specific fields
    spark_api_password: Optional[str] = Field(None, description="XunFei Spark API password")
    spark_app_id: Optional[str] = Field(None, description="XunFei Spark app ID")
    spark_api_secret: Optional[str] = Field(None, description="XunFei Spark API secret")
    spark_api_key: Optional[str] = Field(None, description="XunFei Spark API key")

    # BaiduYiyan specific fields
    yiyan_ak: Optional[str] = Field(None, description="Baidu Yiyan AK")
    yiyan_sk: Optional[str] = Field(None, description="Baidu Yiyan SK")

    # Fish Audio specific fields
    fish_audio_ak: Optional[str] = Field(None, description="Fish Audio AK")
    fish_audio_refid: Optional[str] = Field(None, description="Fish Audio reference ID")

    # Google Cloud specific fields
    google_project_id: Optional[str] = Field(None, description="Google Cloud project ID")
    google_region: Optional[str] = Field(None, description="Google Cloud region")
    google_service_account_key: Optional[str] = Field(None, description="Google Cloud service account key")

    # Azure OpenAI specific fields
    api_version: Optional[str] = Field(None, description="Azure OpenAI API version")


class DeleteLLMRequest(BaseModel):
    """Request model for deleting an LLM"""
    llm_factory: str = Field(..., description="LLM factory name")
    llm_name: str = Field(..., description="LLM name")


class DeleteFactoryRequest(BaseModel):
    """Request model for deleting a factory"""
    llm_factory: str = Field(..., description="LLM factory name")
@@ -1,30 +0,0 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from typing import Optional
from pydantic import BaseModel, Field


class InviteUserRequest(BaseModel):
    """Request model for inviting a user"""
    email: str = Field(..., description="User email")


class UserTenantResponse(BaseModel):
    """Response model for a user tenant"""
    id: str = Field(..., description="User ID")
    avatar: Optional[str] = Field(None, description="User avatar")
    email: str = Field(..., description="User email")
    nickname: Optional[str] = Field(None, description="User nickname")
@@ -65,8 +65,8 @@ OAUTH_CONFIG = None
DOC_ENGINE = None
docStoreConn = None

retrievaler = None
kg_retrievaler = None
retriever = None
kg_retriever = None

# user registration switch
REGISTER_ENABLED = 1

@@ -174,7 +174,7 @@ def init_settings():

    OAUTH_CONFIG = get_base_config("oauth", {})

    global DOC_ENGINE, docStoreConn, retrievaler, kg_retrievaler
    global DOC_ENGINE, docStoreConn, retriever, kg_retriever
    DOC_ENGINE = os.environ.get("DOC_ENGINE", "elasticsearch")
    # DOC_ENGINE = os.environ.get('DOC_ENGINE', "opensearch")
    lower_case_doc_engine = DOC_ENGINE.lower()

@@ -187,10 +187,10 @@ def init_settings():
    else:
        raise Exception(f"Not supported doc engine: {DOC_ENGINE}")

    retrievaler = search.Dealer(docStoreConn)
    retriever = search.Dealer(docStoreConn)
    from graphrag import search as kg_search

    kg_retrievaler = kg_search.KGSearch(docStoreConn)
    kg_retriever = kg_search.KGSearch(docStoreConn)

    if int(os.environ.get("SANDBOX_ENABLED", "0")):
        global SANDBOX_HOST
@@ -53,6 +53,7 @@ from api.db.services.llm_service import LLMService
|
||||
from api.db.services.tenant_llm_service import TenantLLMService
|
||||
from api.utils.json import CustomJSONEncoder, json_dumps
|
||||
|
||||
|
||||
# 自定义认证方案,支持不传Bearer格式
|
||||
class CustomHTTPBearer(SecurityBase):
|
||||
def __init__(self, *, scheme_name: str = None, auto_error: bool = True):
|
||||
@@ -71,13 +72,13 @@ class CustomHTTPBearer(SecurityBase):
|
||||
)
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
# 支持Bearer格式和直接token格式
|
||||
if authorization.startswith("Bearer "):
|
||||
token = authorization[7:] # 移除"Bearer "前缀
|
||||
else:
|
||||
token = authorization # 直接使用token
|
||||
|
||||
|
||||
return HTTPAuthorizationCredentials(scheme="Bearer", credentials=token)
|
||||
|
||||
# FastAPI 安全方案
|
||||
@@ -95,8 +96,8 @@ def serialize_for_json(obj):
|
||||
if hasattr(obj, '__dict__'):
|
||||
# For objects with __dict__, try to serialize their attributes
|
||||
try:
|
||||
return {key: serialize_for_json(value) for key, value in obj.__dict__.items()
|
||||
if not key.startswith('_')}
|
||||
return {key: serialize_for_json(value) for key, value in obj.__dict__.items()
|
||||
if not key.startswith('_')}
|
||||
except (AttributeError, TypeError):
|
||||
return str(obj)
|
||||
elif hasattr(obj, '__name__'):
|
||||
@@ -112,6 +113,7 @@ def serialize_for_json(obj):
|
||||
# Fallback: convert to string representation
|
||||
return str(obj)
|
||||
|
||||
|
||||
def request(**kwargs):
|
||||
sess = requests.Session()
|
||||
stream = kwargs.pop("stream", sess.stream)
|
||||
@@ -132,7 +134,8 @@ def request(**kwargs):
|
||||
settings.HTTP_APP_KEY.encode("ascii"),
|
||||
prepped.path_url.encode("ascii"),
|
||||
prepped.body if kwargs.get("json") else b"",
|
||||
urlencode(sorted(kwargs["data"].items()), quote_via=quote, safe="-._~").encode("ascii") if kwargs.get("data") and isinstance(kwargs["data"], dict) else b"",
|
||||
urlencode(sorted(kwargs["data"].items()), quote_via=quote, safe="-._~").encode(
|
||||
"ascii") if kwargs.get("data") and isinstance(kwargs["data"], dict) else b"",
|
||||
]
|
||||
),
|
||||
"sha1",
|
||||
@@ -154,7 +157,7 @@ def request(**kwargs):
|
||||
def get_exponential_backoff_interval(retries, full_jitter=False):
|
||||
"""Calculate the exponential backoff wait time."""
|
||||
# Will be zero if factor equals 0
|
||||
countdown = min(REQUEST_MAX_WAIT_SEC, REQUEST_WAIT_SEC * (2**retries))
|
||||
countdown = min(REQUEST_MAX_WAIT_SEC, REQUEST_WAIT_SEC * (2 ** retries))
|
||||
# Full jitter according to
|
||||
# https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/
|
||||
if full_jitter:
|
||||
@@ -185,11 +188,12 @@ def server_error_response(e):
|
||||
if len(e.args) > 1:
|
||||
try:
|
||||
serialized_data = serialize_for_json(e.args[1])
|
||||
return get_json_result(code= settings.RetCode.EXCEPTION_ERROR, message=repr(e.args[0]), data=serialized_data)
|
||||
return get_json_result(code=settings.RetCode.EXCEPTION_ERROR, message=repr(e.args[0]), data=serialized_data)
|
||||
except Exception:
|
||||
return get_json_result(code=settings.RetCode.EXCEPTION_ERROR, message=repr(e.args[0]), data=None)
|
||||
if repr(e).find("index_not_found_exception") >= 0:
|
||||
return get_json_result(code=settings.RetCode.EXCEPTION_ERROR, message="No chunk found, please upload file and parse it.")
|
||||
return get_json_result(code=settings.RetCode.EXCEPTION_ERROR,
|
||||
message="No chunk found, please upload file and parse it.")
|
||||
|
||||
return get_json_result(code=settings.RetCode.EXCEPTION_ERROR, message=repr(e))
|
||||
|
||||
@@ -214,12 +218,15 @@ def validate_request(*args, **kwargs):
|
||||
Deprecated decorator: FastAPI validates via Pydantic models instead.
|
||||
Kept for backward compatibility; it performs no validation.
|
||||
"""
|
||||
|
||||
def wrapper(func):
|
||||
@wraps(func)
|
||||
def decorated_function(*_args, **_kwargs):
|
||||
# No manual validation needed in FastAPI; Pydantic handles it
|
||||
return func(*_args, **_kwargs)
|
||||
|
||||
return decorated_function
|
||||
|
||||
return wrapper
|
||||
|
||||
|
||||
@@ -228,11 +235,14 @@ def not_allowed_parameters(*params):
|
||||
Deprecated decorator: FastAPI validates via Pydantic models instead.
|
||||
Kept for backward compatibility; it performs no validation.
|
||||
"""
|
||||
|
||||
def decorator(f):
|
||||
def wrapper(*args, **kwargs):
|
||||
# No manual validation needed in FastAPI; Pydantic handles it
|
||||
return f(*args, **kwargs)
|
||||
|
||||
return wrapper
|
||||
|
||||
return decorator
|
||||
|
||||
|
||||
@@ -241,10 +251,12 @@ def active_required(f):
|
||||
Deprecated decorator: FastAPI verifies users via dependency injection instead.
|
||||
Kept for backward compatibility; it performs no validation.
|
||||
"""
|
||||
|
||||
@wraps(f)
|
||||
def wrapper(*args, **kwargs):
|
||||
# User verification is handled by dependency injection in FastAPI
|
||||
return f(*args, **kwargs)
|
||||
|
||||
return wrapper
|
||||
|
||||
|
||||
@@ -281,10 +293,12 @@ def apikey_required(func):
|
||||
Deprecated decorator: FastAPI verifies API keys via dependency injection instead.
|
||||
Kept for backward compatibility; it performs no validation.
|
||||
"""
|
||||
|
||||
@wraps(func)
|
||||
def decorated_function(*args, **kwargs):
|
||||
# API-key verification is handled by dependency injection in FastAPI
|
||||
return func(*args, **kwargs)
|
||||
|
||||
return decorated_function
|
||||
|
||||
|
||||
@@ -301,7 +315,7 @@ def construct_response(code=settings.RetCode.SUCCESS, message="success", data=No
|
||||
continue
|
||||
else:
|
||||
response_dict[key] = value
|
||||
|
||||
|
||||
headers = {
|
||||
"Access-Control-Allow-Origin": "*",
|
||||
"Access-Control-Allow-Method": "*",
|
||||
@@ -310,7 +324,7 @@ def construct_response(code=settings.RetCode.SUCCESS, message="success", data=No
|
||||
}
|
||||
if auth:
|
||||
headers["Authorization"] = auth
|
||||
|
||||
|
||||
return JSONResponse(content=response_dict, headers=headers)
|
||||
|
||||
|
||||
@@ -349,10 +363,12 @@ def token_required(func):
|
||||
Deprecated decorator: FastAPI verifies tokens via dependency injection instead.
|
||||
Kept for backward compatibility; it performs no validation.
|
||||
"""
|
||||
|
||||
@wraps(func)
|
||||
def decorated_function(*args, **kwargs):
|
||||
# Token verification is handled by dependency injection in FastAPI
|
||||
return func(*args, **kwargs)
|
||||
|
||||
return decorated_function
|
||||
|
||||
|
||||
@@ -368,8 +384,8 @@ def get_result(code=settings.RetCode.SUCCESS, message="", data=None):
|
||||
|
||||
|
||||
def get_error_data_result(
|
||||
message="Sorry! Data missing!",
|
||||
code=settings.RetCode.DATA_ERROR,
|
||||
message="Sorry! Data missing!",
|
||||
code=settings.RetCode.DATA_ERROR,
|
||||
):
|
||||
result_dict = {"code": code, "message": message}
|
||||
response = {}
|
||||
@@ -392,24 +408,24 @@ async def get_current_user(credentials: HTTPAuthorizationCredentials = Depends(s
|
||||
try:
|
||||
jwt = URLSafeTimedSerializer(secret_key=settings.SECRET_KEY)
|
||||
authorization = credentials.credentials
|
||||
|
||||
|
||||
if authorization:
|
||||
try:
|
||||
access_token = str(jwt.loads(authorization))
|
||||
|
||||
|
||||
if not access_token or not access_token.strip():
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="Authentication attempt with empty access token"
|
||||
)
|
||||
|
||||
|
||||
# Access tokens should be UUIDs (32 hex characters)
|
||||
if len(access_token.strip()) < 32:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail=f"Authentication attempt with invalid token format: {len(access_token)} chars"
|
||||
)
|
||||
|
||||
|
||||
user = UserService.query(
|
||||
access_token=access_token, status=StatusEnum.VALID.value
|
||||
)
|
||||
@@ -474,7 +490,7 @@ def create_file_response(data, filename: str, media_type: str = "application/oct
|
||||
data = json_dumps(data)
|
||||
if isinstance(data, str):
|
||||
data = data.encode("utf-8")
|
||||
|
||||
|
||||
return StreamingResponse(
|
||||
BytesIO(data),
|
||||
media_type=media_type,
|
||||
@@ -501,7 +517,8 @@ def get_parser_config(chunk_method, parser_config):
|
||||
|
||||
# Define default configurations for each chunking method
|
||||
key_mapping = {
|
||||
"naive": {"chunk_token_num": 512, "delimiter": r"\n", "html4excel": False, "layout_recognize": "DeepDOC", "raptor": {"use_raptor": False}, "graphrag": {"use_graphrag": False}},
|
||||
"naive": {"chunk_token_num": 512, "delimiter": r"\n", "html4excel": False, "layout_recognize": "DeepDOC",
|
||||
"raptor": {"use_raptor": False}, "graphrag": {"use_graphrag": False}},
|
||||
"qa": {"raptor": {"use_raptor": False}, "graphrag": {"use_graphrag": False}},
|
||||
"tag": None,
|
||||
"resume": None,
|
||||
@@ -540,16 +557,16 @@ def get_parser_config(chunk_method, parser_config):
|
||||
|
||||
|
||||
def get_data_openai(
|
||||
id=None,
|
||||
created=None,
|
||||
model=None,
|
||||
prompt_tokens=0,
|
||||
completion_tokens=0,
|
||||
content=None,
|
||||
finish_reason=None,
|
||||
object="chat.completion",
|
||||
param=None,
|
||||
stream=False
|
||||
id=None,
|
||||
created=None,
|
||||
model=None,
|
||||
prompt_tokens=0,
|
||||
completion_tokens=0,
|
||||
content=None,
|
||||
finish_reason=None,
|
||||
object="chat.completion",
|
||||
param=None,
|
||||
stream=False
|
||||
):
|
||||
total_tokens = prompt_tokens + completion_tokens
|
||||
|
||||
@@ -661,7 +678,9 @@ def verify_embedding_availability(embd_id: str, tenant_id: str) -> tuple[bool, J
|
||||
in_llm_service = bool(LLMService.query(llm_name=llm_name, fid=llm_factory, model_type="embedding"))
|
||||
|
||||
tenant_llms = TenantLLMService.get_my_llms(tenant_id=tenant_id)
|
||||
is_tenant_model = any(llm["llm_name"] == llm_name and llm["llm_factory"] == llm_factory and llm["model_type"] == "embedding" for llm in tenant_llms)
|
||||
is_tenant_model = any(
|
||||
llm["llm_name"] == llm_name and llm["llm_factory"] == llm_factory and llm["model_type"] == "embedding" for
|
||||
llm in tenant_llms)
|
||||
|
||||
is_builtin_model = embd_id in settings.BUILTIN_EMBEDDING_MODELS
|
||||
if not (is_builtin_model or is_tenant_model or in_llm_service):
|
||||
@@ -804,7 +823,8 @@ TimeoutException = Union[Type[BaseException], BaseException]
|
||||
OnTimeoutCallback = Union[Callable[..., Any], Coroutine[Any, Any, Any]]
|
||||
|
||||
|
||||
def timeout(seconds: float | int | str = None, attempts: int = 2, *, exception: Optional[TimeoutException] = None, on_timeout: Optional[OnTimeoutCallback] = None):
|
||||
def timeout(seconds: float | int | str = None, attempts: int = 2, *, exception: Optional[TimeoutException] = None,
|
||||
on_timeout: Optional[OnTimeoutCallback] = None):
|
||||
if isinstance(seconds, str):
|
||||
seconds = float(seconds)
|
||||
def decorator(func):
|
||||
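The decorator body is elided in this hunk; as an assumption-laden illustration (not the project's implementation), a retrying timeout in the same trio style could look like this:

```python
import trio
from functools import wraps

def timeout_sketch(seconds: float = 5.0, attempts: int = 2):
    def decorator(func):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            last_exc = None
            for _ in range(attempts):
                try:
                    with trio.fail_after(seconds):  # raises trio.TooSlowError when time is up
                        return await func(*args, **kwargs)
                except trio.TooSlowError as exc:
                    last_exc = exc  # retry until attempts are exhausted
            raise last_exc
        return wrapper
    return decorator
```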
@@ -892,7 +912,8 @@ async def is_strong_enough(chat_model, embedding_model):
|
||||
_ = await trio.to_thread.run_sync(lambda: embedding_model.encode(["Are you strong enough!?"]))
|
||||
if chat_model:
|
||||
with trio.fail_after(30):
|
||||
res = await trio.to_thread.run_sync(lambda: chat_model.chat("Nothing special.", [{"role": "user", "content": "Are you strong enough!?"}], {}))
|
||||
res = await trio.to_thread.run_sync(lambda: chat_model.chat("Nothing special.", [
|
||||
{"role": "user", "content": "Are you strong enough!?"}], {}))
|
||||
if res.find("**ERROR**") >= 0:
|
||||
raise Exception(res)
|
||||
|
||||
|
||||
25
api/utils/email_templates.py
Normal file
@@ -0,0 +1,25 @@
|
||||
"""
|
||||
Reusable HTML email templates and registry.
|
||||
"""
|
||||
|
||||
# Invitation email template
|
||||
INVITE_EMAIL_TMPL = """
|
||||
<p>Hi {{email}},</p>
|
||||
<p>{{inviter}} has invited you to join their team (ID: {{tenant_id}}).</p>
|
||||
<p>Click the link below to complete your registration:<br>
|
||||
<a href="{{invite_url}}">{{invite_url}}</a></p>
|
||||
<p>If you did not request this, please ignore this email.</p>
|
||||
"""
|
||||
|
||||
# Password reset code template
|
||||
RESET_CODE_EMAIL_TMPL = """
|
||||
<p>Hello,</p>
|
||||
<p>Your password reset code is: <b>{{ code }}</b></p>
|
||||
<p>This code will expire in {{ ttl_min }} minutes.</p>
|
||||
"""
|
||||
|
||||
# Template registry
|
||||
EMAIL_TEMPLATES = {
|
||||
"invite": INVITE_EMAIL_TMPL,
|
||||
"reset_code": RESET_CODE_EMAIL_TMPL,
|
||||
}
|
||||
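Rendering one of these templates outside of a mail send looks like this (a sketch; it assumes a Flask app context, as used elsewhere in the codebase):

```python
from flask import Flask, render_template_string

app = Flask(__name__)
with app.app_context():
    html = render_template_string(EMAIL_TEMPLATES["reset_code"], code="12345678", ttl_min=5)
    # html now contains "<b>12345678</b>" and the 5-minute expiry notice
```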
@@ -13,7 +13,12 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
|
||||
# Standard library imports
|
||||
import base64
|
||||
import hashlib
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
@@ -22,13 +27,20 @@ import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import threading
|
||||
import zipfile
|
||||
from io import BytesIO
|
||||
|
||||
# Typing
|
||||
from typing import List, Union, Tuple
|
||||
|
||||
# Third-party imports
|
||||
import olefile
|
||||
import pdfplumber
|
||||
from cachetools import LRUCache, cached
|
||||
from PIL import Image
|
||||
from ruamel.yaml import YAML
|
||||
|
||||
# Local imports
|
||||
from api.constants import IMG_BASE64_PREFIX
|
||||
from api.db import FileType
|
||||
|
||||
@@ -161,7 +173,7 @@ def filename_type(filename):
|
||||
if re.match(r".*\.(wav|flac|ape|alac|wavpack|wv|mp3|aac|ogg|vorbis|opus)$", filename):
|
||||
return FileType.AURAL.value
|
||||
|
||||
if re.match(r".*\.(jpg|jpeg|png|tif|gif|pcx|tga|exif|fpx|svg|psd|cdr|pcd|dxf|ufo|eps|ai|raw|WMF|webp|avif|apng|icon|ico|mpg|mpeg|avi|rm|rmvb|mov|wmv|asf|dat|asx|wvx|mpe|mpa|mp4)$", filename):
|
||||
if re.match(r".*\.(jpg|jpeg|png|tif|gif|pcx|tga|exif|fpx|svg|psd|cdr|pcd|dxf|ufo|eps|ai|raw|WMF|webp|avif|apng|icon|ico|mpg|mpeg|avi|rm|rmvb|mov|wmv|asf|dat|asx|wvx|mpe|mpa|mp4|avi|mkv)$", filename):
|
||||
return FileType.VISUAL.value
|
||||
|
||||
return FileType.OTHER.value
|
||||
@@ -284,3 +296,125 @@ def read_potential_broken_pdf(blob):
|
||||
return repaired
|
||||
|
||||
return blob
|
||||
|
||||
|
||||
|
||||
def _is_zip(h: bytes) -> bool:
|
||||
return h.startswith(b"PK\x03\x04") or h.startswith(b"PK\x05\x06") or h.startswith(b"PK\x07\x08")
|
||||
|
||||
def _is_pdf(h: bytes) -> bool:
|
||||
return h.startswith(b"%PDF-")
|
||||
|
||||
def _is_ole(h: bytes) -> bool:
|
||||
return h.startswith(b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1")
|
||||
|
||||
def _sha10(b: bytes) -> str:
|
||||
return hashlib.sha256(b).hexdigest()[:10]
|
||||
|
||||
def _guess_ext(b: bytes) -> str:
|
||||
h = b[:8]
|
||||
if _is_zip(h):
|
||||
try:
|
||||
with zipfile.ZipFile(io.BytesIO(b), "r") as z:
|
||||
names = [n.lower() for n in z.namelist()]
|
||||
if any(n.startswith("word/") for n in names):
|
||||
return ".docx"
|
||||
if any(n.startswith("ppt/") for n in names):
|
||||
return ".pptx"
|
||||
if any(n.startswith("xl/") for n in names):
|
||||
return ".xlsx"
|
||||
except Exception:
|
||||
pass
|
||||
return ".zip"
|
||||
if _is_pdf(h):
|
||||
return ".pdf"
|
||||
if _is_ole(h):
|
||||
return ".doc"
|
||||
return ".bin"
|
||||
|
||||
# Try to extract the real embedded payload from OLE's Ole10Native
|
||||
def _extract_ole10native_payload(data: bytes) -> bytes:
|
||||
try:
|
||||
pos = 0
|
||||
if len(data) < 4:
|
||||
return data
|
||||
_ = int.from_bytes(data[pos:pos+4], "little")
|
||||
pos += 4
|
||||
# filename/src/tmp (NUL-terminated ANSI)
|
||||
for _ in range(3):
|
||||
z = data.index(b"\x00", pos)
|
||||
pos = z + 1
|
||||
# skip unknown 4 bytes
|
||||
pos += 4
|
||||
if pos + 4 > len(data):
|
||||
return data
|
||||
size = int.from_bytes(data[pos:pos+4], "little")
|
||||
pos += 4
|
||||
if pos + size <= len(data):
|
||||
return data[pos:pos+size]
|
||||
except Exception:
|
||||
pass
|
||||
return data
|
||||
|
||||
def extract_embed_file(target: Union[bytes, bytearray]) -> List[Tuple[str, bytes]]:
|
||||
"""
|
||||
Only extract the 'first layer' of embedding, returning raw (filename, bytes).
|
||||
"""
|
||||
top = bytes(target)
|
||||
head = top[:8]
|
||||
out: List[Tuple[str, bytes]] = []
|
||||
seen = set()
|
||||
|
||||
def push(b: bytes, name_hint: str = ""):
|
||||
h10 = _sha10(b)
|
||||
if h10 in seen:
|
||||
return
|
||||
seen.add(h10)
|
||||
ext = _guess_ext(b)
|
||||
# If name_hint has an extension use its basename; else fallback to guessed ext
|
||||
if "." in name_hint:
|
||||
fname = name_hint.split("/")[-1]
|
||||
else:
|
||||
fname = f"{h10}{ext}"
|
||||
out.append((fname, b))
|
||||
|
||||
# OOXML/ZIP container (docx/xlsx/pptx)
|
||||
if _is_zip(head):
|
||||
try:
|
||||
with zipfile.ZipFile(io.BytesIO(top), "r") as z:
|
||||
embed_dirs = (
|
||||
"word/embeddings/", "word/objects/", "word/activex/",
|
||||
"xl/embeddings/", "ppt/embeddings/"
|
||||
)
|
||||
for name in z.namelist():
|
||||
low = name.lower()
|
||||
if any(low.startswith(d) for d in embed_dirs):
|
||||
try:
|
||||
b = z.read(name)
|
||||
push(b, name)
|
||||
except Exception:
|
||||
pass
|
||||
except Exception:
|
||||
pass
|
||||
return out
|
||||
|
||||
# OLE container (doc/ppt/xls)
|
||||
if _is_ole(head):
|
||||
try:
|
||||
with olefile.OleFileIO(io.BytesIO(top)) as ole:
|
||||
for entry in ole.listdir():
|
||||
p = "/".join(entry)
|
||||
try:
|
||||
data = ole.openstream(entry).read()
|
||||
except Exception:
|
||||
continue
|
||||
if not data:
|
||||
continue
|
||||
if "Ole10Native" in p or "ole10native" in p.lower():
|
||||
data = _extract_ole10native_payload(data)
|
||||
push(data, p)
|
||||
except Exception:
|
||||
pass
|
||||
return out
|
||||
|
||||
return out
|
||||
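A quick usage sketch (the input file name is hypothetical): feed the raw bytes of a container document and get back its first-layer embedded payloads:

```python
with open("report_with_attachments.docx", "rb") as f:  # hypothetical input file
    blob = f.read()

for fname, payload in extract_embed_file(blob):
    # fname is either the in-archive name or "<sha10><guessed ext>"
    print(fname, len(payload), _guess_ext(payload))
```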
@@ -74,12 +74,12 @@ def get_es_cluster_stats() -> dict:
|
||||
raise Exception("Elasticsearch is not in use.")
|
||||
try:
|
||||
return {
|
||||
"alive": True,
|
||||
"status": "alive",
|
||||
"message": ESConnection().get_cluster_stats()
|
||||
}
|
||||
except Exception as e:
|
||||
return {
|
||||
"alive": False,
|
||||
"status": "timeout",
|
||||
"message": f"error: {str(e)}",
|
||||
}
|
||||
|
||||
@@ -90,12 +90,12 @@ def get_infinity_status():
|
||||
raise Exception("Infinity is not in use.")
|
||||
try:
|
||||
return {
|
||||
"alive": True,
|
||||
"status": "alive",
|
||||
"message": InfinityConnection().health()
|
||||
}
|
||||
except Exception as e:
|
||||
return {
|
||||
"alive": False,
|
||||
"status": "timeout",
|
||||
"message": f"error: {str(e)}",
|
||||
}
|
||||
|
||||
@@ -107,12 +107,12 @@ def get_mysql_status():
|
||||
headers = ['id', 'user', 'host', 'db', 'command', 'time', 'state', 'info']
|
||||
cursor.close()
|
||||
return {
|
||||
"alive": True,
|
||||
"status": "alive",
|
||||
"message": [dict(zip(headers, r)) for r in res_rows]
|
||||
}
|
||||
except Exception as e:
|
||||
return {
|
||||
"alive": False,
|
||||
"status": "timeout",
|
||||
"message": f"error: {str(e)}",
|
||||
}
|
||||
|
||||
@@ -122,12 +122,12 @@ def check_minio_alive():
|
||||
try:
|
||||
response = requests.get(f'http://{rag_settings.MINIO["host"]}/minio/health/live')
|
||||
if response.status_code == 200:
|
||||
return {'alive': True, "message": f"Confirm elapsed: {(timer() - start_time) * 1000.0:.1f} ms."}
|
||||
return {"status": "alive", "message": f"Confirm elapsed: {(timer() - start_time) * 1000.0:.1f} ms."}
|
||||
else:
|
||||
return {'alive': False, "message": f"Confirm elapsed: {(timer() - start_time) * 1000.0:.1f} ms."}
|
||||
return {"status": "timeout", "message": f"Confirm elapsed: {(timer() - start_time) * 1000.0:.1f} ms."}
|
||||
except Exception as e:
|
||||
return {
|
||||
"alive": False,
|
||||
"status": "timeout",
|
||||
"message": f"error: {str(e)}",
|
||||
}
|
||||
|
||||
@@ -135,12 +135,12 @@ def check_minio_alive():
|
||||
def get_redis_info():
|
||||
try:
|
||||
return {
|
||||
"alive": True,
|
||||
"status": "alive",
|
||||
"message": REDIS_CONN.info()
|
||||
}
|
||||
except Exception as e:
|
||||
return {
|
||||
"alive": False,
|
||||
"status": "timeout",
|
||||
"message": f"error: {str(e)}",
|
||||
}
|
||||
|
||||
@@ -150,12 +150,12 @@ def check_ragflow_server_alive():
|
||||
try:
|
||||
response = requests.get(f'http://{settings.HOST_IP}:{settings.HOST_PORT}/v1/system/ping')
|
||||
if response.status_code == 200:
|
||||
return {'alive': True, "message": f"Confirm elapsed: {(timer() - start_time) * 1000.0:.1f} ms."}
|
||||
return {"status": "alive", "message": f"Confirm elapsed: {(timer() - start_time) * 1000.0:.1f} ms."}
|
||||
else:
|
||||
return {'alive': False, "message": f"Confirm elapsed: {(timer() - start_time) * 1000.0:.1f} ms."}
|
||||
return {"status": "timeout", "message": f"Confirm elapsed: {(timer() - start_time) * 1000.0:.1f} ms."}
|
||||
except Exception as e:
|
||||
return {
|
||||
"alive": False,
|
||||
"status": "timeout",
|
||||
"message": f"error: {str(e)}",
|
||||
}
|
||||
|
||||
@@ -192,9 +192,7 @@ def run_health_checks() -> tuple[dict, bool]:
|
||||
except Exception:
|
||||
result["storage"] = "nok"
|
||||
|
||||
|
||||
all_ok = (result.get("db") == "ok") and (result.get("redis") == "ok") and (result.get("doc_engine") == "ok") and (result.get("storage") == "ok")
|
||||
all_ok = (result.get("db") == "ok") and (result.get("redis") == "ok") and (result.get("doc_engine") == "ok") and (
|
||||
result.get("storage") == "ok")
|
||||
result["status"] = "ok" if all_ok else "nok"
|
||||
return result, all_ok
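A sketch of how a readiness endpoint might consume this (the route path is hypothetical, not part of this commit):

```python
from fastapi import FastAPI, Response

app = FastAPI()

@app.get("/v1/system/healthz")  # hypothetical route
async def healthz(response: Response):
    result, all_ok = run_health_checks()
    response.status_code = 200 if all_ok else 503  # not ready maps to 503
    return result
```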
|
||||
|
||||
|
||||
|
||||
@@ -24,6 +24,7 @@ from urllib.parse import urlparse
|
||||
from api.apps import smtp_mail_server
|
||||
from flask_mail import Message
|
||||
from flask import render_template_string
|
||||
from api.utils.email_templates import EMAIL_TEMPLATES
|
||||
from selenium import webdriver
|
||||
from selenium.common.exceptions import TimeoutException
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
@@ -34,6 +35,12 @@ from selenium.webdriver.support.ui import WebDriverWait
|
||||
from webdriver_manager.chrome import ChromeDriverManager
|
||||
|
||||
|
||||
OTP_LENGTH = 8
|
||||
OTP_TTL_SECONDS = 5 * 60
|
||||
ATTEMPT_LIMIT = 5
|
||||
ATTEMPT_LOCK_SECONDS = 30 * 60
|
||||
RESEND_COOLDOWN_SECONDS = 60
|
||||
|
||||
|
||||
CONTENT_TYPE_MAP = {
|
||||
# Office
|
||||
@@ -178,24 +185,49 @@ def get_float(req: dict, key: str, default: float | int = 10.0) -> float:
|
||||
return default
|
||||
|
||||
|
||||
INVITE_EMAIL_TMPL = """
|
||||
<p>Hi {{email}},</p>
|
||||
<p>{{inviter}} has invited you to join their team (ID: {{tenant_id}}).</p>
|
||||
<p>Click the link below to complete your registration:<br>
|
||||
<a href="{{invite_url}}">{{invite_url}}</a></p>
|
||||
<p>If you did not request this, please ignore this email.</p>
|
||||
"""
|
||||
def send_email_html(subject: str, to_email: str, template_key: str, **context):
|
||||
"""Generic HTML email sender using shared templates.
|
||||
template_key must exist in EMAIL_TEMPLATES.
|
||||
"""
|
||||
from api.apps import app
|
||||
tmpl = EMAIL_TEMPLATES.get(template_key)
|
||||
if not tmpl:
|
||||
raise ValueError(f"Unknown email template: {template_key}")
|
||||
with app.app_context():
|
||||
msg = Message(subject=subject, recipients=[to_email])
|
||||
msg.html = render_template_string(tmpl, **context)
|
||||
smtp_mail_server.send(msg)
|
||||
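A hypothetical call, assuming SMTP is configured and using the 'reset_code' template shown earlier:

```python
send_email_html(
    subject="Your RAGFlow password reset code",
    to_email="user@example.com",     # illustrative recipient
    template_key="reset_code",
    code="12345678",
    ttl_min=OTP_TTL_SECONDS // 60,   # 5 minutes, matching the constant defined earlier in this file
)
```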
|
||||
|
||||
def send_invite_email(to_email, invite_url, tenant_id, inviter):
|
||||
from api.apps import app
|
||||
with app.app_context():
|
||||
msg = Message(subject="RAGFlow Invitation",
|
||||
recipients=[to_email])
|
||||
msg.html = render_template_string(
|
||||
INVITE_EMAIL_TMPL,
|
||||
email=to_email,
|
||||
invite_url=invite_url,
|
||||
tenant_id=tenant_id,
|
||||
inviter=inviter,
|
||||
)
|
||||
smtp_mail_server.send(msg)
|
||||
# Reuse the generic HTML sender with 'invite' template
|
||||
send_email_html(
|
||||
subject="RAGFlow Invitation",
|
||||
to_email=to_email,
|
||||
template_key="invite",
|
||||
email=to_email,
|
||||
invite_url=invite_url,
|
||||
tenant_id=tenant_id,
|
||||
inviter=inviter,
|
||||
)
|
||||
|
||||
|
||||
def otp_keys(email: str):
|
||||
email = (email or "").strip().lower()
|
||||
return (
|
||||
f"otp:{email}",
|
||||
f"otp_attempts:{email}",
|
||||
f"otp_last_sent:{email}",
|
||||
f"otp_lock:{email}",
|
||||
)
|
||||
|
||||
|
||||
def hash_code(code: str, salt: bytes) -> str:
|
||||
import hashlib
|
||||
import hmac
|
||||
return hmac.new(salt, (code or "").encode("utf-8"), hashlib.sha256).hexdigest()
|
||||
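Verification against a stored digest should be constant-time; a sketch using the helper above:

```python
import hmac
import os

salt = os.urandom(16)                  # stored alongside the digest at issue time
stored = hash_code("12345678", salt)   # digest persisted when the code was issued
submitted = "12345678"                 # code the user typed
ok = hmac.compare_digest(stored, hash_code(submitted, salt))  # constant-time compare
```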
|
||||
|
||||
def captcha_key(email: str) -> str:
|
||||
return f"captcha:{email}"
|
||||
|
||||
|
||||
19
chat_demo/index.html
Normal file
@@ -0,0 +1,19 @@
|
||||
<iframe src="http://localhost:9222/next-chats/widget?shared_id=9dcfc68696c611f0bb789b9b8b765d12&from=chat&auth=U4MDU3NzkwOTZjNzExZjBiYjc4OWI5Yj&mode=master&streaming=false"
|
||||
style="position:fixed;bottom:0;right:0;width:100px;height:100px;border:none;background:transparent;z-index:9999"
|
||||
frameborder="0" allow="microphone;camera"></iframe>
|
||||
<script>
|
||||
window.addEventListener('message',e=>{
|
||||
if(e.origin!=='http://localhost:9222')return;
|
||||
if(e.data.type==='CREATE_CHAT_WINDOW'){
|
||||
if(document.getElementById('chat-win'))return;
|
||||
const i=document.createElement('iframe');
|
||||
i.id='chat-win';i.src=e.data.src;
|
||||
i.style.cssText='position:fixed;bottom:104px;right:24px;width:380px;height:500px;border:none;background:transparent;z-index:9998;display:none';
|
||||
i.frameBorder='0';i.allow='microphone;camera';
|
||||
document.body.appendChild(i);
|
||||
}else if(e.data.type==='TOGGLE_CHAT'){
|
||||
const w=document.getElementById('chat-win');
|
||||
if(w)w.style.display=e.data.isOpen?'block':'none';
|
||||
}else if(e.data.type==='SCROLL_PASSTHROUGH')window.scrollBy(0,e.data.deltaY);
|
||||
});
|
||||
</script>
|
||||
154
chat_demo/widget_demo.html
Normal file
@@ -0,0 +1,154 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Floating Chat Widget Demo</title>
|
||||
<style>
|
||||
body {
|
||||
font-family: Arial, sans-serif;
|
||||
margin: 0;
|
||||
padding: 40px;
|
||||
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
||||
min-height: 100vh;
|
||||
color: white;
|
||||
}
|
||||
|
||||
.demo-content {
|
||||
max-width: 800px;
|
||||
margin: 0 auto;
|
||||
}
|
||||
|
||||
.demo-content h1 {
|
||||
text-align: center;
|
||||
font-size: 2.5rem;
|
||||
margin-bottom: 2rem;
|
||||
}
|
||||
|
||||
.demo-content p {
|
||||
font-size: 1.2rem;
|
||||
line-height: 1.6;
|
||||
margin-bottom: 1.5rem;
|
||||
}
|
||||
|
||||
.feature-list {
|
||||
background: rgba(255, 255, 255, 0.1);
|
||||
border-radius: 10px;
|
||||
padding: 2rem;
|
||||
margin: 2rem 0;
|
||||
}
|
||||
|
||||
.feature-list h3 {
|
||||
margin-top: 0;
|
||||
font-size: 1.5rem;
|
||||
}
|
||||
|
||||
.feature-list ul {
|
||||
list-style-type: none;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
.feature-list li {
|
||||
padding: 0.5rem 0;
|
||||
padding-left: 1.5rem;
|
||||
position: relative;
|
||||
}
|
||||
|
||||
.feature-list li:before {
|
||||
content: "✓";
|
||||
position: absolute;
|
||||
left: 0;
|
||||
color: #4ade80;
|
||||
font-weight: bold;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="demo-content">
|
||||
<h1>🚀 Floating Chat Widget Demo</h1>
|
||||
|
||||
<p>
|
||||
Welcome to our demo page! This page simulates a real website with content.
|
||||
Look for the floating chat button in the bottom-right corner - just like Intercom!
|
||||
</p>
|
||||
|
||||
<div class="feature-list">
|
||||
<h3>🎯 Widget Features</h3>
|
||||
<ul>
|
||||
<li>Floating button that stays visible while scrolling</li>
|
||||
<li>Click to open/close the chat window</li>
|
||||
<li>Minimize button to collapse the chat</li>
|
||||
<li>Professional Intercom-style design</li>
|
||||
<li>Unread message indicator (red badge)</li>
|
||||
<li>Transparent background integration</li>
|
||||
<li>Responsive design for all screen sizes</li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<p>
|
||||
The chat widget is completely separate from your website's content and won't
|
||||
interfere with your existing layout or functionality. It's designed to be
|
||||
lightweight and performant.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
Try scrolling this page - notice how the chat button stays in position.
|
||||
Click it to start a conversation with our AI assistant!
|
||||
</p>
|
||||
|
||||
<div class="feature-list">
|
||||
<h3>🔧 Implementation</h3>
|
||||
<ul>
|
||||
<li>Simple iframe embed - just copy and paste</li>
|
||||
<li>No JavaScript dependencies required</li>
|
||||
<li>Works on any website or platform</li>
|
||||
<li>Customizable appearance and behavior</li>
|
||||
<li>Secure and privacy-focused</li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<p>
|
||||
This is just placeholder content to demonstrate how the widget integrates
|
||||
seamlessly with your existing website content. The widget floats above
|
||||
everything else without disrupting your user experience.
|
||||
</p>
|
||||
|
||||
<p style="margin-top: 4rem; text-align: center; font-style: italic;">
|
||||
🎉 Ready to add this to your website? Get your embed code from the admin panel!
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<iframe id="main-widget" src="http://localhost:9222/next-chats/widget?shared_id=9dcfc68696c611f0bb789b9b8b765d12&from=chat&auth=U4MDU3NzkwOTZjNzExZjBiYjc4OWI5Yj&visible_avatar=1&locale=zh&mode=master&streaming=false"
|
||||
style="position:fixed;bottom:0;right:0;width:100px;height:100px;border:none;background:transparent;z-index:9999;opacity:0;transition:opacity 0.2s ease"
|
||||
frameborder="0" allow="microphone;camera"></iframe>
|
||||
<script>
|
||||
window.addEventListener('message',e=>{
|
||||
if(e.origin!=='http://localhost:9222')return;
|
||||
if(e.data.type==='WIDGET_READY'){
|
||||
// Show the main widget when React is ready
|
||||
const mainWidget = document.getElementById('main-widget');
|
||||
if(mainWidget) mainWidget.style.opacity = '1';
|
||||
}else if(e.data.type==='CREATE_CHAT_WINDOW'){
|
||||
if(document.getElementById('chat-win'))return;
|
||||
const i=document.createElement('iframe');
|
||||
i.id='chat-win';i.src=e.data.src;
|
||||
i.style.cssText='position:fixed;bottom:104px;right:24px;width:380px;height:500px;border:none;background:transparent;z-index:9998;display:none;opacity:0;transition:opacity 0.2s ease';
|
||||
i.frameBorder='0';i.allow='microphone;camera';
|
||||
document.body.appendChild(i);
|
||||
}else if(e.data.type==='TOGGLE_CHAT'){
|
||||
const w=document.getElementById('chat-win');
|
||||
if(w){
|
||||
if(e.data.isOpen){
|
||||
w.style.display='block';
|
||||
// Wait for the iframe content to be ready before showing
|
||||
setTimeout(() => w.style.opacity='1', 100);
|
||||
}else{
|
||||
w.style.opacity='0';
|
||||
setTimeout(() => w.style.display='none', 200);
|
||||
}
|
||||
}
|
||||
}else if(e.data.type==='SCROLL_PASSTHROUGH')window.scrollBy(0,e.data.deltaY);
|
||||
});
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
@@ -31,7 +31,6 @@
|
||||
"entities_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
|
||||
"pagerank_fea": {"type": "integer", "default": 0},
|
||||
"tag_feas": {"type": "varchar", "default": "", "analyzer": "rankfeatures"},
|
||||
|
||||
"from_entity_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
|
||||
"to_entity_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
|
||||
"entity_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
|
||||
@@ -39,6 +38,6 @@
|
||||
"source_id": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
|
||||
"n_hop_with_weight": {"type": "varchar", "default": ""},
|
||||
"removed_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
|
||||
|
||||
"doc_type_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"}
|
||||
"doc_type_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
|
||||
"toc_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"}
|
||||
}
|
||||
|
||||
@@ -803,6 +803,12 @@
|
||||
"tags": "TEXT EMBEDDING",
|
||||
"max_tokens": 512,
|
||||
"model_type": "embedding"
|
||||
},
|
||||
{
|
||||
"llm_name": "glm-asr",
|
||||
"tags": "SPEECH2TEXT",
|
||||
"max_tokens": 4096,
|
||||
"model_type": "speech2text"
|
||||
}
|
||||
]
|
||||
},
|
||||
@@ -965,31 +971,9 @@
|
||||
{
|
||||
"name": "VolcEngine",
|
||||
"logo": "",
|
||||
"tags": "LLM, TEXT EMBEDDING",
|
||||
"tags": "LLM, TEXT EMBEDDING, IMAGE2TEXT",
|
||||
"status": "1",
|
||||
"llm": [
|
||||
{
|
||||
"llm_name": "Doubao-pro-128k",
|
||||
"tags": "LLM,CHAT,128k",
|
||||
"max_tokens": 131072,
|
||||
"model_type": "chat",
|
||||
"is_tools": true
|
||||
},
|
||||
{
|
||||
"llm_name": "Doubao-pro-32k",
|
||||
"tags": "LLM,CHAT,32k",
|
||||
"max_tokens": 32768,
|
||||
"model_type": "chat",
|
||||
"is_tools": true
|
||||
},
|
||||
{
|
||||
"llm_name": "Doubao-pro-4k",
|
||||
"tags": "LLM,CHAT,4k",
|
||||
"max_tokens": 4096,
|
||||
"model_type": "chat",
|
||||
"is_tools": true
|
||||
}
|
||||
]
|
||||
"llm": []
|
||||
},
|
||||
{
|
||||
"name": "BaiChuan",
|
||||
@@ -1361,35 +1345,35 @@
|
||||
"llm_name": "gemini-2.5-flash",
|
||||
"tags": "LLM,CHAT,1024K,IMAGE2TEXT",
|
||||
"max_tokens": 1048576,
|
||||
"model_type": "chat",
|
||||
"model_type": "image2text",
|
||||
"is_tools": true
|
||||
},
|
||||
{
|
||||
"llm_name": "gemini-2.5-pro",
|
||||
"tags": "LLM,CHAT,IMAGE2TEXT,1024K",
|
||||
"max_tokens": 1048576,
|
||||
"model_type": "chat",
|
||||
"model_type": "image2text",
|
||||
"is_tools": true
|
||||
},
|
||||
{
|
||||
"llm_name": "gemini-2.5-flash-lite",
|
||||
"tags": "LLM,CHAT,1024K,IMAGE2TEXT",
|
||||
"max_tokens": 1048576,
|
||||
"model_type": "chat",
|
||||
"model_type": "image2text",
|
||||
"is_tools": true
|
||||
},
|
||||
{
|
||||
"llm_name": "gemini-2.0-flash",
|
||||
"tags": "LLM,CHAT,1024K",
|
||||
"max_tokens": 1048576,
|
||||
"model_type": "chat",
|
||||
"model_type": "image2text",
|
||||
"is_tools": true
|
||||
},
|
||||
{
|
||||
"llm_name": "gemini-2.0-flash-lite",
|
||||
"tags": "LLM,CHAT,1024K",
|
||||
"max_tokens": 1048576,
|
||||
"model_type": "chat",
|
||||
"model_type": "image2text",
|
||||
"is_tools": true
|
||||
},
|
||||
{
|
||||
@@ -2816,6 +2800,13 @@
|
||||
"tags": "LLM,TEXT EMBEDDING,TEXT RE-RANK,IMAGE2TEXT",
|
||||
"status": "1",
|
||||
"llm": [
|
||||
{
|
||||
"llm_name":"THUDM/GLM-4.1V-9B-Thinking",
|
||||
"tags":"LLM,CHAT,IMAGE2TEXT, 64k",
|
||||
"max_tokens":64000,
|
||||
"model_type":"chat",
|
||||
"is_tools": false
|
||||
},
|
||||
{
|
||||
"llm_name": "Qwen/Qwen3-Embedding-8B",
|
||||
"tags": "TEXT EMBEDDING,TEXT RE-RANK,32k",
|
||||
@@ -2996,7 +2987,7 @@
|
||||
"tags": "LLM,CHAT,IMAGE2TEXT,32k",
|
||||
"max_tokens": 32000,
|
||||
"model_type": "image2text",
|
||||
"is_tools": true
|
||||
"is_tools": false
|
||||
},
|
||||
{
|
||||
"llm_name": "THUDM/GLM-Z1-32B-0414",
|
||||
@@ -3145,13 +3136,6 @@
|
||||
"model_type": "chat",
|
||||
"is_tools": true
|
||||
},
|
||||
{
|
||||
"llm_name": "Qwen/Qwen2-1.5B-Instruct",
|
||||
"tags": "LLM,CHAT,32k",
|
||||
"max_tokens": 32000,
|
||||
"model_type": "chat",
|
||||
"is_tools": true
|
||||
},
|
||||
{
|
||||
"llm_name": "Pro/Qwen/Qwen2.5-Coder-7B-Instruct",
|
||||
"tags": "LLM,CHAT,32k",
|
||||
@@ -3159,13 +3143,6 @@
|
||||
"model_type": "chat",
|
||||
"is_tools": false
|
||||
},
|
||||
{
|
||||
"llm_name": "Pro/Qwen/Qwen2-VL-7B-Instruct",
|
||||
"tags": "LLM,CHAT,IMAGE2TEXT,32k",
|
||||
"max_tokens": 32000,
|
||||
"model_type": "image2text",
|
||||
"is_tools": false
|
||||
},
|
||||
{
|
||||
"llm_name": "Pro/Qwen/Qwen2.5-7B-Instruct",
|
||||
"tags": "LLM,CHAT,32k",
|
||||
@@ -5147,4 +5124,4 @@
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
@@ -200,6 +200,61 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"knn_vector": {
|
||||
"match": "*_2048_vec",
|
||||
"mapping": {
|
||||
"type": "knn_vector",
|
||||
"index": true,
|
||||
"space_type": "cosinesimil",
|
||||
"dimension": 2048
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"knn_vector": {
|
||||
"match": "*_4096_vec",
|
||||
"mapping": {
|
||||
"type": "knn_vector",
|
||||
"index": true,
|
||||
"space_type": "cosinesimil",
|
||||
"dimension": 4096
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"knn_vector": {
|
||||
"match": "*_6144_vec",
|
||||
"mapping": {
|
||||
"type": "knn_vector",
|
||||
"index": true,
|
||||
"space_type": "cosinesimil",
|
||||
"dimension": 6144
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"knn_vector": {
|
||||
"match": "*_8192_vec",
|
||||
"mapping": {
|
||||
"type": "knn_vector",
|
||||
"index": true,
|
||||
"space_type": "cosinesimil",
|
||||
"dimension": 8192
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"knn_vector": {
|
||||
"match": "*_10240_vec",
|
||||
"mapping": {
|
||||
"type": "knn_vector",
|
||||
"index": true,
|
||||
"space_type": "cosinesimil",
|
||||
"dimension": 10240
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"binary": {
|
||||
"match": "*_bin",
|
||||
|
||||
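These dynamic templates key the vector mapping off a dimension suffix in the field name. A sketch of how an indexed document would exercise the 2048-dim template (field and index names are illustrative, and the client call is hypothetical):

```python
dim = 2048
doc = {
    "content_with_weight": "some chunk text",
    f"q_{dim}_vec": [0.0] * dim,   # field name matches the "*_2048_vec" template
}
# es_client.index(index="ragflow_<tenant>", document=doc)  # hypothetical client call
```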
@@ -1,122 +0,0 @@
|
||||
English | [简体中文](./README_zh.md)
|
||||
|
||||
# *Deep*Doc
|
||||
|
||||
- [1. Introduction](#1)
|
||||
- [2. Vision](#2)
|
||||
- [3. Parser](#3)
|
||||
|
||||
<a name="1"></a>
|
||||
## 1. Introduction
|
||||
|
||||
Given documents from various domains, in various formats, and with diverse retrieval requirements,
accurate analysis becomes a very challenging task. *Deep*Doc was born for that purpose.
There are two parts in *Deep*Doc so far: vision and parser.
You can run the following test programs if you're interested in our OCR, layout recognition, and TSR results.
|
||||
```bash
|
||||
python deepdoc/vision/t_ocr.py -h
|
||||
usage: t_ocr.py [-h] --inputs INPUTS [--output_dir OUTPUT_DIR]
|
||||
|
||||
options:
|
||||
-h, --help show this help message and exit
|
||||
--inputs INPUTS Directory where to store images or PDFs, or a file path to a single image or PDF
|
||||
--output_dir OUTPUT_DIR
|
||||
Directory where to store the output images. Default: './ocr_outputs'
|
||||
```
|
||||
```bash
|
||||
python deepdoc/vision/t_recognizer.py -h
|
||||
usage: t_recognizer.py [-h] --inputs INPUTS [--output_dir OUTPUT_DIR] [--threshold THRESHOLD] [--mode {layout,tsr}]
|
||||
|
||||
options:
|
||||
-h, --help show this help message and exit
|
||||
--inputs INPUTS Directory where to store images or PDFs, or a file path to a single image or PDF
|
||||
--output_dir OUTPUT_DIR
|
||||
Directory where to store the output images. Default: './layouts_outputs'
|
||||
--threshold THRESHOLD
|
||||
A threshold to filter out detections. Default: 0.5
|
||||
--mode {layout,tsr} Task mode: layout recognition or table structure recognition
|
||||
```
|
||||
|
||||
Our models are served on HuggingFace. If you have trouble downloading HuggingFace models, this might help!!
|
||||
```bash
|
||||
export HF_ENDPOINT=https://hf-mirror.com
|
||||
```
|
||||
|
||||
<a name="2"></a>
|
||||
## 2. Vision
|
||||
|
||||
We use vision information to resolve problems as humans do.
- OCR. Since many documents are presented as images, or can at least be converted to images,
OCR is an essential, fundamental, even universal solution for text extraction.
|
||||
```bash
|
||||
python deepdoc/vision/t_ocr.py --inputs=path_to_images_or_pdfs --output_dir=path_to_store_result
|
||||
```
|
||||
The inputs can be a directory of images or PDFs, or a single image or PDF.
You can look into the folder 'path_to_store_result', which contains images demonstrating the positions of the results
and txt files containing the OCR text.
|
||||
<div align="center" style="margin-top:20px;margin-bottom:20px;">
|
||||
<img src="https://github.com/infiniflow/ragflow/assets/12318111/f25bee3d-aaf7-4102-baf5-d5208361d110" width="900"/>
|
||||
</div>
|
||||
|
||||
- Layout recognition. Documents from different domains may have quite different layouts;
newspapers, magazines, books, and résumés, for example, are all distinct in this respect.
Only with an accurate layout analysis can a machine decide whether text parts are contiguous,
whether a region needs Table Structure Recognition (TSR), or whether a region is a figure described by a nearby caption.
We have 10 basic layout components, which cover most cases:
|
||||
- Text
|
||||
- Title
|
||||
- Figure
|
||||
- Figure caption
|
||||
- Table
|
||||
- Table caption
|
||||
- Header
|
||||
- Footer
|
||||
- Reference
|
||||
- Equation
|
||||
|
||||
Try the following command to see the layout detection results.
|
||||
```bash
|
||||
python deepdoc/vision/t_recognizer.py --inputs=path_to_images_or_pdfs --threshold=0.2 --mode=layout --output_dir=path_to_store_result
|
||||
```
|
||||
The inputs can be a directory of images or PDFs, or a single image or PDF.
You can look into the folder 'path_to_store_result', which contains images demonstrating the detection results, as follows:
|
||||
<div align="center" style="margin-top:20px;margin-bottom:20px;">
|
||||
<img src="https://github.com/infiniflow/ragflow/assets/12318111/07e0f625-9b28-43d0-9fbb-5bf586cd286f" width="1000"/>
|
||||
</div>
|
||||
|
||||
- Table Structure Recognition (TSR). A data table is a frequently used structure for presenting data, whether numbers or text.
The structure of a table can be very complex: hierarchical headers, spanning cells, and projected row headers.
Along with TSR, we also reassemble the content into sentences that an LLM can readily comprehend.
We have five labels for the TSR task:
|
||||
- Column
|
||||
- Row
|
||||
- Column header
|
||||
- Projected row header
|
||||
- Spanning cell
|
||||
|
||||
Try the following command to see the table structure recognition results.
|
||||
```bash
|
||||
python deepdoc/vision/t_recognizer.py --inputs=path_to_images_or_pdfs --threshold=0.2 --mode=tsr --output_dir=path_to_store_result
|
||||
```
|
||||
The inputs can be a directory of images or PDFs, or a single image or PDF.
You can look into the folder 'path_to_store_result', which contains both images and HTML pages demonstrating the detection results, as follows:
|
||||
<div align="center" style="margin-top:20px;margin-bottom:20px;">
|
||||
<img src="https://github.com/infiniflow/ragflow/assets/12318111/cb24e81b-f2ba-49f3-ac09-883d75606f4c" width="1000"/>
|
||||
</div>
|
||||
|
||||
<a name="3"></a>
|
||||
## 3. Parser
|
||||
|
||||
Four kinds of document formats, PDF, DOCX, EXCEL, and PPT, have corresponding parsers.
The most complex is the PDF parser, given PDF's flexibility. The output of the PDF parser includes:
- Text chunks with their positions in the PDF (page number and rectangular coordinates).
- Tables cropped as images from the PDF, with contents already translated into natural-language sentences.
- Figures with their captions and any text inside the figures.
|
||||
|
||||
### Résumé
|
||||
|
||||
The résumé is a very complicated kind of document. A résumé composed of unstructured text
with an arbitrary layout can be resolved into structured data of nearly a hundred fields.
We haven't open-sourced the parser itself yet; we have released the processing method applied after the parsing procedure.
|
||||
|
||||
|
||||
@@ -1,116 +0,0 @@
|
||||
[English](./README.md) | Simplified Chinese
|
||||
|
||||
# *Deep*Doc
|
||||
|
||||
- [*Deep*Doc](#deepdoc)
|
||||
- [1. Introduction](#1)
- [2. Vision](#2)
- [3. Parser](#3)
- [Résumé](#résumé)
|
||||
|
||||
<a name="1"></a>
|
||||
## 1. Introduction
|
||||
|
||||
For a large number of documents from various domains, in various formats, and with diverse retrieval requirements, accurate analysis becomes a very challenging task. *Deep*Doc was born for exactly that purpose. So far there are two components in *Deep*Doc: vision and parser. If you are interested in our OCR, layout recognition, and TSR results, you can run the test programs below.
|
||||
|
||||
```bash
|
||||
python deepdoc/vision/t_ocr.py -h
|
||||
usage: t_ocr.py [-h] --inputs INPUTS [--output_dir OUTPUT_DIR]
|
||||
|
||||
options:
|
||||
-h, --help show this help message and exit
|
||||
--inputs INPUTS Directory where to store images or PDFs, or a file path to a single image or PDF
|
||||
--output_dir OUTPUT_DIR
|
||||
Directory where to store the output images. Default: './ocr_outputs'
|
||||
```
|
||||
|
||||
```bash
|
||||
python deepdoc/vision/t_recognizer.py -h
|
||||
usage: t_recognizer.py [-h] --inputs INPUTS [--output_dir OUTPUT_DIR] [--threshold THRESHOLD] [--mode {layout,tsr}]
|
||||
|
||||
options:
|
||||
-h, --help show this help message and exit
|
||||
--inputs INPUTS Directory where to store images or PDFs, or a file path to a single image or PDF
|
||||
--output_dir OUTPUT_DIR
|
||||
Directory where to store the output images. Default: './layouts_outputs'
|
||||
--threshold THRESHOLD
|
||||
A threshold to filter out detections. Default: 0.5
|
||||
--mode {layout,tsr} Task mode: layout recognition or table structure recognition
|
||||
```
|
||||
|
||||
HuggingFace serves our models. If you have trouble downloading models from HuggingFace, this might help!
|
||||
|
||||
```bash
|
||||
export HF_ENDPOINT=https://hf-mirror.com
|
||||
```
|
||||
|
||||
<a name="2"></a>
|
||||
## 2. Vision
|
||||
|
||||
As humans, we use visual information to solve problems.
|
||||
|
||||
- **OCR (Optical Character Recognition)**. Since many documents are presented as images, or can at least be converted to images, OCR is an essential, fundamental, even universal solution for text extraction.
|
||||
|
||||
```bash
|
||||
python deepdoc/vision/t_ocr.py --inputs=path_to_images_or_pdfs --output_dir=path_to_store_result
|
||||
```
|
||||
|
||||
The input can be a directory of images or PDFs, or a single image or PDF file. You can look into the folder `path_to_store_result`, which contains images demonstrating where the results were found, along with txt files containing the OCR text.
|
||||
|
||||
<div align="center" style="margin-top:20px;margin-bottom:20px;">
|
||||
<img src="https://github.com/infiniflow/ragflow/assets/12318111/f25bee3d-aaf7-4102-baf5-d5208361d110" width="900"/>
|
||||
</div>
|
||||
|
||||
- **Layout recognition**. Documents from different domains may have quite different layouts; newspapers, magazines, books, and résumés differ in this respect. Only with accurate layout analysis can a machine decide whether text parts are contiguous, whether a region needs Table Structure Recognition (TSR), or whether a region is a figure described by a nearby caption. We have 10 basic layout components, covering most cases:
  - Text
  - Title
  - Figure
  - Figure caption
  - Table
  - Table caption
  - Header
  - Footer
  - Reference
  - Equation
|
||||
|
||||
Try the following command to see the layout detection results.
|
||||
|
||||
```bash
|
||||
python deepdoc/vision/t_recognizer.py --inputs=path_to_images_or_pdfs --threshold=0.2 --mode=layout --output_dir=path_to_store_result
|
||||
```
|
||||
|
||||
The input can be a directory of images or PDFs, or a single image or PDF file. You can look into the folder `path_to_store_result`, which contains images showing the detection results, as follows:
|
||||
<div align="center" style="margin-top:20px;margin-bottom:20px;">
|
||||
<img src="https://github.com/infiniflow/ragflow/assets/12318111/07e0f625-9b28-43d0-9fbb-5bf586cd286f" width="1000"/>
|
||||
</div>
|
||||
|
||||
- **TSR (Table Structure Recognition)**. A data table is a commonly used structure for presenting data, whether numbers or text. The structure of a table can be very complex: hierarchical headers, spanning cells, and projected row headers. Along with TSR, we also reassemble the content into sentences that an LLM can readily comprehend. The TSR task has five labels:
  - Column
  - Row
  - Column header
  - Projected row header
  - Spanning cell
|
||||
|
||||
Try the following command to see the table structure recognition results.
|
||||
|
||||
```bash
|
||||
python deepdoc/vision/t_recognizer.py --inputs=path_to_images_or_pdfs --threshold=0.2 --mode=tsr --output_dir=path_to_store_result
|
||||
```
|
||||
|
||||
The input can be a directory of images or PDFs, or a single image or PDF file. You can look into the folder `path_to_store_result`, which contains both images and HTML pages presenting the detection results, as follows:
|
||||
|
||||
<div align="center" style="margin-top:20px;margin-bottom:20px;">
|
||||
<img src="https://github.com/infiniflow/ragflow/assets/12318111/cb24e81b-f2ba-49f3-ac09-883d75606f4c" width="1000"/>
|
||||
</div>
|
||||
|
||||
<a name="3"></a>
|
||||
## 3. Parser
|
||||
|
||||
The four document formats PDF, DOCX, EXCEL, and PPT each have a corresponding parser. The most complex is the PDF parser, given PDF's flexibility. The PDF parser's output includes:
- Text chunks with their positions in the PDF (page number and rectangular coordinates).
- Tables cropped as images from the PDF, with contents already translated into natural-language sentences.
- Figures with their captions and any text inside the figures.
|
||||
|
||||
### Résumé
|
||||
|
||||
The résumé is a very complicated kind of document. A résumé composed of unstructured text in arbitrary layouts can be resolved into structured data of nearly a hundred fields. We haven't open-sourced the parser itself yet; the processing method applied after the parsing procedure has been released.
|
||||
@@ -1,18 +0,0 @@
|
||||
#
|
||||
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
from beartype.claw import beartype_this_package
|
||||
beartype_this_package()
|
||||
@@ -1,40 +0,0 @@
|
||||
#
|
||||
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
from .docx_parser import RAGFlowDocxParser as DocxParser
|
||||
from .excel_parser import RAGFlowExcelParser as ExcelParser
|
||||
from .html_parser import RAGFlowHtmlParser as HtmlParser
|
||||
from .json_parser import RAGFlowJsonParser as JsonParser
|
||||
from .markdown_parser import MarkdownElementExtractor
|
||||
from .markdown_parser import RAGFlowMarkdownParser as MarkdownParser
|
||||
from .pdf_parser import PlainParser
|
||||
from .pdf_parser import RAGFlowPdfParser as PdfParser
|
||||
from .ppt_parser import RAGFlowPptParser as PptParser
|
||||
from .txt_parser import RAGFlowTxtParser as TxtParser
|
||||
|
||||
__all__ = [
|
||||
"PdfParser",
|
||||
"PlainParser",
|
||||
"DocxParser",
|
||||
"ExcelParser",
|
||||
"PptParser",
|
||||
"HtmlParser",
|
||||
"JsonParser",
|
||||
"MarkdownParser",
|
||||
"TxtParser",
|
||||
"MarkdownElementExtractor",
|
||||
]
|
||||
|
||||
@@ -1,139 +0,0 @@
|
||||
#
|
||||
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
from docx import Document
|
||||
import re
|
||||
import pandas as pd
|
||||
from collections import Counter
|
||||
from rag.nlp import rag_tokenizer
|
||||
from io import BytesIO
|
||||
|
||||
|
||||
class RAGFlowDocxParser:
|
||||
|
||||
def __extract_table_content(self, tb):
|
||||
df = []
|
||||
for row in tb.rows:
|
||||
df.append([c.text for c in row.cells])
|
||||
return self.__compose_table_content(pd.DataFrame(df))
|
||||
|
||||
def __compose_table_content(self, df):
|
||||
|
||||
def blockType(b):
|
||||
pattern = [
|
||||
("^(20|19)[0-9]{2}[年/-][0-9]{1,2}[月/-][0-9]{1,2}日*$", "Dt"),
|
||||
(r"^(20|19)[0-9]{2}年$", "Dt"),
|
||||
(r"^(20|19)[0-9]{2}[年/-][0-9]{1,2}月*$", "Dt"),
|
||||
("^[0-9]{1,2}[月/-][0-9]{1,2}日*$", "Dt"),
|
||||
(r"^第*[一二三四1-4]季度$", "Dt"),
|
||||
(r"^(20|19)[0-9]{2}年*[一二三四1-4]季度$", "Dt"),
|
||||
(r"^(20|19)[0-9]{2}[ABCDE]$", "DT"),
|
||||
("^[0-9.,+%/ -]+$", "Nu"),
|
||||
(r"^[0-9A-Z/\._~-]+$", "Ca"),
|
||||
(r"^[A-Z]*[a-z' -]+$", "En"),
|
||||
(r"^[0-9.,+-]+[0-9A-Za-z/$¥%<>()()' -]+$", "NE"),
|
||||
(r"^.{1}$", "Sg")
|
||||
]
|
||||
for p, n in pattern:
|
||||
if re.search(p, b):
|
||||
return n
|
||||
tks = [t for t in rag_tokenizer.tokenize(b).split() if len(t) > 1]
|
||||
if len(tks) > 3:
|
||||
if len(tks) < 12:
|
||||
return "Tx"
|
||||
else:
|
||||
return "Lx"
|
||||
|
||||
if len(tks) == 1 and rag_tokenizer.tag(tks[0]) == "nr":
|
||||
return "Nr"
|
||||
|
||||
return "Ot"
|
||||
|
||||
if len(df) < 2:
|
||||
return []
|
||||
max_type = Counter([blockType(str(df.iloc[i, j])) for i in range(
|
||||
1, len(df)) for j in range(len(df.iloc[i, :]))])
|
||||
max_type = max(max_type.items(), key=lambda x: x[1])[0]
|
||||
|
||||
colnm = len(df.iloc[0, :])
|
||||
hdrows = [0]  # the header does not necessarily appear in the first row
|
||||
if max_type == "Nu":
|
||||
for r in range(1, len(df)):
|
||||
tys = Counter([blockType(str(df.iloc[r, j]))
|
||||
for j in range(len(df.iloc[r, :]))])
|
||||
tys = max(tys.items(), key=lambda x: x[1])[0]
|
||||
if tys != max_type:
|
||||
hdrows.append(r)
|
||||
|
||||
lines = []
|
||||
for i in range(1, len(df)):
|
||||
if i in hdrows:
|
||||
continue
|
||||
hr = [r - i for r in hdrows]
|
||||
hr = [r for r in hr if r < 0]
|
||||
t = len(hr) - 1
|
||||
while t > 0:
|
||||
if hr[t] - hr[t - 1] > 1:
|
||||
hr = hr[t:]
|
||||
break
|
||||
t -= 1
|
||||
headers = []
|
||||
for j in range(len(df.iloc[i, :])):
|
||||
t = []
|
||||
for h in hr:
|
||||
x = str(df.iloc[i + h, j]).strip()
|
||||
if x in t:
|
||||
continue
|
||||
t.append(x)
|
||||
t = ",".join(t)
|
||||
if t:
|
||||
t += ": "
|
||||
headers.append(t)
|
||||
cells = []
|
||||
for j in range(len(df.iloc[i, :])):
|
||||
if not str(df.iloc[i, j]):
|
||||
continue
|
||||
cells.append(headers[j] + str(df.iloc[i, j]))
|
||||
lines.append(";".join(cells))
|
||||
|
||||
if colnm > 3:
|
||||
return lines
|
||||
return ["\n".join(lines)]
|
||||
|
||||
def __call__(self, fnm, from_page=0, to_page=100000000):
|
||||
self.doc = Document(fnm) if isinstance(
|
||||
fnm, str) else Document(BytesIO(fnm))
|
||||
pn = 0 # parsed page
|
||||
secs = [] # parsed contents
|
||||
for p in self.doc.paragraphs:
|
||||
if pn > to_page:
|
||||
break
|
||||
|
||||
runs_within_single_paragraph = [] # save runs within the range of pages
|
||||
for run in p.runs:
|
||||
if pn > to_page:
|
||||
break
|
||||
if from_page <= pn < to_page and p.text.strip():
|
||||
runs_within_single_paragraph.append(run.text) # append run.text first
|
||||
|
||||
# wrap page break checker into a static method
|
||||
if 'lastRenderedPageBreak' in run._element.xml:
|
||||
pn += 1
|
||||
|
||||
secs.append(("".join(runs_within_single_paragraph), p.style.name if hasattr(p.style, 'name') else '')) # then concat run.text as part of the paragraph
|
||||
|
||||
tbls = [self.__extract_table_content(tb) for tb in self.doc.tables]
|
||||
return secs, tbls
|
||||
@@ -1,189 +0,0 @@
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import logging
|
||||
import re
|
||||
import sys
|
||||
from io import BytesIO
|
||||
|
||||
import pandas as pd
|
||||
from openpyxl import Workbook, load_workbook
|
||||
|
||||
from rag.nlp import find_codec
|
||||
|
||||
# copied from `/openpyxl/cell/cell.py`
|
||||
ILLEGAL_CHARACTERS_RE = re.compile(r"[\000-\010]|[\013-\014]|[\016-\037]")
|
||||
|
||||
|
||||
class RAGFlowExcelParser:
|
||||
@staticmethod
|
||||
def _load_excel_to_workbook(file_like_object):
|
||||
if isinstance(file_like_object, bytes):
|
||||
file_like_object = BytesIO(file_like_object)
|
||||
|
||||
# Read first 4 bytes to determine file type
|
||||
file_like_object.seek(0)
|
||||
file_head = file_like_object.read(4)
|
||||
file_like_object.seek(0)
|
||||
|
||||
if not (file_head.startswith(b"PK\x03\x04") or file_head.startswith(b"\xd0\xcf\x11\xe0")):
|
||||
logging.info("Not an Excel file, converting CSV to Excel Workbook")
|
||||
|
||||
try:
|
||||
file_like_object.seek(0)
|
||||
df = pd.read_csv(file_like_object)
|
||||
return RAGFlowExcelParser._dataframe_to_workbook(df)
|
||||
|
||||
except Exception as e_csv:
|
||||
raise Exception(f"Failed to parse CSV and convert to Excel Workbook: {e_csv}")
|
||||
|
||||
try:
|
||||
return load_workbook(file_like_object, data_only=True)
|
||||
except Exception as e:
|
||||
logging.info(f"openpyxl load error: {e}, try pandas instead")
|
||||
try:
|
||||
file_like_object.seek(0)
|
||||
try:
|
||||
df = pd.read_excel(file_like_object)
|
||||
return RAGFlowExcelParser._dataframe_to_workbook(df)
|
||||
except Exception as ex:
|
||||
logging.info(f"pandas with default engine load error: {ex}, try calamine instead")
|
||||
file_like_object.seek(0)
|
||||
df = pd.read_excel(file_like_object, engine="calamine")
|
||||
return RAGFlowExcelParser._dataframe_to_workbook(df)
|
||||
except Exception as e_pandas:
|
||||
raise Exception(f"pandas.read_excel error: {e_pandas}, original openpyxl error: {e}")
|
||||
|
||||
@staticmethod
|
||||
def _clean_dataframe(df: pd.DataFrame):
|
||||
def clean_string(s):
|
||||
if isinstance(s, str):
|
||||
return ILLEGAL_CHARACTERS_RE.sub(" ", s)
|
||||
return s
|
||||
|
||||
return df.apply(lambda col: col.map(clean_string))
|
||||
|
||||
@staticmethod
|
||||
def _dataframe_to_workbook(df):
|
||||
df = RAGFlowExcelParser._clean_dataframe(df)
|
||||
wb = Workbook()
|
||||
ws = wb.active
|
||||
ws.title = "Data"
|
||||
|
||||
for col_num, column_name in enumerate(df.columns, 1):
|
||||
ws.cell(row=1, column=col_num, value=column_name)
|
||||
|
||||
for row_num, row in enumerate(df.values, 2):
|
||||
for col_num, value in enumerate(row, 1):
|
||||
ws.cell(row=row_num, column=col_num, value=value)
|
||||
|
||||
return wb
|
||||
|
||||
def html(self, fnm, chunk_rows=256):
|
||||
from html import escape
|
||||
|
||||
file_like_object = BytesIO(fnm) if not isinstance(fnm, str) else fnm
|
||||
wb = RAGFlowExcelParser._load_excel_to_workbook(file_like_object)
|
||||
tb_chunks = []
|
||||
|
||||
def _fmt(v):
|
||||
if v is None:
|
||||
return ""
|
||||
return str(v).strip()
|
||||
|
||||
for sheetname in wb.sheetnames:
|
||||
ws = wb[sheetname]
|
||||
rows = list(ws.rows)
|
||||
if not rows:
|
||||
continue
|
||||
|
||||
tb_rows_0 = "<tr>"
|
||||
for t in list(rows[0]):
|
||||
tb_rows_0 += f"<th>{escape(_fmt(t.value))}</th>"
|
||||
tb_rows_0 += "</tr>"
|
||||
|
||||
for chunk_i in range((len(rows) - 1) // chunk_rows + 1):
|
||||
tb = ""
|
||||
tb += f"<table><caption>{sheetname}</caption>"
|
||||
tb += tb_rows_0
|
||||
for r in list(rows[1 + chunk_i * chunk_rows : min(1 + (chunk_i + 1) * chunk_rows, len(rows))]):
|
||||
tb += "<tr>"
|
||||
for i, c in enumerate(r):
|
||||
if c.value is None:
|
||||
tb += "<td></td>"
|
||||
else:
|
||||
tb += f"<td>{escape(_fmt(c.value))}</td>"
|
||||
tb += "</tr>"
|
||||
tb += "</table>\n"
|
||||
tb_chunks.append(tb)
|
||||
|
||||
return tb_chunks
|
||||
|
||||
def markdown(self, fnm):
|
||||
import pandas as pd
|
||||
|
||||
file_like_object = BytesIO(fnm) if not isinstance(fnm, str) else fnm
|
||||
try:
|
||||
file_like_object.seek(0)
|
||||
df = pd.read_excel(file_like_object)
|
||||
except Exception as e:
|
||||
logging.warning(f"Parse spreadsheet error: {e}, trying to interpret as CSV file")
|
||||
file_like_object.seek(0)
|
||||
df = pd.read_csv(file_like_object)
|
||||
df = df.replace(r"^\s*$", "", regex=True)
|
||||
return df.to_markdown(index=False)
|
||||
|
||||
def __call__(self, fnm):
|
||||
file_like_object = BytesIO(fnm) if not isinstance(fnm, str) else fnm
|
||||
wb = RAGFlowExcelParser._load_excel_to_workbook(file_like_object)
|
||||
|
||||
res = []
|
||||
for sheetname in wb.sheetnames:
|
||||
ws = wb[sheetname]
|
||||
rows = list(ws.rows)
|
||||
if not rows:
|
||||
continue
|
||||
ti = list(rows[0])
|
||||
for r in list(rows[1:]):
|
||||
fields = []
|
||||
for i, c in enumerate(r):
|
||||
if not c.value:
|
||||
continue
|
||||
t = str(ti[i].value) if i < len(ti) else ""
|
||||
t += (":" if t else "") + str(c.value)
|
||||
fields.append(t)
|
||||
line = "; ".join(fields)
|
||||
if sheetname.lower().find("sheet") < 0:
|
||||
line += " ——" + sheetname
|
||||
res.append(line)
|
||||
return res
|
||||
|
||||
@staticmethod
|
||||
def row_number(fnm, binary):
|
||||
if fnm.split(".")[-1].lower().find("xls") >= 0:
|
||||
wb = RAGFlowExcelParser._load_excel_to_workbook(BytesIO(binary))
|
||||
total = 0
|
||||
for sheetname in wb.sheetnames:
|
||||
ws = wb[sheetname]
|
||||
total += len(list(ws.rows))
|
||||
return total
|
||||
|
||||
if fnm.split(".")[-1].lower() in ["csv", "txt"]:
|
||||
encoding = find_codec(binary)
|
||||
txt = binary.decode(encoding, errors="ignore")
|
||||
return len(txt.split("\n"))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
psr = RAGFlowExcelParser()
|
||||
psr(sys.argv[1])
|
||||
Some files were not shown because too many files have changed in this diff.