将flask改成fastapi

This commit is contained in:
2025-10-13 13:18:03 +08:00
commit 88db2539b0
476 changed files with 739741 additions and 0 deletions

128
api/apps/sdk/agent.py Normal file
View File

@@ -0,0 +1,128 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import json
import time
from typing import Any, cast
from api.db.services.canvas_service import UserCanvasService
from api.db.services.user_canvas_version import UserCanvasVersionService
from api.settings import RetCode
from api.utils import get_uuid
from api.utils.api_utils import get_data_error_result, get_error_data_result, get_json_result, token_required
from api.utils.api_utils import get_result
from flask import request
@manager.route('/agents', methods=['GET']) # noqa: F821
@token_required
def list_agents(tenant_id):
id = request.args.get("id")
title = request.args.get("title")
if id or title:
canvas = UserCanvasService.query(id=id, title=title, user_id=tenant_id)
if not canvas:
return get_error_data_result("The agent doesn't exist.")
page_number = int(request.args.get("page", 1))
items_per_page = int(request.args.get("page_size", 30))
orderby = request.args.get("orderby", "update_time")
if request.args.get("desc") == "False" or request.args.get("desc") == "false":
desc = False
else:
desc = True
canvas = UserCanvasService.get_list(tenant_id,page_number,items_per_page,orderby,desc,id,title)
return get_result(data=canvas)
@manager.route("/agents", methods=["POST"]) # noqa: F821
@token_required
def create_agent(tenant_id: str):
req: dict[str, Any] = cast(dict[str, Any], request.json)
req["user_id"] = tenant_id
if req.get("dsl") is not None:
if not isinstance(req["dsl"], str):
req["dsl"] = json.dumps(req["dsl"], ensure_ascii=False)
req["dsl"] = json.loads(req["dsl"])
else:
return get_json_result(data=False, message="No DSL data in request.", code=RetCode.ARGUMENT_ERROR)
if req.get("title") is not None:
req["title"] = req["title"].strip()
else:
return get_json_result(data=False, message="No title in request.", code=RetCode.ARGUMENT_ERROR)
if UserCanvasService.query(user_id=tenant_id, title=req["title"]):
return get_data_error_result(message=f"Agent with title {req['title']} already exists.")
agent_id = get_uuid()
req["id"] = agent_id
if not UserCanvasService.save(**req):
return get_data_error_result(message="Fail to create agent.")
UserCanvasVersionService.insert(
user_canvas_id=agent_id,
title="{0}_{1}".format(req["title"], time.strftime("%Y_%m_%d_%H_%M_%S")),
dsl=req["dsl"]
)
return get_json_result(data=True)
@manager.route("/agents/<agent_id>", methods=["PUT"]) # noqa: F821
@token_required
def update_agent(tenant_id: str, agent_id: str):
req: dict[str, Any] = {k: v for k, v in cast(dict[str, Any], request.json).items() if v is not None}
req["user_id"] = tenant_id
if req.get("dsl") is not None:
if not isinstance(req["dsl"], str):
req["dsl"] = json.dumps(req["dsl"], ensure_ascii=False)
req["dsl"] = json.loads(req["dsl"])
if req.get("title") is not None:
req["title"] = req["title"].strip()
if not UserCanvasService.query(user_id=tenant_id, id=agent_id):
return get_json_result(
data=False, message="Only owner of canvas authorized for this operation.",
code=RetCode.OPERATING_ERROR)
UserCanvasService.update_by_id(agent_id, req)
if req.get("dsl") is not None:
UserCanvasVersionService.insert(
user_canvas_id=agent_id,
title="{0}_{1}".format(req["title"], time.strftime("%Y_%m_%d_%H_%M_%S")),
dsl=req["dsl"]
)
UserCanvasVersionService.delete_all_versions(agent_id)
return get_json_result(data=True)
@manager.route("/agents/<agent_id>", methods=["DELETE"]) # noqa: F821
@token_required
def delete_agent(tenant_id: str, agent_id: str):
if not UserCanvasService.query(user_id=tenant_id, id=agent_id):
return get_json_result(
data=False, message="Only owner of canvas authorized for this operation.",
code=RetCode.OPERATING_ERROR)
UserCanvasService.delete_by_id(agent_id)
return get_json_result(data=True)

325
api/apps/sdk/chat.py Normal file
View File

@@ -0,0 +1,325 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import logging
from flask import request
from api import settings
from api.db import StatusEnum
from api.db.services.dialog_service import DialogService
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.tenant_llm_service import TenantLLMService
from api.db.services.user_service import TenantService
from api.utils import get_uuid
from api.utils.api_utils import check_duplicate_ids, get_error_data_result, get_result, token_required
@manager.route("/chats", methods=["POST"]) # noqa: F821
@token_required
def create(tenant_id):
req = request.json
ids = [i for i in req.get("dataset_ids", []) if i]
for kb_id in ids:
kbs = KnowledgebaseService.accessible(kb_id=kb_id, user_id=tenant_id)
if not kbs:
return get_error_data_result(f"You don't own the dataset {kb_id}")
kbs = KnowledgebaseService.query(id=kb_id)
kb = kbs[0]
if kb.chunk_num == 0:
return get_error_data_result(f"The dataset {kb_id} doesn't own parsed file")
kbs = KnowledgebaseService.get_by_ids(ids) if ids else []
embd_ids = [TenantLLMService.split_model_name_and_factory(kb.embd_id)[0] for kb in kbs] # remove vendor suffix for comparison
embd_count = list(set(embd_ids))
if len(embd_count) > 1:
return get_result(message='Datasets use different embedding models."', code=settings.RetCode.AUTHENTICATION_ERROR)
req["kb_ids"] = ids
# llm
llm = req.get("llm")
if llm:
if "model_name" in llm:
req["llm_id"] = llm.pop("model_name")
if req.get("llm_id") is not None:
llm_name, llm_factory = TenantLLMService.split_model_name_and_factory(req["llm_id"])
if not TenantLLMService.query(tenant_id=tenant_id, llm_name=llm_name, llm_factory=llm_factory, model_type="chat"):
return get_error_data_result(f"`model_name` {req.get('llm_id')} doesn't exist")
req["llm_setting"] = req.pop("llm")
e, tenant = TenantService.get_by_id(tenant_id)
if not e:
return get_error_data_result(message="Tenant not found!")
# prompt
prompt = req.get("prompt")
key_mapping = {"parameters": "variables", "prologue": "opener", "quote": "show_quote", "system": "prompt", "rerank_id": "rerank_model", "vector_similarity_weight": "keywords_similarity_weight"}
key_list = ["similarity_threshold", "vector_similarity_weight", "top_n", "rerank_id", "top_k"]
if prompt:
for new_key, old_key in key_mapping.items():
if old_key in prompt:
prompt[new_key] = prompt.pop(old_key)
for key in key_list:
if key in prompt:
req[key] = prompt.pop(key)
req["prompt_config"] = req.pop("prompt")
# init
req["id"] = get_uuid()
req["description"] = req.get("description", "A helpful Assistant")
req["icon"] = req.get("avatar", "")
req["top_n"] = req.get("top_n", 6)
req["top_k"] = req.get("top_k", 1024)
req["rerank_id"] = req.get("rerank_id", "")
if req.get("rerank_id"):
value_rerank_model = ["BAAI/bge-reranker-v2-m3", "maidalun1020/bce-reranker-base_v1"]
if req["rerank_id"] not in value_rerank_model and not TenantLLMService.query(tenant_id=tenant_id, llm_name=req.get("rerank_id"), model_type="rerank"):
return get_error_data_result(f"`rerank_model` {req.get('rerank_id')} doesn't exist")
if not req.get("llm_id"):
req["llm_id"] = tenant.llm_id
if not req.get("name"):
return get_error_data_result(message="`name` is required.")
if DialogService.query(name=req["name"], tenant_id=tenant_id, status=StatusEnum.VALID.value):
return get_error_data_result(message="Duplicated chat name in creating chat.")
# tenant_id
if req.get("tenant_id"):
return get_error_data_result(message="`tenant_id` must not be provided.")
req["tenant_id"] = tenant_id
# prompt more parameter
default_prompt = {
"system": """You are an intelligent assistant. Please summarize the content of the knowledge base to answer the question. Please list the data in the knowledge base and answer in detail. When all knowledge base content is irrelevant to the question, your answer must include the sentence "The answer you are looking for is not found in the knowledge base!" Answers need to consider chat history.
Here is the knowledge base:
{knowledge}
The above is the knowledge base.""",
"prologue": "Hi! I'm your assistant. What can I do for you?",
"parameters": [{"key": "knowledge", "optional": False}],
"empty_response": "Sorry! No relevant content was found in the knowledge base!",
"quote": True,
"tts": False,
"refine_multiturn": True,
}
key_list_2 = ["system", "prologue", "parameters", "empty_response", "quote", "tts", "refine_multiturn"]
if "prompt_config" not in req:
req["prompt_config"] = {}
for key in key_list_2:
temp = req["prompt_config"].get(key)
if (not temp and key == "system") or (key not in req["prompt_config"]):
req["prompt_config"][key] = default_prompt[key]
for p in req["prompt_config"]["parameters"]:
if p["optional"]:
continue
if req["prompt_config"]["system"].find("{%s}" % p["key"]) < 0:
return get_error_data_result(message="Parameter '{}' is not used".format(p["key"]))
# save
if not DialogService.save(**req):
return get_error_data_result(message="Fail to new a chat!")
# response
e, res = DialogService.get_by_id(req["id"])
if not e:
return get_error_data_result(message="Fail to new a chat!")
res = res.to_json()
renamed_dict = {}
for key, value in res["prompt_config"].items():
new_key = key_mapping.get(key, key)
renamed_dict[new_key] = value
res["prompt"] = renamed_dict
del res["prompt_config"]
new_dict = {"similarity_threshold": res["similarity_threshold"], "keywords_similarity_weight": 1 - res["vector_similarity_weight"], "top_n": res["top_n"], "rerank_model": res["rerank_id"]}
res["prompt"].update(new_dict)
for key in key_list:
del res[key]
res["llm"] = res.pop("llm_setting")
res["llm"]["model_name"] = res.pop("llm_id")
del res["kb_ids"]
res["dataset_ids"] = req.get("dataset_ids", [])
res["avatar"] = res.pop("icon")
return get_result(data=res)
@manager.route("/chats/<chat_id>", methods=["PUT"]) # noqa: F821
@token_required
def update(tenant_id, chat_id):
if not DialogService.query(tenant_id=tenant_id, id=chat_id, status=StatusEnum.VALID.value):
return get_error_data_result(message="You do not own the chat")
req = request.json
ids = req.get("dataset_ids", [])
if "show_quotation" in req:
req["do_refer"] = req.pop("show_quotation")
if ids:
for kb_id in ids:
kbs = KnowledgebaseService.accessible(kb_id=kb_id, user_id=tenant_id)
if not kbs:
return get_error_data_result(f"You don't own the dataset {kb_id}")
kbs = KnowledgebaseService.query(id=kb_id)
kb = kbs[0]
if kb.chunk_num == 0:
return get_error_data_result(f"The dataset {kb_id} doesn't own parsed file")
kbs = KnowledgebaseService.get_by_ids(ids)
embd_ids = [TenantLLMService.split_model_name_and_factory(kb.embd_id)[0] for kb in kbs] # remove vendor suffix for comparison
embd_count = list(set(embd_ids))
if len(embd_count) > 1:
return get_result(message='Datasets use different embedding models."', code=settings.RetCode.AUTHENTICATION_ERROR)
req["kb_ids"] = ids
llm = req.get("llm")
if llm:
if "model_name" in llm:
req["llm_id"] = llm.pop("model_name")
if req.get("llm_id") is not None:
llm_name, llm_factory = TenantLLMService.split_model_name_and_factory(req["llm_id"])
if not TenantLLMService.query(tenant_id=tenant_id, llm_name=llm_name, llm_factory=llm_factory, model_type="chat"):
return get_error_data_result(f"`model_name` {req.get('llm_id')} doesn't exist")
req["llm_setting"] = req.pop("llm")
e, tenant = TenantService.get_by_id(tenant_id)
if not e:
return get_error_data_result(message="Tenant not found!")
# prompt
prompt = req.get("prompt")
key_mapping = {"parameters": "variables", "prologue": "opener", "quote": "show_quote", "system": "prompt", "rerank_id": "rerank_model", "vector_similarity_weight": "keywords_similarity_weight"}
key_list = ["similarity_threshold", "vector_similarity_weight", "top_n", "rerank_id", "top_k"]
if prompt:
for new_key, old_key in key_mapping.items():
if old_key in prompt:
prompt[new_key] = prompt.pop(old_key)
for key in key_list:
if key in prompt:
req[key] = prompt.pop(key)
req["prompt_config"] = req.pop("prompt")
e, res = DialogService.get_by_id(chat_id)
res = res.to_json()
if req.get("rerank_id"):
value_rerank_model = ["BAAI/bge-reranker-v2-m3", "maidalun1020/bce-reranker-base_v1"]
if req["rerank_id"] not in value_rerank_model and not TenantLLMService.query(tenant_id=tenant_id, llm_name=req.get("rerank_id"), model_type="rerank"):
return get_error_data_result(f"`rerank_model` {req.get('rerank_id')} doesn't exist")
if "name" in req:
if not req.get("name"):
return get_error_data_result(message="`name` cannot be empty.")
if req["name"].lower() != res["name"].lower() and len(DialogService.query(name=req["name"], tenant_id=tenant_id, status=StatusEnum.VALID.value)) > 0:
return get_error_data_result(message="Duplicated chat name in updating chat.")
if "prompt_config" in req:
res["prompt_config"].update(req["prompt_config"])
for p in res["prompt_config"]["parameters"]:
if p["optional"]:
continue
if res["prompt_config"]["system"].find("{%s}" % p["key"]) < 0:
return get_error_data_result(message="Parameter '{}' is not used".format(p["key"]))
if "llm_setting" in req:
res["llm_setting"].update(req["llm_setting"])
req["prompt_config"] = res["prompt_config"]
req["llm_setting"] = res["llm_setting"]
# avatar
if "avatar" in req:
req["icon"] = req.pop("avatar")
if "dataset_ids" in req:
req.pop("dataset_ids")
if not DialogService.update_by_id(chat_id, req):
return get_error_data_result(message="Chat not found!")
return get_result()
@manager.route("/chats", methods=["DELETE"]) # noqa: F821
@token_required
def delete(tenant_id):
errors = []
success_count = 0
req = request.json
if not req:
ids = None
else:
ids = req.get("ids")
if not ids:
id_list = []
dias = DialogService.query(tenant_id=tenant_id, status=StatusEnum.VALID.value)
for dia in dias:
id_list.append(dia.id)
else:
id_list = ids
unique_id_list, duplicate_messages = check_duplicate_ids(id_list, "assistant")
for id in unique_id_list:
if not DialogService.query(tenant_id=tenant_id, id=id, status=StatusEnum.VALID.value):
errors.append(f"Assistant({id}) not found.")
continue
temp_dict = {"status": StatusEnum.INVALID.value}
DialogService.update_by_id(id, temp_dict)
success_count += 1
if errors:
if success_count > 0:
return get_result(data={"success_count": success_count, "errors": errors}, message=f"Partially deleted {success_count} chats with {len(errors)} errors")
else:
return get_error_data_result(message="; ".join(errors))
if duplicate_messages:
if success_count > 0:
return get_result(message=f"Partially deleted {success_count} chats with {len(duplicate_messages)} errors", data={"success_count": success_count, "errors": duplicate_messages})
else:
return get_error_data_result(message=";".join(duplicate_messages))
return get_result()
@manager.route("/chats", methods=["GET"]) # noqa: F821
@token_required
def list_chat(tenant_id):
id = request.args.get("id")
name = request.args.get("name")
if id or name:
chat = DialogService.query(id=id, name=name, status=StatusEnum.VALID.value, tenant_id=tenant_id)
if not chat:
return get_error_data_result(message="The chat doesn't exist")
page_number = int(request.args.get("page", 1))
items_per_page = int(request.args.get("page_size", 30))
orderby = request.args.get("orderby", "create_time")
if request.args.get("desc") == "False" or request.args.get("desc") == "false":
desc = False
else:
desc = True
chats = DialogService.get_list(tenant_id, page_number, items_per_page, orderby, desc, id, name)
if not chats:
return get_result(data=[])
list_assts = []
key_mapping = {
"parameters": "variables",
"prologue": "opener",
"quote": "show_quote",
"system": "prompt",
"rerank_id": "rerank_model",
"vector_similarity_weight": "keywords_similarity_weight",
"do_refer": "show_quotation",
}
key_list = ["similarity_threshold", "vector_similarity_weight", "top_n", "rerank_id"]
for res in chats:
renamed_dict = {}
for key, value in res["prompt_config"].items():
new_key = key_mapping.get(key, key)
renamed_dict[new_key] = value
res["prompt"] = renamed_dict
del res["prompt_config"]
new_dict = {"similarity_threshold": res["similarity_threshold"], "keywords_similarity_weight": 1 - res["vector_similarity_weight"], "top_n": res["top_n"], "rerank_model": res["rerank_id"]}
res["prompt"].update(new_dict)
for key in key_list:
del res[key]
res["llm"] = res.pop("llm_setting")
res["llm"]["model_name"] = res.pop("llm_id")
kb_list = []
for kb_id in res["kb_ids"]:
kb = KnowledgebaseService.query(id=kb_id)
if not kb:
logging.warning(f"The kb {kb_id} does not exist.")
continue
kb_list.append(kb[0].to_json())
del res["kb_ids"]
res["datasets"] = kb_list
res["avatar"] = res.pop("icon")
list_assts.append(res)
return get_result(data=list_assts)

527
api/apps/sdk/dataset.py Normal file
View File

@@ -0,0 +1,527 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import logging
import os
import json
from flask import request
from peewee import OperationalError
from api import settings
from api.db import FileSource, StatusEnum
from api.db.db_models import File
from api.db.services.document_service import DocumentService
from api.db.services.file2document_service import File2DocumentService
from api.db.services.file_service import FileService
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.user_service import TenantService
from api.utils import get_uuid
from api.utils.api_utils import (
deep_merge,
get_error_argument_result,
get_error_data_result,
get_error_operating_result,
get_error_permission_result,
get_parser_config,
get_result,
remap_dictionary_keys,
token_required,
verify_embedding_availability,
)
from api.utils.validation_utils import (
CreateDatasetReq,
DeleteDatasetReq,
ListDatasetReq,
UpdateDatasetReq,
validate_and_parse_json_request,
validate_and_parse_request_args,
)
from rag.nlp import search
from rag.settings import PAGERANK_FLD
@manager.route("/datasets", methods=["POST"]) # noqa: F821
@token_required
def create(tenant_id):
"""
Create a new dataset.
---
tags:
- Datasets
security:
- ApiKeyAuth: []
parameters:
- in: header
name: Authorization
type: string
required: true
description: Bearer token for authentication.
- in: body
name: body
description: Dataset creation parameters.
required: true
schema:
type: object
required:
- name
properties:
name:
type: string
description: Name of the dataset.
avatar:
type: string
description: Base64 encoding of the avatar.
description:
type: string
description: Description of the dataset.
embedding_model:
type: string
description: Embedding model Name.
permission:
type: string
enum: ['me', 'team']
description: Dataset permission.
chunk_method:
type: string
enum: ["naive", "book", "email", "laws", "manual", "one", "paper",
"picture", "presentation", "qa", "table", "tag"
]
description: Chunking method.
parser_config:
type: object
description: Parser configuration.
responses:
200:
description: Successful operation.
schema:
type: object
properties:
data:
type: object
"""
# Field name transformations during model dump:
# | Original | Dump Output |
# |----------------|-------------|
# | embedding_model| embd_id |
# | chunk_method | parser_id |
req, err = validate_and_parse_json_request(request, CreateDatasetReq)
if err is not None:
return get_error_argument_result(err)
try:
if KnowledgebaseService.get_or_none(name=req["name"], tenant_id=tenant_id, status=StatusEnum.VALID.value):
return get_error_operating_result(message=f"Dataset name '{req['name']}' already exists")
req["parser_config"] = get_parser_config(req["parser_id"], req["parser_config"])
req["id"] = get_uuid()
req["tenant_id"] = tenant_id
req["created_by"] = tenant_id
ok, t = TenantService.get_by_id(tenant_id)
if not ok:
return get_error_permission_result(message="Tenant not found")
if not req.get("embd_id"):
req["embd_id"] = t.embd_id
else:
ok, err = verify_embedding_availability(req["embd_id"], tenant_id)
if not ok:
return err
if not KnowledgebaseService.save(**req):
return get_error_data_result(message="Create dataset error.(Database error)")
ok, k = KnowledgebaseService.get_by_id(req["id"])
if not ok:
return get_error_data_result(message="Dataset created failed")
response_data = remap_dictionary_keys(k.to_dict())
return get_result(data=response_data)
except OperationalError as e:
logging.exception(e)
return get_error_data_result(message="Database operation failed")
@manager.route("/datasets", methods=["DELETE"]) # noqa: F821
@token_required
def delete(tenant_id):
"""
Delete datasets.
---
tags:
- Datasets
security:
- ApiKeyAuth: []
parameters:
- in: header
name: Authorization
type: string
required: true
description: Bearer token for authentication.
- in: body
name: body
description: Dataset deletion parameters.
required: true
schema:
type: object
required:
- ids
properties:
ids:
type: array or null
items:
type: string
description: |
Specifies the datasets to delete:
- If `null`, all datasets will be deleted.
- If an array of IDs, only the specified datasets will be deleted.
- If an empty array, no datasets will be deleted.
responses:
200:
description: Successful operation.
schema:
type: object
"""
req, err = validate_and_parse_json_request(request, DeleteDatasetReq)
if err is not None:
return get_error_argument_result(err)
try:
kb_id_instance_pairs = []
if req["ids"] is None:
kbs = KnowledgebaseService.query(tenant_id=tenant_id)
for kb in kbs:
kb_id_instance_pairs.append((kb.id, kb))
else:
error_kb_ids = []
for kb_id in req["ids"]:
kb = KnowledgebaseService.get_or_none(id=kb_id, tenant_id=tenant_id)
if kb is None:
error_kb_ids.append(kb_id)
continue
kb_id_instance_pairs.append((kb_id, kb))
if len(error_kb_ids) > 0:
return get_error_permission_result(message=f"""User '{tenant_id}' lacks permission for datasets: '{", ".join(error_kb_ids)}'""")
errors = []
success_count = 0
for kb_id, kb in kb_id_instance_pairs:
for doc in DocumentService.query(kb_id=kb_id):
if not DocumentService.remove_document(doc, tenant_id):
errors.append(f"Remove document '{doc.id}' error for dataset '{kb_id}'")
continue
f2d = File2DocumentService.get_by_document_id(doc.id)
FileService.filter_delete(
[
File.source_type == FileSource.KNOWLEDGEBASE,
File.id == f2d[0].file_id,
]
)
File2DocumentService.delete_by_document_id(doc.id)
FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.type == "folder", File.name == kb.name])
if not KnowledgebaseService.delete_by_id(kb_id):
errors.append(f"Delete dataset error for {kb_id}")
continue
success_count += 1
if not errors:
return get_result()
error_message = f"Successfully deleted {success_count} datasets, {len(errors)} failed. Details: {'; '.join(errors)[:128]}..."
if success_count == 0:
return get_error_data_result(message=error_message)
return get_result(data={"success_count": success_count, "errors": errors[:5]}, message=error_message)
except OperationalError as e:
logging.exception(e)
return get_error_data_result(message="Database operation failed")
@manager.route("/datasets/<dataset_id>", methods=["PUT"]) # noqa: F821
@token_required
def update(tenant_id, dataset_id):
"""
Update a dataset.
---
tags:
- Datasets
security:
- ApiKeyAuth: []
parameters:
- in: path
name: dataset_id
type: string
required: true
description: ID of the dataset to update.
- in: header
name: Authorization
type: string
required: true
description: Bearer token for authentication.
- in: body
name: body
description: Dataset update parameters.
required: true
schema:
type: object
properties:
name:
type: string
description: New name of the dataset.
avatar:
type: string
description: Updated base64 encoding of the avatar.
description:
type: string
description: Updated description of the dataset.
embedding_model:
type: string
description: Updated embedding model Name.
permission:
type: string
enum: ['me', 'team']
description: Updated dataset permission.
chunk_method:
type: string
enum: ["naive", "book", "email", "laws", "manual", "one", "paper",
"picture", "presentation", "qa", "table", "tag"
]
description: Updated chunking method.
pagerank:
type: integer
description: Updated page rank.
parser_config:
type: object
description: Updated parser configuration.
responses:
200:
description: Successful operation.
schema:
type: object
"""
# Field name transformations during model dump:
# | Original | Dump Output |
# |----------------|-------------|
# | embedding_model| embd_id |
# | chunk_method | parser_id |
extras = {"dataset_id": dataset_id}
req, err = validate_and_parse_json_request(request, UpdateDatasetReq, extras=extras, exclude_unset=True)
if err is not None:
return get_error_argument_result(err)
if not req:
return get_error_argument_result(message="No properties were modified")
try:
kb = KnowledgebaseService.get_or_none(id=dataset_id, tenant_id=tenant_id)
if kb is None:
return get_error_permission_result(message=f"User '{tenant_id}' lacks permission for dataset '{dataset_id}'")
if req.get("parser_config"):
req["parser_config"] = deep_merge(kb.parser_config, req["parser_config"])
if (chunk_method := req.get("parser_id")) and chunk_method != kb.parser_id:
if not req.get("parser_config"):
req["parser_config"] = get_parser_config(chunk_method, None)
elif "parser_config" in req and not req["parser_config"]:
del req["parser_config"]
if "name" in req and req["name"].lower() != kb.name.lower():
exists = KnowledgebaseService.get_or_none(name=req["name"], tenant_id=tenant_id, status=StatusEnum.VALID.value)
if exists:
return get_error_data_result(message=f"Dataset name '{req['name']}' already exists")
if "embd_id" in req:
if not req["embd_id"]:
req["embd_id"] = kb.embd_id
if kb.chunk_num != 0 and req["embd_id"] != kb.embd_id:
return get_error_data_result(message=f"When chunk_num ({kb.chunk_num}) > 0, embedding_model must remain {kb.embd_id}")
ok, err = verify_embedding_availability(req["embd_id"], tenant_id)
if not ok:
return err
if "pagerank" in req and req["pagerank"] != kb.pagerank:
if os.environ.get("DOC_ENGINE", "elasticsearch") == "infinity":
return get_error_argument_result(message="'pagerank' can only be set when doc_engine is elasticsearch")
if req["pagerank"] > 0:
settings.docStoreConn.update({"kb_id": kb.id}, {PAGERANK_FLD: req["pagerank"]}, search.index_name(kb.tenant_id), kb.id)
else:
# Elasticsearch requires PAGERANK_FLD be non-zero!
settings.docStoreConn.update({"exists": PAGERANK_FLD}, {"remove": PAGERANK_FLD}, search.index_name(kb.tenant_id), kb.id)
if not KnowledgebaseService.update_by_id(kb.id, req):
return get_error_data_result(message="Update dataset error.(Database error)")
ok, k = KnowledgebaseService.get_by_id(kb.id)
if not ok:
return get_error_data_result(message="Dataset created failed")
response_data = remap_dictionary_keys(k.to_dict())
return get_result(data=response_data)
except OperationalError as e:
logging.exception(e)
return get_error_data_result(message="Database operation failed")
@manager.route("/datasets", methods=["GET"]) # noqa: F821
@token_required
def list_datasets(tenant_id):
"""
List datasets.
---
tags:
- Datasets
security:
- ApiKeyAuth: []
parameters:
- in: query
name: id
type: string
required: false
description: Dataset ID to filter.
- in: query
name: name
type: string
required: false
description: Dataset name to filter.
- in: query
name: page
type: integer
required: false
default: 1
description: Page number.
- in: query
name: page_size
type: integer
required: false
default: 30
description: Number of items per page.
- in: query
name: orderby
type: string
required: false
default: "create_time"
description: Field to order by.
- in: query
name: desc
type: boolean
required: false
default: true
description: Order in descending.
- in: header
name: Authorization
type: string
required: true
description: Bearer token for authentication.
responses:
200:
description: Successful operation.
schema:
type: array
items:
type: object
"""
args, err = validate_and_parse_request_args(request, ListDatasetReq)
if err is not None:
return get_error_argument_result(err)
try:
kb_id = request.args.get("id")
name = args.get("name")
if kb_id:
kbs = KnowledgebaseService.get_kb_by_id(kb_id, tenant_id)
if not kbs:
return get_error_permission_result(message=f"User '{tenant_id}' lacks permission for dataset '{kb_id}'")
if name:
kbs = KnowledgebaseService.get_kb_by_name(name, tenant_id)
if not kbs:
return get_error_permission_result(message=f"User '{tenant_id}' lacks permission for dataset '{name}'")
tenants = TenantService.get_joined_tenants_by_user_id(tenant_id)
kbs = KnowledgebaseService.get_list(
[m["tenant_id"] for m in tenants],
tenant_id,
args["page"],
args["page_size"],
args["orderby"],
args["desc"],
kb_id,
name,
)
response_data_list = []
for kb in kbs:
response_data_list.append(remap_dictionary_keys(kb))
return get_result(data=response_data_list)
except OperationalError as e:
logging.exception(e)
return get_error_data_result(message="Database operation failed")
@manager.route('/datasets/<dataset_id>/knowledge_graph', methods=['GET']) # noqa: F821
@token_required
def knowledge_graph(tenant_id,dataset_id):
if not KnowledgebaseService.accessible(dataset_id, tenant_id):
return get_result(
data=False,
message='No authorization.',
code=settings.RetCode.AUTHENTICATION_ERROR
)
_, kb = KnowledgebaseService.get_by_id(dataset_id)
req = {
"kb_id": [dataset_id],
"knowledge_graph_kwd": ["graph"]
}
obj = {"graph": {}, "mind_map": {}}
if not settings.docStoreConn.indexExist(search.index_name(kb.tenant_id), dataset_id):
return get_result(data=obj)
sres = settings.retrievaler.search(req, search.index_name(kb.tenant_id), [dataset_id])
if not len(sres.ids):
return get_result(data=obj)
for id in sres.ids[:1]:
ty = sres.field[id]["knowledge_graph_kwd"]
try:
content_json = json.loads(sres.field[id]["content_with_weight"])
except Exception:
continue
obj[ty] = content_json
if "nodes" in obj["graph"]:
obj["graph"]["nodes"] = sorted(obj["graph"]["nodes"], key=lambda x: x.get("pagerank", 0), reverse=True)[:256]
if "edges" in obj["graph"]:
node_id_set = { o["id"] for o in obj["graph"]["nodes"] }
filtered_edges = [o for o in obj["graph"]["edges"] if o["source"] != o["target"] and o["source"] in node_id_set and o["target"] in node_id_set]
obj["graph"]["edges"] = sorted(filtered_edges, key=lambda x: x.get("weight", 0), reverse=True)[:128]
return get_result(data=obj)
@manager.route('/datasets/<dataset_id>/knowledge_graph', methods=['DELETE']) # noqa: F821
@token_required
def delete_knowledge_graph(tenant_id,dataset_id):
if not KnowledgebaseService.accessible(dataset_id, tenant_id):
return get_result(
data=False,
message='No authorization.',
code=settings.RetCode.AUTHENTICATION_ERROR
)
_, kb = KnowledgebaseService.get_by_id(dataset_id)
settings.docStoreConn.delete({"knowledge_graph_kwd": ["graph", "subgraph", "entity", "relation"]}, search.index_name(kb.tenant_id), dataset_id)
return get_result(data=True)

View File

@@ -0,0 +1,104 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import logging
from flask import request, jsonify
from api.db import LLMType
from api.db.services.document_service import DocumentService
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.llm_service import LLMBundle
from api import settings
from api.utils.api_utils import validate_request, build_error_result, apikey_required
from rag.app.tag import label_question
from api.db.services.dialog_service import meta_filter, convert_conditions
@manager.route('/dify/retrieval', methods=['POST']) # noqa: F821
@apikey_required
@validate_request("knowledge_id", "query")
def retrieval(tenant_id):
req = request.json
question = req["query"]
kb_id = req["knowledge_id"]
use_kg = req.get("use_kg", False)
retrieval_setting = req.get("retrieval_setting", {})
similarity_threshold = float(retrieval_setting.get("score_threshold", 0.0))
top = int(retrieval_setting.get("top_k", 1024))
metadata_condition = req.get("metadata_condition",{})
metas = DocumentService.get_meta_by_kbs([kb_id])
doc_ids = []
try:
e, kb = KnowledgebaseService.get_by_id(kb_id)
if not e:
return build_error_result(message="Knowledgebase not found!", code=settings.RetCode.NOT_FOUND)
embd_mdl = LLMBundle(kb.tenant_id, LLMType.EMBEDDING.value, llm_name=kb.embd_id)
print(metadata_condition)
print("after",convert_conditions(metadata_condition))
doc_ids.extend(meta_filter(metas, convert_conditions(metadata_condition)))
print("doc_ids",doc_ids)
if not doc_ids and metadata_condition is not None:
doc_ids = ['-999']
ranks = settings.retrievaler.retrieval(
question,
embd_mdl,
kb.tenant_id,
[kb_id],
page=1,
page_size=top,
similarity_threshold=similarity_threshold,
vector_similarity_weight=0.3,
top=top,
doc_ids=doc_ids,
rank_feature=label_question(question, [kb])
)
if use_kg:
ck = settings.kg_retrievaler.retrieval(question,
[tenant_id],
[kb_id],
embd_mdl,
LLMBundle(kb.tenant_id, LLMType.CHAT))
if ck["content_with_weight"]:
ranks["chunks"].insert(0, ck)
records = []
for c in ranks["chunks"]:
e, doc = DocumentService.get_by_id( c["doc_id"])
c.pop("vector", None)
meta = getattr(doc, 'meta_fields', {})
meta["doc_id"] = c["doc_id"]
records.append({
"content": c["content_with_weight"],
"score": c["similarity"],
"title": c["docnm_kwd"],
"metadata": meta
})
return jsonify({"records": records})
except Exception as e:
if str(e).find("not_found") > 0:
return build_error_result(
message='No chunk found! Check the chunk status please!',
code=settings.RetCode.NOT_FOUND
)
logging.exception(e)
return build_error_result(message=str(e), code=settings.RetCode.SERVER_ERROR)

1497
api/apps/sdk/doc.py Normal file

File diff suppressed because it is too large Load Diff

738
api/apps/sdk/files.py Normal file
View File

@@ -0,0 +1,738 @@
import pathlib
import re
import flask
from flask import request
from pathlib import Path
from api.db.services.document_service import DocumentService
from api.db.services.file2document_service import File2DocumentService
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.utils.api_utils import server_error_response, token_required
from api.utils import get_uuid
from api.db import FileType
from api.db.services import duplicate_name
from api.db.services.file_service import FileService
from api.utils.api_utils import get_json_result
from api.utils.file_utils import filename_type
from rag.utils.storage_factory import STORAGE_IMPL
@manager.route('/file/upload', methods=['POST']) # noqa: F821
@token_required
def upload(tenant_id):
"""
Upload a file to the system.
---
tags:
- File Management
security:
- ApiKeyAuth: []
parameters:
- in: formData
name: file
type: file
required: true
description: The file to upload
- in: formData
name: parent_id
type: string
description: Parent folder ID where the file will be uploaded. Optional.
responses:
200:
description: Successfully uploaded the file.
schema:
type: object
properties:
data:
type: array
items:
type: object
properties:
id:
type: string
description: File ID
name:
type: string
description: File name
size:
type: integer
description: File size in bytes
type:
type: string
description: File type (e.g., document, folder)
"""
pf_id = request.form.get("parent_id")
if not pf_id:
root_folder = FileService.get_root_folder(tenant_id)
pf_id = root_folder["id"]
if 'file' not in request.files:
return get_json_result(data=False, message='No file part!', code=400)
file_objs = request.files.getlist('file')
for file_obj in file_objs:
if file_obj.filename == '':
return get_json_result(data=False, message='No selected file!', code=400)
file_res = []
try:
e, pf_folder = FileService.get_by_id(pf_id)
if not e:
return get_json_result(data=False, message="Can't find this folder!", code=404)
for file_obj in file_objs:
# Handle file path
full_path = '/' + file_obj.filename
file_obj_names = full_path.split('/')
file_len = len(file_obj_names)
# Get folder path ID
file_id_list = FileService.get_id_list_by_id(pf_id, file_obj_names, 1, [pf_id])
len_id_list = len(file_id_list)
# Crete file folder
if file_len != len_id_list:
e, file = FileService.get_by_id(file_id_list[len_id_list - 1])
if not e:
return get_json_result(data=False, message="Folder not found!", code=404)
last_folder = FileService.create_folder(file, file_id_list[len_id_list - 1], file_obj_names, len_id_list)
else:
e, file = FileService.get_by_id(file_id_list[len_id_list - 2])
if not e:
return get_json_result(data=False, message="Folder not found!", code=404)
last_folder = FileService.create_folder(file, file_id_list[len_id_list - 2], file_obj_names, len_id_list)
filetype = filename_type(file_obj_names[file_len - 1])
location = file_obj_names[file_len - 1]
while STORAGE_IMPL.obj_exist(last_folder.id, location):
location += "_"
blob = file_obj.read()
filename = duplicate_name(FileService.query, name=file_obj_names[file_len - 1], parent_id=last_folder.id)
file = {
"id": get_uuid(),
"parent_id": last_folder.id,
"tenant_id": tenant_id,
"created_by": tenant_id,
"type": filetype,
"name": filename,
"location": location,
"size": len(blob),
}
file = FileService.insert(file)
STORAGE_IMPL.put(last_folder.id, location, blob)
file_res.append(file.to_json())
return get_json_result(data=file_res)
except Exception as e:
return server_error_response(e)
@manager.route('/file/create', methods=['POST']) # noqa: F821
@token_required
def create(tenant_id):
"""
Create a new file or folder.
---
tags:
- File Management
security:
- ApiKeyAuth: []
parameters:
- in: body
name: body
description: File creation parameters
required: true
schema:
type: object
properties:
name:
type: string
description: Name of the file/folder
parent_id:
type: string
description: Parent folder ID. Optional.
type:
type: string
enum: ["FOLDER", "VIRTUAL"]
description: Type of the file
responses:
200:
description: File created successfully.
schema:
type: object
properties:
data:
type: object
properties:
id:
type: string
name:
type: string
type:
type: string
"""
req = request.json
pf_id = request.json.get("parent_id")
input_file_type = request.json.get("type")
if not pf_id:
root_folder = FileService.get_root_folder(tenant_id)
pf_id = root_folder["id"]
try:
if not FileService.is_parent_folder_exist(pf_id):
return get_json_result(data=False, message="Parent Folder Doesn't Exist!", code=400)
if FileService.query(name=req["name"], parent_id=pf_id):
return get_json_result(data=False, message="Duplicated folder name in the same folder.", code=409)
if input_file_type == FileType.FOLDER.value:
file_type = FileType.FOLDER.value
else:
file_type = FileType.VIRTUAL.value
file = FileService.insert({
"id": get_uuid(),
"parent_id": pf_id,
"tenant_id": tenant_id,
"created_by": tenant_id,
"name": req["name"],
"location": "",
"size": 0,
"type": file_type
})
return get_json_result(data=file.to_json())
except Exception as e:
return server_error_response(e)
@manager.route('/file/list', methods=['GET']) # noqa: F821
@token_required
def list_files(tenant_id):
"""
List files under a specific folder.
---
tags:
- File Management
security:
- ApiKeyAuth: []
parameters:
- in: query
name: parent_id
type: string
description: Folder ID to list files from
- in: query
name: keywords
type: string
description: Search keyword filter
- in: query
name: page
type: integer
default: 1
description: Page number
- in: query
name: page_size
type: integer
default: 15
description: Number of results per page
- in: query
name: orderby
type: string
default: "create_time"
description: Sort by field
- in: query
name: desc
type: boolean
default: true
description: Descending order
responses:
200:
description: Successfully retrieved file list.
schema:
type: object
properties:
total:
type: integer
files:
type: array
items:
type: object
properties:
id:
type: string
name:
type: string
type:
type: string
size:
type: integer
create_time:
type: string
format: date-time
"""
pf_id = request.args.get("parent_id")
keywords = request.args.get("keywords", "")
page_number = int(request.args.get("page", 1))
items_per_page = int(request.args.get("page_size", 15))
orderby = request.args.get("orderby", "create_time")
desc = request.args.get("desc", True)
if not pf_id:
root_folder = FileService.get_root_folder(tenant_id)
pf_id = root_folder["id"]
FileService.init_knowledgebase_docs(pf_id, tenant_id)
try:
e, file = FileService.get_by_id(pf_id)
if not e:
return get_json_result(message="Folder not found!", code=404)
files, total = FileService.get_by_pf_id(tenant_id, pf_id, page_number, items_per_page, orderby, desc, keywords)
parent_folder = FileService.get_parent_folder(pf_id)
if not parent_folder:
return get_json_result(message="File not found!", code=404)
return get_json_result(data={"total": total, "files": files, "parent_folder": parent_folder.to_json()})
except Exception as e:
return server_error_response(e)
@manager.route('/file/root_folder', methods=['GET']) # noqa: F821
@token_required
def get_root_folder(tenant_id):
"""
Get user's root folder.
---
tags:
- File Management
security:
- ApiKeyAuth: []
responses:
200:
description: Root folder information
schema:
type: object
properties:
data:
type: object
properties:
root_folder:
type: object
properties:
id:
type: string
name:
type: string
type:
type: string
"""
try:
root_folder = FileService.get_root_folder(tenant_id)
return get_json_result(data={"root_folder": root_folder})
except Exception as e:
return server_error_response(e)
@manager.route('/file/parent_folder', methods=['GET']) # noqa: F821
@token_required
def get_parent_folder():
"""
Get parent folder info of a file.
---
tags:
- File Management
security:
- ApiKeyAuth: []
parameters:
- in: query
name: file_id
type: string
required: true
description: Target file ID
responses:
200:
description: Parent folder information
schema:
type: object
properties:
data:
type: object
properties:
parent_folder:
type: object
properties:
id:
type: string
name:
type: string
"""
file_id = request.args.get("file_id")
try:
e, file = FileService.get_by_id(file_id)
if not e:
return get_json_result(message="Folder not found!", code=404)
parent_folder = FileService.get_parent_folder(file_id)
return get_json_result(data={"parent_folder": parent_folder.to_json()})
except Exception as e:
return server_error_response(e)
@manager.route('/file/all_parent_folder', methods=['GET']) # noqa: F821
@token_required
def get_all_parent_folders(tenant_id):
"""
Get all parent folders of a file.
---
tags:
- File Management
security:
- ApiKeyAuth: []
parameters:
- in: query
name: file_id
type: string
required: true
description: Target file ID
responses:
200:
description: All parent folders of the file
schema:
type: object
properties:
data:
type: object
properties:
parent_folders:
type: array
items:
type: object
properties:
id:
type: string
name:
type: string
"""
file_id = request.args.get("file_id")
try:
e, file = FileService.get_by_id(file_id)
if not e:
return get_json_result(message="Folder not found!", code=404)
parent_folders = FileService.get_all_parent_folders(file_id)
parent_folders_res = [folder.to_json() for folder in parent_folders]
return get_json_result(data={"parent_folders": parent_folders_res})
except Exception as e:
return server_error_response(e)
@manager.route('/file/rm', methods=['POST']) # noqa: F821
@token_required
def rm(tenant_id):
"""
Delete one or multiple files/folders.
---
tags:
- File Management
security:
- ApiKeyAuth: []
parameters:
- in: body
name: body
description: Files to delete
required: true
schema:
type: object
properties:
file_ids:
type: array
items:
type: string
description: List of file IDs to delete
responses:
200:
description: Successfully deleted files
schema:
type: object
properties:
data:
type: boolean
example: true
"""
req = request.json
file_ids = req["file_ids"]
try:
for file_id in file_ids:
e, file = FileService.get_by_id(file_id)
if not e:
return get_json_result(message="File or Folder not found!", code=404)
if not file.tenant_id:
return get_json_result(message="Tenant not found!", code=404)
if file.type == FileType.FOLDER.value:
file_id_list = FileService.get_all_innermost_file_ids(file_id, [])
for inner_file_id in file_id_list:
e, file = FileService.get_by_id(inner_file_id)
if not e:
return get_json_result(message="File not found!", code=404)
STORAGE_IMPL.rm(file.parent_id, file.location)
FileService.delete_folder_by_pf_id(tenant_id, file_id)
else:
STORAGE_IMPL.rm(file.parent_id, file.location)
if not FileService.delete(file):
return get_json_result(message="Database error (File removal)!", code=500)
informs = File2DocumentService.get_by_file_id(file_id)
for inform in informs:
doc_id = inform.document_id
e, doc = DocumentService.get_by_id(doc_id)
if not e:
return get_json_result(message="Document not found!", code=404)
tenant_id = DocumentService.get_tenant_id(doc_id)
if not tenant_id:
return get_json_result(message="Tenant not found!", code=404)
if not DocumentService.remove_document(doc, tenant_id):
return get_json_result(message="Database error (Document removal)!", code=500)
File2DocumentService.delete_by_file_id(file_id)
return get_json_result(data=True)
except Exception as e:
return server_error_response(e)
@manager.route('/file/rename', methods=['POST']) # noqa: F821
@token_required
def rename(tenant_id):
"""
Rename a file.
---
tags:
- File Management
security:
- ApiKeyAuth: []
parameters:
- in: body
name: body
description: Rename file
required: true
schema:
type: object
properties:
file_id:
type: string
description: Target file ID
name:
type: string
description: New name for the file
responses:
200:
description: File renamed successfully
schema:
type: object
properties:
data:
type: boolean
example: true
"""
req = request.json
try:
e, file = FileService.get_by_id(req["file_id"])
if not e:
return get_json_result(message="File not found!", code=404)
if file.type != FileType.FOLDER.value and pathlib.Path(req["name"].lower()).suffix != pathlib.Path(file.name.lower()).suffix:
return get_json_result(data=False, message="The extension of file can't be changed", code=400)
for existing_file in FileService.query(name=req["name"], pf_id=file.parent_id):
if existing_file.name == req["name"]:
return get_json_result(data=False, message="Duplicated file name in the same folder.", code=409)
if not FileService.update_by_id(req["file_id"], {"name": req["name"]}):
return get_json_result(message="Database error (File rename)!", code=500)
informs = File2DocumentService.get_by_file_id(req["file_id"])
if informs:
if not DocumentService.update_by_id(informs[0].document_id, {"name": req["name"]}):
return get_json_result(message="Database error (Document rename)!", code=500)
return get_json_result(data=True)
except Exception as e:
return server_error_response(e)
@manager.route('/file/get/<file_id>', methods=['GET']) # noqa: F821
@token_required
def get(tenant_id,file_id):
"""
Download a file.
---
tags:
- File Management
security:
- ApiKeyAuth: []
produces:
- application/octet-stream
parameters:
- in: path
name: file_id
type: string
required: true
description: File ID to download
responses:
200:
description: File stream
schema:
type: file
404:
description: File not found
"""
try:
e, file = FileService.get_by_id(file_id)
if not e:
return get_json_result(message="Document not found!", code=404)
blob = STORAGE_IMPL.get(file.parent_id, file.location)
if not blob:
b, n = File2DocumentService.get_storage_address(file_id=file_id)
blob = STORAGE_IMPL.get(b, n)
response = flask.make_response(blob)
ext = re.search(r"\.([^.]+)$", file.name)
if ext:
if file.type == FileType.VISUAL.value:
response.headers.set('Content-Type', 'image/%s' % ext.group(1))
else:
response.headers.set('Content-Type', 'application/%s' % ext.group(1))
return response
except Exception as e:
return server_error_response(e)
@manager.route('/file/mv', methods=['POST']) # noqa: F821
@token_required
def move(tenant_id):
"""
Move one or multiple files to another folder.
---
tags:
- File Management
security:
- ApiKeyAuth: []
parameters:
- in: body
name: body
description: Move operation
required: true
schema:
type: object
properties:
src_file_ids:
type: array
items:
type: string
description: Source file IDs
dest_file_id:
type: string
description: Destination folder ID
responses:
200:
description: Files moved successfully
schema:
type: object
properties:
data:
type: boolean
example: true
"""
req = request.json
try:
file_ids = req["src_file_ids"]
parent_id = req["dest_file_id"]
files = FileService.get_by_ids(file_ids)
files_dict = {f.id: f for f in files}
for file_id in file_ids:
file = files_dict[file_id]
if not file:
return get_json_result(message="File or Folder not found!", code=404)
if not file.tenant_id:
return get_json_result(message="Tenant not found!", code=404)
fe, _ = FileService.get_by_id(parent_id)
if not fe:
return get_json_result(message="Parent Folder not found!", code=404)
FileService.move_file(file_ids, parent_id)
return get_json_result(data=True)
except Exception as e:
return server_error_response(e)
@manager.route('/file/convert', methods=['POST']) # noqa: F821
@token_required
def convert(tenant_id):
req = request.json
kb_ids = req["kb_ids"]
file_ids = req["file_ids"]
file2documents = []
try:
files = FileService.get_by_ids(file_ids)
files_set = dict({file.id: file for file in files})
for file_id in file_ids:
file = files_set[file_id]
if not file:
return get_json_result(message="File not found!", code=404)
file_ids_list = [file_id]
if file.type == FileType.FOLDER.value:
file_ids_list = FileService.get_all_innermost_file_ids(file_id, [])
for id in file_ids_list:
informs = File2DocumentService.get_by_file_id(id)
# delete
for inform in informs:
doc_id = inform.document_id
e, doc = DocumentService.get_by_id(doc_id)
if not e:
return get_json_result(message="Document not found!", code=404)
tenant_id = DocumentService.get_tenant_id(doc_id)
if not tenant_id:
return get_json_result(message="Tenant not found!", code=404)
if not DocumentService.remove_document(doc, tenant_id):
return get_json_result(
message="Database error (Document removal)!", code=404)
File2DocumentService.delete_by_file_id(id)
# insert
for kb_id in kb_ids:
e, kb = KnowledgebaseService.get_by_id(kb_id)
if not e:
return get_json_result(
message="Can't find this knowledgebase!", code=404)
e, file = FileService.get_by_id(id)
if not e:
return get_json_result(
message="Can't find this file!", code=404)
doc = DocumentService.insert({
"id": get_uuid(),
"kb_id": kb.id,
"parser_id": FileService.get_parser(file.type, file.name, kb.parser_id),
"parser_config": kb.parser_config,
"created_by": tenant_id,
"type": file.type,
"name": file.name,
"suffix": Path(file.name).suffix.lstrip("."),
"location": file.location,
"size": file.size
})
file2document = File2DocumentService.insert({
"id": get_uuid(),
"file_id": id,
"document_id": doc.id,
})
file2documents.append(file2document.to_json())
return get_json_result(data=file2documents)
except Exception as e:
return server_error_response(e)

1115
api/apps/sdk/session.py Normal file

File diff suppressed because it is too large Load Diff