将flask改成fastapi
This commit is contained in:
128
api/apps/sdk/agent.py
Normal file
128
api/apps/sdk/agent.py
Normal file
@@ -0,0 +1,128 @@
|
||||
#
|
||||
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import json
|
||||
import time
|
||||
from typing import Any, cast
|
||||
from api.db.services.canvas_service import UserCanvasService
|
||||
from api.db.services.user_canvas_version import UserCanvasVersionService
|
||||
from api.settings import RetCode
|
||||
from api.utils import get_uuid
|
||||
from api.utils.api_utils import get_data_error_result, get_error_data_result, get_json_result, token_required
|
||||
from api.utils.api_utils import get_result
|
||||
from flask import request
|
||||
|
||||
@manager.route('/agents', methods=['GET']) # noqa: F821
|
||||
@token_required
|
||||
def list_agents(tenant_id):
|
||||
id = request.args.get("id")
|
||||
title = request.args.get("title")
|
||||
if id or title:
|
||||
canvas = UserCanvasService.query(id=id, title=title, user_id=tenant_id)
|
||||
if not canvas:
|
||||
return get_error_data_result("The agent doesn't exist.")
|
||||
page_number = int(request.args.get("page", 1))
|
||||
items_per_page = int(request.args.get("page_size", 30))
|
||||
orderby = request.args.get("orderby", "update_time")
|
||||
if request.args.get("desc") == "False" or request.args.get("desc") == "false":
|
||||
desc = False
|
||||
else:
|
||||
desc = True
|
||||
canvas = UserCanvasService.get_list(tenant_id,page_number,items_per_page,orderby,desc,id,title)
|
||||
return get_result(data=canvas)
|
||||
|
||||
|
||||
@manager.route("/agents", methods=["POST"]) # noqa: F821
|
||||
@token_required
|
||||
def create_agent(tenant_id: str):
|
||||
req: dict[str, Any] = cast(dict[str, Any], request.json)
|
||||
req["user_id"] = tenant_id
|
||||
|
||||
if req.get("dsl") is not None:
|
||||
if not isinstance(req["dsl"], str):
|
||||
req["dsl"] = json.dumps(req["dsl"], ensure_ascii=False)
|
||||
|
||||
req["dsl"] = json.loads(req["dsl"])
|
||||
else:
|
||||
return get_json_result(data=False, message="No DSL data in request.", code=RetCode.ARGUMENT_ERROR)
|
||||
|
||||
if req.get("title") is not None:
|
||||
req["title"] = req["title"].strip()
|
||||
else:
|
||||
return get_json_result(data=False, message="No title in request.", code=RetCode.ARGUMENT_ERROR)
|
||||
|
||||
if UserCanvasService.query(user_id=tenant_id, title=req["title"]):
|
||||
return get_data_error_result(message=f"Agent with title {req['title']} already exists.")
|
||||
|
||||
agent_id = get_uuid()
|
||||
req["id"] = agent_id
|
||||
|
||||
if not UserCanvasService.save(**req):
|
||||
return get_data_error_result(message="Fail to create agent.")
|
||||
|
||||
UserCanvasVersionService.insert(
|
||||
user_canvas_id=agent_id,
|
||||
title="{0}_{1}".format(req["title"], time.strftime("%Y_%m_%d_%H_%M_%S")),
|
||||
dsl=req["dsl"]
|
||||
)
|
||||
|
||||
return get_json_result(data=True)
|
||||
|
||||
|
||||
@manager.route("/agents/<agent_id>", methods=["PUT"]) # noqa: F821
|
||||
@token_required
|
||||
def update_agent(tenant_id: str, agent_id: str):
|
||||
req: dict[str, Any] = {k: v for k, v in cast(dict[str, Any], request.json).items() if v is not None}
|
||||
req["user_id"] = tenant_id
|
||||
|
||||
if req.get("dsl") is not None:
|
||||
if not isinstance(req["dsl"], str):
|
||||
req["dsl"] = json.dumps(req["dsl"], ensure_ascii=False)
|
||||
|
||||
req["dsl"] = json.loads(req["dsl"])
|
||||
|
||||
if req.get("title") is not None:
|
||||
req["title"] = req["title"].strip()
|
||||
|
||||
if not UserCanvasService.query(user_id=tenant_id, id=agent_id):
|
||||
return get_json_result(
|
||||
data=False, message="Only owner of canvas authorized for this operation.",
|
||||
code=RetCode.OPERATING_ERROR)
|
||||
|
||||
UserCanvasService.update_by_id(agent_id, req)
|
||||
|
||||
if req.get("dsl") is not None:
|
||||
UserCanvasVersionService.insert(
|
||||
user_canvas_id=agent_id,
|
||||
title="{0}_{1}".format(req["title"], time.strftime("%Y_%m_%d_%H_%M_%S")),
|
||||
dsl=req["dsl"]
|
||||
)
|
||||
|
||||
UserCanvasVersionService.delete_all_versions(agent_id)
|
||||
|
||||
return get_json_result(data=True)
|
||||
|
||||
|
||||
@manager.route("/agents/<agent_id>", methods=["DELETE"]) # noqa: F821
|
||||
@token_required
|
||||
def delete_agent(tenant_id: str, agent_id: str):
|
||||
if not UserCanvasService.query(user_id=tenant_id, id=agent_id):
|
||||
return get_json_result(
|
||||
data=False, message="Only owner of canvas authorized for this operation.",
|
||||
code=RetCode.OPERATING_ERROR)
|
||||
|
||||
UserCanvasService.delete_by_id(agent_id)
|
||||
return get_json_result(data=True)
|
||||
325
api/apps/sdk/chat.py
Normal file
325
api/apps/sdk/chat.py
Normal file
@@ -0,0 +1,325 @@
|
||||
#
|
||||
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import logging
|
||||
|
||||
from flask import request
|
||||
|
||||
from api import settings
|
||||
from api.db import StatusEnum
|
||||
from api.db.services.dialog_service import DialogService
|
||||
from api.db.services.knowledgebase_service import KnowledgebaseService
|
||||
from api.db.services.tenant_llm_service import TenantLLMService
|
||||
from api.db.services.user_service import TenantService
|
||||
from api.utils import get_uuid
|
||||
from api.utils.api_utils import check_duplicate_ids, get_error_data_result, get_result, token_required
|
||||
|
||||
|
||||
@manager.route("/chats", methods=["POST"]) # noqa: F821
|
||||
@token_required
|
||||
def create(tenant_id):
|
||||
req = request.json
|
||||
ids = [i for i in req.get("dataset_ids", []) if i]
|
||||
for kb_id in ids:
|
||||
kbs = KnowledgebaseService.accessible(kb_id=kb_id, user_id=tenant_id)
|
||||
if not kbs:
|
||||
return get_error_data_result(f"You don't own the dataset {kb_id}")
|
||||
kbs = KnowledgebaseService.query(id=kb_id)
|
||||
kb = kbs[0]
|
||||
if kb.chunk_num == 0:
|
||||
return get_error_data_result(f"The dataset {kb_id} doesn't own parsed file")
|
||||
|
||||
kbs = KnowledgebaseService.get_by_ids(ids) if ids else []
|
||||
embd_ids = [TenantLLMService.split_model_name_and_factory(kb.embd_id)[0] for kb in kbs] # remove vendor suffix for comparison
|
||||
embd_count = list(set(embd_ids))
|
||||
if len(embd_count) > 1:
|
||||
return get_result(message='Datasets use different embedding models."', code=settings.RetCode.AUTHENTICATION_ERROR)
|
||||
req["kb_ids"] = ids
|
||||
# llm
|
||||
llm = req.get("llm")
|
||||
if llm:
|
||||
if "model_name" in llm:
|
||||
req["llm_id"] = llm.pop("model_name")
|
||||
if req.get("llm_id") is not None:
|
||||
llm_name, llm_factory = TenantLLMService.split_model_name_and_factory(req["llm_id"])
|
||||
if not TenantLLMService.query(tenant_id=tenant_id, llm_name=llm_name, llm_factory=llm_factory, model_type="chat"):
|
||||
return get_error_data_result(f"`model_name` {req.get('llm_id')} doesn't exist")
|
||||
req["llm_setting"] = req.pop("llm")
|
||||
e, tenant = TenantService.get_by_id(tenant_id)
|
||||
if not e:
|
||||
return get_error_data_result(message="Tenant not found!")
|
||||
# prompt
|
||||
prompt = req.get("prompt")
|
||||
key_mapping = {"parameters": "variables", "prologue": "opener", "quote": "show_quote", "system": "prompt", "rerank_id": "rerank_model", "vector_similarity_weight": "keywords_similarity_weight"}
|
||||
key_list = ["similarity_threshold", "vector_similarity_weight", "top_n", "rerank_id", "top_k"]
|
||||
if prompt:
|
||||
for new_key, old_key in key_mapping.items():
|
||||
if old_key in prompt:
|
||||
prompt[new_key] = prompt.pop(old_key)
|
||||
for key in key_list:
|
||||
if key in prompt:
|
||||
req[key] = prompt.pop(key)
|
||||
req["prompt_config"] = req.pop("prompt")
|
||||
# init
|
||||
req["id"] = get_uuid()
|
||||
req["description"] = req.get("description", "A helpful Assistant")
|
||||
req["icon"] = req.get("avatar", "")
|
||||
req["top_n"] = req.get("top_n", 6)
|
||||
req["top_k"] = req.get("top_k", 1024)
|
||||
req["rerank_id"] = req.get("rerank_id", "")
|
||||
if req.get("rerank_id"):
|
||||
value_rerank_model = ["BAAI/bge-reranker-v2-m3", "maidalun1020/bce-reranker-base_v1"]
|
||||
if req["rerank_id"] not in value_rerank_model and not TenantLLMService.query(tenant_id=tenant_id, llm_name=req.get("rerank_id"), model_type="rerank"):
|
||||
return get_error_data_result(f"`rerank_model` {req.get('rerank_id')} doesn't exist")
|
||||
if not req.get("llm_id"):
|
||||
req["llm_id"] = tenant.llm_id
|
||||
if not req.get("name"):
|
||||
return get_error_data_result(message="`name` is required.")
|
||||
if DialogService.query(name=req["name"], tenant_id=tenant_id, status=StatusEnum.VALID.value):
|
||||
return get_error_data_result(message="Duplicated chat name in creating chat.")
|
||||
# tenant_id
|
||||
if req.get("tenant_id"):
|
||||
return get_error_data_result(message="`tenant_id` must not be provided.")
|
||||
req["tenant_id"] = tenant_id
|
||||
# prompt more parameter
|
||||
default_prompt = {
|
||||
"system": """You are an intelligent assistant. Please summarize the content of the knowledge base to answer the question. Please list the data in the knowledge base and answer in detail. When all knowledge base content is irrelevant to the question, your answer must include the sentence "The answer you are looking for is not found in the knowledge base!" Answers need to consider chat history.
|
||||
Here is the knowledge base:
|
||||
{knowledge}
|
||||
The above is the knowledge base.""",
|
||||
"prologue": "Hi! I'm your assistant. What can I do for you?",
|
||||
"parameters": [{"key": "knowledge", "optional": False}],
|
||||
"empty_response": "Sorry! No relevant content was found in the knowledge base!",
|
||||
"quote": True,
|
||||
"tts": False,
|
||||
"refine_multiturn": True,
|
||||
}
|
||||
key_list_2 = ["system", "prologue", "parameters", "empty_response", "quote", "tts", "refine_multiturn"]
|
||||
if "prompt_config" not in req:
|
||||
req["prompt_config"] = {}
|
||||
for key in key_list_2:
|
||||
temp = req["prompt_config"].get(key)
|
||||
if (not temp and key == "system") or (key not in req["prompt_config"]):
|
||||
req["prompt_config"][key] = default_prompt[key]
|
||||
for p in req["prompt_config"]["parameters"]:
|
||||
if p["optional"]:
|
||||
continue
|
||||
if req["prompt_config"]["system"].find("{%s}" % p["key"]) < 0:
|
||||
return get_error_data_result(message="Parameter '{}' is not used".format(p["key"]))
|
||||
# save
|
||||
if not DialogService.save(**req):
|
||||
return get_error_data_result(message="Fail to new a chat!")
|
||||
# response
|
||||
e, res = DialogService.get_by_id(req["id"])
|
||||
if not e:
|
||||
return get_error_data_result(message="Fail to new a chat!")
|
||||
res = res.to_json()
|
||||
renamed_dict = {}
|
||||
for key, value in res["prompt_config"].items():
|
||||
new_key = key_mapping.get(key, key)
|
||||
renamed_dict[new_key] = value
|
||||
res["prompt"] = renamed_dict
|
||||
del res["prompt_config"]
|
||||
new_dict = {"similarity_threshold": res["similarity_threshold"], "keywords_similarity_weight": 1 - res["vector_similarity_weight"], "top_n": res["top_n"], "rerank_model": res["rerank_id"]}
|
||||
res["prompt"].update(new_dict)
|
||||
for key in key_list:
|
||||
del res[key]
|
||||
res["llm"] = res.pop("llm_setting")
|
||||
res["llm"]["model_name"] = res.pop("llm_id")
|
||||
del res["kb_ids"]
|
||||
res["dataset_ids"] = req.get("dataset_ids", [])
|
||||
res["avatar"] = res.pop("icon")
|
||||
return get_result(data=res)
|
||||
|
||||
|
||||
@manager.route("/chats/<chat_id>", methods=["PUT"]) # noqa: F821
|
||||
@token_required
|
||||
def update(tenant_id, chat_id):
|
||||
if not DialogService.query(tenant_id=tenant_id, id=chat_id, status=StatusEnum.VALID.value):
|
||||
return get_error_data_result(message="You do not own the chat")
|
||||
req = request.json
|
||||
ids = req.get("dataset_ids", [])
|
||||
if "show_quotation" in req:
|
||||
req["do_refer"] = req.pop("show_quotation")
|
||||
if ids:
|
||||
for kb_id in ids:
|
||||
kbs = KnowledgebaseService.accessible(kb_id=kb_id, user_id=tenant_id)
|
||||
if not kbs:
|
||||
return get_error_data_result(f"You don't own the dataset {kb_id}")
|
||||
kbs = KnowledgebaseService.query(id=kb_id)
|
||||
kb = kbs[0]
|
||||
if kb.chunk_num == 0:
|
||||
return get_error_data_result(f"The dataset {kb_id} doesn't own parsed file")
|
||||
|
||||
kbs = KnowledgebaseService.get_by_ids(ids)
|
||||
embd_ids = [TenantLLMService.split_model_name_and_factory(kb.embd_id)[0] for kb in kbs] # remove vendor suffix for comparison
|
||||
embd_count = list(set(embd_ids))
|
||||
if len(embd_count) > 1:
|
||||
return get_result(message='Datasets use different embedding models."', code=settings.RetCode.AUTHENTICATION_ERROR)
|
||||
req["kb_ids"] = ids
|
||||
llm = req.get("llm")
|
||||
if llm:
|
||||
if "model_name" in llm:
|
||||
req["llm_id"] = llm.pop("model_name")
|
||||
if req.get("llm_id") is not None:
|
||||
llm_name, llm_factory = TenantLLMService.split_model_name_and_factory(req["llm_id"])
|
||||
if not TenantLLMService.query(tenant_id=tenant_id, llm_name=llm_name, llm_factory=llm_factory, model_type="chat"):
|
||||
return get_error_data_result(f"`model_name` {req.get('llm_id')} doesn't exist")
|
||||
req["llm_setting"] = req.pop("llm")
|
||||
e, tenant = TenantService.get_by_id(tenant_id)
|
||||
if not e:
|
||||
return get_error_data_result(message="Tenant not found!")
|
||||
# prompt
|
||||
prompt = req.get("prompt")
|
||||
key_mapping = {"parameters": "variables", "prologue": "opener", "quote": "show_quote", "system": "prompt", "rerank_id": "rerank_model", "vector_similarity_weight": "keywords_similarity_weight"}
|
||||
key_list = ["similarity_threshold", "vector_similarity_weight", "top_n", "rerank_id", "top_k"]
|
||||
if prompt:
|
||||
for new_key, old_key in key_mapping.items():
|
||||
if old_key in prompt:
|
||||
prompt[new_key] = prompt.pop(old_key)
|
||||
for key in key_list:
|
||||
if key in prompt:
|
||||
req[key] = prompt.pop(key)
|
||||
req["prompt_config"] = req.pop("prompt")
|
||||
e, res = DialogService.get_by_id(chat_id)
|
||||
res = res.to_json()
|
||||
if req.get("rerank_id"):
|
||||
value_rerank_model = ["BAAI/bge-reranker-v2-m3", "maidalun1020/bce-reranker-base_v1"]
|
||||
if req["rerank_id"] not in value_rerank_model and not TenantLLMService.query(tenant_id=tenant_id, llm_name=req.get("rerank_id"), model_type="rerank"):
|
||||
return get_error_data_result(f"`rerank_model` {req.get('rerank_id')} doesn't exist")
|
||||
if "name" in req:
|
||||
if not req.get("name"):
|
||||
return get_error_data_result(message="`name` cannot be empty.")
|
||||
if req["name"].lower() != res["name"].lower() and len(DialogService.query(name=req["name"], tenant_id=tenant_id, status=StatusEnum.VALID.value)) > 0:
|
||||
return get_error_data_result(message="Duplicated chat name in updating chat.")
|
||||
if "prompt_config" in req:
|
||||
res["prompt_config"].update(req["prompt_config"])
|
||||
for p in res["prompt_config"]["parameters"]:
|
||||
if p["optional"]:
|
||||
continue
|
||||
if res["prompt_config"]["system"].find("{%s}" % p["key"]) < 0:
|
||||
return get_error_data_result(message="Parameter '{}' is not used".format(p["key"]))
|
||||
if "llm_setting" in req:
|
||||
res["llm_setting"].update(req["llm_setting"])
|
||||
req["prompt_config"] = res["prompt_config"]
|
||||
req["llm_setting"] = res["llm_setting"]
|
||||
# avatar
|
||||
if "avatar" in req:
|
||||
req["icon"] = req.pop("avatar")
|
||||
if "dataset_ids" in req:
|
||||
req.pop("dataset_ids")
|
||||
if not DialogService.update_by_id(chat_id, req):
|
||||
return get_error_data_result(message="Chat not found!")
|
||||
return get_result()
|
||||
|
||||
|
||||
@manager.route("/chats", methods=["DELETE"]) # noqa: F821
|
||||
@token_required
|
||||
def delete(tenant_id):
|
||||
errors = []
|
||||
success_count = 0
|
||||
req = request.json
|
||||
if not req:
|
||||
ids = None
|
||||
else:
|
||||
ids = req.get("ids")
|
||||
if not ids:
|
||||
id_list = []
|
||||
dias = DialogService.query(tenant_id=tenant_id, status=StatusEnum.VALID.value)
|
||||
for dia in dias:
|
||||
id_list.append(dia.id)
|
||||
else:
|
||||
id_list = ids
|
||||
|
||||
unique_id_list, duplicate_messages = check_duplicate_ids(id_list, "assistant")
|
||||
|
||||
for id in unique_id_list:
|
||||
if not DialogService.query(tenant_id=tenant_id, id=id, status=StatusEnum.VALID.value):
|
||||
errors.append(f"Assistant({id}) not found.")
|
||||
continue
|
||||
temp_dict = {"status": StatusEnum.INVALID.value}
|
||||
DialogService.update_by_id(id, temp_dict)
|
||||
success_count += 1
|
||||
|
||||
if errors:
|
||||
if success_count > 0:
|
||||
return get_result(data={"success_count": success_count, "errors": errors}, message=f"Partially deleted {success_count} chats with {len(errors)} errors")
|
||||
else:
|
||||
return get_error_data_result(message="; ".join(errors))
|
||||
|
||||
if duplicate_messages:
|
||||
if success_count > 0:
|
||||
return get_result(message=f"Partially deleted {success_count} chats with {len(duplicate_messages)} errors", data={"success_count": success_count, "errors": duplicate_messages})
|
||||
else:
|
||||
return get_error_data_result(message=";".join(duplicate_messages))
|
||||
|
||||
return get_result()
|
||||
|
||||
|
||||
@manager.route("/chats", methods=["GET"]) # noqa: F821
|
||||
@token_required
|
||||
def list_chat(tenant_id):
|
||||
id = request.args.get("id")
|
||||
name = request.args.get("name")
|
||||
if id or name:
|
||||
chat = DialogService.query(id=id, name=name, status=StatusEnum.VALID.value, tenant_id=tenant_id)
|
||||
if not chat:
|
||||
return get_error_data_result(message="The chat doesn't exist")
|
||||
page_number = int(request.args.get("page", 1))
|
||||
items_per_page = int(request.args.get("page_size", 30))
|
||||
orderby = request.args.get("orderby", "create_time")
|
||||
if request.args.get("desc") == "False" or request.args.get("desc") == "false":
|
||||
desc = False
|
||||
else:
|
||||
desc = True
|
||||
chats = DialogService.get_list(tenant_id, page_number, items_per_page, orderby, desc, id, name)
|
||||
if not chats:
|
||||
return get_result(data=[])
|
||||
list_assts = []
|
||||
key_mapping = {
|
||||
"parameters": "variables",
|
||||
"prologue": "opener",
|
||||
"quote": "show_quote",
|
||||
"system": "prompt",
|
||||
"rerank_id": "rerank_model",
|
||||
"vector_similarity_weight": "keywords_similarity_weight",
|
||||
"do_refer": "show_quotation",
|
||||
}
|
||||
key_list = ["similarity_threshold", "vector_similarity_weight", "top_n", "rerank_id"]
|
||||
for res in chats:
|
||||
renamed_dict = {}
|
||||
for key, value in res["prompt_config"].items():
|
||||
new_key = key_mapping.get(key, key)
|
||||
renamed_dict[new_key] = value
|
||||
res["prompt"] = renamed_dict
|
||||
del res["prompt_config"]
|
||||
new_dict = {"similarity_threshold": res["similarity_threshold"], "keywords_similarity_weight": 1 - res["vector_similarity_weight"], "top_n": res["top_n"], "rerank_model": res["rerank_id"]}
|
||||
res["prompt"].update(new_dict)
|
||||
for key in key_list:
|
||||
del res[key]
|
||||
res["llm"] = res.pop("llm_setting")
|
||||
res["llm"]["model_name"] = res.pop("llm_id")
|
||||
kb_list = []
|
||||
for kb_id in res["kb_ids"]:
|
||||
kb = KnowledgebaseService.query(id=kb_id)
|
||||
if not kb:
|
||||
logging.warning(f"The kb {kb_id} does not exist.")
|
||||
continue
|
||||
kb_list.append(kb[0].to_json())
|
||||
del res["kb_ids"]
|
||||
res["datasets"] = kb_list
|
||||
res["avatar"] = res.pop("icon")
|
||||
list_assts.append(res)
|
||||
return get_result(data=list_assts)
|
||||
527
api/apps/sdk/dataset.py
Normal file
527
api/apps/sdk/dataset.py
Normal file
@@ -0,0 +1,527 @@
|
||||
#
|
||||
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
|
||||
import logging
|
||||
import os
|
||||
import json
|
||||
from flask import request
|
||||
from peewee import OperationalError
|
||||
from api import settings
|
||||
from api.db import FileSource, StatusEnum
|
||||
from api.db.db_models import File
|
||||
from api.db.services.document_service import DocumentService
|
||||
from api.db.services.file2document_service import File2DocumentService
|
||||
from api.db.services.file_service import FileService
|
||||
from api.db.services.knowledgebase_service import KnowledgebaseService
|
||||
from api.db.services.user_service import TenantService
|
||||
from api.utils import get_uuid
|
||||
from api.utils.api_utils import (
|
||||
deep_merge,
|
||||
get_error_argument_result,
|
||||
get_error_data_result,
|
||||
get_error_operating_result,
|
||||
get_error_permission_result,
|
||||
get_parser_config,
|
||||
get_result,
|
||||
remap_dictionary_keys,
|
||||
token_required,
|
||||
verify_embedding_availability,
|
||||
)
|
||||
from api.utils.validation_utils import (
|
||||
CreateDatasetReq,
|
||||
DeleteDatasetReq,
|
||||
ListDatasetReq,
|
||||
UpdateDatasetReq,
|
||||
validate_and_parse_json_request,
|
||||
validate_and_parse_request_args,
|
||||
)
|
||||
from rag.nlp import search
|
||||
from rag.settings import PAGERANK_FLD
|
||||
|
||||
|
||||
@manager.route("/datasets", methods=["POST"]) # noqa: F821
|
||||
@token_required
|
||||
def create(tenant_id):
|
||||
"""
|
||||
Create a new dataset.
|
||||
---
|
||||
tags:
|
||||
- Datasets
|
||||
security:
|
||||
- ApiKeyAuth: []
|
||||
parameters:
|
||||
- in: header
|
||||
name: Authorization
|
||||
type: string
|
||||
required: true
|
||||
description: Bearer token for authentication.
|
||||
- in: body
|
||||
name: body
|
||||
description: Dataset creation parameters.
|
||||
required: true
|
||||
schema:
|
||||
type: object
|
||||
required:
|
||||
- name
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
description: Name of the dataset.
|
||||
avatar:
|
||||
type: string
|
||||
description: Base64 encoding of the avatar.
|
||||
description:
|
||||
type: string
|
||||
description: Description of the dataset.
|
||||
embedding_model:
|
||||
type: string
|
||||
description: Embedding model Name.
|
||||
permission:
|
||||
type: string
|
||||
enum: ['me', 'team']
|
||||
description: Dataset permission.
|
||||
chunk_method:
|
||||
type: string
|
||||
enum: ["naive", "book", "email", "laws", "manual", "one", "paper",
|
||||
"picture", "presentation", "qa", "table", "tag"
|
||||
]
|
||||
description: Chunking method.
|
||||
parser_config:
|
||||
type: object
|
||||
description: Parser configuration.
|
||||
responses:
|
||||
200:
|
||||
description: Successful operation.
|
||||
schema:
|
||||
type: object
|
||||
properties:
|
||||
data:
|
||||
type: object
|
||||
"""
|
||||
# Field name transformations during model dump:
|
||||
# | Original | Dump Output |
|
||||
# |----------------|-------------|
|
||||
# | embedding_model| embd_id |
|
||||
# | chunk_method | parser_id |
|
||||
req, err = validate_and_parse_json_request(request, CreateDatasetReq)
|
||||
if err is not None:
|
||||
return get_error_argument_result(err)
|
||||
|
||||
try:
|
||||
if KnowledgebaseService.get_or_none(name=req["name"], tenant_id=tenant_id, status=StatusEnum.VALID.value):
|
||||
return get_error_operating_result(message=f"Dataset name '{req['name']}' already exists")
|
||||
|
||||
req["parser_config"] = get_parser_config(req["parser_id"], req["parser_config"])
|
||||
req["id"] = get_uuid()
|
||||
req["tenant_id"] = tenant_id
|
||||
req["created_by"] = tenant_id
|
||||
|
||||
ok, t = TenantService.get_by_id(tenant_id)
|
||||
if not ok:
|
||||
return get_error_permission_result(message="Tenant not found")
|
||||
|
||||
if not req.get("embd_id"):
|
||||
req["embd_id"] = t.embd_id
|
||||
else:
|
||||
ok, err = verify_embedding_availability(req["embd_id"], tenant_id)
|
||||
if not ok:
|
||||
return err
|
||||
|
||||
if not KnowledgebaseService.save(**req):
|
||||
return get_error_data_result(message="Create dataset error.(Database error)")
|
||||
|
||||
ok, k = KnowledgebaseService.get_by_id(req["id"])
|
||||
if not ok:
|
||||
return get_error_data_result(message="Dataset created failed")
|
||||
|
||||
response_data = remap_dictionary_keys(k.to_dict())
|
||||
return get_result(data=response_data)
|
||||
except OperationalError as e:
|
||||
logging.exception(e)
|
||||
return get_error_data_result(message="Database operation failed")
|
||||
|
||||
|
||||
@manager.route("/datasets", methods=["DELETE"]) # noqa: F821
|
||||
@token_required
|
||||
def delete(tenant_id):
|
||||
"""
|
||||
Delete datasets.
|
||||
---
|
||||
tags:
|
||||
- Datasets
|
||||
security:
|
||||
- ApiKeyAuth: []
|
||||
parameters:
|
||||
- in: header
|
||||
name: Authorization
|
||||
type: string
|
||||
required: true
|
||||
description: Bearer token for authentication.
|
||||
- in: body
|
||||
name: body
|
||||
description: Dataset deletion parameters.
|
||||
required: true
|
||||
schema:
|
||||
type: object
|
||||
required:
|
||||
- ids
|
||||
properties:
|
||||
ids:
|
||||
type: array or null
|
||||
items:
|
||||
type: string
|
||||
description: |
|
||||
Specifies the datasets to delete:
|
||||
- If `null`, all datasets will be deleted.
|
||||
- If an array of IDs, only the specified datasets will be deleted.
|
||||
- If an empty array, no datasets will be deleted.
|
||||
responses:
|
||||
200:
|
||||
description: Successful operation.
|
||||
schema:
|
||||
type: object
|
||||
"""
|
||||
req, err = validate_and_parse_json_request(request, DeleteDatasetReq)
|
||||
if err is not None:
|
||||
return get_error_argument_result(err)
|
||||
|
||||
try:
|
||||
kb_id_instance_pairs = []
|
||||
if req["ids"] is None:
|
||||
kbs = KnowledgebaseService.query(tenant_id=tenant_id)
|
||||
for kb in kbs:
|
||||
kb_id_instance_pairs.append((kb.id, kb))
|
||||
|
||||
else:
|
||||
error_kb_ids = []
|
||||
for kb_id in req["ids"]:
|
||||
kb = KnowledgebaseService.get_or_none(id=kb_id, tenant_id=tenant_id)
|
||||
if kb is None:
|
||||
error_kb_ids.append(kb_id)
|
||||
continue
|
||||
kb_id_instance_pairs.append((kb_id, kb))
|
||||
if len(error_kb_ids) > 0:
|
||||
return get_error_permission_result(message=f"""User '{tenant_id}' lacks permission for datasets: '{", ".join(error_kb_ids)}'""")
|
||||
|
||||
errors = []
|
||||
success_count = 0
|
||||
for kb_id, kb in kb_id_instance_pairs:
|
||||
for doc in DocumentService.query(kb_id=kb_id):
|
||||
if not DocumentService.remove_document(doc, tenant_id):
|
||||
errors.append(f"Remove document '{doc.id}' error for dataset '{kb_id}'")
|
||||
continue
|
||||
f2d = File2DocumentService.get_by_document_id(doc.id)
|
||||
FileService.filter_delete(
|
||||
[
|
||||
File.source_type == FileSource.KNOWLEDGEBASE,
|
||||
File.id == f2d[0].file_id,
|
||||
]
|
||||
)
|
||||
File2DocumentService.delete_by_document_id(doc.id)
|
||||
FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.type == "folder", File.name == kb.name])
|
||||
if not KnowledgebaseService.delete_by_id(kb_id):
|
||||
errors.append(f"Delete dataset error for {kb_id}")
|
||||
continue
|
||||
success_count += 1
|
||||
|
||||
if not errors:
|
||||
return get_result()
|
||||
|
||||
error_message = f"Successfully deleted {success_count} datasets, {len(errors)} failed. Details: {'; '.join(errors)[:128]}..."
|
||||
if success_count == 0:
|
||||
return get_error_data_result(message=error_message)
|
||||
|
||||
return get_result(data={"success_count": success_count, "errors": errors[:5]}, message=error_message)
|
||||
except OperationalError as e:
|
||||
logging.exception(e)
|
||||
return get_error_data_result(message="Database operation failed")
|
||||
|
||||
|
||||
@manager.route("/datasets/<dataset_id>", methods=["PUT"]) # noqa: F821
|
||||
@token_required
|
||||
def update(tenant_id, dataset_id):
|
||||
"""
|
||||
Update a dataset.
|
||||
---
|
||||
tags:
|
||||
- Datasets
|
||||
security:
|
||||
- ApiKeyAuth: []
|
||||
parameters:
|
||||
- in: path
|
||||
name: dataset_id
|
||||
type: string
|
||||
required: true
|
||||
description: ID of the dataset to update.
|
||||
- in: header
|
||||
name: Authorization
|
||||
type: string
|
||||
required: true
|
||||
description: Bearer token for authentication.
|
||||
- in: body
|
||||
name: body
|
||||
description: Dataset update parameters.
|
||||
required: true
|
||||
schema:
|
||||
type: object
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
description: New name of the dataset.
|
||||
avatar:
|
||||
type: string
|
||||
description: Updated base64 encoding of the avatar.
|
||||
description:
|
||||
type: string
|
||||
description: Updated description of the dataset.
|
||||
embedding_model:
|
||||
type: string
|
||||
description: Updated embedding model Name.
|
||||
permission:
|
||||
type: string
|
||||
enum: ['me', 'team']
|
||||
description: Updated dataset permission.
|
||||
chunk_method:
|
||||
type: string
|
||||
enum: ["naive", "book", "email", "laws", "manual", "one", "paper",
|
||||
"picture", "presentation", "qa", "table", "tag"
|
||||
]
|
||||
description: Updated chunking method.
|
||||
pagerank:
|
||||
type: integer
|
||||
description: Updated page rank.
|
||||
parser_config:
|
||||
type: object
|
||||
description: Updated parser configuration.
|
||||
responses:
|
||||
200:
|
||||
description: Successful operation.
|
||||
schema:
|
||||
type: object
|
||||
"""
|
||||
# Field name transformations during model dump:
|
||||
# | Original | Dump Output |
|
||||
# |----------------|-------------|
|
||||
# | embedding_model| embd_id |
|
||||
# | chunk_method | parser_id |
|
||||
extras = {"dataset_id": dataset_id}
|
||||
req, err = validate_and_parse_json_request(request, UpdateDatasetReq, extras=extras, exclude_unset=True)
|
||||
if err is not None:
|
||||
return get_error_argument_result(err)
|
||||
|
||||
if not req:
|
||||
return get_error_argument_result(message="No properties were modified")
|
||||
|
||||
try:
|
||||
kb = KnowledgebaseService.get_or_none(id=dataset_id, tenant_id=tenant_id)
|
||||
if kb is None:
|
||||
return get_error_permission_result(message=f"User '{tenant_id}' lacks permission for dataset '{dataset_id}'")
|
||||
|
||||
if req.get("parser_config"):
|
||||
req["parser_config"] = deep_merge(kb.parser_config, req["parser_config"])
|
||||
|
||||
if (chunk_method := req.get("parser_id")) and chunk_method != kb.parser_id:
|
||||
if not req.get("parser_config"):
|
||||
req["parser_config"] = get_parser_config(chunk_method, None)
|
||||
elif "parser_config" in req and not req["parser_config"]:
|
||||
del req["parser_config"]
|
||||
|
||||
if "name" in req and req["name"].lower() != kb.name.lower():
|
||||
exists = KnowledgebaseService.get_or_none(name=req["name"], tenant_id=tenant_id, status=StatusEnum.VALID.value)
|
||||
if exists:
|
||||
return get_error_data_result(message=f"Dataset name '{req['name']}' already exists")
|
||||
|
||||
if "embd_id" in req:
|
||||
if not req["embd_id"]:
|
||||
req["embd_id"] = kb.embd_id
|
||||
if kb.chunk_num != 0 and req["embd_id"] != kb.embd_id:
|
||||
return get_error_data_result(message=f"When chunk_num ({kb.chunk_num}) > 0, embedding_model must remain {kb.embd_id}")
|
||||
ok, err = verify_embedding_availability(req["embd_id"], tenant_id)
|
||||
if not ok:
|
||||
return err
|
||||
|
||||
if "pagerank" in req and req["pagerank"] != kb.pagerank:
|
||||
if os.environ.get("DOC_ENGINE", "elasticsearch") == "infinity":
|
||||
return get_error_argument_result(message="'pagerank' can only be set when doc_engine is elasticsearch")
|
||||
|
||||
if req["pagerank"] > 0:
|
||||
settings.docStoreConn.update({"kb_id": kb.id}, {PAGERANK_FLD: req["pagerank"]}, search.index_name(kb.tenant_id), kb.id)
|
||||
else:
|
||||
# Elasticsearch requires PAGERANK_FLD be non-zero!
|
||||
settings.docStoreConn.update({"exists": PAGERANK_FLD}, {"remove": PAGERANK_FLD}, search.index_name(kb.tenant_id), kb.id)
|
||||
|
||||
if not KnowledgebaseService.update_by_id(kb.id, req):
|
||||
return get_error_data_result(message="Update dataset error.(Database error)")
|
||||
|
||||
ok, k = KnowledgebaseService.get_by_id(kb.id)
|
||||
if not ok:
|
||||
return get_error_data_result(message="Dataset created failed")
|
||||
|
||||
response_data = remap_dictionary_keys(k.to_dict())
|
||||
return get_result(data=response_data)
|
||||
except OperationalError as e:
|
||||
logging.exception(e)
|
||||
return get_error_data_result(message="Database operation failed")
|
||||
|
||||
|
||||
@manager.route("/datasets", methods=["GET"]) # noqa: F821
|
||||
@token_required
|
||||
def list_datasets(tenant_id):
|
||||
"""
|
||||
List datasets.
|
||||
---
|
||||
tags:
|
||||
- Datasets
|
||||
security:
|
||||
- ApiKeyAuth: []
|
||||
parameters:
|
||||
- in: query
|
||||
name: id
|
||||
type: string
|
||||
required: false
|
||||
description: Dataset ID to filter.
|
||||
- in: query
|
||||
name: name
|
||||
type: string
|
||||
required: false
|
||||
description: Dataset name to filter.
|
||||
- in: query
|
||||
name: page
|
||||
type: integer
|
||||
required: false
|
||||
default: 1
|
||||
description: Page number.
|
||||
- in: query
|
||||
name: page_size
|
||||
type: integer
|
||||
required: false
|
||||
default: 30
|
||||
description: Number of items per page.
|
||||
- in: query
|
||||
name: orderby
|
||||
type: string
|
||||
required: false
|
||||
default: "create_time"
|
||||
description: Field to order by.
|
||||
- in: query
|
||||
name: desc
|
||||
type: boolean
|
||||
required: false
|
||||
default: true
|
||||
description: Order in descending.
|
||||
- in: header
|
||||
name: Authorization
|
||||
type: string
|
||||
required: true
|
||||
description: Bearer token for authentication.
|
||||
responses:
|
||||
200:
|
||||
description: Successful operation.
|
||||
schema:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
"""
|
||||
args, err = validate_and_parse_request_args(request, ListDatasetReq)
|
||||
if err is not None:
|
||||
return get_error_argument_result(err)
|
||||
|
||||
try:
|
||||
kb_id = request.args.get("id")
|
||||
name = args.get("name")
|
||||
if kb_id:
|
||||
kbs = KnowledgebaseService.get_kb_by_id(kb_id, tenant_id)
|
||||
|
||||
if not kbs:
|
||||
return get_error_permission_result(message=f"User '{tenant_id}' lacks permission for dataset '{kb_id}'")
|
||||
if name:
|
||||
kbs = KnowledgebaseService.get_kb_by_name(name, tenant_id)
|
||||
if not kbs:
|
||||
return get_error_permission_result(message=f"User '{tenant_id}' lacks permission for dataset '{name}'")
|
||||
|
||||
tenants = TenantService.get_joined_tenants_by_user_id(tenant_id)
|
||||
kbs = KnowledgebaseService.get_list(
|
||||
[m["tenant_id"] for m in tenants],
|
||||
tenant_id,
|
||||
args["page"],
|
||||
args["page_size"],
|
||||
args["orderby"],
|
||||
args["desc"],
|
||||
kb_id,
|
||||
name,
|
||||
)
|
||||
|
||||
response_data_list = []
|
||||
for kb in kbs:
|
||||
response_data_list.append(remap_dictionary_keys(kb))
|
||||
return get_result(data=response_data_list)
|
||||
except OperationalError as e:
|
||||
logging.exception(e)
|
||||
return get_error_data_result(message="Database operation failed")
|
||||
|
||||
@manager.route('/datasets/<dataset_id>/knowledge_graph', methods=['GET']) # noqa: F821
|
||||
@token_required
|
||||
def knowledge_graph(tenant_id,dataset_id):
|
||||
if not KnowledgebaseService.accessible(dataset_id, tenant_id):
|
||||
return get_result(
|
||||
data=False,
|
||||
message='No authorization.',
|
||||
code=settings.RetCode.AUTHENTICATION_ERROR
|
||||
)
|
||||
_, kb = KnowledgebaseService.get_by_id(dataset_id)
|
||||
req = {
|
||||
"kb_id": [dataset_id],
|
||||
"knowledge_graph_kwd": ["graph"]
|
||||
}
|
||||
|
||||
obj = {"graph": {}, "mind_map": {}}
|
||||
if not settings.docStoreConn.indexExist(search.index_name(kb.tenant_id), dataset_id):
|
||||
return get_result(data=obj)
|
||||
sres = settings.retrievaler.search(req, search.index_name(kb.tenant_id), [dataset_id])
|
||||
if not len(sres.ids):
|
||||
return get_result(data=obj)
|
||||
|
||||
for id in sres.ids[:1]:
|
||||
ty = sres.field[id]["knowledge_graph_kwd"]
|
||||
try:
|
||||
content_json = json.loads(sres.field[id]["content_with_weight"])
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
obj[ty] = content_json
|
||||
|
||||
if "nodes" in obj["graph"]:
|
||||
obj["graph"]["nodes"] = sorted(obj["graph"]["nodes"], key=lambda x: x.get("pagerank", 0), reverse=True)[:256]
|
||||
if "edges" in obj["graph"]:
|
||||
node_id_set = { o["id"] for o in obj["graph"]["nodes"] }
|
||||
filtered_edges = [o for o in obj["graph"]["edges"] if o["source"] != o["target"] and o["source"] in node_id_set and o["target"] in node_id_set]
|
||||
obj["graph"]["edges"] = sorted(filtered_edges, key=lambda x: x.get("weight", 0), reverse=True)[:128]
|
||||
return get_result(data=obj)
|
||||
|
||||
@manager.route('/datasets/<dataset_id>/knowledge_graph', methods=['DELETE']) # noqa: F821
|
||||
@token_required
|
||||
def delete_knowledge_graph(tenant_id,dataset_id):
|
||||
if not KnowledgebaseService.accessible(dataset_id, tenant_id):
|
||||
return get_result(
|
||||
data=False,
|
||||
message='No authorization.',
|
||||
code=settings.RetCode.AUTHENTICATION_ERROR
|
||||
)
|
||||
_, kb = KnowledgebaseService.get_by_id(dataset_id)
|
||||
settings.docStoreConn.delete({"knowledge_graph_kwd": ["graph", "subgraph", "entity", "relation"]}, search.index_name(kb.tenant_id), dataset_id)
|
||||
|
||||
return get_result(data=True)
|
||||
104
api/apps/sdk/dify_retrieval.py
Normal file
104
api/apps/sdk/dify_retrieval.py
Normal file
@@ -0,0 +1,104 @@
|
||||
#
|
||||
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import logging
|
||||
|
||||
from flask import request, jsonify
|
||||
|
||||
from api.db import LLMType
|
||||
from api.db.services.document_service import DocumentService
|
||||
from api.db.services.knowledgebase_service import KnowledgebaseService
|
||||
from api.db.services.llm_service import LLMBundle
|
||||
from api import settings
|
||||
from api.utils.api_utils import validate_request, build_error_result, apikey_required
|
||||
from rag.app.tag import label_question
|
||||
from api.db.services.dialog_service import meta_filter, convert_conditions
|
||||
|
||||
|
||||
@manager.route('/dify/retrieval', methods=['POST']) # noqa: F821
|
||||
@apikey_required
|
||||
@validate_request("knowledge_id", "query")
|
||||
def retrieval(tenant_id):
|
||||
req = request.json
|
||||
question = req["query"]
|
||||
kb_id = req["knowledge_id"]
|
||||
use_kg = req.get("use_kg", False)
|
||||
retrieval_setting = req.get("retrieval_setting", {})
|
||||
similarity_threshold = float(retrieval_setting.get("score_threshold", 0.0))
|
||||
top = int(retrieval_setting.get("top_k", 1024))
|
||||
metadata_condition = req.get("metadata_condition",{})
|
||||
metas = DocumentService.get_meta_by_kbs([kb_id])
|
||||
|
||||
doc_ids = []
|
||||
try:
|
||||
|
||||
e, kb = KnowledgebaseService.get_by_id(kb_id)
|
||||
if not e:
|
||||
return build_error_result(message="Knowledgebase not found!", code=settings.RetCode.NOT_FOUND)
|
||||
|
||||
embd_mdl = LLMBundle(kb.tenant_id, LLMType.EMBEDDING.value, llm_name=kb.embd_id)
|
||||
print(metadata_condition)
|
||||
print("after",convert_conditions(metadata_condition))
|
||||
doc_ids.extend(meta_filter(metas, convert_conditions(metadata_condition)))
|
||||
print("doc_ids",doc_ids)
|
||||
if not doc_ids and metadata_condition is not None:
|
||||
doc_ids = ['-999']
|
||||
ranks = settings.retrievaler.retrieval(
|
||||
question,
|
||||
embd_mdl,
|
||||
kb.tenant_id,
|
||||
[kb_id],
|
||||
page=1,
|
||||
page_size=top,
|
||||
similarity_threshold=similarity_threshold,
|
||||
vector_similarity_weight=0.3,
|
||||
top=top,
|
||||
doc_ids=doc_ids,
|
||||
rank_feature=label_question(question, [kb])
|
||||
)
|
||||
|
||||
if use_kg:
|
||||
ck = settings.kg_retrievaler.retrieval(question,
|
||||
[tenant_id],
|
||||
[kb_id],
|
||||
embd_mdl,
|
||||
LLMBundle(kb.tenant_id, LLMType.CHAT))
|
||||
if ck["content_with_weight"]:
|
||||
ranks["chunks"].insert(0, ck)
|
||||
|
||||
records = []
|
||||
for c in ranks["chunks"]:
|
||||
e, doc = DocumentService.get_by_id( c["doc_id"])
|
||||
c.pop("vector", None)
|
||||
meta = getattr(doc, 'meta_fields', {})
|
||||
meta["doc_id"] = c["doc_id"]
|
||||
records.append({
|
||||
"content": c["content_with_weight"],
|
||||
"score": c["similarity"],
|
||||
"title": c["docnm_kwd"],
|
||||
"metadata": meta
|
||||
})
|
||||
|
||||
return jsonify({"records": records})
|
||||
except Exception as e:
|
||||
if str(e).find("not_found") > 0:
|
||||
return build_error_result(
|
||||
message='No chunk found! Check the chunk status please!',
|
||||
code=settings.RetCode.NOT_FOUND
|
||||
)
|
||||
logging.exception(e)
|
||||
return build_error_result(message=str(e), code=settings.RetCode.SERVER_ERROR)
|
||||
|
||||
|
||||
1497
api/apps/sdk/doc.py
Normal file
1497
api/apps/sdk/doc.py
Normal file
File diff suppressed because it is too large
Load Diff
738
api/apps/sdk/files.py
Normal file
738
api/apps/sdk/files.py
Normal file
@@ -0,0 +1,738 @@
|
||||
import pathlib
|
||||
import re
|
||||
|
||||
import flask
|
||||
from flask import request
|
||||
from pathlib import Path
|
||||
|
||||
from api.db.services.document_service import DocumentService
|
||||
from api.db.services.file2document_service import File2DocumentService
|
||||
from api.db.services.knowledgebase_service import KnowledgebaseService
|
||||
from api.utils.api_utils import server_error_response, token_required
|
||||
from api.utils import get_uuid
|
||||
from api.db import FileType
|
||||
from api.db.services import duplicate_name
|
||||
from api.db.services.file_service import FileService
|
||||
from api.utils.api_utils import get_json_result
|
||||
from api.utils.file_utils import filename_type
|
||||
from rag.utils.storage_factory import STORAGE_IMPL
|
||||
|
||||
@manager.route('/file/upload', methods=['POST']) # noqa: F821
|
||||
@token_required
|
||||
def upload(tenant_id):
|
||||
"""
|
||||
Upload a file to the system.
|
||||
---
|
||||
tags:
|
||||
- File Management
|
||||
security:
|
||||
- ApiKeyAuth: []
|
||||
parameters:
|
||||
- in: formData
|
||||
name: file
|
||||
type: file
|
||||
required: true
|
||||
description: The file to upload
|
||||
- in: formData
|
||||
name: parent_id
|
||||
type: string
|
||||
description: Parent folder ID where the file will be uploaded. Optional.
|
||||
responses:
|
||||
200:
|
||||
description: Successfully uploaded the file.
|
||||
schema:
|
||||
type: object
|
||||
properties:
|
||||
data:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
description: File ID
|
||||
name:
|
||||
type: string
|
||||
description: File name
|
||||
size:
|
||||
type: integer
|
||||
description: File size in bytes
|
||||
type:
|
||||
type: string
|
||||
description: File type (e.g., document, folder)
|
||||
"""
|
||||
pf_id = request.form.get("parent_id")
|
||||
|
||||
if not pf_id:
|
||||
root_folder = FileService.get_root_folder(tenant_id)
|
||||
pf_id = root_folder["id"]
|
||||
|
||||
if 'file' not in request.files:
|
||||
return get_json_result(data=False, message='No file part!', code=400)
|
||||
file_objs = request.files.getlist('file')
|
||||
|
||||
for file_obj in file_objs:
|
||||
if file_obj.filename == '':
|
||||
return get_json_result(data=False, message='No selected file!', code=400)
|
||||
|
||||
file_res = []
|
||||
|
||||
try:
|
||||
e, pf_folder = FileService.get_by_id(pf_id)
|
||||
if not e:
|
||||
return get_json_result(data=False, message="Can't find this folder!", code=404)
|
||||
|
||||
for file_obj in file_objs:
|
||||
# Handle file path
|
||||
full_path = '/' + file_obj.filename
|
||||
file_obj_names = full_path.split('/')
|
||||
file_len = len(file_obj_names)
|
||||
|
||||
# Get folder path ID
|
||||
file_id_list = FileService.get_id_list_by_id(pf_id, file_obj_names, 1, [pf_id])
|
||||
len_id_list = len(file_id_list)
|
||||
|
||||
# Crete file folder
|
||||
if file_len != len_id_list:
|
||||
e, file = FileService.get_by_id(file_id_list[len_id_list - 1])
|
||||
if not e:
|
||||
return get_json_result(data=False, message="Folder not found!", code=404)
|
||||
last_folder = FileService.create_folder(file, file_id_list[len_id_list - 1], file_obj_names, len_id_list)
|
||||
else:
|
||||
e, file = FileService.get_by_id(file_id_list[len_id_list - 2])
|
||||
if not e:
|
||||
return get_json_result(data=False, message="Folder not found!", code=404)
|
||||
last_folder = FileService.create_folder(file, file_id_list[len_id_list - 2], file_obj_names, len_id_list)
|
||||
|
||||
filetype = filename_type(file_obj_names[file_len - 1])
|
||||
location = file_obj_names[file_len - 1]
|
||||
while STORAGE_IMPL.obj_exist(last_folder.id, location):
|
||||
location += "_"
|
||||
blob = file_obj.read()
|
||||
filename = duplicate_name(FileService.query, name=file_obj_names[file_len - 1], parent_id=last_folder.id)
|
||||
|
||||
file = {
|
||||
"id": get_uuid(),
|
||||
"parent_id": last_folder.id,
|
||||
"tenant_id": tenant_id,
|
||||
"created_by": tenant_id,
|
||||
"type": filetype,
|
||||
"name": filename,
|
||||
"location": location,
|
||||
"size": len(blob),
|
||||
}
|
||||
file = FileService.insert(file)
|
||||
STORAGE_IMPL.put(last_folder.id, location, blob)
|
||||
file_res.append(file.to_json())
|
||||
return get_json_result(data=file_res)
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
|
||||
@manager.route('/file/create', methods=['POST']) # noqa: F821
|
||||
@token_required
|
||||
def create(tenant_id):
|
||||
"""
|
||||
Create a new file or folder.
|
||||
---
|
||||
tags:
|
||||
- File Management
|
||||
security:
|
||||
- ApiKeyAuth: []
|
||||
parameters:
|
||||
- in: body
|
||||
name: body
|
||||
description: File creation parameters
|
||||
required: true
|
||||
schema:
|
||||
type: object
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
description: Name of the file/folder
|
||||
parent_id:
|
||||
type: string
|
||||
description: Parent folder ID. Optional.
|
||||
type:
|
||||
type: string
|
||||
enum: ["FOLDER", "VIRTUAL"]
|
||||
description: Type of the file
|
||||
responses:
|
||||
200:
|
||||
description: File created successfully.
|
||||
schema:
|
||||
type: object
|
||||
properties:
|
||||
data:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
name:
|
||||
type: string
|
||||
type:
|
||||
type: string
|
||||
"""
|
||||
req = request.json
|
||||
pf_id = request.json.get("parent_id")
|
||||
input_file_type = request.json.get("type")
|
||||
if not pf_id:
|
||||
root_folder = FileService.get_root_folder(tenant_id)
|
||||
pf_id = root_folder["id"]
|
||||
|
||||
try:
|
||||
if not FileService.is_parent_folder_exist(pf_id):
|
||||
return get_json_result(data=False, message="Parent Folder Doesn't Exist!", code=400)
|
||||
if FileService.query(name=req["name"], parent_id=pf_id):
|
||||
return get_json_result(data=False, message="Duplicated folder name in the same folder.", code=409)
|
||||
|
||||
if input_file_type == FileType.FOLDER.value:
|
||||
file_type = FileType.FOLDER.value
|
||||
else:
|
||||
file_type = FileType.VIRTUAL.value
|
||||
|
||||
file = FileService.insert({
|
||||
"id": get_uuid(),
|
||||
"parent_id": pf_id,
|
||||
"tenant_id": tenant_id,
|
||||
"created_by": tenant_id,
|
||||
"name": req["name"],
|
||||
"location": "",
|
||||
"size": 0,
|
||||
"type": file_type
|
||||
})
|
||||
|
||||
return get_json_result(data=file.to_json())
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
|
||||
@manager.route('/file/list', methods=['GET']) # noqa: F821
|
||||
@token_required
|
||||
def list_files(tenant_id):
|
||||
"""
|
||||
List files under a specific folder.
|
||||
---
|
||||
tags:
|
||||
- File Management
|
||||
security:
|
||||
- ApiKeyAuth: []
|
||||
parameters:
|
||||
- in: query
|
||||
name: parent_id
|
||||
type: string
|
||||
description: Folder ID to list files from
|
||||
- in: query
|
||||
name: keywords
|
||||
type: string
|
||||
description: Search keyword filter
|
||||
- in: query
|
||||
name: page
|
||||
type: integer
|
||||
default: 1
|
||||
description: Page number
|
||||
- in: query
|
||||
name: page_size
|
||||
type: integer
|
||||
default: 15
|
||||
description: Number of results per page
|
||||
- in: query
|
||||
name: orderby
|
||||
type: string
|
||||
default: "create_time"
|
||||
description: Sort by field
|
||||
- in: query
|
||||
name: desc
|
||||
type: boolean
|
||||
default: true
|
||||
description: Descending order
|
||||
responses:
|
||||
200:
|
||||
description: Successfully retrieved file list.
|
||||
schema:
|
||||
type: object
|
||||
properties:
|
||||
total:
|
||||
type: integer
|
||||
files:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
name:
|
||||
type: string
|
||||
type:
|
||||
type: string
|
||||
size:
|
||||
type: integer
|
||||
create_time:
|
||||
type: string
|
||||
format: date-time
|
||||
"""
|
||||
pf_id = request.args.get("parent_id")
|
||||
keywords = request.args.get("keywords", "")
|
||||
page_number = int(request.args.get("page", 1))
|
||||
items_per_page = int(request.args.get("page_size", 15))
|
||||
orderby = request.args.get("orderby", "create_time")
|
||||
desc = request.args.get("desc", True)
|
||||
|
||||
if not pf_id:
|
||||
root_folder = FileService.get_root_folder(tenant_id)
|
||||
pf_id = root_folder["id"]
|
||||
FileService.init_knowledgebase_docs(pf_id, tenant_id)
|
||||
|
||||
try:
|
||||
e, file = FileService.get_by_id(pf_id)
|
||||
if not e:
|
||||
return get_json_result(message="Folder not found!", code=404)
|
||||
|
||||
files, total = FileService.get_by_pf_id(tenant_id, pf_id, page_number, items_per_page, orderby, desc, keywords)
|
||||
|
||||
parent_folder = FileService.get_parent_folder(pf_id)
|
||||
if not parent_folder:
|
||||
return get_json_result(message="File not found!", code=404)
|
||||
|
||||
return get_json_result(data={"total": total, "files": files, "parent_folder": parent_folder.to_json()})
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
|
||||
@manager.route('/file/root_folder', methods=['GET']) # noqa: F821
|
||||
@token_required
|
||||
def get_root_folder(tenant_id):
|
||||
"""
|
||||
Get user's root folder.
|
||||
---
|
||||
tags:
|
||||
- File Management
|
||||
security:
|
||||
- ApiKeyAuth: []
|
||||
responses:
|
||||
200:
|
||||
description: Root folder information
|
||||
schema:
|
||||
type: object
|
||||
properties:
|
||||
data:
|
||||
type: object
|
||||
properties:
|
||||
root_folder:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
name:
|
||||
type: string
|
||||
type:
|
||||
type: string
|
||||
"""
|
||||
try:
|
||||
root_folder = FileService.get_root_folder(tenant_id)
|
||||
return get_json_result(data={"root_folder": root_folder})
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
|
||||
@manager.route('/file/parent_folder', methods=['GET']) # noqa: F821
|
||||
@token_required
|
||||
def get_parent_folder():
|
||||
"""
|
||||
Get parent folder info of a file.
|
||||
---
|
||||
tags:
|
||||
- File Management
|
||||
security:
|
||||
- ApiKeyAuth: []
|
||||
parameters:
|
||||
- in: query
|
||||
name: file_id
|
||||
type: string
|
||||
required: true
|
||||
description: Target file ID
|
||||
responses:
|
||||
200:
|
||||
description: Parent folder information
|
||||
schema:
|
||||
type: object
|
||||
properties:
|
||||
data:
|
||||
type: object
|
||||
properties:
|
||||
parent_folder:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
name:
|
||||
type: string
|
||||
"""
|
||||
file_id = request.args.get("file_id")
|
||||
try:
|
||||
e, file = FileService.get_by_id(file_id)
|
||||
if not e:
|
||||
return get_json_result(message="Folder not found!", code=404)
|
||||
|
||||
parent_folder = FileService.get_parent_folder(file_id)
|
||||
return get_json_result(data={"parent_folder": parent_folder.to_json()})
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
|
||||
@manager.route('/file/all_parent_folder', methods=['GET']) # noqa: F821
|
||||
@token_required
|
||||
def get_all_parent_folders(tenant_id):
|
||||
"""
|
||||
Get all parent folders of a file.
|
||||
---
|
||||
tags:
|
||||
- File Management
|
||||
security:
|
||||
- ApiKeyAuth: []
|
||||
parameters:
|
||||
- in: query
|
||||
name: file_id
|
||||
type: string
|
||||
required: true
|
||||
description: Target file ID
|
||||
responses:
|
||||
200:
|
||||
description: All parent folders of the file
|
||||
schema:
|
||||
type: object
|
||||
properties:
|
||||
data:
|
||||
type: object
|
||||
properties:
|
||||
parent_folders:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
name:
|
||||
type: string
|
||||
"""
|
||||
file_id = request.args.get("file_id")
|
||||
try:
|
||||
e, file = FileService.get_by_id(file_id)
|
||||
if not e:
|
||||
return get_json_result(message="Folder not found!", code=404)
|
||||
|
||||
parent_folders = FileService.get_all_parent_folders(file_id)
|
||||
parent_folders_res = [folder.to_json() for folder in parent_folders]
|
||||
return get_json_result(data={"parent_folders": parent_folders_res})
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
|
||||
@manager.route('/file/rm', methods=['POST']) # noqa: F821
|
||||
@token_required
|
||||
def rm(tenant_id):
|
||||
"""
|
||||
Delete one or multiple files/folders.
|
||||
---
|
||||
tags:
|
||||
- File Management
|
||||
security:
|
||||
- ApiKeyAuth: []
|
||||
parameters:
|
||||
- in: body
|
||||
name: body
|
||||
description: Files to delete
|
||||
required: true
|
||||
schema:
|
||||
type: object
|
||||
properties:
|
||||
file_ids:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
description: List of file IDs to delete
|
||||
responses:
|
||||
200:
|
||||
description: Successfully deleted files
|
||||
schema:
|
||||
type: object
|
||||
properties:
|
||||
data:
|
||||
type: boolean
|
||||
example: true
|
||||
"""
|
||||
req = request.json
|
||||
file_ids = req["file_ids"]
|
||||
try:
|
||||
for file_id in file_ids:
|
||||
e, file = FileService.get_by_id(file_id)
|
||||
if not e:
|
||||
return get_json_result(message="File or Folder not found!", code=404)
|
||||
if not file.tenant_id:
|
||||
return get_json_result(message="Tenant not found!", code=404)
|
||||
|
||||
if file.type == FileType.FOLDER.value:
|
||||
file_id_list = FileService.get_all_innermost_file_ids(file_id, [])
|
||||
for inner_file_id in file_id_list:
|
||||
e, file = FileService.get_by_id(inner_file_id)
|
||||
if not e:
|
||||
return get_json_result(message="File not found!", code=404)
|
||||
STORAGE_IMPL.rm(file.parent_id, file.location)
|
||||
FileService.delete_folder_by_pf_id(tenant_id, file_id)
|
||||
else:
|
||||
STORAGE_IMPL.rm(file.parent_id, file.location)
|
||||
if not FileService.delete(file):
|
||||
return get_json_result(message="Database error (File removal)!", code=500)
|
||||
|
||||
informs = File2DocumentService.get_by_file_id(file_id)
|
||||
for inform in informs:
|
||||
doc_id = inform.document_id
|
||||
e, doc = DocumentService.get_by_id(doc_id)
|
||||
if not e:
|
||||
return get_json_result(message="Document not found!", code=404)
|
||||
tenant_id = DocumentService.get_tenant_id(doc_id)
|
||||
if not tenant_id:
|
||||
return get_json_result(message="Tenant not found!", code=404)
|
||||
if not DocumentService.remove_document(doc, tenant_id):
|
||||
return get_json_result(message="Database error (Document removal)!", code=500)
|
||||
File2DocumentService.delete_by_file_id(file_id)
|
||||
|
||||
return get_json_result(data=True)
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
|
||||
@manager.route('/file/rename', methods=['POST']) # noqa: F821
|
||||
@token_required
|
||||
def rename(tenant_id):
|
||||
"""
|
||||
Rename a file.
|
||||
---
|
||||
tags:
|
||||
- File Management
|
||||
security:
|
||||
- ApiKeyAuth: []
|
||||
parameters:
|
||||
- in: body
|
||||
name: body
|
||||
description: Rename file
|
||||
required: true
|
||||
schema:
|
||||
type: object
|
||||
properties:
|
||||
file_id:
|
||||
type: string
|
||||
description: Target file ID
|
||||
name:
|
||||
type: string
|
||||
description: New name for the file
|
||||
responses:
|
||||
200:
|
||||
description: File renamed successfully
|
||||
schema:
|
||||
type: object
|
||||
properties:
|
||||
data:
|
||||
type: boolean
|
||||
example: true
|
||||
"""
|
||||
req = request.json
|
||||
try:
|
||||
e, file = FileService.get_by_id(req["file_id"])
|
||||
if not e:
|
||||
return get_json_result(message="File not found!", code=404)
|
||||
|
||||
if file.type != FileType.FOLDER.value and pathlib.Path(req["name"].lower()).suffix != pathlib.Path(file.name.lower()).suffix:
|
||||
return get_json_result(data=False, message="The extension of file can't be changed", code=400)
|
||||
|
||||
for existing_file in FileService.query(name=req["name"], pf_id=file.parent_id):
|
||||
if existing_file.name == req["name"]:
|
||||
return get_json_result(data=False, message="Duplicated file name in the same folder.", code=409)
|
||||
|
||||
if not FileService.update_by_id(req["file_id"], {"name": req["name"]}):
|
||||
return get_json_result(message="Database error (File rename)!", code=500)
|
||||
|
||||
informs = File2DocumentService.get_by_file_id(req["file_id"])
|
||||
if informs:
|
||||
if not DocumentService.update_by_id(informs[0].document_id, {"name": req["name"]}):
|
||||
return get_json_result(message="Database error (Document rename)!", code=500)
|
||||
|
||||
return get_json_result(data=True)
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
|
||||
@manager.route('/file/get/<file_id>', methods=['GET']) # noqa: F821
|
||||
@token_required
|
||||
def get(tenant_id,file_id):
|
||||
"""
|
||||
Download a file.
|
||||
---
|
||||
tags:
|
||||
- File Management
|
||||
security:
|
||||
- ApiKeyAuth: []
|
||||
produces:
|
||||
- application/octet-stream
|
||||
parameters:
|
||||
- in: path
|
||||
name: file_id
|
||||
type: string
|
||||
required: true
|
||||
description: File ID to download
|
||||
responses:
|
||||
200:
|
||||
description: File stream
|
||||
schema:
|
||||
type: file
|
||||
404:
|
||||
description: File not found
|
||||
"""
|
||||
try:
|
||||
e, file = FileService.get_by_id(file_id)
|
||||
if not e:
|
||||
return get_json_result(message="Document not found!", code=404)
|
||||
|
||||
blob = STORAGE_IMPL.get(file.parent_id, file.location)
|
||||
if not blob:
|
||||
b, n = File2DocumentService.get_storage_address(file_id=file_id)
|
||||
blob = STORAGE_IMPL.get(b, n)
|
||||
|
||||
response = flask.make_response(blob)
|
||||
ext = re.search(r"\.([^.]+)$", file.name)
|
||||
if ext:
|
||||
if file.type == FileType.VISUAL.value:
|
||||
response.headers.set('Content-Type', 'image/%s' % ext.group(1))
|
||||
else:
|
||||
response.headers.set('Content-Type', 'application/%s' % ext.group(1))
|
||||
return response
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
|
||||
@manager.route('/file/mv', methods=['POST']) # noqa: F821
|
||||
@token_required
|
||||
def move(tenant_id):
|
||||
"""
|
||||
Move one or multiple files to another folder.
|
||||
---
|
||||
tags:
|
||||
- File Management
|
||||
security:
|
||||
- ApiKeyAuth: []
|
||||
parameters:
|
||||
- in: body
|
||||
name: body
|
||||
description: Move operation
|
||||
required: true
|
||||
schema:
|
||||
type: object
|
||||
properties:
|
||||
src_file_ids:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
description: Source file IDs
|
||||
dest_file_id:
|
||||
type: string
|
||||
description: Destination folder ID
|
||||
responses:
|
||||
200:
|
||||
description: Files moved successfully
|
||||
schema:
|
||||
type: object
|
||||
properties:
|
||||
data:
|
||||
type: boolean
|
||||
example: true
|
||||
"""
|
||||
req = request.json
|
||||
try:
|
||||
file_ids = req["src_file_ids"]
|
||||
parent_id = req["dest_file_id"]
|
||||
files = FileService.get_by_ids(file_ids)
|
||||
files_dict = {f.id: f for f in files}
|
||||
|
||||
for file_id in file_ids:
|
||||
file = files_dict[file_id]
|
||||
if not file:
|
||||
return get_json_result(message="File or Folder not found!", code=404)
|
||||
if not file.tenant_id:
|
||||
return get_json_result(message="Tenant not found!", code=404)
|
||||
|
||||
fe, _ = FileService.get_by_id(parent_id)
|
||||
if not fe:
|
||||
return get_json_result(message="Parent Folder not found!", code=404)
|
||||
|
||||
FileService.move_file(file_ids, parent_id)
|
||||
return get_json_result(data=True)
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
@manager.route('/file/convert', methods=['POST']) # noqa: F821
|
||||
@token_required
|
||||
def convert(tenant_id):
|
||||
req = request.json
|
||||
kb_ids = req["kb_ids"]
|
||||
file_ids = req["file_ids"]
|
||||
file2documents = []
|
||||
|
||||
try:
|
||||
files = FileService.get_by_ids(file_ids)
|
||||
files_set = dict({file.id: file for file in files})
|
||||
for file_id in file_ids:
|
||||
file = files_set[file_id]
|
||||
if not file:
|
||||
return get_json_result(message="File not found!", code=404)
|
||||
file_ids_list = [file_id]
|
||||
if file.type == FileType.FOLDER.value:
|
||||
file_ids_list = FileService.get_all_innermost_file_ids(file_id, [])
|
||||
for id in file_ids_list:
|
||||
informs = File2DocumentService.get_by_file_id(id)
|
||||
# delete
|
||||
for inform in informs:
|
||||
doc_id = inform.document_id
|
||||
e, doc = DocumentService.get_by_id(doc_id)
|
||||
if not e:
|
||||
return get_json_result(message="Document not found!", code=404)
|
||||
tenant_id = DocumentService.get_tenant_id(doc_id)
|
||||
if not tenant_id:
|
||||
return get_json_result(message="Tenant not found!", code=404)
|
||||
if not DocumentService.remove_document(doc, tenant_id):
|
||||
return get_json_result(
|
||||
message="Database error (Document removal)!", code=404)
|
||||
File2DocumentService.delete_by_file_id(id)
|
||||
|
||||
# insert
|
||||
for kb_id in kb_ids:
|
||||
e, kb = KnowledgebaseService.get_by_id(kb_id)
|
||||
if not e:
|
||||
return get_json_result(
|
||||
message="Can't find this knowledgebase!", code=404)
|
||||
e, file = FileService.get_by_id(id)
|
||||
if not e:
|
||||
return get_json_result(
|
||||
message="Can't find this file!", code=404)
|
||||
|
||||
doc = DocumentService.insert({
|
||||
"id": get_uuid(),
|
||||
"kb_id": kb.id,
|
||||
"parser_id": FileService.get_parser(file.type, file.name, kb.parser_id),
|
||||
"parser_config": kb.parser_config,
|
||||
"created_by": tenant_id,
|
||||
"type": file.type,
|
||||
"name": file.name,
|
||||
"suffix": Path(file.name).suffix.lstrip("."),
|
||||
"location": file.location,
|
||||
"size": file.size
|
||||
})
|
||||
file2document = File2DocumentService.insert({
|
||||
"id": get_uuid(),
|
||||
"file_id": id,
|
||||
"document_id": doc.id,
|
||||
})
|
||||
|
||||
file2documents.append(file2document.to_json())
|
||||
return get_json_result(data=file2documents)
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
1115
api/apps/sdk/session.py
Normal file
1115
api/apps/sdk/session.py
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user