Files
TERES_fastapi_backend/api/apps/file_app.py

491 lines
18 KiB
Python
Raw Normal View History

2025-10-13 13:18:03 +08:00
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
2025-11-04 16:06:36 +08:00
import logging
2025-10-13 13:18:03 +08:00
import os
import pathlib
import re
from typing import Optional, List
2025-10-13 13:18:03 +08:00
from fastapi import APIRouter, Depends, Query, UploadFile, File, Form
from fastapi.responses import Response
from api.apps.models.auth_dependencies import get_current_user
from api.apps.models.file_models import (
CreateFileRequest,
DeleteFilesRequest,
RenameFileRequest,
MoveFilesRequest,
)
2025-10-13 13:18:03 +08:00
from api.common.check_team_permission import check_file_team_permission
from api.db.services.document_service import DocumentService
from api.db.services.file2document_service import File2DocumentService
from api.utils.api_utils import server_error_response, get_data_error_result
2025-10-13 13:18:03 +08:00
from api.utils import get_uuid
from api.db import FileType, FileSource
from api.db.services import duplicate_name
from api.db.services.file_service import FileService
from api import settings
from api.utils.api_utils import get_json_result
from api.utils.file_utils import filename_type
from api.utils.web_utils import CONTENT_TYPE_MAP
from rag.utils.storage_factory import STORAGE_IMPL
# Create the router; the file endpoints below register themselves on it.
router = APIRouter()
2025-10-13 13:18:03 +08:00
@router.post('/upload')
async def upload(
    files: List[UploadFile] = File(...),
    parent_id: Optional[str] = Form(None),
    current_user = Depends(get_current_user)
):
    """Upload one or more files into a folder.

    Args:
        files: Uploaded files. A filename may contain ``/`` separators; it is
            split into path components that are passed to ``FileService``
            to resolve/create the containing folders.
        parent_id: Target folder id. Falls back to the current user's root
            folder when empty.
        current_user: Authenticated user injected by ``get_current_user``.

    Returns:
        A JSON result whose ``data`` is the list of created file records, or
        an error result when validation fails or an exception is raised.
    """
    pf_id = parent_id
    if not pf_id:
        root_folder = FileService.get_root_folder(current_user.id)
        pf_id = root_folder["id"]

    if not files:
        return get_json_result(
            data=False, message='No file part!', code=settings.RetCode.ARGUMENT_ERROR)
    # Reject the whole request up front if any part has no filename, so the
    # processing loop below can rely on every filename being non-empty.
    for file_obj in files:
        if not file_obj.filename:
            return get_json_result(
                data=False, message='No file selected!', code=settings.RetCode.ARGUMENT_ERROR)

    file_res = []
    try:
        found, _ = FileService.get_by_id(pf_id)
        if not found:
            return get_data_error_result(message="Can't find this folder!")

        # The limit is read from the environment once per request; 0 (the
        # default) disables the check.
        max_file_num = int(os.environ.get('MAX_FILE_NUM_PER_USER', 0))

        for file_obj in files:
            # The document count is re-read for every file on purpose: it is
            # compared against the limit before each individual upload.
            if max_file_num > 0 and DocumentService.get_doc_count(current_user.id) >= max_file_num:
                return get_data_error_result(message="Exceed the maximum file number of a free user!")

            # Split the (possibly nested) file name into path components.
            # The leading '/' yields an empty first component.
            file_obj_names = ('/' + file_obj.filename).split('/')
            file_len = len(file_obj_names)
            leaf_name = file_obj_names[-1]

            # Resolve the ids along the path, starting from the target folder.
            file_id_list = FileService.get_id_list_by_id(pf_id, file_obj_names, 1, [pf_id])
            len_id_list = len(file_id_list)

            # Pick the folder to anchor folder creation at: the last resolved
            # id when the id list and the path differ in length, otherwise
            # the second-to-last one.
            anchor_index = len_id_list - 1 if file_len != len_id_list else len_id_list - 2
            anchor_id = file_id_list[anchor_index]
            found, anchor_folder = FileService.get_by_id(anchor_id)
            if not found:
                return get_data_error_result(message="Folder not found!")
            last_folder = FileService.create_folder(
                anchor_folder, anchor_id, file_obj_names, len_id_list)

            # File type and a storage location that does not collide with an
            # existing object in the same folder.
            filetype = filename_type(leaf_name)
            location = leaf_name
            while STORAGE_IMPL.obj_exist(last_folder.id, location):
                location += "_"
            blob = await file_obj.read()

            filename = duplicate_name(
                FileService.query,
                name=leaf_name,
                parent_id=last_folder.id)
            STORAGE_IMPL.put(last_folder.id, location, blob)
            record = {
                "id": get_uuid(),
                "parent_id": last_folder.id,
                "tenant_id": current_user.id,
                "created_by": current_user.id,
                "type": filetype,
                "name": filename,
                "location": location,
                "size": len(blob),
            }
            inserted = FileService.insert(record)
            file_res.append(inserted.to_json())
        return get_json_result(data=file_res)
    except Exception as e:
        return server_error_response(e)
@router.post('/create')
async def create(
    request: CreateFileRequest,
    current_user = Depends(get_current_user)
):
    """Create a folder or a virtual file entry.

    The entry is created under ``parent_id`` from the request, or under the
    current user's root folder when no parent is given. A request ``type``
    equal to ``FileType.FOLDER`` creates a folder; anything else creates a
    ``FileType.VIRTUAL`` entry.

    Returns:
        A JSON result with the created record, or an error result when the
        parent folder does not exist, the name is already taken in that
        folder, or an exception is raised.
    """
    payload = request.model_dump(exclude_unset=True)
    target_parent_id = payload.get("parent_id")
    requested_type = payload.get("type")

    if not target_parent_id:
        target_parent_id = FileService.get_root_folder(current_user.id)["id"]

    try:
        if not FileService.is_parent_folder_exist(target_parent_id):
            return get_json_result(
                data=False, message="Parent Folder Doesn't Exist!", code=settings.RetCode.OPERATING_ERROR)

        name_taken = FileService.query(name=payload["name"], parent_id=target_parent_id)
        if name_taken:
            return get_data_error_result(
                message="Duplicated folder name in the same folder.")

        wants_folder = requested_type == FileType.FOLDER.value
        file_type = FileType.FOLDER.value if wants_folder else FileType.VIRTUAL.value

        new_entry = {
            "id": get_uuid(),
            "parent_id": target_parent_id,
            "tenant_id": current_user.id,
            "created_by": current_user.id,
            "name": payload["name"],
            "location": "",
            "size": 0,
            "type": file_type,
        }
        created = FileService.insert(new_entry)
        return get_json_result(data=created.to_json())
    except Exception as exc:
        return server_error_response(exc)
@router.get('/list')
async def list_files(
    parent_id: Optional[str] = Query(None, description="父文件夹ID"),
    keywords: Optional[str] = Query("", description="搜索关键词"),
    page: Optional[int] = Query(1, description="页码"),
    page_size: Optional[int] = Query(15, description="每页数量"),
    orderby: Optional[str] = Query("create_time", description="排序字段"),
    desc: Optional[bool] = Query(True, description="是否降序"),
    current_user = Depends(get_current_user)
):
    """List the entries of a folder, paginated.

    When no ``parent_id`` is supplied the current user's root folder is
    listed, after ``FileService.init_knowledgebase_docs`` has been called
    for it.

    Returns:
        A JSON result whose ``data`` holds ``total``, ``files`` and the
        serialized ``parent_folder``; or an error result when the folder is
        missing or an exception is raised.
    """
    folder_id = parent_id
    # Falsy page / page_size values (None, 0) fall back to the defaults.
    page_number = int(page or 1)
    items_per_page = int(page_size or 15)

    if not folder_id:
        folder_id = FileService.get_root_folder(current_user.id)["id"]
        FileService.init_knowledgebase_docs(folder_id, current_user.id)

    try:
        found, _ = FileService.get_by_id(folder_id)
        if not found:
            return get_data_error_result(message="Folder not found!")

        files, total = FileService.get_by_pf_id(
            current_user.id, folder_id, page_number, items_per_page, orderby, desc, keywords)

        parent_folder = FileService.get_parent_folder(folder_id)
        if not parent_folder:
            return get_json_result(message="File not found!")

        listing = {"total": total, "files": files, "parent_folder": parent_folder.to_json()}
        return get_json_result(data=listing)
    except Exception as exc:
        return server_error_response(exc)
@router.get('/root_folder')
async def get_root_folder(
    current_user = Depends(get_current_user)
):
    """Return the current user's root folder.

    Returns:
        A JSON result whose ``data`` is ``{"root_folder": ...}``, or a server
        error result when an exception is raised.
    """
    try:
        payload = {"root_folder": FileService.get_root_folder(current_user.id)}
        return get_json_result(data=payload)
    except Exception as exc:
        return server_error_response(exc)
@router.get('/parent_folder')
async def get_parent_folder(
    file_id: str = Query(..., description="文件ID"),
    current_user = Depends(get_current_user)
):
    """Return the parent folder of the given file or folder.

    Returns:
        A JSON result whose ``data`` is ``{"parent_folder": ...}``, an error
        result when ``file_id`` cannot be found, or a server error result
        when an exception is raised.
    """
    try:
        found, _ = FileService.get_by_id(file_id)
        if not found:
            return get_data_error_result(message="Folder not found!")

        parent = FileService.get_parent_folder(file_id)
        payload = {"parent_folder": parent.to_json()}
        return get_json_result(data=payload)
    except Exception as exc:
        return server_error_response(exc)
@router.get('/all_parent_folder')
async def get_all_parent_folders(
    file_id: str = Query(..., description="文件ID"),
    current_user = Depends(get_current_user)
):
    """Return every parent folder reported for the given file or folder.

    Returns:
        A JSON result whose ``data`` is ``{"parent_folders": [...]}`` with the
        serialized folders in the order ``FileService`` returns them, an
        error result when ``file_id`` cannot be found, or a server error
        result when an exception is raised.
    """
    try:
        found, _ = FileService.get_by_id(file_id)
        if not found:
            return get_data_error_result(message="Folder not found!")

        serialized = [
            folder.to_json()
            for folder in FileService.get_all_parent_folders(file_id)
        ]
        return get_json_result(data={"parent_folders": serialized})
    except Exception as exc:
        return server_error_response(exc)
@router.post("/rm")
async def rm(
    request: DeleteFilesRequest,
    current_user = Depends(get_current_user)
):
    """Delete the requested files and folders.

    Each id in ``request.file_ids`` is looked up and permission-checked in
    turn. Entries whose ``source_type`` is ``FileSource.KNOWLEDGEBASE`` are
    skipped, folders are deleted recursively, and other entries are deleted
    individually. Processing stops at the first id that fails a check;
    entries handled before it have already been deleted.

    Returns:
        ``data=True`` on success, otherwise an error result.
    """
    file_ids = request.file_ids

    def _delete_single_file(file):
        # Removing the stored object is best-effort: a failure is logged and
        # the database cleanup below still runs.
        try:
            if file.location:
                STORAGE_IMPL.rm(file.parent_id, file.location)
        except Exception:
            logging.exception(f"Fail to remove object: {file.parent_id}/{file.location}")

        # Remove every document linked to this file, then the links themselves.
        for inform in File2DocumentService.get_by_file_id(file.id):
            doc_id = inform.document_id
            found, doc = DocumentService.get_by_id(doc_id)
            if not (found and doc):
                continue
            tenant_id = DocumentService.get_tenant_id(doc_id)
            if tenant_id:
                DocumentService.remove_document(doc, tenant_id)
        File2DocumentService.delete_by_file_id(file.id)

        FileService.delete(file)

    def _delete_folder_recursive(folder, tenant_id):
        # ``tenant_id`` is only threaded through the recursion; it is not
        # otherwise used here.
        for child in FileService.list_all_files_by_parent_id(folder.id):
            if child.type == FileType.FOLDER.value:
                _delete_folder_recursive(child, tenant_id)
            else:
                _delete_single_file(child)
        FileService.delete(folder)

    try:
        for file_id in file_ids:
            found, file = FileService.get_by_id(file_id)
            if not found or not file:
                return get_data_error_result(message="File or Folder not found!")
            if not file.tenant_id:
                return get_data_error_result(message="Tenant not found!")
            if not check_file_team_permission(file, current_user.id):
                return get_json_result(data=False, message="No authorization.", code=settings.RetCode.AUTHENTICATION_ERROR)

            if file.source_type == FileSource.KNOWLEDGEBASE:
                continue

            if file.type == FileType.FOLDER.value:
                _delete_folder_recursive(file, current_user.id)
            else:
                _delete_single_file(file)

        return get_json_result(data=True)
    except Exception as exc:
        return server_error_response(exc)
@router.post('/rename')
async def rename(
    request: RenameFileRequest,
    current_user = Depends(get_current_user)
):
    """Rename a file or folder.

    For non-folder entries the (case-insensitive) file extension must stay
    the same. The new name must not already be used by an entry in the same
    parent folder. When the file is linked to a document, the first linked
    document is renamed as well.

    Returns:
        ``data=True`` on success, otherwise an error result.
    """
    req = request.model_dump()
    try:
        file_id = req["file_id"]
        new_name = req["name"]

        e, file = FileService.get_by_id(file_id)
        if not e:
            return get_data_error_result(message="File not found!")
        if not check_file_team_permission(file, current_user.id):
            return get_json_result(data=False, message='No authorization.', code=settings.RetCode.AUTHENTICATION_ERROR)

        if file.type != FileType.FOLDER.value \
                and pathlib.Path(new_name.lower()).suffix != pathlib.Path(
                    file.name.lower()).suffix:
            return get_json_result(
                data=False,
                message="The extension of file can't be changed",
                code=settings.RetCode.ARGUMENT_ERROR)

        # Scope the duplicate check to the sibling folder via ``parent_id``,
        # the keyword every other FileService.query call in this module uses.
        # NOTE(review): the previous ``pf_id=`` keyword matches no field seen
        # in this module, so the lookup was presumably not restricted to the
        # parent folder - confirm against FileService.query.
        for existing_file in FileService.query(name=new_name, parent_id=file.parent_id):
            # Exact comparison kept in case the query matches names loosely.
            if existing_file.name == new_name:
                return get_data_error_result(
                    message="Duplicated file name in the same folder.")

        if not FileService.update_by_id(file_id, {"name": new_name}):
            return get_data_error_result(
                message="Database error (File rename)!")

        # Keep the first linked document's name in sync with the file.
        informs = File2DocumentService.get_by_file_id(file_id)
        if informs:
            if not DocumentService.update_by_id(
                    informs[0].document_id, {"name": new_name}):
                return get_data_error_result(
                    message="Database error (Document rename)!")

        return get_json_result(data=True)
    except Exception as e:
        return server_error_response(e)
@router.get('/get/{file_id}')
async def get(
    file_id: str,
    current_user = Depends(get_current_user)
):
    """Return the raw content of a file.

    The blob is read from storage under the file's ``parent_id`` and
    ``location``; when that yields nothing, the address reported by
    ``File2DocumentService.get_storage_address`` is tried instead. The
    response media type is derived from the file extension.

    Returns:
        A ``Response`` carrying the blob, or an error result when the file is
        missing, access is denied, or an exception is raised.
    """
    try:
        found, file = FileService.get_by_id(file_id)
        if not found:
            return get_data_error_result(message="Document not found!")
        if not check_file_team_permission(file, current_user.id):
            return get_json_result(data=False, message='No authorization.', code=settings.RetCode.AUTHENTICATION_ERROR)

        blob = STORAGE_IMPL.get(file.parent_id, file.location)
        if not blob:
            bucket, object_name = File2DocumentService.get_storage_address(file_id=file_id)
            blob = STORAGE_IMPL.get(bucket, object_name)

        # Extension = text after the last dot of the lower-cased name.
        ext_match = re.search(r"\.([^.]+)$", file.name.lower())
        if ext_match is None:
            content_type = "application/octet-stream"
        else:
            ext = ext_match.group(1)
            # Unknown extensions fall back to a synthesized media type.
            if file.type == FileType.VISUAL.value:
                content_type = CONTENT_TYPE_MAP.get(ext, f"image/{ext}")
            else:
                content_type = CONTENT_TYPE_MAP.get(ext, f"application/{ext}")

        return Response(content=blob, media_type=content_type)
    except Exception as exc:
        return server_error_response(exc)
@router.post("/mv")
async def move(
    request: MoveFilesRequest,
    current_user = Depends(get_current_user)
):
    """Move files and folders into a destination folder.

    All sources are validated (existence, tenant, permission) before anything
    is moved. Folders are moved recursively: a folder with the same name in
    the destination is reused, otherwise a new one is created, and the source
    folder record is deleted once its children have been moved. Plain files
    are moved in storage and their record is re-pointed to the destination.

    Returns:
        ``data=True`` on success, otherwise an error result.
    """
    req = request.model_dump()
    try:
        file_ids = req["src_file_ids"]
        dest_parent_id = req["dest_file_id"]

        ok, dest_folder = FileService.get_by_id(dest_parent_id)
        if not ok or not dest_folder:
            return get_data_error_result(message="Parent Folder not found!")

        files = FileService.get_by_ids(file_ids)
        if not files:
            return get_data_error_result(message="Source files not found!")
        files_dict = {f.id: f for f in files}

        for file_id in file_ids:
            file = files_dict.get(file_id)
            if not file:
                return get_data_error_result(message="File or Folder not found!")
            if not file.tenant_id:
                return get_data_error_result(message="Tenant not found!")
            if not check_file_team_permission(file, current_user.id):
                return get_json_result(
                    data=False,
                    message="No authorization.",
                    code=settings.RetCode.AUTHENTICATION_ERROR,
                )
            # A folder moved into itself would keep recursing into the copy
            # it just created inside itself.
            # NOTE(review): moving a folder into one of its own descendants
            # is not detected here and likely needs an ancestor check.
            if file.type == FileType.FOLDER.value and file.id == dest_folder.id:
                return get_data_error_result(message="Can't move a folder into itself!")

        def _move_entry_recursive(source_file_entry, dest_folder):
            # Already directly inside the destination: nothing to do. Without
            # this guard a folder would be picked as its own merge target
            # below and then deleted, leaving its children without a parent.
            if source_file_entry.parent_id == dest_folder.id:
                return

            if source_file_entry.type == FileType.FOLDER.value:
                # Reuse a same-named entry in the destination, else create
                # the folder there.
                existing_folder = FileService.query(name=source_file_entry.name, parent_id=dest_folder.id)
                if existing_folder:
                    new_folder = existing_folder[0]
                else:
                    new_folder = FileService.insert(
                        {
                            "id": get_uuid(),
                            "parent_id": dest_folder.id,
                            "tenant_id": source_file_entry.tenant_id,
                            "created_by": current_user.id,
                            "name": source_file_entry.name,
                            "location": "",
                            "size": 0,
                            "type": FileType.FOLDER.value,
                        }
                    )

                sub_files = FileService.list_all_files_by_parent_id(source_file_entry.id)
                for sub_file in sub_files:
                    _move_entry_recursive(sub_file, new_folder)

                # The source folder record is dropped after its children
                # have been moved.
                FileService.delete_by_id(source_file_entry.id)
                return

            old_parent_id = source_file_entry.parent_id
            old_location = source_file_entry.location
            # Pick a storage location that is free in the destination.
            new_location = source_file_entry.name
            while STORAGE_IMPL.obj_exist(dest_folder.id, new_location):
                new_location += "_"

            try:
                STORAGE_IMPL.move(old_parent_id, old_location, dest_folder.id, new_location)
            except Exception as storage_err:
                # Chain the original error so its traceback is preserved.
                raise RuntimeError(
                    f"Move file failed at storage layer: {str(storage_err)}"
                ) from storage_err

            FileService.update_by_id(
                source_file_entry.id,
                {
                    "parent_id": dest_folder.id,
                    "location": new_location,
                },
            )

        for file in files:
            _move_entry_recursive(file, dest_folder)

        return get_json_result(data=True)
    except Exception as e:
        return server_error_response(e)