v0.21.1-fastapi

This commit is contained in:
2025-11-04 16:06:36 +08:00
parent 3e58c3d0e9
commit d57b5d76ae
218 changed files with 19617 additions and 72339 deletions

View File

@@ -13,15 +13,14 @@
# See the License for the specific language governing permissions and
# limitations under the License
#
import logging
import os
import pathlib
import re
from typing import List, Optional
from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile, Query
from fastapi.responses import StreamingResponse
from fastapi.security import HTTPAuthorizationCredentials
from api.utils.api_utils import security
import flask
from flask import request
from flask_login import login_required, current_user
from api.common.check_team_permission import check_file_team_permission
from api.db.services.document_service import DocumentService
@@ -36,110 +35,22 @@ from api.utils.api_utils import get_json_result
from api.utils.file_utils import filename_type
from api.utils.web_utils import CONTENT_TYPE_MAP
from rag.utils.storage_factory import STORAGE_IMPL
from pydantic import BaseModel
# Security
# Pydantic models for request/response
class CreateFileRequest(BaseModel):
    """JSON body accepted by the ``/create`` endpoint."""

    # Name of the new entry; ``create`` rejects it when an entry with the
    # same name already exists under the target folder.
    name: str
    # Id of the folder to create the entry in. When empty, ``create`` falls
    # back to the current user's root folder.
    parent_id: Optional[str] = None
    # Requested entry type; ``create`` reads it as ``input_file_type``.
    type: Optional[str] = None
class RemoveFileRequest(BaseModel):
    """JSON body accepted by the ``/rm`` endpoint."""

    # Ids of the files/folders to delete; ``rm`` looks each one up with
    # ``FileService.get_by_id`` in the order given.
    file_ids: List[str]
class RenameFileRequest(BaseModel):
    """JSON body accepted by the ``/rename`` endpoint."""

    # Id of the file or folder to rename.
    file_id: str
    # New name. For non-folder entries ``rename`` requires the suffix to
    # match the current name's suffix (compared case-insensitively).
    name: str
class MoveFileRequest(BaseModel):
    """JSON body accepted by the ``/mv`` endpoint."""

    # Ids of the entries to move.
    src_file_ids: List[str]
    # Id of the destination; ``move`` treats it as the new parent folder.
    dest_file_id: str
# Dependency injection
async def get_current_user(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """Resolve the bearer credentials of a request to an active user.

    The credential string is deserialized with an ``itsdangerous``
    ``URLSafeTimedSerializer`` keyed by ``settings.SECRET_KEY``. The decoded
    access token is then looked up through ``UserService.query`` restricted
    to users whose status is ``StatusEnum.VALID``.

    Args:
        credentials: Bearer credentials produced by the ``security``
            dependency.

    Returns:
        The first user record matching the decoded access token.

    Raises:
        HTTPException: 401 with "Authorization header required" when no
            credential string is present, or 401 with "Invalid access token"
            for every other failure. The specific reason is only logged.
        RuntimeError: when ``itsdangerous`` cannot be imported.
    """
    from api.db import StatusEnum
    from api.db.services.user_service import UserService
    from fastapi import HTTPException, status
    import logging

    try:
        from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer
    except ImportError as err:
        # The previous fallback bound the PyJWT *module* to ``Serializer`` and
        # then called it, which can only raise TypeError. Fail with an
        # explicit message instead.
        raise RuntimeError("itsdangerous is required to verify access tokens") from err

    serializer = Serializer(secret_key=settings.SECRET_KEY)

    # NOTE(review): ``credentials`` may be None if ``security`` is configured
    # not to auto-error on a missing header - confirm against its definition.
    authorization = credentials.credentials if credentials else None
    if not authorization:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Authorization header required"
        )

    user = None
    failure = None
    try:
        access_token = str(serializer.loads(authorization))
        stripped = access_token.strip()
        if not stripped:
            failure = "Authentication attempt with empty access token"
        # Access tokens should be UUIDs (32 hex characters)
        elif len(stripped) < 32:
            failure = f"Authentication attempt with invalid token format: {len(access_token)} chars"
        else:
            matches = UserService.query(
                access_token=access_token, status=StatusEnum.VALID.value
            )
            if not matches:
                failure = "Invalid access token"
            elif not matches[0].access_token or not matches[0].access_token.strip():
                failure = f"User {matches[0].email} has empty access_token in database"
            else:
                user = matches[0]
    except Exception as e:
        # Signature/deserialization or lookup failure: treated as a bad token.
        failure = f"load_user got exception {e}"

    if user is None:
        # Keep the detailed reason (which may contain an e-mail address) in
        # the server log only; the client always sees the same message.
        logging.warning("%s", failure)
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid access token"
        )
    return user
# Create router
router = APIRouter()
@router.post('/upload')
async def upload(
parent_id: Optional[str] = Form(None),
files: List[UploadFile] = File(...),
current_user = Depends(get_current_user)
):
pf_id = parent_id
@manager.route('/upload', methods=['POST']) # noqa: F821
@login_required
# @validate_request("parent_id")
def upload():
pf_id = request.form.get("parent_id")
if not pf_id:
root_folder = FileService.get_root_folder(current_user.id)
pf_id = root_folder["id"]
if not files:
if 'file' not in request.files:
return get_json_result(
data=False, message='No file part!', code=settings.RetCode.ARGUMENT_ERROR)
file_objs = files
file_objs = request.files.getlist('file')
for file_obj in file_objs:
if file_obj.filename == '':
@@ -186,7 +97,7 @@ async def upload(
location = file_obj_names[file_len - 1]
while STORAGE_IMPL.obj_exist(last_folder.id, location):
location += "_"
blob = await file_obj.read()
blob = file_obj.read()
filename = duplicate_name(
FileService.query,
name=file_obj_names[file_len - 1],
@@ -209,13 +120,13 @@ async def upload(
return server_error_response(e)
@router.post('/create')
async def create(
req: CreateFileRequest,
current_user = Depends(get_current_user)
):
pf_id = req.parent_id
input_file_type = req.type
@manager.route('/create', methods=['POST']) # noqa: F821
@login_required
@validate_request("name")
def create():
req = request.json
pf_id = request.json.get("parent_id")
input_file_type = request.json.get("type")
if not pf_id:
root_folder = FileService.get_root_folder(current_user.id)
pf_id = root_folder["id"]
@@ -224,7 +135,7 @@ async def create(
if not FileService.is_parent_folder_exist(pf_id):
return get_json_result(
data=False, message="Parent Folder Doesn't Exist!", code=settings.RetCode.OPERATING_ERROR)
if FileService.query(name=req.name, parent_id=pf_id):
if FileService.query(name=req["name"], parent_id=pf_id):
return get_data_error_result(
message="Duplicated folder name in the same folder.")
@@ -238,7 +149,7 @@ async def create(
"parent_id": pf_id,
"tenant_id": current_user.id,
"created_by": current_user.id,
"name": req.name,
"name": req["name"],
"location": "",
"size": 0,
"type": file_type
@@ -249,18 +160,17 @@ async def create(
return server_error_response(e)
@router.get('/list')
async def list_files(
parent_id: Optional[str] = Query(None),
keywords: str = Query(""),
page: int = Query(1),
page_size: int = Query(15),
orderby: str = Query("create_time"),
desc: bool = Query(True),
current_user = Depends(get_current_user)
):
pf_id = parent_id
@manager.route('/list', methods=['GET']) # noqa: F821
@login_required
def list_files():
pf_id = request.args.get("parent_id")
keywords = request.args.get("keywords", "")
page_number = int(request.args.get("page", 1))
items_per_page = int(request.args.get("page_size", 15))
orderby = request.args.get("orderby", "create_time")
desc = request.args.get("desc", True)
if not pf_id:
root_folder = FileService.get_root_folder(current_user.id)
pf_id = root_folder["id"]
@@ -271,7 +181,7 @@ async def list_files(
return get_data_error_result(message="Folder not found!")
files, total = FileService.get_by_pf_id(
current_user.id, pf_id, page, page_size, orderby, desc, keywords)
current_user.id, pf_id, page_number, items_per_page, orderby, desc, keywords)
parent_folder = FileService.get_parent_folder(pf_id)
if not parent_folder:
@@ -282,8 +192,9 @@ async def list_files(
return server_error_response(e)
@router.get('/root_folder')
async def get_root_folder(current_user = Depends(get_current_user)):
@manager.route('/root_folder', methods=['GET']) # noqa: F821
@login_required
def get_root_folder():
try:
root_folder = FileService.get_root_folder(current_user.id)
return get_json_result(data={"root_folder": root_folder})
@@ -291,11 +202,10 @@ async def get_root_folder(current_user = Depends(get_current_user)):
return server_error_response(e)
@router.get('/parent_folder')
async def get_parent_folder(
file_id: str = Query(...),
current_user = Depends(get_current_user)
):
@manager.route('/parent_folder', methods=['GET']) # noqa: F821
@login_required
def get_parent_folder():
file_id = request.args.get("file_id")
try:
e, file = FileService.get_by_id(file_id)
if not e:
@@ -307,11 +217,10 @@ async def get_parent_folder(
return server_error_response(e)
@router.get('/all_parent_folder')
async def get_all_parent_folders(
file_id: str = Query(...),
current_user = Depends(get_current_user)
):
@manager.route('/all_parent_folder', methods=['GET']) # noqa: F821
@login_required
def get_all_parent_folders():
file_id = request.args.get("file_id")
try:
e, file = FileService.get_by_id(file_id)
if not e:
@@ -326,90 +235,99 @@ async def get_all_parent_folders(
return server_error_response(e)
@manager.route("/rm", methods=["POST"])  # noqa: F821
@login_required
@validate_request("file_ids")
def rm():
    """Delete the files and folders listed in the request's ``file_ids``.

    Each id is resolved with ``FileService.get_by_id`` and checked with
    ``check_file_team_permission`` before anything belonging to it is
    removed. Entries whose ``source_type`` is ``FileSource.KNOWLEDGEBASE``
    are skipped. Folders are removed depth-first; plain files are removed
    together with their linked documents.

    Returns:
        ``get_json_result(data=True)`` on success, a data/authorization error
        result for the first id that fails validation (earlier ids have
        already been deleted at that point), or ``server_error_response`` if
        an exception escapes.
    """
    req = request.json
    file_ids = req["file_ids"]

    def _delete_single_file(file):
        # Storage removal is best-effort: a failure is logged and the
        # database clean-up below still runs.
        try:
            if file.location:
                STORAGE_IMPL.rm(file.parent_id, file.location)
        except Exception:
            logging.exception(f"Fail to remove object: {file.parent_id}/{file.location}")

        # Remove every document linked to this file, then the link rows.
        informs = File2DocumentService.get_by_file_id(file.id)
        for inform in informs:
            doc_id = inform.document_id
            e, doc = DocumentService.get_by_id(doc_id)
            if e and doc:
                tenant_id = DocumentService.get_tenant_id(doc_id)
                if tenant_id:
                    DocumentService.remove_document(doc, tenant_id)
        File2DocumentService.delete_by_file_id(file.id)

        FileService.delete(file)

    def _delete_folder_recursive(folder, tenant_id):
        # ``tenant_id`` is only passed through to nested calls.
        sub_files = FileService.list_all_files_by_parent_id(folder.id)
        for sub_file in sub_files:
            if sub_file.type == FileType.FOLDER.value:
                _delete_folder_recursive(sub_file, tenant_id)
            else:
                _delete_single_file(sub_file)

        # Children are gone; drop the folder row itself.
        FileService.delete(folder)

    try:
        # De-duplicate while keeping request order: a repeated id would
        # otherwise be reported as "not found" after its first deletion.
        for file_id in dict.fromkeys(file_ids):
            e, file = FileService.get_by_id(file_id)
            if not e or not file:
                return get_data_error_result(message="File or Folder not found!")
            if not file.tenant_id:
                return get_data_error_result(message="Tenant not found!")
            if not check_file_team_permission(file, current_user.id):
                return get_json_result(data=False, message="No authorization.", code=settings.RetCode.AUTHENTICATION_ERROR)

            if file.source_type == FileSource.KNOWLEDGEBASE:
                continue

            if file.type == FileType.FOLDER.value:
                _delete_folder_recursive(file, current_user.id)
                continue

            _delete_single_file(file)

        return get_json_result(data=True)
    except Exception as e:
        return server_error_response(e)
@router.post('/rename')
async def rename(
req: RenameFileRequest,
current_user = Depends(get_current_user)
):
@manager.route('/rename', methods=['POST']) # noqa: F821
@login_required
@validate_request("file_id", "name")
def rename():
req = request.json
try:
e, file = FileService.get_by_id(req.file_id)
e, file = FileService.get_by_id(req["file_id"])
if not e:
return get_data_error_result(message="File not found!")
if not check_file_team_permission(file, current_user.id):
return get_json_result(data=False, message='No authorization.', code=settings.RetCode.AUTHENTICATION_ERROR)
if file.type != FileType.FOLDER.value \
and pathlib.Path(req.name.lower()).suffix != pathlib.Path(
and pathlib.Path(req["name"].lower()).suffix != pathlib.Path(
file.name.lower()).suffix:
return get_json_result(
data=False,
message="The extension of file can't be changed",
code=settings.RetCode.ARGUMENT_ERROR)
for file in FileService.query(name=req.name, pf_id=file.parent_id):
if file.name == req.name:
for file in FileService.query(name=req["name"], pf_id=file.parent_id):
if file.name == req["name"]:
return get_data_error_result(
message="Duplicated file name in the same folder.")
if not FileService.update_by_id(
req.file_id, {"name": req.name}):
req["file_id"], {"name": req["name"]}):
return get_data_error_result(
message="Database error (File rename)!")
informs = File2DocumentService.get_by_file_id(req.file_id)
informs = File2DocumentService.get_by_file_id(req["file_id"])
if informs:
if not DocumentService.update_by_id(
informs[0].document_id, {"name": req.name}):
informs[0].document_id, {"name": req["name"]}):
return get_data_error_result(
message="Database error (Document rename)!")
@@ -418,8 +336,9 @@ async def rename(
return server_error_response(e)
@router.get('/get/{file_id}')
async def get(file_id: str, current_user = Depends(get_current_user)):
@manager.route('/get/<file_id>', methods=['GET']) # noqa: F821
@login_required
def get(file_id):
try:
e, file = FileService.get_by_id(file_id)
if not e:
@@ -432,6 +351,7 @@ async def get(file_id: str, current_user = Depends(get_current_user)):
b, n = File2DocumentService.get_storage_address(file_id=file_id)
blob = STORAGE_IMPL.get(b, n)
response = flask.make_response(blob)
ext = re.search(r"\.([^.]+)$", file.name.lower())
ext = ext.group(1) if ext else None
if ext:
@@ -439,43 +359,95 @@ async def get(file_id: str, current_user = Depends(get_current_user)):
content_type = CONTENT_TYPE_MAP.get(ext, f"image/{ext}")
else:
content_type = CONTENT_TYPE_MAP.get(ext, f"application/{ext}")
else:
content_type = "application/octet-stream"
return StreamingResponse(
iter([blob]),
media_type=content_type,
headers={"Content-Disposition": f"attachment; filename={file.name}"}
)
response.headers.set("Content-Type", content_type)
return response
except Exception as e:
return server_error_response(e)
@manager.route("/mv", methods=["POST"])  # noqa: F821
@login_required
@validate_request("src_file_ids", "dest_file_id")
def move():
    """Move the entries in ``src_file_ids`` under the entry ``dest_file_id``.

    All source entries are validated (existence, tenant, team permission)
    before anything is moved. Folders are merged into a same-named folder at
    the destination, or re-created there, and their children are moved
    recursively. Plain files are moved in storage and then re-parented in
    the database.

    Returns:
        ``get_json_result(data=True)`` on success, a data/authorization error
        result when validation fails, or ``server_error_response`` if an
        exception escapes (including a storage-layer move failure).
    """
    req = request.json
    try:
        file_ids = req["src_file_ids"]
        dest_parent_id = req["dest_file_id"]

        # Moving a folder into itself would make the recursive move below
        # keep creating and descending into new sub-folders.
        # NOTE(review): a destination nested deeper inside a source folder is
        # not detected here - confirm whether callers can trigger that.
        if dest_parent_id in file_ids:
            return get_data_error_result(message="Destination cannot be one of the source entries!")

        ok, dest_folder = FileService.get_by_id(dest_parent_id)
        if not ok or not dest_folder:
            return get_data_error_result(message="Parent Folder not found!")

        files = FileService.get_by_ids(file_ids)
        if not files:
            return get_data_error_result(message="Source files not found!")

        files_dict = {f.id: f for f in files}

        # Validate every requested id up front so nothing is moved when any
        # one of them is missing or not permitted.
        for file_id in file_ids:
            file = files_dict.get(file_id)
            if not file:
                return get_data_error_result(message="File or Folder not found!")
            if not file.tenant_id:
                return get_data_error_result(message="Tenant not found!")
            if not check_file_team_permission(file, current_user.id):
                return get_json_result(
                    data=False,
                    message="No authorization.",
                    code=settings.RetCode.AUTHENTICATION_ERROR,
                )

        def _move_entry_recursive(source_file_entry, dest_folder):
            if source_file_entry.type == FileType.FOLDER.value:
                # Reuse a same-named folder at the destination if one
                # exists, otherwise create it.
                existing_folder = FileService.query(name=source_file_entry.name, parent_id=dest_folder.id)
                if existing_folder:
                    new_folder = existing_folder[0]
                else:
                    new_folder = FileService.insert(
                        {
                            "id": get_uuid(),
                            "parent_id": dest_folder.id,
                            "tenant_id": source_file_entry.tenant_id,
                            "created_by": current_user.id,
                            "name": source_file_entry.name,
                            "location": "",
                            "size": 0,
                            "type": FileType.FOLDER.value,
                        }
                    )

                sub_files = FileService.list_all_files_by_parent_id(source_file_entry.id)
                for sub_file in sub_files:
                    _move_entry_recursive(sub_file, new_folder)

                # The source folder is empty now; remove its row.
                FileService.delete_by_id(source_file_entry.id)
                return

            old_parent_id = source_file_entry.parent_id
            old_location = source_file_entry.location
            filename = source_file_entry.name

            # Pick a storage key that is free in the destination by
            # appending underscores to the file name.
            new_location = filename
            while STORAGE_IMPL.obj_exist(dest_folder.id, new_location):
                new_location += "_"

            try:
                STORAGE_IMPL.move(old_parent_id, old_location, dest_folder.id, new_location)
            except Exception as storage_err:
                # Chain the cause so the storage traceback is not lost.
                raise RuntimeError(f"Move file failed at storage layer: {str(storage_err)}") from storage_err

            FileService.update_by_id(
                source_file_entry.id,
                {
                    "parent_id": dest_folder.id,
                    "location": new_location,
                },
            )

        for file in files:
            _move_entry_recursive(file, dest_folder)

        return get_json_result(data=True)

    except Exception as e:
        return server_error_response(e)