v0.21.1-fastapi

This commit is contained in:
2025-11-04 16:06:36 +08:00
parent 3e58c3d0e9
commit d57b5d76ae
218 changed files with 19617 additions and 72339 deletions

View File

@@ -15,7 +15,6 @@
#
import logging
import re
import traceback
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
@@ -421,7 +420,7 @@ class FileService(CommonService):
@classmethod
@DB.connection_context()
async def upload_document(self, kb, file_objs, user_id):
def upload_document(self, kb, file_objs, user_id):
root_folder = self.get_root_folder(user_id)
pf_id = root_folder["id"]
self.init_knowledgebase_docs(pf_id, user_id)
@@ -441,7 +440,14 @@ class FileService(CommonService):
while STORAGE_IMPL.obj_exist(kb.id, location):
location += "_"
blob = await file.read()
# 支持 FastAPI UploadFile直接使用 file 属性进行同步读取
if hasattr(file, 'file') and hasattr(file, 'filename'):
# FastAPI UploadFile
file.file.seek(0)
blob = file.file.read()
else:
# 普通文件对象
blob = file.read()
if filetype == FileType.PDF.value:
blob = read_potential_broken_pdf(blob)
STORAGE_IMPL.put(kb.id, location, blob)
@@ -473,33 +479,33 @@ class FileService(CommonService):
FileService.add_file_from_kb(doc, kb_folder["id"], kb.tenant_id)
files.append((doc, blob))
except Exception as e:
traceback.print_exc()
err.append(file.filename + ": " + str(e))
return err, files
@classmethod
@DB.connection_context()
def list_all_files_by_parent_id(cls, parent_id):
    """Return all rows of ``cls.model`` whose ``parent_id`` matches.

    The row whose own ``id`` equals ``parent_id`` is filtered out by the
    query, so an entry that lists itself as its parent is never returned.

    Args:
        parent_id: Value compared against the model's ``parent_id`` column.

    Returns:
        A list holding the materialized query results.

    Raises:
        RuntimeError: If building or evaluating the query raises any
            exception; the underlying error is logged with its traceback
            before this is raised.
    """
    try:
        query = cls.model.select()
        has_parent = cls.model.parent_id == parent_id
        is_not_self = cls.model.id != parent_id
        # list() forces evaluation inside the try so failures are caught here.
        return list(query.where(has_parent & is_not_self))
    except Exception:
        logging.exception("list_by_parent_id failed")
        raise RuntimeError("Database error (list_by_parent_id)!")
@staticmethod
async def parse_docs(file_objs, user_id):
def parse_docs(file_objs, user_id):
exe = ThreadPoolExecutor(max_workers=12)
threads = []
for file in file_objs:
# Check if file has async read method (UploadFile)
if hasattr(file, 'read') and hasattr(file.read, '__call__'):
try:
# Try to get the coroutine to check if it's async
read_result = file.read()
if hasattr(read_result, '__await__'):
# It's an async method, await it
blob = await read_result
else:
# It's a sync method
blob = read_result
except Exception:
# Fallback to sync read
blob = file.read()
# 支持 FastAPI UploadFile直接使用 file 属性进行同步读取
if hasattr(file, 'file') and hasattr(file, 'filename'):
# FastAPI UploadFile
file.file.seek(0)
blob = file.file.read()
else:
# 普通文件对象
blob = file.read()
threads.append(exe.submit(FileService.parse, file.filename, blob, False))
res = []