v0.21.1-fastapi

2025-11-04 16:06:36 +08:00
parent 3e58c3d0e9
commit d57b5d76ae
218 changed files with 19617 additions and 72339 deletions

@@ -79,7 +79,7 @@ class DocumentService(CommonService):
     @classmethod
     @DB.connection_context()
     def get_list(cls, kb_id, page_number, items_per_page,
-                 orderby, desc, keywords, id, name):
+                 orderby, desc, keywords, id, name, suffix=None, run=None):
         fields = cls.get_cls_model_fields()
         docs = cls.model.select(*[*fields, UserCanvas.title]).join(File2Document, on=(File2Document.document_id == cls.model.id))\
             .join(File, on=(File.id == File2Document.file_id))\
@@ -96,6 +96,10 @@ class DocumentService(CommonService):
             docs = docs.where(
                 fn.LOWER(cls.model.name).contains(keywords.lower())
             )
+        if suffix:
+            docs = docs.where(cls.model.suffix.in_(suffix))
+        if run:
+            docs = docs.where(cls.model.run.in_(run))
         if desc:
             docs = docs.order_by(cls.model.getter_by(orderby).desc())
         else:
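
Note on the new filters: `suffix` and `run` each feed a Peewee `.in_()` clause, so both should be passed as lists rather than bare strings. A minimal sketch of a call using the new parameters; every argument value below is hypothetical and the variable name `result` assumes nothing about the return shape:

# Hypothetical call: restrict the listing to PDF/DOCX documents in
# specific run states. The status codes "0"/"1" are assumptions, not
# values confirmed by this diff.
result = DocumentService.get_list(
    kb_id="kb_123",           # hypothetical knowledge-base id
    page_number=1,
    items_per_page=20,
    orderby="create_time",
    desc=True,
    keywords="",
    id=None,
    name=None,
    suffix=["pdf", "docx"],   # becomes cls.model.suffix.in_(["pdf", "docx"])
    run=["0", "1"],           # becomes cls.model.run.in_(["0", "1"])
)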
@@ -667,9 +671,11 @@ class DocumentService(CommonService):
     @classmethod
     @DB.connection_context()
     def _sync_progress(cls, docs:list[dict]):
+        from api.db.services.task_service import TaskService
+
         for d in docs:
             try:
-                tsks = Task.query(doc_id=d["id"], order_by=Task.create_time)
+                tsks = TaskService.query(doc_id=d["id"], order_by=Task.create_time)
                 if not tsks:
                     continue
                 msg = []
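
The change above replaces a direct Peewee model query (`Task.query`) with the service-layer `TaskService.query`, and the import is deferred into the method body, the usual way to break a circular import between two service modules. A minimal sketch of that deferred-import pattern; module and function names are illustrative, not the project's real layout:

# doc_module.py -- illustrative names only
def sync(doc_ids):
    from task_module import query_tasks  # resolved at call time, not import time
    return {doc_id: query_tasks(doc_id) for doc_id in doc_ids}

# task_module.py
import doc_module  # safe: doc_module has no top-level import of task_module

def query_tasks(doc_id):
    return [f"task-for-{doc_id}"]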
@@ -787,21 +793,23 @@ class DocumentService(CommonService):
         "cancelled": int(cancelled),
     }

-def queue_raptor_o_graphrag_tasks(doc, ty, priority, fake_doc_id="", doc_ids=[]):
+def queue_raptor_o_graphrag_tasks(sample_doc_id, ty, priority, fake_doc_id="", doc_ids=[]):
     """
     You can provide a fake_doc_id to bypass the restriction of tasks at the knowledgebase level.
     Optionally, specify a list of doc_ids to determine which documents participate in the task.
     """
-    chunking_config = DocumentService.get_chunking_config(doc["id"])
+    assert ty in ["graphrag", "raptor", "mindmap"], "type should be graphrag, raptor or mindmap"
+    chunking_config = DocumentService.get_chunking_config(sample_doc_id["id"])
     hasher = xxhash.xxh64()
     for field in sorted(chunking_config.keys()):
         hasher.update(str(chunking_config[field]).encode("utf-8"))

     def new_task():
-        nonlocal doc
+        nonlocal sample_doc_id
         return {
             "id": get_uuid(),
-            "doc_id": fake_doc_id if fake_doc_id else doc["id"],
+            "doc_id": sample_doc_id["id"],
             "from_page": 100000000,
             "to_page": 100000000,
             "task_type": ty,
@@ -816,9 +824,9 @@ def queue_raptor_o_graphrag_tasks(doc, ty, priority, fake_doc_id="", doc_ids=[])
     task["digest"] = hasher.hexdigest()
     bulk_insert_into_db(Task, [task], True)
     if ty in ["graphrag", "raptor", "mindmap"]:
-        task["doc_ids"] = doc_ids
-    DocumentService.begin2parse(doc["id"])
+        task["doc_id"] = fake_doc_id
+        task["doc_ids"] = doc_ids
+    DocumentService.begin2parse(sample_doc_id["id"])

     assert REDIS_CONN.queue_product(get_svr_queue_name(priority), message=task), "Can't access Redis. Please check the Redis' status."
     return task["id"]
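
For context on the `digest` field set just above: the function hashes the chunking config with xxHash, walking the keys in sorted order so the digest is independent of dict insertion order, which lets identical configurations be recognized downstream. A self-contained sketch of that computation; the sample config values are made up:

import xxhash

def config_digest(chunking_config: dict) -> str:
    # Sorted keys make the digest stable across dict orderings.
    hasher = xxhash.xxh64()
    for field in sorted(chunking_config.keys()):
        hasher.update(str(chunking_config[field]).encode("utf-8"))
    return hasher.hexdigest()

# Same contents, different insertion order -> same digest.
assert config_digest({"a": 1, "b": 2}) == config_digest({"b": 2, "a": 1})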
@@ -830,7 +838,7 @@ def get_queue_length(priority):
     return int(group_info.get("lag", 0) or 0)


-async def doc_upload_and_parse(conversation_id, file_objs, user_id):
+def doc_upload_and_parse(conversation_id, file_objs, user_id):
     from api.db.services.api_service import API4ConversationService
     from api.db.services.conversation_service import ConversationService
     from api.db.services.dialog_service import DialogService
@@ -855,7 +863,7 @@ async def doc_upload_and_parse(conversation_id, file_objs, user_id):
     embd_mdl = LLMBundle(kb.tenant_id, LLMType.EMBEDDING, llm_name=kb.embd_id, lang=kb.language)

-    err, files = await FileService.upload_document(kb, file_objs, user_id)
+    err, files = FileService.upload_document(kb, file_objs, user_id)
     assert not err, "\n".join(err)

     def dummy(prog=None, msg=""):
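
With the `async` removed here and the `await` dropped from `FileService.upload_document`, `doc_upload_and_parse` is now a blocking call. If an async FastAPI handler still needs to reach it, one conventional option is to run it on a worker thread; the route below is purely hypothetical (path, user id, and the assumption that the function returns document ids are not confirmed by this diff):

from fastapi import FastAPI, UploadFile
from fastapi.concurrency import run_in_threadpool

app = FastAPI()

@app.post("/conversations/{conversation_id}/docs")  # hypothetical route
async def upload(conversation_id: str, files: list[UploadFile]):
    # Run the now-synchronous function off the event loop so slow
    # parsing does not stall other requests.
    doc_ids = await run_in_threadpool(
        doc_upload_and_parse, conversation_id, files, "user_id"  # user id is a placeholder
    )
    return {"doc_ids": doc_ids}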