Files
TERES_fastapi_backend/api/apps/models/document_models.py
2025-11-06 17:15:46 +08:00

205 lines
7.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from typing import Optional, Literal, List
from pydantic import BaseModel, Field, model_validator
class CreateDocumentRequest(BaseModel):
"""创建文档请求
支持两种解析类型:
- parse_type=1: 使用内置解析器,需要 parser_idpipeline_id 为空
- parse_type=2: 使用自定义 pipeline需要 pipeline_idparser_id 为空
如果不提供 parse_type则从知识库继承解析配置
"""
name: str
kb_id: str
parse_type: Optional[Literal[1, 2]] = Field(default=None, description="解析类型1=内置解析器2=自定义pipelineNone=从知识库继承")
parser_id: Optional[str] = Field(default="", description="解析器IDparse_type=1时必需")
pipeline_id: Optional[str] = Field(default="", description="流水线IDparse_type=2时必需")
parser_config: Optional[dict] = None
@model_validator(mode='after')
def validate_parse_type_fields(self):
"""根据 parse_type 验证相应字段"""
if self.parse_type is not None:
if self.parse_type == 1:
# parse_type=1: 需要 parser_idpipeline_id 必须为空
parser_id_val = self.parser_id or ""
pipeline_id_val = self.pipeline_id or ""
if parser_id_val.strip() == "":
raise ValueError("parse_type=1时parser_id不能为空")
if pipeline_id_val.strip() != "":
raise ValueError("parse_type=1时pipeline_id必须为空")
elif self.parse_type == 2:
# parse_type=2: 需要 pipeline_idparser_id 必须为空
parser_id_val = self.parser_id or ""
pipeline_id_val = self.pipeline_id or ""
if pipeline_id_val.strip() == "":
raise ValueError("parse_type=2时pipeline_id不能为空")
if parser_id_val.strip() != "":
raise ValueError("parse_type=2时parser_id必须为空")
return self
class ChangeParserRequest(BaseModel):
"""修改文档解析器请求
支持两种解析类型:
- parse_type=1: 使用内置解析器,需要 parser_idpipeline_id 为空
- parse_type=2: 使用自定义 pipeline需要 pipeline_idparser_id 为空
"""
doc_id: str
parse_type: Literal[1, 2] = Field(..., description="解析类型1=内置解析器2=自定义pipeline")
parser_id: Optional[str] = Field(default="", description="解析器IDparse_type=1时必需")
pipeline_id: Optional[str] = Field(default="", description="流水线IDparse_type=2时必需")
parser_config: Optional[dict] = None
@model_validator(mode='after')
def validate_parse_type_fields(self):
"""根据 parse_type 验证相应字段"""
if self.parse_type == 1:
# parse_type=1: 需要 parser_idpipeline_id 必须为空
parser_id_val = self.parser_id or ""
pipeline_id_val = self.pipeline_id or ""
if parser_id_val.strip() == "":
raise ValueError("parse_type=1时parser_id不能为空")
if pipeline_id_val.strip() != "":
raise ValueError("parse_type=1时pipeline_id必须为空")
elif self.parse_type == 2:
# parse_type=2: 需要 pipeline_idparser_id 必须为空
parser_id_val = self.parser_id or ""
pipeline_id_val = self.pipeline_id or ""
if pipeline_id_val.strip() == "":
raise ValueError("parse_type=2时pipeline_id不能为空")
if parser_id_val.strip() != "":
raise ValueError("parse_type=2时parser_id必须为空")
return self
class WebCrawlRequest(BaseModel):
"""网页爬取请求"""
kb_id: str
name: str
url: str
class ListDocumentsQuery(BaseModel):
"""列出文档查询参数"""
kb_id: str
keywords: Optional[str] = ""
page: Optional[int] = 0
page_size: Optional[int] = 0
orderby: Optional[str] = "create_time"
desc: Optional[str] = "true"
create_time_from: Optional[int] = 0
create_time_to: Optional[int] = 0
class ListDocumentsBody(BaseModel):
"""列出文档请求体"""
run_status: Optional[List[str]] = []
types: Optional[List[str]] = []
suffix: Optional[List[str]] = []
class FilterDocumentsRequest(BaseModel):
"""过滤文档请求"""
kb_id: str
keywords: Optional[str] = ""
suffix: Optional[List[str]] = []
run_status: Optional[List[str]] = []
types: Optional[List[str]] = []
class GetDocumentInfosRequest(BaseModel):
"""获取文档信息请求"""
doc_ids: List[str]
class ChangeStatusRequest(BaseModel):
"""修改文档状态请求"""
doc_ids: List[str]
status: int
@model_validator(mode='after')
def validate_status(self):
if self.status not in [0, 1]:
raise ValueError('Status must be either 0 or 1!')
return self
class DeleteDocumentRequest(BaseModel):
"""删除文档请求"""
doc_id: str | List[str] # 支持单个或列表
class RunDocumentRequest(BaseModel):
"""运行文档解析请求"""
doc_ids: List[str]
run: int # TaskStatus 值
delete: Optional[bool] = False
class RenameDocumentRequest(BaseModel):
"""重命名文档请求"""
doc_id: str
name: str
class ChangeParserSimpleRequest(BaseModel):
"""简单修改解析器请求(兼容旧逻辑)"""
doc_id: str
parser_id: Optional[str] = None
pipeline_id: Optional[str] = None
parser_config: Optional[dict] = None
class UploadAndParseRequest(BaseModel):
"""上传并解析请求(仅用于验证 conversation_id"""
conversation_id: str
class ParseRequest(BaseModel):
"""解析请求"""
url: Optional[str] = None
class SetMetaRequest(BaseModel):
"""设置元数据请求"""
doc_id: str
meta: str # JSON 字符串
@model_validator(mode='after')
def validate_meta(self):
import json
try:
meta_dict = json.loads(self.meta)
if not isinstance(meta_dict, dict):
raise ValueError("Only dictionary type supported.")
for k, v in meta_dict.items():
if not isinstance(v, (str, int, float)):
raise ValueError(f"The type is not supported: {v}")
except json.JSONDecodeError as e:
raise ValueError(f"Json syntax error: {e}")
return self