将flask改成fastapi
This commit is contained in:
61
rag/flow/tests/client.py
Normal file
61
rag/flow/tests/client.py
Normal file
@@ -0,0 +1,61 @@
|
||||
#
|
||||
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
import trio
|
||||
|
||||
from api import settings
|
||||
from rag.flow.pipeline import Pipeline
|
||||
|
||||
|
||||
def print_logs(pipeline: Pipeline):
|
||||
last_logs = "[]"
|
||||
while True:
|
||||
time.sleep(5)
|
||||
logs = pipeline.fetch_logs()
|
||||
logs_str = json.dumps(logs, ensure_ascii=False)
|
||||
if logs_str != last_logs:
|
||||
print(logs_str)
|
||||
last_logs = logs_str
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
dsl_default_path = os.path.join(
|
||||
os.path.dirname(os.path.realpath(__file__)),
|
||||
"dsl_examples",
|
||||
"general_pdf_all.json",
|
||||
)
|
||||
parser.add_argument("-s", "--dsl", default=dsl_default_path, help="input dsl", action="store", required=False)
|
||||
parser.add_argument("-d", "--doc_id", default=False, help="Document ID", action="store", required=True)
|
||||
parser.add_argument("-t", "--tenant_id", default=False, help="Tenant ID", action="store", required=True)
|
||||
args = parser.parse_args()
|
||||
|
||||
settings.init_settings()
|
||||
pipeline = Pipeline(open(args.dsl, "r").read(), tenant_id=args.tenant_id, doc_id=args.doc_id, task_id="xxxx", flow_id="xxx")
|
||||
pipeline.reset()
|
||||
|
||||
exe = ThreadPoolExecutor(max_workers=5)
|
||||
thr = exe.submit(print_logs, pipeline)
|
||||
|
||||
# queue_dataflow(dsl=open(args.dsl, "r").read(), tenant_id=args.tenant_id, doc_id=args.doc_id, task_id="xxxx", flow_id="xxx", priority=0)
|
||||
|
||||
trio.run(pipeline.run)
|
||||
thr.result()
|
||||
139
rag/flow/tests/dsl_examples/general_pdf_all.json
Normal file
139
rag/flow/tests/dsl_examples/general_pdf_all.json
Normal file
@@ -0,0 +1,139 @@
|
||||
{
|
||||
"components": {
|
||||
"File": {
|
||||
"obj":{
|
||||
"component_name": "File",
|
||||
"params": {
|
||||
}
|
||||
},
|
||||
"downstream": ["Parser:0"],
|
||||
"upstream": []
|
||||
},
|
||||
"Parser:0": {
|
||||
"obj": {
|
||||
"component_name": "Parser",
|
||||
"params": {
|
||||
"setups": {
|
||||
"pdf": {
|
||||
"parse_method": "deepdoc",
|
||||
"vlm_name": "",
|
||||
"lang": "Chinese",
|
||||
"suffix": [
|
||||
"pdf"
|
||||
],
|
||||
"output_format": "json"
|
||||
},
|
||||
"spreadsheet": {
|
||||
"suffix": [
|
||||
"xls",
|
||||
"xlsx",
|
||||
"csv"
|
||||
],
|
||||
"output_format": "html"
|
||||
},
|
||||
"word": {
|
||||
"suffix": [
|
||||
"doc",
|
||||
"docx"
|
||||
],
|
||||
"output_format": "json"
|
||||
},
|
||||
"slides": {
|
||||
"parse_method": "presentation",
|
||||
"suffix": [
|
||||
"pptx"
|
||||
],
|
||||
"output_format": "json"
|
||||
},
|
||||
"markdown": {
|
||||
"suffix": [
|
||||
"md",
|
||||
"markdown"
|
||||
],
|
||||
"output_format": "json"
|
||||
},
|
||||
"text": {
|
||||
"suffix": ["txt"],
|
||||
"output_format": "json"
|
||||
},
|
||||
"image": {
|
||||
"parse_method": "vlm",
|
||||
"llm_id":"glm-4.5v",
|
||||
"lang": "Chinese",
|
||||
"suffix": [
|
||||
"jpg",
|
||||
"jpeg",
|
||||
"png",
|
||||
"gif"
|
||||
],
|
||||
"output_format": "text"
|
||||
},
|
||||
"audio": {
|
||||
"suffix": [
|
||||
"da",
|
||||
"wave",
|
||||
"wav",
|
||||
"mp3",
|
||||
"aac",
|
||||
"flac",
|
||||
"ogg",
|
||||
"aiff",
|
||||
"au",
|
||||
"midi",
|
||||
"wma",
|
||||
"realaudio",
|
||||
"vqf",
|
||||
"oggvorbis",
|
||||
"ape"
|
||||
],
|
||||
"lang": "Chinese",
|
||||
"llm_id": "SenseVoiceSmall",
|
||||
"output_format": "json"
|
||||
},
|
||||
"email": {
|
||||
"suffix": [
|
||||
"msg"
|
||||
],
|
||||
"fields": [
|
||||
"from",
|
||||
"to",
|
||||
"cc",
|
||||
"bcc",
|
||||
"date",
|
||||
"subject",
|
||||
"body",
|
||||
"attachments"
|
||||
],
|
||||
"output_format": "json"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"downstream": ["Splitter:0"],
|
||||
"upstream": ["Begin"]
|
||||
},
|
||||
"Splitter:0": {
|
||||
"obj": {
|
||||
"component_name": "Splitter",
|
||||
"params": {
|
||||
"chunk_token_size": 512,
|
||||
"delimiters": ["\n"],
|
||||
"overlapped_percent": 0
|
||||
}
|
||||
},
|
||||
"downstream": ["Tokenizer:0"],
|
||||
"upstream": ["Parser:0"]
|
||||
},
|
||||
"Tokenizer:0": {
|
||||
"obj": {
|
||||
"component_name": "Tokenizer",
|
||||
"params": {
|
||||
}
|
||||
},
|
||||
"downstream": [],
|
||||
"upstream": ["Chunker:0"]
|
||||
}
|
||||
},
|
||||
"path": []
|
||||
}
|
||||
|
||||
84
rag/flow/tests/dsl_examples/hierarchical_merger.json
Normal file
84
rag/flow/tests/dsl_examples/hierarchical_merger.json
Normal file
@@ -0,0 +1,84 @@
|
||||
{
|
||||
"components": {
|
||||
"File": {
|
||||
"obj":{
|
||||
"component_name": "File",
|
||||
"params": {
|
||||
}
|
||||
},
|
||||
"downstream": ["Parser:0"],
|
||||
"upstream": []
|
||||
},
|
||||
"Parser:0": {
|
||||
"obj": {
|
||||
"component_name": "Parser",
|
||||
"params": {
|
||||
"setups": {
|
||||
"pdf": {
|
||||
"parse_method": "deepdoc",
|
||||
"vlm_name": "",
|
||||
"lang": "Chinese",
|
||||
"suffix": [
|
||||
"pdf"
|
||||
],
|
||||
"output_format": "json"
|
||||
},
|
||||
"spreadsheet": {
|
||||
"suffix": [
|
||||
"xls",
|
||||
"xlsx",
|
||||
"csv"
|
||||
],
|
||||
"output_format": "html"
|
||||
},
|
||||
"word": {
|
||||
"suffix": [
|
||||
"doc",
|
||||
"docx"
|
||||
],
|
||||
"output_format": "json"
|
||||
},
|
||||
"markdown": {
|
||||
"suffix": [
|
||||
"md",
|
||||
"markdown"
|
||||
],
|
||||
"output_format": "text"
|
||||
},
|
||||
"text": {
|
||||
"suffix": ["txt"],
|
||||
"output_format": "json"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"downstream": ["Splitter:0"],
|
||||
"upstream": ["File"]
|
||||
},
|
||||
"Splitter:0": {
|
||||
"obj": {
|
||||
"component_name": "Splitter",
|
||||
"params": {
|
||||
"chunk_token_size": 512,
|
||||
"delimiters": ["\r\n"],
|
||||
"overlapped_percent": 0
|
||||
}
|
||||
},
|
||||
"downstream": ["HierarchicalMerger:0"],
|
||||
"upstream": ["Parser:0"]
|
||||
},
|
||||
"HierarchicalMerger:0": {
|
||||
"obj": {
|
||||
"component_name": "HierarchicalMerger",
|
||||
"params": {
|
||||
"levels": [["^#[^#]"], ["^##[^#]"], ["^###[^#]"], ["^####[^#]"]],
|
||||
"hierarchy": 2
|
||||
}
|
||||
},
|
||||
"downstream": [],
|
||||
"upstream": ["Splitter:0"]
|
||||
}
|
||||
},
|
||||
"path": []
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user