first commit
This commit is contained in:
17
scenarios/dataset_build/real-multi-pdf-build.yaml
Normal file
17
scenarios/dataset_build/real-multi-pdf-build.yaml
Normal file
@@ -0,0 +1,17 @@
|
||||
job_name: real-multi-pdf-question-bank
|
||||
input:
|
||||
  path: ../../datasets/raw/pdfs
|
||||
  glob: "*.pdf"
|
||||
parser:
|
||||
  provider: aliyun_docmind
|
||||
  failure_mode: fail
|
||||
generation:
|
||||
  output_type: online_question_bank
|
||||
  review_mode: draft_with_manual_review
|
||||
  max_questions_per_document: 4
|
||||
  max_source_chunks_per_question: 3
|
||||
output:
|
||||
  dataset_path: ../../datasets/raw/generated/real-multi-pdf-question-bank.csv
|
||||
  artifact_dir: ../../outputs/dataset-builds/real-multi-pdf-question-bank
|
||||
runtime:
|
||||
  max_documents: 3
|
||||
17
scenarios/dataset_build/real-pdf-build.yaml
Normal file
17
scenarios/dataset_build/real-pdf-build.yaml
Normal file
@@ -0,0 +1,17 @@
|
||||
job_name: real-pdf-question-bank
|
||||
input:
|
||||
  path: ../../datasets/raw/pdfs
|
||||
  glob: "*.pdf"
|
||||
parser:
|
||||
  provider: aliyun_docmind
|
||||
  failure_mode: fail
|
||||
generation:
|
||||
  output_type: online_question_bank
|
||||
  review_mode: draft_with_manual_review
|
||||
  max_questions_per_document: 5
|
||||
  max_source_chunks_per_question: 3
|
||||
output:
|
||||
  dataset_path: ../../datasets/raw/generated/real-pdf-question-bank.csv
|
||||
  artifact_dir: ../../outputs/dataset-builds/real-pdf-question-bank
|
||||
runtime:
|
||||
  max_documents: 1
|
||||
18
scenarios/dataset_build/sample-pdf-build.yaml
Normal file
18
scenarios/dataset_build/sample-pdf-build.yaml
Normal file
@@ -0,0 +1,18 @@
|
||||
job_name: sample-pdf-question-bank
|
||||
input:
|
||||
  path: ../../datasets/raw/pdfs
|
||||
  glob: "*.pdf"
|
||||
parser:
|
||||
  provider: aliyun_docmind
|
||||
  failure_mode: fail
|
||||
generation:
|
||||
  model: qwen3.6-plus
|
||||
  output_type: online_question_bank
|
||||
  review_mode: draft_with_manual_review
|
||||
  max_questions_per_document: 10
|
||||
  max_source_chunks_per_question: 3
|
||||
output:
|
||||
  dataset_path: ../../datasets/raw/generated/sample-pdf-question-bank.csv
|
||||
  artifact_dir: ../../outputs/dataset-builds/sample-pdf-question-bank
|
||||
runtime:
|
||||
  max_documents: 20
|
||||
Reference in New Issue
Block a user