# File: siemens_ragas/scenarios/dataset_build/sample-pdf-build.yaml
# Repository viewer metadata: 19 lines, 506 B, YAML
# Timestamp shown by the viewer: 2026-06-12 14:02:15 +08:00
# Dataset-build job configuration for the sample PDF question bank.
# Each top-level section below is a mapping; its settings are nested beneath
# it with 2-space indentation.
job_name: sample-pdf-question-bank

# Where the source documents come from.
# NOTE(review): relative paths are presumably resolved against this file's
# directory — confirm against the loader.
input:
  path: ../../datasets/raw/pdfs
  # Quoted on purpose: an unquoted leading `*` would be read as a YAML alias.
  glob: "*.pdf"

# Document parsing backend and its error policy.
# NOTE(review): `failure_mode: fail` presumably aborts the job on a parse
# error — confirm the accepted values with the consumer's schema.
parser:
  provider: aliyun_docmind
  failure_mode: fail

# Question-generation settings (model identifier, output kind, review flow,
# and per-document / per-question limits).
generation:
  model: qwen3.6-plus
  output_type: online_question_bank
  review_mode: draft_with_manual_review
  max_questions_per_document: 10
  max_source_chunks_per_question: 3

# Destinations for the generated dataset and for build artifacts.
output:
  dataset_path: ../../datasets/raw/generated/sample-pdf-question-bank.csv
  artifact_dir: ../../outputs/dataset-builds/sample-pdf-question-bank

# Run-level limits.
# NOTE(review): `max_documents` looks like a cap on how many input files are
# processed per run — confirm.
runtime:
  max_documents: 20