first commit
This commit is contained in:
17
scenarios/dataset_build/real-multi-pdf-build.yaml
Normal file
17
scenarios/dataset_build/real-multi-pdf-build.yaml
Normal file
@@ -0,0 +1,17 @@
|
||||
job_name: real-multi-pdf-question-bank
|
||||
input:
|
||||
path: ../../datasets/raw/pdfs
|
||||
glob: "*.pdf"
|
||||
parser:
|
||||
provider: aliyun_docmind
|
||||
failure_mode: fail
|
||||
generation:
|
||||
output_type: online_question_bank
|
||||
review_mode: draft_with_manual_review
|
||||
max_questions_per_document: 4
|
||||
max_source_chunks_per_question: 3
|
||||
output:
|
||||
dataset_path: ../../datasets/raw/generated/real-multi-pdf-question-bank.csv
|
||||
artifact_dir: ../../outputs/dataset-builds/real-multi-pdf-question-bank
|
||||
runtime:
|
||||
max_documents: 3
|
||||
17
scenarios/dataset_build/real-pdf-build.yaml
Normal file
17
scenarios/dataset_build/real-pdf-build.yaml
Normal file
@@ -0,0 +1,17 @@
|
||||
job_name: real-pdf-question-bank
|
||||
input:
|
||||
path: ../../datasets/raw/pdfs
|
||||
glob: "*.pdf"
|
||||
parser:
|
||||
provider: aliyun_docmind
|
||||
failure_mode: fail
|
||||
generation:
|
||||
output_type: online_question_bank
|
||||
review_mode: draft_with_manual_review
|
||||
max_questions_per_document: 5
|
||||
max_source_chunks_per_question: 3
|
||||
output:
|
||||
dataset_path: ../../datasets/raw/generated/real-pdf-question-bank.csv
|
||||
artifact_dir: ../../outputs/dataset-builds/real-pdf-question-bank
|
||||
runtime:
|
||||
max_documents: 1
|
||||
18
scenarios/dataset_build/sample-pdf-build.yaml
Normal file
18
scenarios/dataset_build/sample-pdf-build.yaml
Normal file
@@ -0,0 +1,18 @@
|
||||
job_name: sample-pdf-question-bank
|
||||
input:
|
||||
path: ../../datasets/raw/pdfs
|
||||
glob: "*.pdf"
|
||||
parser:
|
||||
provider: aliyun_docmind
|
||||
failure_mode: fail
|
||||
generation:
|
||||
model: qwen3.6-plus
|
||||
output_type: online_question_bank
|
||||
review_mode: draft_with_manual_review
|
||||
max_questions_per_document: 10
|
||||
max_source_chunks_per_question: 3
|
||||
output:
|
||||
dataset_path: ../../datasets/raw/generated/sample-pdf-question-bank.csv
|
||||
artifact_dir: ../../outputs/dataset-builds/sample-pdf-question-bank
|
||||
runtime:
|
||||
max_documents: 20
|
||||
15
scenarios/offline/real-pdf-offline-smoke.yaml
Normal file
15
scenarios/offline/real-pdf-offline-smoke.yaml
Normal file
@@ -0,0 +1,15 @@
|
||||
scenario_name: real-pdf-offline-smoke
|
||||
mode: offline
|
||||
app_adapter: null
|
||||
dataset: ../../datasets/normalized/real_multi_pdf_offline_smoke.csv
|
||||
judge_model: deepseek-v4-flash
|
||||
embedding_model: text-embedding-v3
|
||||
metrics:
|
||||
- faithfulness
|
||||
- answer_relevancy
|
||||
- context_recall
|
||||
- context_precision
|
||||
output_dir: ../../outputs/real-pdf-offline-smoke
|
||||
runtime:
|
||||
batch_size: 4
|
||||
max_samples: 6
|
||||
15
scenarios/offline/sample-offline.yaml
Normal file
15
scenarios/offline/sample-offline.yaml
Normal file
@@ -0,0 +1,15 @@
|
||||
scenario_name: sample-offline-baseline
|
||||
mode: offline
|
||||
app_adapter: null
|
||||
dataset: ../../datasets/normalized/sample_offline_rag_eval.csv
|
||||
judge_model: deepseek-v4-flash
|
||||
embedding_model: text-embedding-v3
|
||||
metrics:
|
||||
- faithfulness
|
||||
- answer_relevancy
|
||||
- context_recall
|
||||
- context_precision
|
||||
output_dir: ../../outputs/sample-offline-baseline
|
||||
runtime:
|
||||
batch_size: 4
|
||||
max_samples: 3
|
||||
15
scenarios/offline/sample-pdf-offline-smoke.yaml
Normal file
15
scenarios/offline/sample-pdf-offline-smoke.yaml
Normal file
@@ -0,0 +1,15 @@
|
||||
scenario_name: sample-pdf-offline-smoke
|
||||
mode: offline
|
||||
app_adapter: null
|
||||
dataset: ../../datasets/normalized/sample_pdf_offline_smoke.csv
|
||||
judge_model: deepseek-v4-flash
|
||||
embedding_model: text-embedding-v3
|
||||
metrics:
|
||||
- faithfulness
|
||||
- answer_relevancy
|
||||
- context_recall
|
||||
- context_precision
|
||||
output_dir: ../../outputs/sample-pdf-offline-smoke
|
||||
runtime:
|
||||
batch_size: 4
|
||||
max_samples: 3
|
||||
22
scenarios/online/sample-pdf-question-bank-online.yaml
Normal file
22
scenarios/online/sample-pdf-question-bank-online.yaml
Normal file
@@ -0,0 +1,22 @@
|
||||
scenario_name: sample-pdf-question-bank-online
|
||||
mode: online
|
||||
dataset: ../../datasets/raw/generated/sample-pdf-question-bank.csv
|
||||
judge_model: deepseek-v4-pro
|
||||
embedding_model: text-embedding-v3
|
||||
metrics:
|
||||
- faithfulness
|
||||
- answer_relevancy
|
||||
- context_recall
|
||||
- context_precision
|
||||
output_dir: ../../outputs/online/sample-pdf-question-bank
|
||||
runtime:
|
||||
batch_size: 2
|
||||
app_concurrency: 2
|
||||
metric_concurrency: 2
|
||||
max_samples: 45
|
||||
app_adapter:
|
||||
type: python
|
||||
callable: apps.pdf_question_bank.adapter:run
|
||||
static_kwargs:
|
||||
source_chunks_path: ../../outputs/dataset-builds/sample-pdf-question-bank/latest/source_chunks.jsonl
|
||||
model: deepseek-v4-flash
|
||||
Reference in New Issue
Block a user