更新
This commit is contained in:
@@ -1,13 +1,13 @@
|
||||
scenario_name: sample-pdf-question-bank-online
|
||||
mode: online
|
||||
dataset: ../../datasets/raw/generated/sample-pdf-question-bank.csv
|
||||
judge_model: deepseek-v4-pro
|
||||
judge_model: qwen3.5-flash
|
||||
embedding_model: text-embedding-v3
|
||||
metrics:
|
||||
- faithfulness
|
||||
- answer_relevancy
|
||||
- context_recall
|
||||
- context_precision
|
||||
- faithfulness
|
||||
- answer_relevancy
|
||||
- context_recall
|
||||
- context_precision
|
||||
output_dir: ../../outputs/online/sample-pdf-question-bank
|
||||
runtime:
|
||||
batch_size: 2
|
||||
@@ -19,4 +19,4 @@ app_adapter:
|
||||
callable: apps.pdf_question_bank.adapter:run
|
||||
static_kwargs:
|
||||
source_chunks_path: ../../outputs/dataset-builds/sample-pdf-question-bank/latest/source_chunks.jsonl
|
||||
model: deepseek-v4-flash
|
||||
model: glm-5
|
||||
|
||||
@@ -1,28 +1,26 @@
|
||||
scenario_name: siemens-pdf-question-bank-online
|
||||
mode: online
|
||||
dataset: ../../datasets/raw/generated/siemens-pdf-question-bank.csv
|
||||
# judge_model: qwen3.5-flash
|
||||
judge_model: deepseek-v4-flash
|
||||
embedding_model: text-embedding-v3
|
||||
optimization_advisor: true # 评测结束后自动生成优化建议报告
|
||||
optimization_advisor: true
|
||||
metrics:
|
||||
- faithfulness
|
||||
- answer_relevancy
|
||||
- context_recall
|
||||
- context_precision
|
||||
# 已启用:鲁棒性 / 端到端指标(数据集已含 ground_truth)
|
||||
- noise_sensitivity # 鲁棒性:对检索噪声的敏感度
|
||||
- factual_correctness # 端到端:事实正确性(相对标准答案)
|
||||
- semantic_similarity # 端到端:语义相似度(embedding,无 LLM 调用)
|
||||
- faithfulness
|
||||
- answer_relevancy
|
||||
- context_recall
|
||||
- context_precision
|
||||
- noise_sensitivity
|
||||
- factual_correctness
|
||||
- semantic_similarity
|
||||
output_dir: ../../outputs/online/siemens-pdf-question-bank
|
||||
runtime:
|
||||
batch_size: 4
|
||||
app_concurrency: 4
|
||||
metric_concurrency: 4
|
||||
max_samples: 50
|
||||
batch_size: 3
|
||||
app_concurrency: 3
|
||||
metric_concurrency: 3
|
||||
max_samples: 10
|
||||
app_adapter:
|
||||
type: python
|
||||
callable: apps.siemens_pdf_qa.adapter:run
|
||||
static_kwargs:
|
||||
source_chunks_path: ../../outputs/dataset-builds/siemens-pdf-question-bank/latest/source_chunks.jsonl
|
||||
model: deepseek-v4-flash
|
||||
model: glm-5
|
||||
|
||||
Reference in New Issue
Block a user