---
# Evaluation scenario configuration: siemens-pdf-offline-smoke.
# Relative paths below are written as in the original file.
# NOTE(review): presumably resolved relative to this file's directory —
# confirm against the loader.

scenario_name: siemens-pdf-offline-smoke
mode: offline
# No application adapter is configured for this scenario.
app_adapter: null
dataset: ../../datasets/normalized/siemens_pdf_offline_smoke.csv
judge_model: deepseek-v4-flash
embedding_model: text-embedding-v3

metrics:
  - faithfulness
  - answer_relevancy
  - context_recall
  - context_precision
  # Optional: robustness / end-to-end metrics (the dataset already
  # includes ground_truth; uncomment to enable).
  # - noise_sensitivity    # robustness: sensitivity to retrieval noise
  # - factual_correctness  # end-to-end: factual correctness (vs. the reference answer)
  # - semantic_similarity  # end-to-end: semantic similarity (embedding-based, no LLM calls)

output_dir: ../../outputs/siemens-pdf-offline-smoke

runtime:
  batch_size: 4
  # NOTE(review): presumably an upper bound on evaluated samples — confirm.
  max_samples: 30