---
# Evaluation scenario configuration.
# NOTE(review): the relative paths below are presumably resolved against this
# file's directory -- confirm against the loader.
scenario_name: siemens-pdf-offline-smoke
mode: offline
app_adapter: null

dataset: ../../datasets/normalized/siemens_pdf_offline_smoke.csv

judge_model: deepseek-v4-flash
embedding_model: text-embedding-v3

metrics:
  - faithfulness
  - answer_relevancy
  - context_recall
  - context_precision
  # Optional: robustness / end-to-end metrics (the dataset already contains
  # ground_truth; uncomment to enable).
  # - noise_sensitivity    # robustness: sensitivity to retrieval noise
  # - factual_correctness  # end-to-end: factual correctness (vs. the reference answer)
  # - semantic_similarity  # end-to-end: semantic similarity (embedding, no LLM calls)

output_dir: ../../outputs/siemens-pdf-offline-smoke

runtime:
  batch_size: 4
  max_samples: 30