---
# Evaluation scenario definition. Runs in offline mode with no app adapter
# configured (app_adapter is an explicit null).
scenario_name: sample-offline-baseline
mode: offline
app_adapter: null

# Input dataset (CSV), given as a relative path.
# NOTE(review): confirm which base directory the loader resolves this against.
dataset: ../../datasets/normalized/sample_offline_rag_eval.csv

# Model identifiers used by the scenario.
judge_model: deepseek-v4-flash
embedding_model: text-embedding-v3

# Metrics requested for this scenario.
metrics:
  - faithfulness
  - answer_relevancy
  - context_recall
  - context_precision

# Output location, given as a relative path (same base-directory caveat as
# `dataset` above).
output_dir: ../../outputs/sample-offline-baseline

# Execution settings.
runtime:
  batch_size: 4
  # NOTE(review): max_samples is assumed to belong under `runtime`; the
  # flattened original does not show nesting, so confirm against the schema
  # the consumer expects.
  max_samples: 3