This commit is contained in:
2026-06-16 18:12:33 +08:00
parent ca01e44ad2
commit 24956bbf75
7 changed files with 1496 additions and 21 deletions

View File

@@ -1,28 +1,26 @@
# Top-level settings for the siemens-pdf-question-bank-online scenario.
scenario_name: siemens-pdf-question-bank-online
mode: online
dataset: ../../datasets/raw/generated/siemens-pdf-question-bank.csv
# Alternative judge model, currently commented out:
# judge_model: qwen3.5-flash
judge_model: deepseek-v4-flash
embedding_model: text-embedding-v3
# NOTE(review): optimization_advisor is set twice below with the same value.
# Duplicate keys are invalid YAML and most loaders silently keep the last one;
# this looks like the old and new sides of a diff shown together - confirm.
optimization_advisor: true # automatically generate an optimization-advice report after the evaluation finishes
optimization_advisor: true
# List of evaluation metric names.
# NOTE(review): every metric name appears twice in this list (once in the
# first run of entries, once in the second). This looks like the old and new
# sides of a diff hunk shown together - confirm that only one set is intended.
metrics:
- faithfulness
- answer_relevancy
- context_recall
- context_precision
# Enabled: robustness / end-to-end metrics (the dataset already includes ground_truth)
- noise_sensitivity # robustness: sensitivity to retrieval noise
- factual_correctness # end-to-end: factual correctness (relative to the reference answer)
- semantic_similarity # end-to-end: semantic similarity (embedding-based, no LLM calls)
- faithfulness
- answer_relevancy
- context_recall
- context_precision
- noise_sensitivity
- factual_correctness
- semantic_similarity
# Output location and runtime limits for this scenario.
output_dir: ../../outputs/online/siemens-pdf-question-bank
# NOTE(review): the keys below are not indented under runtime:, so as written
# runtime: has an empty (null) value and they are top-level keys. Presumably
# the indentation was lost when this text was extracted - confirm.
runtime:
# NOTE(review): each of the four keys below is set twice (4/4/4/50, then
# 3/3/3/10). Duplicate keys are invalid YAML and most loaders silently keep
# the last value - confirm which set is intended.
batch_size: 4
app_concurrency: 4
metric_concurrency: 4
max_samples: 50
batch_size: 3
app_concurrency: 3
metric_concurrency: 3
max_samples: 10
# Adapter settings: a Python adapter identified by a "module:function" style
# callable string, followed by static_kwargs.
# NOTE(review): the keys below are not indented under app_adapter: or
# static_kwargs:, so as written both have empty (null) values. Presumably the
# nesting was lost when this text was extracted - confirm the intended layout.
app_adapter:
type: python
callable: apps.siemens_pdf_qa.adapter:run
static_kwargs:
source_chunks_path: ../../outputs/dataset-builds/siemens-pdf-question-bank/latest/source_chunks.jsonl
# NOTE(review): model is set twice (deepseek-v4-flash, then glm-5). Most
# loaders silently keep the last value for a duplicated key - confirm which
# model is intended.
model: deepseek-v4-flash
model: glm-5