This commit is contained in:
2026-06-16 18:12:33 +08:00
parent ca01e44ad2
commit 24956bbf75
7 changed files with 1496 additions and 21 deletions

View File

@@ -94,6 +94,23 @@ class MetricPipeline:
reference=sample.ground_truth,
retrieved_contexts=sample.contexts,
)
elif name == "noise_sensitivity":
coroutine = metric.ascore(
user_input=sample.question,
response=sample.answer,
reference=sample.ground_truth,
retrieved_contexts=sample.contexts,
)
elif name == "factual_correctness":
coroutine = metric.ascore(
response=sample.answer,
reference=sample.ground_truth,
)
elif name == "semantic_similarity":
coroutine = metric.ascore(
reference=sample.ground_truth,
response=sample.answer,
)
else:
raise ValueError(f"Unsupported metric: {name}")

View File

@@ -1,8 +1,13 @@
"""Supported metric names recognized by scenario validation and pipeline setup."""
SUPPORTED_METRICS = (
    # Core retrieval / generation metrics (always available).
    {
        "faithfulness",
        "answer_relevancy",
        "context_recall",
        "context_precision",
    }
    # Robustness and end-to-end metrics (see Architecture Design §10.2).
    | {
        # Robustness: sensitivity to retrieval noise.
        "noise_sensitivity",
        # End-to-end: factual correctness of the answer relative to the
        # reference answer.
        "factual_correctness",
        # End-to-end: semantic similarity between the answer and the
        # reference answer (embedding-based, no LLM call).
        "semantic_similarity",
    }
)