更新
This commit is contained in:
@@ -94,6 +94,23 @@ class MetricPipeline:
|
||||
reference=sample.ground_truth,
|
||||
retrieved_contexts=sample.contexts,
|
||||
)
|
||||
elif name == "noise_sensitivity":
|
||||
coroutine = metric.ascore(
|
||||
user_input=sample.question,
|
||||
response=sample.answer,
|
||||
reference=sample.ground_truth,
|
||||
retrieved_contexts=sample.contexts,
|
||||
)
|
||||
elif name == "factual_correctness":
|
||||
coroutine = metric.ascore(
|
||||
response=sample.answer,
|
||||
reference=sample.ground_truth,
|
||||
)
|
||||
elif name == "semantic_similarity":
|
||||
coroutine = metric.ascore(
|
||||
reference=sample.ground_truth,
|
||||
response=sample.answer,
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"Unsupported metric: {name}")
|
||||
|
||||
|
||||
@@ -1,8 +1,13 @@
|
||||
"""Supported metric names recognized by scenario validation and pipeline setup."""
|
||||
|
||||
# Metric names that are accepted; any name outside this set is unsupported.
SUPPORTED_METRICS = {
    # Core retrieval / generation metrics (always available).
    "faithfulness",
    "answer_relevancy",
    "context_recall",
    "context_precision",
    # Robustness and end-to-end metrics (see the architecture design
    # document "架构设计", section 10.2).
    # Robustness: sensitivity to retrieval noise.
    "noise_sensitivity",
    # End-to-end: factual correctness of the answer relative to the
    # reference answer.
    "factual_correctness",
    # End-to-end: semantic similarity between the answer and the reference
    # answer (embedding-based, no LLM call).
    "semantic_similarity",
}
|
||||
|
||||
Reference in New Issue
Block a user