feat: make contexts optional in /api/score
When contexts is absent, metrics that require retrieved_contexts (faithfulness, context_recall, context_precision, noise_sensitivity) are automatically skipped and appear in skipped_metrics. Only answer_relevancy, factual_correctness, and semantic_similarity remain computable without contexts.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -57,9 +57,11 @@ class TestScoreRequest:
|
|||||||
with pytest.raises(ValidationError):
|
with pytest.raises(ValidationError):
|
||||||
ScoreRequest(question="q", contexts="c") # type: ignore[call-arg]
|
ScoreRequest(question="q", contexts="c") # type: ignore[call-arg]
|
||||||
|
|
||||||
def test_missing_contexts_raises(self):
|
def test_missing_contexts_defaults_to_none(self):
|
||||||
with pytest.raises(ValidationError):
|
"""contexts is now optional — missing contexts is allowed."""
|
||||||
ScoreRequest(question="q", answer="a") # type: ignore[call-arg]
|
req = ScoreRequest(question="q", answer="a")
|
||||||
|
assert req.contexts is None
|
||||||
|
assert req.contexts_as_list() == []
|
||||||
|
|
||||||
def test_custom_metrics_accepted(self):
|
def test_custom_metrics_accepted(self):
|
||||||
req = ScoreRequest(
|
req = ScoreRequest(
|
||||||
@@ -115,6 +117,17 @@ class TestScoreRequest:
|
|||||||
"factual_correctness",
|
"factual_correctness",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def test_effective_metrics_drops_context_dependent_when_contexts_absent(self):
|
||||||
|
"""Without contexts, context-dependent metrics are excluded."""
|
||||||
|
req = ScoreRequest(
|
||||||
|
question="q", answer="a",
|
||||||
|
metrics=["faithfulness", "answer_relevancy", "context_precision"],
|
||||||
|
)
|
||||||
|
effective = req.effective_metrics()
|
||||||
|
assert "answer_relevancy" in effective
|
||||||
|
assert "faithfulness" not in effective
|
||||||
|
assert "context_precision" not in effective
|
||||||
|
|
||||||
|
|
||||||
class TestScoreResponse:
|
class TestScoreResponse:
|
||||||
def test_score_response_structure(self):
|
def test_score_response_structure(self):
|
||||||
|
|||||||
@@ -73,7 +73,8 @@ def score_sample(
|
|||||||
用于日志记录、质量监控或触发 Agent 自我改进流程。
|
用于日志记录、质量监控或触发 Agent 自我改进流程。
|
||||||
|
|
||||||
**contexts 格式**:多个检索片段用 `context_separator`(默认 `" |||| "`)拼接为一个字符串,
|
**contexts 格式**:多个检索片段用 `context_separator`(默认 `" |||| "`)拼接为一个字符串,
|
||||||
服务端自动拆分后传入 RAGAS 管道。
|
服务端自动拆分后传入 RAGAS 管道。**contexts 为可选字段**,缺失时自动跳过依赖检索内容的指标
|
||||||
|
(`faithfulness`、`context_recall`、`context_precision`、`noise_sensitivity`)。
|
||||||
|
|
||||||
**ground_truth 可选**:
|
**ground_truth 可选**:
|
||||||
- 提供时:所有指定指标均参与计算。
|
- 提供时:所有指定指标均参与计算。
|
||||||
@@ -99,12 +100,13 @@ def score_sample(
|
|||||||
"""
|
"""
|
||||||
client = f"{raw_request.client.host}:{raw_request.client.port}" if raw_request.client else "unknown"
|
client = f"{raw_request.client.host}:{raw_request.client.port}" if raw_request.client else "unknown"
|
||||||
logger.info(
|
logger.info(
|
||||||
"[score] incoming client=%s method=%s content_type=%s metrics=%s has_gt=%s",
|
"[score] incoming client=%s method=%s content_type=%s metrics=%s has_gt=%s has_ctx=%s",
|
||||||
client,
|
client,
|
||||||
raw_request.method,
|
raw_request.method,
|
||||||
raw_request.headers.get("content-type", ""),
|
raw_request.headers.get("content-type", ""),
|
||||||
request.metrics,
|
request.metrics,
|
||||||
request.ground_truth is not None,
|
request.ground_truth is not None,
|
||||||
|
bool(request.contexts),
|
||||||
)
|
)
|
||||||
settings = _get_settings()
|
settings = _get_settings()
|
||||||
|
|
||||||
|
|||||||
@@ -384,6 +384,14 @@ _GT_DEPENDENT_METRICS: frozenset[str] = frozenset({
|
|||||||
"noise_sensitivity",
|
"noise_sensitivity",
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# 需要 contexts 才能计算的指标集合
|
||||||
|
_CONTEXT_DEPENDENT_METRICS: frozenset[str] = frozenset({
|
||||||
|
"faithfulness",
|
||||||
|
"context_recall",
|
||||||
|
"context_precision",
|
||||||
|
"noise_sensitivity",
|
||||||
|
})
|
||||||
|
|
||||||
# 所有合法指标名称
|
# 所有合法指标名称
|
||||||
_VALID_METRICS: frozenset[str] = frozenset({
|
_VALID_METRICS: frozenset[str] = frozenset({
|
||||||
"faithfulness",
|
"faithfulness",
|
||||||
@@ -428,8 +436,9 @@ class ScoreRequest(BaseModel):
|
|||||||
|
|
||||||
question: str = Field(description="问题文本。")
|
question: str = Field(description="问题文本。")
|
||||||
answer: str = Field(description="待评分的回答。")
|
answer: str = Field(description="待评分的回答。")
|
||||||
contexts: str = Field(
|
contexts: str | None = Field(
|
||||||
description="检索上下文字符串,多段之间用 context_separator 拼接。"
|
default=None,
|
||||||
|
description="检索上下文字符串,多段之间用 context_separator 拼接。缺失时自动跳过依赖检索内容的指标(faithfulness、context_recall、context_precision、noise_sensitivity)。",
|
||||||
)
|
)
|
||||||
ground_truth: str | None = Field(
|
ground_truth: str | None = Field(
|
||||||
default=None,
|
default=None,
|
||||||
@@ -467,15 +476,23 @@ class ScoreRequest(BaseModel):
|
|||||||
return value
|
return value
|
||||||
|
|
||||||
def contexts_as_list(self) -> list[str]:
|
def contexts_as_list(self) -> list[str]:
|
||||||
"""Split the contexts string into a list of non-empty fragments."""
|
"""Split the contexts string into a list of non-empty fragments.
|
||||||
|
|
||||||
|
Returns an empty list when contexts is None or blank.
|
||||||
|
"""
|
||||||
|
if not self.contexts:
|
||||||
|
return []
|
||||||
separator = self.context_separator or " |||| "
|
separator = self.context_separator or " |||| "
|
||||||
return [part.strip() for part in self.contexts.split(separator) if part.strip()]
|
return [part.strip() for part in self.contexts.split(separator) if part.strip()]
|
||||||
|
|
||||||
def effective_metrics(self) -> list[str]:
|
def effective_metrics(self) -> list[str]:
|
||||||
"""Return metrics filtered to exclude GT-dependent ones when ground_truth is absent."""
|
"""Return metrics filtered to exclude GT-dependent or context-dependent ones when inputs are absent."""
|
||||||
if self.ground_truth is not None:
|
result = list(self.metrics)
|
||||||
return list(self.metrics)
|
if self.ground_truth is None:
|
||||||
return [metric_name for metric_name in self.metrics if metric_name not in _GT_DEPENDENT_METRICS]
|
result = [m for m in result if m not in _GT_DEPENDENT_METRICS]
|
||||||
|
if not self.contexts:
|
||||||
|
result = [m for m in result if m not in _CONTEXT_DEPENDENT_METRICS]
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
class ScoreResponse(BaseModel):
|
class ScoreResponse(BaseModel):
|
||||||
|
|||||||
Reference in New Issue
Block a user