diff --git a/tests/webapp/test_score_api.py b/tests/webapp/test_score_api.py index 8fa7a12..1eb788c 100644 --- a/tests/webapp/test_score_api.py +++ b/tests/webapp/test_score_api.py @@ -57,9 +57,11 @@ class TestScoreRequest: with pytest.raises(ValidationError): ScoreRequest(question="q", contexts="c") # type: ignore[call-arg] - def test_missing_contexts_raises(self): - with pytest.raises(ValidationError): - ScoreRequest(question="q", answer="a") # type: ignore[call-arg] + def test_missing_contexts_defaults_to_none(self): + """contexts is now optional — missing contexts is allowed.""" + req = ScoreRequest(question="q", answer="a") + assert req.contexts is None + assert req.contexts_as_list() == [] def test_custom_metrics_accepted(self): req = ScoreRequest( @@ -115,6 +117,17 @@ class TestScoreRequest: "factual_correctness", ] + def test_effective_metrics_drops_context_dependent_when_contexts_absent(self): + """Without contexts, context-dependent metrics are excluded.""" + req = ScoreRequest( + question="q", answer="a", + metrics=["faithfulness", "answer_relevancy", "context_precision"], + ) + effective = req.effective_metrics() + assert "answer_relevancy" in effective + assert "faithfulness" not in effective + assert "context_precision" not in effective + class TestScoreResponse: def test_score_response_structure(self): diff --git a/webapp/api/score.py b/webapp/api/score.py index 6644d09..93cd512 100644 --- a/webapp/api/score.py +++ b/webapp/api/score.py @@ -73,7 +73,8 @@ def score_sample( 用于日志记录、质量监控或触发 Agent 自我改进流程。 **contexts 格式**:多个检索片段用 `context_separator`(默认 `" |||| "`)拼接为一个字符串, - 服务端自动拆分后传入 RAGAS 管道。 + 服务端自动拆分后传入 RAGAS 管道。**contexts 为可选字段**,缺失时自动跳过依赖检索内容的指标 + (`faithfulness`、`context_recall`、`context_precision`、`noise_sensitivity`)。 **ground_truth 可选**: - 提供时:所有指定指标均参与计算。 @@ -99,12 +100,13 @@ def score_sample( """ client = f"{raw_request.client.host}:{raw_request.client.port}" if raw_request.client else "unknown" logger.info( - "[score] incoming client=%s method=%s content_type=%s metrics=%s has_gt=%s", + "[score] incoming client=%s method=%s content_type=%s metrics=%s has_gt=%s has_ctx=%s", client, raw_request.method, raw_request.headers.get("content-type", ""), request.metrics, request.ground_truth is not None, + bool(request.contexts), ) settings = _get_settings() diff --git a/webapp/models.py b/webapp/models.py index 89e6f48..7a0a6db 100644 --- a/webapp/models.py +++ b/webapp/models.py @@ -384,6 +384,14 @@ _GT_DEPENDENT_METRICS: frozenset[str] = frozenset({ "noise_sensitivity", }) +# 需要 contexts 才能计算的指标集合 +_CONTEXT_DEPENDENT_METRICS: frozenset[str] = frozenset({ + "faithfulness", + "context_recall", + "context_precision", + "noise_sensitivity", +}) + # 所有合法指标名称 _VALID_METRICS: frozenset[str] = frozenset({ "faithfulness", @@ -428,8 +436,9 @@ class ScoreRequest(BaseModel): question: str = Field(description="问题文本。") answer: str = Field(description="待评分的回答。") - contexts: str = Field( - description="检索上下文字符串,多段之间用 context_separator 拼接。" + contexts: str | None = Field( + default=None, + description="检索上下文字符串,多段之间用 context_separator 拼接。缺失时自动跳过依赖检索内容的指标(faithfulness、context_recall、context_precision、noise_sensitivity)。", ) ground_truth: str | None = Field( default=None, @@ -467,15 +476,23 @@ class ScoreRequest(BaseModel): return value def contexts_as_list(self) -> list[str]: - """Split the contexts string into a list of non-empty fragments.""" + """Split the contexts string into a list of non-empty fragments. + + Returns an empty list when contexts is None or blank. + """ + if not self.contexts: + return [] separator = self.context_separator or " |||| " return [part.strip() for part in self.contexts.split(separator) if part.strip()] def effective_metrics(self) -> list[str]: - """Return metrics filtered to exclude GT-dependent ones when ground_truth is absent.""" - if self.ground_truth is not None: - return list(self.metrics) - return [metric_name for metric_name in self.metrics if metric_name not in _GT_DEPENDENT_METRICS] + """Return metrics filtered to exclude GT-dependent or context-dependent ones when inputs are absent.""" + result = list(self.metrics) + if self.ground_truth is None: + result = [m for m in result if m not in _GT_DEPENDENT_METRICS] + if not self.contexts: + result = [m for m in result if m not in _CONTEXT_DEPENDENT_METRICS] + return result class ScoreResponse(BaseModel):